author	ed <ed@FreeBSD.org>	2009-06-02 17:52:33 +0000
committer	ed <ed@FreeBSD.org>	2009-06-02 17:52:33 +0000
commit	3277b69d734b9c90b44ebde4ede005717e2c3b2e (patch)
tree	64ba909838c23261cace781ece27d106134ea451 /lib
Import LLVM, at r72732.
Diffstat (limited to 'lib'; each entry lists file mode, path, and lines added)
-rw-r--r--lib/Analysis/AliasAnalysis.cpp248
-rw-r--r--lib/Analysis/AliasAnalysisCounter.cpp173
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp246
-rw-r--r--lib/Analysis/AliasDebugger.cpp123
-rw-r--r--lib/Analysis/AliasSetTracker.cpp608
-rw-r--r--lib/Analysis/Analysis.cpp44
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp838
-rw-r--r--lib/Analysis/CFGPrinter.cpp221
-rw-r--r--lib/Analysis/CMakeLists.txt34
-rw-r--r--lib/Analysis/CaptureTracking.cpp112
-rw-r--r--lib/Analysis/ConstantFolding.cpp829
-rw-r--r--lib/Analysis/DbgInfoPrinter.cpp167
-rw-r--r--lib/Analysis/DebugInfo.cpp1079
-rw-r--r--lib/Analysis/IPA/Andersens.cpp2878
-rw-r--r--lib/Analysis/IPA/CMakeLists.txt7
-rw-r--r--lib/Analysis/IPA/CallGraph.cpp314
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp207
-rw-r--r--lib/Analysis/IPA/FindUsedTypes.cpp104
-rw-r--r--lib/Analysis/IPA/GlobalsModRef.cpp567
-rw-r--r--lib/Analysis/IPA/Makefile14
-rw-r--r--lib/Analysis/IVUsers.cpp391
-rw-r--r--lib/Analysis/InstCount.cpp86
-rw-r--r--lib/Analysis/Interval.cpp57
-rw-r--r--lib/Analysis/IntervalPartition.cpp114
-rw-r--r--lib/Analysis/LibCallAliasAnalysis.cpp141
-rw-r--r--lib/Analysis/LibCallSemantics.cpp65
-rw-r--r--lib/Analysis/LiveValues.cpp191
-rw-r--r--lib/Analysis/LoopInfo.cpp50
-rw-r--r--lib/Analysis/LoopPass.cpp340
-rw-r--r--lib/Analysis/LoopVR.cpp291
-rw-r--r--lib/Analysis/Makefile16
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp1142
-rw-r--r--lib/Analysis/PostDominators.cpp94
-rw-r--r--lib/Analysis/ProfileInfo.cpp100
-rw-r--r--lib/Analysis/ProfileInfoLoader.cpp277
-rw-r--r--lib/Analysis/ProfileInfoLoaderPass.cpp92
-rw-r--r--lib/Analysis/ScalarEvolution.cpp3824
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp646
-rw-r--r--lib/Analysis/SparsePropagation.cpp331
-rw-r--r--lib/Analysis/Trace.cpp50
-rw-r--r--lib/Analysis/ValueTracking.cpp1079
-rw-r--r--lib/Archive/Archive.cpp266
-rw-r--r--lib/Archive/ArchiveInternals.h85
-rw-r--r--lib/Archive/ArchiveReader.cpp627
-rw-r--r--lib/Archive/ArchiveWriter.cpp482
-rw-r--r--lib/Archive/CMakeLists.txt5
-rw-r--r--lib/Archive/Makefile17
-rw-r--r--lib/AsmParser/CMakeLists.txt6
-rw-r--r--lib/AsmParser/LLLexer.cpp835
-rw-r--r--lib/AsmParser/LLLexer.h84
-rw-r--r--lib/AsmParser/LLParser.cpp3279
-rw-r--r--lib/AsmParser/LLParser.h276
-rw-r--r--lib/AsmParser/LLToken.h130
-rw-r--r--lib/AsmParser/Makefile14
-rw-r--r--lib/AsmParser/Parser.cpp87
-rw-r--r--lib/Bitcode/Makefile14
-rw-r--r--lib/Bitcode/Reader/BitReader.cpp51
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp2126
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h214
-rw-r--r--lib/Bitcode/Reader/CMakeLists.txt7
-rw-r--r--lib/Bitcode/Reader/Deserialize.cpp454
-rw-r--r--lib/Bitcode/Reader/DeserializeAPFloat.cpp24
-rw-r--r--lib/Bitcode/Reader/DeserializeAPInt.cpp33
-rw-r--r--lib/Bitcode/Reader/Makefile15
-rw-r--r--lib/Bitcode/Writer/BitWriter.cpp58
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp1449
-rw-r--r--lib/Bitcode/Writer/BitcodeWriterPass.cpp56
-rw-r--r--lib/Bitcode/Writer/CMakeLists.txt9
-rw-r--r--lib/Bitcode/Writer/Makefile15
-rw-r--r--lib/Bitcode/Writer/Serialize.cpp118
-rw-r--r--lib/Bitcode/Writer/SerializeAPFloat.cpp21
-rw-r--r--lib/Bitcode/Writer/SerializeAPInt.cpp31
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp347
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h127
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp1724
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt10
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp518
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h549
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp2610
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h561
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp706
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h178
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfLabel.cpp35
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfLabel.h56
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfPrinter.cpp235
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfPrinter.h153
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfWriter.cpp129
-rw-r--r--lib/CodeGen/AsmPrinter/Makefile15
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp160
-rw-r--r--lib/CodeGen/BranchFolding.cpp1204
-rw-r--r--lib/CodeGen/CMakeLists.txt62
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp358
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp161
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp397
-rw-r--r--lib/CodeGen/ELFWriter.cpp575
-rw-r--r--lib/CodeGen/ELFWriter.h230
-rw-r--r--lib/CodeGen/GCMetadata.cpp212
-rw-r--r--lib/CodeGen/GCMetadataPrinter.cpp30
-rw-r--r--lib/CodeGen/GCStrategy.cpp392
-rw-r--r--lib/CodeGen/IfConversion.cpp1229
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp892
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp289
-rw-r--r--lib/CodeGen/LatencyPriorityQueue.cpp114
-rw-r--r--lib/CodeGen/LiveInterval.cpp853
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp2298
-rw-r--r--lib/CodeGen/LiveStackAnalysis.cpp66
-rw-r--r--lib/CodeGen/LiveVariables.cpp695
-rw-r--r--lib/CodeGen/LowerSubregs.cpp292
-rw-r--r--lib/CodeGen/MachOWriter.cpp976
-rw-r--r--lib/CodeGen/MachOWriter.h629
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp372
-rw-r--r--lib/CodeGen/MachineDominators.cpp53
-rw-r--r--lib/CodeGen/MachineFunction.cpp598
-rw-r--r--lib/CodeGen/MachineInstr.cpp1105
-rw-r--r--lib/CodeGen/MachineLICM.cpp406
-rw-r--r--lib/CodeGen/MachineLoopInfo.cpp40
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp368
-rw-r--r--lib/CodeGen/MachinePassRegistry.cpp41
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp125
-rw-r--r--lib/CodeGen/MachineSink.cpp257
-rw-r--r--lib/CodeGen/MachineVerifier.cpp690
-rw-r--r--lib/CodeGen/Makefile22
-rw-r--r--lib/CodeGen/OcamlGC.cpp38
-rw-r--r--lib/CodeGen/PBQP.cpp1395
-rw-r--r--lib/CodeGen/PBQP.h284
-rw-r--r--lib/CodeGen/PHIElimination.cpp431
-rw-r--r--lib/CodeGen/Passes.cpp54
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp941
-rw-r--r--lib/CodeGen/PreAllocSplitting.cpp1485
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp679
-rw-r--r--lib/CodeGen/PrologEpilogInserter.h167
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp92
-rw-r--r--lib/CodeGen/README.txt208
-rw-r--r--lib/CodeGen/RegAllocBigBlock.cpp892
-rw-r--r--lib/CodeGen/RegAllocLinearScan.cpp1535
-rw-r--r--lib/CodeGen/RegAllocLocal.cpp1068
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp871
-rw-r--r--lib/CodeGen/RegAllocSimple.cpp257
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp41
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp480
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp572
-rw-r--r--lib/CodeGen/ScheduleDAGEmit.cpp71
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp468
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.h184
-rw-r--r--lib/CodeGen/ScheduleDAGPrinter.cpp97
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt22
-rw-r--r--lib/CodeGen/SelectionDAG/CallingConvLower.cpp148
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp6203
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp1033
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp3091
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp1388
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp2382
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp1074
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h736
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp453
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp335
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp2151
-rw-r--r--lib/CodeGen/SelectionDAG/Makefile15
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp635
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp268
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp1533
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp294
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h179
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp668
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp5743
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp6052
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.h558
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp1347
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp416
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp2592
-rw-r--r--lib/CodeGen/ShadowStackGC.cpp439
-rw-r--r--lib/CodeGen/ShrinkWrapping.cpp1141
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.cpp2827
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.h313
-rw-r--r--lib/CodeGen/Spiller.cpp229
-rw-r--r--lib/CodeGen/Spiller.h37
-rw-r--r--lib/CodeGen/StackProtector.cpp224
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp733
-rw-r--r--lib/CodeGen/StrongPHIElimination.cpp1053
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp194
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp997
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp199
-rw-r--r--lib/CodeGen/VirtRegMap.cpp269
-rw-r--r--lib/CodeGen/VirtRegMap.h495
-rw-r--r--lib/CodeGen/VirtRegRewriter.cpp2225
-rw-r--r--lib/CodeGen/VirtRegRewriter.h56
-rw-r--r--lib/CompilerDriver/Action.cpp78
-rw-r--r--lib/CompilerDriver/CMakeLists.txt10
-rw-r--r--lib/CompilerDriver/CompilationGraph.cpp536
-rw-r--r--lib/CompilerDriver/Makefile19
-rw-r--r--lib/CompilerDriver/Plugin.cpp73
-rw-r--r--lib/CompilerDriver/Tool.cpp74
-rw-r--r--lib/Debugger/CMakeLists.txt10
-rw-r--r--lib/Debugger/Debugger.cpp230
-rw-r--r--lib/Debugger/Makefile16
-rw-r--r--lib/Debugger/ProgramInfo.cpp377
-rw-r--r--lib/Debugger/README.txt7
-rw-r--r--lib/Debugger/RuntimeInfo.cpp69
-rw-r--r--lib/Debugger/SourceFile.cpp82
-rw-r--r--lib/Debugger/SourceLanguage-CFamily.cpp28
-rw-r--r--lib/Debugger/SourceLanguage-CPlusPlus.cpp27
-rw-r--r--lib/Debugger/SourceLanguage-Unknown.cpp138
-rw-r--r--lib/Debugger/SourceLanguage.cpp54
-rw-r--r--lib/ExecutionEngine/CMakeLists.txt4
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp1010
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp206
-rw-r--r--lib/ExecutionEngine/Interpreter/CMakeLists.txt5
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp1382
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp542
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.cpp104
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h241
-rw-r--r--lib/ExecutionEngine/Interpreter/Makefile12
-rw-r--r--lib/ExecutionEngine/JIT/CMakeLists.txt11
-rw-r--r--lib/ExecutionEngine/JIT/Intercept.cpp148
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp708
-rw-r--r--lib/ExecutionEngine/JIT/JIT.h176
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp1056
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.h87
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp1615
-rw-r--r--lib/ExecutionEngine/JIT/JITMemoryManager.cpp541
-rw-r--r--lib/ExecutionEngine/JIT/Makefile37
-rw-r--r--lib/ExecutionEngine/JIT/TargetSelect.cpp83
-rw-r--r--lib/ExecutionEngine/Makefile13
-rw-r--r--lib/Linker/CMakeLists.txt6
-rw-r--r--lib/Linker/LinkArchives.cpp201
-rw-r--r--lib/Linker/LinkItems.cpp238
-rw-r--r--lib/Linker/LinkModules.cpp1328
-rw-r--r--lib/Linker/Linker.cpp178
-rw-r--r--lib/Linker/Makefile15
-rw-r--r--lib/Makefile15
-rw-r--r--lib/Support/APFloat.cpp2950
-rw-r--r--lib/Support/APInt.cpp2816
-rw-r--r--lib/Support/APSInt.cpp23
-rw-r--r--lib/Support/Allocator.cpp141
-rw-r--r--lib/Support/Annotation.cpp115
-rw-r--r--lib/Support/CMakeLists.txt31
-rw-r--r--lib/Support/CommandLine.cpp1184
-rw-r--r--lib/Support/ConstantRange.cpp472
-rw-r--r--lib/Support/Debug.cpp77
-rw-r--r--lib/Support/Dwarf.cpp589
-rw-r--r--lib/Support/FileUtilities.cpp263
-rw-r--r--lib/Support/FoldingSet.cpp378
-rw-r--r--lib/Support/GraphWriter.cpp89
-rw-r--r--lib/Support/IsInf.cpp49
-rw-r--r--lib/Support/IsNAN.cpp33
-rw-r--r--lib/Support/Makefile17
-rw-r--r--lib/Support/ManagedStatic.cpp91
-rw-r--r--lib/Support/MemoryBuffer.cpp279
-rw-r--r--lib/Support/PluginLoader.cpp43
-rw-r--r--lib/Support/PrettyStackTrace.cpp108
-rw-r--r--lib/Support/SlowOperationInformer.cpp66
-rw-r--r--lib/Support/SmallPtrSet.cpp223
-rw-r--r--lib/Support/Statistic.cpp126
-rw-r--r--lib/Support/Streams.cpp30
-rw-r--r--lib/Support/StringExtras.cpp114
-rw-r--r--lib/Support/StringMap.cpp234
-rw-r--r--lib/Support/StringPool.cpp35
-rw-r--r--lib/Support/SystemUtils.cpp58
-rw-r--r--lib/Support/Timer.cpp387
-rw-r--r--lib/Support/Triple.cpp187
-rw-r--r--lib/Support/raw_ostream.cpp376
-rw-r--r--lib/System/Alarm.cpp33
-rw-r--r--lib/System/Atomic.cpp53
-rw-r--r--lib/System/CMakeLists.txt19
-rw-r--r--lib/System/Disassembler.cpp79
-rw-r--r--lib/System/DynamicLibrary.cpp165
-rw-r--r--lib/System/Host.cpp24
-rw-r--r--lib/System/IncludeFile.cpp20
-rw-r--r--lib/System/LICENSE.TXT6
-rw-r--r--lib/System/Makefile19
-rw-r--r--lib/System/Memory.cpp62
-rw-r--r--lib/System/Mutex.cpp160
-rw-r--r--lib/System/Path.cpp287
-rw-r--r--lib/System/Process.cpp33
-rw-r--r--lib/System/Program.cpp33
-rw-r--r--lib/System/README.txt43
-rw-r--r--lib/System/Signals.cpp34
-rw-r--r--lib/System/TimeValue.cpp58
-rw-r--r--lib/System/Unix/Alarm.inc72
-rw-r--r--lib/System/Unix/Host.inc58
-rw-r--r--lib/System/Unix/Memory.inc150
-rw-r--r--lib/System/Unix/Mutex.inc49
-rw-r--r--lib/System/Unix/Path.inc876
-rw-r--r--lib/System/Unix/Process.inc237
-rw-r--r--lib/System/Unix/Program.inc287
-rw-r--r--lib/System/Unix/README.txt16
-rw-r--r--lib/System/Unix/Signals.inc230
-rw-r--r--lib/System/Unix/TimeValue.inc56
-rw-r--r--lib/System/Unix/Unix.h104
-rw-r--r--lib/System/Win32/Alarm.inc43
-rw-r--r--lib/System/Win32/DynamicLibrary.inc219
-rw-r--r--lib/System/Win32/Host.inc23
-rw-r--r--lib/System/Win32/Memory.inc72
-rw-r--r--lib/System/Win32/Mutex.inc58
-rw-r--r--lib/System/Win32/Path.inc825
-rw-r--r--lib/System/Win32/Process.inc150
-rw-r--r--lib/System/Win32/Program.inc316
-rw-r--r--lib/System/Win32/Signals.inc270
-rw-r--r--lib/System/Win32/TimeValue.inc51
-rw-r--r--lib/System/Win32/Win32.h57
-rw-r--r--lib/Target/ARM/ARM.h121
-rw-r--r--lib/Target/ARM/ARM.td136
-rw-r--r--lib/Target/ARM/ARMAddressingModes.h394
-rw-r--r--lib/Target/ARM/ARMBuildAttrs.h64
-rw-r--r--lib/Target/ARM/ARMCallingConv.td87
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp1411
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp1285
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp100
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h92
-rw-r--r--lib/Target/ARM/ARMFrameInfo.h32
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp911
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp2346
-rw-r--r--lib/Target/ARM/ARMISelLowering.h184
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td868
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp1025
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h258
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td1390
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td562
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td12
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td398
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp298
-rw-r--r--lib/Target/ARM/ARMJITInfo.h178
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp778
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h238
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.cpp1528
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.h102
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td221
-rw-r--r--lib/Target/ARM/ARMRelocations.h56
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp84
-rw-r--r--lib/Target/ARM/ARMSubtarget.h122
-rw-r--r--lib/Target/ARM/ARMTargetAsmInfo.cpp291
-rw-r--r--lib/Target/ARM/ARMTargetAsmInfo.h64
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp242
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h104
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp1117
-rw-r--r--lib/Target/ARM/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/ARM/AsmPrinter/Makefile15
-rw-r--r--lib/Target/ARM/CMakeLists.txt27
-rw-r--r--lib/Target/ARM/Makefile23
-rw-r--r--lib/Target/ARM/README-Thumb.txt228
-rw-r--r--lib/Target/ARM/README.txt554
-rw-r--r--lib/Target/Alpha/Alpha.h51
-rw-r--r--lib/Target/Alpha/Alpha.td66
-rw-r--r--lib/Target/Alpha/AlphaBranchSelector.cpp67
-rw-r--r--lib/Target/Alpha/AlphaCodeEmitter.cpp242
-rw-r--r--lib/Target/Alpha/AlphaISelDAGToDAG.cpp553
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.cpp798
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.h114
-rw-r--r--lib/Target/Alpha/AlphaInstrFormats.td268
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.cpp450
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.h97
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.td1137
-rw-r--r--lib/Target/Alpha/AlphaJITInfo.cpp307
-rw-r--r--lib/Target/Alpha/AlphaJITInfo.h47
-rw-r--r--lib/Target/Alpha/AlphaLLRP.cpp158
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.cpp335
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.h67
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.td171
-rw-r--r--lib/Target/Alpha/AlphaRelocations.h31
-rw-r--r--lib/Target/Alpha/AlphaSchedule.td84
-rw-r--r--lib/Target/Alpha/AlphaSubtarget.cpp25
-rw-r--r--lib/Target/Alpha/AlphaSubtarget.h47
-rw-r--r--lib/Target/Alpha/AlphaTargetAsmInfo.cpp31
-rw-r--r--lib/Target/Alpha/AlphaTargetAsmInfo.h32
-rw-r--r--lib/Target/Alpha/AlphaTargetMachine.cpp126
-rw-r--r--lib/Target/Alpha/AlphaTargetMachine.h82
-rw-r--r--lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp305
-rw-r--r--lib/Target/Alpha/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/Alpha/AsmPrinter/Makefile15
-rw-r--r--lib/Target/Alpha/CMakeLists.txt25
-rw-r--r--lib/Target/Alpha/Makefile22
-rw-r--r--lib/Target/Alpha/README.txt42
-rw-r--r--lib/Target/CBackend/CBackend.cpp3601
-rw-r--r--lib/Target/CBackend/CMakeLists.txt3
-rw-r--r--lib/Target/CBackend/CTargetMachine.h43
-rw-r--r--lib/Target/CBackend/Makefile14
-rw-r--r--lib/Target/CMakeLists.txt17
-rw-r--r--lib/Target/CellSPU/AsmPrinter/CMakeLists.txt12
-rw-r--r--lib/Target/CellSPU/AsmPrinter/Makefile17
-rw-r--r--lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp623
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt24
-rw-r--r--lib/Target/CellSPU/CellSDKIntrinsics.td448
-rw-r--r--lib/Target/CellSPU/Makefile22
-rw-r--r--lib/Target/CellSPU/README.txt90
-rw-r--r--lib/Target/CellSPU/SPU.h102
-rw-r--r--lib/Target/CellSPU/SPU.td66
-rw-r--r--lib/Target/CellSPU/SPU128InstrInfo.td41
-rw-r--r--lib/Target/CellSPU/SPU64InstrInfo.td394
-rw-r--r--lib/Target/CellSPU/SPUCallingConv.td115
-rw-r--r--lib/Target/CellSPU/SPUFrameInfo.cpp29
-rw-r--r--lib/Target/CellSPU/SPUFrameInfo.h79
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.cpp138
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.h41
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp1244
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp2980
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h154
-rw-r--r--lib/Target/CellSPU/SPUInstrBuilder.h43
-rw-r--r--lib/Target/CellSPU/SPUInstrFormats.td298
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp693
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.h114
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td4614
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.h43
-rw-r--r--lib/Target/CellSPU/SPUMathInstr.td97
-rw-r--r--lib/Target/CellSPU/SPUNodes.td156
-rw-r--r--lib/Target/CellSPU/SPUOperands.td655
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp614
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h101
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.td429
-rw-r--r--lib/Target/CellSPU/SPURegisterNames.h18
-rw-r--r--lib/Target/CellSPU/SPUSchedule.td57
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.cpp40
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.h95
-rw-r--r--lib/Target/CellSPU/SPUTargetAsmInfo.cpp74
-rw-r--r--lib/Target/CellSPU/SPUTargetAsmInfo.h51
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp98
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h95
-rw-r--r--lib/Target/CppBackend/CMakeLists.txt3
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp2007
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h44
-rw-r--r--lib/Target/CppBackend/Makefile14
-rw-r--r--lib/Target/DarwinTargetAsmInfo.cpp169
-rw-r--r--lib/Target/ELFTargetAsmInfo.cpp227
-rw-r--r--lib/Target/IA64/AsmPrinter/CMakeLists.txt12
-rw-r--r--lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp376
-rw-r--r--lib/Target/IA64/AsmPrinter/Makefile17
-rw-r--r--lib/Target/IA64/CMakeLists.txt20
-rw-r--r--lib/Target/IA64/IA64.h58
-rw-r--r--lib/Target/IA64/IA64.td39
-rw-r--r--lib/Target/IA64/IA64Bundling.cpp118
-rw-r--r--lib/Target/IA64/IA64ISelDAGToDAG.cpp575
-rw-r--r--lib/Target/IA64/IA64ISelLowering.cpp622
-rw-r--r--lib/Target/IA64/IA64ISelLowering.h76
-rw-r--r--lib/Target/IA64/IA64InstrBuilder.h40
-rw-r--r--lib/Target/IA64/IA64InstrFormats.td80
-rw-r--r--lib/Target/IA64/IA64InstrInfo.cpp193
-rw-r--r--lib/Target/IA64/IA64InstrInfo.h70
-rw-r--r--lib/Target/IA64/IA64InstrInfo.td751
-rw-r--r--lib/Target/IA64/IA64MachineFunctionInfo.h34
-rw-r--r--lib/Target/IA64/IA64RegisterInfo.cpp319
-rw-r--r--lib/Target/IA64/IA64RegisterInfo.h63
-rw-r--r--lib/Target/IA64/IA64RegisterInfo.td509
-rw-r--r--lib/Target/IA64/IA64Subtarget.cpp18
-rw-r--r--lib/Target/IA64/IA64Subtarget.h28
-rw-r--r--lib/Target/IA64/IA64TargetAsmInfo.cpp44
-rw-r--r--lib/Target/IA64/IA64TargetAsmInfo.h33
-rw-r--r--lib/Target/IA64/IA64TargetMachine.cpp94
-rw-r--r--lib/Target/IA64/IA64TargetMachine.h64
-rw-r--r--lib/Target/IA64/Makefile20
-rw-r--r--lib/Target/IA64/README48
-rw-r--r--lib/Target/MSIL/CMakeLists.txt3
-rw-r--r--lib/Target/MSIL/MSILWriter.cpp1680
-rw-r--r--lib/Target/MSIL/MSILWriter.h255
-rw-r--r--lib/Target/MSIL/Makefile14
-rw-r--r--lib/Target/MSIL/README.TXT26
-rw-r--r--lib/Target/MSP430/CMakeLists.txt23
-rw-r--r--lib/Target/MSP430/MSP430.h40
-rw-r--r--lib/Target/MSP430/MSP430.td60
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp267
-rw-r--r--lib/Target/MSP430/MSP430CallingConv.td37
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp194
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp670
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h103
-rw-r--r--lib/Target/MSP430/MSP430InstrFormats.td67
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp177
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h84
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td901
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h39
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp355
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h70
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.td122
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.cpp27
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h41
-rw-r--r--lib/Target/MSP430/MSP430TargetAsmInfo.cpp22
-rw-r--r--lib/Target/MSP430/MSP430TargetAsmInfo.h31
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp76
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h68
-rw-r--r--lib/Target/MSP430/Makefile21
-rw-r--r--lib/Target/MSP430/README.txt42
-rw-r--r--lib/Target/Makefile20
-rw-r--r--lib/Target/Mips/AsmPrinter/CMakeLists.txt12
-rw-r--r--lib/Target/Mips/AsmPrinter/Makefile17
-rw-r--r--lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp580
-rw-r--r--lib/Target/Mips/CMakeLists.txt22
-rw-r--r--lib/Target/Mips/Makefile23
-rw-r--r--lib/Target/Mips/Mips.h41
-rw-r--r--lib/Target/Mips/Mips.td88
-rw-r--r--lib/Target/Mips/MipsCallingConv.td86
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp77
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp392
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1254
-rw-r--r--lib/Target/Mips/MipsISelLowering.h130
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td304
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td182
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp623
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h223
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td707
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h131
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp535
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h78
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td252
-rw-r--r--lib/Target/Mips/MipsSchedule.td63
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp77
-rw-r--r--lib/Target/Mips/MipsSubtarget.h139
-rw-r--r--lib/Target/Mips/MipsTargetAsmInfo.cpp98
-rw-r--r--lib/Target/Mips/MipsTargetAsmInfo.h51
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp133
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h80
-rw-r--r--lib/Target/PIC16/CMakeLists.txt24
-rw-r--r--lib/Target/PIC16/Makefile21
-rw-r--r--lib/Target/PIC16/PIC16.h345
-rw-r--r--lib/Target/PIC16/PIC16.td40
-rw-r--r--lib/Target/PIC16/PIC16AsmPrinter.cpp404
-rw-r--r--lib/Target/PIC16/PIC16AsmPrinter.h70
-rw-r--r--lib/Target/PIC16/PIC16DebugInfo.cpp270
-rw-r--r--lib/Target/PIC16/PIC16DebugInfo.h114
-rw-r--r--lib/Target/PIC16/PIC16ISelDAGToDAG.cpp59
-rw-r--r--lib/Target/PIC16/PIC16ISelDAGToDAG.h60
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.cpp1756
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.h227
-rw-r--r--lib/Target/PIC16/PIC16InstrFormats.td117
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.cpp186
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.h70
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.td522
-rw-r--r--lib/Target/PIC16/PIC16MemSelOpt.cpp169
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.cpp91
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.h68
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.td33
-rw-r--r--lib/Target/PIC16/PIC16Subtarget.cpp27
-rw-r--r--lib/Target/PIC16/PIC16Subtarget.h45
-rw-r--r--lib/Target/PIC16/PIC16TargetAsmInfo.cpp264
-rw-r--r--lib/Target/PIC16/PIC16TargetAsmInfo.h79
-rw-r--r--lib/Target/PIC16/PIC16TargetMachine.cpp79
-rw-r--r--lib/Target/PIC16/PIC16TargetMachine.h76
-rw-r--r--lib/Target/PowerPC/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/PowerPC/AsmPrinter/Makefile15
-rw-r--r--lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp1204
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt28
-rw-r--r--lib/Target/PowerPC/Makefile22
-rw-r--r--lib/Target/PowerPC/PPC.h49
-rw-r--r--lib/Target/PowerPC/PPC.td114
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp174
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td66
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp266
-rw-r--r--lib/Target/PowerPC/PPCFrameInfo.h93
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp304
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h73
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp1170
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp4878
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h394
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td723
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td668
-rw-r--r--lib/Target/PowerPC/PPCInstrBuilder.h43
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td875
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp818
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h168
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td1475
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp437
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h48
-rw-r--r--lib/Target/PowerPC/PPCMachOWriterInfo.cpp151
-rw-r--r--lib/Target/PowerPC/PPCMachOWriterInfo.h55
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h104
-rw-r--r--lib/Target/PowerPC/PPCPerfectShuffle.h6586
-rw-r--r--lib/Target/PowerPC/PPCPredicates.cpp30
-rw-r--r--lib/Target/PowerPC/PPCPredicates.h39
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp1446
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h95
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td360
-rw-r--r--lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td508
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td63
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td73
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td76
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td83
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp152
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h160
-rw-r--r--lib/Target/PowerPC/PPCTargetAsmInfo.cpp161
-rw-r--r--lib/Target/PowerPC/PPCTargetAsmInfo.h62
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp250
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h120
-rw-r--r--lib/Target/PowerPC/README.txt799
-rw-r--r--lib/Target/PowerPC/README_ALTIVEC.txt211
-rw-r--r--lib/Target/README.txt1679
-rw-r--r--lib/Target/Sparc/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/Sparc/AsmPrinter/Makefile15
-rw-r--r--lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp355
-rw-r--r--lib/Target/Sparc/CMakeLists.txt23
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp76
-rw-r--r--lib/Target/Sparc/FPMover.cpp139
-rw-r--r--lib/Target/Sparc/Makefile22
-rw-r--r--lib/Target/Sparc/README.txt58
-rw-r--r--lib/Target/Sparc/Sparc.h119
-rw-r--r--lib/Target/Sparc/Sparc.td76
-rw-r--r--lib/Target/Sparc/SparcCallingConv.td32
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp215
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp1049
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h79
-rw-r--r--lib/Target/Sparc/SparcInstrFormats.td114
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp277
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h114
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td769
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp196
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h67
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.td158
-rw-r--r--lib/Target/Sparc/SparcSubtarget.cpp43
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h43
-rw-r--r--lib/Target/Sparc/SparcTargetAsmInfo.cpp50
-rw-r--r--lib/Target/Sparc/SparcTargetAsmInfo.h33
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp94
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h63
-rw-r--r--lib/Target/SubtargetFeature.cpp364
-rw-r--r--lib/Target/Target.cpp94
-rw-r--r--lib/Target/TargetAsmInfo.cpp461
-rw-r--r--lib/Target/TargetData.cpp603
-rw-r--r--lib/Target/TargetFrameInfo.cpp19
-rw-r--r--lib/Target/TargetInstrInfo.cpp50
-rw-r--r--lib/Target/TargetIntrinsicInfo.cpp22
-rw-r--r--lib/Target/TargetMachOWriterInfo.cpp25
-rw-r--r--lib/Target/TargetMachine.cpp229
-rw-r--r--lib/Target/TargetMachineRegistry.cpp78
-rw-r--r--lib/Target/TargetRegisterInfo.cpp144
-rw-r--r--lib/Target/TargetSubtarget.cpp22
-rw-r--r--lib/Target/X86/AsmPrinter/CMakeLists.txt11
-rw-r--r--lib/Target/X86/AsmPrinter/Makefile15
-rw-r--r--lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp1075
-rw-r--r--lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h164
-rw-r--r--lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp50
-rw-r--r--lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp609
-rw-r--r--lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h152
-rw-r--r--lib/Target/X86/CMakeLists.txt29
-rw-r--r--lib/Target/X86/Makefile23
-rw-r--r--lib/Target/X86/README-FPStack.txt85
-rw-r--r--lib/Target/X86/README-MMX.txt71
-rw-r--r--lib/Target/X86/README-SSE.txt918
-rw-r--r--lib/Target/X86/README-UNIMPLEMENTED.txt14
-rw-r--r--lib/Target/X86/README-X86-64.txt251
-rw-r--r--lib/Target/X86/README.txt1899
-rw-r--r--lib/Target/X86/X86.h84
-rw-r--r--lib/Target/X86/X86.td184
-rw-r--r--lib/Target/X86/X86COFF.h95
-rw-r--r--lib/Target/X86/X86CallingConv.td360
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp811
-rw-r--r--lib/Target/X86/X86CompilationCallback_Win64.asm67
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.cpp18
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.h29
-rw-r--r--lib/Target/X86/X86FastISel.cpp1549
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp1187
-rw-r--r--lib/Target/X86/X86FloatingPointRegKill.cpp139
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp1716
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp8794
-rw-r--r--lib/Target/X86/X86ISelLowering.h705
-rw-r--r--lib/Target/X86/X86Instr64bit.td1937
-rw-r--r--lib/Target/X86/X86InstrBuilder.h168
-rw-r--r--lib/Target/X86/X86InstrFPStack.td597
-rw-r--r--lib/Target/X86/X86InstrFormats.td285
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp3227
-rw-r--r--lib/Target/X86/X86InstrInfo.h461
-rw-r--r--lib/Target/X86/X86InstrInfo.td3961
-rw-r--r--lib/Target/X86/X86InstrMMX.td694
-rw-r--r--lib/Target/X86/X86InstrSSE.td3643
-rw-r--r--lib/Target/X86/X86JITInfo.cpp560
-rw-r--r--lib/Target/X86/X86JITInfo.h84
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h112
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp1280
-rw-r--r--lib/Target/X86/X86RegisterInfo.h163
-rw-r--r--lib/Target/X86/X86RegisterInfo.td762
-rw-r--r--lib/Target/X86/X86Relocations.h42
-rw-r--r--lib/Target/X86/X86Subtarget.cpp446
-rw-r--r--lib/Target/X86/X86Subtarget.h224
-rw-r--r--lib/Target/X86/X86TargetAsmInfo.cpp461
-rw-r--r--lib/Target/X86/X86TargetAsmInfo.h75
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp317
-rw-r--r--lib/Target/X86/X86TargetMachine.h124
-rw-r--r--lib/Target/XCore/CMakeLists.txt23
-rw-r--r--lib/Target/XCore/Makefile21
-rw-r--r--lib/Target/XCore/README.txt8
-rw-r--r--lib/Target/XCore/XCore.h42
-rw-r--r--lib/Target/XCore/XCore.td62
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp472
-rw-r--r--lib/Target/XCore/XCoreCallingConv.td33
-rw-r--r--lib/Target/XCore/XCoreFrameInfo.cpp27
-rw-r--r--lib/Target/XCore/XCoreFrameInfo.h34
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp230
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp934
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h123
-rw-r--r--lib/Target/XCore/XCoreInstrFormats.td120
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp524
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h110
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td991
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h69
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp598
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h94
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.td91
-rw-r--r--lib/Target/XCore/XCoreSubtarget.cpp28
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h46
-rw-r--r--lib/Target/XCore/XCoreTargetAsmInfo.cpp201
-rw-r--r--lib/Target/XCore/XCoreTargetAsmInfo.h45
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp71
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h63
-rw-r--r--lib/Transforms/Hello/CMakeLists.txt3
-rw-r--r--lib/Transforms/Hello/Hello.cpp67
-rw-r--r--lib/Transforms/Hello/Makefile16
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp863
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt25
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp114
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp944
-rw-r--r--lib/Transforms/IPO/DeadTypeElimination.cpp107
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp173
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp347
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp227
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp2485
-rw-r--r--lib/Transforms/IPO/IPConstantPropagation.cpp277
-rw-r--r--lib/Transforms/IPO/IPO.cpp75
-rw-r--r--lib/Transforms/IPO/IndMemRemoval.cpp89
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp75
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp106
-rw-r--r--lib/Transforms/IPO/Inliner.cpp278
-rw-r--r--lib/Transforms/IPO/Internalize.cpp184
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp261
-rw-r--r--lib/Transforms/IPO/LowerSetJmp.cpp536
-rw-r--r--lib/Transforms/IPO/Makefile15
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp377
-rw-r--r--lib/Transforms/IPO/PartialSpecialization.cpp191
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp255
-rw-r--r--lib/Transforms/IPO/RaiseAllocations.cpp251
-rw-r--r--lib/Transforms/IPO/StripDeadPrototypes.cpp72
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp415
-rw-r--r--lib/Transforms/IPO/StructRetPromotion.cpp351
-rw-r--r--lib/Transforms/Instrumentation/BlockProfiling.cpp126
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt6
-rw-r--r--lib/Transforms/Instrumentation/EdgeProfiling.cpp101
-rw-r--r--lib/Transforms/Instrumentation/Makefile15
-rw-r--r--lib/Transforms/Instrumentation/ProfilingUtils.cpp120
-rw-r--r--lib/Transforms/Instrumentation/ProfilingUtils.h31
-rw-r--r--lib/Transforms/Instrumentation/RSProfiling.cpp653
-rw-r--r--lib/Transforms/Instrumentation/RSProfiling.h31
-rw-r--r--lib/Transforms/Makefile20
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp98
-rw-r--r--lib/Transforms/Scalar/BasicBlockPlacement.cpp148
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt33
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp873
-rw-r--r--lib/Transforms/Scalar/CondPropagate.cpp295
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp90
-rw-r--r--lib/Transforms/Scalar/DCE.cpp133
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp461
-rw-r--r--lib/Transforms/Scalar/GVN.cpp1738
-rw-r--r--lib/Transforms/Scalar/GVNPRE.cpp1885
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp880
-rw-r--r--lib/Transforms/Scalar/InstructionCombining.cpp12919
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp954
-rw-r--r--lib/Transforms/Scalar/LICM.cpp885
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp280
-rw-r--r--lib/Transforms/Scalar/LoopIndexSplit.cpp1237
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp572
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp2605
-rw-r--r--lib/Transforms/Scalar/LoopUnroll.cpp183
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp1098
-rw-r--r--lib/Transforms/Scalar/Makefile15
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp741
-rw-r--r--lib/Transforms/Scalar/PredicateSimplifier.cpp2725
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp896
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp125
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp1855
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp111
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp1820
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp232
-rw-r--r--lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp159
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp2429
-rw-r--r--lib/Transforms/Scalar/TailDuplication.cpp365
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp479
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp594
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp622
-rw-r--r--lib/Transforms/Utils/BasicInliner.cpp181
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp282
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt27
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp533
-rw-r--r--lib/Transforms/Utils/CloneLoop.cpp152
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp126
-rw-r--r--lib/Transforms/Utils/CloneTrace.cpp119
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp746
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp144
-rw-r--r--lib/Transforms/Utils/InlineCost.cpp315
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp656
-rw-r--r--lib/Transforms/Utils/InstructionNamer.cpp63
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp276
-rw-r--r--lib/Transforms/Utils/Local.cpp338
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp600
-rw-r--r--lib/Transforms/Utils/LowerAllocations.cpp177
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp614
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp323
-rw-r--r--lib/Transforms/Utils/Makefile15
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp92
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp1003
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp2213
-rw-r--r--lib/Transforms/Utils/UnifyFunctionExitNodes.cpp139
-rw-r--r--lib/Transforms/Utils/UnrollLoop.cpp369
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp143
-rw-r--r--lib/VMCore/AsmWriter.cpp1880
-rw-r--r--lib/VMCore/Attributes.cpp310
-rw-r--r--lib/VMCore/AutoUpgrade.cpp430
-rw-r--r--lib/VMCore/BasicBlock.cpp274
-rw-r--r--lib/VMCore/CMakeLists.txt30
-rw-r--r--lib/VMCore/ConstantFold.cpp1681
-rw-r--r--lib/VMCore/ConstantFold.h60
-rw-r--r--lib/VMCore/Constants.cpp2832
-rw-r--r--lib/VMCore/Core.cpp1450
-rw-r--r--lib/VMCore/Dominators.cpp287
-rw-r--r--lib/VMCore/Function.cpp367
-rw-r--r--lib/VMCore/Globals.cpp273
-rw-r--r--lib/VMCore/InlineAsm.cpp231
-rw-r--r--lib/VMCore/Instruction.cpp387
-rw-r--r--lib/VMCore/Instructions.cpp2963
-rw-r--r--lib/VMCore/IntrinsicInst.cpp77
-rw-r--r--lib/VMCore/LeakDetector.cpp131
-rw-r--r--lib/VMCore/Makefile33
-rw-r--r--lib/VMCore/Mangler.cpp196
-rw-r--r--lib/VMCore/Module.cpp381
-rw-r--r--lib/VMCore/ModuleProvider.cpp26
-rw-r--r--lib/VMCore/Pass.cpp323
-rw-r--r--lib/VMCore/PassManager.cpp1710
-rw-r--r--lib/VMCore/PrintModulePass.cpp99
-rw-r--r--lib/VMCore/SymbolTableListTraitsImpl.h118
-rw-r--r--lib/VMCore/Type.cpp1457
-rw-r--r--lib/VMCore/TypeSymbolTable.cpp165
-rw-r--r--lib/VMCore/Use.cpp233
-rw-r--r--lib/VMCore/Value.cpp581
-rw-r--r--lib/VMCore/ValueSymbolTable.cpp137
-rw-r--r--lib/VMCore/ValueTypes.cpp185
-rw-r--r--lib/VMCore/Verifier.cpp1770
828 files changed, 372534 insertions, 0 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
new file mode 100644
index 0000000..c5523ec
--- /dev/null
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -0,0 +1,248 @@
+//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic AliasAnalysis interface, which serves as the
+// common interface between all clients and implementations of alias analysis.
+//
+// This file also implements the default version of the AliasAnalysis interface
+// that is to be used when no other implementation is specified. This does some
+// simple tests that detect obvious cases: two different global pointers cannot
+// alias, a global cannot alias a malloc, two different mallocs cannot alias,
+// etc.
+//
+// This alias analysis implementation really isn't very good for anything, but
+// it is very fast, and makes a nice clean default implementation. Because it
+// handles lots of little corner cases, other, more complex, alias analysis
+// implementations may choose to rely on this pass to resolve these simple and
+// easy cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+// Register the AliasAnalysis interface, providing a nice name to refer to.
+static RegisterAnalysisGroup<AliasAnalysis> Z("Alias Analysis");
+char AliasAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Default chaining methods
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult
+AliasAnalysis::alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->alias(V1, V1Size, V2, V2Size);
+}
+
+void AliasAnalysis::getMustAliases(Value *P, std::vector<Value*> &RetVals) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->getMustAliases(P, RetVals);
+}
+
+bool AliasAnalysis::pointsToConstantMemory(const Value *P) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->pointsToConstantMemory(P);
+}
+
+bool AliasAnalysis::hasNoModRefInfoForCalls() const {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->hasNoModRefInfoForCalls();
+}
+
+void AliasAnalysis::deleteValue(Value *V) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->deleteValue(V);
+}
+
+void AliasAnalysis::copyValue(Value *From, Value *To) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->copyValue(From, To);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
+ // FIXME: we can do better.
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->getModRefInfo(CS1, CS2);
+}
+
+
+//===----------------------------------------------------------------------===//
+// AliasAnalysis non-virtual helper method implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) {
+ return alias(L->getOperand(0), TD->getTypeStoreSize(L->getType()),
+ P, Size) ? Ref : NoModRef;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) {
+ // If the stored address cannot alias the pointer in question, then the
+ // pointer cannot be modified by the store.
+ if (!alias(S->getOperand(1),
+ TD->getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this store.
+ return pointsToConstantMemory(P) ? NoModRef : Mod;
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(CallSite CS,
+ std::vector<PointerAccessInfo> *Info) {
+ if (CS.doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+ ModRefBehavior MRB = getModRefBehavior(CS.getCalledFunction(), Info);
+ if (MRB != DoesNotAccessMemory && CS.onlyReadsMemory())
+ return OnlyReadsMemory;
+ return MRB;
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(Function *F,
+ std::vector<PointerAccessInfo> *Info) {
+ if (F) {
+ if (F->doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+ if (F->onlyReadsMemory())
+ return OnlyReadsMemory;
+ if (unsigned id = F->getIntrinsicID()) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ }
+ }
+ return UnknownModRefBehavior;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ ModRefResult Mask = ModRef;
+ ModRefBehavior MRB = getModRefBehavior(CS);
+ if (MRB == DoesNotAccessMemory)
+ return NoModRef;
+ else if (MRB == OnlyReadsMemory)
+ Mask = Ref;
+ else if (MRB == AliasAnalysis::AccessesArguments) {
+ bool doesAlias = false;
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (alias(*AI, ~0U, P, Size) != NoAlias) {
+ doesAlias = true;
+ break;
+ }
+
+ if (!doesAlias)
+ return NoModRef;
+ }
+
+ if (!AA) return Mask;
+
+ // If P points to a constant memory location, the call definitely could not
+ // modify the memory location.
+ if ((Mask & Mod) && AA->pointsToConstantMemory(P))
+ Mask = ModRefResult(Mask & ~Mod);
+
+ return ModRefResult(Mask & AA->getModRefInfo(CS, P, Size));
+}
+
+// AliasAnalysis destructor: DO NOT move this to the header file for
+// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
+// the AliasAnalysis.o file in the current .a file, causing alias analysis
+// support to not be included in the tool correctly!
+//
+AliasAnalysis::~AliasAnalysis() {}
+
+/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
+/// AliasAnalysis interface before any other methods are called.
+///
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
+ TD = &P->getAnalysis<TargetData>();
+ AA = &P->getAnalysis<AliasAnalysis>();
+}
+
+// getAnalysisUsage - All alias analysis implementations should invoke this
+// directly (using AliasAnalysis::getAnalysisUsage(AU)) to make sure that
+// TargetData is required by the pass.
+void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>(); // All AA's need TargetData.
+ AU.addRequired<AliasAnalysis>(); // All AA's chain
+}
+
+/// canBasicBlockModify - Return true if it is possible for execution of the
+/// specified basic block to modify the value pointed to by Ptr.
+///
+bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
+ const Value *Ptr, unsigned Size) {
+ return canInstructionRangeModify(BB.front(), BB.back(), Ptr, Size);
+}
+
+/// canInstructionRangeModify - Return true if it is possible for the execution
+/// of the specified instructions to modify the value pointed to by Ptr. The
+/// instructions to consider are all of the instructions in the range of [I1,I2]
+/// INCLUSIVE. I1 and I2 must be in the same basic block.
+///
+bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+ const Instruction &I2,
+ const Value *Ptr, unsigned Size) {
+ assert(I1.getParent() == I2.getParent() &&
+ "Instructions not in same basic block!");
+ BasicBlock::iterator I = const_cast<Instruction*>(&I1);
+ BasicBlock::iterator E = const_cast<Instruction*>(&I2);
+ ++E; // Convert from inclusive to exclusive range.
+
+ for (; I != E; ++I) // Check every instruction in range
+ if (getModRefInfo(I, const_cast<Value*>(Ptr), Size) & Mod)
+ return true;
+ return false;
+}
+
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool llvm::isNoAliasCall(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ return CallSite(const_cast<Instruction*>(cast<Instruction>(V)))
+ .paramHasAttr(0, Attribute::NoAlias);
+ return false;
+}
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object. This returns true for:
+/// Global Variables and Functions
+/// Allocas and Mallocs
+/// ByVal and NoAlias Arguments
+/// NoAlias returns
+///
+bool llvm::isIdentifiedObject(const Value *V) {
+ if (isa<GlobalValue>(V) || isa<AllocationInst>(V) || isNoAliasCall(V))
+ return true;
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasNoAliasAttr() || A->hasByValAttr();
+ return false;
+}
+
+// Because of the way .a files work, we must force the BasicAA implementation to
+// be pulled in if the AliasAnalysis classes are pulled in. Otherwise we run
+// the risk of AliasAnalysis being used, but the default implementation not
+// being linked into the tool that uses it.
+DEFINING_FILE_FOR(AliasAnalysis)
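The chaining protocol documented in the file above amounts to three obligations for an implementation: subclass both a pass and AliasAnalysis, call InitializeAliasAnalysis() from the pass's run method, and fall back to the chained analysis for any query it cannot sharpen. The following minimal sketch illustrates that pattern. The pass itself (SameValueAA, registered as "same-value-aa") is hypothetical and not part of this import; it mirrors the ModulePass shape of AliasAnalysisCounter in the next file.

#include "llvm/Pass.h"
#include "llvm/Module.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

namespace {
  // Hypothetical chained analysis: it only knows that a pointer must
  // alias itself, and defers every other query down the chain.
  struct SameValueAA : public ModulePass, public AliasAnalysis {
    static char ID;
    SameValueAA() : ModulePass(&ID) {}

    bool runOnModule(Module &M) {
      InitializeAliasAnalysis(this);  // resolves TargetData and the chained AA
      return false;                   // analyses never mutate the module
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AliasAnalysis::getAnalysisUsage(AU); // requires TargetData + chained AA
      AU.setPreservesAll();
    }

    AliasResult alias(const Value *V1, unsigned V1Size,
                      const Value *V2, unsigned V2Size) {
      if (V1 == V2)
        return MustAlias;             // identical values trivially must-alias
      // Everything else: defer to the next implementation in the chain.
      return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
    }
  };
}

char SameValueAA::ID = 0;
static RegisterPass<SameValueAA> P("same-value-aa", "Same-value alias analysis");
static RegisterAnalysisGroup<AliasAnalysis> G(P);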
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
new file mode 100644
index 0000000..4362d7d
--- /dev/null
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -0,0 +1,173 @@
+//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass which can be used to count how many alias queries
+// are being made and how the alias analysis implementation being used responds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+static cl::opt<bool>
+PrintAll("count-aa-print-all-queries", cl::ReallyHidden);
+static cl::opt<bool>
+PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
+
+namespace {
+ class VISIBILITY_HIDDEN AliasAnalysisCounter
+ : public ModulePass, public AliasAnalysis {
+ unsigned No, May, Must;
+ unsigned NoMR, JustRef, JustMod, MR;
+ const char *Name;
+ Module *M;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasAnalysisCounter() : ModulePass(&ID) {
+ No = May = Must = 0;
+ NoMR = JustRef = JustMod = MR = 0;
+ }
+
+ void printLine(const char *Desc, unsigned Val, unsigned Sum) {
+ cerr << " " << Val << " " << Desc << " responses ("
+ << Val*100/Sum << "%)\n";
+ }
+ ~AliasAnalysisCounter() {
+ unsigned AASum = No+May+Must;
+ unsigned MRSum = NoMR+JustRef+JustMod+MR;
+ if (AASum + MRSum) { // Print a report if any counted queries occurred...
+ cerr << "\n===== Alias Analysis Counter Report =====\n"
+ << " Analysis counted: " << Name << "\n"
+ << " " << AASum << " Total Alias Queries Performed\n";
+ if (AASum) {
+ printLine("no alias", No, AASum);
+ printLine("may alias", May, AASum);
+ printLine("must alias", Must, AASum);
+ cerr << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
+ << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n";
+ }
+
+ cerr << " " << MRSum << " Total Mod/Ref Queries Performed\n";
+ if (MRSum) {
+ printLine("no mod/ref", NoMR, MRSum);
+ printLine("ref", JustRef, MRSum);
+ printLine("mod", JustMod, MRSum);
+ printLine("mod/ref", MR, MRSum);
+ cerr << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum<< "%/"
+ << JustRef*100/MRSum << "%/" << JustMod*100/MRSum << "%/"
+ << MR*100/MRSum <<"%\n\n";
+ }
+ }
+ }
+
+ bool runOnModule(Module &M) {
+ this->M = &M;
+ InitializeAliasAnalysis(this);
+ Name = dynamic_cast<Pass*>(&getAnalysis<AliasAnalysis>())->getPassName();
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ // FIXME: We could count these too...
+ bool pointsToConstantMemory(const Value *P) {
+ return getAnalysis<AliasAnalysis>().pointsToConstantMemory(P);
+ }
+ bool doesNotAccessMemory(CallSite CS) {
+ return getAnalysis<AliasAnalysis>().doesNotAccessMemory(CS);
+ }
+ bool doesNotAccessMemory(Function *F) {
+ return getAnalysis<AliasAnalysis>().doesNotAccessMemory(F);
+ }
+ bool onlyReadsMemory(CallSite CS) {
+ return getAnalysis<AliasAnalysis>().onlyReadsMemory(CS);
+ }
+ bool onlyReadsMemory(Function *F) {
+ return getAnalysis<AliasAnalysis>().onlyReadsMemory(F);
+ }
+
+
+ // Forwarding functions: just delegate to a real AA implementation, counting
+ // the number of responses...
+ AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+
+ ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+ ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+ };
+}
+
+char AliasAnalysisCounter::ID = 0;
+static RegisterPass<AliasAnalysisCounter>
+X("count-aa", "Count Alias Analysis Query Responses", false, true);
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+ModulePass *llvm::createAliasAnalysisCounterPass() {
+ return new AliasAnalysisCounter();
+}
+
+AliasAnalysis::AliasResult
+AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ AliasResult R = getAnalysis<AliasAnalysis>().alias(V1, V1Size, V2, V2Size);
+
+ const char *AliasString;
+ switch (R) {
+ default: assert(0 && "Unknown alias type!");
+ case NoAlias: No++; AliasString = "No alias"; break;
+ case MayAlias: May++; AliasString = "May alias"; break;
+ case MustAlias: Must++; AliasString = "Must alias"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == MayAlias)) {
+ cerr << AliasString << ":\t";
+ cerr << "[" << V1Size << "B] ";
+ WriteAsOperand(*cerr.stream(), V1, true, M);
+ cerr << ", ";
+ cerr << "[" << V2Size << "B] ";
+ WriteAsOperand(*cerr.stream(), V2, true, M);
+ cerr << "\n";
+ }
+
+ return R;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, P, Size);
+
+ const char *MRString;
+ switch (R) {
+ default: assert(0 && "Unknown mod/ref type!");
+ case NoModRef: NoMR++; MRString = "NoModRef"; break;
+ case Ref: JustRef++; MRString = "JustRef"; break;
+ case Mod: JustMod++; MRString = "JustMod"; break;
+ case ModRef: MR++; MRString = "ModRef"; break;
+ }
+
+ if (PrintAll || (PrintAllFailures && R == ModRef)) {
+ cerr << MRString << ": Ptr: ";
+ cerr << "[" << Size << "B] ";
+ WriteAsOperand(*cerr.stream(), P, true, M);
+ cerr << "\t<->" << *CS.getInstruction();
+ }
+ return R;
+}
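As a usage note, the counter above interposes on the AliasAnalysis group purely by pass ordering: schedule it after the analysis you want to measure and before the client. A hypothetical driver sketch follows; countQueries is an illustrative name, createBasicAliasAnalysisPass is assumed to be the factory declared in llvm/Analysis/Passes.h alongside the two factories defined in this commit, and the report is printed from the counter's destructor when the PassManager is torn down. From the command line, the equivalent stacking is roughly "opt -count-aa -aa-eval -disable-output input.bc", using the pass names registered above and in the next file.

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

// Hypothetical driver: stack the counter between a real analysis and a client.
void countQueries(Module *M) {
  PassManager PM;
  PM.add(new TargetData(M));                 // every AA requires TargetData
  PM.add(createBasicAliasAnalysisPass());    // the analysis being measured
  PM.add(createAliasAnalysisCounterPass());  // interposes and counts queries
  PM.add(createAAEvalPass());                // client issuing the n^2 queries
  PM.run(*M);
}  // destroying PM deletes the counter, whose destructor prints the report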
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
new file mode 100644
index 0000000..07820e3
--- /dev/null
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -0,0 +1,246 @@
+//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple N^2 alias analysis accuracy evaluator.
+// Basically, for each function in the program, it simply queries to see how the
+// alias analysis implementation answers alias queries between each pair of
+// pointers in the function.
+//
+// This is inspired and adapted from code by: Naveen Neelakantam, Francesco
+// Spadini, and Wojciech Stryjewski.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+#include <set>
+#include <sstream>
+using namespace llvm;
+
+static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
+static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
+static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
+static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+
+namespace {
+ class VISIBILITY_HIDDEN AAEval : public FunctionPass {
+ unsigned NoAlias, MayAlias, MustAlias;
+ unsigned NoModRef, Mod, Ref, ModRef;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AAEval() : FunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M) {
+ NoAlias = MayAlias = MustAlias = 0;
+ NoModRef = Mod = Ref = ModRef = 0;
+
+ if (PrintAll) {
+ PrintNoAlias = PrintMayAlias = PrintMustAlias = true;
+ PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
+ }
+ return false;
+ }
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+}
+
+char AAEval::ID = 0;
+static RegisterPass<AAEval>
+X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true);
+
+FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
+
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+                         const Value *V2, const Module *M) {
+ if (P) {
+ std::stringstream s1, s2;
+ WriteAsOperand(s1, V1, true, M);
+ WriteAsOperand(s2, V2, true, M);
+ std::string o1(s1.str()), o2(s2.str());
+ if (o2 < o1)
+ std::swap(o1, o2);
+ cerr << " " << Msg << ":\t"
+ << o1 << ", "
+ << o2 << "\n";
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
+ Module *M) {
+ if (P) {
+ cerr << " " << Msg << ": Ptr: ";
+ WriteAsOperand(*cerr.stream(), Ptr, true, M);
+ cerr << "\t<->" << *I;
+ }
+}
+
+bool AAEval::runOnFunction(Function &F) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ const TargetData &TD = AA.getTargetData();
+
+ std::set<Value *> Pointers;
+ std::set<CallSite> CallSites;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ if (isa<PointerType>(I->getType())) // Add all pointer arguments
+ Pointers.insert(I);
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (isa<PointerType>(I->getType())) // Add all pointer instructions
+ Pointers.insert(&*I);
+ Instruction &Inst = *I;
+ User::op_iterator OI = Inst.op_begin();
+ CallSite CS = CallSite::get(&Inst);
+ if (CS.getInstruction() &&
+ isa<Function>(CS.getCalledValue()))
+ ++OI; // Skip actual functions for direct function calls.
+ for (; OI != Inst.op_end(); ++OI)
+ if (isa<PointerType>((*OI)->getType()) && !isa<ConstantPointerNull>(*OI))
+ Pointers.insert(*OI);
+
+ if (CS.getInstruction()) CallSites.insert(CS);
+ }
+
+ if (PrintNoAlias || PrintMayAlias || PrintMustAlias ||
+ PrintNoModRef || PrintMod || PrintRef || PrintModRef)
+ cerr << "Function: " << F.getName() << ": " << Pointers.size()
+ << " pointers, " << CallSites.size() << " call sites\n";
+
+  // Iterate over the collected pointers and run the full (n^2)/2
+  // disambiguations.
+ for (std::set<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+ I1 != E; ++I1) {
+ unsigned I1Size = 0;
+ const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+ if (I1ElTy->isSized()) I1Size = TD.getTypeStoreSize(I1ElTy);
+
+ for (std::set<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+ unsigned I2Size = 0;
+      const Type *I2ElTy =
+        cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized()) I2Size = TD.getTypeStoreSize(I2ElTy);
+
+ switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
+ case AliasAnalysis::NoAlias:
+ PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
+ ++MustAlias; break;
+ default:
+ cerr << "Unknown alias query result!\n";
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls and values
+ for (std::set<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ Instruction *I = C->getInstruction();
+
+ for (std::set<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+ V != Ve; ++V) {
+ unsigned Size = 0;
+ const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+ if (ElTy->isSized()) Size = TD.getTypeStoreSize(ElTy);
+
+ switch (AA.getModRefInfo(*C, *V, Size)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults(" Mod", PrintMod, I, *V, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults(" Ref", PrintRef, I, *V, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent());
+ ++ModRef; break;
+ default:
+ cerr << "Unknown alias query result!\n";
+ }
+ }
+ }
+
+ return false;
+}
+
+static void PrintPercent(unsigned Num, unsigned Sum) {
+ cerr << "(" << Num*100ULL/Sum << "."
+ << ((Num*1000ULL/Sum) % 10) << "%)\n";
+}
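+
+// For example, PrintPercent(1, 3) emits "(33.3%)": 1*100/3 truncates to 33,
+// and (1*1000/3) % 10 == 3 supplies one decimal digit without floating
+// point. Callers guard against Sum being zero.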
+
+bool AAEval::doFinalization(Module &M) {
+ unsigned AliasSum = NoAlias + MayAlias + MustAlias;
+ cerr << "===== Alias Analysis Evaluator Report =====\n";
+ if (AliasSum == 0) {
+ cerr << " Alias Analysis Evaluator Summary: No pointers!\n";
+ } else {
+ cerr << " " << AliasSum << " Total Alias Queries Performed\n";
+ cerr << " " << NoAlias << " no alias responses ";
+ PrintPercent(NoAlias, AliasSum);
+ cerr << " " << MayAlias << " may alias responses ";
+ PrintPercent(MayAlias, AliasSum);
+ cerr << " " << MustAlias << " must alias responses ";
+ PrintPercent(MustAlias, AliasSum);
+ cerr << " Alias Analysis Evaluator Pointer Alias Summary: "
+ << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+ << MustAlias*100/AliasSum << "%\n";
+ }
+
+ // Display the summary for mod/ref analysis
+ unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
+ if (ModRefSum == 0) {
+ cerr << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+ } else {
+ cerr << " " << ModRefSum << " Total ModRef Queries Performed\n";
+ cerr << " " << NoModRef << " no mod/ref responses ";
+ PrintPercent(NoModRef, ModRefSum);
+ cerr << " " << Mod << " mod responses ";
+ PrintPercent(Mod, ModRefSum);
+ cerr << " " << Ref << " ref responses ";
+ PrintPercent(Ref, ModRefSum);
+ cerr << " " << ModRef << " mod & ref responses ";
+ PrintPercent(ModRef, ModRefSum);
+ cerr << " Alias Analysis Evaluator Mod/Ref Summary: "
+ << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
+ << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+ }
+
+ return false;
+}
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
new file mode 100644
index 0000000..1e82621
--- /dev/null
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -0,0 +1,123 @@
+//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass checks alias analysis users to ensure that if they
+// create a new value, they do not query AA without informing it of the value.
+// It acts as a shim over any other AA pass you want.
+//
+// Yes, keeping track of every value in the program is expensive, but this is
+// a debugging pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN AliasDebugger
+ : public ModulePass, public AliasAnalysis {
+
+    // What we do is simple: keep track of every value the AA could
+    // know about, and verify that queries are made against one of those
+    // values. A query on a value that didn't exist when the AA was created
+    // means someone forgot to update the AA when creating new values.
+
+ std::set<const Value*> Vals;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ AliasDebugger() : ModulePass(&ID) {}
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+
+    for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ Vals.insert(&*I);
+
+    for (Module::iterator I = M.begin(),
+          E = M.end(); I != E; ++I) {
+      Vals.insert(&*I);
+      if (!I->isDeclaration()) {
+ for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end();
+ AI != AE; ++AI)
+ Vals.insert(&*AI);
+ for (Function::const_iterator FI = I->begin(), FE = I->end();
+ FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI)
+ Vals.insert(&*BI);
+ }
+
+ }
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ assert(Vals.find(V1) != Vals.end() && "Never seen value in AA before");
+ assert(Vals.find(V2) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+ }
+
+ ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::getModRefInfo(CS, P, Size);
+ }
+
+ ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+
+ void getMustAliases(Value *P, std::vector<Value*> &RetVals) {
+ assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::getMustAliases(P, RetVals);
+ }
+
+ bool pointsToConstantMemory(const Value *P) {
+ assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::pointsToConstantMemory(P);
+ }
+
+ virtual void deleteValue(Value *V) {
+ assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
+ AliasAnalysis::deleteValue(V);
+ }
+ virtual void copyValue(Value *From, Value *To) {
+ Vals.insert(To);
+ AliasAnalysis::copyValue(From, To);
+ }
+
+ };
+}
+
+char AliasDebugger::ID = 0;
+static RegisterPass<AliasDebugger>
+X("debug-aa", "AA use debugger", false, true);
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
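+
+// Example of the kind of bug this pass catches (a sketch): a transform
+// clones an instruction but never calls copyValue(Old, New) on the AA; the
+// next alias() query involving the clone then fires the "Never seen value
+// in AA before" assertion above instead of silently returning stale results.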
+
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
new file mode 100644
index 0000000..18c2b665
--- /dev/null
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -0,0 +1,608 @@
+//===- AliasSetTracker.cpp - Alias Set Tracker implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AliasSetTracker and AliasSet classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+/// mergeSetIn - Merge the specified alias set into this alias set.
+///
+void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
+ assert(!AS.Forward && "Alias set is already forwarding!");
+ assert(!Forward && "This set is a forwarding set!!");
+
+ // Update the alias and access types of this set...
+ AccessTy |= AS.AccessTy;
+ AliasTy |= AS.AliasTy;
+
+ if (AliasTy == MustAlias) {
+ // Check that these two merged sets really are must aliases. Since both
+ // used to be must-alias sets, we can just check any pointer from each set
+ // for aliasing.
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ PointerRec *L = getSomePointer();
+ PointerRec *R = AS.getSomePointer();
+
+ // If the pointers are not a must-alias pair, this set becomes a may alias.
+ if (AA.alias(L->getValue(), L->getSize(), R->getValue(), R->getSize())
+ != AliasAnalysis::MustAlias)
+ AliasTy = MayAlias;
+ }
+
+ if (CallSites.empty()) { // Merge call sites...
+ if (!AS.CallSites.empty())
+ std::swap(CallSites, AS.CallSites);
+ } else if (!AS.CallSites.empty()) {
+ CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end());
+ AS.CallSites.clear();
+ }
+
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
+
+ // Merge the list of constituent pointers...
+ if (AS.PtrList) {
+ *PtrListEnd = AS.PtrList;
+ AS.PtrList->setPrevInList(PtrListEnd);
+ PtrListEnd = AS.PtrListEnd;
+
+ AS.PtrList = 0;
+ AS.PtrListEnd = &AS.PtrList;
+ assert(*AS.PtrListEnd == 0 && "End of list is not null?");
+ }
+}
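+
+// Illustrative note: after mergeSetIn(AS, AST) returns, AS survives only as
+// a forwarding stub -- AS.Forward points at this set, getForwardedTarget()
+// chases that link, and the stub is reclaimed once its reference count
+// drops to zero (see removeAliasSet below).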
+
+void AliasSetTracker::removeAliasSet(AliasSet *AS) {
+ if (AliasSet *Fwd = AS->Forward) {
+ Fwd->dropRef(*this);
+ AS->Forward = 0;
+ }
+ AliasSets.erase(AS);
+}
+
+void AliasSet::removeFromTracker(AliasSetTracker &AST) {
+ assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!");
+ AST.removeAliasSet(this);
+}
+
+void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
+ unsigned Size, bool KnownMustAlias) {
+ assert(!Entry.hasAliasSet() && "Entry already in set!");
+
+ // Check to see if we have to downgrade to _may_ alias.
+ if (isMustAlias() && !KnownMustAlias)
+ if (PointerRec *P = getSomePointer()) {
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ AliasAnalysis::AliasResult Result =
+ AA.alias(P->getValue(), P->getSize(), Entry.getValue(), Size);
+ if (Result == AliasAnalysis::MayAlias)
+ AliasTy = MayAlias;
+ else // First entry of must alias must have maximum size!
+ P->updateSize(Size);
+ assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
+ }
+
+ Entry.setAliasSet(this);
+ Entry.updateSize(Size);
+
+ // Add it to the end of the list...
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ *PtrListEnd = &Entry;
+ PtrListEnd = Entry.setPrevInList(PtrListEnd);
+ assert(*PtrListEnd == 0 && "End of list is not null?");
+ addRef(); // Entry points to alias set...
+}
+
+void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
+ CallSites.push_back(CS);
+
+ AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return;
+ else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+ AliasTy = MayAlias;
+ AccessTy |= Refs;
+ return;
+ }
+
+  // FIXME: This should use mod/ref information to be less conservative here.
+ AliasTy = MayAlias;
+ AccessTy = ModRef;
+}
+
+/// aliasesPointer - Return true if the specified pointer "may" (or must)
+/// alias one of the members in the set.
+///
+bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
+ AliasAnalysis &AA) const {
+ if (AliasTy == MustAlias) {
+ assert(CallSites.empty() && "Illegal must alias set!");
+
+ // If this is a set of MustAliases, only check to see if the pointer aliases
+ // SOME value in the set...
+ PointerRec *SomePtr = getSomePointer();
+ assert(SomePtr && "Empty must-alias set??");
+    return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size)
+           != AliasAnalysis::NoAlias;
+ }
+
+ // If this is a may-alias set, we have to check all of the pointers in the set
+ // to be sure it doesn't alias the set...
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.alias(Ptr, Size, I.getPointer(), I.getSize()))
+ return true;
+
+ // Check the call sites list and invoke list...
+ if (!CallSites.empty()) {
+ if (AA.hasNoModRefInfoForCalls())
+ return true;
+
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
+ if (AA.getModRefInfo(CallSites[i], const_cast<Value*>(Ptr), Size)
+ != AliasAnalysis::NoModRef)
+ return true;
+ }
+
+ return false;
+}
+
+bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
+ if (AA.doesNotAccessMemory(CS))
+ return false;
+
+ if (AA.hasNoModRefInfoForCalls())
+ return true;
+
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
+ if (AA.getModRefInfo(CallSites[i], CS) != AliasAnalysis::NoModRef ||
+ AA.getModRefInfo(CS, CallSites[i]) != AliasAnalysis::NoModRef)
+ return true;
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
+ AliasAnalysis::NoModRef)
+ return true;
+
+ return false;
+}
+
+void AliasSetTracker::clear() {
+ // Delete all the PointerRec entries.
+ for (DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.begin(),
+ E = PointerMap.end(); I != E; ++I)
+ I->second->eraseFromList();
+
+ PointerMap.clear();
+
+ // The alias sets should all be clear now.
+ AliasSets.clear();
+}
+
+
+/// findAliasSetForPointer - Given a pointer, find the one alias set to put the
+/// instruction referring to the pointer into. If there are multiple alias sets
+/// that may alias the pointer, merge them together and return the unified set.
+///
+AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
+ unsigned Size) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, AA)) {
+ if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ } else { // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ }
+
+ return FoundSet;
+}
+
+/// containsPointer - Return true if the specified location is represented by
+/// this alias set, false otherwise. This does not modify the AST object or
+/// alias sets.
+bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, AA))
+ return true;
+ return false;
+}
+
+AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
+ AliasSet *FoundSet = 0;
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesCallSite(CS, AA)) {
+ if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+      } else {                  // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
+ }
+
+ return FoundSet;
+}
+
+/// getAliasSetForPointer - Return the alias set that the specified pointer
+/// lives in.
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size,
+ bool *New) {
+ AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+
+ // Check to see if the pointer is already known...
+ if (Entry.hasAliasSet()) {
+ Entry.updateSize(Size);
+ // Return the set!
+ return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
+ } else if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) {
+ // Add it to the alias set it aliases...
+ AS->addPointer(*this, Entry, Size);
+ return *AS;
+ } else {
+ if (New) *New = true;
+ // Otherwise create a new alias set to hold the loaded pointer...
+ AliasSets.push_back(new AliasSet());
+ AliasSets.back().addPointer(*this, Entry, Size);
+ return AliasSets.back();
+ }
+}
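+
+// Typical client usage (a sketch; assumes an AliasAnalysis &AA is already
+// available, e.g. from getAnalysis<AliasAnalysis>()):
+//   AliasSetTracker AST(AA);
+//   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+//     AST.add(I);                        // or simply AST.add(*BB);
+//   bool New = false;
+//   AliasSet &AS = AST.getAliasSetForPointer(Ptr, Size, &New);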
+
+bool AliasSetTracker::add(Value *Ptr, unsigned Size) {
+ bool NewPtr;
+ addPointer(Ptr, Size, AliasSet::NoModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(LoadInst *LI) {
+ bool NewPtr;
+ AliasSet &AS = addPointer(LI->getOperand(0),
+ AA.getTargetData().getTypeStoreSize(LI->getType()),
+ AliasSet::Refs, NewPtr);
+ if (LI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(StoreInst *SI) {
+ bool NewPtr;
+ Value *Val = SI->getOperand(0);
+ AliasSet &AS = addPointer(SI->getOperand(1),
+ AA.getTargetData().getTypeStoreSize(Val->getType()),
+ AliasSet::Mods, NewPtr);
+ if (SI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(FreeInst *FI) {
+ bool NewPtr;
+ addPointer(FI->getOperand(0), ~0, AliasSet::Mods, NewPtr);
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(VAArgInst *VAAI) {
+ bool NewPtr;
+ addPointer(VAAI->getOperand(0), ~0, AliasSet::ModRef, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(CallSite CS) {
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction()))
+ return true; // Ignore DbgInfo Intrinsics.
+ if (AA.doesNotAccessMemory(CS))
+ return true; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForCallSite(CS);
+ if (!AS) {
+ AliasSets.push_back(new AliasSet());
+ AS = &AliasSets.back();
+ AS->addCallSite(CS, AA);
+ return true;
+ } else {
+ AS->addCallSite(CS, AA);
+ return false;
+ }
+}
+
+bool AliasSetTracker::add(Instruction *I) {
+ // Dispatch to one of the other add methods...
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return add(LI);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return add(SI);
+ else if (CallInst *CI = dyn_cast<CallInst>(I))
+ return add(CI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ return add(II);
+ else if (FreeInst *FI = dyn_cast<FreeInst>(I))
+ return add(FI);
+ else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return add(VAAI);
+ return true;
+}
+
+void AliasSetTracker::add(BasicBlock &BB) {
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ add(I);
+}
+
+void AliasSetTracker::add(const AliasSetTracker &AST) {
+ assert(&AA == &AST.AA &&
+ "Merging AliasSetTracker objects with different Alias Analyses!");
+
+ // Loop over all of the alias sets in AST, adding the pointers contained
+ // therein into the current alias sets. This can cause alias sets to be
+ // merged together in the current AST.
+ for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I)
+ if (!I->Forward) { // Ignore forwarding alias sets
+ AliasSet &AS = const_cast<AliasSet&>(*I);
+
+ // If there are any call sites in the alias set, add them to this AST.
+ for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
+ add(AS.CallSites[i]);
+
+      // Loop over all of the pointers in this alias set.
+      bool NewPtr;
+      for (AliasSet::iterator ASI = AS.begin(), ASE = AS.end();
+           ASI != ASE; ++ASI) {
+        AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+                                     (AliasSet::AccessType)AS.AccessTy, NewPtr);
+        if (AS.isVolatile()) NewAS.setVolatile();
+      }
+ }
+}
+
+/// remove - Remove the specified (potentially non-empty) alias set from the
+/// tracker.
+void AliasSetTracker::remove(AliasSet &AS) {
+ // Drop all call sites.
+ AS.CallSites.clear();
+
+ // Clear the alias set.
+ unsigned NumRefs = 0;
+ while (!AS.empty()) {
+ AliasSet::PointerRec *P = AS.PtrList;
+
+ Value *ValToRemove = P->getValue();
+
+ // Unlink and delete entry from the list of values.
+ P->eraseFromList();
+
+ // Remember how many references need to be dropped.
+ ++NumRefs;
+
+ // Finally, remove the entry.
+ PointerMap.erase(ValToRemove);
+ }
+
+ // Stop using the alias set, removing it.
+ AS.RefCount -= NumRefs;
+ if (AS.RefCount == 0)
+ AS.removeFromTracker(*this);
+}
+
+bool AliasSetTracker::remove(Value *Ptr, unsigned Size) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
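+
+// Note that this removes the *entire* alias set containing Ptr, not just
+// Ptr's own entry: removing one pointer of a three-member may-alias set
+// discards all three PointerRec entries (see remove(AliasSet&) above).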
+
+bool AliasSetTracker::remove(LoadInst *LI) {
+ unsigned Size = AA.getTargetData().getTypeStoreSize(LI->getType());
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(StoreInst *SI) {
+ unsigned Size =
+ AA.getTargetData().getTypeStoreSize(SI->getOperand(0)->getType());
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(FreeInst *FI) {
+ AliasSet *AS = findAliasSetForPointer(FI->getOperand(0), ~0);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(VAArgInst *VAAI) {
+ AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), ~0);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(CallSite CS) {
+ if (AA.doesNotAccessMemory(CS))
+ return false; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForCallSite(CS);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(Instruction *I) {
+ // Dispatch to one of the other remove methods...
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return remove(LI);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return remove(SI);
+ else if (CallInst *CI = dyn_cast<CallInst>(I))
+ return remove(CI);
+ else if (FreeInst *FI = dyn_cast<FreeInst>(I))
+ return remove(FI);
+ else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return remove(VAAI);
+ return true;
+}
+
+
+// deleteValue method - This method is used to remove a pointer value from the
+// AliasSetTracker entirely. It should be used when an instruction is deleted
+// from the program to update the AST. If you don't use this, you will have
+// dangling pointers to deleted instructions.
+//
+void AliasSetTracker::deleteValue(Value *PtrVal) {
+ // Notify the alias analysis implementation that this value is gone.
+ AA.deleteValue(PtrVal);
+
+ // If this is a call instruction, remove the callsite from the appropriate
+ // AliasSet.
+ CallSite CS = CallSite::get(PtrVal);
+ if (CS.getInstruction())
+ if (!AA.doesNotAccessMemory(CS))
+ if (AliasSet *AS = findAliasSetForCallSite(CS))
+ AS->removeCallSite(CS);
+
+ // First, look up the PointerRec for this pointer.
+ DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(PtrVal);
+ if (I == PointerMap.end()) return; // Noop
+
+ // If we found one, remove the pointer from the alias set it is in.
+ AliasSet::PointerRec *PtrValEnt = I->second;
+ AliasSet *AS = PtrValEnt->getAliasSet(*this);
+
+ // Unlink and delete from the list of values.
+ PtrValEnt->eraseFromList();
+
+ // Stop using the alias set.
+ AS->dropRef(*this);
+
+ PointerMap.erase(I);
+}
+
+// copyValue - This method should be used whenever a preexisting value in the
+// program is copied or cloned, introducing a new value. Note that it is ok for
+// clients that use this method to introduce the same value multiple times: if
+// the tracker already knows about a value, it will ignore the request.
+//
+void AliasSetTracker::copyValue(Value *From, Value *To) {
+ // Notify the alias analysis implementation that this value is copied.
+ AA.copyValue(From, To);
+
+ // First, look up the PointerRec for this pointer.
+ DenseMap<Value*, AliasSet::PointerRec*>::iterator I = PointerMap.find(From);
+ if (I == PointerMap.end())
+ return; // Noop
+ assert(I->second->hasAliasSet() && "Dead entry?");
+
+ AliasSet::PointerRec &Entry = getEntryFor(To);
+ if (Entry.hasAliasSet()) return; // Already in the tracker!
+
+ // Add it to the alias set it aliases...
+ I = PointerMap.find(From);
+ AliasSet *AS = I->second->getAliasSet(*this);
+ AS->addPointer(*this, Entry, I->second->getSize(), true);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// AliasSet/AliasSetTracker Printing Support
+//===----------------------------------------------------------------------===//
+
+void AliasSet::print(std::ostream &OS) const {
+ OS << " AliasSet[" << (void*)this << "," << RefCount << "] ";
+ OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
+ switch (AccessTy) {
+ case NoModRef: OS << "No access "; break;
+ case Refs : OS << "Ref "; break;
+ case Mods : OS << "Mod "; break;
+ case ModRef : OS << "Mod/Ref "; break;
+ default: assert(0 && "Bad value for AccessTy!");
+ }
+ if (isVolatile()) OS << "[volatile] ";
+ if (Forward)
+ OS << " forwarding to " << (void*)Forward;
+
+
+ if (!empty()) {
+ OS << "Pointers: ";
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I != begin()) OS << ", ";
+ WriteAsOperand(OS << "(", I.getPointer());
+ OS << ", " << I.getSize() << ")";
+ }
+ }
+ if (!CallSites.empty()) {
+ OS << "\n " << CallSites.size() << " Call Sites: ";
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ WriteAsOperand(OS, CallSites[i].getCalledValue());
+ }
+ }
+ OS << "\n";
+}
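+
+// Sample output (values illustrative):
+//   AliasSet[0x8a0d600,2] may alias, Mod/Ref   Pointers: (%p, 4), (%q, 4)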
+
+void AliasSetTracker::print(std::ostream &OS) const {
+ OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
+ << PointerMap.size() << " pointer values.\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ I->print(OS);
+ OS << "\n";
+}
+
+void AliasSet::dump() const { print(cerr); }
+void AliasSetTracker::dump() const { print(cerr); }
+
+//===----------------------------------------------------------------------===//
+// AliasSetPrinter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class VISIBILITY_HIDDEN AliasSetPrinter : public FunctionPass {
+ AliasSetTracker *Tracker;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AliasSetPrinter() : FunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ Tracker->add(&*I);
+ Tracker->print(cerr);
+ delete Tracker;
+ return false;
+ }
+ };
+}
+
+char AliasSetPrinter::ID = 0;
+static RegisterPass<AliasSetPrinter>
+X("print-alias-sets", "Alias Set Printer", false, true);
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
new file mode 100644
index 0000000..493c6e8
--- /dev/null
+++ b/lib/Analysis/Analysis.cpp
@@ -0,0 +1,44 @@
+//===-- Analysis.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Analysis.h"
+#include "llvm/Analysis/Verifier.h"
+#include <fstream>
+#include <cstring>
+
+using namespace llvm;
+
+int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+ char **OutMessages) {
+ std::string Messages;
+
+ int Result = verifyModule(*unwrap(M),
+ static_cast<VerifierFailureAction>(Action),
+                            OutMessages ? &Messages : 0);
+
+ if (OutMessages)
+ *OutMessages = strdup(Messages.c_str());
+
+ return Result;
+}
+
+int LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+ return verifyFunction(*unwrap<Function>(Fn),
+ static_cast<VerifierFailureAction>(Action));
+}
+
+void LLVMViewFunctionCFG(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFG();
+}
+
+void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFGOnly();
+}
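+
+// Example C client (a sketch; assumes a populated LLVMModuleRef M; the
+// LLVMReturnStatusAction constant comes from llvm-c/Analysis.h):
+//   char *Msg = 0;
+//   if (LLVMVerifyModule(M, LLVMReturnStatusAction, &Msg))
+//     fprintf(stderr, "module is broken: %s\n", Msg);
+//   free(Msg);  // the message was strdup'd above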
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
new file mode 100644
index 0000000..d062045
--- /dev/null
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -0,0 +1,838 @@
+//===- BasicAliasAnalysis.cpp - Local Alias Analysis Impl -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default implementation of the Alias Analysis interface
+// that simply implements a few identities (two different globals cannot alias,
+// etc), but otherwise does no analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Useful predicates
+//===----------------------------------------------------------------------===//
+
+static const User *isGEP(const Value *V) {
+ if (isa<GetElementPtrInst>(V) ||
+ (isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::GetElementPtr))
+ return cast<User>(V);
+ return 0;
+}
+
+static const Value *GetGEPOperands(const Value *V,
+ SmallVector<Value*, 16> &GEPOps) {
+ assert(GEPOps.empty() && "Expect empty list to populate!");
+ GEPOps.insert(GEPOps.end(), cast<User>(V)->op_begin()+1,
+ cast<User>(V)->op_end());
+
+ // Accumulate all of the chained indexes into the operand array
+ V = cast<User>(V)->getOperand(0);
+
+ while (const User *G = isGEP(V)) {
+ if (!isa<Constant>(GEPOps[0]) || isa<GlobalValue>(GEPOps[0]) ||
+ !cast<Constant>(GEPOps[0])->isNullValue())
+ break; // Don't handle folding arbitrary pointer offsets yet...
+ GEPOps.erase(GEPOps.begin()); // Drop the zero index
+ GEPOps.insert(GEPOps.begin(), G->op_begin()+1, G->op_end());
+ V = G->getOperand(0);
+ }
+ return V;
+}
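+
+// Worked example: for V = gep (gep %A, 0, 1), 0, 2 the loop above drops the
+// outer zero index and prepends the inner GEP's indices, so the function
+// returns %A with GEPOps = {0, 1, 2} -- the same indexing as the two-level
+// chain.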
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+static bool isKnownNonNull(const Value *V) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // Global values are not null unless extern weak.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage();
+ return false;
+}
+
+/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
+/// object that never escapes from the function.
+static bool isNonEscapingLocalObject(const Value *V) {
+ // If this is a local allocation, check to see if it escapes.
+ if (isa<AllocationInst>(V) || isNoAliasCall(V))
+ return !PointerMayBeCaptured(V, false);
+
+ // If this is an argument that corresponds to a byval or noalias argument,
+ // then it has not escaped before entering the function. Check if it escapes
+ // inside the function.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr() || A->hasNoAliasAttr()) {
+ // Don't bother analyzing arguments already known not to escape.
+ if (A->hasNoCaptureAttr())
+ return true;
+ return !PointerMayBeCaptured(V, false);
+ }
+ return false;
+}
+
+
+/// isObjectSmallerThan - Return true if we can prove that the object specified
+/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, unsigned Size,
+ const TargetData &TD) {
+ const Type *AccessTy;
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ AccessTy = GV->getType()->getElementType();
+ } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+ if (!AI->isArrayAllocation())
+ AccessTy = AI->getType()->getElementType();
+ else
+ return false;
+ } else if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (A->hasByValAttr())
+ AccessTy = cast<PointerType>(A->getType())->getElementType();
+ else
+ return false;
+ } else {
+ return false;
+ }
+
+ if (AccessTy->isSized())
+ return TD.getTypeAllocSize(AccessTy) < Size;
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// NoAA Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// NoAA - This class implements the -no-aa pass, which always returns "I
+ /// don't know" for alias queries. NoAA is unlike other alias analysis
+  /// implementations in that it does not chain to a previous analysis. As
+  /// such, it doesn't follow many of the rules that other alias analyses must.
+ ///
+ struct VISIBILITY_HIDDEN NoAA : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ NoAA() : ImmutablePass(&ID) {}
+ explicit NoAA(void *PID) : ImmutablePass(PID) { }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ }
+
+ virtual void initializePass() {
+ TD = &getAnalysis<TargetData>();
+ }
+
+ virtual AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ return MayAlias;
+ }
+
+ virtual void getArgumentAccesses(Function *F, CallSite CS,
+ std::vector<PointerAccessInfo> &Info) {
+ assert(0 && "This method may not be called on this function!");
+ }
+
+ virtual void getMustAliases(Value *P, std::vector<Value*> &RetVals) { }
+ virtual bool pointsToConstantMemory(const Value *P) { return false; }
+ virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ return ModRef;
+ }
+ virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ return ModRef;
+ }
+ virtual bool hasNoModRefInfoForCalls() const { return true; }
+
+ virtual void deleteValue(Value *V) {}
+ virtual void copyValue(Value *From, Value *To) {}
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char NoAA::ID = 0;
+static RegisterPass<NoAA>
+U("no-aa", "No Alias Analysis (always returns 'may' alias)", true, true);
+
+// Declare that we implement the AliasAnalysis interface
+static RegisterAnalysisGroup<AliasAnalysis> V(U);
+
+ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
+
+//===----------------------------------------------------------------------===//
+// BasicAA Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// BasicAliasAnalysis - This is the default alias analysis implementation.
+ /// Because it doesn't chain to a previous alias analysis (like -no-aa), it
+ /// derives from the NoAA class.
+ struct VISIBILITY_HIDDEN BasicAliasAnalysis : public NoAA {
+ static char ID; // Class identification, replacement for typeinfo
+ BasicAliasAnalysis() : NoAA(&ID) {}
+ AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+
+ ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+ ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
+
+ /// hasNoModRefInfoForCalls - We can provide mod/ref information against
+ /// non-escaping allocations.
+ virtual bool hasNoModRefInfoForCalls() const { return false; }
+
+    /// pointsToConstantMemory - Chase the pointer to its underlying object
+    /// and return true if that object is a constant global.
+ bool pointsToConstantMemory(const Value *P);
+
+ private:
+ // CheckGEPInstructions - Check two GEP instructions with known
+ // must-aliasing base pointers. This checks to see if the index expressions
+ // preclude the pointers from aliasing...
+ AliasResult
+ CheckGEPInstructions(const Type* BasePtr1Ty,
+ Value **GEP1Ops, unsigned NumGEP1Ops, unsigned G1Size,
+ const Type *BasePtr2Ty,
+ Value **GEP2Ops, unsigned NumGEP2Ops, unsigned G2Size);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char BasicAliasAnalysis::ID = 0;
+static RegisterPass<BasicAliasAnalysis>
+X("basicaa", "Basic Alias Analysis (default AA impl)", false, true);
+
+// Declare that we implement the AliasAnalysis interface
+static RegisterAnalysisGroup<AliasAnalysis, true> Y(X);
+
+ImmutablePass *llvm::createBasicAliasAnalysisPass() {
+ return new BasicAliasAnalysis();
+}
+
+
+/// pointsToConstantMemory - Chase the pointer to its underlying object and
+/// return true if that object is a constant global.
+bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(P->getUnderlyingObject()))
+ return GV->isConstant();
+ return false;
+}
+
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object. Since we only look at local properties of this
+// function, we really can't say much about this query. We do, however, use
+// simple "address taken" analysis on local objects.
+//
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ if (!isa<Constant>(P)) {
+ const Value *Object = P->getUnderlyingObject();
+
+ // If this is a tail call and P points to a stack location, we know that
+ // the tail call cannot access or modify the local stack.
+ // We cannot exclude byval arguments here; these belong to the caller of
+    // the current function, not to the current function itself, and a tail
+    // callee may reference them.
+ if (isa<AllocaInst>(Object))
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (CI->isTailCall())
+ return NoModRef;
+
+ // If the pointer is to a locally allocated object that does not escape,
+ // then the call can not mod/ref the pointer unless the call takes the
+ // argument without capturing it.
+ if (isNonEscapingLocalObject(Object) && CS.getInstruction() != Object) {
+ bool passedAsArg = false;
+ // TODO: Eventually only check 'nocapture' arguments.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI)
+ if (isa<PointerType>((*CI)->getType()) &&
+ alias(cast<Value>(CI), ~0U, P, ~0U) != NoAlias)
+ passedAsArg = true;
+
+ if (!passedAsArg)
+ return NoModRef;
+ }
+ }
+
+  // The AliasAnalysis base class has some smarts, let's use them.
+ return AliasAnalysis::getModRefInfo(CS, P, Size);
+}
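+
+// For example (a sketch in the IR of this era):
+//   %x = alloca i32
+//   ...
+//   tail call void @f()
+// %x is caller-local stack and the call is a tail call, so the rule above
+// answers NoModRef for this pair without examining @f at all.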
+
+
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
+ // If CS1 or CS2 are readnone, they don't interact.
+ ModRefBehavior CS1B = AliasAnalysis::getModRefBehavior(CS1);
+ if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+ ModRefBehavior CS2B = AliasAnalysis::getModRefBehavior(CS2);
+ if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+ // If they both only read from memory, just return ref.
+ if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
+ return Ref;
+
+ // Otherwise, fall back to NoAA (mod+ref).
+ return NoAA::getModRefInfo(CS1, CS2);
+}
+
+
+// alias - Provide a bunch of ad-hoc rules to disambiguate in common cases, such
+// as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ // Strip off any constant expression casts if they exist
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V1))
+ if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType()))
+ V1 = CE->getOperand(0);
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V2))
+ if (CE->isCast() && isa<PointerType>(CE->getOperand(0)->getType()))
+ V2 = CE->getOperand(0);
+
+ // Are we checking for alias of the same value?
+ if (V1 == V2) return MustAlias;
+
+ if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType()))
+ return NoAlias; // Scalars cannot alias each other
+
+ // Strip off cast instructions. Since V1 and V2 are pointers, they must be
+ // pointer<->pointer bitcasts.
+ if (const BitCastInst *I = dyn_cast<BitCastInst>(V1))
+ return alias(I->getOperand(0), V1Size, V2, V2Size);
+ if (const BitCastInst *I = dyn_cast<BitCastInst>(V2))
+ return alias(V1, V1Size, I->getOperand(0), V2Size);
+
+ // Figure out what objects these things are pointing to if we can.
+ const Value *O1 = V1->getUnderlyingObject();
+ const Value *O2 = V2->getUnderlyingObject();
+
+ if (O1 != O2) {
+ // If V1/V2 point to two different objects we know that we have no alias.
+ if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+ return NoAlias;
+
+ // Arguments can't alias with local allocations or noalias calls.
+ if ((isa<Argument>(O1) && (isa<AllocationInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocationInst>(O1) || isNoAliasCall(O1))))
+ return NoAlias;
+
+ // Most objects can't alias null.
+ if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2)))
+ return NoAlias;
+ }
+
+ // If the size of one access is larger than the entire object on the other
+ // side, then we know such behavior is undefined and can assume no alias.
+ const TargetData &TD = getTargetData();
+ if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, TD)) ||
+ (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, TD)))
+ return NoAlias;
+
+ // If one pointer is the result of a call/invoke and the other is a
+ // non-escaping local object, then we know the object couldn't escape to a
+ // point where the call could return it.
+ if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) &&
+ isNonEscapingLocalObject(O2) && O1 != O2)
+ return NoAlias;
+ if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) &&
+ isNonEscapingLocalObject(O1) && O1 != O2)
+ return NoAlias;
+
+ // If we have two gep instructions with must-alias'ing base pointers, figure
+ // out if the indexes to the GEP tell us anything about the derived pointer.
+ // Note that we also handle chains of getelementptr instructions as well as
+ // constant expression getelementptrs here.
+ //
+ if (isGEP(V1) && isGEP(V2)) {
+ const User *GEP1 = cast<User>(V1);
+ const User *GEP2 = cast<User>(V2);
+
+ // If V1 and V2 are identical GEPs, just recurse down on both of them.
+ // This allows us to analyze things like:
+ // P = gep A, 0, i, 1
+ // Q = gep B, 0, i, 1
+ // by just analyzing A and B. This is even safe for variable indices.
+ if (GEP1->getType() == GEP2->getType() &&
+ GEP1->getNumOperands() == GEP2->getNumOperands() &&
+ GEP1->getOperand(0)->getType() == GEP2->getOperand(0)->getType() &&
+ // All operands are the same, ignoring the base.
+ std::equal(GEP1->op_begin()+1, GEP1->op_end(), GEP2->op_begin()+1))
+ return alias(GEP1->getOperand(0), V1Size, GEP2->getOperand(0), V2Size);
+
+
+ // Drill down into the first non-gep value, to test for must-aliasing of
+ // the base pointers.
+ while (isGEP(GEP1->getOperand(0)) &&
+ GEP1->getOperand(1) ==
+ Constant::getNullValue(GEP1->getOperand(1)->getType()))
+ GEP1 = cast<User>(GEP1->getOperand(0));
+ const Value *BasePtr1 = GEP1->getOperand(0);
+
+ while (isGEP(GEP2->getOperand(0)) &&
+ GEP2->getOperand(1) ==
+ Constant::getNullValue(GEP2->getOperand(1)->getType()))
+ GEP2 = cast<User>(GEP2->getOperand(0));
+ const Value *BasePtr2 = GEP2->getOperand(0);
+
+ // Do the base pointers alias?
+ AliasResult BaseAlias = alias(BasePtr1, ~0U, BasePtr2, ~0U);
+ if (BaseAlias == NoAlias) return NoAlias;
+ if (BaseAlias == MustAlias) {
+ // If the base pointers alias each other exactly, check to see if we can
+ // figure out anything about the resultant pointers, to try to prove
+ // non-aliasing.
+
+ // Collect all of the chained GEP operands together into one simple place
+ SmallVector<Value*, 16> GEP1Ops, GEP2Ops;
+ BasePtr1 = GetGEPOperands(V1, GEP1Ops);
+ BasePtr2 = GetGEPOperands(V2, GEP2Ops);
+
+      // If GetGEPOperands was able to fold both chains down to the same
+      // must-aliased base pointer, do the comparison.
+ if (BasePtr1 == BasePtr2) {
+ AliasResult GAlias =
+ CheckGEPInstructions(BasePtr1->getType(),
+ &GEP1Ops[0], GEP1Ops.size(), V1Size,
+ BasePtr2->getType(),
+ &GEP2Ops[0], GEP2Ops.size(), V2Size);
+ if (GAlias != MayAlias)
+ return GAlias;
+ }
+ }
+ }
+
+ // Check to see if these two pointers are related by a getelementptr
+ // instruction. If one pointer is a GEP with a non-zero index of the other
+ // pointer, we know they cannot alias.
+ //
+ if (isGEP(V2)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ }
+
+ if (V1Size != ~0U && V2Size != ~0U)
+ if (isGEP(V1)) {
+ SmallVector<Value*, 16> GEPOperands;
+ const Value *BasePtr = GetGEPOperands(V1, GEPOperands);
+
+ AliasResult R = alias(BasePtr, V1Size, V2, V2Size);
+ if (R == MustAlias) {
+ // If there is at least one non-zero constant index, we know they cannot
+ // alias.
+ bool ConstantFound = false;
+ bool AllZerosFound = true;
+ for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i)
+ if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) {
+ if (!C->isNullValue()) {
+ ConstantFound = true;
+ AllZerosFound = false;
+ break;
+ }
+ } else {
+ AllZerosFound = false;
+ }
+
+      // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must-aliases
+      // the pointer, then the end result is a must alias as well.
+ if (AllZerosFound)
+ return MustAlias;
+
+ if (ConstantFound) {
+ if (V2Size <= 1 && V1Size <= 1) // Just pointer check?
+ return NoAlias;
+
+ // Otherwise we have to check to see that the distance is more than
+ // the size of the argument... build an index vector that is equal to
+ // the arguments provided, except substitute 0's for any variable
+ // indexes we find...
+ if (cast<PointerType>(
+ BasePtr->getType())->getElementType()->isSized()) {
+ for (unsigned i = 0; i != GEPOperands.size(); ++i)
+ if (!isa<ConstantInt>(GEPOperands[i]))
+ GEPOperands[i] =
+ Constant::getNullValue(GEPOperands[i]->getType());
+ int64_t Offset =
+ getTargetData().getIndexedOffset(BasePtr->getType(),
+ &GEPOperands[0],
+ GEPOperands.size());
+
+ if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size)
+ return NoAlias;
+ }
+ }
+ }
+ }
+
+ return MayAlias;
+}
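+
+// Worked example for the offset check above: with %A of type [10 x i32]*,
+// V1 = gep %A, 0, 4 and V2 = %A, the zero-substituted index offset is 16
+// bytes. With V2Size = 4, Offset (16) >= V2Size, so the two accesses cannot
+// overlap and we return NoAlias.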
+
+// This function is used to determine if the indices of two GEP
+// instructions are equal. V1 and V2 are the indices.
+static bool IndexOperandsEqual(Value *V1, Value *V2) {
+ if (V1->getType() == V2->getType())
+ return V1 == V2;
+ if (Constant *C1 = dyn_cast<Constant>(V1))
+ if (Constant *C2 = dyn_cast<Constant>(V2)) {
+ // Sign extend the constants to long types, if necessary
+ if (C1->getType() != Type::Int64Ty)
+ C1 = ConstantExpr::getSExt(C1, Type::Int64Ty);
+ if (C2->getType() != Type::Int64Ty)
+ C2 = ConstantExpr::getSExt(C2, Type::Int64Ty);
+ return C1 == C2;
+ }
+ return false;
+}
+
+/// CheckGEPInstructions - Check two GEP instructions with known must-aliasing
+/// base pointers. This checks to see if the index expressions preclude the
+/// pointers from aliasing...
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::CheckGEPInstructions(
+ const Type* BasePtr1Ty, Value **GEP1Ops, unsigned NumGEP1Ops, unsigned G1S,
+ const Type *BasePtr2Ty, Value **GEP2Ops, unsigned NumGEP2Ops, unsigned G2S) {
+ // We currently can't handle the case when the base pointers have different
+ // primitive types. Since this is uncommon anyway, we are happy being
+ // extremely conservative.
+ if (BasePtr1Ty != BasePtr2Ty)
+ return MayAlias;
+
+ const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
+
+ // Find the (possibly empty) initial sequence of equal values... which are not
+ // necessarily constants.
+ unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
+ unsigned MinOperands = std::min(NumGEP1Operands, NumGEP2Operands);
+ unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands);
+ unsigned UnequalOper = 0;
+ while (UnequalOper != MinOperands &&
+ IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) {
+ // Advance through the type as we go...
+ ++UnequalOper;
+ if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
+ BasePtr1Ty = CT->getTypeAtIndex(GEP1Ops[UnequalOper-1]);
+ else {
+ // If all operands equal each other, then the derived pointers must
+ // alias each other...
+ BasePtr1Ty = 0;
+ assert(UnequalOper == NumGEP1Operands && UnequalOper == NumGEP2Operands &&
+ "Ran out of type nesting, but not out of operands?");
+ return MustAlias;
+ }
+ }
+
+  // If we have seen all constant operands and run out of indexes on one of
+  // the getelementptrs, check to see if the tail of the leftover one is all
+  // zeros. If so, return MustAlias.
+ if (UnequalOper == MinOperands) {
+ if (NumGEP1Ops < NumGEP2Ops) {
+ std::swap(GEP1Ops, GEP2Ops);
+ std::swap(NumGEP1Ops, NumGEP2Ops);
+ }
+
+ bool AllAreZeros = true;
+ for (unsigned i = UnequalOper; i != MaxOperands; ++i)
+ if (!isa<Constant>(GEP1Ops[i]) ||
+ !cast<Constant>(GEP1Ops[i])->isNullValue()) {
+ AllAreZeros = false;
+ break;
+ }
+ if (AllAreZeros) return MustAlias;
+ }
+
+
+ // So now we know that the indexes derived from the base pointers,
+ // which are known to alias, are different. We can still determine a
+ // no-alias result if there are differing constant pairs in the index
+ // chain. For example:
+ // A[i][0] != A[j][1] iff (&A[0][1]-&A[0][0] >= std::max(G1S, G2S))
+ //
+ // We have to be careful here about array accesses. In particular, consider:
+ // A[1][0] vs A[0][i]
+ // In this case, we don't *know* that the array will be accessed in bounds:
+ // the index could even be negative. Because of this, we have to
+ // conservatively *give up* and return may alias. We disregard differing
+ // array subscripts that are followed by a variable index without going
+ // through a struct.
+ //
+ unsigned SizeMax = std::max(G1S, G2S);
+ if (SizeMax == ~0U) return MayAlias; // Avoid frivolous work.
+
+ // Scan for the first operand that is constant and unequal in the
+ // two getelementptrs...
+ unsigned FirstConstantOper = UnequalOper;
+ for (; FirstConstantOper != MinOperands; ++FirstConstantOper) {
+ const Value *G1Oper = GEP1Ops[FirstConstantOper];
+ const Value *G2Oper = GEP2Ops[FirstConstantOper];
+
+ if (G1Oper != G2Oper) // Found non-equal constant indexes...
+ if (Constant *G1OC = dyn_cast<ConstantInt>(const_cast<Value*>(G1Oper)))
+ if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){
+ if (G1OC->getType() != G2OC->getType()) {
+ // Sign extend both operands to long.
+ if (G1OC->getType() != Type::Int64Ty)
+ G1OC = ConstantExpr::getSExt(G1OC, Type::Int64Ty);
+ if (G2OC->getType() != Type::Int64Ty)
+ G2OC = ConstantExpr::getSExt(G2OC, Type::Int64Ty);
+ GEP1Ops[FirstConstantOper] = G1OC;
+ GEP2Ops[FirstConstantOper] = G2OC;
+ }
+
+ if (G1OC != G2OC) {
+ // Handle the "be careful" case above: if this is an array/vector
+ // subscript, scan for a subsequent variable array index.
+ if (const SequentialType *STy =
+ dyn_cast<SequentialType>(BasePtr1Ty)) {
+ const Type *NextTy = STy;
+ bool isBadCase = false;
+
+ for (unsigned Idx = FirstConstantOper;
+ Idx != MinOperands && isa<SequentialType>(NextTy); ++Idx) {
+ const Value *V1 = GEP1Ops[Idx], *V2 = GEP2Ops[Idx];
+ if (!isa<Constant>(V1) || !isa<Constant>(V2)) {
+ isBadCase = true;
+ break;
+ }
+ // If the array is indexed beyond the bounds of the static type
+ // at this level, it will also fall into the "be careful" case.
+ // It would theoretically be possible to analyze these cases,
+ // but for now just be conservatively correct.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ if (cast<ConstantInt>(G1OC)->getZExtValue() >=
+ ATy->getNumElements() ||
+ cast<ConstantInt>(G2OC)->getZExtValue() >=
+ ATy->getNumElements()) {
+ isBadCase = true;
+ break;
+ }
+ if (const VectorType *VTy = dyn_cast<VectorType>(STy))
+ if (cast<ConstantInt>(G1OC)->getZExtValue() >=
+ VTy->getNumElements() ||
+ cast<ConstantInt>(G2OC)->getZExtValue() >=
+ VTy->getNumElements()) {
+ isBadCase = true;
+ break;
+ }
+ STy = cast<SequentialType>(NextTy);
+ NextTy = cast<SequentialType>(NextTy)->getElementType();
+ }
+
+ if (isBadCase) G1OC = 0;
+ }
+
+ // Make sure they are comparable (ie, not constant expressions), and
+ // make sure the GEP with the smaller leading constant is GEP1.
+ if (G1OC) {
+ Constant *Compare = ConstantExpr::getICmp(ICmpInst::ICMP_SGT,
+ G1OC, G2OC);
+ if (ConstantInt *CV = dyn_cast<ConstantInt>(Compare)) {
+ if (CV->getZExtValue()) { // If they are comparable and G2 > G1
+ std::swap(GEP1Ops, GEP2Ops); // Make GEP1 < GEP2
+ std::swap(NumGEP1Ops, NumGEP2Ops);
+ }
+ break;
+ }
+ }
+ }
+ }
+ BasePtr1Ty = cast<CompositeType>(BasePtr1Ty)->getTypeAtIndex(G1Oper);
+ }
+
+  // No unequal constant operands were found, and we ran out of common
+  // operands. At this point, the GEP instructions have run through all of
+  // their common operands, and we haven't found evidence of any deltas
+  // between the GEPs.
+ // However, one GEP may have more operands than the other. If this is the
+ // case, there may still be hope. Check this now.
+ if (FirstConstantOper == MinOperands) {
+ // Make GEP1Ops be the longer one if there is a longer one.
+ if (NumGEP1Ops < NumGEP2Ops) {
+ std::swap(GEP1Ops, GEP2Ops);
+ std::swap(NumGEP1Ops, NumGEP2Ops);
+ }
+
+ // Is there anything to check?
+ if (NumGEP1Ops > MinOperands) {
+ for (unsigned i = FirstConstantOper; i != MaxOperands; ++i)
+ if (isa<ConstantInt>(GEP1Ops[i]) &&
+ !cast<ConstantInt>(GEP1Ops[i])->isZero()) {
+ // Yup, there's a constant in the tail. Set all variables to
+ // constants in the GEP instruction to make it suitable for
+ // TargetData::getIndexedOffset.
+ for (i = 0; i != MaxOperands; ++i)
+ if (!isa<ConstantInt>(GEP1Ops[i]))
+ GEP1Ops[i] = Constant::getNullValue(GEP1Ops[i]->getType());
+ // Okay, now get the offset. This is the relative offset for the full
+ // instruction.
+ const TargetData &TD = getTargetData();
+ int64_t Offset1 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops,
+ NumGEP1Ops);
+
+ // Now check without any constants at the end.
+ int64_t Offset2 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops,
+ MinOperands);
+
+ // Make sure we compare the absolute difference.
+ if (Offset1 > Offset2)
+ std::swap(Offset1, Offset2);
+
+ // If the tail provided a big enough offset, return noalias!
+ if ((uint64_t)(Offset2-Offset1) >= SizeMax)
+ return NoAlias;
+ // Otherwise break - we don't look for another constant in the tail.
+ break;
+ }
+ }
+
+ // Couldn't find anything useful.
+ return MayAlias;
+ }
+
+ // If there are non-equal constant arguments, then we can figure
+ // out a minimum known delta between the two index expressions... at
+ // this point we know that the first constant index of GEP1 is less
+ // than the first constant index of GEP2.
+
+ // Advance BasePtr[12]Ty over this first differing constant operand.
+ BasePtr2Ty = cast<CompositeType>(BasePtr1Ty)->
+ getTypeAtIndex(GEP2Ops[FirstConstantOper]);
+ BasePtr1Ty = cast<CompositeType>(BasePtr1Ty)->
+ getTypeAtIndex(GEP1Ops[FirstConstantOper]);
+
+ // We are going to be using TargetData::getIndexedOffset to determine the
+ // offset that each of the GEPs is reaching. To do this, we have to convert
+ // all variable references to constant references, so we start by converting
+ // the initial sequence of array subscripts into constant zeros.
+ const Type *ZeroIdxTy = GEPPointerTy;
+ for (unsigned i = 0; i != FirstConstantOper; ++i) {
+ if (!isa<StructType>(ZeroIdxTy))
+ GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Type::Int32Ty);
+
+ if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy))
+ ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]);
+ }
+
+ // We know that GEP1Ops[FirstConstantOper] & GEP2Ops[FirstConstantOper] are ok
+
+ // Loop over the rest of the operands...
+ for (unsigned i = FirstConstantOper+1; i != MaxOperands; ++i) {
+ const Value *Op1 = i < NumGEP1Ops ? GEP1Ops[i] : 0;
+ const Value *Op2 = i < NumGEP2Ops ? GEP2Ops[i] : 0;
+ // If they are equal, use a zero index...
+ if (Op1 == Op2 && BasePtr1Ty == BasePtr2Ty) {
+ if (!isa<ConstantInt>(Op1))
+ GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Op1->getType());
+ // Otherwise, just keep the constants we have.
+ } else {
+ if (Op1) {
+ if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // If this is an array index, make sure the array element is in range.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty)) {
+ if (Op1C->getZExtValue() >= AT->getNumElements())
+ return MayAlias; // Be conservative with out-of-range accesses
+ } else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty)) {
+ if (Op1C->getZExtValue() >= VT->getNumElements())
+ return MayAlias; // Be conservative with out-of-range accesses
+ }
+
+ } else {
+ // GEP1 is known to produce a value less than GEP2. To be
+ // conservatively correct, we must assume the largest possible
+ // constant is used in this position. This cannot be the initial
+ // index to the GEP instructions (because we know we have at least one
+ // element before this one with the different constant arguments), so
+ // we know that the current index must be into either a struct or
+ // array. Because we know it's not constant, this cannot be a
+ // structure index. Because of this, we can calculate the maximum
+ // value possible.
+ //
+ if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty))
+ GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,AT->getNumElements()-1);
+ else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty))
+ GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,VT->getNumElements()-1);
+ }
+ }
+
+ if (Op2) {
+ if (const ConstantInt *Op2C = dyn_cast<ConstantInt>(Op2)) {
+ // If this is an array index, make sure the array element is in range.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr2Ty)) {
+ if (Op2C->getZExtValue() >= AT->getNumElements())
+ return MayAlias; // Be conservative with out-of-range accesses
+ } else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr2Ty)) {
+ if (Op2C->getZExtValue() >= VT->getNumElements())
+ return MayAlias; // Be conservative with out-of-range accesses
+ }
+ } else { // Conservatively assume the minimum value for this index
+ GEP2Ops[i] = Constant::getNullValue(Op2->getType());
+ }
+ }
+ }
+
+ if (BasePtr1Ty && Op1) {
+ if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
+ BasePtr1Ty = CT->getTypeAtIndex(GEP1Ops[i]);
+ else
+ BasePtr1Ty = 0;
+ }
+
+ if (BasePtr2Ty && Op2) {
+ if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr2Ty))
+ BasePtr2Ty = CT->getTypeAtIndex(GEP2Ops[i]);
+ else
+ BasePtr2Ty = 0;
+ }
+ }
+
+ if (GEPPointerTy->getElementType()->isSized()) {
+ int64_t Offset1 =
+ getTargetData().getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops);
+ int64_t Offset2 =
+ getTargetData().getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops);
+ assert(Offset1 != Offset2 &&
+ "There is at least one different constant here!");
+
+ // Make sure we compare the absolute difference.
+ if (Offset1 > Offset2)
+ std::swap(Offset1, Offset2);
+
+ if ((uint64_t)(Offset2-Offset1) >= SizeMax) {
+ //cerr << "Determined that these two GEP's don't alias ["
+ // << SizeMax << " bytes]: \n" << *GEP1 << *GEP2;
+ return NoAlias;
+ }
+ }
+ return MayAlias;
+}
+
+// Make sure that anything that uses AliasAnalysis pulls in this file...
+DEFINING_FILE_FOR(BasicAliasAnalysis)
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 0000000..143220c
--- /dev/null
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,221 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a '-dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
+// The other main feature of this file is that it implements the
+// Function::viewCFG method, which is useful for debugging passes which operate
+// on the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Config/config.h"
+#include <iosfwd>
+#include <sstream>
+#include <fstream>
+using namespace llvm;
+
+/// CFGOnly flag - This is used to control whether the CFG graph printer
+/// prints out the contents of basic blocks. This is acceptable because
+/// this code is only really used for debugging purposes.
+///
+static bool CFGOnly = false;
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
+ static std::string getGraphName(const Function *F) {
+ return "CFG for '" + F->getName() + "' function";
+ }
+
+ static std::string getNodeLabel(const BasicBlock *Node,
+ const Function *Graph) {
+ if (CFGOnly && !Node->getName().empty())
+ return Node->getName() + ":";
+
+ std::ostringstream Out;
+ if (CFGOnly) {
+ WriteAsOperand(Out, Node, false);
+ return Out.str();
+ }
+
+ if (Node->getName().empty()) {
+ WriteAsOperand(Out, Node, false);
+ Out << ":";
+ }
+
+ Out << *Node;
+ std::string OutStr = Out.str();
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
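+ // (For example, "bb:\n  ret void\n" becomes "bb:\l  ret void\l", so each
+ // instruction is left-justified on its own line in the DOT node, and any
+ // ";" comments are dropped through to the end of their line.)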
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ } else if (OutStr[i] == ';') { // Delete comments!
+ unsigned Idx = OutStr.find('\n', i+1); // Find end of line
+ OutStr.erase(OutStr.begin()+i, OutStr.begin()+Idx);
+ --i;
+ }
+
+ return OutStr;
+ }
+
+ static std::string getEdgeSourceLabel(const BasicBlock *Node,
+ succ_const_iterator I) {
+ // Label source of conditional branches with "T" or "F"
+ if (const BranchInst *BI = dyn_cast<BranchInst>(Node->getTerminator()))
+ if (BI->isConditional())
+ return (I == succ_begin(Node)) ? "T" : "F";
+ return "";
+ }
+};
+}
+
+namespace {
+ struct VISIBILITY_HIDDEN CFGViewer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGViewer() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ F.viewCFG();
+ return false;
+ }
+
+ void print(std::ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGViewer::ID = 0;
+static RegisterPass<CFGViewer>
+V0("view-cfg", "View CFG of function", false, true);
+
+namespace {
+ struct VISIBILITY_HIDDEN CFGOnlyViewer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyViewer() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ CFGOnly = true;
+ F.viewCFG();
+ CFGOnly = false;
+ return false;
+ }
+
+ void print(std::ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyViewer::ID = 0;
+static RegisterPass<CFGOnlyViewer>
+V1("view-cfg-only",
+ "View CFG of function (with no function bodies)", false, true);
+
+namespace {
+ struct VISIBILITY_HIDDEN CFGPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGPrinter() : FunctionPass(&ID) {}
+ explicit CFGPrinter(void *pid) : FunctionPass(pid) {}
+
+ virtual bool runOnFunction(Function &F) {
+ std::string Filename = "cfg." + F.getName() + ".dot";
+ cerr << "Writing '" << Filename << "'...";
+ std::ofstream File(Filename.c_str());
+
+ if (File.good())
+ WriteGraph(File, (const Function*)&F);
+ else
+ cerr << " error opening file for writing!";
+ cerr << "\n";
+ return false;
+ }
+
+ void print(std::ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGPrinter::ID = 0;
+static RegisterPass<CFGPrinter>
+P1("dot-cfg", "Print CFG of function to 'dot' file", false, true);
+
+namespace {
+ struct VISIBILITY_HIDDEN CFGOnlyPrinter : public CFGPrinter {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyPrinter() : CFGPrinter(&ID) {}
+ virtual bool runOnFunction(Function &F) {
+ bool OldCFGOnly = CFGOnly;
+ CFGOnly = true;
+ CFGPrinter::runOnFunction(F);
+ CFGOnly = OldCFGOnly;
+ return false;
+ }
+ void print(std::ostream &OS, const Module* = 0) const {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyPrinter::ID = 0;
+static RegisterPass<CFGOnlyPrinter>
+P2("dot-cfg-only",
+ "Print CFG of function to 'dot' file (with no function bodies)", false, true);
+
+/// viewCFG - This function is meant for use from the debugger. You can just
+/// say 'call F->viewCFG()' and a ghostview window should pop up from the
+/// program, displaying the CFG of the current function. This depends on there
+/// being a 'dot' and 'gv' program in your path.
+///
+void Function::viewCFG() const {
+ ViewGraph(this, "cfg" + getName());
+}
+
+/// viewCFGOnly - This function is meant for use from the debugger. It works
+/// just like viewCFG, but it does not include the contents of basic blocks
+/// into the nodes, just the label. If you are only interested in the CFG,
+/// this can make the graph smaller.
+///
+void Function::viewCFGOnly() const {
+ CFGOnly = true;
+ viewCFG();
+ CFGOnly = false;
+}
+
+FunctionPass *llvm::createCFGPrinterPass() {
+ return new CFGPrinter();
+}
+
+FunctionPass *llvm::createCFGOnlyPrinterPass() {
+ return new CFGOnlyPrinter();
+}
+
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
new file mode 100644
index 0000000..093aa69
--- /dev/null
+++ b/lib/Analysis/CMakeLists.txt
@@ -0,0 +1,34 @@
+add_llvm_library(LLVMAnalysis
+ AliasAnalysis.cpp
+ AliasAnalysisCounter.cpp
+ AliasAnalysisEvaluator.cpp
+ AliasDebugger.cpp
+ AliasSetTracker.cpp
+ Analysis.cpp
+ BasicAliasAnalysis.cpp
+ CaptureTracking.cpp
+ CFGPrinter.cpp
+ ConstantFolding.cpp
+ DbgInfoPrinter.cpp
+ DebugInfo.cpp
+ InstCount.cpp
+ Interval.cpp
+ IntervalPartition.cpp
+ IVUsers.cpp
+ LibCallAliasAnalysis.cpp
+ LibCallSemantics.cpp
+ LiveValues.cpp
+ LoopInfo.cpp
+ LoopPass.cpp
+ LoopVR.cpp
+ MemoryDependenceAnalysis.cpp
+ PostDominators.cpp
+ ProfileInfo.cpp
+ ProfileInfoLoader.cpp
+ ProfileInfoLoaderPass.cpp
+ ScalarEvolution.cpp
+ ScalarEvolutionExpander.cpp
+ SparsePropagation.cpp
+ Trace.cpp
+ ValueTracking.cpp
+ )
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 0000000..a19b8e4
--- /dev/null
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,112 @@
+//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+// A pointer value is captured if the function makes a copy of any part of the
+// pointer that outlives the call. Not being captured means, more or less, that
+// the pointer is only dereferenced and not stored in a global. Returning part
+// of the pointer as the function return value may or may not count as capturing
+// the pointer, depending on the context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Instructions.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+/// PointerMayBeCaptured - Return true if this pointer value may be captured
+/// by the enclosing function (which is required to exist). This routine can
+/// be expensive, so consider caching the results. The boolean ReturnCaptures
+/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not.
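+///
+/// As an illustration (example not taken from this file): in
+///   void f(int *P) { g(P); use(*P); }
+/// P may be captured by the call to g unless g's parameter is marked
+/// 'nocapture'; the load through P by itself does not capture it.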
+bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
+ assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!");
+ SmallVector<Use*, 16> Worklist;
+ SmallSet<Use*, 16> Visited;
+
+ for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ Use *U = &UI.getUse();
+ Visited.insert(U);
+ Worklist.push_back(U);
+ }
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ V = U->get();
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS = CallSite::get(I);
+ // Not captured if the callee is readonly, doesn't return a copy through
+ // its return value, and doesn't unwind (a readonly function can leak bits
+ // by throwing an exception or not depending on the input value).
+ if (CS.onlyReadsMemory() && CS.doesNotThrow() &&
+ I->getType() == Type::VoidTy)
+ break;
+
+ // Not captured if only passed via 'nocapture' arguments. Note that
+ // calling a function pointer does not in itself cause the pointer to
+ // be captured. This is a subtle point considering that (for example)
+ // the callee might return its own address. It is analogous to saying
+ // that loading a value from a pointer does not cause the pointer to be
+ // captured, even though the loaded value might be the pointer itself
+ // (think of self-referential objects).
+ CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (CallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture))
+ // The parameter is not marked 'nocapture' - captured.
+ return true;
+ // Only passed via 'nocapture' arguments, or is the called function - not
+ // captured.
+ break;
+ }
+ case Instruction::Free:
+ // Freeing a pointer does not cause it to be captured.
+ break;
+ case Instruction::Load:
+ // Loading from a pointer does not cause it to be captured.
+ break;
+ case Instruction::Ret:
+ if (ReturnCaptures)
+ return true;
+ break;
+ case Instruction::Store:
+ if (V == I->getOperand(0))
+ // Stored the pointer - it may be captured.
+ return true;
+ // Storing to the pointee does not cause the pointer to be captured.
+ break;
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ // The original value is not captured via this if the new value isn't.
+ for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Use *U = &UI.getUse();
+ if (Visited.insert(U))
+ Worklist.push_back(U);
+ }
+ break;
+ default:
+ // Something else - be conservative and say it is captured.
+ return true;
+ }
+ }
+
+ // All uses examined - not captured.
+ return false;
+}
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
new file mode 100644
index 0000000..e5ab322
--- /dev/null
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -0,0 +1,829 @@
+//===-- ConstantFolding.cpp - Analyze constant folding possibilities ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions determines the possibility of performing constant
+// folding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include <cerrno>
+#include <cmath>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Folding internal helper functions
+//===----------------------------------------------------------------------===//
+
+/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
+/// from a global, return the global and the constant. Because of
+/// constantexprs, this function is recursive.
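+///
+/// For example (illustrative only), given the constant expression
+///   getelementptr (i8* bitcast ([4 x i32]* @g to i8*), i32 8)
+/// this returns GV = @g and Offset = 8.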
+static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
+ int64_t &Offset, const TargetData &TD) {
+ // Trivial case, constant is the global.
+ if ((GV = dyn_cast<GlobalValue>(C))) {
+ Offset = 0;
+ return true;
+ }
+
+ // Otherwise, if this isn't a constant expr, bail out.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return false;
+
+ // Look through ptr->int and ptr->ptr casts.
+ if (CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::BitCast)
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+
+ // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // Cannot compute this if the element type of the pointer is missing size
+ // info.
+ if (!cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return false;
+
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ gep_type_iterator GTI = gep_type_begin(CE);
+ for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end();
+ i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*i);
+ if (!CI) return false; // Index isn't a simple constant?
+ if (CI->getZExtValue() == 0) continue; // Not adding anything.
+
+ if (const StructType *ST = dyn_cast<StructType>(*GTI)) {
+ // N = N + Offset
+ Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
+ } else {
+ const SequentialType *SQT = cast<SequentialType>(*GTI);
+ Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue();
+ }
+ }
+ return true;
+ }
+
+ return false;
+}
+
+
+/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// Attempt to symbolically evaluate the result of a binary operator merging
+/// these together. If target data info is available, it is provided as TD,
+/// otherwise TD is null.
+static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
+ Constant *Op1, const TargetData *TD){
+ // SROA
+
+ // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
+ // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
+ // bits.
+
+
+ // If the constant expr is something like &A[123] - &A[4].f, fold this into a
+ // constant. This happens frequently when iterating over a global array.
+ if (Opc == Instruction::Sub && TD) {
+ GlobalValue *GV1, *GV2;
+ int64_t Offs1, Offs2;
+
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
+ GV1 == GV2) {
+ // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
+ return ConstantInt::get(Op0->getType(), Offs1-Offs2);
+ }
+ }
+
+ return 0;
+}
+
+/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
+/// constant expression, do so.
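+///
+/// For instance (illustrative), with 32-bit pointers the expression
+///   getelementptr (i32* inttoptr (i32 16 to i32*), i32 2)
+/// folds to inttoptr (i32 24 to i32*): base address 16 plus 2 * sizeof(i32).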
+static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
+ const Type *ResultTy,
+ const TargetData *TD) {
+ Constant *Ptr = Ops[0];
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+ return 0;
+
+ uint64_t BasePtr = 0;
+ if (!Ptr->isNullValue()) {
+ // If this is an inttoptr from a constant int, we can fold this as the base,
+ // otherwise we can't.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+ BasePtr = Base->getZExtValue();
+
+ if (BasePtr == 0)
+ return 0;
+ }
+
+ // If this is a constant expr gep that is effectively computing an
+ // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
+ for (unsigned i = 1; i != NumOps; ++i)
+ if (!isa<ConstantInt>(Ops[i]))
+ return 0;
+
+ uint64_t Offset = TD->getIndexedOffset(Ptr->getType(),
+ (Value**)Ops+1, NumOps-1);
+ Constant *C = ConstantInt::get(TD->getIntPtrType(), Offset+BasePtr);
+ return ConstantExpr::getIntToPtr(C, ResultTy);
+}
+
+/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
+/// targetdata. Return 0 if unfoldable.
+static Constant *FoldBitCast(Constant *C, const Type *DestTy,
+ const TargetData &TD) {
+ // If this is a bitcast from constant vector -> vector, fold it.
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ // If the element counts match, VMCore can fold it.
+ unsigned NumDstElt = DestVTy->getNumElements();
+ unsigned NumSrcElt = CV->getNumOperands();
+ if (NumDstElt == NumSrcElt)
+ return 0;
+
+ const Type *SrcEltTy = CV->getType()->getElementType();
+ const Type *DstEltTy = DestVTy->getElementType();
+
+ // Otherwise, we're changing the number of elements in a vector, which
+ // requires endianness information to do the right thing. For example,
+ // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ // folds to (little endian):
+ // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+ // and to (big endian):
+ // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+ // First things first. We only want to think about integers here, so if
+ // we have something in FP form, recast it as integer.
+ if (DstEltTy->isFloatingPoint()) {
+ // Fold to a vector of integers with the same size as our FP type.
+ unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+ const Type *DestIVTy = VectorType::get(IntegerType::get(FPWidth),
+ NumDstElt);
+ // Recursively handle this integer conversion, if possible.
+ C = FoldBitCast(C, DestIVTy, TD);
+ if (!C) return 0;
+
+ // Finally, VMCore can handle this now that #elts line up.
+ return ConstantExpr::getBitCast(C, DestTy);
+ }
+
+ // Okay, we know the destination is integer; if the input is FP, convert
+ // it to integer first.
+ if (SrcEltTy->isFloatingPoint()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ const Type *SrcIVTy = VectorType::get(IntegerType::get(FPWidth),
+ NumSrcElt);
+ // Ask VMCore to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ CV = dyn_cast<ConstantVector>(C);
+ if (!CV) return 0; // If VMCore wasn't able to fold it, bail out.
+ }
+
+ // Now we know that the input and output vectors are both integer vectors
+ // of the same size, and that their #elements is not the same. Do the
+ // conversion here, which depends on whether the input or output has
+ // more elements.
+ bool isLittleEndian = TD.isLittleEndian();
+
+ SmallVector<Constant*, 32> Result;
+ if (NumDstElt < NumSrcElt) {
+ // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+ Constant *Zero = Constant::getNullValue(DstEltTy);
+ unsigned Ratio = NumSrcElt/NumDstElt;
+ unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned SrcElt = 0;
+ for (unsigned i = 0; i != NumDstElt; ++i) {
+ // Build each element of the result.
+ Constant *Elt = Zero;
+ unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+ if (!Src) return 0; // Reject constantexpr elements.
+
+ // Zero extend the element to the right size.
+ Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+ // Shift it to the right place, depending on endianness.
+ Src = ConstantExpr::getShl(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+ // Mix it in.
+ Elt = ConstantExpr::getOr(Elt, Src);
+ }
+ Result.push_back(Elt);
+ }
+ } else {
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
+ if (!Src) return 0; // Reject constantexpr elements.
+
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+ }
+ }
+ }
+
+ return ConstantVector::get(Result.data(), Result.size());
+ }
+ }
+
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding public APIs
+//===----------------------------------------------------------------------===//
+
+
+/// ConstantFoldInstruction - Attempt to constant fold the specified
+/// instruction. If successful, the constant result is returned, if not, null
+/// is returned. Note that this function can only fail when attempting to fold
+/// instructions like loads and stores, which have no constant expression form.
+///
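+/// A typical use from a pass might look like (hypothetical caller, not part
+/// of this file):
+///   if (Constant *C = ConstantFoldInstruction(I, TD)) {
+///     I->replaceAllUsesWith(C);
+///     I->eraseFromParent();
+///   }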
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (PN->getNumIncomingValues() == 0)
+ return UndefValue::get(PN->getType());
+
+ Constant *Result = dyn_cast<Constant>(PN->getIncomingValue(0));
+ if (Result == 0) return 0;
+
+ // Handle PHI nodes specially here...
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) != Result && PN->getIncomingValue(i) != PN)
+ return 0; // Not all the same incoming constants...
+
+ // If we reach here, all incoming values are the same constant.
+ return Result;
+ }
+
+ // Scan the operand list, checking to see if they are all constants; if so,
+ // hand off to ConstantFoldInstOperands.
+ SmallVector<Constant*, 8> Ops;
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (Constant *Op = dyn_cast<Constant>(*i))
+ Ops.push_back(Op);
+ else
+ return 0; // Not all operands are constant!
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(),
+ Ops.data(), Ops.size(), TD);
+ else
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified TargetData. If successful, the constant result is
+/// returned; if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
+ const TargetData *TD) {
+ assert(TD && "ConstantFoldConstantExpression requires a valid TargetData.");
+
+ SmallVector<Constant*, 8> Ops;
+ for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
+ Ops.push_back(cast<Constant>(*i));
+
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(),
+ Ops.data(), Ops.size(), TD);
+ else
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
+ Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// specified opcode and operands. If successful, the constant result is
+/// returned, if not, null is returned. Note that this function can fail when
+/// attempting to fold instructions like loads and stores, which have no
+/// constant expression form.
+///
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
+ Constant* const* Ops, unsigned NumOps,
+ const TargetData *TD) {
+ // Handle easy binops first.
+ if (Instruction::isBinaryOp(Opcode)) {
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ return C;
+
+ return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
+ }
+
+ switch (Opcode) {
+ default: return 0;
+ case Instruction::Call:
+ if (Function *F = dyn_cast<Function>(Ops[0]))
+ if (canConstantFoldCallTo(F))
+ return ConstantFoldCall(F, Ops+1, NumOps-1);
+ return 0;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::VICmp:
+ case Instruction::VFCmp:
+ assert(0 &&"This function is invalid for compares: no predicate specified");
+ case Instruction::PtrToInt:
+ // If the input is an inttoptr, eliminate the pair. This requires knowing
+ // the width of a pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+ Constant *Input = CE->getOperand(0);
+ unsigned InWidth = Input->getType()->getPrimitiveSizeInBits();
+ if (TD->getPointerSizeInBits() < InWidth) {
+ Constant *Mask =
+ ConstantInt::get(APInt::getLowBitsSet(InWidth,
+ TD->getPointerSizeInBits()));
+ Input = ConstantExpr::getAnd(Input, Mask);
+ }
+ // Do a zext or trunc to get to the dest size.
+ return ConstantExpr::getIntegerCast(Input, DestTy, false);
+ }
+ }
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::IntToPtr:
+ // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
+ // the int size is >= the ptr size. This requires knowing the width of a
+ // pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD &&
+ TD->getPointerSizeInBits() <=
+ CE->getType()->getPrimitiveSizeInBits()) {
+ if (CE->getOpcode() == Instruction::PtrToInt) {
+ Constant *Input = CE->getOperand(0);
+ Constant *C = FoldBitCast(Input, DestTy, *TD);
+ return C ? C : ConstantExpr::getBitCast(Input, DestTy);
+ }
+ // If there's a constant offset added to the integer value before
+ // it is casted back to a pointer, see if the expression can be
+ // converted into a GEP.
+ if (CE->getOpcode() == Instruction::Add)
+ if (ConstantInt *L = dyn_cast<ConstantInt>(CE->getOperand(0)))
+ if (ConstantExpr *R = dyn_cast<ConstantExpr>(CE->getOperand(1)))
+ if (R->getOpcode() == Instruction::PtrToInt)
+ if (GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(R->getOperand(0))) {
+ const PointerType *GVTy = cast<PointerType>(GV->getType());
+ if (const ArrayType *AT =
+ dyn_cast<ArrayType>(GVTy->getElementType())) {
+ const Type *ElTy = AT->getElementType();
+ uint64_t AllocSize = TD->getTypeAllocSize(ElTy);
+ APInt PSA(L->getValue().getBitWidth(), AllocSize);
+ if (ElTy == cast<PointerType>(DestTy)->getElementType() &&
+ L->getValue().urem(PSA) == 0) {
+ APInt ElemIdx = L->getValue().udiv(PSA);
+ if (ElemIdx.ult(APInt(ElemIdx.getBitWidth(),
+ AT->getNumElements()))) {
+ Constant *Index[] = {
+ Constant::getNullValue(CE->getType()),
+ ConstantInt::get(ElemIdx)
+ };
+ return ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
+ }
+ }
+ }
+ }
+ }
+ }
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::BitCast:
+ if (TD)
+ if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD))
+ return C;
+ return ConstantExpr::getBitCast(Ops[0], DestTy);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
+ return C;
+
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
+ }
+}
+
+/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// instruction (icmp/fcmp) with the specified operands. If it fails, it
+/// returns a constant expression of the specified operands.
+///
+Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
+ Constant*const * Ops,
+ unsigned NumOps,
+ const TargetData *TD) {
+ // fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
+ // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
+ //
+ // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+ // around to know if bit truncation is happening.
+ if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && Ops[1]->isNullValue()) {
+ const Type *IntPtrTy = TD->getIntPtrType();
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ }
+
+ // Only do this transformation if the int is IntPtrTy in size, otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if (CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
+ // FIXME!
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ }
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops[1])) {
+ if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+ const Type *IntPtrTy = TD->getIntPtrType();
+
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
+ IntPtrTy, false);
+ Constant *NewOps[] = { C0, C1 };
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ }
+
+ // Only do this transformation if the int is IntPtrTy in size, otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if ((CE0->getOpcode() == Instruction::PtrToInt &&
+ CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) {
+ Constant *NewOps[] = {
+ CE0->getOperand(0), CE1->getOperand(0)
+ };
+ return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD);
+ }
+ }
+ }
+ }
+ return ConstantExpr::getCompare(Predicate, Ops[0], Ops[1]);
+}
+
+
+/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
+/// getelementptr constantexpr, return the constant value being addressed by the
+/// constant expression, or null if something is funny and we can't decide.
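+///
+/// For example (illustrative): with C = [3 x i32] [i32 1, i32 2, i32 3] and
+/// CE = getelementptr ([3 x i32]* @g, i32 0, i32 1), this returns i32 2.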
+Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
+ ConstantExpr *CE) {
+ if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType()))
+ return 0; // Do not allow stepping over the value!
+
+ // Loop over all of the operands, tracking down which value we are
+ // addressing...
+ gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ for (++I; I != E; ++I)
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ ConstantInt *CU = cast<ConstantInt>(I.getOperand());
+ assert(CU->getZExtValue() < STy->getNumElements() &&
+ "Struct index out of range!");
+ unsigned El = (unsigned)CU->getZExtValue();
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ C = CS->getOperand(El);
+ } else if (isa<ConstantAggregateZero>(C)) {
+ C = Constant::getNullValue(STy->getElementType(El));
+ } else if (isa<UndefValue>(C)) {
+ C = UndefValue::get(STy->getElementType(El));
+ } else {
+ return 0;
+ }
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return 0;
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ C = CA->getOperand(CI->getZExtValue());
+ else if (isa<ConstantAggregateZero>(C))
+ C = Constant::getNullValue(ATy->getElementType());
+ else if (isa<UndefValue>(C))
+ C = UndefValue::get(ATy->getElementType());
+ else
+ return 0;
+ } else if (const VectorType *PTy = dyn_cast<VectorType>(*I)) {
+ if (CI->getZExtValue() >= PTy->getNumElements())
+ return 0;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
+ C = CP->getOperand(CI->getZExtValue());
+ else if (isa<ConstantAggregateZero>(C))
+ C = Constant::getNullValue(PTy->getElementType());
+ else if (isa<UndefValue>(C))
+ C = UndefValue::get(PTy->getElementType());
+ else
+ return 0;
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+ return C;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding for Calls
+//
+
+/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
+/// the specified function.
+bool
+llvm::canConstantFoldCallTo(const Function *F) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::sqrt:
+ case Intrinsic::powi:
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ return true;
+ default: break;
+ }
+
+ if (!F->hasName()) return false;
+ const char *Str = F->getNameStart();
+ unsigned Len = F->getNameLen();
+
+ // In these cases, the check of the length is required. We don't want to
+// return true for a name like "cos\0blah", which strcmp would report as equal
+// to "cos" even though it has length 8.
+ switch (Str[0]) {
+ default: return false;
+ case 'a':
+ if (Len == 4)
+ return !strcmp(Str, "acos") || !strcmp(Str, "asin") ||
+ !strcmp(Str, "atan");
+ else if (Len == 5)
+ return !strcmp(Str, "atan2");
+ return false;
+ case 'c':
+ if (Len == 3)
+ return !strcmp(Str, "cos");
+ else if (Len == 4)
+ return !strcmp(Str, "ceil") || !strcmp(Str, "cosf") ||
+ !strcmp(Str, "cosh");
+ return false;
+ case 'e':
+ if (Len == 3)
+ return !strcmp(Str, "exp");
+ return false;
+ case 'f':
+ if (Len == 4)
+ return !strcmp(Str, "fabs") || !strcmp(Str, "fmod");
+ else if (Len == 5)
+ return !strcmp(Str, "floor");
+ return false;
+ case 'l':
+ if (Len == 3 && !strcmp(Str, "log"))
+ return true;
+ if (Len == 5 && !strcmp(Str, "log10"))
+ return true;
+ return false;
+ case 'p':
+ if (Len == 3 && !strcmp(Str, "pow"))
+ return true;
+ return false;
+ case 's':
+ if (Len == 3)
+ return !strcmp(Str, "sin");
+ if (Len == 4)
+ return !strcmp(Str, "sinh") || !strcmp(Str, "sqrt") ||
+ !strcmp(Str, "sinf");
+ if (Len == 5)
+ return !strcmp(Str, "sqrtf");
+ return false;
+ case 't':
+ if (Len == 3 && !strcmp(Str, "tan"))
+ return true;
+ else if (Len == 4 && !strcmp(Str, "tanh"))
+ return true;
+ return false;
+ }
+}
+
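+/// ConstantFoldFP - Evaluate the given native double-precision function on V.
+/// If the call sets errno, reject the fold and return null; otherwise return
+/// the result as a ConstantFP of type Ty (which must be float or double).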
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+ const Type *Ty) {
+ errno = 0;
+ V = NativeFP(V);
+ if (errno != 0) {
+ errno = 0;
+ return 0;
+ }
+
+ if (Ty == Type::FloatTy)
+ return ConstantFP::get(APFloat((float)V));
+ if (Ty == Type::DoubleTy)
+ return ConstantFP::get(APFloat(V));
+ assert(0 && "Can only constant fold float/double");
+ return 0; // dummy return to suppress warning
+}
+
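+/// ConstantFoldBinaryFP - Binary counterpart of ConstantFoldFP: evaluate the
+/// given two-argument native function on V and W, rejecting the fold (and
+/// returning null) if errno is set.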
+static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
+ double V, double W,
+ const Type *Ty) {
+ errno = 0;
+ V = NativeFP(V, W);
+ if (errno != 0) {
+ errno = 0;
+ return 0;
+ }
+
+ if (Ty == Type::FloatTy)
+ return ConstantFP::get(APFloat((float)V));
+ if (Ty == Type::DoubleTy)
+ return ConstantFP::get(APFloat(V));
+ assert(0 && "Can only constant fold float/double");
+ return 0; // dummy return to suppress warning
+}
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
+Constant *
+llvm::ConstantFoldCall(Function *F,
+ Constant* const* Operands, unsigned NumOperands) {
+ if (!F->hasName()) return 0;
+ const char *Str = F->getNameStart();
+ unsigned Len = F->getNameLen();
+
+ const Type *Ty = F->getReturnType();
+ if (NumOperands == 1) {
+ if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+ if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy)
+ return 0;
+ // Currently APFloat versions of these functions do not exist, so we use
+ // the host native double versions. Float versions are not called
+ // directly, but for all of these functions it is true that
+ // (float)(f((double)arg)) == f(arg). Long double is not supported yet.
+ double V = Ty==Type::FloatTy ? (double)Op->getValueAPF().convertToFloat():
+ Op->getValueAPF().convertToDouble();
+ switch (Str[0]) {
+ case 'a':
+ if (Len == 4 && !strcmp(Str, "acos"))
+ return ConstantFoldFP(acos, V, Ty);
+ else if (Len == 4 && !strcmp(Str, "asin"))
+ return ConstantFoldFP(asin, V, Ty);
+ else if (Len == 4 && !strcmp(Str, "atan"))
+ return ConstantFoldFP(atan, V, Ty);
+ break;
+ case 'c':
+ if (Len == 4 && !strcmp(Str, "ceil"))
+ return ConstantFoldFP(ceil, V, Ty);
+ else if (Len == 3 && !strcmp(Str, "cos"))
+ return ConstantFoldFP(cos, V, Ty);
+ else if (Len == 4 && !strcmp(Str, "cosh"))
+ return ConstantFoldFP(cosh, V, Ty);
+ else if (Len == 4 && !strcmp(Str, "cosf"))
+ return ConstantFoldFP(cos, V, Ty);
+ break;
+ case 'e':
+ if (Len == 3 && !strcmp(Str, "exp"))
+ return ConstantFoldFP(exp, V, Ty);
+ break;
+ case 'f':
+ if (Len == 4 && !strcmp(Str, "fabs"))
+ return ConstantFoldFP(fabs, V, Ty);
+ else if (Len == 5 && !strcmp(Str, "floor"))
+ return ConstantFoldFP(floor, V, Ty);
+ break;
+ case 'l':
+ if (Len == 3 && !strcmp(Str, "log") && V > 0)
+ return ConstantFoldFP(log, V, Ty);
+ else if (Len == 5 && !strcmp(Str, "log10") && V > 0)
+ return ConstantFoldFP(log10, V, Ty);
+ else if (!strcmp(Str, "llvm.sqrt.f32") ||
+ !strcmp(Str, "llvm.sqrt.f64")) {
+ if (V >= -0.0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else // Undefined
+ return Constant::getNullValue(Ty);
+ }
+ break;
+ case 's':
+ if (Len == 3 && !strcmp(Str, "sin"))
+ return ConstantFoldFP(sin, V, Ty);
+ else if (Len == 4 && !strcmp(Str, "sinh"))
+ return ConstantFoldFP(sinh, V, Ty);
+ else if (Len == 4 && !strcmp(Str, "sqrt") && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Len == 5 && !strcmp(Str, "sqrtf") && V >= 0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else if (Len == 4 && !strcmp(Str, "sinf"))
+ return ConstantFoldFP(sin, V, Ty);
+ break;
+ case 't':
+ if (Len == 3 && !strcmp(Str, "tan"))
+ return ConstantFoldFP(tan, V, Ty);
+ else if (Len == 4 && !strcmp(Str, "tanh"))
+ return ConstantFoldFP(tanh, V, Ty);
+ break;
+ default:
+ break;
+ }
+ } else if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ if (Len > 11 && !memcmp(Str, "llvm.bswap", 10))
+ return ConstantInt::get(Op->getValue().byteSwap());
+ else if (Len > 11 && !memcmp(Str, "llvm.ctpop", 10))
+ return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ else if (Len > 10 && !memcmp(Str, "llvm.cttz", 9))
+ return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
+ else if (Len > 10 && !memcmp(Str, "llvm.ctlz", 9))
+ return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+ }
+ } else if (NumOperands == 2) {
+ if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy)
+ return 0;
+ double Op1V = Ty==Type::FloatTy ?
+ (double)Op1->getValueAPF().convertToFloat():
+ Op1->getValueAPF().convertToDouble();
+ if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ double Op2V = Ty==Type::FloatTy ?
+ (double)Op2->getValueAPF().convertToFloat():
+ Op2->getValueAPF().convertToDouble();
+
+ if (Len == 3 && !strcmp(Str, "pow")) {
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ } else if (Len == 4 && !strcmp(Str, "fmod")) {
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+ } else if (Len == 5 && !strcmp(Str, "atan2")) {
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ }
+ } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (!strcmp(Str, "llvm.powi.f32")) {
+ return ConstantFP::get(APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ } else if (!strcmp(Str, "llvm.powi.f64")) {
+ return ConstantFP::get(APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
+ }
+ }
+ }
+ }
+ return 0;
+}
+
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
new file mode 100644
index 0000000..d80d581
--- /dev/null
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -0,0 +1,167 @@
+//===- DbgInfoPrinter.cpp - Print debug info in a human-readable form ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints instructions and their associated
+// debug info:
+//
+// - source/line/col information
+// - original variable name
+// - original type name
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+PrintDirectory("print-fullpath",
+ cl::desc("Print fullpath when printing debug info"),
+ cl::Hidden);
+
+namespace {
+ class VISIBILITY_HIDDEN PrintDbgInfo : public FunctionPass {
+ raw_ostream &Out;
+ void printStopPoint(const DbgStopPointInst *DSI);
+ void printFuncStart(const DbgFuncStartInst *FS);
+ void printVariableDeclaration(const Value *V);
+ public:
+ static char ID; // Pass identification
+ PrintDbgInfo() : FunctionPass(&ID), Out(outs()) {}
+
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+ char PrintDbgInfo::ID = 0;
+ static RegisterPass<PrintDbgInfo> X("print-dbginfo",
+ "Print debug info in human readable form");
+}
+
+FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
+
+void PrintDbgInfo::printVariableDeclaration(const Value *V) {
+ std::string DisplayName, File, Directory, Type;
+ unsigned LineNo;
+
+ if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
+ return;
+
+ Out << "; ";
+ WriteAsOperand(Out, V, false, 0);
+ Out << " is variable " << DisplayName
+ << " of type " << Type << " declared at ";
+
+ if (PrintDirectory)
+ Out << Directory << "/";
+
+ Out << File << ":" << LineNo << "\n";
+}
+
+void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) {
+ if (PrintDirectory) {
+ std::string dir;
+ GetConstantStringInfo(DSI->getDirectory(), dir);
+ Out << dir << "/";
+ }
+
+ std::string file;
+ GetConstantStringInfo(DSI->getFileName(), file);
+ Out << file << ":" << DSI->getLine();
+
+ if (unsigned Col = DSI->getColumn())
+ Out << ":" << Col;
+}
+
+void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) {
+ DISubprogram Subprogram(cast<GlobalVariable>(FS->getSubprogram()));
+ std::string Res1, Res2;
+ Out << "; fully qualified function name: " << Subprogram.getDisplayName(Res1)
+ << " return type: " << Subprogram.getType().getName(Res2)
+ << " at line " << Subprogram.getLineNumber()
+ << "\n\n";
+}
+
+bool PrintDbgInfo::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ Out << "function " << F.getName() << "\n\n";
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
+ // Skip dead blocks.
+ continue;
+
+ const DbgStopPointInst *DSI = findBBStopPoint(BB);
+ Out << BB->getName();
+ Out << ":";
+
+ if (DSI) {
+ Out << "; (";
+ printStopPoint(DSI);
+ Out << ")";
+ }
+
+ Out << "\n";
+
+ // A dbgstoppoint's information is valid until we encounter a new one.
+ const DbgStopPointInst *LastDSP = DSI;
+ bool Printed = DSI != 0;
+ for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
+ i != e; ++i) {
+ if (isa<DbgInfoIntrinsic>(i)) {
+ if ((DSI = dyn_cast<DbgStopPointInst>(i))) {
+ if (LastDSP && DSI->getContext() == LastDSP->getContext() &&
+ DSI->getLineValue() == LastDSP->getLineValue() &&
+ DSI->getColumnValue() == LastDSP->getColumnValue())
+ // Don't print same location twice.
+ continue;
+
+ LastDSP = cast<DbgStopPointInst>(i);
+
+ // Don't print consecutive stoppoints; use a flag to know which one we
+ // printed.
+ Printed = false;
+ } else if (const DbgFuncStartInst *FS = dyn_cast<DbgFuncStartInst>(i)) {
+ printFuncStart(FS);
+ }
+ } else {
+ if (!Printed && LastDSP) {
+ Out << "; ";
+ printStopPoint(LastDSP);
+ Out << "\n";
+ Printed = true;
+ }
+
+ Out << *i;
+ printVariableDeclaration(i);
+
+ if (const User *U = dyn_cast<User>(i)) {
+ for (unsigned Op = 0; Op < U->getNumOperands(); ++Op)
+ printVariableDeclaration(U->getOperand(Op));
+ }
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
new file mode 100644
index 0000000..6bdb64c
--- /dev/null
+++ b/lib/Analysis/DebugInfo.cpp
@@ -0,0 +1,1079 @@
+//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper classes used to build and interpret debug
+// information in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Streams.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+/// ValidDebugInfo - Return true if V represents a valid debug info value.
+bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
+ if (!V)
+ return false;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V->stripPointerCasts());
+ if (!GV)
+ return false;
+
+ if (!GV->hasInternalLinkage() && !GV->hasLinkOnceLinkage())
+ return false;
+
+ DIDescriptor DI(GV);
+
+ // Check current version. Allow Version6 for now.
+ unsigned Version = DI.getVersion();
+ if (Version != LLVMDebugVersion && Version != LLVMDebugVersion6)
+ return false;
+
+ unsigned Tag = DI.getTag();
+ switch (Tag) {
+ case DW_TAG_variable:
+ assert(DIVariable(GV).Verify() && "Invalid DebugInfo value");
+ break;
+ case DW_TAG_compile_unit:
+ assert(DICompileUnit(GV).Verify() && "Invalid DebugInfo value");
+ break;
+ case DW_TAG_subprogram:
+ assert(DISubprogram(GV).Verify() && "Invalid DebugInfo value");
+ break;
+ case DW_TAG_lexical_block:
+ // FIXME: This interferes with the quality of generated code during
+ // optimization.
+ if (OptLevel != CodeGenOpt::None)
+ return false;
+ // FALLTHROUGH
+ default:
+ break;
+ }
+
+ return true;
+}
+
+DIDescriptor::DIDescriptor(GlobalVariable *gv, unsigned RequiredTag) {
+ GV = gv;
+
+ // If this is non-null, check to see if the Tag matches. If not, set to null.
+ if (GV && getTag() != RequiredTag)
+ GV = 0;
+}
+
+const std::string &
+DIDescriptor::getStringField(unsigned Elt, std::string &Result) const {
+ if (GV == 0) {
+ Result.clear();
+ return Result;
+ }
+
+ Constant *C = GV->getInitializer();
+ if (C == 0 || Elt >= C->getNumOperands()) {
+ Result.clear();
+ return Result;
+ }
+
+ // Fills in the string if it succeeds
+ if (!GetConstantStringInfo(C->getOperand(Elt), Result))
+ Result.clear();
+
+ return Result;
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+ if (GV == 0) return 0;
+
+ Constant *C = GV->getInitializer();
+ if (C == 0 || Elt >= C->getNumOperands())
+ return 0;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C->getOperand(Elt)))
+ return CI->getZExtValue();
+ return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+ if (GV == 0) return DIDescriptor();
+
+ Constant *C = GV->getInitializer();
+ if (C == 0 || Elt >= C->getNumOperands())
+ return DIDescriptor();
+
+ C = C->getOperand(Elt);
+ return DIDescriptor(dyn_cast<GlobalVariable>(C->stripPointerCasts()));
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+ if (GV == 0) return 0;
+
+ Constant *C = GV->getInitializer();
+ if (C == 0 || Elt >= C->getNumOperands())
+ return 0;
+
+ C = C->getOperand(Elt);
+ return dyn_cast<GlobalVariable>(C->stripPointerCasts());
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+// Needed by DIVariable::getType().
+DIType::DIType(GlobalVariable *gv) : DIDescriptor(gv) {
+ if (!gv) return;
+ unsigned tag = getTag();
+ if (tag != dwarf::DW_TAG_base_type && !DIDerivedType::isDerivedType(tag) &&
+ !DICompositeType::isCompositeType(tag))
+ GV = 0;
+}
+
+/// isDerivedType - Return true if the specified tag is legal for
+/// DIDerivedType.
+bool DIType::isDerivedType(unsigned Tag) {
+ switch (Tag) {
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_member:
+ case dwarf::DW_TAG_inheritance:
+ return true;
+ default:
+ // FIXME: Even though it doesn't make sense, CompositeTypes are currently
+ // modelled as DerivedTypes, so this should return true for them as well.
+ return false;
+ }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIType::isCompositeType(unsigned Tag) {
+ switch (Tag) {
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_class_type:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIVariable::isVariable(unsigned Tag) {
+ switch (Tag) {
+ case dwarf::DW_TAG_auto_variable:
+ case dwarf::DW_TAG_arg_variable:
+ case dwarf::DW_TAG_return_variable:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned DIArray::getNumElements() const {
+ assert (GV && "Invalid DIArray");
+ Constant *C = GV->getInitializer();
+ assert (C && "Invalid DIArray initializer");
+ return C->getNumOperands();
+}
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+ if (isNull())
+ return false;
+ std::string Res;
+ if (getFilename(Res).empty())
+ return false;
+  // It is possible that the directory and producer strings are empty.
+ return true;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+ if (isNull())
+ return false;
+ if (getContext().isNull())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.isNull() && !CU.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+ if (isNull())
+ return false;
+ if (getContext().isNull())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.isNull() && !CU.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+ if (isNull())
+ return false;
+
+ if (getContext().isNull())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.Verify())
+ return false;
+
+ DICompositeType Ty = getType();
+ if (!Ty.isNull() && !Ty.Verify())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+ if (isNull())
+ return false;
+
+ if (getContext().isNull())
+ return false;
+
+ DICompileUnit CU = getCompileUnit();
+ if (!CU.isNull() && !CU.Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ if (!getGlobal())
+ return false;
+
+ return true;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+ if (isNull())
+ return false;
+
+ if (getContext().isNull())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ return true;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+ if (getTag() != dwarf::DW_TAG_member)
+ return getSizeInBits();
+ DIType BT = getTypeDerivedFrom();
+ if (BT.getTag() != dwarf::DW_TAG_base_type)
+ return getSizeInBits();
+ return BT.getSizeInBits();
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
+bool DISubprogram::describes(const Function *F) {
+ assert (F && "Invalid function");
+ std::string Name;
+ getLinkageName(Name);
+ if (Name.empty())
+ getName(Name);
+  if (!Name.empty() && strcmp(Name.c_str(), F->getNameStart()) == 0)
+ return true;
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Basic Helpers
+//===----------------------------------------------------------------------===//
+
+DIFactory::DIFactory(Module &m)
+ : M(m), StopPointFn(0), FuncStartFn(0), RegionStartFn(0), RegionEndFn(0),
+ DeclareFn(0) {
+ EmptyStructPtr = PointerType::getUnqual(StructType::get(NULL, NULL));
+}
+
+/// getCastToEmpty - Return this descriptor as a Constant* with type '{}*'.
+/// This is only valid when the descriptor is non-null.
+Constant *DIFactory::getCastToEmpty(DIDescriptor D) {
+ if (D.isNull()) return Constant::getNullValue(EmptyStructPtr);
+ return ConstantExpr::getBitCast(D.getGV(), EmptyStructPtr);
+}
+
+Constant *DIFactory::GetTagConstant(unsigned TAG) {
+ assert((TAG & LLVMDebugVersionMask) == 0 &&
+ "Tag too large for debug encoding!");
+ return ConstantInt::get(Type::Int32Ty, TAG | LLVMDebugVersion);
+}
+
+Constant *DIFactory::GetStringConstant(const std::string &String) {
+  // Check the string cache for a previously created constant.
+ Constant *&Slot = StringCache[String];
+
+ // Return Constant if previously defined.
+ if (Slot) return Slot;
+
+ const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty);
+
+  // If it is an empty string, use an i8* null instead.
+ if (String.empty())
+ return Slot = ConstantPointerNull::get(DestTy);
+
+ // Construct string as an llvm constant.
+ Constant *ConstStr = ConstantArray::get(String);
+
+ // Otherwise create and return a new string global.
+ GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true,
+ GlobalVariable::InternalLinkage,
+ ConstStr, ".str", &M);
+ StrGV->setSection("llvm.metadata");
+ return Slot = ConstantExpr::getBitCast(StrGV, DestTy);
+}
+
+/// GetOrCreateAnchor - Look up an anchor for the specified tag and name. If it
+/// already exists, return it. If not, create a new one and return it.
+DIAnchor DIFactory::GetOrCreateAnchor(unsigned TAG, const char *Name) {
+ const Type *EltTy = StructType::get(Type::Int32Ty, Type::Int32Ty, NULL);
+
+  // Create the global, or return it if it is already in the module.
+ Constant *C = M.getOrInsertGlobal(Name, EltTy);
+ assert(isa<GlobalVariable>(C) && "Incorrectly typed anchor?");
+ GlobalVariable *GV = cast<GlobalVariable>(C);
+
+ // If it has an initializer, it is already in the module.
+ if (GV->hasInitializer())
+    return DIAnchor(GV);
+
+ GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ GV->setSection("llvm.metadata");
+ GV->setConstant(true);
+ M.addTypeName("llvm.dbg.anchor.type", EltTy);
+
+ // Otherwise, set the initializer.
+ Constant *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_anchor),
+ ConstantInt::get(Type::Int32Ty, TAG)
+ };
+
+ GV->setInitializer(ConstantStruct::get(Elts, 2));
+ return DIAnchor(GV);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Primary Constructors
+//===----------------------------------------------------------------------===//
+
+/// GetOrCreateCompileUnitAnchor - Return the anchor for compile units,
+/// creating a new one if there isn't already one in the module.
+DIAnchor DIFactory::GetOrCreateCompileUnitAnchor() {
+ // If we already created one, just return it.
+ if (!CompileUnitAnchor.isNull())
+ return CompileUnitAnchor;
+ return CompileUnitAnchor = GetOrCreateAnchor(dwarf::DW_TAG_compile_unit,
+ "llvm.dbg.compile_units");
+}
+
+/// GetOrCreateSubprogramAnchor - Return the anchor for subprograms,
+/// creating a new one if there isn't already one in the module.
+DIAnchor DIFactory::GetOrCreateSubprogramAnchor() {
+ // If we already created one, just return it.
+ if (!SubProgramAnchor.isNull())
+ return SubProgramAnchor;
+ return SubProgramAnchor = GetOrCreateAnchor(dwarf::DW_TAG_subprogram,
+ "llvm.dbg.subprograms");
+}
+
+/// GetOrCreateGlobalVariableAnchor - Return the anchor for globals,
+/// creating a new one if there isn't already one in the module.
+DIAnchor DIFactory::GetOrCreateGlobalVariableAnchor() {
+ // If we already created one, just return it.
+ if (!GlobalVariableAnchor.isNull())
+ return GlobalVariableAnchor;
+ return GlobalVariableAnchor = GetOrCreateAnchor(dwarf::DW_TAG_variable,
+ "llvm.dbg.global_variables");
+}
+
+/// GetOrCreateArray - Create a descriptor for an array of descriptors.
+/// This implicitly uniques the arrays created.
+DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
+ SmallVector<Constant*, 16> Elts;
+
+ for (unsigned i = 0; i != NumTys; ++i)
+ Elts.push_back(getCastToEmpty(Tys[i]));
+
+ Constant *Init = ConstantArray::get(ArrayType::get(EmptyStructPtr,
+ Elts.size()),
+ Elts.data(), Elts.size());
+ // If we already have this array, just return the uniqued version.
+ DIDescriptor &Entry = SimpleConstantCache[Init];
+ if (!Entry.isNull()) return DIArray(Entry.getGV());
+
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.array", &M);
+ GV->setSection("llvm.metadata");
+ Entry = DIDescriptor(GV);
+ return DIArray(GV);
+}
+
+/// GetOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
+ Constant *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_subrange_type),
+ ConstantInt::get(Type::Int64Ty, Lo),
+ ConstantInt::get(Type::Int64Ty, Hi)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ // If we already have this range, just return the uniqued version.
+ DIDescriptor &Entry = SimpleConstantCache[Init];
+ if (!Entry.isNull()) return DISubrange(Entry.getGV());
+
+ M.addTypeName("llvm.dbg.subrange.type", Init->getType());
+
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.subrange", &M);
+ GV->setSection("llvm.metadata");
+ Entry = DIDescriptor(GV);
+ return DISubrange(GV);
+}
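+
+// A minimal sketch of how the two uniquing helpers above combine when
+// describing a fixed-size array such as "int A[10]".  The helper name and
+// the 32-bit int assumption are illustrative only:
+#if 0
+static DICompositeType describeIntArray(DIFactory &DF, DIDescriptor Context,
+                                        DICompileUnit CU, DIType IntTy) {
+  // One subrange descriptor covering indices [0, 9].
+  DIDescriptor Subscript = DF.GetOrCreateSubrange(0, 9);
+  DIArray Elements = DF.GetOrCreateArray(&Subscript, 1);
+  return DF.CreateCompositeType(dwarf::DW_TAG_array_type, Context, "", CU,
+                                0,         // line number
+                                10 * 32,   // size in bits, assuming 32-bit int
+                                32,        // alignment in bits
+                                0, 0,      // offset in bits, flags
+                                IntTy,     // element type
+                                Elements,
+                                0);        // runtime language
+}
+#endif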
+
+
+
+/// CreateCompileUnit - Create a new descriptor for the specified compile
+/// unit. Note that this does not unique compile units within the module.
+DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
+ const std::string &Filename,
+ const std::string &Directory,
+ const std::string &Producer,
+ bool isMain,
+ bool isOptimized,
+ const char *Flags,
+ unsigned RunTimeVer) {
+ Constant *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_compile_unit),
+ getCastToEmpty(GetOrCreateCompileUnitAnchor()),
+ ConstantInt::get(Type::Int32Ty, LangID),
+ GetStringConstant(Filename),
+ GetStringConstant(Directory),
+ GetStringConstant(Producer),
+ ConstantInt::get(Type::Int1Ty, isMain),
+ ConstantInt::get(Type::Int1Ty, isOptimized),
+ GetStringConstant(Flags),
+ ConstantInt::get(Type::Int32Ty, RunTimeVer)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.compile_unit.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.compile_unit", &M);
+ GV->setSection("llvm.metadata");
+ return DICompileUnit(GV);
+}
+
+/// CreateEnumerator - Create a single enumerator value.
+DIEnumerator DIFactory::CreateEnumerator(const std::string &Name, uint64_t Val){
+ Constant *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_enumerator),
+ GetStringConstant(Name),
+ ConstantInt::get(Type::Int64Ty, Val)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.enumerator.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.enumerator", &M);
+ GV->setSection("llvm.metadata");
+ return DIEnumerator(GV);
+}
+
+
+/// CreateBasicType - Create a basic type like int, float, etc.
+DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
+ const std::string &Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ unsigned Encoding) {
+ Constant *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_base_type),
+ getCastToEmpty(Context),
+ GetStringConstant(Name),
+ getCastToEmpty(CompileUnit),
+ ConstantInt::get(Type::Int32Ty, LineNumber),
+ ConstantInt::get(Type::Int64Ty, SizeInBits),
+ ConstantInt::get(Type::Int64Ty, AlignInBits),
+ ConstantInt::get(Type::Int64Ty, OffsetInBits),
+ ConstantInt::get(Type::Int32Ty, Flags),
+ ConstantInt::get(Type::Int32Ty, Encoding)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.basictype.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.basictype", &M);
+ GV->setSection("llvm.metadata");
+ return DIBasicType(GV);
+}
+
+/// CreateDerivedType - Create a derived type like const qualified type,
+/// pointer, typedef, etc.
+DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
+ DIDescriptor Context,
+ const std::string &Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits,
+ unsigned Flags,
+ DIType DerivedFrom) {
+ Constant *Elts[] = {
+ GetTagConstant(Tag),
+ getCastToEmpty(Context),
+ GetStringConstant(Name),
+ getCastToEmpty(CompileUnit),
+ ConstantInt::get(Type::Int32Ty, LineNumber),
+ ConstantInt::get(Type::Int64Ty, SizeInBits),
+ ConstantInt::get(Type::Int64Ty, AlignInBits),
+ ConstantInt::get(Type::Int64Ty, OffsetInBits),
+ ConstantInt::get(Type::Int32Ty, Flags),
+ getCastToEmpty(DerivedFrom)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.derivedtype.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.derivedtype", &M);
+ GV->setSection("llvm.metadata");
+ return DIDerivedType(GV);
+}
+
+/// CreateCompositeType - Create a composite type like array, struct, etc.
+DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
+ DIDescriptor Context,
+ const std::string &Name,
+ DICompileUnit CompileUnit,
+ unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits,
+ unsigned Flags,
+ DIType DerivedFrom,
+ DIArray Elements,
+ unsigned RuntimeLang) {
+
+ Constant *Elts[] = {
+ GetTagConstant(Tag),
+ getCastToEmpty(Context),
+ GetStringConstant(Name),
+ getCastToEmpty(CompileUnit),
+ ConstantInt::get(Type::Int32Ty, LineNumber),
+ ConstantInt::get(Type::Int64Ty, SizeInBits),
+ ConstantInt::get(Type::Int64Ty, AlignInBits),
+ ConstantInt::get(Type::Int64Ty, OffsetInBits),
+ ConstantInt::get(Type::Int32Ty, Flags),
+ getCastToEmpty(DerivedFrom),
+ getCastToEmpty(Elements),
+ ConstantInt::get(Type::Int32Ty, RuntimeLang)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.composite.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.composite", &M);
+ GV->setSection("llvm.metadata");
+ return DICompositeType(GV);
+}
+
+
+/// CreateSubprogram - Create a new descriptor for the specified subprogram.
+/// See comments in DISubprogram for descriptions of these fields. This
+/// method does not unique the generated descriptors.
+DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
+ const std::string &Name,
+ const std::string &DisplayName,
+ const std::string &LinkageName,
+ DICompileUnit CompileUnit,
+ unsigned LineNo, DIType Type,
+ bool isLocalToUnit,
+ bool isDefinition) {
+
+ Constant *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_subprogram),
+ getCastToEmpty(GetOrCreateSubprogramAnchor()),
+ getCastToEmpty(Context),
+ GetStringConstant(Name),
+ GetStringConstant(DisplayName),
+ GetStringConstant(LinkageName),
+ getCastToEmpty(CompileUnit),
+ ConstantInt::get(Type::Int32Ty, LineNo),
+ getCastToEmpty(Type),
+ ConstantInt::get(Type::Int1Ty, isLocalToUnit),
+ ConstantInt::get(Type::Int1Ty, isDefinition)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.subprogram.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.subprogram", &M);
+ GV->setSection("llvm.metadata");
+ return DISubprogram(GV);
+}
+
+/// CreateGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable
+DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name,
+ const std::string &DisplayName,
+ const std::string &LinkageName,
+ DICompileUnit CompileUnit,
+ unsigned LineNo, DIType Type,bool isLocalToUnit,
+ bool isDefinition, llvm::GlobalVariable *Val) {
+ Constant *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_variable),
+ getCastToEmpty(GetOrCreateGlobalVariableAnchor()),
+ getCastToEmpty(Context),
+ GetStringConstant(Name),
+ GetStringConstant(DisplayName),
+ GetStringConstant(LinkageName),
+ getCastToEmpty(CompileUnit),
+ ConstantInt::get(Type::Int32Ty, LineNo),
+ getCastToEmpty(Type),
+ ConstantInt::get(Type::Int1Ty, isLocalToUnit),
+ ConstantInt::get(Type::Int1Ty, isDefinition),
+ ConstantExpr::getBitCast(Val, EmptyStructPtr)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.global_variable.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.global_variable", &M);
+ GV->setSection("llvm.metadata");
+ return DIGlobalVariable(GV);
+}
+
+
+/// CreateVariable - Create a new descriptor for the specified variable.
+DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
+ const std::string &Name,
+ DICompileUnit CompileUnit, unsigned LineNo,
+ DIType Type) {
+ Constant *Elts[] = {
+ GetTagConstant(Tag),
+ getCastToEmpty(Context),
+ GetStringConstant(Name),
+ getCastToEmpty(CompileUnit),
+ ConstantInt::get(Type::Int32Ty, LineNo),
+ getCastToEmpty(Type)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.variable.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.variable", &M);
+ GV->setSection("llvm.metadata");
+ return DIVariable(GV);
+}
+
+
+/// CreateBlock - This creates a descriptor for a lexical block with the
+/// specified parent context.
+DIBlock DIFactory::CreateBlock(DIDescriptor Context) {
+ Constant *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_lexical_block),
+ getCastToEmpty(Context)
+ };
+
+ Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0]));
+
+ M.addTypeName("llvm.dbg.block.type", Init->getType());
+ GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
+ GlobalValue::InternalLinkage,
+ Init, "llvm.dbg.block", &M);
+ GV->setSection("llvm.metadata");
+ return DIBlock(GV);
+}
+
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Routines for inserting code into a function
+//===----------------------------------------------------------------------===//
+
+/// InsertStopPoint - Create a new llvm.dbg.stoppoint intrinsic invocation,
+/// inserting it at the end of the specified basic block.
+void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo,
+ unsigned ColNo, BasicBlock *BB) {
+
+ // Lazily construct llvm.dbg.stoppoint function.
+ if (!StopPointFn)
+ StopPointFn = llvm::Intrinsic::getDeclaration(&M,
+ llvm::Intrinsic::dbg_stoppoint);
+
+ // Invoke llvm.dbg.stoppoint
+ Value *Args[] = {
+ llvm::ConstantInt::get(llvm::Type::Int32Ty, LineNo),
+ llvm::ConstantInt::get(llvm::Type::Int32Ty, ColNo),
+ getCastToEmpty(CU)
+ };
+ CallInst::Create(StopPointFn, Args, Args+3, "", BB);
+}
+
+/// InsertSubprogramStart - Create a new llvm.dbg.func.start intrinsic to
+/// mark the start of the specified subprogram.
+void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) {
+ // Lazily construct llvm.dbg.func.start.
+ if (!FuncStartFn)
+ FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start);
+
+ // Call llvm.dbg.func.start which also implicitly sets a stoppoint.
+ CallInst::Create(FuncStartFn, getCastToEmpty(SP), "", BB);
+}
+
+/// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to
+/// mark the start of a region for the specified scoping descriptor.
+void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) {
+ // Lazily construct llvm.dbg.region.start function.
+ if (!RegionStartFn)
+ RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start);
+
+  // Call llvm.dbg.region.start.
+ CallInst::Create(RegionStartFn, getCastToEmpty(D), "", BB);
+}
+
+/// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to
+/// mark the end of a region for the specified scoping descriptor.
+void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) {
+ // Lazily construct llvm.dbg.region.end function.
+ if (!RegionEndFn)
+ RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end);
+
+ // Call llvm.dbg.region.end.
+ CallInst::Create(RegionEndFn, getCastToEmpty(D), "", BB);
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+void DIFactory::InsertDeclare(Value *Storage, DIVariable D, BasicBlock *BB) {
+ // Cast the storage to a {}* for the call to llvm.dbg.declare.
+ Storage = new BitCastInst(Storage, EmptyStructPtr, "", BB);
+
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { Storage, getCastToEmpty(D) };
+ CallInst::Create(DeclareFn, Args, Args+2, "", BB);
+}
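+
+// A minimal sketch of how a frontend might drive the insertion routines
+// above when starting to emit a function.  The helper name and the
+// descriptor arguments are illustrative, not part of this file's API:
+#if 0
+static void emitFunctionEntryDebugInfo(DIFactory &DF, DICompileUnit CU,
+                                       DISubprogram SP, DIVariable Var,
+                                       Value *VarStorage, BasicBlock *Entry) {
+  // Mark the start of the subprogram, then its first source location.
+  DF.InsertSubprogramStart(SP, Entry);
+  DF.InsertStopPoint(CU, 1, 0, Entry);  // line 1, column 0
+  // Attach the variable descriptor to the variable's stack slot.
+  DF.InsertDeclare(VarStorage, Var, Entry);
+}
+#endif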
+
+namespace llvm {
+  /// findStopPoint - Find the stoppoint corresponding to this instruction,
+  /// that is, the stoppoint that dominates this instruction.
+ const DbgStopPointInst *findStopPoint(const Instruction *Inst) {
+ if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(Inst))
+ return DSI;
+
+ const BasicBlock *BB = Inst->getParent();
+ BasicBlock::const_iterator I = Inst, B;
+ while (BB) {
+ B = BB->begin();
+
+ // A BB consisting only of a terminator can't have a stoppoint.
+ while (I != B) {
+ --I;
+ if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
+ return DSI;
+ }
+
+ // This BB didn't have a stoppoint: if there is only one predecessor, look
+ // for a stoppoint there. We could use getIDom(), but that would require
+ // dominator info.
+ BB = I->getParent()->getUniquePredecessor();
+ if (BB)
+ I = BB->getTerminator();
+ }
+
+ return 0;
+ }
+
+  /// findBBStopPoint - Find the stoppoint corresponding to the first real
+  /// (non-debug intrinsic) instruction in this Basic Block, and return the
+  /// stoppoint for it.
+ const DbgStopPointInst *findBBStopPoint(const BasicBlock *BB) {
+ for(BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
+ return DSI;
+
+    // Fall back to looking for the stoppoint of the unique predecessor.
+    // Useful if this BB contains no stoppoints, but its unique predecessor
+    // does.
+ BB = BB->getUniquePredecessor();
+ if (BB)
+ return findStopPoint(BB->getTerminator());
+
+ return 0;
+ }
+
+ Value *findDbgGlobalDeclare(GlobalVariable *V) {
+ const Module *M = V->getParent();
+ const Type *Ty = M->getTypeByName("llvm.dbg.global_variable.type");
+ if (!Ty) return 0;
+
+ Ty = PointerType::get(Ty, 0);
+
+ Value *Val = V->stripPointerCasts();
+ for (Value::use_iterator I = Val->use_begin(), E = Val->use_end();
+ I != E; ++I) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ Value *VV = CE;
+
+ while (VV->hasOneUse())
+ VV = *VV->use_begin();
+
+ if (VV->getType() == Ty)
+ return VV;
+ }
+ }
+ }
+
+ if (Val->getType() == Ty)
+ return Val;
+
+ return 0;
+ }
+
+ /// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+ /// It looks through pointer casts too.
+ const DbgDeclareInst *findDbgDeclare(const Value *V, bool stripCasts) {
+ if (stripCasts) {
+ V = V->stripPointerCasts();
+
+ // Look for the bitcast.
+ for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
+ I != E; ++I)
+ if (isa<BitCastInst>(I))
+ return findDbgDeclare(*I, false);
+
+ return 0;
+ }
+
+ // Find llvm.dbg.declare among uses of the instruction.
+ for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
+ I != E; ++I)
+ if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I))
+ return DDI;
+
+ return 0;
+ }
+
+ bool getLocationInfo(const Value *V, std::string &DisplayName,
+ std::string &Type, unsigned &LineNo, std::string &File,
+ std::string &Dir) {
+ DICompileUnit Unit;
+ DIType TypeD;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+ Value *DIGV = findDbgGlobalDeclare(GV);
+ if (!DIGV) return false;
+ DIGlobalVariable Var(cast<GlobalVariable>(DIGV));
+
+ Var.getDisplayName(DisplayName);
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else {
+ const DbgDeclareInst *DDI = findDbgDeclare(V);
+ if (!DDI) return false;
+ DIVariable Var(cast<GlobalVariable>(DDI->getVariable()));
+
+ Var.getName(DisplayName);
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ }
+
+ TypeD.getName(Type);
+ Unit.getFilename(File);
+ Unit.getDirectory(Dir);
+ return true;
+ }
+}
+
+/// dump - Print descriptor.
+void DIDescriptor::dump() const {
+ cerr << "[" << dwarf::TagString(getTag()) << "] ";
+ cerr << std::hex << "[GV:" << GV << "]" << std::dec;
+}
+
+/// dump - Print compile unit.
+void DICompileUnit::dump() const {
+ if (getLanguage())
+ cerr << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+
+ std::string Res1, Res2;
+ cerr << " [" << getDirectory(Res1) << "/" << getFilename(Res2) << " ]";
+}
+
+/// dump - Print type.
+void DIType::dump() const {
+ if (isNull()) return;
+
+ std::string Res;
+ if (!getName(Res).empty())
+ cerr << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ cerr << " [" << dwarf::TagString(Tag) << "] ";
+
+  // TODO: Print context
+ getCompileUnit().dump();
+ cerr << " ["
+ << getLineNumber() << ", "
+ << getSizeInBits() << ", "
+ << getAlignInBits() << ", "
+ << getOffsetInBits()
+ << "] ";
+
+ if (isPrivate())
+ cerr << " [private] ";
+ else if (isProtected())
+ cerr << " [protected] ";
+
+ if (isForwardDecl())
+ cerr << " [fwd] ";
+
+ if (isBasicType(Tag))
+ DIBasicType(GV).dump();
+ else if (isDerivedType(Tag))
+ DIDerivedType(GV).dump();
+ else if (isCompositeType(Tag))
+ DICompositeType(GV).dump();
+ else {
+ cerr << "Invalid DIType\n";
+ return;
+ }
+
+ cerr << "\n";
+}
+
+/// dump - Print basic type.
+void DIBasicType::dump() const {
+ cerr << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+}
+
+/// dump - Print derived type.
+void DIDerivedType::dump() const {
+ cerr << "\n\t Derived From: "; getTypeDerivedFrom().dump();
+}
+
+/// dump - Print composite type.
+void DICompositeType::dump() const {
+ DIArray A = getTypeArray();
+ if (A.isNull())
+ return;
+ cerr << " [" << A.getNumElements() << " elements]";
+}
+
+/// dump - Print global.
+void DIGlobal::dump() const {
+ std::string Res;
+ if (!getName(Res).empty())
+ cerr << " [" << Res << "] ";
+
+ unsigned Tag = getTag();
+ cerr << " [" << dwarf::TagString(Tag) << "] ";
+
+  // TODO: Print context
+ getCompileUnit().dump();
+ cerr << " [" << getLineNumber() << "] ";
+
+ if (isLocalToUnit())
+ cerr << " [local] ";
+
+ if (isDefinition())
+ cerr << " [def] ";
+
+ if (isGlobalVariable(Tag))
+ DIGlobalVariable(GV).dump();
+
+ cerr << "\n";
+}
+
+/// dump - Print subprogram.
+void DISubprogram::dump() const {
+ DIGlobal::dump();
+}
+
+/// dump - Print global variable.
+void DIGlobalVariable::dump() const {
+ cerr << " ["; getGlobal()->dump(); cerr << "] ";
+}
+
+/// dump - Print variable.
+void DIVariable::dump() const {
+ std::string Res;
+ if (!getName(Res).empty())
+ cerr << " [" << Res << "] ";
+
+ getCompileUnit().dump();
+ cerr << " [" << getLineNumber() << "] ";
+ getType().dump();
+ cerr << "\n";
+}
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
new file mode 100644
index 0000000..8584d06
--- /dev/null
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -0,0 +1,2878 @@
+//===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an implementation of Andersen's interprocedural alias
+// analysis.
+//
+// In pointer analysis terms, this is a subset-based, flow-insensitive,
+// field-sensitive, and context-insensitive pointer analysis algorithm.
+//
+// This algorithm is implemented as four stages:
+//   1. Object identification.
+//   2. Inclusion constraint identification.
+//   3. Offline constraint graph optimization.
+//   4. Inclusion constraint solving.
+//
+// The object identification stage identifies all of the memory objects in the
+// program, which includes globals, heap allocated objects, and stack allocated
+// objects.
+//
+// The inclusion constraint identification stage finds all inclusion
+// constraints in the program by scanning it, looking for pointer assignments
+// and other statements that affect the points-to graph.  A statement like
+// "A = B" is processed to indicate that A can point to anything that B can
+// point to.  Constraints can handle copies, loads, stores, and
+// address-taking.
+//
+// The offline constraint graph optimization portion includes offline variable
+// substitution algorithms intended to compute pointer and location
+// equivalences. Pointer equivalences are those pointers that will have the
+// same points-to sets, and location equivalences are those variables that
+// always appear together in points-to sets. It also includes an offline
+// cycle detection algorithm that allows cycles to be collapsed sooner
+// during solving.
+//
+// The inclusion constraint solving phase iteratively propagates the inclusion
+// constraints until a fixed point is reached. This is an O(N^3) algorithm.
+//
+// Function constraints are handled as if they were structs with X fields.
+// Thus, an access to argument X of function Y is an access to node index
+// getNode(Y) + X. This representation allows handling of indirect calls
+// without any issues. To wit, an indirect call Y(a,b) is equivalent to
+// *(Y + 2) = a, *(Y + 3) = b.
+// The return node for a function is always located at getNode(F) +
+// CallReturnPos.  The arguments start at getNode(F) + CallFirstArgPos.
+//
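+// As a small worked example of the constraint forms described above (the
+// variable names are illustrative), the statements
+//
+//   A = &X;   B = A;   C = *B;   *C = A;
+//
+// produce, respectively, an AddressOf constraint (X is placed directly in
+// A's points-to set), a Copy constraint (A's points-to set flows into B's),
+// a Load constraint (the points-to sets of B's pointees flow into C), and a
+// Store constraint (A's points-to set flows into every pointee of C).
+//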
+// Future Improvements:
+//  Use of BDDs.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "anders-aa"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include <algorithm>
+#include <set>
+#include <list>
+#include <map>
+#include <stack>
+#include <vector>
+#include <queue>
+
+// Determining the actual set of nodes the universal set can consist of is very
+// expensive because it means propagating around very large sets. We rely on
+// other analyses being able to determine which nodes can never be pointed to
+// order to disambiguate further than "points-to anything".
+#define FULL_UNIVERSAL 0
+
+using namespace llvm;
+STATISTIC(NumIters , "Number of iterations to reach convergence");
+STATISTIC(NumConstraints, "Number of constraints");
+STATISTIC(NumNodes , "Number of nodes");
+STATISTIC(NumUnified , "Number of variables unified");
+STATISTIC(NumErased , "Number of redundant constraints erased");
+
+static const unsigned SelfRep = (unsigned)-1;
+static const unsigned Unvisited = (unsigned)-1;
+// Position of the function return node relative to the function node.
+static const unsigned CallReturnPos = 1;
+// Position of the function call node relative to the function node.
+static const unsigned CallFirstArgPos = 2;
+
+namespace {
+ struct BitmapKeyInfo {
+ static inline SparseBitVector<> *getEmptyKey() {
+ return reinterpret_cast<SparseBitVector<> *>(-1);
+ }
+ static inline SparseBitVector<> *getTombstoneKey() {
+ return reinterpret_cast<SparseBitVector<> *>(-2);
+ }
+ static unsigned getHashValue(const SparseBitVector<> *bitmap) {
+ return bitmap->getHashValue();
+ }
+ static bool isEqual(const SparseBitVector<> *LHS,
+ const SparseBitVector<> *RHS) {
+ if (LHS == RHS)
+ return true;
+ else if (LHS == getEmptyKey() || RHS == getEmptyKey()
+ || LHS == getTombstoneKey() || RHS == getTombstoneKey())
+ return false;
+
+ return *LHS == *RHS;
+ }
+
+ static bool isPod() { return true; }
+ };
+
+ class VISIBILITY_HIDDEN Andersens : public ModulePass, public AliasAnalysis,
+ private InstVisitor<Andersens> {
+ struct Node;
+
+ /// Constraint - Objects of this structure are used to represent the various
+ /// constraints identified by the algorithm. The constraints are 'copy',
+ /// for statements like "A = B", 'load' for statements like "A = *B",
+    /// 'store' for statements like "*A = B", and 'addressof' for statements
+    /// like "A = alloca".  The Offset is applied as *(A + K) = B for stores,
+    /// A = *(B + K) for loads, and A = B + K for copies.  It is illegal on
+    /// addressof constraints (because it is statically resolvable to A = &C
+    /// where C = B + K).
+
+ struct Constraint {
+ enum ConstraintType { Copy, Load, Store, AddressOf } Type;
+ unsigned Dest;
+ unsigned Src;
+ unsigned Offset;
+
+ Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0)
+ : Type(Ty), Dest(D), Src(S), Offset(O) {
+ assert((Offset == 0 || Ty != AddressOf) &&
+ "Offset is illegal on addressof constraints");
+ }
+
+ bool operator==(const Constraint &RHS) const {
+ return RHS.Type == Type
+ && RHS.Dest == Dest
+ && RHS.Src == Src
+ && RHS.Offset == Offset;
+ }
+
+ bool operator!=(const Constraint &RHS) const {
+ return !(*this == RHS);
+ }
+
+ bool operator<(const Constraint &RHS) const {
+ if (RHS.Type != Type)
+ return RHS.Type < Type;
+ else if (RHS.Dest != Dest)
+ return RHS.Dest < Dest;
+ else if (RHS.Src != Src)
+ return RHS.Src < Src;
+ return RHS.Offset < Offset;
+ }
+ };
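+
+    // For illustration, the encodings documented above correspond to
+    // constructor calls as follows (A, B, and K are placeholder node
+    // indices and offset):
+    //
+    //   Constraint(Constraint::Copy,      A, B)     // A = B
+    //   Constraint(Constraint::Load,      A, B, K)  // A = *(B + K)
+    //   Constraint(Constraint::Store,     A, B, K)  // *(A + K) = B
+    //   Constraint(Constraint::AddressOf, A, B)     // A = &B (no offset)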
+
+    // Information DenseSet requires to be implemented in order to be able to
+    // do its thing.
+ struct PairKeyInfo {
+ static inline std::pair<unsigned, unsigned> getEmptyKey() {
+ return std::make_pair(~0U, ~0U);
+ }
+ static inline std::pair<unsigned, unsigned> getTombstoneKey() {
+ return std::make_pair(~0U - 1, ~0U - 1);
+ }
+ static unsigned getHashValue(const std::pair<unsigned, unsigned> &P) {
+ return P.first ^ P.second;
+ }
+    static bool isEqual(const std::pair<unsigned, unsigned> &LHS,
+                        const std::pair<unsigned, unsigned> &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ struct ConstraintKeyInfo {
+ static inline Constraint getEmptyKey() {
+ return Constraint(Constraint::Copy, ~0U, ~0U, ~0U);
+ }
+ static inline Constraint getTombstoneKey() {
+ return Constraint(Constraint::Copy, ~0U - 1, ~0U - 1, ~0U - 1);
+ }
+ static unsigned getHashValue(const Constraint &C) {
+ return C.Src ^ C.Dest ^ C.Type ^ C.Offset;
+ }
+ static bool isEqual(const Constraint &LHS,
+ const Constraint &RHS) {
+ return LHS.Type == RHS.Type && LHS.Dest == RHS.Dest
+ && LHS.Src == RHS.Src && LHS.Offset == RHS.Offset;
+ }
+ };
+
+ // Node class - This class is used to represent a node in the constraint
+ // graph. Due to various optimizations, it is not always the case that
+ // there is a mapping from a Node to a Value. In particular, we add
+    // artificial Nodes that represent the set of pointed-to variables shared
+    // for each location-equivalent Node.
+ struct Node {
+ private:
+ static unsigned Counter;
+
+ public:
+ Value *Val;
+ SparseBitVector<> *Edges;
+ SparseBitVector<> *PointsTo;
+ SparseBitVector<> *OldPointsTo;
+ std::list<Constraint> Constraints;
+
+ // Pointer and location equivalence labels
+ unsigned PointerEquivLabel;
+ unsigned LocationEquivLabel;
+ // Predecessor edges, both real and implicit
+ SparseBitVector<> *PredEdges;
+ SparseBitVector<> *ImplicitPredEdges;
+      // Set of nodes that point to us, only used for location equivalence.
+ SparseBitVector<> *PointedToBy;
+      // Number of incoming edges, used during variable substitution to free
+      // the points-to sets early.
+ unsigned NumInEdges;
+ // True if our points-to set is in the Set2PEClass map
+ bool StoredInHash;
+ // True if our node has no indirect constraints (complex or otherwise)
+ bool Direct;
+ // True if the node is address taken, *or* it is part of a group of nodes
+ // that must be kept together. This is set to true for functions and
+ // their arg nodes, which must be kept at the same position relative to
+ // their base function node.
+ bool AddressTaken;
+
+ // Nodes in cycles (or in equivalence classes) are united together using a
+ // standard union-find representation with path compression. NodeRep
+ // gives the index into GraphNodes for the representative Node.
+ unsigned NodeRep;
+
+ // Modification timestamp. Assigned from Counter.
+ // Used for work list prioritization.
+ unsigned Timestamp;
+
+ explicit Node(bool direct = true) :
+ Val(0), Edges(0), PointsTo(0), OldPointsTo(0),
+ PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0),
+ ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0),
+ StoredInHash(false), Direct(direct), AddressTaken(false),
+ NodeRep(SelfRep), Timestamp(0) { }
+
+ Node *setValue(Value *V) {
+ assert(Val == 0 && "Value already set for this node!");
+ Val = V;
+ return this;
+ }
+
+ /// getValue - Return the LLVM value corresponding to this node.
+ ///
+ Value *getValue() const { return Val; }
+
+ /// addPointerTo - Add a pointer to the list of pointees of this node,
+ /// returning true if this caused a new pointer to be added, or false if
+ /// we already knew about the points-to relation.
+ bool addPointerTo(unsigned Node) {
+ return PointsTo->test_and_set(Node);
+ }
+
+ /// intersects - Return true if the points-to set of this node intersects
+ /// with the points-to set of the specified node.
+ bool intersects(Node *N) const;
+
+ /// intersectsIgnoring - Return true if the points-to set of this node
+ /// intersects with the points-to set of the specified node on any nodes
+ /// except for the specified node to ignore.
+ bool intersectsIgnoring(Node *N, unsigned) const;
+
+ // Timestamp a node (used for work list prioritization)
+ void Stamp() {
+ Timestamp = Counter++;
+ }
+
+ bool isRep() const {
+ return( (int) NodeRep < 0 );
+ }
+ };
+
+ struct WorkListElement {
+ Node* node;
+ unsigned Timestamp;
+ WorkListElement(Node* n, unsigned t) : node(n), Timestamp(t) {}
+
+      // Note that we reverse the sense of the comparison because we
+      // actually want to give low timestamps priority over high ones;
+      // a priority queue normally treats the greatest element as having
+      // the highest priority.
+ bool operator<(const WorkListElement& that) const {
+ return( this->Timestamp > that.Timestamp );
+ }
+ };
+
+ // Priority-queue based work list specialized for Nodes.
+ class WorkList {
+ std::priority_queue<WorkListElement> Q;
+
+ public:
+ void insert(Node* n) {
+ Q.push( WorkListElement(n, n->Timestamp) );
+ }
+
+ // We automatically discard non-representative nodes and nodes
+ // that were in the work list twice (we keep a copy of the
+ // timestamp in the work list so we can detect this situation by
+ // comparing against the node's current timestamp).
+ Node* pop() {
+ while( !Q.empty() ) {
+ WorkListElement x = Q.top(); Q.pop();
+ Node* INode = x.node;
+
+ if( INode->isRep() &&
+ INode->Timestamp == x.Timestamp ) {
+ return(x.node);
+ }
+ }
+ return(0);
+ }
+
+ bool empty() {
+ return Q.empty();
+ }
+ };
+
+    /// GraphNodes - This vector is populated during the object identification
+    /// stage of the analysis, which adds a node for each memory object and
+    /// fills in the ValueNodes map.
+ std::vector<Node> GraphNodes;
+
+ /// ValueNodes - This map indicates the Node that a particular Value* is
+ /// represented by. This contains entries for all pointers.
+ DenseMap<Value*, unsigned> ValueNodes;
+
+ /// ObjectNodes - This map contains entries for each memory object in the
+    /// program: globals, allocas, and mallocs.
+ DenseMap<Value*, unsigned> ObjectNodes;
+
+ /// ReturnNodes - This map contains an entry for each function in the
+ /// program that returns a value.
+ DenseMap<Function*, unsigned> ReturnNodes;
+
+ /// VarargNodes - This map contains the entry used to represent all pointers
+ /// passed through the varargs portion of a function call for a particular
+ /// function. An entry is not present in this map for functions that do not
+ /// take variable arguments.
+ DenseMap<Function*, unsigned> VarargNodes;
+
+
+ /// Constraints - This vector contains a list of all of the constraints
+ /// identified by the program.
+ std::vector<Constraint> Constraints;
+
+ // Map from graph node to maximum K value that is allowed (for functions,
+ // this is equivalent to the number of arguments + CallFirstArgPos)
+ std::map<unsigned, unsigned> MaxK;
+
+ /// This enum defines the GraphNodes indices that correspond to important
+ /// fixed sets.
+ enum {
+ UniversalSet = 0,
+ NullPtr = 1,
+ NullObject = 2,
+ NumberSpecialNodes
+ };
+    // Stack for Tarjan's SCC algorithm.
+ std::stack<unsigned> SCCStack;
+ // Map from Graph Node to DFS number
+ std::vector<unsigned> Node2DFS;
+    // Map from graph node to whether it has been deleted from the graph.
+ std::vector<bool> Node2Deleted;
+ // Same as Node Maps, but implemented as std::map because it is faster to
+ // clear
+ std::map<unsigned, unsigned> Tarjan2DFS;
+ std::map<unsigned, bool> Tarjan2Deleted;
+ // Current DFS number
+ unsigned DFSNumber;
+
+ // Work lists.
+ WorkList w1, w2;
+ WorkList *CurrWL, *NextWL; // "current" and "next" work lists
+
+ // Offline variable substitution related things
+
+    // Temporary rep storage, used because we can't collapse SCCs in the
+    // predecessor graph by uniting the variables permanently; we can only
+    // do so for the successor graph.
+ std::vector<unsigned> VSSCCRep;
+ // Mapping from node to whether we have visited it during SCC finding yet.
+ std::vector<bool> Node2Visited;
+ // During variable substitution, we create unknowns to represent the unknown
+ // value that is a dereference of a variable. These nodes are known as
+ // "ref" nodes (since they represent the value of dereferences).
+ unsigned FirstRefNode;
+    // During HVN, we represent address-taken nodes as if they were
+    // unknown (since HVN, unlike HU, does not evaluate unions).
+ unsigned FirstAdrNode;
+ // Current pointer equivalence class number
+ unsigned PEClass;
+ // Mapping from points-to sets to equivalence classes
+ typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap;
+ BitVectorMap Set2PEClass;
+ // Mapping from pointer equivalences to the representative node. -1 if we
+ // have no representative node for this pointer equivalence class yet.
+ std::vector<int> PEClass2Node;
+ // Mapping from pointer equivalences to representative node. This includes
+ // pointer equivalent but not location equivalent variables. -1 if we have
+ // no representative node for this pointer equivalence class yet.
+ std::vector<int> PENLEClass2Node;
+ // Union/Find for HCD
+ std::vector<unsigned> HCDSCCRep;
+    // HCD's offline-detected cycles ("Statically DeTected").
+ // -1 if not part of such a cycle, otherwise a representative node.
+ std::vector<int> SDT;
+ // Whether to use SDT (UniteNodes can use it during solving, but not before)
+ bool SDTActive;
+
+ public:
+ static char ID;
+ Andersens() : ModulePass(&ID) {}
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this);
+ IdentifyObjects(M);
+ CollectConstraints(M);
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-constraints"
+ DEBUG(PrintConstraints());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+ SolveConstraints();
+ DEBUG(PrintPointsToGraph());
+
+ // Free the constraints list, as we don't need it to respond to alias
+ // requests.
+ std::vector<Constraint>().swap(Constraints);
+      // These are needed for Print() (-analyze in opt).
+ //ObjectNodes.clear();
+ //ReturnNodes.clear();
+ //VarargNodes.clear();
+ return false;
+ }
+
+ void releaseMemory() {
+ // FIXME: Until we have transitively required passes working correctly,
+ // this cannot be enabled! Otherwise, using -count-aa with the pass
+ // causes memory to be freed too early. :(
+#if 0
+      // The memory objects and ValueNodes data structures are the only ones
+      // that are still live after construction.
+ std::vector<Node>().swap(GraphNodes);
+ ValueNodes.clear();
+#endif
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+ virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+ virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
+ void getMustAliases(Value *P, std::vector<Value*> &RetVals);
+ bool pointsToConstantMemory(const Value *P);
+
+ virtual void deleteValue(Value *V) {
+ ValueNodes.erase(V);
+ getAnalysis<AliasAnalysis>().deleteValue(V);
+ }
+
+ virtual void copyValue(Value *From, Value *To) {
+ ValueNodes[To] = ValueNodes[From];
+ getAnalysis<AliasAnalysis>().copyValue(From, To);
+ }
+
+ private:
+ /// getNode - Return the node corresponding to the specified pointer scalar.
+ ///
+ unsigned getNode(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (!isa<GlobalValue>(C))
+ return getNodeForConstantPointer(C);
+
+ DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V);
+ if (I == ValueNodes.end()) {
+#ifndef NDEBUG
+ V->dump();
+#endif
+ assert(0 && "Value does not have a node in the points-to graph!");
+ }
+ return I->second;
+ }
+
+ /// getObject - Return the node corresponding to the memory object for the
+ /// specified global or allocation instruction.
+ unsigned getObject(Value *V) const {
+ DenseMap<Value*, unsigned>::iterator I = ObjectNodes.find(V);
+ assert(I != ObjectNodes.end() &&
+ "Value does not have an object in the points-to graph!");
+ return I->second;
+ }
+
+ /// getReturnNode - Return the node representing the return value for the
+ /// specified function.
+ unsigned getReturnNode(Function *F) const {
+ DenseMap<Function*, unsigned>::iterator I = ReturnNodes.find(F);
+ assert(I != ReturnNodes.end() && "Function does not return a value!");
+ return I->second;
+ }
+
+ /// getVarargNode - Return the node representing the variable arguments
+ /// formal for the specified function.
+ unsigned getVarargNode(Function *F) const {
+ DenseMap<Function*, unsigned>::iterator I = VarargNodes.find(F);
+ assert(I != VarargNodes.end() && "Function does not take var args!");
+ return I->second;
+ }
+
+ /// getNodeValue - Get the node for the specified LLVM value and set the
+ /// value for it to be the specified value.
+ unsigned getNodeValue(Value &V) {
+ unsigned Index = getNode(&V);
+ GraphNodes[Index].setValue(&V);
+ return Index;
+ }
+
+ unsigned UniteNodes(unsigned First, unsigned Second,
+ bool UnionByRank = true);
+ unsigned FindNode(unsigned Node);
+ unsigned FindNode(unsigned Node) const;
+
+ void IdentifyObjects(Module &M);
+ void CollectConstraints(Module &M);
+ bool AnalyzeUsesOfFunction(Value *);
+ void CreateConstraintGraph();
+ void OptimizeConstraints();
+ unsigned FindEquivalentNode(unsigned, unsigned);
+ void ClumpAddressTaken();
+ void RewriteConstraints();
+ void HU();
+ void HVN();
+ void HCD();
+ void Search(unsigned Node);
+ void UnitePointerEquivalences();
+ void SolveConstraints();
+ bool QueryNode(unsigned Node);
+ void Condense(unsigned Node);
+ void HUValNum(unsigned Node);
+ void HVNValNum(unsigned Node);
+ unsigned getNodeForConstantPointer(Constant *C);
+ unsigned getNodeForConstantPointerTarget(Constant *C);
+ void AddGlobalInitializerConstraints(unsigned, Constant *C);
+
+ void AddConstraintsForNonInternalLinkage(Function *F);
+ void AddConstraintsForCall(CallSite CS, Function *F);
+ bool AddConstraintsForExternalCall(CallSite CS, Function *F);
+
+
+ void PrintNode(const Node *N) const;
+ void PrintConstraints() const ;
+ void PrintConstraint(const Constraint &) const;
+ void PrintLabels() const;
+ void PrintPointsToGraph() const;
+
+ //===------------------------------------------------------------------===//
+ // Instruction visitation methods for adding constraints
+ //
+ friend class InstVisitor<Andersens>;
+ void visitReturnInst(ReturnInst &RI);
+ void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
+ void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); }
+ void visitCallSite(CallSite CS);
+ void visitAllocationInst(AllocationInst &AI);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitGetElementPtrInst(GetElementPtrInst &GEP);
+ void visitPHINode(PHINode &PN);
+ void visitCastInst(CastInst &CI);
+ void visitICmpInst(ICmpInst &ICI) {} // NOOP!
+ void visitFCmpInst(FCmpInst &ICI) {} // NOOP!
+ void visitSelectInst(SelectInst &SI);
+ void visitVAArg(VAArgInst &I);
+ void visitInstruction(Instruction &I);
+
+ //===------------------------------------------------------------------===//
+    // Implement the Analyze interface
+ //
+ void print(std::ostream &O, const Module* M) const {
+ PrintPointsToGraph();
+ }
+ };
+}
+
+char Andersens::ID = 0;
+static RegisterPass<Andersens>
+X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true);
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+// Initialize Timestamp Counter (static).
+unsigned Andersens::Node::Counter = 0;
+
+ModulePass *llvm::createAndersensPass() { return new Andersens(); }
+
+//===----------------------------------------------------------------------===//
+// AliasAnalysis Interface Implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))];
+ Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))];
+
+ // Check to see if the two pointers are known to not alias. They don't alias
+ // if their points-to sets do not intersect.
+ if (!N1->intersectsIgnoring(N2, NullObject))
+ return NoAlias;
+
+ return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+}
+
+AliasAnalysis::ModRefResult
+Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+  // The only useful mod/ref information we can contribute is for calls to
+  // external functions: if we know that memory never escapes from the
+  // program, it cannot be modified by an external call.
+ //
+ // NOTE: This is not really safe, at least not when the entire program is not
+ // available. The deal is that the external function could call back into the
+ // program and modify stuff. We ignore this technical niggle for now. This
+ // is, after all, a "research quality" implementation of Andersen's analysis.
+ if (Function *F = CS.getCalledFunction())
+ if (F->isDeclaration()) {
+ Node *N1 = &GraphNodes[FindNode(getNode(P))];
+
+ if (N1->PointsTo->empty())
+ return NoModRef;
+#if FULL_UNIVERSAL
+ if (!UniversalSet->PointsTo->test(FindNode(getNode(P))))
+ return NoModRef; // Universal set does not contain P
+#else
+ if (!N1->PointsTo->test(UniversalSet))
+ return NoModRef; // P doesn't point to the universal set.
+#endif
+ }
+
+ return AliasAnalysis::getModRefInfo(CS, P, Size);
+}
+
+AliasAnalysis::ModRefResult
+Andersens::getModRefInfo(CallSite CS1, CallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+}
+
+/// getMustAliases - We can provide must-alias information if we know that a
+/// pointer can only point to a specific function or the null pointer.
+/// Unfortunately we cannot determine must-alias information for global
+/// variables or any other memory objects because we do not track whether
+/// a pointer points to the beginning of an object or a field of it.
+void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) {
+ Node *N = &GraphNodes[FindNode(getNode(P))];
+ if (N->PointsTo->count() == 1) {
+ Node *Pointee = &GraphNodes[N->PointsTo->find_first()];
+ // If a function is the only object in the points-to set, then it must be
+ // the destination. Note that we can't handle global variables here,
+ // because we don't know if the pointer is actually pointing to a field of
+ // the global or to the beginning of it.
+ if (Value *V = Pointee->getValue()) {
+ if (Function *F = dyn_cast<Function>(V))
+ RetVals.push_back(F);
+ } else {
+ // If the object in the points-to set is the null object, then the null
+ // pointer is a must alias.
+ if (Pointee == &GraphNodes[NullObject])
+ RetVals.push_back(Constant::getNullValue(P->getType()));
+ }
+ }
+ AliasAnalysis::getMustAliases(P, RetVals);
+}
+
+/// pointsToConstantMemory - If we can determine that this pointer only points
+/// to constant memory, return true. In practice, this means that if the
+/// pointer can only point to constant globals, functions, or the null pointer,
+/// return true.
+///
+bool Andersens::pointsToConstantMemory(const Value *P) {
+ Node *N = &GraphNodes[FindNode(getNode(const_cast<Value*>(P)))];
+ unsigned i;
+
+ for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
+ bi != N->PointsTo->end();
+ ++bi) {
+ i = *bi;
+ Node *Pointee = &GraphNodes[i];
+ if (Value *V = Pointee->getValue()) {
+ if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) &&
+ !cast<GlobalVariable>(V)->isConstant()))
+ return AliasAnalysis::pointsToConstantMemory(P);
+ } else {
+ if (i != NullObject)
+ return AliasAnalysis::pointsToConstantMemory(P);
+ }
+ }
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Object Identification Phase
+//===----------------------------------------------------------------------===//
+
+/// IdentifyObjects - This stage scans the program, adding an entry to the
+/// GraphNodes list for each memory object in the program (global, stack, or
+/// heap), and populates the ValueNodes and ObjectNodes maps for these objects.
+///
+void Andersens::IdentifyObjects(Module &M) {
+ unsigned NumObjects = 0;
+
+ // Object #0 is always the universal set: the object that we don't know
+ // anything about.
+ assert(NumObjects == UniversalSet && "Something changed!");
+ ++NumObjects;
+
+ // Object #1 always represents the null pointer.
+ assert(NumObjects == NullPtr && "Something changed!");
+ ++NumObjects;
+
+ // Object #2 always represents the null object (the object pointed to by null)
+ assert(NumObjects == NullObject && "Something changed!");
+ ++NumObjects;
+
+ // Add all the globals first.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ ObjectNodes[I] = NumObjects++;
+ ValueNodes[I] = NumObjects++;
+ }
+
+ // Add nodes for all of the functions and the instructions inside of them.
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ // The function itself is a memory object.
+ unsigned First = NumObjects;
+ ValueNodes[F] = NumObjects++;
+ if (isa<PointerType>(F->getFunctionType()->getReturnType()))
+ ReturnNodes[F] = NumObjects++;
+ if (F->getFunctionType()->isVarArg())
+ VarargNodes[F] = NumObjects++;
+
+
+ // Add nodes for all of the incoming pointer arguments.
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ {
+ if (isa<PointerType>(I->getType()))
+ ValueNodes[I] = NumObjects++;
+ }
+ MaxK[First] = NumObjects - First;
+
+ // Scan the function body, creating a memory object for each heap/stack
+ // allocation in the body of the function and a node to represent all
+ // pointer values defined by instructions and used as operands.
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+      // If this is a heap or stack allocation, create a node for the memory
+ // object.
+ if (isa<PointerType>(II->getType())) {
+ ValueNodes[&*II] = NumObjects++;
+ if (AllocationInst *AI = dyn_cast<AllocationInst>(&*II))
+ ObjectNodes[AI] = NumObjects++;
+ }
+
+ // Calls to inline asm need to be added as well because the callee isn't
+ // referenced anywhere else.
+ if (CallInst *CI = dyn_cast<CallInst>(&*II)) {
+ Value *Callee = CI->getCalledValue();
+ if (isa<InlineAsm>(Callee))
+ ValueNodes[Callee] = NumObjects++;
+ }
+ }
+ }
+
+ // Now that we know how many objects to create, make them all now!
+ GraphNodes.resize(NumObjects);
+ NumNodes += NumObjects;
+}
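+
+// For illustration, a module consisting only of
+//
+//   @G = global i32 0
+//   define i32* @F(i32* %p) { ... }
+//
+// is numbered: 0 = UniversalSet, 1 = NullPtr, 2 = NullObject,
+// 3 = object(@G), 4 = value(@G), 5 = value(@F), 6 = return(@F),
+// 7 = value(%p), followed by nodes for any pointer-valued instructions
+// (and allocation objects) in @F's body.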
+
+//===----------------------------------------------------------------------===//
+// Constraint Identification Phase
+//===----------------------------------------------------------------------===//
+
+/// getNodeForConstantPointer - Return the node corresponding to the constant
+/// pointer itself.
+unsigned Andersens::getNodeForConstantPointer(Constant *C) {
+ assert(isa<PointerType>(C->getType()) && "Not a constant pointer!");
+
+ if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C))
+ return NullPtr;
+ else if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return getNode(GV);
+ else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr:
+ return getNodeForConstantPointer(CE->getOperand(0));
+ case Instruction::IntToPtr:
+ return UniversalSet;
+ case Instruction::BitCast:
+ return getNodeForConstantPointer(CE->getOperand(0));
+ default:
+ cerr << "Constant Expr not yet handled: " << *CE << "\n";
+ assert(0);
+ }
+ } else {
+ assert(0 && "Unknown constant pointer!");
+ }
+ return 0;
+}
+
+/// getNodeForConstantPointerTarget - Return the node POINTED TO by the
+/// specified constant pointer.
+unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) {
+ assert(isa<PointerType>(C->getType()) && "Not a constant pointer!");
+
+ if (isa<ConstantPointerNull>(C))
+ return NullObject;
+ else if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return getObject(GV);
+ else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr:
+ return getNodeForConstantPointerTarget(CE->getOperand(0));
+ case Instruction::IntToPtr:
+ return UniversalSet;
+ case Instruction::BitCast:
+ return getNodeForConstantPointerTarget(CE->getOperand(0));
+ default:
+ cerr << "Constant Expr not yet handled: " << *CE << "\n";
+ assert(0);
+ }
+ } else {
+ assert(0 && "Unknown constant pointer!");
+ }
+ return 0;
+}
+
+/// AddGlobalInitializerConstraints - Add inclusion constraints for the memory
+/// object at index NodeIndex, which contains values indicated by C.
+void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex,
+ Constant *C) {
+ if (C->getType()->isSingleValueType()) {
+ if (isa<PointerType>(C->getType()))
+ Constraints.push_back(Constraint(Constraint::Copy, NodeIndex,
+ getNodeForConstantPointer(C)));
+ } else if (C->isNullValue()) {
+ Constraints.push_back(Constraint(Constraint::Copy, NodeIndex,
+ NullObject));
+ return;
+ } else if (!isa<UndefValue>(C)) {
+ // If this is an array or struct, include constraints for each element.
+ assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C));
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ AddGlobalInitializerConstraints(NodeIndex,
+ cast<Constant>(C->getOperand(i)));
+ }
+}
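+
+// A minimal sketch of what the routine above produces (hypothetical globals,
+// not from any particular module): for
+//   @X = global i32 4
+//   @G = global i32* @X
+// the initializer of @G is a single-value pointer, so we record roughly
+//   <Copy/<G memory>/X>
+// i.e. the memory for @G holds whatever the node for @X points to (the
+// <X memory> object, via @X's AddressOf constraint).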
+
+/// AddConstraintsForNonInternalLinkage - If this function does not have
+/// internal linkage, realize that we can't trust anything passed into or
+/// returned by this function.
+void Andersens::AddConstraintsForNonInternalLinkage(Function *F) {
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ if (isa<PointerType>(I->getType()))
+ // If this is an argument of an externally accessible function, the
+ // incoming pointer might point to anything.
+ Constraints.push_back(Constraint(Constraint::Copy, getNode(I),
+ UniversalSet));
+}
+
+/// AddConstraintsForExternalCall - If this is a call to a "known" external
+/// function, add the constraints and return true. If this is a call to an
+/// unknown function, return false.
+bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) {
+ assert(F->isDeclaration() && "Not an external function!");
+
+ // These functions don't induce any points-to constraints.
+ if (F->getName() == "atoi" || F->getName() == "atof" ||
+ F->getName() == "atol" || F->getName() == "atoll" ||
+ F->getName() == "remove" || F->getName() == "unlink" ||
+ F->getName() == "rename" || F->getName() == "memcmp" ||
+ F->getName() == "llvm.memset" ||
+ F->getName() == "strcmp" || F->getName() == "strncmp" ||
+ F->getName() == "execl" || F->getName() == "execlp" ||
+ F->getName() == "execle" || F->getName() == "execv" ||
+ F->getName() == "execvp" || F->getName() == "chmod" ||
+ F->getName() == "puts" || F->getName() == "write" ||
+ F->getName() == "open" || F->getName() == "create" ||
+ F->getName() == "truncate" || F->getName() == "chdir" ||
+ F->getName() == "mkdir" || F->getName() == "rmdir" ||
+ F->getName() == "read" || F->getName() == "pipe" ||
+ F->getName() == "wait" || F->getName() == "time" ||
+ F->getName() == "stat" || F->getName() == "fstat" ||
+ F->getName() == "lstat" || F->getName() == "strtod" ||
+ F->getName() == "strtof" || F->getName() == "strtold" ||
+ F->getName() == "fopen" || F->getName() == "fdopen" ||
+ F->getName() == "freopen" ||
+ F->getName() == "fflush" || F->getName() == "feof" ||
+ F->getName() == "fileno" || F->getName() == "clearerr" ||
+ F->getName() == "rewind" || F->getName() == "ftell" ||
+ F->getName() == "ferror" || F->getName() == "fgetc" ||
+ F->getName() == "fgetc" || F->getName() == "_IO_getc" ||
+ F->getName() == "fwrite" || F->getName() == "fread" ||
+ F->getName() == "fgets" || F->getName() == "ungetc" ||
+ F->getName() == "fputc" ||
+ F->getName() == "fputs" || F->getName() == "putc" ||
+ F->getName() == "ftell" || F->getName() == "rewind" ||
+ F->getName() == "_IO_putc" || F->getName() == "fseek" ||
+ F->getName() == "fgetpos" || F->getName() == "fsetpos" ||
+ F->getName() == "printf" || F->getName() == "fprintf" ||
+ F->getName() == "sprintf" || F->getName() == "vprintf" ||
+ F->getName() == "vfprintf" || F->getName() == "vsprintf" ||
+ F->getName() == "scanf" || F->getName() == "fscanf" ||
+ F->getName() == "sscanf" || F->getName() == "__assert_fail" ||
+ F->getName() == "modf")
+ return true;
+
+ // These functions do induce points-to edges.
+ if (F->getName() == "llvm.memcpy" ||
+ F->getName() == "llvm.memmove" ||
+ F->getName() == "memmove") {
+
+ const FunctionType *FTy = F->getFunctionType();
+ if (FTy->getNumParams() > 1 &&
+ isa<PointerType>(FTy->getParamType(0)) &&
+ isa<PointerType>(FTy->getParamType(1))) {
+
+ // *Dest = *Src, which requires an artificial graph node to represent the
+ // constraint. It is broken up into *Dest = temp, temp = *Src
+ unsigned FirstArg = getNode(CS.getArgument(0));
+ unsigned SecondArg = getNode(CS.getArgument(1));
+ unsigned TempArg = GraphNodes.size();
+ GraphNodes.push_back(Node());
+ Constraints.push_back(Constraint(Constraint::Store,
+ FirstArg, TempArg));
+ Constraints.push_back(Constraint(Constraint::Load,
+ TempArg, SecondArg));
+ // In addition, Dest = Src
+ Constraints.push_back(Constraint(Constraint::Copy,
+ FirstArg, SecondArg));
+ return true;
+ }
+ }
+
+ // Result = Arg0
+ if (F->getName() == "realloc" || F->getName() == "strchr" ||
+ F->getName() == "strrchr" || F->getName() == "strstr" ||
+ F->getName() == "strtok") {
+ const FunctionType *FTy = F->getFunctionType();
+ if (FTy->getNumParams() > 0 &&
+ isa<PointerType>(FTy->getParamType(0))) {
+ Constraints.push_back(Constraint(Constraint::Copy,
+ getNode(CS.getInstruction()),
+ getNode(CS.getArgument(0))));
+ return true;
+ }
+ }
+
+ return false;
+}
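+
+// A minimal sketch of the llvm.memcpy handling above (hypothetical call, not
+// from a real module): for
+//   call void @llvm.memcpy(i8* %d, i8* %s, i64 %n, i32 1)
+// we emit, with T the fresh artificial node:
+//   <Store/%d/T>, <Load/T/%s>, <Copy/%d/%s>
+// i.e. *%d = T, T = *%s, and %d = %s.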
+
+/// AnalyzeUsesOfFunction - Look at all of the users of the specified function.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true.
+bool Andersens::AnalyzeUsesOfFunction(Value *V) {
+
+ if (!isa<PointerType>(V->getType())) return true;
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+    if (isa<LoadInst>(*UI)) {
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ if (V == SI->getOperand(1)) {
+ return false;
+ } else if (SI->getOperand(1)) {
+ return true; // Storing the pointer
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ if (AnalyzeUsesOfFunction(GEP)) return true;
+ } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+      // Make sure that this is just the function being called, not that it is
+      // being passed into the function.
+ for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
+ if (CI->getOperand(i) == V) return true;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+      // Make sure that this is just the function being called, not that it is
+      // being passed into the function.
+ for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i)
+ if (II->getOperand(i) == V) return true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr ||
+ CE->getOpcode() == Instruction::BitCast) {
+ if (AnalyzeUsesOfFunction(CE))
+ return true;
+ } else {
+ return true;
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+    } else if (isa<FreeInst>(*UI)) {
+ return false;
+ } else {
+ return true;
+ }
+ return false;
+}
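+
+// Sketch of the intent above (hypothetical uses of a function @f):
+//   %c = icmp eq void()* @f, null        ; comparison against null: no escape
+//   call void @f()                       ; plain call of @f: no escape
+//   store void()* @f, void()** %slot     ; @f's address is stored: escapes
+// Escaping functions are then treated like externally visible ones in
+// CollectConstraints().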
+
+/// CollectConstraints - This stage scans the program, adding a constraint to
+/// the Constraints list for each instruction in the program that induces a
+/// constraint, and setting up the initial points-to graph.
+///
+void Andersens::CollectConstraints(Module &M) {
+ // First, the universal set points to itself.
+ Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet,
+ UniversalSet));
+ Constraints.push_back(Constraint(Constraint::Store, UniversalSet,
+ UniversalSet));
+
+ // Next, the null pointer points to the null object.
+ Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject));
+
+ // Next, add any constraints on global variables and their initializers.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ // Associate the address of the global object as pointing to the memory for
+ // the global: &G = <G memory>
+ unsigned ObjectIndex = getObject(I);
+ Node *Object = &GraphNodes[ObjectIndex];
+ Object->setValue(I);
+ Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I),
+ ObjectIndex));
+
+ if (I->hasInitializer()) {
+ AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer());
+ } else {
+ // If it doesn't have an initializer (i.e. it's defined in another
+ // translation unit), it points to the universal set.
+ Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex,
+ UniversalSet));
+ }
+ }
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ // Set up the return value node.
+ if (isa<PointerType>(F->getFunctionType()->getReturnType()))
+ GraphNodes[getReturnNode(F)].setValue(F);
+ if (F->getFunctionType()->isVarArg())
+ GraphNodes[getVarargNode(F)].setValue(F);
+
+ // Set up incoming argument nodes.
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (isa<PointerType>(I->getType()))
+ getNodeValue(*I);
+
+ // At some point we should just add constraints for the escaping functions
+ // at solve time, but this slows down solving. For now, we simply mark
+    // address-taken functions as escaping and treat them as external.
+ if (!F->hasLocalLinkage() || AnalyzeUsesOfFunction(F))
+ AddConstraintsForNonInternalLinkage(F);
+
+ if (!F->isDeclaration()) {
+ // Scan the function body, creating a memory object for each heap/stack
+ // allocation in the body of the function and a node to represent all
+ // pointer values defined by instructions and used as operands.
+ visit(F);
+ } else {
+ // External functions that return pointers return the universal set.
+ if (isa<PointerType>(F->getFunctionType()->getReturnType()))
+ Constraints.push_back(Constraint(Constraint::Copy,
+ getReturnNode(F),
+ UniversalSet));
+
+ // Any pointers that are passed into the function have the universal set
+ // stored into them.
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (isa<PointerType>(I->getType())) {
+ // Pointers passed into external functions could have anything stored
+ // through them.
+ Constraints.push_back(Constraint(Constraint::Store, getNode(I),
+ UniversalSet));
+ // Memory objects passed into external function calls can have the
+ // universal set point to them.
+#if FULL_UNIVERSAL
+ Constraints.push_back(Constraint(Constraint::Copy,
+ UniversalSet,
+ getNode(I)));
+#else
+ Constraints.push_back(Constraint(Constraint::Copy,
+ getNode(I),
+ UniversalSet));
+#endif
+ }
+
+ // If this is an external varargs function, it can also store pointers
+ // into any pointers passed through the varargs section.
+ if (F->getFunctionType()->isVarArg())
+ Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F),
+ UniversalSet));
+ }
+ }
+ NumConstraints += Constraints.size();
+}
+
+
+void Andersens::visitInstruction(Instruction &I) {
+#ifdef NDEBUG
+ return; // This function is just a big assert.
+#endif
+ if (isa<BinaryOperator>(I))
+ return;
+ // Most instructions don't have any effect on pointer values.
+ switch (I.getOpcode()) {
+ case Instruction::Br:
+ case Instruction::Switch:
+ case Instruction::Unwind:
+ case Instruction::Unreachable:
+ case Instruction::Free:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return;
+ default:
+ // Is this something we aren't handling yet?
+ cerr << "Unknown instruction: " << I;
+ abort();
+ }
+}
+
+void Andersens::visitAllocationInst(AllocationInst &AI) {
+ unsigned ObjectIndex = getObject(&AI);
+ GraphNodes[ObjectIndex].setValue(&AI);
+ Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI),
+ ObjectIndex));
+}
+
+void Andersens::visitReturnInst(ReturnInst &RI) {
+ if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType()))
+ // return V --> <Copy/retval{F}/v>
+ Constraints.push_back(Constraint(Constraint::Copy,
+ getReturnNode(RI.getParent()->getParent()),
+ getNode(RI.getOperand(0))));
+}
+
+void Andersens::visitLoadInst(LoadInst &LI) {
+ if (isa<PointerType>(LI.getType()))
+ // P1 = load P2 --> <Load/P1/P2>
+ Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI),
+ getNode(LI.getOperand(0))));
+}
+
+void Andersens::visitStoreInst(StoreInst &SI) {
+ if (isa<PointerType>(SI.getOperand(0)->getType()))
+ // store P1, P2 --> <Store/P2/P1>
+ Constraints.push_back(Constraint(Constraint::Store,
+ getNode(SI.getOperand(1)),
+ getNode(SI.getOperand(0))));
+}
+
+void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ // P1 = getelementptr P2, ... --> <Copy/P1/P2>
+ Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP),
+ getNode(GEP.getOperand(0))));
+}
+
+void Andersens::visitPHINode(PHINode &PN) {
+ if (isa<PointerType>(PN.getType())) {
+ unsigned PNN = getNodeValue(PN);
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ...
+ Constraints.push_back(Constraint(Constraint::Copy, PNN,
+ getNode(PN.getIncomingValue(i))));
+ }
+}
+
+void Andersens::visitCastInst(CastInst &CI) {
+ Value *Op = CI.getOperand(0);
+ if (isa<PointerType>(CI.getType())) {
+ if (isa<PointerType>(Op->getType())) {
+ // P1 = cast P2 --> <Copy/P1/P2>
+ Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI),
+ getNode(CI.getOperand(0))));
+ } else {
+ // P1 = cast int --> <Copy/P1/Univ>
+#if 0
+ Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI),
+ UniversalSet));
+#else
+ getNodeValue(CI);
+#endif
+ }
+ } else if (isa<PointerType>(Op->getType())) {
+ // int = cast P1 --> <Copy/Univ/P1>
+#if 0
+ Constraints.push_back(Constraint(Constraint::Copy,
+ UniversalSet,
+ getNode(CI.getOperand(0))));
+#else
+ getNode(CI.getOperand(0));
+#endif
+ }
+}
+
+void Andersens::visitSelectInst(SelectInst &SI) {
+ if (isa<PointerType>(SI.getType())) {
+ unsigned SIN = getNodeValue(SI);
+ // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3>
+ Constraints.push_back(Constraint(Constraint::Copy, SIN,
+ getNode(SI.getOperand(1))));
+ Constraints.push_back(Constraint(Constraint::Copy, SIN,
+ getNode(SI.getOperand(2))));
+ }
+}
+
+void Andersens::visitVAArg(VAArgInst &I) {
+ assert(0 && "vaarg not handled yet!");
+}
+
+/// AddConstraintsForCall - Add constraints for a call with actual arguments
+/// specified by CS to the function specified by F. Note that the types of
+/// arguments might not match up in the case where this is an indirect call and
+/// the function pointer has been cast. If this is the case, do something
+/// reasonable.
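+///
+/// As a rough sketch (illustrative, and ignoring the FULL_UNIVERSAL variants
+/// handled below): a direct call "%r = call i32* @g(i32* %a)" yields
+/// <Copy/%r/ret{g}> and <Copy/formal-of-g/%a>, while an indirect call through
+/// %fp expresses the same facts as offset constraints from the callee node: a
+/// Load at offset CallReturnPos for the return value, and Stores at offsets
+/// starting at CallFirstArgPos for the arguments.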
+void Andersens::AddConstraintsForCall(CallSite CS, Function *F) {
+ Value *CallValue = CS.getCalledValue();
+ bool IsDeref = F == NULL;
+
+ // If this is a call to an external function, try to handle it directly to get
+ // some taste of context sensitivity.
+ if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F))
+ return;
+
+ if (isa<PointerType>(CS.getType())) {
+ unsigned CSN = getNode(CS.getInstruction());
+ if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) {
+ if (IsDeref)
+ Constraints.push_back(Constraint(Constraint::Load, CSN,
+ getNode(CallValue), CallReturnPos));
+ else
+ Constraints.push_back(Constraint(Constraint::Copy, CSN,
+ getNode(CallValue) + CallReturnPos));
+ } else {
+ // If the function returns a non-pointer value, handle this just like we
+ // treat a nonpointer cast to pointer.
+ Constraints.push_back(Constraint(Constraint::Copy, CSN,
+ UniversalSet));
+ }
+ } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) {
+#if FULL_UNIVERSAL
+ Constraints.push_back(Constraint(Constraint::Copy,
+ UniversalSet,
+ getNode(CallValue) + CallReturnPos));
+#else
+ Constraints.push_back(Constraint(Constraint::Copy,
+ getNode(CallValue) + CallReturnPos,
+ UniversalSet));
+#endif
+ }
+
+ CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end();
+ bool external = !F || F->isDeclaration();
+ if (F) {
+ // Direct Call
+ Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI)
+ {
+#if !FULL_UNIVERSAL
+ if (external && isa<PointerType>((*ArgI)->getType()))
+ {
+ // Add constraint that ArgI can now point to anything due to
+ // escaping, as can everything it points to. The second portion of
+ // this should be taken care of by universal = *universal
+ Constraints.push_back(Constraint(Constraint::Copy,
+ getNode(*ArgI),
+ UniversalSet));
+ }
+#endif
+ if (isa<PointerType>(AI->getType())) {
+ if (isa<PointerType>((*ArgI)->getType())) {
+ // Copy the actual argument into the formal argument.
+ Constraints.push_back(Constraint(Constraint::Copy, getNode(AI),
+ getNode(*ArgI)));
+ } else {
+ Constraints.push_back(Constraint(Constraint::Copy, getNode(AI),
+ UniversalSet));
+ }
+ } else if (isa<PointerType>((*ArgI)->getType())) {
+#if FULL_UNIVERSAL
+ Constraints.push_back(Constraint(Constraint::Copy,
+ UniversalSet,
+ getNode(*ArgI)));
+#else
+ Constraints.push_back(Constraint(Constraint::Copy,
+ getNode(*ArgI),
+ UniversalSet));
+#endif
+ }
+ }
+ } else {
+    // Indirect Call
+ unsigned ArgPos = CallFirstArgPos;
+ for (; ArgI != ArgE; ++ArgI) {
+ if (isa<PointerType>((*ArgI)->getType())) {
+ // Copy the actual argument into the formal argument.
+ Constraints.push_back(Constraint(Constraint::Store,
+ getNode(CallValue),
+ getNode(*ArgI), ArgPos++));
+ } else {
+ Constraints.push_back(Constraint(Constraint::Store,
+                                         getNode(CallValue),
+ UniversalSet, ArgPos++));
+ }
+ }
+ }
+ // Copy all pointers passed through the varargs section to the varargs node.
+ if (F && F->getFunctionType()->isVarArg())
+ for (; ArgI != ArgE; ++ArgI)
+ if (isa<PointerType>((*ArgI)->getType()))
+ Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F),
+ getNode(*ArgI)));
+ // If more arguments are passed in than we track, just drop them on the floor.
+}
+
+void Andersens::visitCallSite(CallSite CS) {
+ if (isa<PointerType>(CS.getType()))
+ getNodeValue(*CS.getInstruction());
+
+ if (Function *F = CS.getCalledFunction()) {
+ AddConstraintsForCall(CS, F);
+ } else {
+ AddConstraintsForCall(CS, NULL);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Solving Phase
+//===----------------------------------------------------------------------===//
+
+/// intersects - Return true if the points-to set of this node intersects
+/// with the points-to set of the specified node.
+bool Andersens::Node::intersects(Node *N) const {
+ return PointsTo->intersects(N->PointsTo);
+}
+
+/// intersectsIgnoring - Return true if the points-to set of this node
+/// intersects with the points-to set of the specified node on any nodes
+/// except for the specified node to ignore.
+bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const {
+ // TODO: If we are only going to call this with the same value for Ignoring,
+ // we should move the special values out of the points-to bitmap.
+ bool WeHadIt = PointsTo->test(Ignoring);
+ bool NHadIt = N->PointsTo->test(Ignoring);
+ bool Result = false;
+ if (WeHadIt)
+ PointsTo->reset(Ignoring);
+ if (NHadIt)
+ N->PointsTo->reset(Ignoring);
+ Result = PointsTo->intersects(N->PointsTo);
+ if (WeHadIt)
+ PointsTo->set(Ignoring);
+ if (NHadIt)
+ N->PointsTo->set(Ignoring);
+ return Result;
+}
+
+void dumpToDOUT(SparseBitVector<> *bitmap) {
+#ifndef NDEBUG
+ dump(*bitmap, DOUT);
+#endif
+}
+
+
+/// ClumpAddressTaken - Clump together address-taken variables so that the
+/// points-to sets use up less space and can be operated on faster.
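+///
+/// Rough intuition (a sketch of the motivation, not a semantic change):
+/// points-to sets are SparseBitVectors over node indices, and only
+/// address-taken nodes can ever be members of them, so packing those nodes
+/// into one contiguous low-numbered range keeps the bitmaps small and their
+/// unions and intersections cheap. The renumbering is a pure permutation.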
+void Andersens::ClumpAddressTaken() {
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-renumber"
+ std::vector<unsigned> Translate;
+ std::vector<Node> NewGraphNodes;
+
+ Translate.resize(GraphNodes.size());
+ unsigned NewPos = 0;
+
+ for (unsigned i = 0; i < Constraints.size(); ++i) {
+ Constraint &C = Constraints[i];
+ if (C.Type == Constraint::AddressOf) {
+ GraphNodes[C.Src].AddressTaken = true;
+ }
+ }
+ for (unsigned i = 0; i < NumberSpecialNodes; ++i) {
+ unsigned Pos = NewPos++;
+ Translate[i] = Pos;
+ NewGraphNodes.push_back(GraphNodes[i]);
+ DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ }
+
+ // I believe this ends up being faster than making two vectors and splicing
+ // them.
+ for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
+ if (GraphNodes[i].AddressTaken) {
+ unsigned Pos = NewPos++;
+ Translate[i] = Pos;
+ NewGraphNodes.push_back(GraphNodes[i]);
+ DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ }
+ }
+
+ for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
+ if (!GraphNodes[i].AddressTaken) {
+ unsigned Pos = NewPos++;
+ Translate[i] = Pos;
+ NewGraphNodes.push_back(GraphNodes[i]);
+ DOUT << "Renumbering node " << i << " to node " << Pos << "\n";
+ }
+ }
+
+ for (DenseMap<Value*, unsigned>::iterator Iter = ValueNodes.begin();
+ Iter != ValueNodes.end();
+ ++Iter)
+ Iter->second = Translate[Iter->second];
+
+ for (DenseMap<Value*, unsigned>::iterator Iter = ObjectNodes.begin();
+ Iter != ObjectNodes.end();
+ ++Iter)
+ Iter->second = Translate[Iter->second];
+
+ for (DenseMap<Function*, unsigned>::iterator Iter = ReturnNodes.begin();
+ Iter != ReturnNodes.end();
+ ++Iter)
+ Iter->second = Translate[Iter->second];
+
+ for (DenseMap<Function*, unsigned>::iterator Iter = VarargNodes.begin();
+ Iter != VarargNodes.end();
+ ++Iter)
+ Iter->second = Translate[Iter->second];
+
+ for (unsigned i = 0; i < Constraints.size(); ++i) {
+ Constraint &C = Constraints[i];
+ C.Src = Translate[C.Src];
+ C.Dest = Translate[C.Dest];
+ }
+
+ GraphNodes.swap(NewGraphNodes);
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+}
+
+/// The technique used here is described in "Exploiting Pointer and Location
+/// Equivalence to Optimize Pointer Analysis. In the 14th International Static
+/// Analysis Symposium (SAS), August 2007." It is known as the "HVN" algorithm,
+/// and is equivalent to value numbering the collapsed constraint graph without
+/// evaluating unions. This is used as a pre-pass to HU in order to resolve
+/// first-order pointer dereferences and speed up/reduce memory usage of HU.
+/// Running both is equivalent to HRU without the iteration.
+/// HVN in more detail:
+/// Imagine the set of constraints was simply straight line code with no loops
+/// (we eliminate cycles, so there are no loops), such as:
+/// E = &D
+/// E = &C
+/// E = F
+/// F = G
+/// G = F
+/// Applying value numbering to this code tells us:
+/// G == F == E
+///
+/// For HVN, this is as far as it goes. We assign new value numbers to every
+/// "address node", and every "reference node".
+/// To get the optimal result for this, we use a DFS + SCC (since all nodes in a
+/// cycle must have the same value number since the = operation is really
+/// inclusion, not overwrite), and value number the nodes we receive points-to
+/// sets from before we value-number our own node.
+/// The advantage of HU over HVN is that HU considers the inclusion property, so
+/// that if you have
+/// E = &D
+/// E = &C
+/// E = F
+/// F = G
+/// F = &D
+/// G = F
+/// HU will determine that G == F == E. HVN will not, because it cannot prove
+/// that the points-to information ends up being the same because they all
+/// receive &D from E anyway.
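+///
+/// The labels computed here feed RewriteConstraints(): nodes that receive the
+/// same non-zero pointer equivalence label are united there, and a label of 0
+/// means "provably points to nothing", so constraints mentioning such nodes
+/// can be dropped entirely.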
+
+void Andersens::HVN() {
+ DOUT << "Beginning HVN\n";
+ // Build a predecessor graph. This is like our constraint graph with the
+ // edges going in the opposite direction, and there are edges for all the
+  // constraints, instead of just copy constraints. We also build implicit
+  // edges for constraints that are implied but not explicit. E.g., for the
+  // constraint a = &b, we add the implicit edge *a = b. This helps us capture
+  // more cycles.
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ Constraint &C = Constraints[i];
+ if (C.Type == Constraint::AddressOf) {
+ GraphNodes[C.Src].AddressTaken = true;
+ GraphNodes[C.Src].Direct = false;
+
+ // Dest = &src edge
+ unsigned AdrNode = C.Src + FirstAdrNode;
+ if (!GraphNodes[C.Dest].PredEdges)
+ GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+ GraphNodes[C.Dest].PredEdges->set(AdrNode);
+
+ // *Dest = src edge
+ unsigned RefNode = C.Dest + FirstRefNode;
+ if (!GraphNodes[RefNode].ImplicitPredEdges)
+ GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
+ GraphNodes[RefNode].ImplicitPredEdges->set(C.Src);
+ } else if (C.Type == Constraint::Load) {
+ if (C.Offset == 0) {
+ // dest = *src edge
+ if (!GraphNodes[C.Dest].PredEdges)
+ GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+ GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode);
+ } else {
+ GraphNodes[C.Dest].Direct = false;
+ }
+ } else if (C.Type == Constraint::Store) {
+ if (C.Offset == 0) {
+ // *dest = src edge
+ unsigned RefNode = C.Dest + FirstRefNode;
+ if (!GraphNodes[RefNode].PredEdges)
+ GraphNodes[RefNode].PredEdges = new SparseBitVector<>;
+ GraphNodes[RefNode].PredEdges->set(C.Src);
+ }
+ } else {
+ // Dest = Src edge and *Dest = *Src edge
+ if (!GraphNodes[C.Dest].PredEdges)
+ GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+ GraphNodes[C.Dest].PredEdges->set(C.Src);
+ unsigned RefNode = C.Dest + FirstRefNode;
+ if (!GraphNodes[RefNode].ImplicitPredEdges)
+ GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
+ GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode);
+ }
+ }
+ PEClass = 1;
+ // Do SCC finding first to condense our predecessor graph
+ DFSNumber = 0;
+ Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
+ Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
+ Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+
+ for (unsigned i = 0; i < FirstRefNode; ++i) {
+ unsigned Node = VSSCCRep[i];
+ if (!Node2Visited[Node])
+ HVNValNum(Node);
+ }
+ for (BitVectorMap::iterator Iter = Set2PEClass.begin();
+ Iter != Set2PEClass.end();
+ ++Iter)
+ delete Iter->first;
+ Set2PEClass.clear();
+ Node2DFS.clear();
+ Node2Deleted.clear();
+ Node2Visited.clear();
+ DOUT << "Finished HVN\n";
+}
+
+/// This is the workhorse of HVN value numbering. We combine SCC finding at the
+/// same time because it's easy.
+void Andersens::HVNValNum(unsigned NodeIndex) {
+ unsigned MyDFS = DFSNumber++;
+ Node *N = &GraphNodes[NodeIndex];
+ Node2Visited[NodeIndex] = true;
+ Node2DFS[NodeIndex] = MyDFS;
+
+ // First process all our explicit edges
+ if (N->PredEdges)
+ for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+ Iter != N->PredEdges->end();
+ ++Iter) {
+ unsigned j = VSSCCRep[*Iter];
+ if (!Node2Deleted[j]) {
+ if (!Node2Visited[j])
+ HVNValNum(j);
+ if (Node2DFS[NodeIndex] > Node2DFS[j])
+ Node2DFS[NodeIndex] = Node2DFS[j];
+ }
+ }
+
+ // Now process all the implicit edges
+ if (N->ImplicitPredEdges)
+ for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
+ Iter != N->ImplicitPredEdges->end();
+ ++Iter) {
+ unsigned j = VSSCCRep[*Iter];
+ if (!Node2Deleted[j]) {
+ if (!Node2Visited[j])
+ HVNValNum(j);
+ if (Node2DFS[NodeIndex] > Node2DFS[j])
+ Node2DFS[NodeIndex] = Node2DFS[j];
+ }
+ }
+
+ // See if we found any cycles
+ if (MyDFS == Node2DFS[NodeIndex]) {
+ while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
+ unsigned CycleNodeIndex = SCCStack.top();
+ Node *CycleNode = &GraphNodes[CycleNodeIndex];
+ VSSCCRep[CycleNodeIndex] = NodeIndex;
+ // Unify the nodes
+ N->Direct &= CycleNode->Direct;
+
+ if (CycleNode->PredEdges) {
+ if (!N->PredEdges)
+ N->PredEdges = new SparseBitVector<>;
+ *(N->PredEdges) |= CycleNode->PredEdges;
+ delete CycleNode->PredEdges;
+ CycleNode->PredEdges = NULL;
+ }
+ if (CycleNode->ImplicitPredEdges) {
+ if (!N->ImplicitPredEdges)
+ N->ImplicitPredEdges = new SparseBitVector<>;
+ *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges;
+ delete CycleNode->ImplicitPredEdges;
+ CycleNode->ImplicitPredEdges = NULL;
+ }
+
+ SCCStack.pop();
+ }
+
+ Node2Deleted[NodeIndex] = true;
+
+ if (!N->Direct) {
+ GraphNodes[NodeIndex].PointerEquivLabel = PEClass++;
+ return;
+ }
+
+ // Collect labels of successor nodes
+ bool AllSame = true;
+ unsigned First = ~0;
+ SparseBitVector<> *Labels = new SparseBitVector<>;
+ bool Used = false;
+
+ if (N->PredEdges)
+ for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+ Iter != N->PredEdges->end();
+ ++Iter) {
+ unsigned j = VSSCCRep[*Iter];
+ unsigned Label = GraphNodes[j].PointerEquivLabel;
+ // Ignore labels that are equal to us or non-pointers
+ if (j == NodeIndex || Label == 0)
+ continue;
+ if (First == (unsigned)~0)
+ First = Label;
+ else if (First != Label)
+ AllSame = false;
+ Labels->set(Label);
+ }
+
+ // We either have a non-pointer, a copy of an existing node, or a new node.
+ // Assign the appropriate pointer equivalence label.
+ if (Labels->empty()) {
+ GraphNodes[NodeIndex].PointerEquivLabel = 0;
+ } else if (AllSame) {
+ GraphNodes[NodeIndex].PointerEquivLabel = First;
+ } else {
+ GraphNodes[NodeIndex].PointerEquivLabel = Set2PEClass[Labels];
+ if (GraphNodes[NodeIndex].PointerEquivLabel == 0) {
+ unsigned EquivClass = PEClass++;
+ Set2PEClass[Labels] = EquivClass;
+ GraphNodes[NodeIndex].PointerEquivLabel = EquivClass;
+ Used = true;
+ }
+ }
+ if (!Used)
+ delete Labels;
+ } else {
+ SCCStack.push(NodeIndex);
+ }
+}
+
+/// The technique used here is described in "Exploiting Pointer and Location
+/// Equivalence to Optimize Pointer Analysis. In the 14th International Static
+/// Analysis Symposium (SAS), August 2007." It is known as the "HU" algorithm,
+/// and is equivalent to value numbering the collapsed constraint graph
+/// including evaluating unions.
+void Andersens::HU() {
+ DOUT << "Beginning HU\n";
+ // Build a predecessor graph. This is like our constraint graph with the
+ // edges going in the opposite direction, and there are edges for all the
+  // constraints, instead of just copy constraints. We also build implicit
+  // edges for constraints that are implied but not explicit. E.g., for the
+  // constraint a = &b, we add the implicit edge *a = b. This helps us capture
+  // more cycles.
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ Constraint &C = Constraints[i];
+ if (C.Type == Constraint::AddressOf) {
+ GraphNodes[C.Src].AddressTaken = true;
+ GraphNodes[C.Src].Direct = false;
+
+ GraphNodes[C.Dest].PointsTo->set(C.Src);
+ // *Dest = src edge
+ unsigned RefNode = C.Dest + FirstRefNode;
+ if (!GraphNodes[RefNode].ImplicitPredEdges)
+ GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
+ GraphNodes[RefNode].ImplicitPredEdges->set(C.Src);
+ GraphNodes[C.Src].PointedToBy->set(C.Dest);
+ } else if (C.Type == Constraint::Load) {
+ if (C.Offset == 0) {
+ // dest = *src edge
+ if (!GraphNodes[C.Dest].PredEdges)
+ GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+ GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode);
+ } else {
+ GraphNodes[C.Dest].Direct = false;
+ }
+ } else if (C.Type == Constraint::Store) {
+ if (C.Offset == 0) {
+ // *dest = src edge
+ unsigned RefNode = C.Dest + FirstRefNode;
+ if (!GraphNodes[RefNode].PredEdges)
+ GraphNodes[RefNode].PredEdges = new SparseBitVector<>;
+ GraphNodes[RefNode].PredEdges->set(C.Src);
+ }
+ } else {
+      // Dest = Src edge and *Dest = *Src edge
+ if (!GraphNodes[C.Dest].PredEdges)
+ GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
+ GraphNodes[C.Dest].PredEdges->set(C.Src);
+ unsigned RefNode = C.Dest + FirstRefNode;
+ if (!GraphNodes[RefNode].ImplicitPredEdges)
+ GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
+ GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode);
+ }
+ }
+ PEClass = 1;
+ // Do SCC finding first to condense our predecessor graph
+ DFSNumber = 0;
+ Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
+ Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
+ Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+
+ for (unsigned i = 0; i < FirstRefNode; ++i) {
+ if (FindNode(i) == i) {
+ unsigned Node = VSSCCRep[i];
+ if (!Node2Visited[Node])
+ Condense(Node);
+ }
+ }
+
+ // Reset tables for actual labeling
+ Node2DFS.clear();
+ Node2Visited.clear();
+ Node2Deleted.clear();
+ // Pre-grow our densemap so that we don't get really bad behavior
+ Set2PEClass.resize(GraphNodes.size());
+
+ // Visit the condensed graph and generate pointer equivalence labels.
+ Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+ for (unsigned i = 0; i < FirstRefNode; ++i) {
+ if (FindNode(i) == i) {
+ unsigned Node = VSSCCRep[i];
+ if (!Node2Visited[Node])
+ HUValNum(Node);
+ }
+ }
+  // PEClass nodes will be deleted when N->PointsTo is deleted in our caller.
+ Set2PEClass.clear();
+ DOUT << "Finished HU\n";
+}
+
+
+/// Implementation of standard Tarjan SCC algorithm as modified by Nuutila.
+void Andersens::Condense(unsigned NodeIndex) {
+ unsigned MyDFS = DFSNumber++;
+ Node *N = &GraphNodes[NodeIndex];
+ Node2Visited[NodeIndex] = true;
+ Node2DFS[NodeIndex] = MyDFS;
+
+ // First process all our explicit edges
+ if (N->PredEdges)
+ for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+ Iter != N->PredEdges->end();
+ ++Iter) {
+ unsigned j = VSSCCRep[*Iter];
+ if (!Node2Deleted[j]) {
+ if (!Node2Visited[j])
+ Condense(j);
+ if (Node2DFS[NodeIndex] > Node2DFS[j])
+ Node2DFS[NodeIndex] = Node2DFS[j];
+ }
+ }
+
+ // Now process all the implicit edges
+ if (N->ImplicitPredEdges)
+ for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
+ Iter != N->ImplicitPredEdges->end();
+ ++Iter) {
+ unsigned j = VSSCCRep[*Iter];
+ if (!Node2Deleted[j]) {
+ if (!Node2Visited[j])
+ Condense(j);
+ if (Node2DFS[NodeIndex] > Node2DFS[j])
+ Node2DFS[NodeIndex] = Node2DFS[j];
+ }
+ }
+
+ // See if we found any cycles
+ if (MyDFS == Node2DFS[NodeIndex]) {
+ while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
+ unsigned CycleNodeIndex = SCCStack.top();
+ Node *CycleNode = &GraphNodes[CycleNodeIndex];
+ VSSCCRep[CycleNodeIndex] = NodeIndex;
+ // Unify the nodes
+ N->Direct &= CycleNode->Direct;
+
+ *(N->PointsTo) |= CycleNode->PointsTo;
+ delete CycleNode->PointsTo;
+ CycleNode->PointsTo = NULL;
+ if (CycleNode->PredEdges) {
+ if (!N->PredEdges)
+ N->PredEdges = new SparseBitVector<>;
+ *(N->PredEdges) |= CycleNode->PredEdges;
+ delete CycleNode->PredEdges;
+ CycleNode->PredEdges = NULL;
+ }
+ if (CycleNode->ImplicitPredEdges) {
+ if (!N->ImplicitPredEdges)
+ N->ImplicitPredEdges = new SparseBitVector<>;
+ *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges;
+ delete CycleNode->ImplicitPredEdges;
+ CycleNode->ImplicitPredEdges = NULL;
+ }
+ SCCStack.pop();
+ }
+
+ Node2Deleted[NodeIndex] = true;
+
+ // Set up number of incoming edges for other nodes
+ if (N->PredEdges)
+ for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+ Iter != N->PredEdges->end();
+ ++Iter)
+ ++GraphNodes[VSSCCRep[*Iter]].NumInEdges;
+ } else {
+ SCCStack.push(NodeIndex);
+ }
+}
+
+void Andersens::HUValNum(unsigned NodeIndex) {
+ Node *N = &GraphNodes[NodeIndex];
+ Node2Visited[NodeIndex] = true;
+
+  // Eliminate dereferences of non-pointers for those non-pointers we have
+  // already identified. These are ref nodes whose non-ref node:
+  // 1. Has already been visited and determined to point to nothing (and
+  //    thus, a dereference of it must point to nothing), or
+  // 2. Is a direct node with no predecessor edges in our graph and with no
+  //    points-to set (since it can't point to anything either, being that it
+  //    receives no points-to sets and has none).
+ if (NodeIndex >= FirstRefNode) {
+ unsigned j = VSSCCRep[FindNode(NodeIndex - FirstRefNode)];
+ if ((Node2Visited[j] && !GraphNodes[j].PointerEquivLabel)
+ || (GraphNodes[j].Direct && !GraphNodes[j].PredEdges
+ && GraphNodes[j].PointsTo->empty())){
+ return;
+ }
+ }
+ // Process all our explicit edges
+ if (N->PredEdges)
+ for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
+ Iter != N->PredEdges->end();
+ ++Iter) {
+ unsigned j = VSSCCRep[*Iter];
+ if (!Node2Visited[j])
+ HUValNum(j);
+
+ // If this edge turned out to be the same as us, or got no pointer
+      // equivalence label (and thus points to nothing), just decrement our
+ // incoming edges and continue.
+ if (j == NodeIndex || GraphNodes[j].PointerEquivLabel == 0) {
+ --GraphNodes[j].NumInEdges;
+ continue;
+ }
+
+ *(N->PointsTo) |= GraphNodes[j].PointsTo;
+
+ // If we didn't end up storing this in the hash, and we're done with all
+ // the edges, we don't need the points-to set anymore.
+ --GraphNodes[j].NumInEdges;
+ if (!GraphNodes[j].NumInEdges && !GraphNodes[j].StoredInHash) {
+ delete GraphNodes[j].PointsTo;
+ GraphNodes[j].PointsTo = NULL;
+ }
+ }
+ // If this isn't a direct node, generate a fresh variable.
+ if (!N->Direct) {
+ N->PointsTo->set(FirstRefNode + NodeIndex);
+ }
+
+  // See if we have something equivalent to us; if not, generate a new
+  // equivalence class.
+ if (N->PointsTo->empty()) {
+ delete N->PointsTo;
+ N->PointsTo = NULL;
+ } else {
+ if (N->Direct) {
+ N->PointerEquivLabel = Set2PEClass[N->PointsTo];
+ if (N->PointerEquivLabel == 0) {
+ unsigned EquivClass = PEClass++;
+ N->StoredInHash = true;
+ Set2PEClass[N->PointsTo] = EquivClass;
+ N->PointerEquivLabel = EquivClass;
+ }
+ } else {
+ N->PointerEquivLabel = PEClass++;
+ }
+ }
+}
+
+/// Rewrite our list of constraints so that pointer equivalent nodes are
+/// replaced by their pointer equivalence class representative.
+void Andersens::RewriteConstraints() {
+ std::vector<Constraint> NewConstraints;
+ DenseSet<Constraint, ConstraintKeyInfo> Seen;
+
+ PEClass2Node.clear();
+ PENLEClass2Node.clear();
+
+  // We may have from 1 to GraphNodes.size() + 1 equivalence classes.
+ PEClass2Node.insert(PEClass2Node.begin(), GraphNodes.size() + 1, -1);
+ PENLEClass2Node.insert(PENLEClass2Node.begin(), GraphNodes.size() + 1, -1);
+
+ // Rewrite constraints, ignoring non-pointer constraints, uniting equivalent
+ // nodes, and rewriting constraints to use the representative nodes.
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ Constraint &C = Constraints[i];
+ unsigned RHSNode = FindNode(C.Src);
+ unsigned LHSNode = FindNode(C.Dest);
+ unsigned RHSLabel = GraphNodes[VSSCCRep[RHSNode]].PointerEquivLabel;
+ unsigned LHSLabel = GraphNodes[VSSCCRep[LHSNode]].PointerEquivLabel;
+
+ // First we try to eliminate constraints for things we can prove don't point
+ // to anything.
+ if (LHSLabel == 0) {
+ DEBUG(PrintNode(&GraphNodes[LHSNode]));
+ DOUT << " is a non-pointer, ignoring constraint.\n";
+ continue;
+ }
+ if (RHSLabel == 0) {
+ DEBUG(PrintNode(&GraphNodes[RHSNode]));
+ DOUT << " is a non-pointer, ignoring constraint.\n";
+ continue;
+ }
+ // This constraint may be useless, and it may become useless as we translate
+ // it.
+ if (C.Src == C.Dest && C.Type == Constraint::Copy)
+ continue;
+
+ C.Src = FindEquivalentNode(RHSNode, RHSLabel);
+ C.Dest = FindEquivalentNode(FindNode(LHSNode), LHSLabel);
+ if ((C.Src == C.Dest && C.Type == Constraint::Copy)
+ || Seen.count(C))
+ continue;
+
+ Seen.insert(C);
+ NewConstraints.push_back(C);
+ }
+ Constraints.swap(NewConstraints);
+ PEClass2Node.clear();
+}
+
+/// See if we have a node that is pointer equivalent to the one being asked
+/// about, and if so, unite them and return the equivalent node. Otherwise,
+/// return the original node.
+unsigned Andersens::FindEquivalentNode(unsigned NodeIndex,
+ unsigned NodeLabel) {
+ if (!GraphNodes[NodeIndex].AddressTaken) {
+ if (PEClass2Node[NodeLabel] != -1) {
+ // We found an existing node with the same pointer label, so unify them.
+ // We specifically request that Union-By-Rank not be used so that
+ // PEClass2Node[NodeLabel] U= NodeIndex and not the other way around.
+ return UniteNodes(PEClass2Node[NodeLabel], NodeIndex, false);
+ } else {
+ PEClass2Node[NodeLabel] = NodeIndex;
+ PENLEClass2Node[NodeLabel] = NodeIndex;
+ }
+ } else if (PENLEClass2Node[NodeLabel] == -1) {
+ PENLEClass2Node[NodeLabel] = NodeIndex;
+ }
+
+ return NodeIndex;
+}
+
+void Andersens::PrintLabels() const {
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ if (i < FirstRefNode) {
+ PrintNode(&GraphNodes[i]);
+ } else if (i < FirstAdrNode) {
+ DOUT << "REF(";
+ PrintNode(&GraphNodes[i-FirstRefNode]);
+ DOUT <<")";
+ } else {
+ DOUT << "ADR(";
+ PrintNode(&GraphNodes[i-FirstAdrNode]);
+ DOUT <<")";
+ }
+
+ DOUT << " has pointer label " << GraphNodes[i].PointerEquivLabel
+ << " and SCC rep " << VSSCCRep[i]
+ << " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct")
+ << "\n";
+ }
+}
+
+/// The technique used here is described in "The Ant and the
+/// Grasshopper: Fast and Accurate Pointer Analysis for Millions of
+/// Lines of Code. In Programming Language Design and Implementation
+/// (PLDI), June 2007." It is known as the "HCD" (Hybrid Cycle
+/// Detection) algorithm. It is called a hybrid because it performs an
+/// offline analysis and uses its results during the solving (online)
+/// phase. This is just the offline portion; the results of this
+/// operation are stored in SDT and are later used in SolveConstraints()
+/// and UniteNodes().
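+///
+/// As a sketch: if this offline pass proves that the ref node *X sits in one
+/// SCC with an ordinary node Y, SDT records Y as the representative for X.
+/// During solving, the moment some node P enters X's points-to set we already
+/// know P must be united with Y, with no further online cycle detection.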
+void Andersens::HCD() {
+ DOUT << "Starting HCD.\n";
+ HCDSCCRep.resize(GraphNodes.size());
+
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ GraphNodes[i].Edges = new SparseBitVector<>;
+ HCDSCCRep[i] = i;
+ }
+
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ Constraint &C = Constraints[i];
+ assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size());
+ if (C.Type == Constraint::AddressOf) {
+ continue;
+ } else if (C.Type == Constraint::Load) {
+      if (C.Offset == 0)
+        GraphNodes[C.Dest].Edges->set(C.Src + FirstRefNode);
+    } else if (C.Type == Constraint::Store) {
+      if (C.Offset == 0)
+ GraphNodes[C.Dest + FirstRefNode].Edges->set(C.Src);
+ } else {
+ GraphNodes[C.Dest].Edges->set(C.Src);
+ }
+ }
+
+ Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
+ Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
+ Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
+ SDT.insert(SDT.begin(), GraphNodes.size() / 2, -1);
+
+ DFSNumber = 0;
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ unsigned Node = HCDSCCRep[i];
+ if (!Node2Deleted[Node])
+ Search(Node);
+ }
+
+ for (unsigned i = 0; i < GraphNodes.size(); ++i)
+ if (GraphNodes[i].Edges != NULL) {
+ delete GraphNodes[i].Edges;
+ GraphNodes[i].Edges = NULL;
+ }
+
+  while (!SCCStack.empty())
+ SCCStack.pop();
+
+ Node2DFS.clear();
+ Node2Visited.clear();
+ Node2Deleted.clear();
+ HCDSCCRep.clear();
+ DOUT << "HCD complete.\n";
+}
+
+// Component of HCD:
+// Use Nuutila's variant of Tarjan's algorithm to detect
+// Strongly-Connected Components (SCCs). For non-trivial SCCs
+// containing ref nodes, insert the appropriate information in SDT.
+void Andersens::Search(unsigned Node) {
+ unsigned MyDFS = DFSNumber++;
+
+ Node2Visited[Node] = true;
+ Node2DFS[Node] = MyDFS;
+
+ for (SparseBitVector<>::iterator Iter = GraphNodes[Node].Edges->begin(),
+ End = GraphNodes[Node].Edges->end();
+ Iter != End;
+ ++Iter) {
+ unsigned J = HCDSCCRep[*Iter];
+ assert(GraphNodes[J].isRep() && "Debug check; must be representative");
+ if (!Node2Deleted[J]) {
+ if (!Node2Visited[J])
+ Search(J);
+ if (Node2DFS[Node] > Node2DFS[J])
+ Node2DFS[Node] = Node2DFS[J];
+ }
+ }
+
+  if (MyDFS != Node2DFS[Node]) {
+ SCCStack.push(Node);
+ return;
+ }
+
+  // This node is the root of an SCC, so process it.
+ //
+ // If the SCC is "non-trivial" (not a singleton) and contains a reference
+ // node, we place this SCC into SDT. We unite the nodes in any case.
+ if (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
+ SparseBitVector<> SCC;
+
+ SCC.set(Node);
+
+ bool Ref = (Node >= FirstRefNode);
+
+ Node2Deleted[Node] = true;
+
+ do {
+ unsigned P = SCCStack.top(); SCCStack.pop();
+ Ref |= (P >= FirstRefNode);
+ SCC.set(P);
+ HCDSCCRep[P] = Node;
+ } while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS);
+
+ if (Ref) {
+ unsigned Rep = SCC.find_first();
+ assert(Rep < FirstRefNode && "The SCC didn't have a non-Ref node!");
+
+ SparseBitVector<>::iterator i = SCC.begin();
+
+      // Skip over the non-ref nodes
+      while (*i < FirstRefNode)
+        ++i;
+
+      while (i != SCC.end())
+        SDT[(*i++) - FirstRefNode] = Rep;
+ }
+ }
+}
+
+
+/// Optimize the constraints by performing offline variable substitution and
+/// other optimizations.
+void Andersens::OptimizeConstraints() {
+ DOUT << "Beginning constraint optimization\n";
+
+ SDTActive = false;
+
+  // Function-related nodes need to stay in the same relative position and can't
+ // be location equivalent.
+ for (std::map<unsigned, unsigned>::iterator Iter = MaxK.begin();
+ Iter != MaxK.end();
+ ++Iter) {
+ for (unsigned i = Iter->first;
+ i != Iter->first + Iter->second;
+ ++i) {
+ GraphNodes[i].AddressTaken = true;
+ GraphNodes[i].Direct = false;
+ }
+ }
+
+ ClumpAddressTaken();
+ FirstRefNode = GraphNodes.size();
+ FirstAdrNode = FirstRefNode + GraphNodes.size();
+ GraphNodes.insert(GraphNodes.end(), 2 * GraphNodes.size(),
+ Node(false));
+ VSSCCRep.resize(GraphNodes.size());
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ VSSCCRep[i] = i;
+ }
+ HVN();
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ Node *N = &GraphNodes[i];
+ delete N->PredEdges;
+ N->PredEdges = NULL;
+ delete N->ImplicitPredEdges;
+ N->ImplicitPredEdges = NULL;
+ }
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-labels"
+ DEBUG(PrintLabels());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+ RewriteConstraints();
+ // Delete the adr nodes.
+ GraphNodes.resize(FirstRefNode * 2);
+
+ // Now perform HU
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ Node *N = &GraphNodes[i];
+ if (FindNode(i) == i) {
+ N->PointsTo = new SparseBitVector<>;
+ N->PointedToBy = new SparseBitVector<>;
+ // Reset our labels
+ }
+ VSSCCRep[i] = i;
+ N->PointerEquivLabel = 0;
+ }
+ HU();
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-labels"
+ DEBUG(PrintLabels());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+ RewriteConstraints();
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ if (FindNode(i) == i) {
+ Node *N = &GraphNodes[i];
+ delete N->PointsTo;
+ N->PointsTo = NULL;
+ delete N->PredEdges;
+ N->PredEdges = NULL;
+ delete N->ImplicitPredEdges;
+ N->ImplicitPredEdges = NULL;
+ delete N->PointedToBy;
+ N->PointedToBy = NULL;
+ }
+ }
+
+  // Perform Hybrid Cycle Detection (HCD).
+ HCD();
+ SDTActive = true;
+
+ // No longer any need for the upper half of GraphNodes (for ref nodes).
+ GraphNodes.erase(GraphNodes.begin() + FirstRefNode, GraphNodes.end());
+
+ // HCD complete.
+
+ DOUT << "Finished constraint optimization\n";
+ FirstRefNode = 0;
+ FirstAdrNode = 0;
+}
+
+/// Unite pointer but not location equivalent variables, now that the constraint
+/// graph is built.
+void Andersens::UnitePointerEquivalences() {
+ DOUT << "Uniting remaining pointer equivalences\n";
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) {
+ unsigned Label = GraphNodes[i].PointerEquivLabel;
+
+ if (Label && PENLEClass2Node[Label] != -1)
+ UniteNodes(i, PENLEClass2Node[Label]);
+ }
+ }
+ DOUT << "Finished remaining pointer equivalences\n";
+ PENLEClass2Node.clear();
+}
+
+/// Create the constraint graph used for solving points-to analysis.
+///
+void Andersens::CreateConstraintGraph() {
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ Constraint &C = Constraints[i];
+ assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size());
+ if (C.Type == Constraint::AddressOf)
+ GraphNodes[C.Dest].PointsTo->set(C.Src);
+ else if (C.Type == Constraint::Load)
+ GraphNodes[C.Src].Constraints.push_back(C);
+ else if (C.Type == Constraint::Store)
+ GraphNodes[C.Dest].Constraints.push_back(C);
+ else if (C.Offset != 0)
+ GraphNodes[C.Src].Constraints.push_back(C);
+ else
+ GraphNodes[C.Src].Edges->set(C.Dest);
+ }
+}
+
+// Perform DFS and cycle detection.
+bool Andersens::QueryNode(unsigned Node) {
+ assert(GraphNodes[Node].isRep() && "Querying a non-rep node");
+ unsigned OurDFS = ++DFSNumber;
+ SparseBitVector<> ToErase;
+ SparseBitVector<> NewEdges;
+ Tarjan2DFS[Node] = OurDFS;
+
+ // Changed denotes a change from a recursive call that we will bubble up.
+ // Merged is set if we actually merge a node ourselves.
+ bool Changed = false, Merged = false;
+
+ for (SparseBitVector<>::iterator bi = GraphNodes[Node].Edges->begin();
+ bi != GraphNodes[Node].Edges->end();
+ ++bi) {
+ unsigned RepNode = FindNode(*bi);
+ // If this edge points to a non-representative node but we are
+ // already planning to add an edge to its representative, we have no
+ // need for this edge anymore.
+ if (RepNode != *bi && NewEdges.test(RepNode)){
+ ToErase.set(*bi);
+ continue;
+ }
+
+ // Continue about our DFS.
+ if (!Tarjan2Deleted[RepNode]){
+ if (Tarjan2DFS[RepNode] == 0) {
+ Changed |= QueryNode(RepNode);
+ // May have been changed by QueryNode
+ RepNode = FindNode(RepNode);
+ }
+ if (Tarjan2DFS[RepNode] < Tarjan2DFS[Node])
+ Tarjan2DFS[Node] = Tarjan2DFS[RepNode];
+ }
+
+ // We may have just discovered that this node is part of a cycle, in
+ // which case we can also erase it.
+ if (RepNode != *bi) {
+ ToErase.set(*bi);
+ NewEdges.set(RepNode);
+ }
+ }
+
+ GraphNodes[Node].Edges->intersectWithComplement(ToErase);
+ GraphNodes[Node].Edges |= NewEdges;
+
+ // If this node is a root of a non-trivial SCC, place it on our
+ // worklist to be processed.
+ if (OurDFS == Tarjan2DFS[Node]) {
+ while (!SCCStack.empty() && Tarjan2DFS[SCCStack.top()] >= OurDFS) {
+ Node = UniteNodes(Node, SCCStack.top());
+
+ SCCStack.pop();
+ Merged = true;
+ }
+ Tarjan2Deleted[Node] = true;
+
+ if (Merged)
+ NextWL->insert(&GraphNodes[Node]);
+ } else {
+ SCCStack.push(Node);
+ }
+
+  return (Changed | Merged);
+}
+
+/// SolveConstraints - This stage iteratively processes the constraints list
+/// propagating constraints (adding edges to the Nodes in the points-to graph)
+/// until a fixed point is reached.
+///
+/// We use a variant of the technique called "Lazy Cycle Detection", which is
+/// described in "The Ant and the Grasshopper: Fast and Accurate Pointer
+/// Analysis for Millions of Lines of Code. In Programming Language Design and
+/// Implementation (PLDI), June 2007."
+/// The paper describes performing cycle detection one node at a time, which
+/// can be expensive when there are no cycles, but many long chains of nodes
+/// that it heuristically believes are cycles (because it will DFS from each
+/// node without state from previous nodes).
+/// Instead, we use the heuristic to build a worklist of nodes to check, then
+/// cycle detect them all at the same time to do this more cheaply. This
+/// catches cycles slightly later than the original technique did, but does so
+/// significantly more cheaply.
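+///
+/// Sketch of the heuristic as used below: while propagating along an edge
+/// (N, Rep), if the two endpoints currently have identical points-to sets and
+/// that edge has not been checked before, Rep is pushed onto TarjanWL; at the
+/// top of the next iteration every queued candidate is cycle-checked in one
+/// batch of QueryNode() DFS walks.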
+
+void Andersens::SolveConstraints() {
+ CurrWL = &w1;
+ NextWL = &w2;
+
+ OptimizeConstraints();
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa-constraints"
+ DEBUG(PrintConstraints());
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "anders-aa"
+
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ Node *N = &GraphNodes[i];
+ N->PointsTo = new SparseBitVector<>;
+ N->OldPointsTo = new SparseBitVector<>;
+ N->Edges = new SparseBitVector<>;
+ }
+ CreateConstraintGraph();
+ UnitePointerEquivalences();
+ assert(SCCStack.empty() && "SCC Stack should be empty by now!");
+ Node2DFS.clear();
+ Node2Deleted.clear();
+ Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
+ Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
+ DFSNumber = 0;
+ DenseSet<Constraint, ConstraintKeyInfo> Seen;
+ DenseSet<std::pair<unsigned,unsigned>, PairKeyInfo> EdgesChecked;
+
+ // Order graph and add initial nodes to work list.
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ Node *INode = &GraphNodes[i];
+
+ // Add to work list if it's a representative and can contribute to the
+ // calculation right now.
+ if (INode->isRep() && !INode->PointsTo->empty()
+ && (!INode->Edges->empty() || !INode->Constraints.empty())) {
+ INode->Stamp();
+ CurrWL->insert(INode);
+ }
+ }
+ std::queue<unsigned int> TarjanWL;
+#if !FULL_UNIVERSAL
+ // "Rep and special variables" - in order for HCD to maintain conservative
+ // results when !FULL_UNIVERSAL, we need to treat the special variables in
+ // the same way that the !FULL_UNIVERSAL tweak does throughout the rest of
+ // the analysis - it's ok to add edges from the special nodes, but never
+ // *to* the special nodes.
+ std::vector<unsigned int> RSV;
+#endif
+  while (!CurrWL->empty()) {
+ DOUT << "Starting iteration #" << ++NumIters << "\n";
+
+ Node* CurrNode;
+ unsigned CurrNodeIndex;
+
+ // Actual cycle checking code. We cycle check all of the lazy cycle
+ // candidates from the last iteration in one go.
+ if (!TarjanWL.empty()) {
+ DFSNumber = 0;
+
+ Tarjan2DFS.clear();
+ Tarjan2Deleted.clear();
+ while (!TarjanWL.empty()) {
+ unsigned int ToTarjan = TarjanWL.front();
+ TarjanWL.pop();
+ if (!Tarjan2Deleted[ToTarjan]
+ && GraphNodes[ToTarjan].isRep()
+ && Tarjan2DFS[ToTarjan] == 0)
+ QueryNode(ToTarjan);
+ }
+ }
+
+ // Add to work list if it's a representative and can contribute to the
+ // calculation right now.
+    while ((CurrNode = CurrWL->pop()) != NULL) {
+ CurrNodeIndex = CurrNode - &GraphNodes[0];
+ CurrNode->Stamp();
+
+      // Figure out the changed points-to bits
+ SparseBitVector<> CurrPointsTo;
+ CurrPointsTo.intersectWithComplement(CurrNode->PointsTo,
+ CurrNode->OldPointsTo);
+ if (CurrPointsTo.empty())
+ continue;
+
+ *(CurrNode->OldPointsTo) |= CurrPointsTo;
+
+ // Check the offline-computed equivalencies from HCD.
+ bool SCC = false;
+ unsigned Rep;
+
+ if (SDT[CurrNodeIndex] >= 0) {
+ SCC = true;
+ Rep = FindNode(SDT[CurrNodeIndex]);
+
+#if !FULL_UNIVERSAL
+ RSV.clear();
+#endif
+ for (SparseBitVector<>::iterator bi = CurrPointsTo.begin();
+ bi != CurrPointsTo.end(); ++bi) {
+ unsigned Node = FindNode(*bi);
+#if !FULL_UNIVERSAL
+ if (Node < NumberSpecialNodes) {
+ RSV.push_back(Node);
+ continue;
+ }
+#endif
+ Rep = UniteNodes(Rep,Node);
+ }
+#if !FULL_UNIVERSAL
+ RSV.push_back(Rep);
+#endif
+
+ NextWL->insert(&GraphNodes[Rep]);
+
+        if (!CurrNode->isRep())
+ continue;
+ }
+
+ Seen.clear();
+
+      // Now process the constraints for this node.
+ for (std::list<Constraint>::iterator li = CurrNode->Constraints.begin();
+ li != CurrNode->Constraints.end(); ) {
+ li->Src = FindNode(li->Src);
+ li->Dest = FindNode(li->Dest);
+
+ // Delete redundant constraints
+        if (Seen.count(*li)) {
+ std::list<Constraint>::iterator lk = li; li++;
+
+ CurrNode->Constraints.erase(lk);
+ ++NumErased;
+ continue;
+ }
+ Seen.insert(*li);
+
+ // Src and Dest will be the vars we are going to process.
+ // This may look a bit ugly, but what it does is allow us to process
+ // both store and load constraints with the same code.
+ // Load constraints say that every member of our RHS solution has K
+ // added to it, and that variable gets an edge to LHS. We also union
+ // RHS+K's solution into the LHS solution.
+ // Store constraints say that every member of our LHS solution has K
+ // added to it, and that variable gets an edge from RHS. We also union
+ // RHS's solution into the LHS+K solution.
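+        //
+        // A small worked example (hypothetical nodes): for a load P = *Q
+        // with K == 0 and {a, b} newly in Q's points-to set, the general
+        // path below adds edges a -> P and b -> P; for a store *P = Q with
+        // {a, b} newly in P's points-to set, it adds edges Q -> a and Q -> b.
+        // CurrMember stands in for whichever side is being dereferenced.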
+ unsigned *Src;
+ unsigned *Dest;
+ unsigned K = li->Offset;
+ unsigned CurrMember;
+ if (li->Type == Constraint::Load) {
+ Src = &CurrMember;
+ Dest = &li->Dest;
+ } else if (li->Type == Constraint::Store) {
+ Src = &li->Src;
+ Dest = &CurrMember;
+ } else {
+ // TODO: Handle offset copy constraints.
+ li++;
+ continue;
+ }
+
+ // See if we can use Hybrid Cycle Detection (that is, check
+ // if it was a statically detected offline equivalence that
+ // involves pointers; if so, remove the redundant constraints).
+ if (SCC && K == 0) {
+#if FULL_UNIVERSAL
+ CurrMember = Rep;
+
+ if (GraphNodes[*Src].Edges->test_and_set(*Dest))
+ if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
+ NextWL->insert(&GraphNodes[*Dest]);
+#else
+ for (unsigned i=0; i < RSV.size(); ++i) {
+ CurrMember = RSV[i];
+
+ if (*Dest < NumberSpecialNodes)
+ continue;
+ if (GraphNodes[*Src].Edges->test_and_set(*Dest))
+ if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
+ NextWL->insert(&GraphNodes[*Dest]);
+ }
+#endif
+ // Since all future elements of the points-to set will be
+ // equivalent to the current ones, the complex constraints
+ // become redundant.
+ std::list<Constraint>::iterator lk = li; li++;
+#if !FULL_UNIVERSAL
+ // In this case, we can still erase the constraints when the
+ // elements of the points-to sets are referenced by *Dest,
+ // but not when they are referenced by *Src (i.e. for a Load
+ // constraint). This is because if another special variable is
+ // put into the points-to set later, we still need to add the
+ // new edge from that special variable.
+ if( lk->Type != Constraint::Load)
+#endif
+ GraphNodes[CurrNodeIndex].Constraints.erase(lk);
+ } else {
+ const SparseBitVector<> &Solution = CurrPointsTo;
+
+ for (SparseBitVector<>::iterator bi = Solution.begin();
+ bi != Solution.end();
+ ++bi) {
+ CurrMember = *bi;
+
+ // Need to increment the member by K since that is where we are
+ // supposed to copy to/from. Note that in positive weight cycles,
+ // which occur in address taking of fields, K can go past
+ // MaxK[CurrMember] elements, even though that is all it could point
+ // to.
+ if (K > 0 && K > MaxK[CurrMember])
+ continue;
+ else
+ CurrMember = FindNode(CurrMember + K);
+
+ // Add an edge to the graph, so we can just do regular
+ // bitmap ior next time. It may also let us notice a cycle.
+#if !FULL_UNIVERSAL
+ if (*Dest < NumberSpecialNodes)
+ continue;
+#endif
+ if (GraphNodes[*Src].Edges->test_and_set(*Dest))
+ if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
+ NextWL->insert(&GraphNodes[*Dest]);
+
+ }
+ li++;
+ }
+ }
+ SparseBitVector<> NewEdges;
+ SparseBitVector<> ToErase;
+
+ // Now all we have left to do is propagate points-to info along the
+ // edges, erasing the redundant edges.
+ for (SparseBitVector<>::iterator bi = CurrNode->Edges->begin();
+ bi != CurrNode->Edges->end();
+ ++bi) {
+
+ unsigned DestVar = *bi;
+ unsigned Rep = FindNode(DestVar);
+
+ // If we ended up with this node as our destination, or we've already
+ // got an edge for the representative, delete the current edge.
+ if (Rep == CurrNodeIndex ||
+ (Rep != DestVar && NewEdges.test(Rep))) {
+ ToErase.set(DestVar);
+ continue;
+ }
+
+ std::pair<unsigned,unsigned> edge(CurrNodeIndex,Rep);
+
+ // This is where we do lazy cycle detection.
+ // If this is a cycle candidate (equal points-to sets and this
+ // particular edge has not been cycle-checked previously), add to the
+ // list to check for cycles on the next iteration.
+ if (!EdgesChecked.count(edge) &&
+ *(GraphNodes[Rep].PointsTo) == *(CurrNode->PointsTo)) {
+ EdgesChecked.insert(edge);
+ TarjanWL.push(Rep);
+ }
+ // Union the points-to sets into the dest
+#if !FULL_UNIVERSAL
+ if (Rep >= NumberSpecialNodes)
+#endif
+ if (GraphNodes[Rep].PointsTo |= CurrPointsTo) {
+ NextWL->insert(&GraphNodes[Rep]);
+ }
+ // If this edge's destination was collapsed, rewrite the edge.
+ if (Rep != DestVar) {
+ ToErase.set(DestVar);
+ NewEdges.set(Rep);
+ }
+ }
+ CurrNode->Edges->intersectWithComplement(ToErase);
+ CurrNode->Edges |= NewEdges;
+ }
+
+ // Switch to other work list.
+ WorkList* t = CurrWL; CurrWL = NextWL; NextWL = t;
+ }
+
+
+ Node2DFS.clear();
+ Node2Deleted.clear();
+ for (unsigned i = 0; i < GraphNodes.size(); ++i) {
+ Node *N = &GraphNodes[i];
+ delete N->OldPointsTo;
+ delete N->Edges;
+ }
+ SDTActive = false;
+ SDT.clear();
+}
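+
+// A minimal sketch of the difference propagation used above, with the same
+// SparseBitVector API (Curr/Old/Delta are hypothetical names):
+//
+//   SparseBitVector<> Delta;
+//   Delta.intersectWithComplement(Curr, Old); // Delta = Curr & ~Old
+//   if (!Delta.empty()) {
+//     Old |= Delta;  // remember what has already been propagated
+//     // ... push only Delta along outgoing edges ...
+//   }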
+
+//===----------------------------------------------------------------------===//
+// Union-Find
+//===----------------------------------------------------------------------===//
+
+// Unite nodes First and Second, returning the one which is now the
+// representative node. First and Second are indices into GraphNodes.
+unsigned Andersens::UniteNodes(unsigned First, unsigned Second,
+ bool UnionByRank) {
+ assert (First < GraphNodes.size() && Second < GraphNodes.size() &&
+ "Attempting to merge nodes that don't exist");
+
+ Node *FirstNode = &GraphNodes[First];
+ Node *SecondNode = &GraphNodes[Second];
+
+ assert (SecondNode->isRep() && FirstNode->isRep() &&
+ "Trying to unite two non-representative nodes!");
+ if (First == Second)
+ return First;
+
+ if (UnionByRank) {
+ int RankFirst = (int) FirstNode ->NodeRep;
+ int RankSecond = (int) SecondNode->NodeRep;
+
+ // Rank starts at -1 and gets decremented as it increases.
+ // Translation: higher rank, lower NodeRep value, which is always negative.
+ if (RankFirst > RankSecond) {
+ unsigned t = First; First = Second; Second = t;
+ Node* tp = FirstNode; FirstNode = SecondNode; SecondNode = tp;
+ } else if (RankFirst == RankSecond) {
+ FirstNode->NodeRep = (unsigned) (RankFirst - 1);
+ }
+ }
+
+ SecondNode->NodeRep = First;
+#if !FULL_UNIVERSAL
+ if (First >= NumberSpecialNodes)
+#endif
+ if (FirstNode->PointsTo && SecondNode->PointsTo)
+ FirstNode->PointsTo |= *(SecondNode->PointsTo);
+ if (FirstNode->Edges && SecondNode->Edges)
+ FirstNode->Edges |= *(SecondNode->Edges);
+ if (!SecondNode->Constraints.empty())
+ FirstNode->Constraints.splice(FirstNode->Constraints.begin(),
+ SecondNode->Constraints);
+ if (FirstNode->OldPointsTo) {
+ delete FirstNode->OldPointsTo;
+ FirstNode->OldPointsTo = new SparseBitVector<>;
+ }
+
+ // Destroy interesting parts of the merged-from node.
+ delete SecondNode->OldPointsTo;
+ delete SecondNode->Edges;
+ delete SecondNode->PointsTo;
+ SecondNode->Edges = NULL;
+ SecondNode->PointsTo = NULL;
+ SecondNode->OldPointsTo = NULL;
+
+ NumUnified++;
+ DOUT << "Unified Node ";
+ DEBUG(PrintNode(FirstNode));
+ DOUT << " and Node ";
+ DEBUG(PrintNode(SecondNode));
+ DOUT << "\n";
+
+ if (SDTActive)
+ if (SDT[Second] >= 0) {
+ if (SDT[First] < 0)
+ SDT[First] = SDT[Second];
+ else {
+ UniteNodes( FindNode(SDT[First]), FindNode(SDT[Second]) );
+ First = FindNode(First);
+ }
+ }
+
+ return First;
+}
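+
+// Usage sketch (hypothetical indices A and B, for illustration):
+//
+//   unsigned RA = FindNode(A), RB = FindNode(B);
+//   if (RA != RB)
+//     RA = UniteNodes(RA, RB);
+//
+// Afterwards FindNode(A) == FindNode(B) == RA, and only GraphNodes[RA]
+// still owns points-to bits, edges and constraints; the merged-from node
+// was emptied above.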
+
+// Find the index into GraphNodes of the node representing Node, performing
+// path compression along the way
+unsigned Andersens::FindNode(unsigned NodeIndex) {
+ assert (NodeIndex < GraphNodes.size()
+ && "Attempting to find a node that can't exist");
+ Node *N = &GraphNodes[NodeIndex];
+ if (N->isRep())
+ return NodeIndex;
+ else
+ return (N->NodeRep = FindNode(N->NodeRep));
+}
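+
+// For example, if earlier unions left the chain 5 -> 9 -> 12 (with 12 the
+// representative), FindNode(5) returns 12 and rewrites the NodeRep of both
+// 5 and 9 to point directly at 12, so later lookups are constant time.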
+
+// Find the index into GraphNodes of the node representing Node, without
+// performing path compression along the way (used by the const printing code)
+unsigned Andersens::FindNode(unsigned NodeIndex) const {
+ assert (NodeIndex < GraphNodes.size()
+ && "Attempting to find a node that can't exist");
+ const Node *N = &GraphNodes[NodeIndex];
+ if (N->isRep())
+ return NodeIndex;
+ else
+ return FindNode(N->NodeRep);
+}
+
+//===----------------------------------------------------------------------===//
+// Debugging Output
+//===----------------------------------------------------------------------===//
+
+void Andersens::PrintNode(const Node *N) const {
+ if (N == &GraphNodes[UniversalSet]) {
+ cerr << "<universal>";
+ return;
+ } else if (N == &GraphNodes[NullPtr]) {
+ cerr << "<nullptr>";
+ return;
+ } else if (N == &GraphNodes[NullObject]) {
+ cerr << "<null>";
+ return;
+ }
+ if (!N->getValue()) {
+ cerr << "artificial" << (intptr_t) N;
+ return;
+ }
+
+ assert(N->getValue() != 0 && "Never set node label!");
+ Value *V = N->getValue();
+ if (Function *F = dyn_cast<Function>(V)) {
+ if (isa<PointerType>(F->getFunctionType()->getReturnType()) &&
+ N == &GraphNodes[getReturnNode(F)]) {
+ cerr << F->getName() << ":retval";
+ return;
+ } else if (F->getFunctionType()->isVarArg() &&
+ N == &GraphNodes[getVarargNode(F)]) {
+ cerr << F->getName() << ":vararg";
+ return;
+ }
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ cerr << I->getParent()->getParent()->getName() << ":";
+ else if (Argument *Arg = dyn_cast<Argument>(V))
+ cerr << Arg->getParent()->getName() << ":";
+
+ if (V->hasName())
+ cerr << V->getName();
+ else
+ cerr << "(unnamed)";
+
+ if (isa<GlobalValue>(V) || isa<AllocationInst>(V))
+ if (N == &GraphNodes[getObject(V)])
+ cerr << "<mem>";
+}
+void Andersens::PrintConstraint(const Constraint &C) const {
+ if (C.Type == Constraint::Store) {
+ cerr << "*";
+ if (C.Offset != 0)
+ cerr << "(";
+ }
+ PrintNode(&GraphNodes[C.Dest]);
+ if (C.Type == Constraint::Store && C.Offset != 0)
+ cerr << " + " << C.Offset << ")";
+ cerr << " = ";
+ if (C.Type == Constraint::Load) {
+ cerr << "*";
+ if (C.Offset != 0)
+ cerr << "(";
+ }
+ else if (C.Type == Constraint::AddressOf)
+ cerr << "&";
+ PrintNode(&GraphNodes[C.Src]);
+ if (C.Offset != 0 && C.Type != Constraint::Store)
+ cerr << " + " << C.Offset;
+ if (C.Type == Constraint::Load && C.Offset != 0)
+ cerr << ")";
+ cerr << "\n";
+}
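+
+// For reference, the forms printed above look like:
+//   AddressOf: A = &B
+//   Copy: A = B + 4
+//   Load: A = *(B + 4)
+//   Store: *(A + 4) = B
+// (offsets are omitted when zero).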
+
+void Andersens::PrintConstraints() const {
+ cerr << "Constraints:\n";
+
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i)
+ PrintConstraint(Constraints[i]);
+}
+
+void Andersens::PrintPointsToGraph() const {
+ cerr << "Points-to graph:\n";
+ for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) {
+ const Node *N = &GraphNodes[i];
+ if (FindNode(i) != i) {
+ PrintNode(N);
+ cerr << "\t--> same as ";
+ PrintNode(&GraphNodes[FindNode(i)]);
+ cerr << "\n";
+ } else {
+ cerr << "[" << (N->PointsTo->count()) << "] ";
+ PrintNode(N);
+ cerr << "\t--> ";
+
+ bool first = true;
+ for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
+ bi != N->PointsTo->end();
+ ++bi) {
+ if (!first)
+ cerr << ", ";
+ PrintNode(&GraphNodes[*bi]);
+ first = false;
+ }
+ cerr << "\n";
+ }
+ }
+}
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
new file mode 100644
index 0000000..1ebb0be
--- /dev/null
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMipa
+ Andersens.cpp
+ CallGraph.cpp
+ CallGraphSCCPass.cpp
+ FindUsedTypes.cpp
+ GlobalsModRef.cpp
+ )
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
new file mode 100644
index 0000000..6dabcdb
--- /dev/null
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -0,0 +1,314 @@
+//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraph class and provides the BasicCallGraph
+// default implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+#include <ostream>
+using namespace llvm;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BasicCallGraph class definition
+//
+class VISIBILITY_HIDDEN BasicCallGraph : public CallGraph, public ModulePass {
+ // Root is root of the call graph, or the external node if a 'main' function
+ // couldn't be found.
+ //
+ CallGraphNode *Root;
+
+ // ExternalCallingNode - This node has edges to all external functions and
+ // those internal functions that have their address taken.
+ CallGraphNode *ExternalCallingNode;
+
+ // CallsExternalNode - This node has edges to it from all functions making
+ // indirect calls or calling an external function.
+ CallGraphNode *CallsExternalNode;
+
+public:
+ static char ID; // Class identification, replacement for typeinfo
+ BasicCallGraph() : ModulePass(&ID), Root(0),
+ ExternalCallingNode(0), CallsExternalNode(0) {}
+
+ // runOnModule - Compute the call graph for the specified module.
+ virtual bool runOnModule(Module &M) {
+ CallGraph::initialize(M);
+
+ ExternalCallingNode = getOrInsertFunction(0);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
+
+ // Add every function to the call graph...
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
+
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0) Root = ExternalCallingNode;
+
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ void print(std::ostream *o, const Module *M) const {
+ if (o) print(*o, M);
+ }
+
+ virtual void print(std::ostream &o, const Module *M) const {
+ o << "CallGraph Root is: ";
+ if (Function *F = getRoot()->getFunction())
+ o << F->getName() << "\n";
+ else
+ o << "<<null function: 0x" << getRoot() << ">>\n";
+
+ CallGraph::print(o, M);
+ }
+
+ virtual void releaseMemory() {
+ destroy();
+ }
+
+ /// dump - Print out this call graph.
+ ///
+ inline void dump() const {
+ print(cerr, Mod);
+ }
+
+ CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
+ CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
+
+ // getRoot - Return the root of the call graph, which is either main, or if
+ // main cannot be found, the external node.
+ //
+ CallGraphNode *getRoot() { return Root; }
+ const CallGraphNode *getRoot() const { return Root; }
+
+private:
+ //===---------------------------------------------------------------------
+ // Implementation of CallGraph construction
+ //
+
+ // addToCallGraph - Add a function to the call graph, and link the node to all
+ // of the functions that it calls.
+ //
+ void addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
+
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
+ }
+ }
+
+ // Loop over all of the users of the function, looking for non-call uses.
+ for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I)
+ if ((!isa<CallInst>(I) && !isa<InvokeInst>(I))
+ || !CallSite(cast<Instruction>(I)).isCallee(I)) {
+ // Not a call, or being used as a parameter rather than as the callee.
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+ break;
+ }
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ CallSite CS = CallSite::get(II);
+ if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(II)) {
+ const Function *Callee = CS.getCalledFunction();
+ if (Callee)
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+ else
+ Node->addCalledFunction(CS, CallsExternalNode);
+ }
+ }
+ }
+
+ //
+ // destroy - Release memory for the call graph
+ virtual void destroy() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ CallGraph::destroy();
+ }
+};
+
+} //End anonymous namespace
+
+static RegisterAnalysisGroup<CallGraph> X("Call Graph");
+static RegisterPass<BasicCallGraph>
+Y("basiccg", "Basic CallGraph Construction", false, true);
+static RegisterAnalysisGroup<CallGraph, true> Z(Y);
+
+char CallGraph::ID = 0;
+char BasicCallGraph::ID = 0;
+
+void CallGraph::initialize(Module &M) {
+ Mod = &M;
+}
+
+void CallGraph::destroy() {
+ if (!FunctionMap.empty()) {
+ for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+ I != E; ++I)
+ delete I->second;
+ FunctionMap.clear();
+ }
+}
+
+void CallGraph::print(std::ostream &OS, const Module *M) const {
+ for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
+ I->second->print(OS);
+}
+
+void CallGraph::dump() const {
+ print(cerr, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementations of public modification methods
+//
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it. Because this removes the function from the module, the call graph node
+// is destroyed. This is only valid if the function does not call any other
+// functions (i.e., there are no edges in its CGN). The easiest way to do this
+// is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+ assert(CGN->CalledFunctions.empty() && "Cannot remove function from call "
+ "graph if it references other functions!");
+ Function *F = CGN->getFunction(); // Get the function for the call graph node
+ delete CGN; // Delete the call graph node for this func
+ FunctionMap.erase(F); // Remove the call graph node from the map
+
+ Mod->getFunctionList().remove(F);
+ return F;
+}
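+
+// Usage sketch (illustrative; assumes the node's call edges have already
+// been removed, e.g. via removeCallEdgeFor on each call site):
+//
+//   DeadF->dropAllReferences();               // detach the body first
+//   delete CG.removeFunctionFromModule(CGN);  // unlink and free it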
+
+// changeFunction - This method changes the function associated with this
+// CallGraphNode, for use by transformations that need to change the prototype
+// of a Function (thus they must create a new Function and move the old code
+// over).
+void CallGraph::changeFunction(Function *OldF, Function *NewF) {
+ iterator I = FunctionMap.find(OldF);
+ CallGraphNode *&New = FunctionMap[NewF];
+ assert(I != FunctionMap.end() && I->second && !New &&
+ "OldF didn't exist in CG or NewF already does!");
+ New = I->second;
+ New->F = NewF;
+ FunctionMap.erase(I);
+}
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+ CallGraphNode *&CGN = FunctionMap[F];
+ if (CGN) return CGN;
+
+ assert((!F || F->getParent() == Mod) && "Function not in current module!");
+ return CGN = new CallGraphNode(const_cast<Function*>(F));
+}
+
+void CallGraphNode::print(std::ostream &OS) const {
+ if (Function *F = getFunction())
+ OS << "Call graph node for function: '" << F->getName() <<"'\n";
+ else
+ OS << "Call graph node <<null function: 0x" << this << ">>:\n";
+
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (Function *FI = I->second->getFunction())
+ OS << " Calls function '" << FI->getName() <<"'\n";
+ else
+ OS << " Calls external node\n";
+ OS << "\n";
+}
+
+void CallGraphNode::dump() const { print(cerr); }
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS) {
+ CalledFunctions.erase(I);
+ return;
+ }
+ }
+}
+
+
+// removeAnyCallEdgeTo - This method removes any call edges from this node to
+// the specified callee function. This takes more time to execute than
+// removeCallEdgeTo, so it should not be used unless necessary.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+ for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
+ if (CalledFunctions[i].second == Callee) {
+ CalledFunctions[i] = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ --i; --e;
+ }
+}
+
+/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+/// from this node to the specified callee function.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
+ CallRecord &CR = *I;
+ if (CR.second == Callee && !CR.first.getInstruction()) {
+ CalledFunctions.erase(I);
+ return;
+ }
+ }
+}
+
+/// replaceCallSite - Make the edge in the node for Old CallSite be for
+/// New CallSite instead. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::replaceCallSite(CallSite Old, CallSite New) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to replace!");
+ if (I->first == Old) {
+ I->first = New;
+ return;
+ }
+ }
+}
+
+// Ensure that users of CallGraph.h also link with this file
+DEFINING_FILE_FOR(CallGraph)
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 0000000..3880d0a
--- /dev/null
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,207 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph. Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call-graph in SCC order: that is, they process functions bottom-up, except
+// recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+
+public:
+ static char ID;
+ explicit CGPassManager(int Depth)
+ : ModulePass(&ID), PMDataManager(Depth) { }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ bool doInitialization(CallGraph &CG);
+ bool doFinalization(CallGraph &CG);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ // CGPassManager walks SCC and it needs CallGraph.
+ Info.addRequired<CallGraph>();
+ Info.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "CallGraph Pass Manager";
+ }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::cerr << std::string(Offset*2, ' ') << "Call Graph SCC Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+ }
+
+ Pass *getContainedPass(unsigned N) {
+ assert ( N < PassVector.size() && "Pass number out of range!");
+ Pass *FP = static_cast<Pass *>(PassVector[N]);
+ return FP;
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_CallGraphPassManager;
+ }
+};
+
+}
+
+char CGPassManager::ID = 0;
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool Changed = doInitialization(CG);
+
+ // Walk SCC
+ for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG);
+ I != E; ++I) {
+
+ // Run all passes on current SCC
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, "");
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ StartPassTimer(P);
+ if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P))
+ Changed |= CGSP->runOnSCC(*I); // TODO : What if CG is changed ?
+ else {
+ FPPassManager *FPP = dynamic_cast<FPPassManager *>(P);
+ assert (FPP && "Invalid CGPassManager member");
+
+ // Run pass P on all functions current SCC
+ std::vector<CallGraphNode*> &SCC = *I;
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (F) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getNameStart());
+ Changed |= FPP->runOnFunction(*F);
+ }
+ }
+ }
+ StopPassTimer(P);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+ dumpPreservedSet(P);
+
+ verifyPreservedAnalysis(P);
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, "", ON_CG_MSG);
+ }
+ }
+ Changed |= doFinalization(CG);
+ return Changed;
+}
+
+/// Initialize CG
+bool CGPassManager::doInitialization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P)) {
+ Changed |= CGSP->doInitialization(CG);
+ } else {
+ FPPassManager *FP = dynamic_cast<FPPassManager *>(P);
+ assert (FP && "Invalid CGPassManager member");
+ Changed |= FP->doInitialization(CG.getModule());
+ }
+ }
+ return Changed;
+}
+
+/// Finalize CG
+bool CGPassManager::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ if (CallGraphSCCPass *CGSP = dynamic_cast<CallGraphSCCPass *>(P)) {
+ Changed |= CGSP->doFinalization(CG);
+ } else {
+ FPPassManager *FP = dynamic_cast<FPPassManager *>(P);
+ assert (FP && "Invalid CGPassManager member");
+ Changed |= FP->doFinalization(CG.getModule());
+ }
+ }
+ return Changed;
+}
+
+/// Assign pass manager to manage this pass.
+void CallGraphSCCPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find CGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
+ PMS.pop();
+
+ assert (!PMS.empty() && "Unable to handle Call Graph Pass");
+ CGPassManager *CGP = dynamic_cast<CGPassManager *>(PMS.top());
+
+ // Create new Call Graph SCC Pass Manager if it does not exist.
+ if (!CGP) {
+
+ assert (!PMS.empty() && "Unable to create Call Graph Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Call Graph Pass Manager
+ CGP = new CGPassManager(PMD->getDepth() + 1);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(CGP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = dynamic_cast<Pass *>(CGP);
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(CGP);
+ }
+
+ CGP->add(this);
+}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<CallGraph>();
+ AU.addPreserved<CallGraph>();
+}
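+
+// A minimal client sketch (hypothetical pass; runOnSCC takes the SCC's
+// CallGraphNode vector, matching the call in CGPassManager above):
+//
+//   namespace {
+//     struct PrintSCCSizes : public CallGraphSCCPass {
+//       static char ID;
+//       PrintSCCSizes() : CallGraphSCCPass(&ID) {}
+//       virtual bool runOnSCC(std::vector<CallGraphNode*> &SCC) {
+//         llvm::cerr << "SCC of size " << SCC.size() << "\n";
+//         return false; // the call graph was not modified
+//       }
+//     };
+//   }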
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
new file mode 100644
index 0000000..920ee37
--- /dev/null
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -0,0 +1,104 @@
+//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to seek out all of the types in use by the program. Note
+// that this analysis explicitly does not include types only used by the symbol
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char FindUsedTypes::ID = 0;
+static RegisterPass<FindUsedTypes>
+X("print-used-types", "Find Used Types", false, true);
+
+// IncorporateType - Incorporate one type and all of its subtypes into the
+// collection of used types.
+//
+void FindUsedTypes::IncorporateType(const Type *Ty) {
+ // If Ty doesn't already exist in the used types set, add it now, otherwise
+ // return.
+ if (!UsedTypes.insert(Ty).second) return; // Already contains Ty.
+
+ // Make sure to add any types this type references now.
+ //
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ IncorporateType(*I);
+}
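+
+// For example, incorporating the type "{ i32, [4 x i8*] }" also records
+// i32, [4 x i8*], i8* and i8, since each is reachable through the
+// subtype iterators above.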
+
+void FindUsedTypes::IncorporateValue(const Value *V) {
+ IncorporateType(V->getType());
+
+ // If this is a constant, it could be using other types...
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (!isa<GlobalValue>(C))
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI);
+ }
+}
+
+
+// run - This incorporates all types used by the specified module
+//
+bool FindUsedTypes::runOnModule(Module &m) {
+ UsedTypes.clear(); // reset if run multiple times...
+
+ // Loop over global variables, incorporating their types
+ for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
+ I != E; ++I) {
+ IncorporateType(I->getType());
+ if (I->hasInitializer())
+ IncorporateValue(I->getInitializer());
+ }
+
+ for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
+ IncorporateType(MI->getType());
+ const Function &F = *MI;
+
+ // Loop over all of the instructions in the function, adding their return
+ // type as well as the types of their operands.
+ //
+ for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
+ II != IE; ++II) {
+ const Instruction &I = *II;
+
+ IncorporateType(I.getType()); // Incorporate the type of the instruction
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI); // Insert inst operand types as well
+ }
+ }
+
+ return false;
+}
+
+// Print the types found in the module. If the optional Module parameter is
+// passed in, then the types are printed symbolically if possible, using the
+// symbol table from the module.
+//
+void FindUsedTypes::print(std::ostream &OS, const Module *M) const {
+ raw_os_ostream RO(OS);
+ RO << "Types in use by this module:\n";
+ for (std::set<const Type *>::const_iterator I = UsedTypes.begin(),
+ E = UsedTypes.end(); I != E; ++I) {
+ RO << " ";
+ WriteTypeSymbolic(RO, *I, M);
+ RO << '\n';
+ }
+}
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
new file mode 100644
index 0000000..2e9884a
--- /dev/null
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -0,0 +1,567 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure"). For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalsmodref-aa"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SCCIterator.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+ "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+namespace {
+ /// FunctionRecord - One instance of this structure is stored for every
+ /// function in the program. Later, the entries for these functions are
+ /// removed if the function is found to call an external function (in which
+ /// case we know nothing about it).
+ struct VISIBILITY_HIDDEN FunctionRecord {
+ /// GlobalInfo - Maintain mod/ref info for all of the globals without
+ /// addresses taken that are read or written (transitively) by this
+ /// function.
+ std::map<GlobalValue*, unsigned> GlobalInfo;
+
+ /// MayReadAnyGlobal - May read global variables, but it is not known which.
+ bool MayReadAnyGlobal;
+
+ unsigned getInfoForGlobal(GlobalValue *GV) const {
+ unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+ std::map<GlobalValue*, unsigned>::const_iterator I = GlobalInfo.find(GV);
+ if (I != GlobalInfo.end())
+ Effect |= I->second;
+ return Effect;
+ }
+
+ /// FunctionEffect - Capture whether or not this function reads or writes to
+ /// ANY memory. If not, we can do a lot of aggressive analysis on it.
+ unsigned FunctionEffect;
+
+ FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {}
+ };
+
+ /// GlobalsModRef - The actual analysis pass.
+ class VISIBILITY_HIDDEN GlobalsModRef
+ : public ModulePass, public AliasAnalysis {
+ /// NonAddressTakenGlobals - The globals that do not have their addresses
+ /// taken.
+ std::set<GlobalValue*> NonAddressTakenGlobals;
+
+ /// IndirectGlobals - The memory pointed to by this global is known to be
+ /// 'owned' by the global.
+ std::set<GlobalValue*> IndirectGlobals;
+
+ /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+ /// indirect global, this map indicates which one.
+ std::map<Value*, GlobalValue*> AllocsForIndirectGlobals;
+
+ /// FunctionInfo - For each function, keep track of what globals are
+ /// modified or read.
+ std::map<Function*, FunctionRecord> FunctionInfo;
+
+ public:
+ static char ID;
+ GlobalsModRef() : ModulePass(&ID) {}
+
+ bool runOnModule(Module &M) {
+ InitializeAliasAnalysis(this); // set up super class
+ AnalyzeGlobals(M); // find non-addr taken globals
+ AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<CallGraph>();
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+ ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+ ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1,CS2);
+ }
+ bool hasNoModRefInfoForCalls() const { return false; }
+
+ /// getModRefBehavior - Return the behavior of the specified function,
+ /// independent of any particular call site.
+ ModRefBehavior getModRefBehavior(Function *F,
+ std::vector<PointerAccessInfo> *Info) {
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ return DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ return OnlyReadsMemory;
+ }
+ return AliasAnalysis::getModRefBehavior(F, Info);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(CallSite CS,
+ std::vector<PointerAccessInfo> *Info) {
+ Function* F = CS.getCalledFunction();
+ if (!F) return AliasAnalysis::getModRefBehavior(CS, Info);
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ return DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ return OnlyReadsMemory;
+ }
+ return AliasAnalysis::getModRefBehavior(CS, Info);
+ }
+
+ virtual void deleteValue(Value *V);
+ virtual void copyValue(Value *From, Value *To);
+
+ private:
+ /// getFunctionInfo - Return the function info for the function, or null if
+ /// we don't have anything useful to say about it.
+ FunctionRecord *getFunctionInfo(Function *F) {
+ std::map<Function*, FunctionRecord>::iterator I = FunctionInfo.find(F);
+ if (I != FunctionInfo.end())
+ return &I->second;
+ return 0;
+ }
+
+ void AnalyzeGlobals(Module &M);
+ void AnalyzeCallGraph(CallGraph &CG, Module &M);
+ bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest = 0);
+ bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
+ };
+}
+
+char GlobalsModRef::ID = 0;
+static RegisterPass<GlobalsModRef>
+X("globalsmodref-aa", "Simple mod/ref analysis for globals", false, true);
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValue's in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsModRef::AnalyzeGlobals(Module &M) {
+ std::vector<Function*> Readers, Writers;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global.
+ NonAddressTakenGlobals.insert(I);
+ ++NumNonAddrTakenFunctions;
+ }
+ Readers.clear(); Writers.clear();
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
+ // Remember that we are tracking this global, and the mod/ref fns
+ NonAddressTakenGlobals.insert(I);
+
+ for (unsigned i = 0, e = Readers.size(); i != e; ++i)
+ FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
+
+ if (!I->isConstant()) // No need to keep track of writers to constants
+ for (unsigned i = 0, e = Writers.size(); i != e; ++i)
+ FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
+ ++NumNonAddrTakenGlobalVars;
+
+ // If this global holds a pointer type, see if it is an indirect global.
+ if (isa<PointerType>(I->getType()->getElementType()) &&
+ AnalyzeIndirectGlobalMemory(I))
+ ++NumIndirectGlobalVars;
+ }
+ Readers.clear(); Writers.clear();
+ }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
+ std::vector<Function*> &Readers,
+ std::vector<Function*> &Writers,
+ GlobalValue *OkayStoreDest) {
+ if (!isa<PointerType>(V->getType())) return true;
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ Readers.push_back(LI->getParent()->getParent());
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ if (V == SI->getOperand(1)) {
+ Writers.push_back(SI->getParent()->getParent());
+ } else if (SI->getOperand(1) != OkayStoreDest) {
+ return true; // Storing the pointer
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
+ } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
+ if (CI->getOperand(i) == V) return true;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i)
+ if (II->getOperand(i) == V) return true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr ||
+ CE->getOpcode() == Instruction::BitCast) {
+ if (AnalyzeUsesOfPointer(CE, Readers, Writers))
+ return true;
+ } else {
+ return true;
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+ } else if (FreeInst *F = dyn_cast<FreeInst>(*UI)) {
+ Writers.push_back(F->getParent()->getParent());
+ } else {
+ return true;
+ }
+ return false;
+}
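+
+// For example (illustrative IR): "%v = load i32* @g" records a reader and
+// "store i32 0, i32* @g" records a writer, while "store i32* @g, i32** %p"
+// stores the pointer itself and is therefore treated as escaping.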
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
+ // Keep track of values related to the allocation of the memory, e.g. the
+ // value produced by the malloc call and any casts.
+ std::vector<Value*> AllocRelatedValues;
+
+ // Walk the user list of the global. If we find anything other than a direct
+ // load or store, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ if (LoadInst *LI = dyn_cast<LoadInst>(*I)) {
+ // The pointer loaded from the global can only be used in simple ways:
+ // we allow addressing of it and loading/storing through it. We do *not* allow
+ // storing the loaded pointer somewhere else or passing to a function.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
+ return false; // Loaded pointer escapes.
+ // TODO: Could try some IP mod/ref of the loaded pointer.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(*I)) {
+ // Storing the global itself.
+ if (SI->getOperand(0) == GV) return false;
+
+ // If storing the null pointer, ignore it.
+ if (isa<ConstantPointerNull>(SI->getOperand(0)))
+ continue;
+
+ // Check the value being stored.
+ Value *Ptr = SI->getOperand(0)->getUnderlyingObject();
+
+ if (isa<MallocInst>(Ptr)) {
+ // Okay, easy case.
+ } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
+ Function *F = CI->getCalledFunction();
+ if (!F || !F->isDeclaration()) return false; // Too hard to analyze.
+ if (F->getName() != "calloc") return false; // Not calloc.
+ } else {
+ return false; // Too hard to analyze.
+ }
+
+ // Analyze all uses of the allocation. If any of them are used in a
+ // non-simple way (e.g. stored to another global) bail out.
+ std::vector<Function*> ReadersWriters;
+ if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
+ return false; // Loaded pointer escapes.
+
+ // Remember that this allocation is related to the indirect global.
+ AllocRelatedValues.push_back(Ptr);
+ } else {
+ // Something complex, bail out.
+ return false;
+ }
+ }
+
+ // Okay, this is an indirect global. Remember all of the allocations for
+ // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) {
+ AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+ AllocRelatedValues.pop_back();
+ }
+ IndirectGlobals.insert(GV);
+ return true;
+}
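+
+// The shape being recognized, in illustrative (LLVM 2.x) IR:
+//
+//   @handle = internal global i8* null
+//   ...
+//   %mem = malloc i8, i32 %n      ; or a direct call to calloc
+//   store i8* %mem, i8** @handle  ; the only use of %mem
+//   ...
+//   %p = load i8** @handle        ; %p may be loaded from and stored to,
+//                                 ; but never stored away or passed along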
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E;
+ ++I) {
+ std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ if (!SCC[0]->getFunction()) {
+ // Calls externally - can't say anything useful. Remove any existing
+ // function records (may have been created when scanning globals).
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()];
+
+ bool KnowNothing = false;
+ unsigned FunctionEffect = 0;
+
+ // Collect the mod/ref properties due to called functions. We only compute
+ // one mod-ref set.
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) {
+ KnowNothing = true;
+ break;
+ }
+
+ if (F->isDeclaration()) {
+ // Try to get mod/ref behaviour from function attributes.
+ if (F->doesNotAccessMemory()) {
+ // Can't do better than that!
+ } else if (F->onlyReadsMemory()) {
+ FunctionEffect |= Ref;
+ if (!F->isIntrinsic())
+ // This function might call back into the module and read a global -
+ // consider every global as possibly being read by this function.
+ FR.MayReadAnyGlobal = true;
+ } else {
+ FunctionEffect |= ModRef;
+ // Can't say anything useful unless it's an intrinsic - they don't
+ // read or write global variables of the kind considered here.
+ KnowNothing = !F->isIntrinsic();
+ }
+ continue;
+ }
+
+ for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+ CI != E && !KnowNothing; ++CI)
+ if (Function *Callee = CI->second->getFunction()) {
+ if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) {
+ // Propagate function effect up.
+ FunctionEffect |= CalleeFR->FunctionEffect;
+
+ // Incorporate callee's effects on globals into our info.
+ for (std::map<GlobalValue*, unsigned>::iterator GI =
+ CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
+ GI != E; ++GI)
+ FR.GlobalInfo[GI->first] |= GI->second;
+ FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
+ } else {
+ // Can't say anything about it. However, if it is inside our SCC,
+ // then nothing needs to be done.
+ CallGraphNode *CalleeNode = CG[Callee];
+ if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ KnowNothing = true;
+ }
+ } else {
+ KnowNothing = true;
+ }
+ }
+
+ // If we can't say anything useful about this SCC, remove all SCC functions
+ // from the FunctionInfo map.
+ if (KnowNothing) {
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ FunctionInfo.erase(SCC[i]->getFunction());
+ continue;
+ }
+
+ // Scan the function bodies for explicit loads or stores.
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i)
+ for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
+ E = inst_end(SCC[i]->getFunction());
+ II != E && FunctionEffect != ModRef; ++II)
+ if (isa<LoadInst>(*II)) {
+ FunctionEffect |= Ref;
+ if (cast<LoadInst>(*II).isVolatile())
+ // Volatile loads may have side-effects, so mark them as writing
+ // memory (for example, a flag inside the processor).
+ FunctionEffect |= Mod;
+ } else if (isa<StoreInst>(*II)) {
+ FunctionEffect |= Mod;
+ if (cast<StoreInst>(*II).isVolatile())
+ // Treat volatile stores as reading memory somewhere.
+ FunctionEffect |= Ref;
+ } else if (isa<MallocInst>(*II) || isa<FreeInst>(*II)) {
+ FunctionEffect |= ModRef;
+ }
+
+ if ((FunctionEffect & Mod) == 0)
+ ++NumReadMemFunctions;
+ if (FunctionEffect == 0)
+ ++NumNoMemFunctions;
+ FR.FunctionEffect = FunctionEffect;
+
+ // Finally, now that we know the full effect on this SCC, clone the
+ // information to each function in the SCC.
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+ FunctionInfo[SCC[i]->getFunction()] = FR;
+ }
+}
+
+
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasAnalysis::AliasResult
+GlobalsModRef::alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ // Get the base object these pointers point to.
+ Value *UV1 = const_cast<Value*>(V1->getUnderlyingObject());
+ Value *UV2 = const_cast<Value*>(V2->getUnderlyingObject());
+
+ // If either of the underlying values is a global, they may be non-addr-taken
+ // globals, which we can answer queries about.
+ GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ if (GV1 || GV2) {
+ // If the global's address is taken, pretend we don't know it's a pointer to
+ // the global.
+ if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;
+
+ // If the two pointers are derived from two different non-addr-taken
+ // globals, or if one is and the other isn't, we know these can't alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ // Otherwise if they are both derived from the same addr-taken global, we
+ // can't know the two accesses don't overlap.
+ }
+
+ // These pointers may be based on the memory owned by an indirect global. If
+ // so, we may be able to handle this. First check to see if the base pointer
+ // is a direct load from an indirect global.
+ GV1 = GV2 = 0;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV1 = GV;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV2 = GV;
+
+ // These pointers may also be from an allocation for the indirect global. If
+ // so, also handle them.
+ if (AllocsForIndirectGlobals.count(UV1))
+ GV1 = AllocsForIndirectGlobals[UV1];
+ if (AllocsForIndirectGlobals.count(UV2))
+ GV2 = AllocsForIndirectGlobals[UV2];
+
+ // Now that we know whether the two pointers are related to indirect globals,
+ // use this to disambiguate the pointers. If either pointer is based on an
+ // indirect global and if they are not both based on the same indirect global,
+ // they cannot alias.
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+}
+
+AliasAnalysis::ModRefResult
+GlobalsModRef::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ unsigned Known = ModRef;
+
+ // If we are asking for mod/ref info of a direct call with a pointer to a
+ // global we are tracking, return information if we have it.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject()))
+ if (GV->hasLocalLinkage())
+ if (Function *F = CS.getCalledFunction())
+ if (NonAddressTakenGlobals.count(GV))
+ if (FunctionRecord *FR = getFunctionInfo(F))
+ Known = FR->getInfoForGlobal(GV);
+
+ if (Known == NoModRef)
+ return NoModRef; // No need to query other mod/ref analyses
+ return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, P, Size));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods to update the analysis as a result of the client transformation.
+//
+void GlobalsModRef::deleteValue(Value *V) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (NonAddressTakenGlobals.erase(GV)) {
+ // This global might be an indirect global. If so, remove it and remove
+ // any AllocRelatedValues for it.
+ if (IndirectGlobals.erase(GV)) {
+ // Remove any entries in AllocsForIndirectGlobals for this global.
+ for (std::map<Value*, GlobalValue*>::iterator
+ I = AllocsForIndirectGlobals.begin(),
+ E = AllocsForIndirectGlobals.end(); I != E; ) {
+ if (I->second == GV) {
+ AllocsForIndirectGlobals.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ }
+ }
+ }
+
+ // Otherwise, if this is an allocation related to an indirect global, remove
+ // it.
+ AllocsForIndirectGlobals.erase(V);
+
+ AliasAnalysis::deleteValue(V);
+}
+
+void GlobalsModRef::copyValue(Value *From, Value *To) {
+ AliasAnalysis::copyValue(From, To);
+}
diff --git a/lib/Analysis/IPA/Makefile b/lib/Analysis/IPA/Makefile
new file mode 100644
index 0000000..adacb16
--- /dev/null
+++ b/lib/Analysis/IPA/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Analysis/IPA/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMipa
+BUILD_ARCHIVE = 1
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
new file mode 100644
index 0000000..7af9130
--- /dev/null
+++ b/lib/Analysis/IVUsers.cpp
@@ -0,0 +1,391 @@
+//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "iv-users"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+char IVUsers::ID = 0;
+static RegisterPass<IVUsers>
+X("iv-users", "Induction Variable Users", false, true);
+
+Pass *llvm::createIVUsersPass() {
+ return new IVUsers();
+}
+
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// of L is OK, but not an inner loop nor a disjoint loop.
+static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
+ // This is very common, put it first.
+ if (isa<SCEVConstant>(S))
+ return false;
+ if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (unsigned int i=0; i< AE->getNumOperands(); i++)
+ if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
+ return true;
+ return false;
+ }
+ if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (const Loop *newLoop = AE->getLoop()) {
+ if (newLoop == L)
+ return false;
+ // if newLoop is an outer loop of L, this is OK.
+ if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop))
+ return false;
+ }
+ return true;
+ }
+ if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
+ return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
+ containsAddRecFromDifferentLoop(DE->getRHS(), L);
+#if 0
+ // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+ // need this when it is.
+ if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
+ return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
+ containsAddRecFromDifferentLoop(DE->getRHS(), L);
+#endif
+ if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
+ return containsAddRecFromDifferentLoop(CE->getOperand(), L);
+ return false;
+}
+
+/// getSCEVStartAndStride - Compute the start and stride of this expression,
+/// returning false if the expression is not a start/stride pair, or true if it
+/// is. The stride must be a loop invariant expression, but the start may be
+/// a mix of loop invariant and loop variant expressions. The start cannot,
+/// however, contain an AddRec from a different loop, unless that loop is an
+/// outer loop of the current loop.
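+///
+/// For example, assuming %x is invariant in loop L, the expression
+/// (4 + %x + {0,+,8}<L>) decomposes into Start = (4 + %x) and Stride = 8.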
+static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
+ SCEVHandle &Start, SCEVHandle &Stride,
+ bool &isSigned,
+ ScalarEvolution *SE, DominatorTree *DT) {
+ SCEVHandle TheAddRec = Start; // Start is zero on entry, so this is zero too.
+ bool isSExt = false;
+ bool isZExt = false;
+
+ // If the outer level is an AddExpr, the operands are all start values except
+ // for a nested AddRecExpr.
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
+ for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
+ if (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
+ if (AddRec->getLoop() == L)
+ TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
+ else
+ return false; // Nested IV of some sort?
+ } else {
+ Start = SE->getAddExpr(Start, AE->getOperand(i));
+ }
+
+ } else if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(SH)) {
+ TheAddRec = Z->getOperand();
+ isZExt = true;
+ } else if (const SCEVSignExtendExpr *S = dyn_cast<SCEVSignExtendExpr>(SH)) {
+ TheAddRec = S->getOperand();
+ isSExt = true;
+ } else if (isa<SCEVAddRecExpr>(SH)) {
+ TheAddRec = SH;
+ } else {
+ return false; // not analyzable.
+ }
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
+ if (!AddRec || AddRec->getLoop() != L) return false;
+
+ // Use getSCEVAtScope to attempt to simplify other loops out of
+ // the picture.
+ SCEVHandle AddRecStart = AddRec->getStart();
+ SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
+ if (!isa<SCEVCouldNotCompute>(BetterAddRecStart))
+ AddRecStart = BetterAddRecStart;
+
+ // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
+ // than an outer loop of the current loop, reject it. LSR has no concept of
+ // operating on more than one loop at a time so don't confuse it with such
+ // expressions.
+ if (containsAddRecFromDifferentLoop(AddRecStart, L))
+ return false;
+
+ if (isSExt || isZExt)
+ Start = SE->getTruncateExpr(Start, AddRec->getType());
+
+ Start = SE->getAddExpr(Start, AddRecStart);
+
+ if (!isa<SCEVConstant>(AddRec->getStepRecurrence(*SE))) {
+ // If stride is an instruction, make sure it dominates the loop preheader.
+ // Otherwise we could end up with a use-before-def situation.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT))
+ return false;
+
+ DOUT << "[" << L->getHeader()->getName()
+ << "] Variable stride: " << *AddRec << "\n";
+ }
+
+ Stride = AddRec->getStepRecurrence(*SE);
+ isSigned = isSExt;
+ return true;
+}
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
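+///
+/// For example, a use in a block that executes only after the loop exits
+/// (and is dominated by the latch) sees the IV after its final increment,
+/// so it should use the post-inc value.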
+static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
+ Loop *L, LoopInfo *LI, DominatorTree *DT,
+ Pass *P) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User->getParent())) return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN) return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of IV by the PHI node. If any use corresponds to
+ // a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ unsigned NumUses = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == IV) {
+ ++NumUses;
+ if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+ }
+
+ // Okay, all uses of IV by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
+/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
+/// reducible SCEV, recursively add its users to the IVUsesByStride set and
+/// return true. Otherwise, return false.
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false; // Void and FP expressions cannot be reduced.
+
+ // LSR is not APInt-clean; do not touch integers bigger than 64 bits.
+ if (SE->getTypeSizeInBits(I->getType()) > 64)
+ return false;
+
+ if (!Processed.insert(I))
+ return true; // Instruction already handled.
+
+ // Get the symbolic expression for this instruction.
+ SCEVHandle ISE = SE->getSCEV(I);
+ if (isa<SCEVCouldNotCompute>(ISE)) return false;
+
+ // Get the start and stride for this expression.
+ Loop *UseLoop = LI->getLoopFor(I->getParent());
+ SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
+ SCEVHandle Stride = Start;
+ bool isSigned = false; // Arbitrary initial value - pacifies compiler.
+
+ if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT))
+ return false; // Non-reducible symbolic expression, bail out.
+
+ SmallPtrSet<Instruction *, 4> UniqueUsers;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!UniqueUsers.insert(User))
+ continue;
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && Processed.count(User))
+ continue;
+
+ // Descend recursively, but not into PHI nodes outside the current loop.
+ // It's important to see the entire expression outside the loop to get
+ // choices that depend on addressing-mode use right, although we won't
+ // consider references outside the loop in all cases.
+ // If User is already in Processed, we don't want to recurse into it again,
+ // but do want to record a second reference in the same instruction.
+ bool AddUserToIVUsers = false;
+ if (LI->getLoopFor(User->getParent()) != L) {
+ if (isa<PHINode>(User) || Processed.count(User) ||
+ !AddUsersIfInteresting(User)) {
+ DOUT << "FOUND USER in other loop: " << *User
+ << " OF SCEV: " << *ISE << "\n";
+ AddUserToIVUsers = true;
+ }
+ } else if (Processed.count(User) ||
+ !AddUsersIfInteresting(User)) {
+ DOUT << "FOUND USER: " << *User
+ << " OF SCEV: " << *ISE << "\n";
+ AddUserToIVUsers = true;
+ }
+
+ if (AddUserToIVUsers) {
+ IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
+ if (!StrideUses) { // First occurrence of this stride?
+ StrideOrder.push_back(Stride);
+ StrideUses = new IVUsersOfOneStride(Stride);
+ IVUses.push_back(StrideUses);
+ IVUsesByStride[Stride] = StrideUses;
+ }
+
+ // Okay, we found a user that we cannot reduce. Analyze the instruction
+ // and decide what to do with it. If the use is inside the loop, use the
+ // value before incrementation; otherwise use the value after incrementation.
+ if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) {
+ // The value used will be incremented by the stride more than we are
+ // expecting, so subtract this off.
+ SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
+ StrideUses->addUser(NewStart, User, I, isSigned);
+ StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
+ DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
+ } else {
+ StrideUses->addUser(Start, User, I, isSigned);
+ }
+ }
+ }
+ return true;
+}
+
+IVUsers::IVUsers()
+ : LoopPass(&ID) {
+}
+
+void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.setPreservesAll();
+}
+
+bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
+
+ L = l;
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = &getAnalysis<ScalarEvolution>();
+
+ // Find all uses of induction variables in this loop, and categorize
+ // them by stride. Start by finding all of the PHI nodes in the header for
+ // this loop. If they are induction variables, inspect their uses.
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+ AddUsersIfInteresting(I);
+
+ return false;
+}
+
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace of the given IVStrideUse.
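+///
+/// For a use with offset X and stride S in loop L, this effectively builds
+/// {X,+,S}<L> (the offset X is added separately since it may be
+/// loop-variant), plus one extra S for a use of the post-incremented value.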
+SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const {
+ const Type *UseTy = U.getOperandValToReplace()->getType();
+ // Start with zero.
+ SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+ // Create the basic add recurrence.
+ RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
+ // Add the offset in a separate step, because it may be loop-variant.
+ RetVal = SE->getAddExpr(RetVal, U.getOffset());
+ // For uses of post-incremented values, add an extra stride to compute
+ // the actual replacement value.
+ if (U.isUseOfPostIncrementedValue())
+ RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(U.getUser()->getParent())) {
+ SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
+ if (!isa<SCEVCouldNotCompute>(ExitVal) && ExitVal->isLoopInvariant(L))
+ RetVal = ExitVal;
+ }
+ // Promote the result to the type of the use.
+ if (SE->getTypeSizeInBits(RetVal->getType()) !=
+ SE->getTypeSizeInBits(UseTy)) {
+ if (U.isSigned())
+ RetVal = SE->getSignExtendExpr(RetVal, UseTy);
+ else
+ RetVal = SE->getZeroExtendExpr(RetVal, UseTy);
+ }
+ return RetVal;
+}
+
+void IVUsers::print(raw_ostream &OS, const Module *M) const {
+ OS << "IV Users for loop ";
+ WriteAsOperand(OS, L->getHeader(), false);
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << " with backedge-taken count "
+ << *SE->getBackedgeTakenCount(L);
+ }
+ OS << ":\n";
+
+ for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
+ std::map<SCEVHandle, IVUsersOfOneStride*>::const_iterator SI =
+ IVUsesByStride.find(StrideOrder[Stride]);
+ assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
+ OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
+
+ for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
+ E = SI->second->Users.end(); UI != E; ++UI) {
+ OS << " ";
+ WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+ OS << " = ";
+ OS << *getReplacementExpr(*UI);
+ if (UI->isUseOfPostIncrementedValue())
+ OS << " (post-inc)";
+ OS << " in ";
+ UI->getUser()->print(OS);
+ }
+ }
+}
+
+void IVUsers::print(std::ostream &o, const Module *M) const {
+ raw_os_ostream OS(o);
+ print(OS, M);
+}
+
+void IVUsers::dump() const {
+ print(errs());
+}
+
+void IVUsers::releaseMemory() {
+ IVUsesByStride.clear();
+ StrideOrder.clear();
+ Processed.clear();
+}
+
+void IVStrideUse::deleted() {
+ // Remove this user from the list.
+ Parent->Users.erase(this);
+ // this now dangles!
+}
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
new file mode 100644
index 0000000..2dea7b3
--- /dev/null
+++ b/lib/Analysis/InstCount.cpp
@@ -0,0 +1,86 @@
+//===-- InstCount.cpp - Collects the count of all instructions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcount"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/Statistic.h"
+#include <ostream>
+using namespace llvm;
+
+STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalBlocks, "Number of basic blocks");
+STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalMemInst, "Number of memory instructions");
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+
+#include "llvm/Instruction.def"
+
+
+namespace {
+ class VISIBILITY_HIDDEN InstCount
+ : public FunctionPass, public InstVisitor<InstCount> {
+ friend class InstVisitor<InstCount>;
+
+ void visitFunction (Function &F) { ++TotalFuncs; }
+ void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+
+#include "llvm/Instruction.def"
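+
+ // For illustration, each HANDLE_INST expansion above generates a visitor
+ // such as:
+ //   void visitLoad(LoadInst &) { ++NumLoadInst; ++TotalInsts; }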
+
+ void visitInstruction(Instruction &I) {
+ cerr << "Instruction Count does not know about " << I;
+ abort();
+ }
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ InstCount() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(std::ostream &O, const Module *M) const {}
+
+ };
+}
+
+char InstCount::ID = 0;
+static RegisterPass<InstCount>
+X("instcount", "Counts the various types of Instructions", false, true);
+
+FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
+
+// InstCount::runOnFunction - This is the main analysis entry point for a
+// function.
+//
+bool InstCount::runOnFunction(Function &F) {
+ unsigned StartMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst + NumMallocInst + NumFreeInst;
+ visit(F);
+ unsigned EndMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst + NumMallocInst + NumFreeInst;
+ TotalMemInst += EndMemInsts-StartMemInsts;
+ return false;
+}
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
new file mode 100644
index 0000000..16b1947
--- /dev/null
+++ b/lib/Analysis/Interval.cpp
@@ -0,0 +1,57 @@
+//===- Interval.cpp - Interval class code ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the Interval class, which represents a
+// partition of a control flow graph of some kind.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Interval.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Support/CFG.h"
+#include <algorithm>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Interval Implementation
+//===----------------------------------------------------------------------===//
+
+// isLoop - Find out if there is a back edge in this interval...
+//
+bool Interval::isLoop() const {
+ // There is a loop in this interval iff one of the predecessors of the header
+ // node lives in the interval.
+ for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
+ I != E; ++I) {
+ if (contains(*I)) return true;
+ }
+ return false;
+}
+
+
+void Interval::print(std::ostream &o) const {
+ o << "-------------------------------------------------------------\n"
+ << "Interval Contents:\n";
+
+ // Print out all of the basic blocks in the interval...
+ for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
+ E = Nodes.end(); I != E; ++I)
+ o << **I << "\n";
+
+ o << "Interval Predecessors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
+ E = Predecessors.end(); I != E; ++I)
+ o << **I << "\n";
+
+ o << "Interval Successors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
+ E = Successors.end(); I != E; ++I)
+ o << **I << "\n";
+}
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 0000000..cb8a85d
--- /dev/null
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,114 @@
+//===- IntervalPartition.cpp - Interval Partition module code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the IntervalPartition class, which
+// calculates and represents the interval partition of a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IntervalIterator.h"
+using namespace llvm;
+
+char IntervalPartition::ID = 0;
+static RegisterPass<IntervalPartition>
+X("intervals", "Interval Partition Construction", true, true);
+
+//===----------------------------------------------------------------------===//
+// IntervalPartition Implementation
+//===----------------------------------------------------------------------===//
+
+// releaseMemory - Reset state back to before function was analyzed
+void IntervalPartition::releaseMemory() {
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ delete Intervals[i];
+ IntervalMap.clear();
+ Intervals.clear();
+ RootInterval = 0;
+}
+
+void IntervalPartition::print(std::ostream &O, const Module*) const {
+ for(unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ Intervals[i]->print(O);
+}
+
+// addIntervalToPartition - Add an interval to the internal list of intervals,
+// and then add mappings from all of the basic blocks in the interval to the
+// interval itself (in the IntervalMap).
+//
+void IntervalPartition::addIntervalToPartition(Interval *I) {
+ Intervals.push_back(I);
+
+ // Add mappings for all of the basic blocks in I to the IntervalPartition
+ for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end();
+ It != End; ++It)
+ IntervalMap.insert(std::make_pair(*It, I));
+}
+
+// updatePredecessors - Interval generation only sets the successor fields of
+// the interval data structures. After interval generation is complete,
+// run through all of the intervals and propagate successor info as
+// predecessor info.
+//
+void IntervalPartition::updatePredecessors(Interval *Int) {
+ BasicBlock *Header = Int->getHeaderNode();
+ for (Interval::succ_iterator I = Int->Successors.begin(),
+ E = Int->Successors.end(); I != E; ++I)
+ getBlockInterval(*I)->Predecessors.push_back(Header);
+}
+
+// IntervalPartition ctor - Build the first level interval partition for the
+// specified function...
+//
+bool IntervalPartition::runOnFunction(Function &F) {
+ // Pass false to intervals_begin because we take ownership of its memory.
+ function_interval_iterator I = intervals_begin(&F, false);
+ assert(I != intervals_end(&F) && "No intervals in function!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (function_interval_iterator E = intervals_end(&F); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+ return false;
+}
+
+
+// IntervalPartition ctor - Build a reduced interval partition from an
+// existing interval graph. This takes an additional boolean parameter to
+// distinguish it from a copy constructor. Always pass in false for now.
+//
+IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
+ : FunctionPass(&ID) {
+ assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
+
+ // Pass false to intervals_begin because we take ownership of its memory.
+ interval_part_interval_iterator I = intervals_begin(IP, false);
+ assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+}
+
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
new file mode 100644
index 0000000..971e6e7
--- /dev/null
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -0,0 +1,141 @@
+//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LibCallAliasAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallAliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+// Register this pass...
+char LibCallAliasAnalysis::ID = 0;
+static RegisterPass<LibCallAliasAnalysis>
+X("libcall-aa", "LibCall Alias Analysis", false, true);
+
+// Declare that we implement the AliasAnalysis interface
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
+ return new LibCallAliasAnalysis(LCI);
+}
+
+LibCallAliasAnalysis::~LibCallAliasAnalysis() {
+ delete LCI;
+}
+
+void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<TargetData>();
+ AU.setPreservesAll(); // Does not transform code
+}
+
+
+
+/// AnalyzeLibCallDetails - Given a call to a function with the specified
+/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
+/// vs the specified pointer/size.
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
+ CallSite CS, Value *P,
+ unsigned Size) {
+ // If we have a function, check to see what kind of mod/ref effects it
+ // has. Start by including any info globally known about the function.
+ AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
+ if (MRInfo == NoModRef) return MRInfo;
+
+ // If that didn't tell us that the function is 'readnone', check to see
+ // if we have detailed info and if 'P' is any of the locations we know
+ // about.
+ const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
+ if (Details == 0)
+ return MRInfo;
+
+ // If the details array is of the 'DoesNot' kind, we only know something if
+ // the pointer is a match for one of the locations in 'Details'. If we find a
+ // match, we can prove some interactions cannot happen.
+ //
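+ // For illustration: if the details record that the call 'DoesNot' Mod a
+ // particular location known to LCI (an errno-like location, say) and P is
+ // proven to be that location, the Mod bit is masked out of MRInfo below.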
+ if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
+ // Find out if the pointer refers to a known location.
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &Loc =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size);
+ if (Res != LibCallLocationInfo::Yes) continue;
+
+ // If we find a match against a location that we 'do not' interact with,
+ // fold this info into MRInfo.
+ return ModRefResult(MRInfo & ~Details[i].MRInfo);
+ }
+ return MRInfo;
+ }
+
+ // If the details are of the 'DoesOnly' sort, we know something if the pointer
+ // is a match for one of the locations in 'Details'. Also, if we can prove
+ // that the pointer is *not* one of the locations in 'Details', we know that
+ // the call is NoModRef.
+ assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
+
+ // Find out if the pointer refers to a known location.
+ bool NoneMatch = true;
+ for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+ const LibCallLocationInfo &Loc =
+ LCI->getLocationInfo(Details[i].LocationID);
+ LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size);
+ if (Res == LibCallLocationInfo::No) continue;
+
+ // If we don't know if this pointer points to the location, then we have to
+ // assume it might alias in some case.
+ if (Res == LibCallLocationInfo::Unknown) {
+ NoneMatch = false;
+ continue;
+ }
+
+ // If we know that this pointer definitely is pointing into the location,
+ // merge in this information.
+ return ModRefResult(MRInfo & Details[i].MRInfo);
+ }
+
+ // If we found that the pointer is guaranteed to not match any of the
+ // locations in our 'DoesOnly' rule, then we know that the pointer must point
+ // to some other location. Since the libcall doesn't mod/ref any other
+ // locations, return NoModRef.
+ if (NoneMatch)
+ return NoModRef;
+
+ // Otherwise, return any other info gained so far.
+ return MRInfo;
+}
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object.
+//
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ ModRefResult MRInfo = ModRef;
+
+ // If this is a direct call to a function that LCI knows about, get the
+ // information about the runtime function.
+ if (LCI) {
+ if (Function *F = CS.getCalledFunction()) {
+ if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
+ MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size));
+ if (MRInfo == NoModRef) return NoModRef;
+ }
+ }
+ }
+
+ // The AliasAnalysis base class has some smarts; let's use them.
+ return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, P, Size));
+}
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
new file mode 100644
index 0000000..2985047
--- /dev/null
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -0,0 +1,65 @@
+//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements interfaces that can be used to describe language
+// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
+// optimizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+/// getMap - The Impl pointer in LibCallInfo is actually a StringMap. This
+/// helper does the cast.
+static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
+ return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
+}
+
+LibCallInfo::~LibCallInfo() {
+ delete getMap(Impl);
+}
+
+const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
+ // Get location info on the first call.
+ if (NumLocations == 0)
+ NumLocations = getLocationInfo(Locations);
+
+ assert(LocID < NumLocations && "Invalid location ID!");
+ return Locations[LocID];
+}
+
+
+/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+/// the specified function if we have it. If not, return null.
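+///
+/// The array returned by getFunctionInfoArray() is expected to be terminated
+/// by an entry whose Name is null; a hypothetical array might look like:
+///   { "memcpy", ... }, { "memset", ... }, { 0, ... }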
+const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const {
+ StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
+
+ /// If this is the first time we are querying for this info, lazily construct
+ /// the StringMap to index it.
+ if (Map == 0) {
+ Impl = Map = new StringMap<const LibCallFunctionInfo*>();
+
+ const LibCallFunctionInfo *Array = getFunctionInfoArray();
+ if (Array == 0) return 0;
+
+ // We now have the array of entries. Populate the StringMap.
+ for (unsigned i = 0; Array[i].Name; ++i)
+ (*Map)[Array[i].Name] = Array+i;
+ }
+
+ // Look up this function in the string map.
+ const char *ValueName = F->getNameStart();
+ StringMap<const LibCallFunctionInfo*>::iterator I =
+ Map->find(ValueName, ValueName+F->getNameLen());
+ return I != Map->end() ? I->second : 0;
+}
+
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
new file mode 100644
index 0000000..2bbe98a
--- /dev/null
+++ b/lib/Analysis/LiveValues.cpp
@@ -0,0 +1,191 @@
+//===- LiveValues.cpp - Liveness information for LLVM IR Values. ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the implementation for the LLVM IR Value liveness
+// analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LiveValues.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+using namespace llvm;
+
+FunctionPass *llvm::createLiveValuesPass() { return new LiveValues(); }
+
+char LiveValues::ID = 0;
+static RegisterPass<LiveValues>
+X("live-values", "Value Liveness Analysis", false, true);
+
+LiveValues::LiveValues() : FunctionPass(&ID) {}
+
+void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+}
+
+bool LiveValues::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+
+ // This pass's values are computed lazily, so there's nothing to do here.
+
+ return false;
+}
+
+void LiveValues::releaseMemory() {
+ Memos.clear();
+}
+
+/// isUsedInBlock - Test if the given value is used in the given block.
+///
+bool LiveValues::isUsedInBlock(const Value *V, const BasicBlock *BB) {
+ Memo &M = getMemo(V);
+ return M.Used.count(BB);
+}
+
+/// isLiveThroughBlock - Test if the given value is known to be
+/// live-through the given block, meaning that the block is properly
+/// dominated by the value's definition, and there exists a block
+/// reachable from it that contains a use. This uses a conservative
+/// approximation that errs on the side of returning false.
+///
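+/// For example, if V is defined in block A and used in block C, blocks on
+/// the immediate-dominator chain between C and A may be recorded as
+/// live-through for V.
+///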
+bool LiveValues::isLiveThroughBlock(const Value *V,
+ const BasicBlock *BB) {
+ Memo &M = getMemo(V);
+ return M.LiveThrough.count(BB);
+}
+
+/// isKilledInBlock - Test if the given value is known to be killed in
+/// the given block, meaning that the block contains a use of the value,
+/// and no blocks reachable from the block contain a use. This uses a
+/// conservative approximation that errs on the side of returning false.
+///
+bool LiveValues::isKilledInBlock(const Value *V, const BasicBlock *BB) {
+ Memo &M = getMemo(V);
+ return M.Killed.count(BB);
+}
+
+/// getMemo - Retrieve an existing Memo for the given value if one
+/// is available, otherwise compute a new one.
+///
+LiveValues::Memo &LiveValues::getMemo(const Value *V) {
+ DenseMap<const Value *, Memo>::iterator I = Memos.find(V);
+ if (I != Memos.end())
+ return I->second;
+ return compute(V);
+}
+
+/// getImmediateDominator - A handy utility for the specific DominatorTree
+/// query that we need here.
+///
+static const BasicBlock *getImmediateDominator(const BasicBlock *BB,
+ const DominatorTree *DT) {
+ DomTreeNode *Node = DT->getNode(const_cast<BasicBlock *>(BB))->getIDom();
+ return Node ? Node->getBlock() : 0;
+}
+
+/// compute - Compute a new Memo for the given value.
+///
+LiveValues::Memo &LiveValues::compute(const Value *V) {
+ Memo &M = Memos[V];
+
+ // Determine the block containing the definition.
+ const BasicBlock *DefBB;
+ // Instructions define values with meaningful live ranges.
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ DefBB = I->getParent();
+ // Arguments can be analyzed as values defined in the entry block.
+ else if (const Argument *A = dyn_cast<Argument>(V))
+ DefBB = &A->getParent()->getEntryBlock();
+ // Constants and other things aren't meaningful here, so just
+ // return having computed an empty Memo so that we don't come
+ // here again. The assumption here is that client code won't
+ // be asking about such values very often.
+ else
+ return M;
+
+ // Determine if the value is defined inside a loop. This is used
+ // to track whether the value is ever used outside the loop, so
+ // it'll be set to null if the value is either not defined in a
+ // loop or used outside the loop in which it is defined.
+ const Loop *L = LI->getLoopFor(DefBB);
+
+ // Track whether the value is used anywhere outside of the block
+ // in which it is defined.
+ bool LiveOutOfDefBB = false;
+
+ // Examine each use of the value.
+ for (Value::use_const_iterator I = V->use_begin(), E = V->use_end();
+ I != E; ++I) {
+ const User *U = *I;
+ const BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+
+ // Note the block in which this use occurs.
+ M.Used.insert(UseBB);
+
+ // If the use block doesn't have successors, the value can be
+ // considered killed.
+ if (succ_begin(UseBB) == succ_end(UseBB))
+ M.Killed.insert(UseBB);
+
+ // Observe whether the value is used outside of the loop in which
+ // it is defined. Switch to an enclosing loop if necessary.
+ for (; L; L = L->getParentLoop())
+ if (L->contains(UseBB))
+ break;
+
+ // Search for live-through blocks.
+ const BasicBlock *BB;
+ if (const PHINode *PHI = dyn_cast<PHINode>(U)) {
+ // For PHI nodes, start the search at the incoming block paired with the
+ // incoming value, which must be dominated by the definition.
+ unsigned Num = PHI->getIncomingValueNumForOperand(I.getOperandNo());
+ BB = PHI->getIncomingBlock(Num);
+
+ // A PHI-node use means the value is live-out of its defining block
+ // even if that block also contains the only use.
+ LiveOutOfDefBB = true;
+ } else {
+ // Otherwise just start the search at the use.
+ BB = UseBB;
+
+ // Note if the use is outside the defining block.
+ LiveOutOfDefBB |= UseBB != DefBB;
+ }
+
+ // Climb the immediate dominator tree from the use to the definition
+ // and mark all intermediate blocks as live-through.
+ for (; BB != DefBB; BB = getImmediateDominator(BB, DT)) {
+ if (BB != UseBB && !M.LiveThrough.insert(BB))
+ break;
+ }
+ }
+
+ // If the value is defined inside a loop and is not live outside
+ // the loop, then each exiting block of the loop in which the value
+ // is used is a kill block.
+ if (L) {
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ const BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (M.Used.count(ExitingBlock))
+ M.Killed.insert(ExitingBlock);
+ }
+ }
+
+ // If the value was never used outside the block in which it was
+ // defined, it's killed in that block.
+ if (!LiveOutOfDefBB)
+ M.Killed.insert(DefBB);
+
+ return M;
+}
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
new file mode 100644
index 0000000..de6480a
--- /dev/null
+++ b/lib/Analysis/LoopInfo.cpp
@@ -0,0 +1,50 @@
+//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfo class that is used to identify natural loops
+// and determine the loop depth of various nodes of the CFG. Note that the
+// loops identified may actually be several natural loops that share the same
+// header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+#include <ostream>
+using namespace llvm;
+
+char LoopInfo::ID = 0;
+static RegisterPass<LoopInfo>
+X("loops", "Natural Loop Information", true, true);
+
+//===----------------------------------------------------------------------===//
+// Loop implementation
+//
+
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
+bool LoopInfo::runOnFunction(Function &) {
+ releaseMemory();
+ LI->Calculate(getAnalysis<DominatorTree>().getBase()); // Recompute loop info.
+ return false;
+}
+
+void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTree>();
+}
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
new file mode 100644
index 0000000..08c25f4
--- /dev/null
+++ b/lib/Analysis/LoopPass.cpp
@@ -0,0 +1,340 @@
+//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LoopPass and LPPassManager. All loop optimization
+// and transformation passes are derived from LoopPass. LPPassManager is
+// responsible for managing LoopPasses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// LPPassManager
+//
+
+char LPPassManager::ID = 0;
+/// LPPassManager manages LoopPasses.
+
+LPPassManager::LPPassManager(int Depth)
+ : FunctionPass(&ID), PMDataManager(Depth) {
+ skipThisLoop = false;
+ redoThisLoop = false;
+ LI = NULL;
+ CurrentLoop = NULL;
+}
+
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
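+/// A LoopPass that has made its current loop dead (a hypothetical
+/// loop-deletion transform, say) would call this so that the remaining
+/// passes skip the deleted loop.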
+void LPPassManager::deleteLoopFromQueue(Loop *L) {
+
+ if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
+ // Reparent all of the blocks in this loop. Since L had a parent, they
+ // now all belong to that parent loop.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L) // Don't change blocks in subloops.
+ LI->changeLoopFor(*I, ParentLoop);
+
+ // Remove the loop from its parent loop.
+ for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
+ ++I) {
+ assert(I != E && "Couldn't find loop");
+ if (*I == L) {
+ ParentLoop->removeChildLoop(I);
+ break;
+ }
+ }
+
+ // Move all subloops into the parent loop.
+ while (!L->empty())
+ ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
+ } else {
+ // Reparent all of the blocks in this loop. Since L had no parent, they
+ // are no longer in a loop at all.
+
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ // Don't change blocks in subloops.
+ if (LI->getLoopFor(L->getBlocks()[i]) == L) {
+ LI->removeBlock(L->getBlocks()[i]);
+ --i;
+ }
+ }
+
+ // Remove the loop from the top-level LoopInfo object.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
+ assert(I != E && "Couldn't find loop");
+ if (*I == L) {
+ LI->removeLoop(I);
+ break;
+ }
+ }
+
+ // Move all of the subloops to the top-level.
+ while (!L->empty())
+ LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
+ }
+
+ delete L;
+
+ // If L is current loop then skip rest of the passes and let
+ // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
+ // and continue applying other passes on CurrentLoop.
+ if (CurrentLoop == L) {
+ skipThisLoop = true;
+ return;
+ }
+
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == L) {
+ LQ.erase(I);
+ break;
+ }
+ }
+}
+
+// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
+
+ assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+
+ // Insert into loop nest
+ if (ParentLoop)
+ ParentLoop->addChildLoop(L);
+ else
+ LI->addTopLevelLoop(L);
+
+ // Insert L into loop queue
+ if (L == CurrentLoop)
+ redoLoop(L);
+ else if (!ParentLoop)
+ // This is top level loop.
+ LQ.push_front(L);
+ else {
+ // Insert L after ParentLoop
+ for (std::deque<Loop *>::iterator I = LQ.begin(),
+ E = LQ.end(); I != E; ++I) {
+ if (*I == ParentLoop) {
+ // deque does not support insert after.
+ ++I;
+ LQ.insert(I, 1, L);
+ break;
+ }
+ }
+ }
+}
+
+// Reoptimize this loop. LPPassManager will re-insert this loop into the
+// queue. This allows LoopPass to change loop nest for the loop. This
+// utility may send LPPassManager into infinite loops so use caution.
+void LPPassManager::redoLoop(Loop *L) {
+ assert (CurrentLoop == L && "Can redo only CurrentLoop");
+ redoThisLoop = true;
+}
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+ BasicBlock *To, Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ LoopPass *LP = dynamic_cast<LoopPass *>(P);
+ LP->cloneBasicBlockAnalysis(From, To, L);
+ }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+ ++BI) {
+ Instruction &I = *BI;
+ deleteSimpleAnalysisValue(&I, L);
+ }
+ }
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ LoopPass *LP = dynamic_cast<LoopPass *>(P);
+ LP->deleteAnalysisValue(V, L);
+ }
+}
+
+
+// Recursively add a loop and all of its subloops into LQ.
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
+ LQ.push_back(L);
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+ // become part of LPPassManager.
+ Info.addRequired<LoopInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool LPPassManager::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfo>();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ // Populate Loop Queue
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+
+ // Initialization
+ for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
+ I != E; ++I) {
+ Loop *L = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ LoopPass *LP = dynamic_cast<LoopPass *>(P);
+ if (LP)
+ Changed |= LP->doInitialization(L, *this);
+ }
+ }
+
+ // Walk Loops
+ while (!LQ.empty()) {
+
+ CurrentLoop = LQ.back();
+ skipThisLoop = false;
+ redoThisLoop = false;
+
+ // Run all passes on the current loop.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, "");
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ LoopPass *LP = dynamic_cast<LoopPass *>(P);
+ {
+ PassManagerPrettyStackEntry X(LP, *CurrentLoop->getHeader());
+ StartPassTimer(P);
+ assert(LP && "Invalid LPPassManager member");
+ Changed |= LP->runOnLoop(CurrentLoop, *this);
+ StopPassTimer(P);
+ }
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, "");
+ dumpPreservedSet(P);
+
+ verifyPreservedAnalysis(LP);
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, "", ON_LOOP_MSG);
+
+ // If dominator information is available then verify the info if requested.
+ verifyDomInfo(*LP, F);
+
+ if (skipThisLoop)
+ // Do not run other passes on this loop.
+ break;
+ }
+
+ // Pop the loop from queue after running all passes.
+ LQ.pop_back();
+
+ if (redoThisLoop)
+ LQ.push_back(CurrentLoop);
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ LoopPass *LP = dynamic_cast <LoopPass *>(P);
+ if (LP)
+ Changed |= LP->doFinalization();
+ }
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void LPPassManager::dumpPassStructure(unsigned Offset) {
+ llvm::cerr << std::string(Offset*2, ' ') << "Loop Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoopPass
+
+// Check if this pass is suitable for the current LPPassManager, if
+// available. This pass P is not suitable for a LPPassManager if P
+// is not preserving higher level analysis info used by other
+// LPPassManager passes. In such a case, pop the LPPassManager from the
+// stack. This will force assignPassManager() to create a new
+// LPPassManager as expected.
+void LoopPass::preparePassManager(PMStack &PMS) {
+
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ LPPassManager *LPPM = dynamic_cast<LPPassManager *>(PMS.top());
+
+ // If this pass destroys high-level information that is used by other
+ // passes managed by the LPM, do not insert this pass into the current
+ // LPM; use a new LPPassManager instead.
+ if (LPPM && !LPPM->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void LoopPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ LPPassManager *LPPM = dynamic_cast<LPPassManager *>(PMS.top());
+
+ // Create new Loop Pass Manager if it does not exist.
+ if (!LPPM) {
+
+ assert (!PMS.empty() && "Unable to create Loop Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Loop Pass Manager
+ LPPM = new LPPassManager(PMD->getDepth() + 1);
+ LPPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(LPPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = dynamic_cast<Pass *>(LPPM);
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(LPPM);
+ }
+
+ LPPM->add(this);
+}
diff --git a/lib/Analysis/LoopVR.cpp b/lib/Analysis/LoopVR.cpp
new file mode 100644
index 0000000..0a3d06b
--- /dev/null
+++ b/lib/Analysis/LoopVR.cpp
@@ -0,0 +1,291 @@
+//===- LoopVR.cpp - Value Range analysis driven by loop information -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple value range analysis: it computes constant
+// ranges for integer values inside loops, using loop trip counts from
+// ScalarEvolution to bound the SCEV expressions involved.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopvr"
+#include "llvm/Analysis/LoopVR.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char LoopVR::ID = 0;
+static RegisterPass<LoopVR> X("loopvr", "Loop Value Ranges", false, true);
+
+/// getRange - determine the range for a particular SCEV within a given Loop
+ConstantRange LoopVR::getRange(SCEVHandle S, Loop *L, ScalarEvolution &SE) {
+ SCEVHandle T = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(T))
+ return ConstantRange(cast<IntegerType>(S->getType())->getBitWidth(), true);
+
+ T = SE.getTruncateOrZeroExtend(T, S->getType());
+ return getRange(S, T, SE);
+}
+
+/// getRange - determine the range for a particular SCEV with a given trip count
+ConstantRange LoopVR::getRange(SCEVHandle S, SCEVHandle T, ScalarEvolution &SE){
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return ConstantRange(C->getValue()->getValue());
+
+ ConstantRange FullSet(cast<IntegerType>(S->getType())->getBitWidth(), true);
+
+ // x + y + ... + z. We detect overflow by checking whether the set shrinks
+ // below either operand's size after summing the lower and upper bounds.
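+ // For example, adding the 8-bit ranges [0,200) and [0,200) gives a nominal
+ // upper bound of 399, which wraps to 143; the resulting set [0,143) is
+ // smaller than either input, so FullSet is returned.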
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getRange(Add->getOperand(0), T, SE);
+ if (X.isFullSet()) return FullSet;
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) {
+ ConstantRange Y = getRange(Add->getOperand(i), T, SE);
+ if (Y.isFullSet()) return FullSet;
+
+ APInt Spread_X = X.getSetSize(), Spread_Y = Y.getSetSize();
+ APInt NewLower = X.getLower() + Y.getLower();
+ APInt NewUpper = X.getUpper() + Y.getUpper() - 1;
+ if (NewLower == NewUpper)
+ return FullSet;
+
+ X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ return FullSet; // we've wrapped, therefore, full set.
+ }
+ return X;
+ }
+
+ // x * y * ... * z. To detect overflow, we compute in k*bitwidth bits, where
+ // k is the number of terms being multiplied.
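+ // For example, multiplying the 8-bit ranges [2,4) and [3,5) is evaluated
+ // in a 16-bit extension as [2*3, (3*4)+1) = [6,13), then truncated back
+ // to 8 bits.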
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getRange(Mul->getOperand(0), T, SE);
+ if (X.isFullSet()) return FullSet;
+
+ const IntegerType *Ty = IntegerType::get(X.getBitWidth());
+ const IntegerType *ExTy = IntegerType::get(X.getBitWidth() *
+ Mul->getNumOperands());
+ ConstantRange XExt = X.zeroExtend(ExTy->getBitWidth());
+
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) {
+ ConstantRange Y = getRange(Mul->getOperand(i), T, SE);
+ if (Y.isFullSet()) return FullSet;
+
+ ConstantRange YExt = Y.zeroExtend(ExTy->getBitWidth());
+ XExt = ConstantRange(XExt.getLower() * YExt.getLower(),
+ ((XExt.getUpper()-1) * (YExt.getUpper()-1)) + 1);
+ }
+ return XExt.truncate(Ty->getBitWidth());
+ }
+
+ // X smax Y smax ... Z is: range(smax(X_smin, Y_smin, ..., Z_smin),
+ // smax(X_smax, Y_smax, ..., Z_smax))
+ // It doesn't matter if one of the SCEVs has FullSet because we're taking
+ // a maximum of the minimums across all of them.
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getRange(SMax->getOperand(0), T, SE);
+ if (X.isFullSet()) return FullSet;
+
+ APInt smin = X.getSignedMin(), smax = X.getSignedMax();
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) {
+ ConstantRange Y = getRange(SMax->getOperand(i), T, SE);
+ smin = APIntOps::smax(smin, Y.getSignedMin());
+ smax = APIntOps::smax(smax, Y.getSignedMax());
+ }
+ if (smax + 1 == smin) return FullSet;
+ return ConstantRange(smin, smax + 1);
+ }
+
+ // X umax Y umax ... Z is: range(umax(X_umin, Y_umin, ..., Z_umin),
+ // umax(X_umax, Y_umax, ..., Z_umax))
+ // It doesn't matter if one of the SCEVs has FullSet because we're taking
+ // a maximum of the minimums across all of them.
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getRange(UMax->getOperand(0), T, SE);
+ if (X.isFullSet()) return FullSet;
+
+ APInt umin = X.getUnsignedMin(), umax = X.getUnsignedMax();
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) {
+ ConstantRange Y = getRange(UMax->getOperand(i), T, SE);
+ umin = APIntOps::umax(umin, Y.getUnsignedMin());
+ umax = APIntOps::umax(umax, Y.getUnsignedMax());
+ }
+ if (umax + 1 == umin) return FullSet;
+ return ConstantRange(umin, umax + 1);
+ }
+
+ // L udiv R. Luckily, there's only ever 2 sides to a udiv.
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange L = getRange(UDiv->getLHS(), T, SE);
+ ConstantRange R = getRange(UDiv->getRHS(), T, SE);
+ if (L.isFullSet() && R.isFullSet()) return FullSet;
+
+ if (R.getUnsignedMax() == 0) {
+ // RHS must be single-element zero. Return an empty set.
+ return ConstantRange(R.getBitWidth(), false);
+ }
+
+ APInt Lower = L.getUnsignedMin().udiv(R.getUnsignedMax());
+
+ APInt Upper;
+
+ if (R.getUnsignedMin() == 0) {
+ // Just because it contains zero doesn't mean it will also contain one.
+ // Use maximalIntersectWith to get the right behaviour.
+ ConstantRange NotZero(APInt(L.getBitWidth(), 1),
+ APInt::getNullValue(L.getBitWidth()));
+ R = R.maximalIntersectWith(NotZero);
+ }
+
+ // But the maximal intersection might still include zero. If it does, then
+ // we know it also included one.
+ if (R.contains(APInt::getNullValue(L.getBitWidth())))
+ Upper = L.getUnsignedMax();
+ else
+ Upper = L.getUnsignedMax().udiv(R.getUnsignedMin());
+
+ return ConstantRange(Lower, Upper);
+ }
+
+ // ConstantRange already implements the cast operators.
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ T = SE.getTruncateOrZeroExtend(T, ZExt->getOperand()->getType());
+ ConstantRange X = getRange(ZExt->getOperand(), T, SE);
+ return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ T = SE.getTruncateOrZeroExtend(T, SExt->getOperand()->getType());
+ ConstantRange X = getRange(SExt->getOperand(), T, SE);
+ return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ T = SE.getTruncateOrZeroExtend(T, Trunc->getOperand()->getType());
+ ConstantRange X = getRange(Trunc->getOperand(), T, SE);
+ if (X.isFullSet()) return FullSet;
+ return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
+ if (!Trip) return FullSet;
+
+ if (AddRec->isAffine()) {
+ SCEVHandle StartHandle = AddRec->getStart();
+ SCEVHandle StepHandle = AddRec->getOperand(1);
+
+ const SCEVConstant *Step = dyn_cast<SCEVConstant>(StepHandle);
+ if (!Step) return FullSet;
+
+ uint32_t ExWidth = 2 * Trip->getValue()->getBitWidth();
+ APInt TripExt = Trip->getValue()->getValue(); TripExt.zext(ExWidth);
+ APInt StepExt = Step->getValue()->getValue(); StepExt.zext(ExWidth);
+ if ((TripExt * StepExt).ugt(APInt::getLowBitsSet(ExWidth, ExWidth >> 1)))
+ return FullSet;
+
+ SCEVHandle EndHandle = SE.getAddExpr(StartHandle,
+ SE.getMulExpr(T, StepHandle));
+ const SCEVConstant *Start = dyn_cast<SCEVConstant>(StartHandle);
+ const SCEVConstant *End = dyn_cast<SCEVConstant>(EndHandle);
+ if (!Start || !End) return FullSet;
+
+ const APInt &StartInt = Start->getValue()->getValue();
+ const APInt &EndInt = End->getValue()->getValue();
+ const APInt &StepInt = Step->getValue()->getValue();
+
+ if (StepInt.isNegative()) {
+ if (EndInt == StartInt + 1) return FullSet;
+ return ConstantRange(EndInt, StartInt + 1);
+ } else {
+ if (StartInt == EndInt + 1) return FullSet;
+ return ConstantRange(StartInt, EndInt + 1);
+ }
+ }
+ }
+
+ // TODO: non-affine addrec, udiv, SCEVUnknown (narrowed from elsewhere)?
+
+ return FullSet;
+}
+
+bool LoopVR::runOnFunction(Function &F) { Map.clear(); return false; }
+
+void LoopVR::print(std::ostream &os, const Module *) const {
+ raw_os_ostream OS(os);
+ for (std::map<Value *, ConstantRange *>::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+ OS << *I->first << ": " << *I->second << '\n';
+ }
+}
+
+void LoopVR::releaseMemory() {
+ for (std::map<Value *, ConstantRange *>::iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+ delete I->second;
+ }
+
+ Map.clear();
+}
+
+ConstantRange LoopVR::compute(Value *V) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return ConstantRange(CI->getValue());
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return ConstantRange(cast<IntegerType>(V->getType())->getBitWidth(), false);
+
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+
+ Loop *L = LI.getLoopFor(I->getParent());
+ if (!L || L->isLoopInvariant(I))
+ return ConstantRange(cast<IntegerType>(V->getType())->getBitWidth(), false);
+
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
+
+ SCEVHandle S = SE.getSCEV(I);
+ if (isa<SCEVUnknown>(S) || isa<SCEVCouldNotCompute>(S))
+ return ConstantRange(cast<IntegerType>(V->getType())->getBitWidth(), false);
+
+ return ConstantRange(getRange(S, L, SE));
+}
+
+ConstantRange LoopVR::get(Value *V) {
+ std::map<Value *, ConstantRange *>::iterator I = Map.find(V);
+ if (I == Map.end()) {
+ ConstantRange *CR = new ConstantRange(compute(V));
+ Map[V] = CR;
+ return *CR;
+ }
+
+ return *I->second;
+}
+
+void LoopVR::remove(Value *V) {
+ std::map<Value *, ConstantRange *>::iterator I = Map.find(V);
+ if (I != Map.end()) {
+ delete I->second;
+ Map.erase(I);
+ }
+}
+
+void LoopVR::narrow(Value *V, const ConstantRange &CR) {
+ if (CR.isFullSet()) return;
+
+  std::map<Value *, ConstantRange *>::iterator I = Map.find(V);
+  if (I == Map.end()) {
+    Map[V] = new ConstantRange(CR);
+  } else {
+    // Replace the stored range with the intersection, freeing the old
+    // range so it isn't leaked.
+    ConstantRange *Narrowed =
+      new ConstantRange(I->second->maximalIntersectWith(CR));
+    delete I->second;
+    I->second = Narrowed;
+  }
+}
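+
+// Minimal usage sketch (hedged; assumes a client pass that has declared
+// LoopVR as a required analysis):
+//   LoopVR &LVR = getAnalysis<LoopVR>();
+//   ConstantRange R = LVR.get(V);   // computed and cached on first query
+//   if (!R.isFullSet()) {
+//     // V is known to stay within R inside its loop
+//   }
+//   LVR.narrow(V, OtherRange);      // intersect in externally derived facts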
diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile
new file mode 100644
index 0000000..4af6d35
--- /dev/null
+++ b/lib/Analysis/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Analysis/Makefile -------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMAnalysis
+DIRS = IPA
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
new file mode 100644
index 0000000..3b21029
--- /dev/null
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -0,0 +1,1142 @@
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an analysis that determines, for a given memory
+// operation, what preceding memory operations it depends on. It builds on
+// alias analysis information, and tries to provide a lazy, caching interface to
+// a common kind of alias information query.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memdep"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+ "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+ "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+ "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+ "Number of block queries that were completely cached");
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+static RegisterPass<MemoryDependenceAnalysis> X("memdep",
+ "Memory Dependence Analysis", false, true);
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+: FunctionPass(&ID), PredCache(0) {
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+ LocalDeps.clear();
+ NonLocalDeps.clear();
+ NonLocalPointerDeps.clear();
+ ReverseLocalDeps.clear();
+ ReverseNonLocalDeps.clear();
+ ReverseNonLocalPtrDeps.clear();
+ PredCache->clear();
+}
+
+
+
+/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<TargetData>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+ AA = &getAnalysis<AliasAnalysis>();
+ TD = &getAnalysis<TargetData>();
+ if (PredCache == 0)
+ PredCache.reset(new PredIteratorCache());
+ return false;
+}
+
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+ SmallPtrSet<KeyTy, 4> > &ReverseMap,
+ Instruction *Inst, KeyTy Val) {
+ typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+ InstIt = ReverseMap.find(Inst);
+ assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+ bool Found = InstIt->second.erase(Val);
+  assert(Found && "Invalid reverse map!");
+  Found = Found;  // Silence 'unused variable' warnings in release builds.
+ if (InstIt->second.empty())
+ ReverseMap.erase(InstIt);
+}
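+// For example: if ReverseMap holds I1 -> {Q1, Q2}, removing (I1, Q1) shrinks
+// the set to {Q2}, and removing (I1, Q2) afterwards erases I1's entry from
+// the map entirely.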
+
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
+MemDepResult MemoryDependenceAnalysis::
+getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ // Walk backwards through the block, looking for dependencies
+ while (ScanIt != BB->begin()) {
+ Instruction *Inst = --ScanIt;
+
+ // If this inst is a memory op, get the pointer it accessed
+ Value *Pointer = 0;
+ uint64_t PointerSize = 0;
+ if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
+ Pointer = S->getPointerOperand();
+ PointerSize = TD->getTypeStoreSize(S->getOperand(0)->getType());
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+ Pointer = V->getOperand(0);
+ PointerSize = TD->getTypeStoreSize(V->getType());
+ } else if (FreeInst *F = dyn_cast<FreeInst>(Inst)) {
+ Pointer = F->getPointerOperand();
+
+ // FreeInsts erase the entire structure
+ PointerSize = ~0ULL;
+ } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+ CallSite InstCS = CallSite::get(Inst);
+ // If these two calls do not interfere, look past it.
+ switch (AA->getModRefInfo(CS, InstCS)) {
+ case AliasAnalysis::NoModRef:
+ // If the two calls don't interact (e.g. InstCS is readnone) keep
+ // scanning.
+ continue;
+ case AliasAnalysis::Ref:
+ // If the two calls read the same memory locations and CS is a readonly
+ // function, then we have two cases: 1) the calls may not interfere with
+ // each other at all. 2) the calls may produce the same value. In case
+ // #1 we want to ignore the values, in case #2, we want to return Inst
+ // as a Def dependence. This allows us to CSE in cases like:
+ // X = strlen(P);
+ // memchr(...);
+ // Y = strlen(P); // Y = X
+ if (isReadOnlyCall) {
+ if (CS.getCalledFunction() != 0 &&
+ CS.getCalledFunction() == InstCS.getCalledFunction())
+ return MemDepResult::getDef(Inst);
+ // Ignore unrelated read/read call dependences.
+ continue;
+ }
+ // FALL THROUGH
+ default:
+ return MemDepResult::getClobber(Inst);
+ }
+ } else {
+ // Non-memory instruction.
+ continue;
+ }
+
+ if (AA->getModRefInfo(CS, Pointer, PointerSize) != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
+ }
+
+  // No dependence found. If this is the entry block of the function, it is a
+  // clobber; otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getClobber(ScanIt);
+}
+
+/// getPointerDependencyFrom - Return the instruction on which a memory
+/// location depends. If isLoad is true, this routine ignores may-aliases with
+/// read-only operations.
+MemDepResult MemoryDependenceAnalysis::
+getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+
+ // Walk backwards through the basic block, looking for dependencies.
+ while (ScanIt != BB->begin()) {
+ Instruction *Inst = --ScanIt;
+
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+
+ // Values depend on loads if the pointers are must aliased. This means that
+ // a load depends on another must aliased load from the same value.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Value *Pointer = LI->getPointerOperand();
+ uint64_t PointerSize = TD->getTypeStoreSize(LI->getType());
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R =
+ AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+
+      // For a load query, a load that merely may-aliases the pointer is not
+      // by itself a dependence; keep scanning.
+ if (isLoad && R == AliasAnalysis::MayAlias)
+ continue;
+ // Stores depend on may and must aliased loads, loads depend on must-alias
+ // loads.
+ return MemDepResult::getDef(Inst);
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If alias analysis can tell that this store is guaranteed to not modify
+ // the query pointer, ignore it. Use getModRefInfo to handle cases where
+ // the query pointer points to constant memory etc.
+ if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef)
+ continue;
+
+ // Ok, this store might clobber the query pointer. Check to see if it is
+ // a must alias: in this case, we want to return this as a def.
+ Value *Pointer = SI->getPointerOperand();
+ uint64_t PointerSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasAnalysis::AliasResult R =
+ AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+
+ if (R == AliasAnalysis::NoAlias)
+ continue;
+ if (R == AliasAnalysis::MayAlias)
+ return MemDepResult::getClobber(Inst);
+ return MemDepResult::getDef(Inst);
+ }
+
+ // If this is an allocation, and if we know that the accessed pointer is to
+ // the allocation, return Def. This means that there is no dependence and
+ // the access can be optimized based on that. For example, a load could
+ // turn into undef.
+ if (AllocationInst *AI = dyn_cast<AllocationInst>(Inst)) {
+ Value *AccessPtr = MemPtr->getUnderlyingObject();
+
+ if (AccessPtr == AI ||
+ AA->alias(AI, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(AI);
+ continue;
+ }
+
+ // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
+ switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
+ case AliasAnalysis::NoModRef:
+ // If the call has no effect on the queried pointer, just ignore it.
+ continue;
+ case AliasAnalysis::Ref:
+ // If the call is known to never store to the pointer, and if this is a
+ // load query, we can safely ignore it (scan past it).
+ if (isLoad)
+ continue;
+ // FALL THROUGH.
+ default:
+ // Otherwise, there is a potential dependence. Return a clobber.
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+
+  // No dependence found. If this is the entry block of the function, it is a
+  // clobber; otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getClobber(ScanIt);
+}
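+// Summary of the cases above (restated for reference): must-alias loads and
+// stores are Defs; a may-alias store is a Clobber; a may-alias load is
+// skipped for load queries but is a Def for store queries; and a must-alias
+// allocation is a Def, which lets a dependent load fold to undef.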
+
+/// getDependency - Return the instruction on which a memory operation
+/// depends.
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
+ Instruction *ScanPos = QueryInst;
+
+ // Check for a cached result
+ MemDepResult &LocalCache = LocalDeps[QueryInst];
+
+ // If the cached entry is non-dirty, just return it. Note that this depends
+ // on MemDepResult's default constructing to 'dirty'.
+ if (!LocalCache.isDirty())
+ return LocalCache;
+
+ // Otherwise, if we have a dirty entry, we know we can start the scan at that
+ // instruction, which may save us some work.
+ if (Instruction *Inst = LocalCache.getInst()) {
+ ScanPos = Inst;
+
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
+ }
+
+ BasicBlock *QueryParent = QueryInst->getParent();
+
+ Value *MemPtr = 0;
+ uint64_t MemSize = 0;
+
+ // Do the scan.
+ if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
+    // No dependence found. If this is the entry block of the function, it is a
+    // clobber; otherwise it is non-local.
+ if (QueryParent != &QueryParent->getParent()->getEntryBlock())
+ LocalCache = MemDepResult::getNonLocal();
+ else
+ LocalCache = MemDepResult::getClobber(QueryInst);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(QueryInst)) {
+ // If this is a volatile store, don't mess around with it. Just return the
+ // previous instruction as a clobber.
+ if (SI->isVolatile())
+ LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+ else {
+ MemPtr = SI->getPointerOperand();
+ MemSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ }
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
+ // If this is a volatile load, don't mess around with it. Just return the
+ // previous instruction as a clobber.
+ if (LI->isVolatile())
+ LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+ else {
+ MemPtr = LI->getPointerOperand();
+ MemSize = TD->getTypeStoreSize(LI->getType());
+ }
+ } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
+ CallSite QueryCS = CallSite::get(QueryInst);
+ bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+ LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+ QueryParent);
+ } else if (FreeInst *FI = dyn_cast<FreeInst>(QueryInst)) {
+ MemPtr = FI->getPointerOperand();
+ // FreeInsts erase the entire structure, not just a field.
+    MemSize = ~0ULL;
+ } else {
+ // Non-memory instruction.
+ LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+ }
+
+ // If we need to do a pointer scan, make it happen.
+ if (MemPtr)
+ LocalCache = getPointerDependencyFrom(MemPtr, MemSize,
+ isa<LoadInst>(QueryInst),
+ ScanPos, QueryParent);
+
+ // Remember the result!
+ if (Instruction *I = LocalCache.getInst())
+ ReverseLocalDeps[I].insert(QueryInst);
+
+ return LocalCache;
+}
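+// Usage sketch (illustrative; 'MD' stands for a MemoryDependenceAnalysis
+// instance obtained by the client):
+//   MemDepResult Res = MD.getDependency(QueryInst);
+//   if (Res.isNonLocal()) {
+//     // fall back to the non-local query interfaces below
+//   } else if (Instruction *DepInst = Res.getInst()) {
+//     // DepInst is the Def or Clobber that QueryInst depends on
+//   }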
+
+#ifndef NDEBUG
+/// AssertSorted - This method is used when -debug is specified to verify that
+/// cache arrays are properly kept sorted.
+static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ int Count = -1) {
+ if (Count == -1) Count = Cache.size();
+ if (Count == 0) return;
+
+ for (unsigned i = 1; i != unsigned(Count); ++i)
+ assert(Cache[i-1] <= Cache[i] && "Cache isn't sorted!");
+}
+#endif
+
+/// getNonLocalCallDependency - Perform a full dependency query for the
+/// specified call, returning the set of blocks that the value is
+/// potentially live across. The returned set of results will include a
+/// "NonLocal" result for all blocks where the value is live across.
+///
+/// This method assumes the instruction returns a "NonLocal" dependency
+/// within its own block.
+///
+/// This returns a reference to an internal data structure that may be
+/// invalidated on the next non-local query or when an instruction is
+/// removed. Clients must copy this data if they want it around longer than
+/// that.
+const MemoryDependenceAnalysis::NonLocalDepInfo &
+MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
+ assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+ "getNonLocalCallDependency should only be used on calls with non-local deps!");
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ NonLocalDepInfo &Cache = CacheP.first;
+
+ /// DirtyBlocks - This is the set of blocks that need to be recomputed. In
+ /// the cached case, this can happen due to instructions being deleted etc. In
+ /// the uncached case, this starts out as the set of predecessors we care
+ /// about.
+ SmallVector<BasicBlock*, 32> DirtyBlocks;
+
+ if (!Cache.empty()) {
+ // Okay, we have a cache entry. If we know it is not dirty, just return it
+ // with no computation.
+ if (!CacheP.second) {
+ NumCacheNonLocal++;
+ return Cache;
+ }
+
+ // If we already have a partially computed set of results, scan them to
+ // determine what is dirty, seeding our initial DirtyBlocks worklist.
+ for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
+ I != E; ++I)
+ if (I->second.isDirty())
+ DirtyBlocks.push_back(I->first);
+
+ // Sort the cache so that we can do fast binary search lookups below.
+ std::sort(Cache.begin(), Cache.end());
+
+ ++NumCacheDirtyNonLocal;
+ //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+ // << Cache.size() << " cached: " << *QueryInst;
+ } else {
+ // Seed DirtyBlocks with each of the preds of QueryInst's block.
+ BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ NumUncacheNonLocal++;
+ }
+
+ // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+ bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+ SmallPtrSet<BasicBlock*, 64> Visited;
+
+ unsigned NumSortedEntries = Cache.size();
+ DEBUG(AssertSorted(Cache));
+
+ // Iterate while we still have blocks to update.
+ while (!DirtyBlocks.empty()) {
+ BasicBlock *DirtyBB = DirtyBlocks.back();
+ DirtyBlocks.pop_back();
+
+ // Already processed this block?
+ if (!Visited.insert(DirtyBB))
+ continue;
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ DEBUG(AssertSorted(Cache, NumSortedEntries));
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+ std::make_pair(DirtyBB, MemDepResult()));
+ if (Entry != Cache.begin() && prior(Entry)->first == DirtyBB)
+ --Entry;
+
+ MemDepResult *ExistingResult = 0;
+ if (Entry != Cache.begin()+NumSortedEntries &&
+ Entry->first == DirtyBB) {
+ // If we already have an entry, and if it isn't already dirty, the block
+ // is done.
+ if (!Entry->second.isDirty())
+ continue;
+
+ // Otherwise, remember this slot so we can update the value.
+ ExistingResult = &Entry->second;
+ }
+
+ // If the dirty entry has a pointer, start scanning from it so we don't have
+ // to rescan the entire block.
+ BasicBlock::iterator ScanPos = DirtyBB->end();
+ if (ExistingResult) {
+ if (Instruction *Inst = ExistingResult->getInst()) {
+ ScanPos = Inst;
+ // We're removing QueryInst's use of Inst.
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+ QueryCS.getInstruction());
+ }
+ }
+
+ // Find out if this block has a local dependency for QueryInst.
+ MemDepResult Dep;
+
+ if (ScanPos != DirtyBB->begin()) {
+ Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
+ } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+      // No dependence found. If this is the entry block of the function, it is
+      // a clobber; otherwise it is non-local.
+ Dep = MemDepResult::getNonLocal();
+ } else {
+ Dep = MemDepResult::getClobber(ScanPos);
+ }
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ *ExistingResult = Dep;
+ else
+ Cache.push_back(std::make_pair(DirtyBB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the association!
+ if (!Dep.isNonLocal()) {
+ // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+ // update this when we remove instructions.
+ if (Instruction *Inst = Dep.getInst())
+ ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ } else {
+
+ // If the block *is* completely transparent to the load, we need to check
+ // the predecessors of this block. Add them to our worklist.
+ for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
+ DirtyBlocks.push_back(*PI);
+ }
+ }
+
+ return Cache;
+}
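+// Usage sketch (illustrative; 'MD' and the call 'CI' are hypothetical client
+// values):
+//   const MemoryDependenceAnalysis::NonLocalDepInfo &Deps =
+//       MD.getNonLocalCallDependency(CallSite::get(CI));
+//   for (unsigned i = 0, e = Deps.size(); i != e; ++i)
+//     if (!Deps[i].second.isNonLocal())
+//       ; // Deps[i].first is a block holding a Def/Clobber in Deps[i].second
+// Copy the vector if it must outlive the next non-local query.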
+
+/// getNonLocalPointerDependency - Perform a full dependency query for an
+/// access to the specified (non-volatile) memory location, returning the
+/// set of instructions that either define or clobber the value.
+///
+/// This method assumes the pointer has a "NonLocal" dependency within its
+/// own block.
+///
+void MemoryDependenceAnalysis::
+getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
+ SmallVectorImpl<NonLocalDepEntry> &Result) {
+ assert(isa<PointerType>(Pointer->getType()) &&
+ "Can't get pointer deps of a non-pointer!");
+ Result.clear();
+
+  // We know that the pointer value is live into FromBB; find the def/clobbers
+  // reaching it from its predecessors.
+ const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
+ uint64_t PointeeSize = TD->getTypeStoreSize(EltTy);
+
+ // This is the set of blocks we've inspected, and the pointer we consider in
+ // each block. Because of critical edges, we currently bail out if querying
+ // a block with multiple different pointers. This can happen during PHI
+ // translation.
+ DenseMap<BasicBlock*, Value*> Visited;
+ if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
+ Result, Visited, true))
+ return;
+ Result.clear();
+ Result.push_back(std::make_pair(FromBB,
+ MemDepResult::getClobber(FromBB->begin())));
+}
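+// Usage sketch (illustrative; 'MD' and the load 'LI' are hypothetical):
+//   SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 16> Deps;
+//   MD.getNonLocalPointerDependency(LI->getPointerOperand(), true /*isLoad*/,
+//                                   LI->getParent(), Deps);
+// Each resulting entry pairs a BasicBlock* with the MemDepResult found there.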
+
+/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
+/// Pointer/PointeeSize using either cached information in Cache or by doing a
+/// lookup (which may use dirty cache info if available). If we do a lookup,
+/// add the result to the cache.
+MemDepResult MemoryDependenceAnalysis::
+GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
+ bool isLoad, BasicBlock *BB,
+ NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
+ std::make_pair(BB, MemDepResult()));
+ if (Entry != Cache->begin() && prior(Entry)->first == BB)
+ --Entry;
+
+ MemDepResult *ExistingResult = 0;
+ if (Entry != Cache->begin()+NumSortedEntries && Entry->first == BB)
+ ExistingResult = &Entry->second;
+
+ // If we have a cached entry, and it is non-dirty, use it as the value for
+ // this dependency.
+ if (ExistingResult && !ExistingResult->isDirty()) {
+ ++NumCacheNonLocalPtr;
+ return *ExistingResult;
+ }
+
+ // Otherwise, we have to scan for the value. If we have a dirty cache
+ // entry, start scanning from its position, otherwise we scan from the end
+ // of the block.
+ BasicBlock::iterator ScanPos = BB->end();
+ if (ExistingResult && ExistingResult->getInst()) {
+ assert(ExistingResult->getInst()->getParent() == BB &&
+ "Instruction invalidated?");
+ ++NumCacheDirtyNonLocalPtr;
+ ScanPos = ExistingResult->getInst();
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ ValueIsLoadPair CacheKey(Pointer, isLoad);
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+ } else {
+ ++NumUncacheNonLocalPtr;
+ }
+
+ // Scan the block for the dependency.
+ MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad,
+ ScanPos, BB);
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ *ExistingResult = Dep;
+ else
+ Cache->push_back(std::make_pair(BB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the reverse association because we just added it
+ // to Cache!
+ if (Dep.isNonLocal())
+ return Dep;
+
+ // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
+ // update MemDep when we remove instructions.
+ Instruction *Inst = Dep.getInst();
+ assert(Inst && "Didn't depend on anything?");
+ ValueIsLoadPair CacheKey(Pointer, isLoad);
+ ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
+ return Dep;
+}
+
+
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true). In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason. This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
+getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
+ bool isLoad, BasicBlock *StartBB,
+ SmallVectorImpl<NonLocalDepEntry> &Result,
+ DenseMap<BasicBlock*, Value*> &Visited,
+ bool SkipFirstBlock) {
+
+ // Look up the cached info for Pointer.
+ ValueIsLoadPair CacheKey(Pointer, isLoad);
+
+ std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
+ &NonLocalPointerDeps[CacheKey];
+ NonLocalDepInfo *Cache = &CacheInfo->second;
+
+ // If we have valid cached information for exactly the block we are
+ // investigating, just return it with no recomputation.
+ if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+    // Since we have a fully cached result for this query, we can just return
+    // the cached results and populate the visited set. However, we have to
+    // verify that we don't already have conflicting results for these blocks.
+    // Check that any block in the result set that also appears in the visited
+    // set was visited with the same pointer query.
+ if (!Visited.empty()) {
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first);
+ if (VI == Visited.end() || VI->second == Pointer) continue;
+
+ // We have a pointer mismatch in a block. Just return clobber, saying
+ // that something was clobbered in this result. We could also do a
+ // non-fully cached query, but there is little point in doing this.
+ return true;
+ }
+ }
+
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ Visited.insert(std::make_pair(I->first, Pointer));
+ if (!I->second.isNonLocal())
+ Result.push_back(*I);
+ }
+ ++NumCacheCompleteNonLocalPtr;
+ return false;
+ }
+
+  // Otherwise, this is either a new block, a block with an invalid cache
+  // entry, or one whose cached info we're about to invalidate by putting more
+  // info into it than it currently holds. If the cache is empty, what we
+  // compute will be valid cache info; otherwise it won't be.
+ if (Cache->empty())
+ CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+ else
+ CacheInfo->first = BBSkipFirstBlockPair();
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(StartBB);
+
+ // Keep track of the entries that we know are sorted. Previously cached
+ // entries will all be sorted. The entries we add we only sort on demand (we
+ // don't insert every element into its sorted position). We know that we
+ // won't get any reuse from currently inserted values, because we don't
+ // revisit blocks after we insert info for them.
+ unsigned NumSortedEntries = Cache->size();
+ DEBUG(AssertSorted(*Cache));
+
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+
+ // Skip the first block if we have it.
+ if (!SkipFirstBlock) {
+ // Analyze the dependency of *Pointer in FromBB. See if we already have
+ // been here.
+ assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
+
+ // Get the dependency info for Pointer in BB. If we have cached
+ // information, we will use it, otherwise we compute it.
+ DEBUG(AssertSorted(*Cache, NumSortedEntries));
+ MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad,
+ BB, Cache, NumSortedEntries);
+
+ // If we got a Def or Clobber, add this to the list of results.
+ if (!Dep.isNonLocal()) {
+ Result.push_back(NonLocalDepEntry(BB, Dep));
+ continue;
+ }
+ }
+
+ // If 'Pointer' is an instruction defined in this block, then we need to do
+ // phi translation to change it into a value live in the predecessor block.
+ // If phi translation fails, then we can't continue dependence analysis.
+ Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
+ bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;
+
+ // If no PHI translation is needed, just add all the predecessors of this
+ // block to scan them as well.
+ if (!NeedsPHITranslation) {
+ SkipFirstBlock = false;
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ // Verify that we haven't looked at this block yet.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
+ if (InsertRes.second) {
+ // First time we've looked at *PI.
+ Worklist.push_back(*PI);
+ continue;
+ }
+
+        // If we have seen this block before, but it was with a different
+        // pointer, then we have a phi translation failure and have to treat
+        // this as a clobber.
+ if (InsertRes.first->second != Pointer)
+ goto PredTranslationFailure;
+ }
+ continue;
+ }
+
+ // If we do need to do phi translation, then there are a bunch of different
+ // cases, because we have to find a Value* live in the predecessor block. We
+ // know that PtrInst is defined in this block at least.
+
+ // If this is directly a PHI node, just use the incoming values for each
+ // pred as the phi translated version.
+ if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ BasicBlock *Pred = *PI;
+ Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);
+
+ // Check to see if we have already visited this pred block with another
+ // pointer. If so, we can't do this lookup. This failure can occur
+ // with PHI translation when a critical edge exists and the PHI node in
+ // the successor translates to a pointer value different than the
+ // pointer the block was first analyzed with.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
+
+ if (!InsertRes.second) {
+ // If the predecessor was visited with PredPtr, then we already did
+ // the analysis and can ignore it.
+ if (InsertRes.first->second == PredPtr)
+ continue;
+
+ // Otherwise, the block was previously analyzed with a different
+ // pointer. We can't represent the result of this case, so we just
+ // treat this as a phi translation failure.
+ goto PredTranslationFailure;
+ }
+
+ // We may have added values to the cache list before this PHI
+ // translation. If so, we haven't done anything to ensure that the
+ // cache remains sorted. Sort it now (if needed) so that recursive
+ // invocations of getNonLocalPointerDepFromBB that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size())
+ std::sort(Cache->begin(), Cache->end());
+ Cache = 0;
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If we have a problem phi translating, fall through to the code below
+ // to handle the failure condition.
+ if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
+ Result, Visited))
+ goto PredTranslationFailure;
+ }
+
+ // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->second;
+ NumSortedEntries = Cache->size();
+
+ // Since we did phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+      // specific block queries), but we can't do the fastpath "return all
+      // results from the set". Clear out the indicator for this.
+ CacheInfo->first = BBSkipFirstBlockPair();
+ SkipFirstBlock = false;
+ continue;
+ }
+
+ // TODO: BITCAST, GEP.
+
+ // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+ // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+ // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+ PredTranslationFailure:
+
+ if (Cache == 0) {
+ // Refresh the CacheInfo/Cache pointer if it got invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->second;
+ NumSortedEntries = Cache->size();
+ } else if (NumSortedEntries != Cache->size()) {
+ std::sort(Cache->begin(), Cache->end());
+ NumSortedEntries = Cache->size();
+ }
+
+ // Since we did phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+    // specific block queries), but we can't do the fastpath "return all
+    // results from the set". Clear out the indicator for this.
+ CacheInfo->first = BBSkipFirstBlockPair();
+
+ // If *nothing* works, mark the pointer as being clobbered by the first
+ // instruction in this block.
+ //
+ // If this is the magic first block, return this as a clobber of the whole
+ // incoming value. Since we can't phi translate to one of the predecessors,
+ // we have to bail out.
+ if (SkipFirstBlock)
+ return true;
+
+ for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+ assert(I != Cache->rend() && "Didn't find current block??");
+ if (I->first != BB)
+ continue;
+
+ assert(I->second.isNonLocal() &&
+ "Should only be here with transparent block");
+ I->second = MemDepResult::getClobber(BB->begin());
+ ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
+ Result.push_back(*I);
+ break;
+ }
+ }
+
+ // Okay, we're done now. If we added new values to the cache, re-sort it.
+ switch (Cache->size()-NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ NonLocalDepEntry Val = Cache->back();
+ Cache->pop_back();
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache->begin(), Cache->end()-1, Val);
+ Cache->insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+    // One new entry; just insert the new value at the appropriate position.
+ if (Cache->size() != 1) {
+ NonLocalDepEntry Val = Cache->back();
+ Cache->pop_back();
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache->begin(), Cache->end(), Val);
+ Cache->insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache->begin(), Cache->end());
+ }
+ DEBUG(AssertSorted(*Cache));
+ return false;
+}
+
+/// RemoveCachedNonLocalPointerDependencies - If P exists in
+/// CachedNonLocalPointerInfo, remove it.
+void MemoryDependenceAnalysis::
+RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
+ CachedNonLocalPointerInfo::iterator It =
+ NonLocalPointerDeps.find(P);
+ if (It == NonLocalPointerDeps.end()) return;
+
+ // Remove all of the entries in the BB->val map. This involves removing
+ // instructions from the reverse map.
+ NonLocalDepInfo &PInfo = It->second.second;
+
+ for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
+ Instruction *Target = PInfo[i].second.getInst();
+ if (Target == 0) continue; // Ignore non-local dep results.
+ assert(Target->getParent() == PInfo[i].first);
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
+ }
+
+ // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
+ NonLocalPointerDeps.erase(It);
+}
+
+
+/// invalidateCachedPointerInfo - This method is used to invalidate cached
+/// information about the specified pointer, because it may be too
+/// conservative in memdep. This is an optional call that can be used when
+/// the client detects an equivalence between the pointer and some other
+/// value and replaces the other value with ptr. This can make Ptr available
+/// in places that the cached info does not necessarily reflect.
+void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
+ // If Ptr isn't really a pointer, just ignore it.
+ if (!isa<PointerType>(Ptr->getType())) return;
+ // Flush store info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
+ // Flush load info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+}
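+// Illustrative scenario (hedged): after a client proves two pointers %A and
+// %B are equal and rewrites uses of %B to %A, the dependence info cached for
+// %A may be overly conservative, so the client calls
+// invalidateCachedPointerInfo(A) to flush both the load and store caches.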
+
+/// removeInstruction - Remove an instruction from the dependence analysis,
+/// updating the dependence of instructions that previously depended on it.
+/// This method attempts to keep the cache coherent using the reverse map.
+void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
+ // Walk through the Non-local dependencies, removing this one as the value
+ // for any cached queries.
+ NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
+ if (NLDI != NonLocalDeps.end()) {
+ NonLocalDepInfo &BlockMap = NLDI->second.first;
+ for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
+ DI != DE; ++DI)
+ if (Instruction *Inst = DI->second.getInst())
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
+ NonLocalDeps.erase(NLDI);
+ }
+
+ // If we have a cached local dependence query for this instruction, remove it.
+ //
+ LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
+ if (LocalDepEntry != LocalDeps.end()) {
+ // Remove us from DepInst's reverse set now that the local dep info is gone.
+ if (Instruction *Inst = LocalDepEntry->second.getInst())
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
+
+ // Remove this local dependency info.
+ LocalDeps.erase(LocalDepEntry);
+ }
+
+ // If we have any cached pointer dependencies on this instruction, remove
+ // them. If the instruction has non-pointer type, then it can't be a pointer
+ // base.
+
+ // Remove it from both the load info and the store info. The instruction
+ // can't be in either of these maps if it is non-pointer.
+ if (isa<PointerType>(RemInst->getType())) {
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
+ }
+
+ // Loop over all of the things that depend on the instruction we're removing.
+ //
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
+
+ // If we find RemInst as a clobber or Def in any of the maps for other values,
+ // we need to replace its entry with a dirty version of the instruction after
+ // it. If RemInst is a terminator, we use a null dirty value.
+ //
+  // Using a dirty version of the instruction after RemInst means a later
+  // recomputation can resume its scan there rather than rescanning the entire
+  // block.
+ MemDepResult NewDirtyVal;
+ if (!RemInst->isTerminator())
+ NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
+
+ ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
+ // RemInst can't be the terminator if it has local stuff depending on it.
+ assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ "Nothing can locally depend on a terminator");
+
+ for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
+ E = ReverseDeps.end(); I != E; ++I) {
+ Instruction *InstDependingOnRemInst = *I;
+ assert(InstDependingOnRemInst != RemInst &&
+ "Already removed our local dep info");
+
+ LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
+
+      // Make sure to remember that new things depend on NewDirtyVal's
+      // instruction.
+ assert(NewDirtyVal.getInst() && "There is no way something else can have "
+ "a local dep on this if it is a terminator!");
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ InstDependingOnRemInst));
+ }
+
+ ReverseLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating the
+ // 'ReverseDeps' reference.
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseNonLocalDeps.end()) {
+ SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
+ for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
+ I != E; ++I) {
+ assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+
+ PerInstNLInfo &INLD = NonLocalDeps[*I];
+ // The information is now dirty!
+ INLD.second = true;
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+ DE = INLD.first.end(); DI != DE; ++DI) {
+ if (DI->second.getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->second = NewDirtyVal;
+
+ if (Instruction *NextI = NewDirtyVal.getInst())
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ }
+ }
+
+ ReverseNonLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
+ // value in the NonLocalPointerDeps info.
+ ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
+ ReverseNonLocalPtrDeps.find(RemInst);
+ if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
+ SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
+ SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
+ E = Set.end(); I != E; ++I) {
+ ValueIsLoadPair P = *I;
+ assert(P.getPointer() != RemInst &&
+ "Already removed NonLocalPointerDeps info for RemInst");
+
+ NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;
+
+ // The cache is not valid for any specific block anymore.
+ NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();
+
+ // Update any entries for RemInst to use the instruction after it.
+ for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
+ DI != DE; ++DI) {
+ if (DI->second.getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->second = NewDirtyVal;
+
+ if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
+ ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
+ }
+
+ // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
+ // subsequent value may invalidate the sortedness.
+ std::sort(NLPDI.begin(), NLPDI.end());
+ }
+
+ ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
+
+ while (!ReversePtrDepsToAdd.empty()) {
+ ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
+ .insert(ReversePtrDepsToAdd.back().second);
+ ReversePtrDepsToAdd.pop_back();
+ }
+ }
+
+
+ assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
+ AA->deleteValue(RemInst);
+ DEBUG(verifyRemoved(RemInst));
+}
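+// Typical client pattern (a sketch, not mandated by the interface): notify
+// memdep before erasing a dead instruction so the reverse maps can be
+// patched up while the instruction is still valid, e.g.:
+//   MD.removeInstruction(DeadInst);
+//   DeadInst->eraseFromParent();
+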
+/// verifyRemoved - Verify that the specified instruction does not occur
+/// in our internal data structures.
+void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+ for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
+ E = LocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ assert(I->second.getInst() != D &&
+ "Inst occurs in data structures");
+ }
+
+ for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
+ E = NonLocalPointerDeps.end(); I != E; ++I) {
+ assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
+ const NonLocalDepInfo &Val = I->second.second;
+ for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
+ II != E; ++II)
+ assert(II->second.getInst() != D && "Inst occurs as NLPD value");
+ }
+
+ for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
+ E = NonLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ const PerInstNLInfo &INLD = I->second;
+ for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
+ EE = INLD.first.end(); II != EE; ++II)
+ assert(II->second.getInst() != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
+ E = ReverseLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
+ E = ReverseNonLocalDeps.end();
+ I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ assert(*II != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseNonLocalPtrDepTy::const_iterator
+ I = ReverseNonLocalPtrDeps.begin(),
+ E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in rev NLPD map");
+
+ for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
+ E = I->second.end(); II != E; ++II)
+ assert(*II != ValueIsLoadPair(D, false) &&
+ *II != ValueIsLoadPair(D, true) &&
+ "Inst occurs in ReverseNonLocalPtrDeps map");
+ }
+
+}
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
new file mode 100644
index 0000000..4853c2a
--- /dev/null
+++ b/lib/Analysis/PostDominators.cpp
@@ -0,0 +1,94 @@
+//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the post-dominator construction algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postdomtree"
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Analysis/DominatorInternals.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+char PostDominanceFrontier::ID = 0;
+static RegisterPass<PostDominatorTree>
+F("postdomtree", "Post-Dominator Tree Construction", true, true);
+
+bool PostDominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ DEBUG(DT->dump());
+ return false;
+}
+
+PostDominatorTree::~PostDominatorTree()
+{
+ delete DT;
+}
+
+FunctionPass* llvm::createPostDomTree() {
+ return new PostDominatorTree();
+}
+
+//===----------------------------------------------------------------------===//
+// PostDominanceFrontier Implementation
+//===----------------------------------------------------------------------===//
+
+static RegisterPass<PostDominanceFrontier>
+H("postdomfrontier", "Post-Dominance Frontier Construction", true, true);
+
+const DominanceFrontier::DomSetType &
+PostDominanceFrontier::calculate(const PostDominatorTree &DT,
+ const DomTreeNode *Node) {
+ // Loop over CFG successors to calculate DFlocal[Node]
+ BasicBlock *BB = Node->getBlock();
+ DomSetType &S = Frontiers[BB]; // The new set to fill in...
+ if (getRoots().empty()) return S;
+
+ if (BB)
+ for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB);
+ SI != SE; ++SI) {
+ // Does Node immediately dominate this predecessor?
+ DomTreeNode *SINode = DT[*SI];
+ if (SINode && SINode->getIDom() != Node)
+ S.insert(*SI);
+ }
+
+ // At this point, S is DFlocal. Now we union in DFup's of our children...
+  // Loop through and visit the nodes that Node immediately post-dominates
+  // (Node's children in the post-dominator tree).
+ //
+ for (DomTreeNode::const_iterator
+ NI = Node->begin(), NE = Node->end(); NI != NE; ++NI) {
+ DomTreeNode *IDominee = *NI;
+ const DomSetType &ChildDF = calculate(DT, IDominee);
+
+ DomSetType::const_iterator CDFI = ChildDF.begin(), CDFE = ChildDF.end();
+ for (; CDFI != CDFE; ++CDFI) {
+ if (!DT.properlyDominates(Node, DT[*CDFI]))
+ S.insert(*CDFI);
+ }
+ }
+
+ return S;
+}
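+// For reference, the recurrence implemented above is the standard frontier
+// formula, with X ranging over Node's children in the post-dominator tree:
+//   DF(Node) = DFlocal(Node) U { B in DF(X) : Node does not properly
+//                                post-dominate B }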
+
+FunctionPass* llvm::createPostDomFrontier() {
+ return new PostDominanceFrontier();
+}
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
new file mode 100644
index 0000000..a0965b6
--- /dev/null
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -0,0 +1,100 @@
+//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract ProfileInfo interface, and the default
+// "no profile" implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+using namespace llvm;
+
+// Register the ProfileInfo interface, providing a nice name to refer to.
+static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information");
+char ProfileInfo::ID = 0;
+
+ProfileInfo::~ProfileInfo() {}
+
+unsigned ProfileInfo::getExecutionCount(BasicBlock *BB) const {
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // Are there zero predecessors of this block?
+ if (PI == PE) {
+ // If this is the entry block, look for the Null -> Entry edge.
+ if (BB == &BB->getParent()->getEntryBlock())
+ return getEdgeWeight(0, BB);
+ else
+ return 0; // Otherwise, this is a dead block.
+ }
+
+ // Otherwise, if there are predecessors, the execution count of this block is
+  // the sum of the edge frequencies from the incoming edges.  Note that if
+  // there are multiple edges from a predecessor to this block, we don't want
+  // to count its weight multiple times.  For this reason, we keep track of
+ // the predecessors we've seen and only count them if we haven't run into them
+ // yet.
+ //
+ // We don't want to create an std::set unless we are dealing with a block that
+ // has a LARGE number of in-edges. Handle the common case of having only a
+ // few in-edges with special code.
+ //
+ BasicBlock *FirstPred = *PI;
+ unsigned Count = getEdgeWeight(FirstPred, BB);
+ ++PI;
+ if (PI == PE) return Count; // Quick exit for single predecessor blocks
+
+ BasicBlock *SecondPred = *PI;
+ if (SecondPred != FirstPred) Count += getEdgeWeight(SecondPred, BB);
+ ++PI;
+ if (PI == PE) return Count; // Quick exit for two predecessor blocks
+
+ BasicBlock *ThirdPred = *PI;
+ if (ThirdPred != FirstPred && ThirdPred != SecondPred)
+ Count += getEdgeWeight(ThirdPred, BB);
+ ++PI;
+ if (PI == PE) return Count; // Quick exit for three predecessor blocks
+
+ std::set<BasicBlock*> ProcessedPreds;
+ ProcessedPreds.insert(FirstPred);
+ ProcessedPreds.insert(SecondPred);
+ ProcessedPreds.insert(ThirdPred);
+ for (; PI != PE; ++PI)
+ if (ProcessedPreds.insert(*PI).second)
+ Count += getEdgeWeight(*PI, BB);
+ return Count;
+}
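+// Worked example (illustrative): if the predecessor list visits P1 twice (two
+// distinct edges) and P2 once, with getEdgeWeight(P1, BB) == 12 and
+// getEdgeWeight(P2, BB) == 3, the count returned is 12 + 3 == 15; P1's weight
+// is deliberately added only once.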
+
+
+
+//===----------------------------------------------------------------------===//
+// NoProfile ProfileInfo implementation
+//
+
+namespace {
+ struct VISIBILITY_HIDDEN NoProfileInfo
+ : public ImmutablePass, public ProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoProfileInfo() : ImmutablePass(&ID) {}
+ };
+} // End of anonymous namespace
+
+char NoProfileInfo::ID = 0;
+// Register this pass...
+static RegisterPass<NoProfileInfo>
+X("no-profile", "No Profile Information", false, true);
+
+// Declare that we implement the ProfileInfo interface
+static RegisterAnalysisGroup<ProfileInfo, true> Y(X);
+
+ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
new file mode 100644
index 0000000..3a0a740
--- /dev/null
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -0,0 +1,277 @@
+//===- ProfileInfoLoader.cpp - Load profile information from disk ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ProfileInfoLoader class is used to load and represent profiling
+// information read in from the dump file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Support/Streams.h"
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+using namespace llvm;
+
+// ByteSwap - Byteswap 'Var' if 'Really' is true.
+//
+static inline unsigned ByteSwap(unsigned Var, bool Really) {
+ if (!Really) return Var;
+ return ((Var & (255<< 0)) << 24) |
+ ((Var & (255<< 8)) << 8) |
+ ((Var & (255<<16)) >> 8) |
+ ((Var & (255<<24)) >> 24);
+}
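+// For example, ByteSwap(0x00000001, true) yields 0x01000000: each of the four
+// bytes is masked out and shifted to the mirrored position.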
+
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+ bool ShouldByteSwap,
+ std::vector<unsigned> &Data) {
+ // Read the number of entries...
+ unsigned NumEntries;
+ if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
+ cerr << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
+
+ // Read the counts...
+ std::vector<unsigned> TempSpace(NumEntries);
+
+ // Read in the block of data...
+ if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
+ cerr << ToolName << ": data packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+
+ // Make sure we have enough space...
+ if (Data.size() < NumEntries)
+ Data.resize(NumEntries);
+
+ // Accumulate the data we just read into the data.
+ if (!ShouldByteSwap) {
+ for (unsigned i = 0; i != NumEntries; ++i)
+ Data[i] += TempSpace[i];
+ } else {
+ for (unsigned i = 0; i != NumEntries; ++i)
+ Data[i] += ByteSwap(TempSpace[i], true);
+ }
+}
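+// On-disk layout handled above (restated): a profiling block is a 32-bit
+// entry count followed by NumEntries 32-bit counters; repeated blocks of the
+// same kind accumulate element-wise into Data.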
+
+// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
+// program if the file is invalid or broken.
+//
+ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
+ const std::string &Filename,
+ Module &TheModule) : M(TheModule) {
+ FILE *F = fopen(Filename.c_str(), "r");
+ if (F == 0) {
+ cerr << ToolName << ": Error opening '" << Filename << "': ";
+ perror(0);
+ exit(1);
+ }
+
+ // Keep reading packets until we run out of them.
+ unsigned PacketType;
+ while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
+ // If the low eight bits of the packet are zero, we must be dealing with an
+ // endianness mismatch. Byteswap all words read from the profiling
+ // information.
+ bool ShouldByteSwap = (char)PacketType == 0;
+ PacketType = ByteSwap(PacketType, ShouldByteSwap);
+
+ switch (PacketType) {
+ case ArgumentInfo: {
+ unsigned ArgLength;
+ if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
+ cerr << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
+
+ // Read in the arguments...
+ std::vector<char> Chars(ArgLength+4);
+
+ if (ArgLength)
+ if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
+ cerr << ToolName << ": arguments packet truncated!\n";
+ perror(0);
+ exit(1);
+ }
+ CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
+ break;
+ }
+
+ case FunctionInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
+ break;
+
+ case BlockInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
+ break;
+
+ case EdgeInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
+ break;
+
+ case BBTraceInfo:
+ ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
+ break;
+
+ default:
+ cerr << ToolName << ": Unknown packet type #" << PacketType << "!\n";
+ exit(1);
+ }
+ }
+
+ fclose(F);
+}
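+
+// Minimal usage sketch (hypothetical consumer; "mytool" and the variable
+// names are illustrative):
+//
+//   Module &M = ...;
+//   ProfileInfoLoader PIL("mytool", "llvmprof.out", M); // exits on bad file
+//   std::vector<std::pair<Function*, unsigned> > Counts;
+//   PIL.getFunctionCounts(Counts);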
+
+
+// getFunctionCounts - This method is used by consumers of function counting
+// information. If we do not directly have function count information, we
+// compute it from other, more refined, types of profile information.
+//
+void ProfileInfoLoader::getFunctionCounts(std::vector<std::pair<Function*,
+ unsigned> > &Counts) {
+ if (FunctionCounts.empty()) {
+ if (hasAccurateBlockCounts()) {
+ // Synthesize function frequency information from the number of times
+ // their entry blocks were executed.
+ std::vector<std::pair<BasicBlock*, unsigned> > BlockCounts;
+ getBlockCounts(BlockCounts);
+
+ for (unsigned i = 0, e = BlockCounts.size(); i != e; ++i)
+ if (&BlockCounts[i].first->getParent()->getEntryBlock() ==
+ BlockCounts[i].first)
+ Counts.push_back(std::make_pair(BlockCounts[i].first->getParent(),
+ BlockCounts[i].second));
+ } else {
+ cerr << "Function counts are not available!\n";
+ }
+ return;
+ }
+
+ unsigned Counter = 0;
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E && Counter != FunctionCounts.size(); ++I)
+ if (!I->isDeclaration())
+ Counts.push_back(std::make_pair(I, FunctionCounts[Counter++]));
+}
+
+// getBlockCounts - This method is used by consumers of block counting
+// information. If we do not directly have block count information, we
+// compute it from other, more refined, types of profile information.
+//
+void ProfileInfoLoader::getBlockCounts(std::vector<std::pair<BasicBlock*,
+ unsigned> > &Counts) {
+ if (BlockCounts.empty()) {
+ if (hasAccurateEdgeCounts()) {
+ // Synthesize block count information from edge frequency information.
+ // The block execution frequency is equal to the sum of the execution
+ // frequency of all outgoing edges from a block.
+ //
+ // If a block has no successors, this will not be correct, so we have to
+ // special case it. :(
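+      //
+      // For example (illustrative numbers): if block B has outgoing edges
+      // (B,0) executed 3 times and (B,1) executed 4 times, B's count is 7;
+      // a successor with no outgoing edges of its own instead receives the
+      // sum of its incoming edge counts, accumulated via InEdgeFreqs below.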
+ std::vector<std::pair<Edge, unsigned> > EdgeCounts;
+ getEdgeCounts(EdgeCounts);
+
+ std::map<BasicBlock*, unsigned> InEdgeFreqs;
+
+ BasicBlock *LastBlock = 0;
+ TerminatorInst *TI = 0;
+ for (unsigned i = 0, e = EdgeCounts.size(); i != e; ++i) {
+ if (EdgeCounts[i].first.first != LastBlock) {
+ LastBlock = EdgeCounts[i].first.first;
+ TI = LastBlock->getTerminator();
+ Counts.push_back(std::make_pair(LastBlock, 0));
+ }
+ Counts.back().second += EdgeCounts[i].second;
+ unsigned SuccNum = EdgeCounts[i].first.second;
+ if (SuccNum >= TI->getNumSuccessors()) {
+ static bool Warned = false;
+ if (!Warned) {
+ cerr << "WARNING: profile info doesn't seem to match"
+ << " the program!\n";
+ Warned = true;
+ }
+ } else {
+ // If this successor has no successors of its own, we will never
+ // compute an execution count for that block. Remember the incoming
+ // edge frequencies to add later.
+ BasicBlock *Succ = TI->getSuccessor(SuccNum);
+ if (Succ->getTerminator()->getNumSuccessors() == 0)
+ InEdgeFreqs[Succ] += EdgeCounts[i].second;
+ }
+ }
+
+ // Now we have to accumulate information for those blocks without
+ // successors into our table.
+ for (std::map<BasicBlock*, unsigned>::iterator I = InEdgeFreqs.begin(),
+ E = InEdgeFreqs.end(); I != E; ++I) {
+ unsigned i = 0;
+ for (; i != Counts.size() && Counts[i].first != I->first; ++i)
+ /*empty*/;
+ if (i == Counts.size()) Counts.push_back(std::make_pair(I->first, 0));
+ Counts[i].second += I->second;
+ }
+
+ } else {
+ cerr << "Block counts are not available!\n";
+ }
+ return;
+ }
+
+ unsigned Counter = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ Counts.push_back(std::make_pair(BB, BlockCounts[Counter++]));
+ if (Counter == BlockCounts.size())
+ return;
+ }
+}
+
+// getEdgeCounts - This method is used by consumers of edge counting
+// information. If we do not directly have edge count information, we compute
+// it from other, more refined, types of profile information.
+//
+void ProfileInfoLoader::getEdgeCounts(std::vector<std::pair<Edge,
+ unsigned> > &Counts) {
+ if (EdgeCounts.empty()) {
+ cerr << "Edge counts not available, and no synthesis "
+ << "is implemented yet!\n";
+ return;
+ }
+
+ unsigned Counter = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (unsigned i = 0, e = BB->getTerminator()->getNumSuccessors();
+ i != e; ++i) {
+ Counts.push_back(std::make_pair(Edge(BB, i), EdgeCounts[Counter++]));
+ if (Counter == EdgeCounts.size())
+ return;
+ }
+}
+
+// getBBTrace - This method is used by consumers of basic-block trace
+// information.
+//
+void ProfileInfoLoader::getBBTrace(std::vector<BasicBlock *> &Trace) {
+ if (BBTrace.empty ()) {
+ cerr << "Basic block trace is not available!\n";
+ return;
+ }
+ cerr << "Basic block trace loading is not implemented yet!\n";
+}
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
new file mode 100644
index 0000000..0a8a87b
--- /dev/null
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -0,0 +1,92 @@
+//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// loads the information from a profile dump file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BasicBlock.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+static cl::opt<std::string>
+ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -profile-loader"));
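+
+// Typical invocation (a sketch; driver flags other than the two registered
+// in this file may vary):
+//
+//   opt -profile-loader -profile-info-file=llvmprof.out ...
+//
+// This loads the named profile and exposes it through the ProfileInfo
+// analysis group registered below.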
+
+namespace {
+ class VISIBILITY_HIDDEN LoaderPass : public ModulePass, public ProfileInfo {
+ std::string Filename;
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ explicit LoaderPass(const std::string &filename = "")
+ : ModulePass(&ID), Filename(filename) {
+ if (filename.empty()) Filename = ProfileInfoFilename;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual const char *getPassName() const {
+ return "Profiling information loader";
+ }
+
+ /// run - Load the profile information from the specified file.
+ virtual bool runOnModule(Module &M);
+ };
+} // End of anonymous namespace
+
+char LoaderPass::ID = 0;
+static RegisterPass<LoaderPass>
+X("profile-loader", "Load profile information from llvmprof.out", false, true);
+
+static RegisterAnalysisGroup<ProfileInfo> Y(X);
+
+ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
+
+/// createProfileLoaderPass - This function returns a Pass that loads the
+/// profiling information for the module from the specified filename, making it
+/// available to the optimizers.
+Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
+ return new LoaderPass(Filename);
+}
+
+bool LoaderPass::runOnModule(Module &M) {
+ ProfileInfoLoader PIL("profile-loader", Filename, M);
+ EdgeCounts.clear();
+ bool PrintedWarning = false;
+
+ std::vector<std::pair<ProfileInfoLoader::Edge, unsigned> > ECs;
+ PIL.getEdgeCounts(ECs);
+ for (unsigned i = 0, e = ECs.size(); i != e; ++i) {
+ BasicBlock *BB = ECs[i].first.first;
+ unsigned SuccNum = ECs[i].first.second;
+ TerminatorInst *TI = BB->getTerminator();
+ if (SuccNum >= TI->getNumSuccessors()) {
+ if (!PrintedWarning) {
+ cerr << "WARNING: profile information is inconsistent with "
+ << "the current program!\n";
+ PrintedWarning = true;
+ }
+ } else {
+ EdgeCounts[std::make_pair(BB, TI->getSuccessor(SuccNum))]+= ECs[i].second;
+ }
+ }
+
+ return false;
+}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
new file mode 100644
index 0000000..f7f1849
--- /dev/null
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -0,0 +1,3824 @@
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution analysis
+// engine, which is used primarily to analyze expressions involving induction
+// variables in loops.
+//
+// There are several aspects to this library. First is the representation of
+// scalar expressions, which are represented as subclasses of the SCEV class.
+// These classes are used to represent certain types of subexpressions that we
+// can handle. These classes are reference counted, managed by the SCEVHandle
+// class. We only create one SCEV of a particular shape, so pointer-comparisons
+// for equality are legal.
+//
+// One important aspect of the SCEV objects is that they are never cyclic, even
+// if there is a cycle in the dataflow for an expression (i.e., a PHI node). If
+// the PHI node is one of the idioms that we can represent (e.g., a polynomial
+// recurrence) then we represent it directly as a recurrence node, otherwise we
+// represent it as a SCEVUnknown node.
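+//
+// For example (a sketch): the canonical induction variable of
+//   for (i = 0; i != n; ++i)
+// is represented as the affine recurrence {0,+,1}<loop> (start 0, step 1 per
+// iteration), while a value the analysis cannot decompose is wrapped in a
+// SCEVUnknown.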
+//
+// In addition to being able to represent expressions of various types, we also
+// have folders that are used to build the *canonical* representation for a
+// particular expression. These folders are capable of using a variety of
+// rewrite rules to simplify the expressions.
+//
+// Once the folders are defined, we can implement the more interesting
+// higher-level code, such as the code that recognizes PHI nodes of various
+// types, computes the execution count of a loop, etc.
+//
+// TODO: We should use these routines and value representations to implement
+// dependence analysis!
+//
+//===----------------------------------------------------------------------===//
+//
+// There are several good references for the techniques used in this analysis.
+//
+// Chains of recurrences -- a method to expedite the evaluation
+// of closed-form functions
+// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
+//
+// On computational properties of chains of recurrences
+// Eugene V. Zima
+//
+// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
+// Robert A. van Engelen
+//
+// Efficient Symbolic Analysis for Optimizing Compilers
+// Robert A. van Engelen
+//
+// Using the chains of recurrences algebra for data dependence testing and
+// induction variable substitution
+// MS Thesis, Johnie Birch
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalar-evolution"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <ostream>
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumArrayLenItCounts,
+ "Number of trip counts computed with array length");
+STATISTIC(NumTripCountsComputed,
+ "Number of loops with predictable loop counts");
+STATISTIC(NumTripCountsNotComputed,
+ "Number of loops without predictable loop counts");
+STATISTIC(NumBruteForceTripCountsComputed,
+ "Number of loops with trip counts computed by force");
+
+static cl::opt<unsigned>
+MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::desc("Maximum number of iterations SCEV will "
+ "symbolically execute a constant derived loop"),
+ cl::init(100));
+
+static RegisterPass<ScalarEvolution>
+R("scalar-evolution", "Scalar Evolution Analysis", false, true);
+char ScalarEvolution::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// SCEV class definitions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Implementation of the SCEV class.
+//
+SCEV::~SCEV() {}
+void SCEV::dump() const {
+ print(errs());
+ errs() << '\n';
+}
+
+void SCEV::print(std::ostream &o) const {
+ raw_os_ostream OS(o);
+ print(OS);
+}
+
+bool SCEV::isZero() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isZero();
+ return false;
+}
+
+bool SCEV::isOne() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isOne();
+ return false;
+}
+
+SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(scCouldNotCompute) {}
+SCEVCouldNotCompute::~SCEVCouldNotCompute() {}
+
+bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
+ assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ return false;
+}
+
+const Type *SCEVCouldNotCompute::getType() const {
+ assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ return 0;
+}
+
+bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
+ assert(0 && "Attempt to use a SCEVCouldNotCompute object!");
+ return false;
+}
+
+SCEVHandle SCEVCouldNotCompute::
+replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym,
+ const SCEVHandle &Conc,
+ ScalarEvolution &SE) const {
+ return this;
+}
+
+void SCEVCouldNotCompute::print(raw_ostream &OS) const {
+ OS << "***COULDNOTCOMPUTE***";
+}
+
+bool SCEVCouldNotCompute::classof(const SCEV *S) {
+ return S->getSCEVType() == scCouldNotCompute;
+}
+
+
+// SCEVConstants - Only allow the creation of one SCEVConstant for any
+// particular value. Don't use a SCEVHandle here, or else the object will
+// never be deleted!
+static ManagedStatic<std::map<ConstantInt*, SCEVConstant*> > SCEVConstants;
+
+
+SCEVConstant::~SCEVConstant() {
+ SCEVConstants->erase(V);
+}
+
+SCEVHandle ScalarEvolution::getConstant(ConstantInt *V) {
+ SCEVConstant *&R = (*SCEVConstants)[V];
+ if (R == 0) R = new SCEVConstant(V);
+ return R;
+}
+
+SCEVHandle ScalarEvolution::getConstant(const APInt& Val) {
+ return getConstant(ConstantInt::get(Val));
+}
+
+const Type *SCEVConstant::getType() const { return V->getType(); }
+
+void SCEVConstant::print(raw_ostream &OS) const {
+ WriteAsOperand(OS, V, false);
+}
+
+SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy,
+ const SCEVHandle &op, const Type *ty)
+ : SCEV(SCEVTy), Op(op), Ty(ty) {}
+
+SCEVCastExpr::~SCEVCastExpr() {}
+
+bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
+ return Op->dominates(BB, DT);
+}
+
+// SCEVTruncates - Only allow the creation of one SCEVTruncateExpr for any
+// particular input. Don't use a SCEVHandle here, or else the object will
+// never be deleted!
+static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
+ SCEVTruncateExpr*> > SCEVTruncates;
+
+SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty)
+ : SCEVCastExpr(scTruncate, op, ty) {
+ assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ "Cannot truncate non-integer value!");
+}
+
+SCEVTruncateExpr::~SCEVTruncateExpr() {
+ SCEVTruncates->erase(std::make_pair(Op, Ty));
+}
+
+void SCEVTruncateExpr::print(raw_ostream &OS) const {
+ OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
+}
+
+// SCEVZeroExtends - Only allow the creation of one SCEVZeroExtendExpr for any
+// particular input. Don't use a SCEVHandle here, or else the object will never
+// be deleted!
+static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
+ SCEVZeroExtendExpr*> > SCEVZeroExtends;
+
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty)
+ : SCEVCastExpr(scZeroExtend, op, ty) {
+ assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ "Cannot zero extend non-integer value!");
+}
+
+SCEVZeroExtendExpr::~SCEVZeroExtendExpr() {
+ SCEVZeroExtends->erase(std::make_pair(Op, Ty));
+}
+
+void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
+ OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
+}
+
+// SCEVSignExtends - Only allow the creation of one SCEVSignExtendExpr for any
+// particular input. Don't use a SCEVHandle here, or else the object will never
+// be deleted!
+static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
+ SCEVSignExtendExpr*> > SCEVSignExtends;
+
+SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty)
+ : SCEVCastExpr(scSignExtend, op, ty) {
+ assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
+ (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ "Cannot sign extend non-integer value!");
+}
+
+SCEVSignExtendExpr::~SCEVSignExtendExpr() {
+ SCEVSignExtends->erase(std::make_pair(Op, Ty));
+}
+
+void SCEVSignExtendExpr::print(raw_ostream &OS) const {
+ OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
+}
+
+// SCEVCommExprs - Only allow the creation of one SCEVCommutativeExpr for any
+// particular input. Don't use a SCEVHandle here, or else the object will never
+// be deleted!
+static ManagedStatic<std::map<std::pair<unsigned, std::vector<const SCEV*> >,
+ SCEVCommutativeExpr*> > SCEVCommExprs;
+
+SCEVCommutativeExpr::~SCEVCommutativeExpr() {
+ std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end());
+ SCEVCommExprs->erase(std::make_pair(getSCEVType(), SCEVOps));
+}
+
+void SCEVCommutativeExpr::print(raw_ostream &OS) const {
+ assert(Operands.size() > 1 && "This plus expr shouldn't exist!");
+ const char *OpStr = getOperationStr();
+ OS << "(" << *Operands[0];
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+ OS << OpStr << *Operands[i];
+ OS << ")";
+}
+
+SCEVHandle SCEVCommutativeExpr::
+replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym,
+ const SCEVHandle &Conc,
+ ScalarEvolution &SE) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ SCEVHandle H =
+ getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
+ if (H != getOperand(i)) {
+ std::vector<SCEVHandle> NewOps;
+ NewOps.reserve(getNumOperands());
+ for (unsigned j = 0; j != i; ++j)
+ NewOps.push_back(getOperand(j));
+ NewOps.push_back(H);
+ for (++i; i != e; ++i)
+ NewOps.push_back(getOperand(i)->
+ replaceSymbolicValuesWithConcrete(Sym, Conc, SE));
+
+ if (isa<SCEVAddExpr>(this))
+ return SE.getAddExpr(NewOps);
+ else if (isa<SCEVMulExpr>(this))
+ return SE.getMulExpr(NewOps);
+ else if (isa<SCEVSMaxExpr>(this))
+ return SE.getSMaxExpr(NewOps);
+ else if (isa<SCEVUMaxExpr>(this))
+ return SE.getUMaxExpr(NewOps);
+ else
+ assert(0 && "Unknown commutative expr!");
+ }
+ }
+ return this;
+}
+
+bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (!getOperand(i)->dominates(BB, DT))
+ return false;
+ }
+ return true;
+}
+
+
+// SCEVUDivs - Only allow the creation of one SCEVUDivExpr for any particular
+// input. Don't use a SCEVHandle here, or else the object will never be
+// deleted!
+static ManagedStatic<std::map<std::pair<const SCEV*, const SCEV*>,
+ SCEVUDivExpr*> > SCEVUDivs;
+
+SCEVUDivExpr::~SCEVUDivExpr() {
+ SCEVUDivs->erase(std::make_pair(LHS, RHS));
+}
+
+bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
+ return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
+}
+
+void SCEVUDivExpr::print(raw_ostream &OS) const {
+ OS << "(" << *LHS << " /u " << *RHS << ")";
+}
+
+const Type *SCEVUDivExpr::getType() const {
+ // In most cases the types of LHS and RHS will be the same, but in some
+ // crazy cases one or the other may be a pointer. ScalarEvolution doesn't
+ // depend on the type for correctness, but handling types carefully can
+ // avoid extra casts in the SCEVExpander. The LHS is more likely to be
+ // a pointer type than the RHS, so use the RHS' type here.
+ return RHS->getType();
+}
+
+// SCEVAddRecExprs - Only allow the creation of one SCEVAddRecExpr for any
+// particular input. Don't use a SCEVHandle here, or else the object will never
+// be deleted!
+static ManagedStatic<std::map<std::pair<const Loop *,
+ std::vector<const SCEV*> >,
+ SCEVAddRecExpr*> > SCEVAddRecExprs;
+
+SCEVAddRecExpr::~SCEVAddRecExpr() {
+ std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end());
+ SCEVAddRecExprs->erase(std::make_pair(L, SCEVOps));
+}
+
+SCEVHandle SCEVAddRecExpr::
+replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym,
+ const SCEVHandle &Conc,
+ ScalarEvolution &SE) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ SCEVHandle H =
+ getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
+ if (H != getOperand(i)) {
+ std::vector<SCEVHandle> NewOps;
+ NewOps.reserve(getNumOperands());
+ for (unsigned j = 0; j != i; ++j)
+ NewOps.push_back(getOperand(j));
+ NewOps.push_back(H);
+ for (++i; i != e; ++i)
+ NewOps.push_back(getOperand(i)->
+ replaceSymbolicValuesWithConcrete(Sym, Conc, SE));
+
+ return SE.getAddRecExpr(NewOps, L);
+ }
+ }
+ return this;
+}
+
+
+bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
+  // This recurrence is invariant w.r.t. QueryLoop iff QueryLoop doesn't
+  // contain L and the start is invariant.
+ // Add recurrences are never invariant in the function-body (null loop).
+ return QueryLoop &&
+ !QueryLoop->contains(L->getHeader()) &&
+ getOperand(0)->isLoopInvariant(QueryLoop);
+}
+
+
+void SCEVAddRecExpr::print(raw_ostream &OS) const {
+ OS << "{" << *Operands[0];
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+ OS << ",+," << *Operands[i];
+ OS << "}<" << L->getHeader()->getName() + ">";
+}
+
+// SCEVUnknowns - Only allow the creation of one SCEVUnknown for any particular
+// value. Don't use a SCEVHandle here, or else the object will never be
+// deleted!
+static ManagedStatic<std::map<Value*, SCEVUnknown*> > SCEVUnknowns;
+
+SCEVUnknown::~SCEVUnknown() { SCEVUnknowns->erase(V); }
+
+bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
+ // All non-instruction values are loop invariant. All instructions are loop
+ // invariant if they are not contained in the specified loop.
+ // Instructions are never considered invariant in the function body
+ // (null loop) because they are defined within the "loop".
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return L && !L->contains(I->getParent());
+ return true;
+}
+
+bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const {
+ if (Instruction *I = dyn_cast<Instruction>(getValue()))
+ return DT->dominates(I->getParent(), BB);
+ return true;
+}
+
+const Type *SCEVUnknown::getType() const {
+ return V->getType();
+}
+
+void SCEVUnknown::print(raw_ostream &OS) const {
+ WriteAsOperand(OS, V, false);
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
+ /// than the complexity of the RHS. This comparator is used to canonicalize
+ /// expressions.
+ class VISIBILITY_HIDDEN SCEVComplexityCompare {
+ LoopInfo *LI;
+ public:
+ explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {}
+
+ bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ // Primarily, sort the SCEVs by their getSCEVType().
+ if (LHS->getSCEVType() != RHS->getSCEVType())
+ return LHS->getSCEVType() < RHS->getSCEVType();
+
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS)) {
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ // Order pointer values after integer values. This helps SCEVExpander
+ // form GEPs.
+ if (isa<PointerType>(LU->getType()) && !isa<PointerType>(RU->getType()))
+ return false;
+ if (isa<PointerType>(RU->getType()) && !isa<PointerType>(LU->getType()))
+ return true;
+
+ // Compare getValueID values.
+ if (LU->getValue()->getValueID() != RU->getValue()->getValueID())
+ return LU->getValue()->getValueID() < RU->getValue()->getValueID();
+
+ // Sort arguments by their position.
+ if (const Argument *LA = dyn_cast<Argument>(LU->getValue())) {
+ const Argument *RA = cast<Argument>(RU->getValue());
+ return LA->getArgNo() < RA->getArgNo();
+ }
+
+ // For instructions, compare their loop depth, and their opcode.
+ // This is pretty loose.
+ if (Instruction *LV = dyn_cast<Instruction>(LU->getValue())) {
+ Instruction *RV = cast<Instruction>(RU->getValue());
+
+ // Compare loop depths.
+ if (LI->getLoopDepth(LV->getParent()) !=
+ LI->getLoopDepth(RV->getParent()))
+ return LI->getLoopDepth(LV->getParent()) <
+ LI->getLoopDepth(RV->getParent());
+
+ // Compare opcodes.
+ if (LV->getOpcode() != RV->getOpcode())
+ return LV->getOpcode() < RV->getOpcode();
+
+ // Compare the number of operands.
+ if (LV->getNumOperands() != RV->getNumOperands())
+ return LV->getNumOperands() < RV->getNumOperands();
+ }
+
+ return false;
+ }
+
+ // Constant sorting doesn't matter since they'll be folded.
+ if (isa<SCEVConstant>(LHS))
+ return false;
+
+ // Lexicographically compare n-ary expressions.
+ if (const SCEVNAryExpr *LC = dyn_cast<SCEVNAryExpr>(LHS)) {
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+ for (unsigned i = 0, e = LC->getNumOperands(); i != e; ++i) {
+ if (i >= RC->getNumOperands())
+ return false;
+ if (operator()(LC->getOperand(i), RC->getOperand(i)))
+ return true;
+ if (operator()(RC->getOperand(i), LC->getOperand(i)))
+ return false;
+ }
+ return LC->getNumOperands() < RC->getNumOperands();
+ }
+
+ // Lexicographically compare udiv expressions.
+ if (const SCEVUDivExpr *LC = dyn_cast<SCEVUDivExpr>(LHS)) {
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+ if (operator()(LC->getLHS(), RC->getLHS()))
+ return true;
+ if (operator()(RC->getLHS(), LC->getLHS()))
+ return false;
+ if (operator()(LC->getRHS(), RC->getRHS()))
+ return true;
+ if (operator()(RC->getRHS(), LC->getRHS()))
+ return false;
+ return false;
+ }
+
+ // Compare cast expressions by operand.
+ if (const SCEVCastExpr *LC = dyn_cast<SCEVCastExpr>(LHS)) {
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+ return operator()(LC->getOperand(), RC->getOperand());
+ }
+
+ assert(0 && "Unknown SCEV kind!");
+ return false;
+ }
+ };
+}
+
+/// GroupByComplexity - Given a list of SCEV objects, order them by their
+/// complexity, and group objects of the same complexity together by value.
+/// When this routine is finished, we know that any duplicates in the vector are
+/// consecutive and that complexity is monotonically increasing.
+///
+/// Note that we take special precautions to ensure that we get deterministic
+/// results from this routine. In other words, we don't want the results of
+/// this to depend on where the addresses of various SCEV objects happened to
+/// land in memory.
+///
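+/// For example (illustrative): the operand list (x, 2, x) is reordered to
+/// (2, x, x); constants have the lowest SCEVType so they sort first, and the
+/// duplicate x's become adjacent, letting getAddExpr fold them to (2 + 2*x).
+///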
+static void GroupByComplexity(std::vector<SCEVHandle> &Ops,
+ LoopInfo *LI) {
+ if (Ops.size() < 2) return; // Noop
+ if (Ops.size() == 2) {
+ // This is the common case, which also happens to be trivially simple.
+ // Special case it.
+ if (SCEVComplexityCompare(LI)(Ops[1], Ops[0]))
+ std::swap(Ops[0], Ops[1]);
+ return;
+ }
+
+ // Do the rough sort by complexity.
+ std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+
+ // Now that we are sorted by complexity, group elements of the same
+ // complexity. Note that this is, at worst, N^2, but the vector is likely to
+ // be extremely short in practice. Note that we take this approach because we
+ // do not want to depend on the addresses of the objects we are grouping.
+ for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
+ const SCEV *S = Ops[i];
+ unsigned Complexity = S->getSCEVType();
+
+ // If there are any objects of the same complexity and same value as this
+ // one, group them.
+ for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
+ if (Ops[j] == S) { // Found a duplicate.
+ // Move it to immediately after i'th element.
+ std::swap(Ops[i+1], Ops[j]);
+ ++i; // no need to rescan it.
+ if (i == e-2) return; // Done!
+ }
+ }
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Simple SCEV method implementations
+//===----------------------------------------------------------------------===//
+
+/// BinomialCoefficient - Compute BC(It, K). The result has width W.
+/// Assumes K > 0.
+static SCEVHandle BinomialCoefficient(SCEVHandle It, unsigned K,
+ ScalarEvolution &SE,
+ const Type* ResultTy) {
+ // Handle the simplest case efficiently.
+ if (K == 1)
+ return SE.getTruncateOrZeroExtend(It, ResultTy);
+
+ // We are using the following formula for BC(It, K):
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
+ //
+ // Suppose, W is the bitwidth of the return value. We must be prepared for
+ // overflow. Hence, we must assure that the result of our computation is
+ // equal to the accurate one modulo 2^W. Unfortunately, division isn't
+ // safe in modular arithmetic.
+ //
+ // However, this code doesn't use exactly that formula; the formula it uses
+ // is something like the following, where T is the number of factors of 2 in
+ // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
+ // exponentiation:
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
+ //
+ // This formula is trivially equivalent to the previous formula. However,
+ // this formula can be implemented much more efficiently. The trick is that
+ // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
+ // arithmetic. To do exact division in modular arithmetic, all we have
+ // to do is multiply by the inverse. Therefore, this step can be done at
+ // width W.
+ //
+ // The next issue is how to safely do the division by 2^T. The way this
+ // is done is by doing the multiplication step at a width of at least W + T
+ // bits. This way, the bottom W+T bits of the product are accurate. Then,
+ // when we perform the division by 2^T (which is equivalent to a right shift
+ // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
+ // truncated out after the division by 2^T.
+ //
+ // In comparison to just directly using the first formula, this technique
+ // is much more efficient; using the first formula requires W * K bits,
+// but this formula needs less than W + K bits. Also, the first formula requires
+ // a division step, whereas this formula only requires multiplies and shifts.
+ //
+ // It doesn't matter whether the subtraction step is done in the calculation
+ // width or the input iteration count's width; if the subtraction overflows,
+ // the result must be zero anyway. We prefer here to do it in the width of
+ // the induction variable because it helps a lot for certain cases; CodeGen
+ // isn't smart enough to ignore the overflow, which leads to much less
+ // efficient code if the width of the subtraction is wider than the native
+ // register width.
+ //
+ // (It's possible to not widen at all by pulling out factors of 2 before
+ // the multiplication; for example, K=2 can be calculated as
+ // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
+ // extra arithmetic, so it's not an obvious win, and it gets
+ // much more complicated for K > 3.)
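+  //
+  // Worked example (illustrative): for K = 3 and W = 32, K! = 6 = 2^1 * 3,
+  // so T = 1 and K!/2^T = 3. The product It*(It-1)*(It-2) is formed at
+  // W+T = 33 bits, divided by 2^1, truncated back to 32 bits, and multiplied
+  // by the multiplicative inverse of 3 modulo 2^32.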
+
+ // Protection from insane SCEVs; this bound is conservative,
+ // but it probably doesn't matter.
+ if (K > 1000)
+ return SE.getCouldNotCompute();
+
+ unsigned W = SE.getTypeSizeInBits(ResultTy);
+
+ // Calculate K! / 2^T and T; we divide out the factors of two before
+ // multiplying for calculating K! / 2^T to avoid overflow.
+ // Other overflow doesn't matter because we only care about the bottom
+ // W bits of the result.
+ APInt OddFactorial(W, 1);
+ unsigned T = 1;
+ for (unsigned i = 3; i <= K; ++i) {
+ APInt Mult(W, i);
+ unsigned TwoFactors = Mult.countTrailingZeros();
+ T += TwoFactors;
+ Mult = Mult.lshr(TwoFactors);
+ OddFactorial *= Mult;
+ }
+
+ // We need at least W + T bits for the multiplication step
+ unsigned CalculationBits = W + T;
+
+  // Calculate 2^T, at width T+W.
+ APInt DivFactor = APInt(CalculationBits, 1).shl(T);
+
+ // Calculate the multiplicative inverse of K! / 2^T;
+ // this multiplication factor will perform the exact division by
+ // K! / 2^T.
+ APInt Mod = APInt::getSignedMinValue(W+1);
+ APInt MultiplyFactor = OddFactorial.zext(W+1);
+ MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
+ MultiplyFactor = MultiplyFactor.trunc(W);
+
+ // Calculate the product, at width T+W
+ const IntegerType *CalculationTy = IntegerType::get(CalculationBits);
+ SCEVHandle Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
+ for (unsigned i = 1; i != K; ++i) {
+ SCEVHandle S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType()));
+ Dividend = SE.getMulExpr(Dividend,
+ SE.getTruncateOrZeroExtend(S, CalculationTy));
+ }
+
+ // Divide by 2^T
+ SCEVHandle DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
+
+ // Truncate the result, and divide by K! / 2^T.
+
+ return SE.getMulExpr(SE.getConstant(MultiplyFactor),
+ SE.getTruncateOrZeroExtend(DivResult, ResultTy));
+}
+
+/// evaluateAtIteration - Return the value of this chain of recurrences at
+/// the specified iteration number. We can evaluate this recurrence by
+/// multiplying each element in the chain by the binomial coefficient
+/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as:
+///
+/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
+///
+/// where BC(It, k) stands for binomial coefficient.
+///
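+/// For example (illustrative): {5,+,3} evaluates to 5 + 3*It, and
+/// {A,+,B,+,C} evaluates to A + B*It + C*(It*(It-1)/2), since
+/// BC(It,2) = It*(It-1)/2.
+///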
+SCEVHandle SCEVAddRecExpr::evaluateAtIteration(SCEVHandle It,
+ ScalarEvolution &SE) const {
+ SCEVHandle Result = getStart();
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ // The computation is correct in the face of overflow provided that the
+ // multiplication is performed _after_ the evaluation of the binomial
+ // coefficient.
+ SCEVHandle Coeff = BinomialCoefficient(It, i, SE, getType());
+ if (isa<SCEVCouldNotCompute>(Coeff))
+ return Coeff;
+
+ Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
+ }
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Expression folder implementations
+//===----------------------------------------------------------------------===//
+
+SCEVHandle ScalarEvolution::getTruncateExpr(const SCEVHandle &Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
+ "This is not a truncating conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getUnknown(
+ ConstantExpr::getTrunc(SC->getValue(), Ty));
+
+ // trunc(trunc(x)) --> trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
+ return getTruncateExpr(ST->getOperand(), Ty);
+
+ // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getTruncateOrSignExtend(SS->getOperand(), Ty);
+
+ // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+
+ // If the input value is a chrec scev made out of constants, truncate
+ // all of the constants.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ std::vector<SCEVHandle> Operands;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
+ return getAddRecExpr(Operands, AddRec->getLoop());
+ }
+
+ SCEVTruncateExpr *&Result = (*SCEVTruncates)[std::make_pair(Op, Ty)];
+ if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty);
+ return Result;
+}
+
+SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) {
+ const Type *IntTy = getEffectiveSCEVType(Ty);
+ Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy);
+ if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
+ return getUnknown(C);
+ }
+
+ // zext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can zero extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+      // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ SCEVHandle MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+ SCEVHandle Start = AR->getStart();
+ SCEVHandle Step = AR->getStepRecurrence(*this);
+
+ // Check whether the backedge-taken count can be losslessly casted to
+ // the addrec's type. The count is always unsigned.
+ SCEVHandle CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ SCEVHandle RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ const Type *WideTy =
+ IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+ // Check whether Start+Step*MaxBECount has no unsigned overflow.
+ SCEVHandle ZMul =
+ getMulExpr(CastedMaxBECount,
+ getTruncateOrZeroExtend(Step, Start->getType()));
+ SCEVHandle Add = getAddExpr(Start, ZMul);
+ SCEVHandle OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getZeroExtendExpr(Step, WideTy)));
+ if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getZeroExtendExpr(Step, Ty),
+ AR->getLoop());
+
+ // Similar to above, only this time treat the step value as signed.
+ // This covers loops that count down.
+ SCEVHandle SMul =
+ getMulExpr(CastedMaxBECount,
+ getTruncateOrSignExtend(Step, Start->getType()));
+ Add = getAddExpr(Start, SMul);
+ OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getSignExtendExpr(Step, WideTy)));
+ if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ AR->getLoop());
+ }
+ }
+ }
+
+ SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)];
+ if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty);
+ return Result;
+}
+
+SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op,
+ const Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) {
+ const Type *IntTy = getEffectiveSCEVType(Ty);
+ Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy);
+ if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
+ return getUnknown(C);
+ }
+
+ // sext(sext(x)) --> sext(x)
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getSignExtendExpr(SS->getOperand(), Ty);
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can sign extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+      // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ SCEVHandle MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+ SCEVHandle Start = AR->getStart();
+ SCEVHandle Step = AR->getStepRecurrence(*this);
+
+ // Check whether the backedge-taken count can be losslessly casted to
+ // the addrec's type. The count is always unsigned.
+ SCEVHandle CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ SCEVHandle RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ const Type *WideTy =
+ IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+ // Check whether Start+Step*MaxBECount has no signed overflow.
+ SCEVHandle SMul =
+ getMulExpr(CastedMaxBECount,
+ getTruncateOrSignExtend(Step, Start->getType()));
+ SCEVHandle Add = getAddExpr(Start, SMul);
+ SCEVHandle OperandExtendedAdd =
+ getAddExpr(getSignExtendExpr(Start, WideTy),
+ getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getSignExtendExpr(Step, WideTy)));
+ if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ getSignExtendExpr(Step, Ty),
+ AR->getLoop());
+ }
+ }
+ }
+
+ SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)];
+ if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty);
+ return Result;
+}
+
+/// getAddExpr - Get a canonical add expression, or something simpler if
+/// possible.
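+/// For example (illustrative): the operands (2, 3, x) fold to (5 + x), and
+/// (x, x) folds to (2 * x).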
+SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty add!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) ==
+ getEffectiveSCEVType(Ops[0]->getType()) &&
+ "SCEVAddExpr operand types don't match!");
+#endif
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() +
+ RHSC->getValue()->getValue());
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant zero being added, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+  // so, merge them together into a multiply expression. Since we sorted the
+ // list, these values are required to be adjacent.
+ const Type *Ty = Ops[0]->getType();
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
+ // Found a match, merge the two values into a multiply, and add any
+ // remaining values to the result.
+ SCEVHandle Two = getIntegerSCEV(2, Ty);
+ SCEVHandle Mul = getMulExpr(Ops[i], Two);
+ if (Ops.size() == 2)
+ return Mul;
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
+ Ops.push_back(Mul);
+ return getAddExpr(Ops);
+ }
+
+ // Check for truncates. If all the operands are truncated from the same
+ // type, see if factoring out the truncate would permit the result to be
+  // folded. e.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+ // if the contents of the resulting outer trunc fold to something simple.
+ for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
+ const Type *DstType = Trunc->getType();
+ const Type *SrcType = Trunc->getOperand()->getType();
+ std::vector<SCEVHandle> LargeOps;
+ bool Ok = true;
+ // Check all the operands to see if they can be represented in the
+ // source type of the truncate.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ // This could be either sign or zero extension, but sign extension
+ // is much more likely to be foldable here.
+ LargeOps.push_back(getSignExtendExpr(C, SrcType));
+ } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+ std::vector<SCEVHandle> LargeMulOps;
+ for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+ if (const SCEVTruncateExpr *T =
+ dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeMulOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C =
+ dyn_cast<SCEVConstant>(M->getOperand(j))) {
+ // This could be either sign or zero extension, but sign extension
+ // is much more likely to be foldable here.
+ LargeMulOps.push_back(getSignExtendExpr(C, SrcType));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok)
+ LargeOps.push_back(getMulExpr(LargeMulOps));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok) {
+ // Evaluate the expression in the larger type.
+ SCEVHandle Fold = getAddExpr(LargeOps);
+ // If it folds to something simple, use it. Otherwise, don't.
+ if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+ return getTruncateExpr(Fold, DstType);
+ }
+ }
+
+ // Skip past any other cast SCEVs.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+ ++Idx;
+
+ // If there are add operands they would be next.
+ if (Idx < Ops.size()) {
+ bool DeletedAdd = false;
+ while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+ // If we have an add, expand the add operands onto the end of the operands
+ // list.
+ Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
+ Ops.erase(Ops.begin()+Idx);
+ DeletedAdd = true;
+ }
+
+ // If we deleted at least one add, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+    // any operands we just acquired.
+ if (DeletedAdd)
+ return getAddExpr(Ops);
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // If we are adding something to a multiply expression, make sure the
+ // something is not already an operand of the multiply. If so, merge it into
+ // the multiply.
+ for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+ const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+ for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+ const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+ for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+ if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(MulOpSCEV)) {
+ // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
+ SCEVHandle InnerMul = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ // If the multiply has more than two operands, we must get the
+ // Y*Z term.
+ std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end());
+ MulOps.erase(MulOps.begin()+MulOp);
+ InnerMul = getMulExpr(MulOps);
+ }
+ SCEVHandle One = getIntegerSCEV(1, Ty);
+ SCEVHandle AddOne = getAddExpr(InnerMul, One);
+ SCEVHandle OuterMul = getMulExpr(AddOne, Ops[AddOp]);
+ if (Ops.size() == 2) return OuterMul;
+ if (AddOp < Idx) {
+ Ops.erase(Ops.begin()+AddOp);
+ Ops.erase(Ops.begin()+Idx-1);
+ } else {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+AddOp-1);
+ }
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+
+ // Check this multiply against other multiplies being added together.
+ for (unsigned OtherMulIdx = Idx+1;
+ OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+ ++OtherMulIdx) {
+ const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+ // If MulOp occurs in OtherMul, we can fold the two multiplies
+ // together.
+ for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+ OMulOp != e; ++OMulOp)
+ if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+ // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+ SCEVHandle InnerMul1 = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end());
+ MulOps.erase(MulOps.begin()+MulOp);
+ InnerMul1 = getMulExpr(MulOps);
+ }
+ SCEVHandle InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+ if (OtherMul->getNumOperands() != 2) {
+ std::vector<SCEVHandle> MulOps(OtherMul->op_begin(),
+ OtherMul->op_end());
+ MulOps.erase(MulOps.begin()+OMulOp);
+ InnerMul2 = getMulExpr(MulOps);
+ }
+ SCEVHandle InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ SCEVHandle OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ if (Ops.size() == 2) return OuterMul;
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+OtherMulIdx-1);
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+ }
+ }
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this add and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ std::vector<SCEVHandle> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
+ LIOps.push_back(AddRec->getStart());
+
+ std::vector<SCEVHandle> AddRecOps(AddRec->op_begin(), AddRec->op_end());
+ AddRecOps[0] = getAddExpr(LIOps);
+
+ SCEVHandle NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+      // Otherwise, add the folded AddRec to the non-liv parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getAddExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // added together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
+ if (OtherIdx != Idx) {
+ const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (AddRec->getLoop() == OtherAddRec->getLoop()) {
+ // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D}
+ std::vector<SCEVHandle> NewOps(AddRec->op_begin(), AddRec->op_end());
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
+ if (i >= NewOps.size()) {
+ NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
+ }
+ NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i));
+ }
+ SCEVHandle NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop());
+
+ if (Ops.size() == 2) return NewAddRec;
+
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+OtherIdx-1);
+ Ops.push_back(NewAddRec);
+ return getAddExpr(Ops);
+ }
+ }
+
+ // Otherwise couldn't fold anything into this recurrence. Move onto the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need an add expr. Check to see if we
+ // already have one, otherwise create a new one.
+ std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
+ SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scAddExpr,
+ SCEVOps)];
+ if (Result == 0) Result = new SCEVAddExpr(Ops);
+ return Result;
+}
+
+
+/// getMulExpr - Get a canonical multiply expression, or something simpler if
+/// possible.
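+/// For example (illustrative): the operands (2, 3, x) fold to (6 * x), and
+/// 2 * (3 + x) is distributed to (6 + 2 * x).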
+SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty mul!");
+#ifndef NDEBUG
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) ==
+ getEffectiveSCEVType(Ops[0]->getType()) &&
+ "SCEVMulExpr operand types don't match!");
+#endif
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+
+ // C1*(C2+V) -> C1*C2 + C1*V
+ if (Ops.size() == 2)
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ if (Add->getNumOperands() == 2 &&
+ isa<SCEVConstant>(Add->getOperand(0)))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+ getMulExpr(LHSC, Add->getOperand(1)));
+
+
+ ++Idx;
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() *
+ RHSC->getValue()->getValue());
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant one being multiplied, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+ // If we have a multiply of zero, it will always be zero.
+ return Ops[0];
+ }
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ // If there are mul operands inline them all into this expression.
+ if (Idx < Ops.size()) {
+ bool DeletedMul = false;
+ while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+ // If we have a mul, expand the mul operands onto the end of the operands
+ // list.
+ Ops.insert(Ops.end(), Mul->op_begin(), Mul->op_end());
+ Ops.erase(Ops.begin()+Idx);
+ DeletedMul = true;
+ }
+
+ // If we deleted at least one mul, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedMul)
+ return getMulExpr(Ops);
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // multiplied values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this mul and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ std::vector<SCEVHandle> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
+ std::vector<SCEVHandle> NewOps;
+ NewOps.reserve(AddRec->getNumOperands());
+ if (LIOps.size() == 1) {
+ const SCEV *Scale = LIOps[0];
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+ } else {
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ std::vector<SCEVHandle> MulOps(LIOps);
+ MulOps.push_back(AddRec->getOperand(i));
+ NewOps.push_back(getMulExpr(MulOps));
+ }
+ }
+
+ SCEVHandle NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, multiply the folded AddRec by the non-liv parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getMulExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // multiplied together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
+ if (OtherIdx != Idx) {
+ const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (AddRec->getLoop() == OtherAddRec->getLoop()) {
+ // F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D}
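+ // A sketch of the derivation: with F(n+1) = F(n) + B and
+ // G(n+1) = G(n) + D, the product P(n) = F(n)*G(n) steps by
+ // P(n+1) - P(n) = (F(n)+B)*(G(n)+D) - F(n)*G(n) = F*D + G*B + B*D.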
+ const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
+ SCEVHandle NewStart = getMulExpr(F->getStart(),
+ G->getStart());
+ SCEVHandle B = F->getStepRecurrence(*this);
+ SCEVHandle D = G->getStepRecurrence(*this);
+ SCEVHandle NewStep = getAddExpr(getMulExpr(F, D),
+ getMulExpr(G, B),
+ getMulExpr(B, D));
+ SCEVHandle NewAddRec = getAddRecExpr(NewStart, NewStep,
+ F->getLoop());
+ if (Ops.size() == 2) return NewAddRec;
+
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+OtherIdx-1);
+ Ops.push_back(NewAddRec);
+ return getMulExpr(Ops);
+ }
+ }
+
+ // Otherwise, we couldn't fold anything into this recurrence. Move on to the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need a mul expr. Check to see if we
+ // already have one, otherwise create a new one.
+ std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
+ SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scMulExpr,
+ SCEVOps)];
+ if (Result == 0)
+ Result = new SCEVMulExpr(Ops);
+ return Result;
+}
+
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
+SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ assert(getEffectiveSCEVType(LHS->getType()) ==
+ getEffectiveSCEVType(RHS->getType()) &&
+ "SCEVUDivExpr operand types don't match!");
+
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (RHSC->getValue()->equalsInt(1))
+ return LHS; // X udiv 1 --> X
+ if (RHSC->isZero())
+ return getIntegerSCEV(0, LHS->getType()); // value is undefined
+
+ // Determine if the division can be folded into the operands of
+ // its left-hand side.
+ // TODO: Generalize this to non-constants by using known-bits information.
+ const Type *Ty = LHS->getType();
+ unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ;
+ // For non-power-of-two values, effectively round the value up to the
+ // nearest power of two.
+ if (!RHSC->getValue()->getValue().isPowerOf2())
+ ++MaxShiftAmt;
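+ // For example, a udiv by 10 in i32 has LZ = 28, so MaxShiftAmt becomes
+ // 32 - 28 + 1 = 5 and ExtTy below is i37, wide enough that the
+ // zero-extended comparisons cannot be fooled by wrapping.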
+ const IntegerType *ExtTy =
+ IntegerType::get(getTypeSizeInBits(Ty) + MaxShiftAmt);
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
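+ // For example, {0,+,8} udiv 2 becomes {0,+,4}, provided the
+ // zero-extension comparison below shows the addrec cannot wrap.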
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
+ if (!Step->getValue()->getValue()
+ .urem(RHSC->getValue()->getValue()) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop())) {
+ std::vector<SCEVHandle> Operands;
+ for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
+ Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
+ return getAddRecExpr(Operands, AR->getLoop());
+ }
+ // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
+ std::vector<SCEVHandle> Operands;
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
+ // Find an operand that's safely divisible.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ SCEVHandle Op = M->getOperand(i);
+ SCEVHandle Div = getUDivExpr(Op, RHSC);
+ if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
+ Operands = M->getOperands();
+ Operands[i] = Div;
+ return getMulExpr(Operands);
+ }
+ }
+ }
+ // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
+ std::vector<SCEVHandle> Operands;
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
+ Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+ if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
+ Operands.clear();
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+ SCEVHandle Op = getUDivExpr(A->getOperand(i), RHS);
+ if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i))
+ break;
+ Operands.push_back(Op);
+ }
+ if (Operands.size() == A->getNumOperands())
+ return getAddExpr(Operands);
+ }
+ }
+
+ // Fold if both operands are constant.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ Constant *LHSCV = LHSC->getValue();
+ Constant *RHSCV = RHSC->getValue();
+ return getUnknown(ConstantExpr::getUDiv(LHSCV, RHSCV));
+ }
+ }
+
+ SCEVUDivExpr *&Result = (*SCEVUDivs)[std::make_pair(LHS, RHS)];
+ if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS);
+ return Result;
+}
+
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start,
+ const SCEVHandle &Step, const Loop *L) {
+ std::vector<SCEVHandle> Operands;
+ Operands.push_back(Start);
+ if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
+ if (StepChrec->getLoop() == L) {
+ Operands.insert(Operands.end(), StepChrec->op_begin(),
+ StepChrec->op_end());
+ return getAddRecExpr(Operands, L);
+ }
+
+ Operands.push_back(Step);
+ return getAddRecExpr(Operands, L);
+}
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands,
+ const Loop *L) {
+ if (Operands.size() == 1) return Operands[0];
+#ifndef NDEBUG
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Operands[i]->getType()) ==
+ getEffectiveSCEVType(Operands[0]->getType()) &&
+ "SCEVAddRecExpr operand types don't match!");
+#endif
+
+ if (Operands.back()->isZero()) {
+ Operands.pop_back();
+ return getAddRecExpr(Operands, L); // {X,+,0} --> X
+ }
+
+ // Canonicalize nested AddRecs by nesting them in order of loop depth.
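+ // For example, {{X,+,Y}<inner>,+,Z} built for an outer loop becomes
+ // {{X,+,Z}<outer>,+,Y} over the inner loop.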
+ if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
+ const Loop* NestedLoop = NestedAR->getLoop();
+ if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
+ std::vector<SCEVHandle> NestedOperands(NestedAR->op_begin(),
+ NestedAR->op_end());
+ SCEVHandle NestedARHandle(NestedAR);
+ Operands[0] = NestedAR->getStart();
+ NestedOperands[0] = getAddRecExpr(Operands, L);
+ return getAddRecExpr(NestedOperands, NestedLoop);
+ }
+ }
+
+ std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end());
+ SCEVAddRecExpr *&Result = (*SCEVAddRecExprs)[std::make_pair(L, SCEVOps)];
+ if (Result == 0) Result = new SCEVAddRecExpr(Operands, L);
+ return Result;
+}
+
+SCEVHandle ScalarEvolution::getSMaxExpr(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ std::vector<SCEVHandle> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getSMaxExpr(Ops);
+}
+
+SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) {
+ assert(!Ops.empty() && "Cannot get empty smax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) ==
+ getEffectiveSCEVType(Ops[0]->getType()) &&
+ "SCEVSMaxExpr operand types don't match!");
+#endif
+
+ // Sort by complexity; this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(
+ APIntOps::smax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant -inf, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ // Find the first SMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is an SMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedSMax = false;
+ while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+ Ops.insert(Ops.end(), SMax->op_begin(), SMax->op_end());
+ Ops.erase(Ops.begin()+Idx);
+ DeletedSMax = true;
+ }
+
+ if (DeletedSMax)
+ return getSMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ if (Ops[i] == Ops[i+1]) { // X smax Y smax Y --> X smax Y
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced smax down to nothing!");
+
+ // Okay, it looks like we really DO need an smax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
+ SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scSMaxExpr,
+ SCEVOps)];
+ if (Result == 0) Result = new SCEVSMaxExpr(Ops);
+ return Result;
+}
+
+SCEVHandle ScalarEvolution::getUMaxExpr(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ std::vector<SCEVHandle> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getUMaxExpr(Ops);
+}
+
+SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) {
+ assert(!Ops.empty() && "Cannot get empty umax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) ==
+ getEffectiveSCEVType(Ops[0]->getType()) &&
+ "SCEVUMaxExpr operand types don't match!");
+#endif
+
+ // Sort by complexity; this groups all similar expression types together.
+ GroupByComplexity(Ops, LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(
+ APIntOps::umax(LHSC->getValue()->getValue(),
+ RHSC->getValue()->getValue()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant zero, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ // Find the first UMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is a UMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedUMax = false;
+ while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
+ Ops.insert(Ops.end(), UMax->op_begin(), UMax->op_end());
+ Ops.erase(Ops.begin()+Idx);
+ DeletedUMax = true;
+ }
+
+ if (DeletedUMax)
+ return getUMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ if (Ops[i] == Ops[i+1]) { // X umax Y umax Y --> X umax Y
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced umax down to nothing!");
+
+ // Okay, it looks like we really DO need a umax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
+ SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scUMaxExpr,
+ SCEVOps)];
+ if (Result == 0) Result = new SCEVUMaxExpr(Ops);
+ return Result;
+}
+
+SCEVHandle ScalarEvolution::getUnknown(Value *V) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return getConstant(CI);
+ if (isa<ConstantPointerNull>(V))
+ return getIntegerSCEV(0, V->getType());
+ SCEVUnknown *&Result = (*SCEVUnknowns)[V];
+ if (Result == 0) Result = new SCEVUnknown(V);
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This primarily includes integer types, and it
+/// can optionally include pointer types if the ScalarEvolution class
+/// has access to target-specific information.
+bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+ // Integers are always SCEVable.
+ if (Ty->isInteger())
+ return true;
+
+ // Pointers are SCEVable if TargetData information is available
+ // to provide pointer size information.
+ if (isa<PointerType>(Ty))
+ return TD != NULL;
+
+ // Otherwise it's not SCEVable.
+ return false;
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ // If we have a TargetData, use it!
+ if (TD)
+ return TD->getTypeSizeInBits(Ty);
+
+ // Otherwise, we support only integer types.
+ assert(Ty->isInteger() && "isSCEVable permitted a non-SCEVable type!");
+ return Ty->getPrimitiveSizeInBits();
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ if (Ty->isInteger())
+ return Ty;
+
+ assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!");
+ return TD->getIntPtrType();
+}
+
+SCEVHandle ScalarEvolution::getCouldNotCompute() {
+ return UnknownValue;
+}
+
+/// hasSCEV - Return true if the SCEV for this value has already been
+/// computed.
+bool ScalarEvolution::hasSCEV(Value *V) const {
+ return Scalars.count(V);
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+SCEVHandle ScalarEvolution::getSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+ std::map<SCEVCallbackVH, SCEVHandle>::iterator I = Scalars.find(V);
+ if (I != Scalars.end()) return I->second;
+ SCEVHandle S = createSCEV(V);
+ Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ return S;
+}
+
+/// getIntegerSCEV - Given an integer or FP type, create a constant for the
+/// specified signed integer value and return a SCEV for the constant.
+SCEVHandle ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
+ Ty = getEffectiveSCEVType(Ty);
+ Constant *C;
+ if (Val == 0)
+ C = Constant::getNullValue(Ty);
+ else if (Ty->isFloatingPoint())
+ C = ConstantFP::get(APFloat(Ty==Type::FloatTy ? APFloat::IEEEsingle :
+ APFloat::IEEEdouble, Val));
+ else
+ C = ConstantInt::get(Ty, Val);
+ return getUnknown(C);
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+SCEVHandle ScalarEvolution::getNegativeSCEV(const SCEVHandle &V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getUnknown(ConstantExpr::getNeg(VC->getValue()));
+
+ const Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ return getMulExpr(V, getConstant(ConstantInt::getAllOnesValue(Ty)));
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+SCEVHandle ScalarEvolution::getNotSCEV(const SCEVHandle &V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getUnknown(ConstantExpr::getNot(VC->getValue()));
+
+ const Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ SCEVHandle AllOnes = getConstant(ConstantInt::getAllOnesValue(Ty));
+ return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return a SCEV corresponding to LHS - RHS.
+///
+SCEVHandle ScalarEvolution::getMinusSCEV(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ // X - Y --> X + -Y
+ return getAddExpr(LHS, getNegativeSCEV(RHS));
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended.
+SCEVHandle
+ScalarEvolution::getTruncateOrZeroExtend(const SCEVHandle &V,
+ const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+ (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended.
+SCEVHandle
+ScalarEvolution::getTruncateOrSignExtend(const SCEVHandle &V,
+ const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+ (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended. The conversion must not be narrowing.
+SCEVHandle
+ScalarEvolution::getNoopOrZeroExtend(const SCEVHandle &V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+ (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ "Cannot noop or zero extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrZeroExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended. The conversion must not be narrowing.
+SCEVHandle
+ScalarEvolution::getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+ (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ "Cannot noop or sign extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrSignExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. The conversion must not be widening.
+SCEVHandle
+ScalarEvolution::getTruncateOrNoop(const SCEVHandle &V, const Type *Ty) {
+ const Type *SrcTy = V->getType();
+ assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+ (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+ "Cannot truncate or noop with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
+ "getTruncateOrNoop cannot extend!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getTruncateExpr(V, Ty);
+}
+
+/// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for
+/// the specified instruction and replaces any references to the symbolic value
+/// SymName with the specified value. This is used during PHI resolution.
+void ScalarEvolution::
+ReplaceSymbolicValueWithConcrete(Instruction *I, const SCEVHandle &SymName,
+ const SCEVHandle &NewVal) {
+ std::map<SCEVCallbackVH, SCEVHandle>::iterator SI =
+ Scalars.find(SCEVCallbackVH(I, this));
+ if (SI == Scalars.end()) return;
+
+ SCEVHandle NV =
+ SI->second->replaceSymbolicValuesWithConcrete(SymName, NewVal, *this);
+ if (NV == SI->second) return; // No change.
+
+ SI->second = NV; // Update the scalars map!
+
+ // Any instruction values that use this instruction might also need to be
+ // updated!
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ ReplaceSymbolicValueWithConcrete(cast<Instruction>(*UI), SymName, NewVal);
+}
+
+/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
+/// a loop header, making it a potential recurrence, or it doesn't.
+///
+SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) {
+ if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized.
+ if (const Loop *L = LI->getLoopFor(PN->getParent()))
+ if (L->getHeader() == PN->getParent()) {
+ // If it lives in the loop header, it has two incoming values, one
+ // from outside the loop, and one from inside.
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
+
+ // While we are analyzing this PHI node, handle its value symbolically.
+ SCEVHandle SymbolicName = getUnknown(PN);
+ assert(Scalars.find(PN) == Scalars.end() &&
+ "PHI node already processed?");
+ Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ SCEVHandle BEValue = getSCEV(PN->getIncomingValue(BackEdge));
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
+
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ std::vector<SCEVHandle> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ SCEVHandle Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (Accum->isLoopInvariant(L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEVHandle StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
+ SCEVHandle PHISCEV = getAddRecExpr(StartVal, Accum, L);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and update all of the
+ // entries for the scalars that use the PHI (except for the PHI
+ // itself) to use the new analyzed value instead of the "symbolic"
+ // value.
+ ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV);
+ return PHISCEV;
+ }
+ }
+ } else if (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(BEValue)) {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+ // Because the other in-value of i (0) fits the evolution of BEValue,
+ // i really is an addrec evolution.
+ if (AddRec->getLoop() == L && AddRec->isAffine()) {
+ SCEVHandle StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
+
+ // If StartVal = j.start - j.stride, we can use StartVal as the
+ // start value of the addrec evolution.
+ if (StartVal == getMinusSCEV(AddRec->getOperand(0),
+ AddRec->getOperand(1))) {
+ SCEVHandle PHISCEV =
+ getAddRecExpr(StartVal, AddRec->getOperand(1), L);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and update all of the
+ // entries for the scalars that use the PHI (except for the PHI
+ // itself) to use the new analyzed value instead of the "symbolic"
+ // value.
+ ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV);
+ return PHISCEV;
+ }
+ }
+ }
+
+ return SymbolicName;
+ }
+
+ // If it's not a loop phi, we can't handle it yet.
+ return getUnknown(PN);
+}
+
+/// createNodeForGEP - Expand GEP instructions into add and multiply
+/// operations. This allows them to be analyzed by regular SCEV code.
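+/// For example, assuming a target where i32 has a 4-byte allocation size,
+/// a gep of i32* %p by a single index %i maps to %p + 4*%i.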
+///
+SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) {
+
+ const Type *IntPtrTy = TD->getIntPtrType();
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
+ return getUnknown(GEP);
+ SCEVHandle TotalOffset = getIntegerSCEV(0, IntPtrTy);
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()),
+ E = GEP->op_end();
+ I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ const StructLayout &SL = *TD->getStructLayout(STy);
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ uint64_t Offset = SL.getElementOffset(FieldNo);
+ TotalOffset = getAddExpr(TotalOffset,
+ getIntegerSCEV(Offset, IntPtrTy));
+ } else {
+ // For an array, add the element offset, explicitly scaled.
+ SCEVHandle LocalOffset = getSCEV(Index);
+ if (!isa<PointerType>(LocalOffset->getType()))
+ // Getelementptr indices are signed.
+ LocalOffset = getTruncateOrSignExtend(LocalOffset,
+ IntPtrTy);
+ LocalOffset =
+ getMulExpr(LocalOffset,
+ getIntegerSCEV(TD->getTypeAllocSize(*GTI),
+ IntPtrTy));
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ }
+ }
+ return getAddExpr(getSCEV(Base), TotalOffset);
+}
+
+/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+/// guaranteed to end in (at every loop iteration). It is, at the same time,
+/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
+/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
+static uint32_t GetMinTrailingZeros(SCEVHandle S, const ScalarEvolution &SE) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return C->getValue()->getValue().countTrailingZeros();
+
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
+ return std::min(GetMinTrailingZeros(T->getOperand(), SE),
+ (uint32_t)SE.getTypeSizeInBits(T->getType()));
+
+ if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE);
+ return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ?
+ SE.getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE);
+ return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ?
+ SE.getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE);
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE));
+ return MinOpRes;
+ }
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ // The result is the sum of all operands' results.
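+ // For example, 4*3 = 12 ends in 2 zero bits: 2 (from 4) plus 0 (from 3).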
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+ uint32_t BitWidth = SE.getTypeSizeInBits(M->getType());
+ for (unsigned i = 1, e = M->getNumOperands();
+ SumOpRes != BitWidth && i != e; ++i)
+ SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i), SE),
+ BitWidth);
+ return SumOpRes;
+ }
+
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE);
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE));
+ return MinOpRes;
+ }
+
+ if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
+ // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE));
+ return MinOpRes;
+ }
+
+ if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
+ // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE));
+ return MinOpRes;
+ }
+
+ // SCEVUDivExpr, SCEVUnknown
+ return 0;
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value.
+/// Analyze the expression.
+///
+SCEVHandle ScalarEvolution::createSCEV(Value *V) {
+ if (!isSCEVable(V->getType()))
+ return getUnknown(V);
+
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Opcode = I->getOpcode();
+ else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ Opcode = CE->getOpcode();
+ else
+ return getUnknown(V);
+
+ User *U = cast<User>(V);
+ switch (Opcode) {
+ case Instruction::Add:
+ return getAddExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::Mul:
+ return getMulExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::UDiv:
+ return getUDivExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::Sub:
+ return getMinusSCEV(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::And:
+ // For an expression like x&255 that merely masks off the high bits,
+ // use zext(trunc(x)) as the SCEV expression.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ if (CI->isNullValue())
+ return getSCEV(U->getOperand(1));
+ if (CI->isAllOnesValue())
+ return getSCEV(U->getOperand(0));
+ const APInt &A = CI->getValue();
+ unsigned Ones = A.countTrailingOnes();
+ if (APIntOps::isMask(Ones, A))
+ return
+ getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
+ IntegerType::get(Ones)),
+ U->getType());
+ }
+ break;
+ case Instruction::Or:
+ // If the RHS of the Or is a constant, we may have something like:
+ // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
+ // optimizations will transparently handle this case.
+ //
+ // In order for this transformation to be safe, the LHS must be of the
+ // form X*(2^n) and the Or constant must be less than 2^n.
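+ // For example, if X*4 is known to end in two zero bits and the constant
+ // is 1 (which is less than 4), no bits overlap, so X*4|1 == X*4+1.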
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ SCEVHandle LHS = getSCEV(U->getOperand(0));
+ const APInt &CIVal = CI->getValue();
+ if (GetMinTrailingZeros(LHS, *this) >=
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros()))
+ return getAddExpr(LHS, getSCEV(U->getOperand(1)));
+ }
+ break;
+ case Instruction::Xor:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // If the RHS of the xor is a signbit, then this is just an add.
+ // Instcombine turns add of signbit into xor as a strength reduction step.
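+ // For example, in i8 arithmetic x ^ 0x80 equals x + 0x80 for every x:
+ // adding the sign bit never carries into the lower bits.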
+ if (CI->getValue().isSignBit())
+ return getAddExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+
+ // If the RHS of xor is -1, then this is a not operation.
+ if (CI->isAllOnesValue())
+ return getNotSCEV(getSCEV(U->getOperand(0)));
+
+ // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
+ // This is a variant of the check for xor with -1, and it handles
+ // the case where instcombine has trimmed non-demanded bits out
+ // of an xor with -1.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
+ if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO->getOpcode() == Instruction::And &&
+ LCI->getValue() == CI->getValue())
+ if (const SCEVZeroExtendExpr *Z =
+ dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0))))
+ return getZeroExtendExpr(getNotSCEV(Z->getOperand()),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Shl:
+ // Turn shift left of a constant amount into a multiply.
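+ // For example, X << 3 becomes X * 8; the constant built below is 1 << 3.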
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ Constant *X = ConstantInt::get(
+ APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::LShr:
+ // Turn logical shift right of a constant into an unsigned divide.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ Constant *X = ConstantInt::get(
+ APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth)));
+ return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::AShr:
+ // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
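+ // For example, in i32, (X << 24) >>s 24 has Amt = 32 - 24 = 8 and
+ // becomes sext(trunc(X to i8) to i32).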
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (Instruction *L = dyn_cast<Instruction>(U->getOperand(0)))
+ if (L->getOpcode() == Instruction::Shl &&
+ L->getOperand(1) == U->getOperand(1)) {
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ uint64_t Amt = BitWidth - CI->getZExtValue();
+ if (Amt == BitWidth)
+ return getSCEV(L->getOperand(0)); // shift by zero --> noop
+ if (Amt > BitWidth)
+ return getIntegerSCEV(0, U->getType()); // value is undefined
+ return
+ getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+ IntegerType::get(Amt)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Trunc:
+ return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::ZExt:
+ return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::SExt:
+ return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::BitCast:
+ // BitCasts are no-op casts so we just eliminate the cast.
+ if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+ return getSCEV(U->getOperand(0));
+ break;
+
+ case Instruction::IntToPtr:
+ if (!TD) break; // Without TD we can't analyze pointers.
+ return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)),
+ TD->getIntPtrType());
+
+ case Instruction::PtrToInt:
+ if (!TD) break; // Without TD we can't analyze pointers.
+ return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)),
+ U->getType());
+
+ case Instruction::GetElementPtr:
+ if (!TD) break; // Without TD we can't analyze pointers.
+ return createNodeForGEP(U);
+
+ case Instruction::PHI:
+ return createNodeForPHI(cast<PHINode>(U));
+
+ case Instruction::Select:
+ // This could be a smax or umax that was lowered earlier.
+ // Try to recover it.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ switch (ICI->getPredicate()) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
+ return getSMaxExpr(getSCEV(LHS), getSCEV(RHS));
+ else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
+ // ~smax(~x, ~y) == smin(x, y).
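+ // (Since ~z = -1-z, ~smax(~x, ~y) = -1 - max(-1-x, -1-y) = min(x, y).)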
+ return getNotSCEV(getSMaxExpr(
+ getNotSCEV(getSCEV(LHS)),
+ getNotSCEV(getSCEV(RHS))));
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
+ return getUMaxExpr(getSCEV(LHS), getSCEV(RHS));
+ else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
+ // ~umax(~x, ~y) == umin(x, y)
+ return getNotSCEV(getUMaxExpr(getNotSCEV(getSCEV(LHS)),
+ getNotSCEV(getSCEV(RHS))));
+ break;
+ default:
+ break;
+ }
+ }
+
+ default: // We cannot analyze this expression.
+ break;
+ }
+
+ return getUnknown(V);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Iteration Count Computation Code
+//
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
+///
+/// Note that it is not valid to call this method on a loop without a
+/// loop-invariant backedge-taken count (see
+/// hasLoopInvariantBackedgeTakenCount).
+///
+SCEVHandle ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).Exact;
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+SCEVHandle ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).Max;
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+ // Initially insert a CouldNotCompute for this loop. If the insertion
+ // succeeds, proceed to actually compute a backedge-taken count and
+ // update the value. The temporary CouldNotCompute value tells SCEV
+ // code elsewhere that it shouldn't attempt to request a new
+ // backedge-taken count, which could result in infinite recursion.
+ std::pair<std::map<const Loop*, BackedgeTakenInfo>::iterator, bool> Pair =
+ BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+ if (Pair.second) {
+ BackedgeTakenInfo ItCount = ComputeBackedgeTakenCount(L);
+ if (ItCount.Exact != UnknownValue) {
+ assert(ItCount.Exact->isLoopInvariant(L) &&
+ ItCount.Max->isLoopInvariant(L) &&
+ "Computed trip count isn't loop invariant for loop!");
+ ++NumTripCountsComputed;
+
+ // Update the value in the map.
+ Pair.first->second = ItCount;
+ } else if (isa<PHINode>(L->getHeader()->begin())) {
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
+ }
+
+ // Now that we know more about the trip count for this loop, forget any
+ // existing SCEV values for PHI nodes in this loop since they are only
+ // conservative estimates made without the benefit
+ // of trip count information.
+ if (ItCount.hasAnyInfo())
+ forgetLoopPHIs(L);
+ }
+ return Pair.first->second;
+}
+
+/// forgetLoopBackedgeTakenCount - This method should be called by the
+/// client when it has changed a loop in a way that may affect
+/// ScalarEvolution's ability to compute a trip count, or if the loop
+/// is deleted.
+void ScalarEvolution::forgetLoopBackedgeTakenCount(const Loop *L) {
+ BackedgeTakenCounts.erase(L);
+ forgetLoopPHIs(L);
+}
+
+/// forgetLoopPHIs - Delete the memoized SCEVs associated with the
+/// PHI nodes in the given loop. This is used when the trip count of
+/// the loop may have changed.
+void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
+ BasicBlock *Header = L->getHeader();
+
+ // Push all Loop-header PHIs onto the Worklist stack, except those
+ // that are presently represented via a SCEVUnknown. SCEVUnknown for
+ // a PHI either means that it has an unrecognized structure, or it's
+ // a PHI that's in the process of being computed by createNodeForPHI.
+ // In the former case, additional loop trip count information isn't
+ // going to change anything. In the latter case, createNodeForPHI will
+ // perform the necessary updates on its own when it gets to that point.
+ SmallVector<Instruction *, 16> Worklist;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ std::map<SCEVCallbackVH, SCEVHandle>::iterator It = Scalars.find((Value*)I);
+ if (It != Scalars.end() && !isa<SCEVUnknown>(It->second))
+ Worklist.push_back(PN);
+ }
+
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (Scalars.erase(I))
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(cast<Instruction>(UI));
+ }
+}
+
+/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+ // If the loop doesn't have exactly one exit block, we can't analyze it.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1) return UnknownValue;
+
+ // Okay, there is one exit block. Try to find the condition that causes the
+ // loop to be exited.
+ BasicBlock *ExitBlock = ExitBlocks[0];
+
+ BasicBlock *ExitingBlock = 0;
+ for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock);
+ PI != E; ++PI)
+ if (L->contains(*PI)) {
+ if (ExitingBlock == 0)
+ ExitingBlock = *PI;
+ else
+ return UnknownValue; // More than one block exiting!
+ }
+ assert(ExitingBlock && "No exits from loop, something is broken!");
+
+ // Okay, we've computed the exiting block. See what condition causes us to
+ // exit.
+ //
+ // FIXME: we should be able to handle switch instructions (with a single exit)
+ BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (ExitBr == 0) return UnknownValue;
+ assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+
+ // At this point, we know we have a conditional branch that determines whether
+ // the loop is exited. However, we don't know if the branch is executed each
+ // time through the loop. If not, then the execution count of the branch will
+ // not be equal to the trip count of the loop.
+ //
+ // Currently we check for this by checking to see if the Exit branch goes to
+ // the loop header. If so, we know it will always execute the same number of
+ // times as the loop. We also handle the case where the exit block *is* the
+ // loop header. This is common for un-rotated loops. More extensive analysis
+ // could be done to handle more cases here.
+ if (ExitBr->getSuccessor(0) != L->getHeader() &&
+ ExitBr->getSuccessor(1) != L->getHeader() &&
+ ExitBr->getParent() != L->getHeader())
+ return UnknownValue;
+
+ ICmpInst *ExitCond = dyn_cast<ICmpInst>(ExitBr->getCondition());
+
+ // If it's not an integer or pointer comparison then compute it the hard way.
+ if (ExitCond == 0)
+ return ComputeBackedgeTakenCountExhaustively(L, ExitBr->getCondition(),
+ ExitBr->getSuccessor(0) == ExitBlock);
+
+ // If the condition was exit on true, convert the condition to exit on false
+ ICmpInst::Predicate Cond;
+ if (ExitBr->getSuccessor(1) == ExitBlock)
+ Cond = ExitCond->getPredicate();
+ else
+ Cond = ExitCond->getInversePredicate();
+
+ // Handle common loops like: for (X = "string"; *X; ++X)
+ if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
+ if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
+ SCEVHandle ItCnt =
+ ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+ if (!isa<SCEVCouldNotCompute>(ItCnt)) return ItCnt;
+ }
+
+ SCEVHandle LHS = getSCEV(ExitCond->getOperand(0));
+ SCEVHandle RHS = getSCEV(ExitCond->getOperand(1));
+
+ // Try to evaluate any dependencies out of the loop.
+ LHS = getSCEVAtScope(LHS, L);
+ RHS = getSCEVAtScope(RHS, L);
+
+ // At this point, we would like to compute how many iterations of the
+ // loop the predicate will return true for these inputs.
+ if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) {
+ // If only the LHS is loop invariant, swap it into the RHS.
+ std::swap(LHS, RHS);
+ Cond = ICmpInst::getSwappedPredicate(Cond);
+ }
+
+ // If we have a comparison of a chrec against a constant, try to use value
+ // ranges to answer this query.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (AddRec->getLoop() == L) {
+ // Form the constant range.
+ ConstantRange CompRange(
+ ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+
+ SCEVHandle Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
+ }
+
+ switch (Cond) {
+ case ICmpInst::ICMP_NE: { // while (X != Y)
+ // Convert to: while (X-Y != 0)
+ SCEVHandle TC = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ if (!isa<SCEVCouldNotCompute>(TC)) return TC;
+ break;
+ }
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ SCEVHandle TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (!isa<SCEVCouldNotCompute>(TC)) return TC;
+ break;
+ }
+ case ICmpInst::ICMP_SLT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, true);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_ULT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+ getNotSCEV(RHS), L, false);
+ if (BTI.hasAnyInfo()) return BTI;
+ break;
+ }
+ default:
+#if 0
+ errs() << "ComputeBackedgeTakenCount ";
+ if (ExitCond->getOperand(0)->getType()->isUnsigned())
+ errs() << "[unsigned] ";
+ errs() << *LHS << " "
+ << Instruction::getOpcodeName(Instruction::ICmp)
+ << " " << *RHS << "\n";
+#endif
+ break;
+ }
+ return
+ ComputeBackedgeTakenCountExhaustively(L, ExitCond,
+ ExitBr->getSuccessor(0) == ExitBlock);
+}
+
+static ConstantInt *
+EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
+ ScalarEvolution &SE) {
+ SCEVHandle InVal = SE.getConstant(C);
+ SCEVHandle Val = AddRec->evaluateAtIteration(InVal, SE);
+ assert(isa<SCEVConstant>(Val) &&
+ "Evaluation of SCEV at constant didn't fold correctly?");
+ return cast<SCEVConstant>(Val)->getValue();
+}
+
+/// GetAddressedElementFromGlobal - Given a global variable with an initializer
+/// and a GEP expression (missing the pointer index) indexing into it, return
+/// the addressed element of the initializer or null if the index expression is
+/// invalid.
+static Constant *
+GetAddressedElementFromGlobal(GlobalVariable *GV,
+ const std::vector<ConstantInt*> &Indices) {
+ Constant *Init = GV->getInitializer();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ uint64_t Idx = Indices[i]->getZExtValue();
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+ assert(Idx < CS->getNumOperands() && "Bad struct index!");
+ Init = cast<Constant>(CS->getOperand(Idx));
+ } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ if (Idx >= CA->getNumOperands()) return 0; // Bogus program
+ Init = cast<Constant>(CA->getOperand(Idx));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ assert(Idx < STy->getNumElements() && "Bad struct index!");
+ Init = Constant::getNullValue(STy->getElementType(Idx));
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
+ if (Idx >= ATy->getNumElements()) return 0; // Bogus program
+ Init = Constant::getNullValue(ATy->getElementType());
+ } else {
+ assert(0 && "Unknown constant aggregate type!");
+ }
+ return 0;
+ } else {
+ return 0; // Unknown initializer type
+ }
+ }
+ return Init;
+}
+
+/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// 'icmp op load X, cst', try to see if we can compute the backedge
+/// execution count.
+SCEVHandle ScalarEvolution::
+ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate predicate) {
+ if (LI->isVolatile()) return UnknownValue;
+
+ // Check to see if the loaded pointer is a getelementptr of a global.
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+ if (!GEP) return UnknownValue;
+
+ // Make sure that it is really a constant global we are gepping, with an
+ // initializer, and make sure the first IDX is really 0.
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+ GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
+ !cast<Constant>(GEP->getOperand(1))->isNullValue())
+ return UnknownValue;
+
+ // Okay, we allow one non-constant index into the GEP instruction.
+ Value *VarIdx = 0;
+ std::vector<ConstantInt*> Indexes;
+ unsigned VarIdxNum = 0;
+ for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ Indexes.push_back(CI);
+ } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
+ if (VarIdx) return UnknownValue; // Multiple non-constant idx's.
+ VarIdx = GEP->getOperand(i);
+ VarIdxNum = i-2;
+ Indexes.push_back(0);
+ }
+
+ // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
+ // Check to see if X is now a loop-variant value.
+ SCEVHandle Idx = getSCEV(VarIdx);
+ Idx = getSCEVAtScope(Idx, L);
+
+ // We can only recognize very limited forms of loop index expressions, in
+ // particular, only affine AddRec's like {C1,+,C2}.
+ const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
+ if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(1)))
+ return UnknownValue;
+
+ unsigned MaxSteps = MaxBruteForceIterations;
+ for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
+ ConstantInt *ItCst =
+ ConstantInt::get(IdxExpr->getType(), IterationNum);
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
+
+ // Form the GEP offset.
+ Indexes[VarIdxNum] = Val;
+
+ Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+ if (Result == 0) break; // Cannot compute!
+
+ // Evaluate the condition for this iteration.
+ Result = ConstantExpr::getICmp(predicate, Result, RHS);
+ if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
+ if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+#if 0
+ errs() << "\n***\n*** Computed loop count " << *ItCst
+ << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
+ << "***\n";
+#endif
+ ++NumArrayLenItCounts;
+ return getConstant(ItCst); // Found terminating iteration!
+ }
+ }
+ return UnknownValue;
+}
+
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+ if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+ return true;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ return canConstantFoldCallTo(F);
+ return false;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must be either constants or values
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+ // If this is not an instruction, or if this is an instruction outside of the
+ // loop, it can't be derived from a loop PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !L->contains(I->getParent())) return 0;
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (L->getHeader() == I->getParent())
+ return PN;
+ else
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return 0;
+ }
+
+ // If we won't be able to constant fold this expression even if the operands
+ // are constants, return early.
+ if (!CanConstantFold(I)) return 0;
+
+ // Otherwise, we can evaluate this instruction if all of its operands are
+ // constant or derived from a PHI node themselves.
+ PHINode *PHI = 0;
+ for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
+ if (!(isa<Constant>(I->getOperand(Op)) ||
+ isa<GlobalValue>(I->getOperand(Op)))) {
+ PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
+ if (P == 0) return 0; // Not evolving from PHI
+ if (PHI == 0)
+ PHI = P;
+ else if (PHI != P)
+ return 0; // Evolving from multiple different PHIs.
+ }
+
+ // This is an expression evolving from a constant PHI!
+ return PHI;
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal. If we can't fold this expression for some
+/// reason, return null.
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
+ if (isa<PHINode>(V)) return PHIVal;
+ if (Constant *C = dyn_cast<Constant>(V)) return C;
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV;
+ Instruction *I = cast<Instruction>(V);
+
+ std::vector<Constant*> Operands;
+ Operands.resize(I->getNumOperands());
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal);
+ if (Operands[i] == 0) return 0;
+ }
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(),
+ &Operands[0], Operands.size());
+ else
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size());
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified PHI is
+/// in the header of its containing loop, that the loop executes a
+/// constant number of times, and that the PHI node is just a recurrence
+/// involving constants, fold it.
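+///
+/// For example (illustrative): a PHI starting at 0 whose backedge value is
+/// (PHI * 2 + 1), in a loop whose backedge is taken 4 times, evolves through
+/// 0, 1, 3, 7 and folds to the exit value 15.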
+Constant *ScalarEvolution::
+getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L){
+ std::map<PHINode*, Constant*>::iterator I =
+ ConstantEvolutionLoopExitValue.find(PN);
+ if (I != ConstantEvolutionLoopExitValue.end())
+ return I->second;
+
+  if (BEs.ugt(APInt(BEs.getBitWidth(), MaxBruteForceIterations)))
+ return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
+
+ Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+ // Since the loop is canonicalized, the PHI node must have two entries. One
+ // entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ Constant *StartCST =
+ dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0)
+ return RetVal = 0; // Must be a constant.
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ PHINode *PN2 = getConstantEvolvingPHI(BEValue, L);
+ if (PN2 != PN)
+ return RetVal = 0; // Not derived from same PHI.
+
+ // Execute the loop symbolically to determine the exit value.
+ if (BEs.getActiveBits() >= 32)
+ return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+
+ unsigned NumIterations = BEs.getZExtValue(); // must be in range
+ unsigned IterationNum = 0;
+ for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+ if (IterationNum == NumIterations)
+ return RetVal = PHIVal; // Got exit value!
+
+ // Compute the value of the PHI node for the next iteration.
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal);
+ if (NextPHI == PHIVal)
+ return RetVal = NextPHI; // Stopped evolving!
+ if (NextPHI == 0)
+ return 0; // Couldn't evaluate!
+ PHIVal = NextPHI;
+ }
+}
+
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// constant number of times (the condition evolves only from constants),
+/// try to evaluate a few iterations of the loop until the exit condition
+/// gets a value of ExitWhen (true or false). If we cannot evaluate the
+/// trip count of the loop, return UnknownValue.
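+///
+/// For example (illustrative): with a PHI starting at 1, a backedge value of
+/// (PHI * 3), and an exit condition of (PHI == 27) with ExitWhen true, the
+/// PHI evolves through 1, 3, 9, 27 and the backedge-taken count is 3.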
+SCEVHandle ScalarEvolution::
+ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) {
+ PHINode *PN = getConstantEvolvingPHI(Cond, L);
+ if (PN == 0) return UnknownValue;
+
+ // Since the loop is canonicalized, the PHI node must have two entries. One
+ // entry must be a constant (coming in from outside of the loop), and the
+ // second must be derived from the same PHI.
+ bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+ Constant *StartCST =
+ dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) return UnknownValue; // Must be a constant.
+
+ Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ PHINode *PN2 = getConstantEvolvingPHI(BEValue, L);
+ if (PN2 != PN) return UnknownValue; // Not derived from same PHI.
+
+  // Okay, we found a PHI node that defines the trip count of this loop. Execute
+ // the loop symbolically to determine when the condition gets a value of
+ // "ExitWhen".
+ unsigned IterationNum = 0;
+ unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ for (Constant *PHIVal = StartCST;
+ IterationNum != MaxIterations; ++IterationNum) {
+ ConstantInt *CondVal =
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal));
+
+ // Couldn't symbolically evaluate.
+ if (!CondVal) return UnknownValue;
+
+ if (CondVal->getValue() == uint64_t(ExitWhen)) {
+ ConstantEvolutionLoopExitValue[PN] = PHIVal;
+ ++NumBruteForceTripCountsComputed;
+ return getConstant(ConstantInt::get(Type::Int32Ty, IterationNum));
+ }
+
+ // Compute the value of the PHI node for the next iteration.
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal);
+ if (NextPHI == 0 || NextPHI == PHIVal)
+ return UnknownValue; // Couldn't evaluate or not making progress...
+ PHIVal = NextPHI;
+ }
+
+ // Too many iterations were needed to evaluate.
+ return UnknownValue;
+}
+
+/// getSCEVAtScope - Return a SCEV expression handle for the specified value
+/// at the specified scope in the program. The L value specifies the loop
+/// nest in which to evaluate the expression: null means the top level, and
+/// a non-null loop means the point immediately inside that loop.
+///
+/// This method can be used to compute the exit value for a variable defined
+/// in a loop by querying what the value will hold in the parent loop.
+///
+/// In the case that a relevant loop exit value cannot be computed, the
+/// original value V is returned.
+SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ // FIXME: this should be turned into a virtual method on SCEV!
+
+ if (isa<SCEVConstant>(V)) return V;
+
+ // If this instruction is evolved from a constant-evolving PHI, compute the
+ // exit value from the loop without using SCEVs.
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
+ if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
+ const Loop *LI = (*this->LI)[I->getParent()];
+ if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (PN->getParent() == LI->getHeader()) {
+ // Okay, there is no closed form solution for the PHI node. Check
+ // to see if the loop that contains it has a known backedge-taken
+ // count. If so, we may be able to force computation of the exit
+ // value.
+ SCEVHandle BackedgeTakenCount = getBackedgeTakenCount(LI);
+ if (const SCEVConstant *BTCC =
+ dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+ // Okay, we know how many times the containing loop executes. If
+ // this is a constant evolving PHI node, get the final value at
+ // the specified iteration number.
+ Constant *RV = getConstantEvolutionLoopExitValue(PN,
+ BTCC->getValue()->getValue(),
+ LI);
+ if (RV) return getUnknown(RV);
+ }
+ }
+
+ // Okay, this is an expression that we cannot symbolically evaluate
+ // into a SCEV. Check to see if it's possible to symbolically evaluate
+ // the arguments into constants, and if so, try to constant propagate the
+ // result. This is particularly useful for computing loop exit values.
+ if (CanConstantFold(I)) {
+ // Check to see if we've folded this instruction at this loop before.
+ std::map<const Loop *, Constant *> &Values = ValuesAtScopes[I];
+ std::pair<std::map<const Loop *, Constant *>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, static_cast<Constant *>(0)));
+ if (!Pair.second)
+ return Pair.first->second ? &*getUnknown(Pair.first->second) : V;
+
+ std::vector<Constant*> Operands;
+ Operands.reserve(I->getNumOperands());
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *Op = I->getOperand(i);
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ Operands.push_back(C);
+ } else {
+          // If an operand is non-constant and its type is neither an
+          // integer nor a pointer, don't even try to analyze it with
+          // SCEV techniques.
+ if (!isSCEVable(Op->getType()))
+ return V;
+
+ SCEVHandle OpV = getSCEVAtScope(getSCEV(Op), L);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) {
+ Constant *C = SC->getValue();
+ if (C->getType() != Op->getType())
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ Op->getType(),
+ false),
+ C, Op->getType());
+ Operands.push_back(C);
+ } else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV)) {
+ if (Constant *C = dyn_cast<Constant>(SU->getValue())) {
+ if (C->getType() != Op->getType())
+ C =
+ ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ Op->getType(),
+ false),
+ C, Op->getType());
+ Operands.push_back(C);
+ } else
+ return V;
+ } else {
+ return V;
+ }
+ }
+ }
+
+ Constant *C;
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(),
+ &Operands[0], Operands.size());
+ else
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size());
+ Pair.first->second = C;
+ return getUnknown(C);
+ }
+ }
+
+ // This is some other type of SCEVUnknown, just return it.
+ return V;
+ }
+
+ if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
+ SCEVHandle OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ if (OpAtScope != Comm->getOperand(i)) {
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ std::vector<SCEVHandle> NewOps(Comm->op_begin(), Comm->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+
+ for (++i; i != e; ++i) {
+ OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ NewOps.push_back(OpAtScope);
+ }
+ if (isa<SCEVAddExpr>(Comm))
+ return getAddExpr(NewOps);
+ if (isa<SCEVMulExpr>(Comm))
+ return getMulExpr(NewOps);
+ if (isa<SCEVSMaxExpr>(Comm))
+ return getSMaxExpr(NewOps);
+ if (isa<SCEVUMaxExpr>(Comm))
+ return getUMaxExpr(NewOps);
+ assert(0 && "Unknown commutative SCEV type!");
+ }
+ }
+ // If we got here, all operands are loop invariant.
+ return Comm;
+ }
+
+ if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
+ SCEVHandle LHS = getSCEVAtScope(Div->getLHS(), L);
+ SCEVHandle RHS = getSCEVAtScope(Div->getRHS(), L);
+ if (LHS == Div->getLHS() && RHS == Div->getRHS())
+ return Div; // must be loop invariant
+ return getUDivExpr(LHS, RHS);
+ }
+
+ // If this is a loop recurrence for a loop that does not contain L, then we
+ // are dealing with the final value computed by the loop.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
+ if (!L || !AddRec->getLoop()->contains(L->getHeader())) {
+ // To evaluate this recurrence, we need to know how many times the AddRec
+ // loop iterates. Compute this now.
+ SCEVHandle BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+ if (BackedgeTakenCount == UnknownValue) return AddRec;
+
+ // Then, evaluate the AddRec.
+ return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
+ }
+ return AddRec;
+ }
+
+ if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
+ SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getZeroExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
+ SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getSignExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
+ SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getTruncateExpr(Op, Cast->getType());
+ }
+
+ assert(0 && "Unknown SCEV type!");
+ return 0;
+}
+
+/// getSCEVAtScope - This is a convenience function which does
+/// getSCEVAtScope(getSCEV(V), L).
+SCEVHandle ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
+ return getSCEVAtScope(getSCEV(V), L);
+}
+
+/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
+/// following equation:
+///
+/// A * X = B (mod N)
+///
+/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
+/// A and B isn't important.
+///
+/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
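+///
+/// Worked example: solve 6 * X = 4 (mod 16), i.e. A = 6, B = 4, BW = 4.
+/// D = gcd(6, 16) = 2 (Mult2 = 1), and B has two trailing zeros, so a
+/// solution exists. A/D = 3, N/D = 8, and the multiplicative inverse of 3
+/// modulo 8 is 3 (since 3 * 3 = 9 = 1 (mod 8)), so X = (3 * (4/2)) mod 8 = 6.
+/// Indeed 6 * 6 = 36 = 4 (mod 16), and 6 is the minimum unsigned solution;
+/// the others are 6 + k * 8.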
+static SCEVHandle SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+ ScalarEvolution &SE) {
+ uint32_t BW = A.getBitWidth();
+ assert(BW == B.getBitWidth() && "Bit widths must be the same.");
+ assert(A != 0 && "A must be non-zero.");
+
+ // 1. D = gcd(A, N)
+ //
+  // The gcd of A and N can have only one prime factor: 2. The number of
+  // trailing zeros in A is its multiplicity.
+ uint32_t Mult2 = A.countTrailingZeros();
+ // D = 2^Mult2
+
+ // 2. Check if B is divisible by D.
+ //
+  // B is divisible by D if and only if the multiplicity of the prime factor 2
+  // in B is not less than its multiplicity in D.
+ if (B.countTrailingZeros() < Mult2)
+ return SE.getCouldNotCompute();
+
+ // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
+ // modulo (N / D).
+ //
+ // (N / D) may need BW+1 bits in its representation. Hence, we'll use this
+ // bit width during computations.
+ APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
+ APInt Mod(BW + 1, 0);
+ Mod.set(BW - Mult2); // Mod = N / D
+ APInt I = AD.multiplicativeInverse(Mod);
+
+ // 4. Compute the minimum unsigned root of the equation:
+ // I * (B / D) mod (N / D)
+ APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
+
+ // The result is guaranteed to be less than 2^BW so we may truncate it to BW
+ // bits.
+ return SE.getConstant(Result.trunc(BW));
+}
+
+/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
+/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
+/// might be the same) or two SCEVCouldNotCompute objects.
+///
+static std::pair<SCEVHandle,SCEVHandle>
+SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
+ const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
+ const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
+ const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+
+ // We currently can only solve this if the coefficients are constants.
+ if (!LC || !MC || !NC) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
+ const APInt &L = LC->getValue()->getValue();
+ const APInt &M = MC->getValue()->getValue();
+ const APInt &N = NC->getValue()->getValue();
+ APInt Two(BitWidth, 2);
+ APInt Four(BitWidth, 4);
+
+ {
+ using namespace APIntOps;
+ const APInt& C = L;
+ // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
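+    // (The chrec {L,+,M,+,N} evaluated at iteration x is
+    //    L + M*x + N*x*(x-1)/2 = (N/2)*x^2 + (M - N/2)*x + L,
+    // which gives A = N/2, B = M - N/2, and C = L below.)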
+ // The B coefficient is M-N/2
+ APInt B(M);
+ B -= sdiv(N,Two);
+
+ // The A coefficient is N/2
+ APInt A(N.sdiv(Two));
+
+    // Compute the B^2-4AC term.
+ APInt SqrtTerm(B);
+ SqrtTerm *= B;
+ SqrtTerm -= Four * (A * C);
+
+    // Compute sqrt(B^2-4AC). This is guaranteed to be the nearest
+    // integer value or else APInt::sqrt() will assert.
+ APInt SqrtVal(SqrtTerm.sqrt());
+
+ // Compute the two solutions for the quadratic formula.
+ // The divisions must be performed as signed divisions.
+ APInt NegB(-B);
+ APInt TwoA( A << 1 );
+ if (TwoA.isMinValue()) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ ConstantInt *Solution1 = ConstantInt::get((NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 = ConstantInt::get((NegB - SqrtVal).sdiv(TwoA));
+
+ return std::make_pair(SE.getConstant(Solution1),
+ SE.getConstant(Solution2));
+  } // end of scope using APIntOps
+}
+
+/// HowFarToZero - Return the number of times a backedge comparing the specified
+/// value to zero will execute. If not computable, return UnknownValue.
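+///
+/// For example (illustrative): {10,+,-1} reaches zero after 10 backedges
+/// (the all-ones unitary-step case below), and {-5,+,1} after 5.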
+SCEVHandle ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ // If the value is a constant
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ // If the value is already zero, the branch will execute zero times.
+ if (C->getValue()->isZero()) return C;
+ return UnknownValue; // Otherwise it will loop infinitely.
+ }
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+ if (!AddRec || AddRec->getLoop() != L)
+ return UnknownValue;
+
+ if (AddRec->isAffine()) {
+ // If this is an affine expression, the execution count of this branch is
+ // the minimum unsigned root of the following equation:
+ //
+ // Start + Step*N = 0 (mod 2^BW)
+ //
+ // equivalent to:
+ //
+ // Step*N = -Start (mod 2^BW)
+ //
+ // where BW is the common bit width of Start and Step.
+
+ // Get the initial value for the loop.
+ SCEVHandle Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
+ SCEVHandle Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+
+ if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) {
+ // For now we handle only constant steps.
+
+ // First, handle unitary steps.
+ if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so:
+ return getNegativeSCEV(Start); // N = -Start (as unsigned)
+ if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so:
+ return Start; // N = Start (as unsigned)
+
+ // Then, try to solve the above equation provided that Start is constant.
+ if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
+ return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
+ -StartC->getValue()->getValue(),
+ *this);
+ }
+ } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) {
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
+ // the quadratic equation to solve it.
+ std::pair<SCEVHandle,SCEVHandle> Roots = SolveQuadraticEquation(AddRec,
+ *this);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1) {
+#if 0
+ errs() << "HFTZ: " << *V << " - sol#1: " << *R1
+ << " sol#2: " << *R2 << "\n";
+#endif
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
+ R1->getValue(), R2->getValue()))) {
+        if (!CB->getZExtValue())
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ SCEVHandle Val = AddRec->evaluateAtIteration(R1, *this);
+ if (Val->isZero())
+ return R1; // We found a quadratic root!
+ }
+ }
+ }
+
+ return UnknownValue;
+}
+
+/// HowFarToNonZero - Return the number of times a backedge checking the
+/// specified value for nonzero will execute. If not computable, return
+/// UnknownValue.
+SCEVHandle ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+ // Loops that look like: while (X == 0) are very strange indeed. We don't
+ // handle them yet except for the trivial case. This could be expanded in the
+ // future as needed.
+
+ // If the value is a constant, check to see if it is known to be non-zero
+ // already. If so, the backedge will execute zero times.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ if (!C->getValue()->isNullValue())
+ return getIntegerSCEV(0, C->getType());
+ return UnknownValue; // Otherwise it will loop infinitely.
+ }
+
+ // We could implement others, but I really doubt anyone writes loops like
+ // this, and if they did, they would already be constant folded.
+ return UnknownValue;
+}
+
+/// getLoopPredecessor - If the given loop's header has exactly one unique
+/// predecessor outside the loop, return it. Otherwise return null.
+///
+BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) {
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Pred = 0;
+ for (pred_iterator PI = pred_begin(Header), E = pred_end(Header);
+ PI != E; ++PI)
+ if (!L->contains(*PI)) {
+ if (Pred && Pred != *PI) return 0; // Multiple predecessors.
+ Pred = *PI;
+ }
+ return Pred;
+}
+
+/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
+/// (which may not be an immediate predecessor) which has exactly one
+/// successor from which BB is reachable, or null if no such block is
+/// found.
+///
+BasicBlock *
+ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
+ // If the block has a unique predecessor, then there is no path from the
+ // predecessor to the block that does not go through the direct edge
+ // from the predecessor to the block.
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ return Pred;
+
+ // A loop's header is defined to be a block that dominates the loop.
+ // If the header has a unique predecessor outside the loop, it must be
+ // a block that has exactly one successor that can reach the loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ return getLoopPredecessor(L);
+
+ return 0;
+}
+
+/// isLoopGuardedByCond - Test whether entry to the loop is protected by
+/// a conditional between LHS and RHS. This is used to help avoid max
+/// expressions in loop trip counts.
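+///
+/// For example (illustrative): in code of the form
+///   if (0 < n) { i = 0; do { ... ++i; } while (i < n); }
+/// the guarding branch establishes 0 < n on entry, which lets callers such
+/// as HowManyLessThans avoid a max expression in the computed trip count.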
+bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return false;
+
+ BasicBlock *Predecessor = getLoopPredecessor(L);
+ BasicBlock *PredecessorDest = L->getHeader();
+
+ // Starting at the loop predecessor, climb up the predecessor chain, as long
+ // as there are predecessors that can be found that have unique successors
+ // leading to the original header.
+ for (; Predecessor;
+ PredecessorDest = Predecessor,
+ Predecessor = getPredecessorWithUniqueSuccessorForBB(Predecessor)) {
+
+ BranchInst *LoopEntryPredicate =
+ dyn_cast<BranchInst>(Predecessor->getTerminator());
+ if (!LoopEntryPredicate ||
+ LoopEntryPredicate->isUnconditional())
+ continue;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition());
+ if (!ICI) continue;
+
+ // Now that we found a conditional branch that dominates the loop, check to
+ // see if it is the comparison we are looking for.
+ Value *PreCondLHS = ICI->getOperand(0);
+ Value *PreCondRHS = ICI->getOperand(1);
+ ICmpInst::Predicate Cond;
+ if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest)
+ Cond = ICI->getPredicate();
+ else
+ Cond = ICI->getInversePredicate();
+
+ if (Cond == Pred)
+ ; // An exact match.
+ else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
+ ; // The actual condition is beyond sufficient.
+ else
+ // Check a few special cases.
+ switch (Cond) {
+ case ICmpInst::ICMP_UGT:
+ if (Pred == ICmpInst::ICMP_ULT) {
+ std::swap(PreCondLHS, PreCondRHS);
+ Cond = ICmpInst::ICMP_ULT;
+ break;
+ }
+ continue;
+ case ICmpInst::ICMP_SGT:
+ if (Pred == ICmpInst::ICMP_SLT) {
+ std::swap(PreCondLHS, PreCondRHS);
+ Cond = ICmpInst::ICMP_SLT;
+ break;
+ }
+ continue;
+ case ICmpInst::ICMP_NE:
+ // Expressions like (x >u 0) are often canonicalized to (x != 0),
+ // so check for this case by checking if the NE is comparing against
+ // a minimum or maximum constant.
+ if (!ICmpInst::isTrueWhenEqual(Pred))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
+ const APInt &A = CI->getValue();
+ switch (Pred) {
+ case ICmpInst::ICMP_SLT:
+ if (A.isMaxSignedValue()) break;
+ continue;
+ case ICmpInst::ICMP_SGT:
+ if (A.isMinSignedValue()) break;
+ continue;
+ case ICmpInst::ICMP_ULT:
+ if (A.isMaxValue()) break;
+ continue;
+ case ICmpInst::ICMP_UGT:
+ if (A.isMinValue()) break;
+ continue;
+ default:
+ continue;
+ }
+ Cond = ICmpInst::ICMP_NE;
+ // NE is symmetric but the original comparison may not be. Swap
+ // the operands if necessary so that they match below.
+ if (isa<SCEVConstant>(LHS))
+ std::swap(PreCondLHS, PreCondRHS);
+ break;
+ }
+ continue;
+ default:
+ // We weren't able to reconcile the condition.
+ continue;
+ }
+
+ if (!PreCondLHS->getType()->isInteger()) continue;
+
+ SCEVHandle PreCondLHSSCEV = getSCEV(PreCondLHS);
+ SCEVHandle PreCondRHSSCEV = getSCEV(PreCondRHS);
+ if ((LHS == PreCondLHSSCEV && RHS == PreCondRHSSCEV) ||
+ (LHS == getNotSCEV(PreCondRHSSCEV) &&
+ RHS == getNotSCEV(PreCondLHSSCEV)))
+ return true;
+ }
+
+ return false;
+}
+
+/// HowManyLessThans - Return the number of times a backedge containing the
+/// specified less-than comparison will execute. If not computable, return
+/// UnknownValue.
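+///
+/// Worked example: for {0,+,2} <s 7 with a suitably guarded loop, Start = 0,
+/// Step = 2, End = 7, and the backedge-taken count is
+/// ((7 - 0) + (2 - 1)) /u 2 = 4: the condition holds for the values
+/// 0, 2, 4, and 6.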
+ScalarEvolution::BackedgeTakenInfo ScalarEvolution::
+HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool isSigned) {
+ // Only handle: "ADDREC < LoopInvariant".
+ if (!RHS->isLoopInvariant(L)) return UnknownValue;
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AddRec || AddRec->getLoop() != L)
+ return UnknownValue;
+
+ if (AddRec->isAffine()) {
+    // FORNOW: We only support constant, positive strides.
+ unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+ SCEVHandle Step = AddRec->getStepRecurrence(*this);
+ SCEVHandle NegOne = getIntegerSCEV(-1, AddRec->getType());
+
+ // TODO: handle non-constant strides.
+ const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
+ if (!CStep || CStep->isZero())
+ return UnknownValue;
+ if (CStep->isOne()) {
+ // With unit stride, the iteration never steps past the limit value.
+ } else if (CStep->getValue()->getValue().isStrictlyPositive()) {
+ if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) {
+        // Test whether a positive iteration can step past the limit
+        // value and past the maximum value for its type in a single step.
+ if (isSigned) {
+ APInt Max = APInt::getSignedMaxValue(BitWidth);
+ if ((Max - CStep->getValue()->getValue())
+ .slt(CLimit->getValue()->getValue()))
+ return UnknownValue;
+ } else {
+ APInt Max = APInt::getMaxValue(BitWidth);
+ if ((Max - CStep->getValue()->getValue())
+ .ult(CLimit->getValue()->getValue()))
+ return UnknownValue;
+ }
+ } else
+ // TODO: handle non-constant limit values below.
+ return UnknownValue;
+ } else
+ // TODO: handle negative strides below.
+ return UnknownValue;
+
+ // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
+ // m. So, we count the number of iterations in which {n,+,s} < m is true.
+    // Note that we cannot simply return max(m-n,0)/s because it's not safe to
+    // treat m-n as either signed or unsigned due to the possibility of
+    // overflow.
+
+ // First, we get the value of the LHS in the first iteration: n
+ SCEVHandle Start = AddRec->getOperand(0);
+
+ // Determine the minimum constant start value.
+ SCEVHandle MinStart = isa<SCEVConstant>(Start) ? Start :
+ getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) :
+ APInt::getMinValue(BitWidth));
+
+ // If we know that the condition is true in order to enter the loop,
+ // then we know that it will run exactly (m-n)/s times. Otherwise, we
+ // only know that it will execute (max(m,n)-n)/s times. In both cases,
+ // the division must round up.
+ SCEVHandle End = RHS;
+ if (!isLoopGuardedByCond(L,
+ isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ getMinusSCEV(Start, Step), RHS))
+ End = isSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ // Determine the maximum constant end value.
+ SCEVHandle MaxEnd = isa<SCEVConstant>(End) ? End :
+ getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) :
+ APInt::getMaxValue(BitWidth));
+
+ // Finally, we subtract these two values and divide, rounding up, to get
+ // the number of times the backedge is executed.
+ SCEVHandle BECount = getUDivExpr(getAddExpr(getMinusSCEV(End, Start),
+ getAddExpr(Step, NegOne)),
+ Step);
+
+ // The maximum backedge count is similar, except using the minimum start
+ // value and the maximum end value.
+ SCEVHandle MaxBECount = getUDivExpr(getAddExpr(getMinusSCEV(MaxEnd,
+ MinStart),
+ getAddExpr(Step, NegOne)),
+ Step);
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+
+ return UnknownValue;
+}
+
+/// getNumIterationsInRange - Return the number of iterations of this loop that
+/// produce values in the specified constant range. Another way of looking at
+/// this is that it returns the first iteration number where the value is not
+/// in the range, thus computing the exit count. If the iteration count can't
+/// be computed, an instance of SCEVCouldNotCompute is returned.
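+///
+/// For example (illustrative): for {0,+,2} and the range [0,5), the exit
+/// value is (4 + 2) /u 2 = 3: iteration 2 produces 4, which is still in the
+/// range, while iteration 3 produces 6, which is not.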
+SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+ ScalarEvolution &SE) const {
+ if (Range.isFullSet()) // Infinite loop.
+ return SE.getCouldNotCompute();
+
+ // If the start is a non-zero constant, shift the range to simplify things.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
+ if (!SC->getValue()->isZero()) {
+ std::vector<SCEVHandle> Operands(op_begin(), op_end());
+ Operands[0] = SE.getIntegerSCEV(0, SC->getType());
+ SCEVHandle Shifted = SE.getAddRecExpr(Operands, getLoop());
+ if (const SCEVAddRecExpr *ShiftedAddRec =
+ dyn_cast<SCEVAddRecExpr>(Shifted))
+ return ShiftedAddRec->getNumIterationsInRange(
+ Range.subtract(SC->getValue()->getValue()), SE);
+ // This is strange and shouldn't happen.
+ return SE.getCouldNotCompute();
+ }
+
+ // The only time we can solve this is when we have all constant indices.
+ // Otherwise, we cannot determine the overflow conditions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<SCEVConstant>(getOperand(i)))
+ return SE.getCouldNotCompute();
+
+
+  // Okay, at this point we know that all elements of the chrec are constants
+  // and that the start element is zero.
+
+ // First check to see if the range contains zero. If not, the first
+ // iteration exits.
+ unsigned BitWidth = SE.getTypeSizeInBits(getType());
+ if (!Range.contains(APInt(BitWidth, 0)))
+ return SE.getConstant(ConstantInt::get(getType(),0));
+
+ if (isAffine()) {
+ // If this is an affine expression then we have this situation:
+ // Solve {0,+,A} in Range === Ax in Range
+
+    // We know that zero is in the range. If A is positive then we know that
+    // the upper value of the range must be the first possible exit value.
+    // If A is negative then the lower end of the range is the last possible
+    // loop value. Also note that we already checked for a full range.
+ APInt One(BitWidth,1);
+ APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+ APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
+
+ // The exit value should be (End+A)/A.
+ APInt ExitVal = (End + A).udiv(A);
+ ConstantInt *ExitValue = ConstantInt::get(ExitVal);
+
+ // Evaluate at the exit value. If we really did fall out of the valid
+ // range, then we computed our trip count, otherwise wrap around or other
+ // things must have happened.
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
+ if (Range.contains(Val->getValue()))
+ return SE.getCouldNotCompute(); // Something strange happened
+
+ // Ensure that the previous value is in the range. This is a sanity check.
+ assert(Range.contains(
+ EvaluateConstantChrecAtConstant(this,
+ ConstantInt::get(ExitVal - One), SE)->getValue()) &&
+ "Linear scev computation is off in a bad way!");
+ return SE.getConstant(ExitValue);
+ } else if (isQuadratic()) {
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
+ // quadratic equation to solve it. To do this, we must frame our problem in
+ // terms of figuring out when zero is crossed, instead of when
+ // Range.getUpper() is crossed.
+ std::vector<SCEVHandle> NewOps(op_begin(), op_end());
+ NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
+ SCEVHandle NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
+
+ // Next, solve the constructed addrec
+ std::pair<SCEVHandle,SCEVHandle> Roots =
+ SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1) {
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
+ R1->getValue(), R2->getValue()))) {
+        if (!CB->getZExtValue())
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // Make sure the root is not off by one. The returned iteration should
+ // not be in the range, but the previous one should be. When solving
+ // for "X*X < 5", for example, we should not return a root of 2.
+ ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
+ R1->getValue(),
+ SE);
+ if (Range.contains(R1Val->getValue())) {
+ // The next iteration must be out of the range...
+ ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()+1);
+
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (!Range.contains(R1Val->getValue()))
+ return SE.getConstant(NextVal);
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+
+ // If R1 was not in the range, then it is a good return value. Make
+ // sure that R1-1 WAS in the range though, just in case.
+ ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()-1);
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (Range.contains(R1Val->getValue()))
+ return R1;
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+ }
+ }
+
+ return SE.getCouldNotCompute();
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SCEVCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void ScalarEvolution::SCEVCallbackVH::deleted() {
+  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+ if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ if (Instruction *I = dyn_cast<Instruction>(getValPtr()))
+ SE->ValuesAtScopes.erase(I);
+ SE->Scalars.erase(getValPtr());
+ // this now dangles!
+}
+
+void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
+  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+
+ // Forget all the expressions associated with users of the old value,
+ // so that future queries will recompute the expressions using the new
+ // value.
+ SmallVector<User *, 16> Worklist;
+ Value *Old = getValPtr();
+ bool DeleteOld = false;
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ while (!Worklist.empty()) {
+ User *U = Worklist.pop_back_val();
+ // Deleting the Old value will cause this to dangle. Postpone
+ // that until everything else is done.
+ if (U == Old) {
+ DeleteOld = true;
+ continue;
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ SE->ValuesAtScopes.erase(I);
+ if (SE->Scalars.erase(U))
+ for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+ UI != UE; ++UI)
+ Worklist.push_back(*UI);
+ }
+ if (DeleteOld) {
+ if (PHINode *PN = dyn_cast<PHINode>(Old))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ if (Instruction *I = dyn_cast<Instruction>(Old))
+ SE->ValuesAtScopes.erase(I);
+ SE->Scalars.erase(Old);
+ // this now dangles!
+ }
+ // this may dangle!
+}
+
+ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
+ : CallbackVH(V), SE(se) {}
+
+//===----------------------------------------------------------------------===//
+// ScalarEvolution Class Implementation
+//===----------------------------------------------------------------------===//
+
+ScalarEvolution::ScalarEvolution()
+ : FunctionPass(&ID), UnknownValue(new SCEVCouldNotCompute()) {
+}
+
+bool ScalarEvolution::runOnFunction(Function &F) {
+ this->F = &F;
+ LI = &getAnalysis<LoopInfo>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ return false;
+}
+
+void ScalarEvolution::releaseMemory() {
+ Scalars.clear();
+ BackedgeTakenCounts.clear();
+ ConstantEvolutionLoopExitValue.clear();
+ ValuesAtScopes.clear();
+}
+
+void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LoopInfo>();
+}
+
+bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
+ return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
+}
+
+static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
+ const Loop *L) {
+ // Print all inner loops first
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ PrintLoopInfo(OS, SE, *I);
+
+ OS << "Loop " << L->getHeader()->getName() << ": ";
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1)
+ OS << "<multiple exits> ";
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable backedge-taken count. ";
+ }
+
+ OS << "\n";
+}
+
+void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
+  // ScalarEvolution's implementation of the print method is to print
+ // out SCEV values of all instructions that are interesting. Doing
+ // this potentially causes it to create new SCEV objects though,
+ // which technically conflicts with the const qualifier. This isn't
+ // observable from outside the class though (the hasSCEV function
+ // notwithstanding), so casting away the const isn't dangerous.
+ ScalarEvolution &SE = *const_cast<ScalarEvolution*>(this);
+
+ OS << "Classifying expressions for: " << F->getName() << "\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isSCEVable(I->getType())) {
+ OS << *I;
+ OS << " --> ";
+ SCEVHandle SV = SE.getSCEV(&*I);
+ SV->print(OS);
+ OS << "\t\t";
+
+ if (const Loop *L = LI->getLoopFor((*I).getParent())) {
+ OS << "Exits: ";
+ SCEVHandle ExitValue = SE.getSCEVAtScope(&*I, L->getParentLoop());
+ if (!ExitValue->isLoopInvariant(L)) {
+ OS << "<<Unknown>>";
+ } else {
+ OS << *ExitValue;
+ }
+ }
+
+ OS << "\n";
+ }
+
+ OS << "Determining loop execution counts for: " << F->getName() << "\n";
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ PrintLoopInfo(OS, &SE, *I);
+}
+
+void ScalarEvolution::print(std::ostream &o, const Module *M) const {
+ raw_os_ostream OS(o);
+ print(OS, M);
+}
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
new file mode 100644
index 0000000..7ba8268
--- /dev/null
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -0,0 +1,646 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+/// InsertCastOfTo - Insert a cast of V to the specified type, doing what
+/// we can to share the casts.
+Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
+ const Type *Ty) {
+ // Short-circuit unnecessary bitcasts.
+ if (opcode == Instruction::BitCast && V->getType() == Ty)
+ return V;
+
+ // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+ if ((opcode == Instruction::PtrToInt || opcode == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if ((CI->getOpcode() == Instruction::PtrToInt ||
+ CI->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CI->getType()) ==
+ SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+ return CI->getOperand(0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if ((CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CE->getType()) ==
+ SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return CE->getOperand(0);
+ }
+
+ // FIXME: keep track of the cast instruction.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(opcode, C, Ty);
+
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ // Check to see if there is already a cast!
+ for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI) {
+ if ((*UI)->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
+ if (CI->getOpcode() == opcode) {
+ // If the cast isn't the first instruction of the function, move it.
+ if (BasicBlock::iterator(CI) !=
+ A->getParent()->getEntryBlock().begin()) {
+ // If the CastInst is the insert point, change the insert point.
+ if (CI == InsertPt) ++InsertPt;
+ // Splice the cast at the beginning of the entry block.
+ CI->moveBefore(A->getParent()->getEntryBlock().begin());
+ }
+ return CI;
+ }
+ }
+ Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(),
+ A->getParent()->getEntryBlock().begin());
+ InsertedValues.insert(I);
+ return I;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+
+ // Check to see if there is already a cast. If there is, use it.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ if ((*UI)->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
+ if (CI->getOpcode() == opcode) {
+ BasicBlock::iterator It = I; ++It;
+ if (isa<InvokeInst>(I))
+ It = cast<InvokeInst>(I)->getNormalDest()->begin();
+ while (isa<PHINode>(It)) ++It;
+ if (It != BasicBlock::iterator(CI)) {
+ // If the CastInst is the insert point, change the insert point.
+ if (CI == InsertPt) ++InsertPt;
+ // Splice the cast immediately after the operand in question.
+ CI->moveBefore(It);
+ }
+ return CI;
+ }
+ }
+ BasicBlock::iterator IP = I; ++IP;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP)) ++IP;
+ Instruction *CI = CastInst::Create(opcode, V, Ty, V->getName(), IP);
+ InsertedValues.insert(CI);
+ return CI;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
+ Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+ assert((Op == Instruction::BitCast ||
+ Op == Instruction::PtrToInt ||
+ Op == Instruction::IntToPtr) &&
+ "InsertNoopCastOfTo cannot perform non-noop casts!");
+ assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+ "InsertNoopCastOfTo cannot change sizes!");
+ return InsertCastOfTo(Op, V, Ty);
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation.
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, Value *LHS,
+ Value *RHS, BasicBlock::iterator InsertPt) {
+ // Fold a binop with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::get(Opcode, CLHS, CRHS);
+
+ // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = InsertPt->getParent()->begin();
+ if (InsertPt != BlockBegin) {
+ // Scanning starts from the last instruction before InsertPt.
+ BasicBlock::iterator IP = InsertPt;
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+ IP->getOperand(1) == RHS)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // If we haven't found this binop, insert it.
+ Instruction *BO = BinaryOperator::Create(Opcode, LHS, RHS, "tmp", InsertPt);
+ InsertedValues.insert(BO);
+ return BO;
+}
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
+/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
+/// check to see if the divide was folded.
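+///
+/// For example (illustrative): with Factor = 4, {8,+,4} becomes {2,+,1}
+/// with a zero remainder, and the constant 6 becomes 1 with a remainder
+/// of 2 (i.e. 6 = 4*1 + 2).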
+static bool FactorOutConstant(SCEVHandle &S,
+ SCEVHandle &Remainder,
+ const APInt &Factor,
+ ScalarEvolution &SE) {
+ // Everything is divisible by one.
+ if (Factor == 1)
+ return true;
+
+ // For a Constant, check for a multiple of the given factor.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ ConstantInt *CI =
+ ConstantInt::get(C->getValue()->getValue().sdiv(Factor));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (C->isZero() || !CI->isZero()) {
+ SCEVHandle Div = SE.getConstant(CI);
+ S = Div;
+ Remainder =
+ SE.getAddExpr(Remainder,
+ SE.getConstant(C->getValue()->getValue().srem(Factor)));
+ return true;
+ }
+ }
+
+ // In a Mul, check if there is a constant operand which is a multiple
+ // of the given factor.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(Factor)) {
+ std::vector<SCEVHandle> NewMulOps(M->getOperands());
+ NewMulOps[0] =
+ SE.getConstant(C->getValue()->getValue().sdiv(Factor));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+
+ // In an AddRec, check if both start and step are divisible.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ SCEVHandle Step = A->getStepRecurrence(SE);
+ SCEVHandle StepRem = SE.getIntegerSCEV(0, Step->getType());
+ if (!FactorOutConstant(Step, StepRem, Factor, SE))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ SCEVHandle Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE))
+ return false;
+ S = SE.getAddRecExpr(Start, Step, A->getLoop());
+ return true;
+ }
+
+ return false;
+}
+
+/// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP
+/// instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis analyze the result. However, it suffers from the
+/// underlying bug described in PR2831. Addition in LLVM currently always
+/// has two's complement wrapping guaranteed. However, the semantics for
+/// getelementptr overflow are ambiguous. In the common case though, this
+/// expansion gets used when a GEP in the original code has been converted
+/// into integer arithmetic, in which case the resulting code will be no
+/// more undefined than it was originally.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
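+/// For example (illustrative): expanding (%p + 8 + 4 * %i), where %p has
+/// type i32* (4-byte elements), factors the 8 into 2 elements and the
+/// 4 * %i into %i elements, producing
+///   getelementptr i32* %p, i32 (2 + %i)
+/// rather than a ptrtoint/add/inttoptr sequence.
+///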
+Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
+ const SCEVHandle *op_end,
+ const PointerType *PTy,
+ const Type *Ty,
+ Value *V) {
+ const Type *ElTy = PTy->getElementType();
+ SmallVector<Value *, 4> GepIndices;
+ std::vector<SCEVHandle> Ops(op_begin, op_end);
+ bool AnyNonZeroIndices = false;
+
+  // Descend through the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ for (;;) {
+ APInt ElSize = APInt(SE.getTypeSizeInBits(Ty),
+ ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0);
+ std::vector<SCEVHandle> NewOps;
+ std::vector<SCEVHandle> ScaledOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i]))
+ if (!A->getStart()->isZero()) {
+ SCEVHandle Start = A->getStart();
+ Ops.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()),
+ A->getStepRecurrence(SE),
+ A->getLoop()));
+ Ops[i] = Start;
+ ++e;
+ }
+ // If the scale size is not 0, attempt to factor out a scale.
+ if (ElSize != 0) {
+ SCEVHandle Op = Ops[i];
+ SCEVHandle Remainder = SE.getIntegerSCEV(0, Op->getType());
+ if (FactorOutConstant(Op, Remainder, ElSize, SE)) {
+ ScaledOps.push_back(Op); // Op now has ElSize factored out.
+ NewOps.push_back(Remainder);
+ continue;
+ }
+ }
+ // If the operand was not divisible, add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Ops[i]);
+ }
+ Ops = NewOps;
+ AnyNonZeroIndices |= !ScaledOps.empty();
+ Value *Scaled = ScaledOps.empty() ?
+ Constant::getNullValue(Ty) :
+ expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ if (!Ops.empty())
+ while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *SE.TD->getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(ConstantInt::get(Ty,
+ FullOffset -
+ SL.getElementOffset(ElIdx)));
+ AnyNonZeroIndices = true;
+ continue;
+ }
+ }
+ break;
+ }
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) {
+ ElTy = ATy->getElementType();
+ continue;
+ }
+ break;
+ }
+
+  // If none of the operands were convertible to proper GEP indices, cast
+ // the base to i8* and do an ugly getelementptr with that. It's still
+ // better than ptrtoint+arithmetic+inttoptr at least.
+ if (!AnyNonZeroIndices) {
+ V = InsertNoopCastOfTo(V,
+ Type::Int8Ty->getPointerTo(PTy->getAddressSpace()));
+ Value *Idx = expand(SE.getAddExpr(Ops));
+ Idx = InsertNoopCastOfTo(Idx, Ty);
+
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1);
+
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = InsertPt->getParent()->begin();
+ if (InsertPt != BlockBegin) {
+ // Scanning starts from the last instruction before InsertPt.
+ BasicBlock::iterator IP = InsertPt;
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+ return IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ Value *GEP = GetElementPtrInst::Create(V, Idx, "scevgep", InsertPt);
+ InsertedValues.insert(GEP);
+ return GEP;
+ }
+
+ // Insert a pretty getelementptr.
+ Value *GEP = GetElementPtrInst::Create(V,
+ GepIndices.begin(),
+ GepIndices.end(),
+ "scevgep", InsertPt);
+ Ops.push_back(SE.getUnknown(GEP));
+ InsertedValues.insert(GEP);
+ return expand(SE.getAddExpr(Ops));
+}
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expand(S->getOperand(S->getNumOperands()-1));
+
+ // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
+ // comments on expandAddToGEP for details.
+ if (SE.TD)
+ if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
+ const std::vector<SCEVHandle> &Ops = S->getOperands();
+ return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1],
+ PTy, Ty, V);
+ }
+
+ V = InsertNoopCastOfTo(V, Ty);
+
+ // Emit a bunch of add instructions
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ Value *W = expand(S->getOperand(i));
+ W = InsertNoopCastOfTo(W, Ty);
+ V = InsertBinop(Instruction::Add, V, W, InsertPt);
+ }
+ return V;
+}
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ int FirstOp = 0; // Set if we should emit a subtract.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(0)))
+ if (SC->getValue()->isAllOnesValue())
+ FirstOp = 1;
+
+ int i = S->getNumOperands()-2;
+ Value *V = expand(S->getOperand(i+1));
+ V = InsertNoopCastOfTo(V, Ty);
+
+ // Emit a bunch of multiply instructions
+ for (; i >= FirstOp; --i) {
+ Value *W = expand(S->getOperand(i));
+ W = InsertNoopCastOfTo(W, Ty);
+ V = InsertBinop(Instruction::Mul, V, W, InsertPt);
+ }
+
+ // -1 * ... ---> 0 - ...
+ if (FirstOp == 1)
+ V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V, InsertPt);
+ return V;
+}
+
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ Value *LHS = expand(S->getLHS());
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+ const APInt &RHS = SC->getValue()->getValue();
+ if (RHS.isPowerOf2())
+ return InsertBinop(Instruction::LShr, LHS,
+ ConstantInt::get(Ty, RHS.logBase2()),
+ InsertPt);
+ }
+
+ Value *RHS = expand(S->getRHS());
+ RHS = InsertNoopCastOfTo(RHS, Ty);
+ return InsertBinop(Instruction::UDiv, LHS, RHS, InsertPt);
+}
+
+/// Move parts of Base into Rest to leave Base with the minimal
+/// expression that provides a pointer operand suitable for a
+/// GEP expansion.
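+///
+/// For example (illustrative): given Base = {(16 + %p),+,4}<L> and Rest = 0,
+/// this leaves Base = %p and Rest = {16,+,4}<L>.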
+static void ExposePointerBase(SCEVHandle &Base, SCEVHandle &Rest,
+ ScalarEvolution &SE) {
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
+ Base = A->getStart();
+ Rest = SE.getAddExpr(Rest,
+ SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()),
+ A->getStepRecurrence(SE),
+ A->getLoop()));
+ }
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
+ Base = A->getOperand(A->getNumOperands()-1);
+ std::vector<SCEVHandle> NewAddOps(A->op_begin(), A->op_end());
+ NewAddOps.back() = Rest;
+ Rest = SE.getAddExpr(NewAddOps);
+ ExposePointerBase(Base, Rest, SE);
+ }
+}
+
+Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ const Loop *L = S->getLoop();
+
+ // {X,+,F} --> X + {0,+,F}
+ if (!S->getStart()->isZero()) {
+ std::vector<SCEVHandle> NewOps(S->getOperands());
+ NewOps[0] = SE.getIntegerSCEV(0, Ty);
+ SCEVHandle Rest = SE.getAddRecExpr(NewOps, L);
+
+ // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
+ // comments on expandAddToGEP for details.
+ if (SE.TD) {
+ SCEVHandle Base = S->getStart();
+ SCEVHandle RestArray[1] = { Rest };
+ // Dig into the expression to find the pointer base for a GEP.
+ ExposePointerBase(Base, RestArray[0], SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
+ }
+ }
+ }
+
+ Value *RestV = expand(Rest);
+ return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV)));
+ }
+
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() &&
+ S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) {
+ // Create and insert the PHI node for the induction variable in the
+ // specified loop.
+ BasicBlock *Header = L->getHeader();
+ PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin());
+ InsertedValues.insert(PN);
+ PN->addIncoming(Constant::getNullValue(Ty), L->getLoopPreheader());
+
+ pred_iterator HPI = pred_begin(Header);
+ assert(HPI != pred_end(Header) && "Loop with zero preds???");
+ if (!L->contains(*HPI)) ++HPI;
+ assert(HPI != pred_end(Header) && L->contains(*HPI) &&
+ "No backedge in loop?");
+
+ // Insert a unit add instruction right before the terminator corresponding
+ // to the back-edge.
+ Constant *One = ConstantInt::get(Ty, 1);
+ Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
+ (*HPI)->getTerminator());
+ InsertedValues.insert(Add);
+
+ pred_iterator PI = pred_begin(Header);
+ if (*PI == L->getLoopPreheader())
+ ++PI;
+ PN->addIncoming(Add, *PI);
+ return PN;
+ }
+
+ // Get the canonical induction variable I for this loop.
+ Value *I = getOrInsertCanonicalInductionVariable(L, Ty);
+
+ // If this is a simple linear addrec, emit it now as a special case.
+ if (S->isAffine()) { // {0,+,F} --> i*F
+ Value *F = expand(S->getOperand(1));
+ F = InsertNoopCastOfTo(F, Ty);
+
+    // If the step is by one, just return the inserted IV.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(F))
+ if (CI->getValue() == 1)
+ return I;
+
+    // If the insert point is directly inside the loop, emit the multiply at
+    // the insert point. Otherwise, L is a loop that is a parent of the insert
+    // point loop; if we can, move the multiply to the outermost loop in which
+    // it is safe to reside.
+ BasicBlock::iterator MulInsertPt = getInsertionPoint();
+ Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent());
+ if (InsertPtLoop != L && InsertPtLoop &&
+ L->contains(InsertPtLoop->getHeader())) {
+ do {
+ // If we cannot hoist the multiply out of this loop, don't.
+ if (!InsertPtLoop->isLoopInvariant(F)) break;
+
+ BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader();
+
+ // If this loop hasn't got a preheader, we aren't able to hoist the
+ // multiply.
+ if (!InsertPtLoopPH)
+ break;
+
+ // Otherwise, move the insert point to the preheader.
+ MulInsertPt = InsertPtLoopPH->getTerminator();
+ InsertPtLoop = InsertPtLoop->getParentLoop();
+ } while (InsertPtLoop != L);
+ }
+
+ return InsertBinop(Instruction::Mul, I, F, MulInsertPt);
+ }
+
+ // If this is a chain of recurrences, turn it into a closed form, using the
+ // folders, then expandCodeFor the closed form. This allows the folders to
+ // simplify the expression without having to build a bunch of special code
+ // into this folder.
+ SCEVHandle IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV.
+
+ SCEVHandle V = S->evaluateAtIteration(IH, SE);
+ //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
+
+ return expand(V);
+}
+
+Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expand(S->getOperand());
+ V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
+ Instruction *I = new TruncInst(V, Ty, "tmp.", InsertPt);
+ InsertedValues.insert(I);
+ return I;
+}
+
+Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expand(S->getOperand());
+ V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
+ Instruction *I = new ZExtInst(V, Ty, "tmp.", InsertPt);
+ InsertedValues.insert(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expand(S->getOperand());
+ V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
+ Instruction *I = new SExtInst(V, Ty, "tmp.", InsertPt);
+ InsertedValues.insert(I);
+ return I;
+}
+
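+// An N-operand smax is lowered as a chain of icmp/select pairs; a sketch for
+// two operands:
+//   %tmp = icmp sgt i32 %a, %b
+//   %smax = select i1 %tmp, i32 %a, i32 %b
+// Each further operand folds into the running maximum the same way, and
+// visitUMaxExpr below is identical with an unsigned comparison.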
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *LHS = expand(S->getOperand(0));
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ for (unsigned i = 1; i < S->getNumOperands(); ++i) {
+ Value *RHS = expand(S->getOperand(i));
+ RHS = InsertNoopCastOfTo(RHS, Ty);
+ Instruction *ICmp =
+ new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS, "tmp", InsertPt);
+ InsertedValues.insert(ICmp);
+ Instruction *Sel = SelectInst::Create(ICmp, LHS, RHS, "smax", InsertPt);
+ InsertedValues.insert(Sel);
+ LHS = Sel;
+ }
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *LHS = expand(S->getOperand(0));
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ for (unsigned i = 1; i < S->getNumOperands(); ++i) {
+ Value *RHS = expand(S->getOperand(i));
+ RHS = InsertNoopCastOfTo(RHS, Ty);
+ Instruction *ICmp =
+ new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS, "tmp", InsertPt);
+ InsertedValues.insert(ICmp);
+ Instruction *Sel = SelectInst::Create(ICmp, LHS, RHS, "umax", InsertPt);
+ InsertedValues.insert(Sel);
+ LHS = Sel;
+ }
+ return LHS;
+}
+
+Value *SCEVExpander::expandCodeFor(SCEVHandle SH, const Type *Ty) {
+ // Expand the code for this SCEV.
+ Value *V = expand(SH);
+ if (Ty) {
+ assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+ "non-trivial casts should be done with the SCEVs directly!");
+ V = InsertNoopCastOfTo(V, Ty);
+ }
+ return V;
+}
+
+Value *SCEVExpander::expand(const SCEV *S) {
+ // Check to see if we already expanded this.
+ std::map<SCEVHandle, AssertingVH<Value> >::iterator I =
+ InsertedExpressions.find(S);
+ if (I != InsertedExpressions.end())
+ return I->second;
+
+ Value *V = visit(S);
+ InsertedExpressions[S] = V;
+ return V;
+}
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
new file mode 100644
index 0000000..5433068
--- /dev/null
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -0,0 +1,331 @@
+//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
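+//
+// A client subclasses AbstractLatticeFunction, supplying the transfer
+// functions used below (ComputeConstant, ComputeArgument, MergeValues,
+// ComputeInstructionState), and then drives a solver over a function.
+// A rough sketch, assuming a constructor shape as declared in
+// SparsePropagation.h:
+//
+//   MyLatticeFunction LF;      // hypothetical subclass
+//   SparseSolver Solver(&LF);  // assumed constructor signature
+//   Solver.Solve(F);
+//   Solver.Print(F, std::cout);
+//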
+
+#define DEBUG_TYPE "sparseprop"
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// AbstractLatticeFunction Implementation
+//===----------------------------------------------------------------------===//
+
+AbstractLatticeFunction::~AbstractLatticeFunction() {}
+
+/// PrintValue - Render the specified lattice value to the specified stream.
+void AbstractLatticeFunction::PrintValue(LatticeVal V, std::ostream &OS) {
+ if (V == UndefVal)
+ OS << "undefined";
+ else if (V == OverdefinedVal)
+ OS << "overdefined";
+ else if (V == UntrackedVal)
+ OS << "untracked";
+ else
+ OS << "unknown lattice value";
+}
+
+//===----------------------------------------------------------------------===//
+// SparseSolver Implementation
+//===----------------------------------------------------------------------===//
+
+/// getOrInitValueState - Return the LatticeVal object that corresponds to the
+/// value, initializing the value's state if it hasn't been entered into the
+/// map yet. This function is necessary because not all values should start
+/// out in the underdefined state... Arguments should be overdefined, and
+/// constants should be marked as constants.
+///
+SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ if (I != ValueState.end()) return I->second; // Common case, in the map
+
+ LatticeVal LV;
+ if (LatticeFunc->IsUntrackedValue(V))
+ return LatticeFunc->getUntrackedVal();
+ else if (Constant *C = dyn_cast<Constant>(V))
+ LV = LatticeFunc->ComputeConstant(C);
+ else if (Argument *A = dyn_cast<Argument>(V))
+ LV = LatticeFunc->ComputeArgument(A);
+ else if (!isa<Instruction>(V))
+ // All other non-instructions are overdefined.
+ LV = LatticeFunc->getOverdefinedVal();
+ else
+ // All instructions are underdefined by default.
+ LV = LatticeFunc->getUndefVal();
+
+ // If this value is untracked, don't add it to the map.
+ if (LV == LatticeFunc->getUntrackedVal())
+ return LV;
+ return ValueState[V] = LV;
+}
+
+/// UpdateState - When the state for some instruction is potentially updated,
+/// this function notices and adds Inst to the worklist if needed.
+void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
+ if (I != ValueState.end() && I->second == V)
+ return; // No change.
+
+ // An update. Visit uses of I.
+ ValueState[&Inst] = V;
+ InstWorkList.push_back(&Inst);
+}
+
+/// MarkBlockExecutable - This method can be used by clients to mark all of
+/// the blocks that are known to be intrinsically live in the processed unit.
+void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
+ DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
+ BBExecutable.insert(BB); // Basic block is executable!
+ BBWorkList.push_back(BB); // Add the block to the work list!
+}
+
+/// markEdgeExecutable - Mark the edge from Source to Dest as feasible, adding
+/// Dest to the BB work list if it is not already executable.
+void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ DOUT << "Marking Edge Executable: " << Source->getNameStart()
+ << " -> " << Dest->getNameStart() << "\n";
+
+ if (BBExecutable.count(Dest)) {
+ // The destination is already executable, but we just made an edge
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+ visitPHINode(*cast<PHINode>(I));
+
+ } else {
+ MarkBlockExecutable(Dest);
+ }
+}
+
+
+/// getFeasibleSuccessors - Return a vector of booleans to indicate which
+/// successors are reachable from a given terminator instruction.
+void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVectorImpl<bool> &Succs,
+ bool AggressiveUndef) {
+ Succs.resize(TI.getNumSuccessors());
+ if (TI.getNumSuccessors() == 0) return;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue;
+ if (AggressiveUndef)
+ BCValue = getOrInitValueState(BI->getCondition());
+ else
+ BCValue = getLatticeState(BI->getCondition());
+
+ if (BCValue == LatticeFunc->getOverdefinedVal() ||
+ BCValue == LatticeFunc->getUntrackedVal()) {
+ // Overdefined condition variables can branch either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (BCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // Non-constant values can go either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+    // Constant condition variables mean the branch can only go a single way.
+ Succs[C == ConstantInt::getFalse()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+    // Invoke instructions' successors are always executable.
+ // TODO: Could ask the lattice function if the value can throw.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ SwitchInst &SI = cast<SwitchInst>(TI);
+ LatticeVal SCValue;
+ if (AggressiveUndef)
+ SCValue = getOrInitValueState(SI.getCondition());
+ else
+ SCValue = getLatticeState(SI.getCondition());
+
+ if (SCValue == LatticeFunc->getOverdefinedVal() ||
+ SCValue == LatticeFunc->getUntrackedVal()) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (SCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
+ if (C == 0 || !isa<ConstantInt>(C)) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true;
+}
+
+
+/// isEdgeFeasible - Return true if the control flow edge from the 'From'
+/// basic block to the 'To' basic block is currently feasible...
+bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
+ bool AggressiveUndef) {
+ SmallVector<bool, 16> SuccFeasible;
+ TerminatorInst *TI = From->getTerminator();
+ getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (TI->getSuccessor(i) == To && SuccFeasible[i])
+ return true;
+
+ return false;
+}
+
+void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible, true);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable...
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SparseSolver::visitPHINode(PHINode &PN) {
+ LatticeVal PNIV = getOrInitValueState(&PN);
+ LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
+
+ // If this value is already overdefined (common) just return.
+ if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64) {
+ UpdateState(PN, Overdefined);
+ return;
+ }
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the
+ // transfer function to give us the merge of the incoming values.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // If the edge is not yet known to be feasible, it doesn't impact the PHI.
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
+ continue;
+
+ // Merge in this value.
+ LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
+ if (OpVal != PNIV)
+ PNIV = LatticeFunc->MergeValues(PNIV, OpVal);
+
+ if (PNIV == Overdefined)
+ break; // Rest of input values don't matter.
+ }
+
+  // Update the PHI with the computed value, which is the merge of the inputs.
+ UpdateState(PN, PNIV);
+}
+
+
+void SparseSolver::visitInst(Instruction &I) {
+ // PHIs are handled by the propagation logic, they are never passed into the
+ // transfer functions.
+ if (PHINode *PN = dyn_cast<PHINode>(&I))
+ return visitPHINode(*PN);
+
+ // Otherwise, ask the transfer function what the result is. If this is
+ // something that we care about, remember it.
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(I, IV);
+
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
+ visitTerminatorInst(*TI);
+}
+
+void SparseSolver::Solve(Function &F) {
+ MarkBlockExecutable(&F.getEntryBlock());
+
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty()) {
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Instruction *I = InstWorkList.back();
+ InstWorkList.pop_back();
+
+ DOUT << "\nPopped off I-WL: " << *I;
+
+ // "I" got into the work list because it made a transition. See if any
+ // users are both live and in need of updating.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (BBExecutable.count(U->getParent())) // Inst is executable?
+ visitInst(*U);
+ }
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DOUT << "\nPopped off BBWL: " << *BB;
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ visitInst(*I);
+ }
+ }
+}
+
+void SparseSolver::Print(Function &F, std::ostream &OS) const {
+ OS << "\nFUNCTION: " << F.getNameStr() << "\n";
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ OS << "INFEASIBLE: ";
+ OS << "\t";
+ if (BB->hasName())
+ OS << BB->getNameStr() << ":\n";
+ else
+ OS << "; anon bb\n";
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ LatticeFunc->PrintValue(getLatticeState(I), OS);
+ OS << *I;
+ }
+
+ OS << "\n";
+ }
+}
+
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
new file mode 100644
index 0000000..8f19fda
--- /dev/null
+++ b/lib/Analysis/Trace.cpp
@@ -0,0 +1,50 @@
+//===- Trace.cpp - Implementation of Trace class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a single trace of LLVM basic blocks. A trace is a
+// single-entry, multiple-exit region of code that is often hot. Trace-based
+// optimizations treat traces almost like one large, strange basic block:
+// because the trace path is assumed to be hot, optimizations for the
+// fall-through path are made at the expense of the non-fall-through paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+Function *Trace::getFunction() const {
+ return getEntryBasicBlock()->getParent();
+}
+
+Module *Trace::getModule() const {
+ return getFunction()->getParent();
+}
+
+/// print - Write trace to output stream.
+///
+void Trace::print(std::ostream &O) const {
+  Function *F = getFunction();
+ O << "; Trace from function " << F->getName() << ", blocks:\n";
+ for (const_iterator i = begin(), e = end(); i != e; ++i) {
+ O << "; ";
+ WriteAsOperand(O, *i, true, getModule());
+ O << "\n";
+ }
+ O << "; Trace parent function: \n" << *F;
+}
+
+/// dump - Debugger convenience method; writes trace to standard error
+/// output stream.
+///
+void Trace::dump() const {
+ print(cerr);
+}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
new file mode 100644
index 0000000..29ff8aa
--- /dev/null
+++ b/lib/Analysis/ValueTracking.cpp
@@ -0,0 +1,1079 @@
+//===- ValueTracking.cpp - Walk computations to compute properties --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help analyze properties that chains of
+// computations have.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstring>
+using namespace llvm;
+
+/// getOpcode - If this is an Instruction or a ConstantExpr, return the
+/// opcode value. Otherwise return UserOp1.
+static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getOpcode();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ return CE->getOpcode();
+ // Use UserOp1 to mean there's no opcode.
+ return Instruction::UserOp1;
+}
+
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
+/// we cannot optimize based on the assumption that it is zero without changing
+/// it to be an explicit zero. If we don't change it to zero, other code could
+/// be optimized based on the contradictory assumption that it is non-zero.
+/// Because instcombine aggressively folds operations with undef args anyway,
+/// this won't lose us code quality.
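+///
+/// For example (a sketch): for V = (X << 4) with an 8-bit Mask of 0xFF, the
+/// low four bits of KnownZero are set no matter what X is, because a shift
+/// left by four always produces trailing zeros.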
+void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ TargetData *TD, unsigned Depth) {
+ const unsigned MaxDepth = 6;
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ unsigned BitWidth = Mask.getBitWidth();
+ assert((V->getType()->isInteger() || isa<PointerType>(V->getType())) &&
+ "Not integer or pointer type!");
+ assert((!TD || TD->getTypeSizeInBits(V->getType()) == BitWidth) &&
+ (!isa<IntegerType>(V->getType()) ||
+ V->getType()->getPrimitiveSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "V, Mask, KnownOne and KnownZero should have same BitWidth");
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ }
+ // Null is all-zeros.
+ if (isa<ConstantPointerNull>(V)) {
+ KnownOne.clear();
+ KnownZero = Mask;
+ return;
+ }
+ // The address of an aligned GlobalValue has trailing zeros.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ unsigned Align = GV->getAlignment();
+ if (Align == 0 && TD && GV->getType()->getElementType()->isSized())
+ Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+ if (Align > 0)
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ else
+ KnownZero.clear();
+ KnownOne.clear();
+ return;
+ }
+
+ KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything.
+
+ if (Depth == MaxDepth || Mask == 0)
+ return; // Limit search depth.
+
+ User *I = dyn_cast<User>(V);
+ if (!I) return;
+
+ APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+ switch (getOpcode(I)) {
+ default: break;
+ case Instruction::And: {
+    // If either the LHS or the RHS is zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ APInt Mask2(Mask & ~KnownZero);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ }
+ case Instruction::Or: {
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ APInt Mask2(Mask & ~KnownOne);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ }
+ case Instruction::Xor: {
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case Instruction::Mul: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clear();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case Instruction::UDiv: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clear();
+ KnownZero2.clear();
+ ComputeMaskedBits(I->getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case Instruction::Select:
+ ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ return; // Can't work with floating point.
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ // We can't handle these if we don't know the pointer size.
+ if (!TD) return;
+ // FALL THROUGH and handle them the same as zext/trunc.
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ // Note that we handle pointer operands here because of inttoptr/ptrtoint
+ // which fall through here.
+ const Type *SrcTy = I->getOperand(0)->getType();
+ unsigned SrcBitWidth = TD ?
+ TD->getTypeSizeInBits(SrcTy) :
+ SrcTy->getPrimitiveSizeInBits();
+ APInt MaskIn(Mask);
+ MaskIn.zextOrTrunc(SrcBitWidth);
+ KnownZero.zextOrTrunc(SrcBitWidth);
+ KnownOne.zextOrTrunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
+ Depth+1);
+ KnownZero.zextOrTrunc(BitWidth);
+ KnownOne.zextOrTrunc(BitWidth);
+ // Any top bits are known to be zero.
+ if (BitWidth > SrcBitWidth)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::BitCast: {
+ const Type *SrcTy = I->getOperand(0)->getType();
+ if (SrcTy->isInteger() || isa<PointerType>(SrcTy)) {
+ ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
+ Depth+1);
+ return;
+ }
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
+ unsigned SrcBitWidth = SrcTy->getBitWidth();
+
+ APInt MaskIn(Mask);
+ MaskIn.trunc(SrcBitWidth);
+ KnownZero.trunc(SrcBitWidth);
+ KnownOne.trunc(SrcBitWidth);
+ ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
+ KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ return;
+ }
+ case Instruction::Shl:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ APInt Mask2(Mask.lshr(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
+ return;
+ }
+ break;
+ case Instruction::LShr:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Unsigned shift right.
+ APInt Mask2(Mask.shl(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD,
+ Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ // high bits known zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ return;
+ }
+ break;
+ case Instruction::AShr:
+ // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // Compute the new bits that are at the top now.
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Signed shift right.
+ APInt Mask2(Mask.shl(ShiftAmt));
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
+ KnownZero |= HighBits;
+ else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
+ KnownOne |= HighBits;
+ return;
+ }
+ break;
+ case Instruction::Sub: {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
+      // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
+ TD, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case Instruction::Add: {
+    // If one of the operands has trailing zeros, then the bits that the
+ // other operand has in those bit positions will be preserved in the
+ // result. For an add, this works with either operand. For a subtract,
+ // this only works if the known zeros are in the right operand.
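+    // For example (a sketch): in (X << 4) + Y, the low four bits of the
+    // result are exactly the low four bits of Y, since no carries can be
+    // generated below bit 4.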
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
+ ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
+ assert((LHSKnownZero & LHSKnownOne) == 0 &&
+ "Bits known to be one AND zero?");
+ unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+ ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+ // Determine which operand has more trailing zeros, and use that
+ // many bits from the other operand.
+ if (LHSKnownZeroOut > RHSKnownZeroOut) {
+ if (getOpcode(I) == Instruction::Add) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+ } else {
+ // If the known zeros are in the left operand for a subtract,
+ // fall back to the minimum known zeros in both operands.
+ KnownZero |= APInt::getLowBitsSet(BitWidth,
+ std::min(LHSKnownZeroOut,
+ RHSKnownZeroOut));
+ }
+ } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+ KnownZero |= LHSKnownZero & Mask;
+ KnownOne |= LHSKnownOne & Mask;
+ }
+ return;
+ }
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+ APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+ Depth+1);
+
+ // If the sign bit of the first operand is zero, the sign bit of
+ // the result is zero. If the first operand has no one bits below
+ // the second operand's single 1 bit, its sign will be zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero2 |= ~LowBits;
+
+ KnownZero |= KnownZero2 & Mask;
+
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+ break;
+ case Instruction::URem: {
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
+ ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ Depth+1);
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne,
+ TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ TD, Depth+1);
+
+ unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clear();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ break;
+ }
+
+ case Instruction::Alloca:
+ case Instruction::Malloc: {
+ AllocationInst *AI = cast<AllocationInst>(V);
+ unsigned Align = AI->getAlignment();
+ if (Align == 0 && TD) {
+ if (isa<AllocaInst>(AI))
+ Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+ else if (isa<MallocInst>(AI)) {
+ // Malloc returns maximally aligned memory.
+ Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+ Align =
+ std::max(Align,
+ (unsigned)TD->getABITypeAlignment(Type::DoubleTy));
+ Align =
+ std::max(Align,
+ (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
+ }
+ }
+
+ if (Align > 0)
+ KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ // Analyze all of the subscripts of this getelementptr instruction
+ // to determine if we can prove known low zero bits.
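+    // For example (a sketch): a gep indexing an array of i32 scales the
+    // index by 4, so with an 8-byte-aligned base pointer and an unknown
+    // index, at least two trailing zero bits survive.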
+ APInt LocalMask = APInt::getAllOnesValue(BitWidth);
+ APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
+ ComputeMaskedBits(I->getOperand(0), LocalMask,
+ LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+
+ gep_type_iterator GTI = gep_type_begin(I);
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
+ Value *Index = I->getOperand(i);
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ // Handle struct member offset arithmetic.
+ if (!TD) return;
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ uint64_t Offset = SL->getElementOffset(Idx);
+ TrailZ = std::min(TrailZ,
+ CountTrailingZeros_64(Offset));
+ } else {
+ // Handle array index arithmetic.
+ const Type *IndexedTy = GTI.getIndexedType();
+ if (!IndexedTy->isSized()) return;
+ unsigned GEPOpiBits = Index->getType()->getPrimitiveSizeInBits();
+ uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+ LocalMask = APInt::getAllOnesValue(GEPOpiBits);
+ LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
+ ComputeMaskedBits(Index, LocalMask,
+ LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ TrailZ = std::min(TrailZ,
+ unsigned(CountTrailingZeros_64(TypeSize) +
+ LocalKnownZero.countTrailingOnes()));
+ }
+ }
+
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *P = cast<PHINode>(I);
+ // Handle the case of a simple two-predecessor recurrence PHI.
+ // There's a lot more that could theoretically be done here, but
+ // this is sufficient to catch some interesting cases.
+ if (P->getNumIncomingValues() == 2) {
+ for (unsigned i = 0; i != 2; ++i) {
+ Value *L = P->getIncomingValue(i);
+ Value *R = P->getIncomingValue(!i);
+ User *LU = dyn_cast<User>(L);
+ if (!LU)
+ continue;
+ unsigned Opcode = getOpcode(LU);
+ // Check for operations that have the property that if
+ // both their operands have low zero bits, the result
+ // will have low zero bits.
+ if (Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub ||
+ Opcode == Instruction::And ||
+ Opcode == Instruction::Or ||
+ Opcode == Instruction::Mul) {
+ Value *LL = LU->getOperand(0);
+ Value *LR = LU->getOperand(1);
+ // Find a recurrence.
+ if (LL == I)
+ L = LR;
+ else if (LR == I)
+ L = LL;
+ else
+ break;
+ // Ok, we have a PHI of the form L op= R. Check for low
+ // zero bits.
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+ Mask2 = APInt::getLowBitsSet(BitWidth,
+ KnownZero2.countTrailingOnes());
+
+ // We need to take the minimum number of known bits
+ APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
+ ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1);
+
+ KnownZero = Mask &
+ APInt::getLowBitsSet(BitWidth,
+ std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+ break;
+ }
+ }
+ }
+
+    // Otherwise take the intersection of the known bit sets of the operands,
+ // taking conservative care to avoid excessive recursion.
+ if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
+ // Skip direct self references.
+ if (P->getIncomingValue(i) == P) continue;
+
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ // Recurse, but cap the recursion to one level, because we don't
+ // want to waste time spinning around in loops.
+ ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne,
+ KnownZero2, KnownOne2, TD, MaxDepth-1);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ // If all bits have been ruled out, there's no need to check
+ // more operands.
+ if (!KnownZero && !KnownOne)
+ break;
+ }
+ }
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ }
+ }
+ break;
+ }
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
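+///
+/// For example, MaskedValueIsZero(V, APInt(32, 7)) returns true when the low
+/// three bits of V are known clear; this is the typical shape of an
+/// "is this value 8-byte aligned?" query.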
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
+ TargetData *TD, unsigned Depth) {
+ APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "ashr X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+///
+/// 'V' must have a scalar integer type.
+///
+unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) {
+ const IntegerType *Ty = cast<IntegerType>(V->getType());
+ unsigned TyBits = Ty->getBitWidth();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ // Note that ConstantInt is handled by the general ComputeMaskedBits case
+ // below.
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ User *U = dyn_cast<User>(V);
+ switch (getOpcode(V)) {
+ default: break;
+ case Instruction::SExt:
+ Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth();
+ return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+
+ case Instruction::AShr:
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ // ashr X, C -> adds C sign bits.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ return Tmp;
+ case Instruction::Shl:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (C->getZExtValue() >= TyBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case Instruction::Select:
+ Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case Instruction::Add:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD,
+ Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+ break;
+
+ case Instruction::Sub:
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne,
+ TD, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ break;
+ case Instruction::Trunc:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask = APInt::getAllOnesValue(TyBits);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-TyBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
+}
+
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
+/// value is never equal to -0.0.
+///
+/// NOTE: this function will need to be revisited when we support non-default
+/// rounding modes!
+///
+bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegZero();
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+
+ // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
+ if (I->getOpcode() == Instruction::Add &&
+ isa<ConstantFP>(I->getOperand(1)) &&
+ cast<ConstantFP>(I->getOperand(1))->isNullValue())
+ return true;
+
+ // sitofp and uitofp turn into +0.0 for zero.
+ if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+ return true;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // sqrt(-0.0) = -0.0, no other negative results are possible.
+ if (II->getIntrinsicID() == Intrinsic::sqrt)
+ return CannotBeNegativeZero(II->getOperand(1), Depth+1);
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction()) {
+ if (F->isDeclaration()) {
+ switch (F->getNameLen()) {
+ case 3: // abs(x) != -0.0
+ if (!strcmp(F->getNameStart(), "abs")) return true;
+ break;
+ case 4: // abs[lf](x) != -0.0
+ if (!strcmp(F->getNameStart(), "absf")) return true;
+ if (!strcmp(F->getNameStart(), "absl")) return true;
+ break;
+ }
+ }
+ }
+
+ return false;
+}
+
+// This is the recursive version of BuildSubAggregate. It takes a few different
+// arguments. Idxs is the index within the nested struct From that we are
+// looking at now (which is of type IndexedType). IdxSkip is the number of
+// indices from Idxs that should be left out when inserting into the resulting
+// struct. To is the result struct built so far, new insertvalue instructions
+// build on that.
+Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+ SmallVector<unsigned, 10> &Idxs,
+ unsigned IdxSkip,
+ Instruction *InsertBefore) {
+ const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+ if (STy) {
+ // Save the original To argument so we can modify it
+ Value *OrigTo = To;
+ // General case, the type indexed by Idxs is a struct
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // Process each struct element recursively
+ Idxs.push_back(i);
+ Value *PrevTo = To;
+ To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+ InsertBefore);
+ Idxs.pop_back();
+ if (!To) {
+ // Couldn't find any inserted value for this index? Cleanup
+ while (PrevTo != OrigTo) {
+ InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+ PrevTo = Del->getAggregateOperand();
+ Del->eraseFromParent();
+ }
+ // Stop processing elements
+ break;
+ }
+ }
+    // If we successfully found a value for each of our subaggregates
+ if (To)
+ return To;
+ }
+  // Base case, the type indexed by Idxs is not a struct, or not all of
+ // the struct's elements had a value that was inserted directly. In the latter
+ // case, perhaps we can't determine each of the subelements individually, but
+ // we might be able to find the complete struct somewhere.
+
+ // Find the value that is at that particular spot
+ Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+
+ if (!V)
+ return NULL;
+
+  // Insert the value in the new (sub) aggregate
+ return llvm::InsertValueInst::Create(To, V, Idxs.begin() + IdxSkip,
+ Idxs.end(), "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+// { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+// { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work
+// if each of the elements of the substruct is known (i.e., inserted into
+// From by an insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
+ const unsigned *idx_end, Instruction *InsertBefore) {
+ assert(InsertBefore && "Must have someplace to insert!");
+ const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+ idx_begin,
+ idx_end);
+ Value *To = UndefValue::get(IndexedType);
+ SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
+ unsigned IdxSkip = Idxs.size();
+
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if
+/// it were inserted directly into the aggregate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
+ const unsigned *idx_end, Instruction *InsertBefore) {
+ // Nothing to index? Just return V then (this is useful at the end of our
+ // recursion)
+ if (idx_begin == idx_end)
+ return V;
+ // We have indices, so V should have an indexable type
+ assert((isa<StructType>(V->getType()) || isa<ArrayType>(V->getType()))
+ && "Not looking at a struct or array?");
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
+ && "Invalid indices for type?");
+ const CompositeType *PTy = cast<CompositeType>(V->getType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
+ idx_begin,
+ idx_end));
+ else if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
+ idx_begin,
+ idx_end));
+ else if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
+ // Recursively process this constant
+ return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1, idx_end,
+ InsertBefore);
+ } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+ // Loop the indices for the insertvalue instruction in parallel with the
+ // requested indices
+ const unsigned *req_idx = idx_begin;
+ for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+ i != e; ++i, ++req_idx) {
+ if (req_idx == idx_end) {
+ if (InsertBefore)
+ // The requested index identifies a part of a nested aggregate. Handle
+ // this specially. For example,
+ // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+ // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+ // %C = extractvalue {i32, { i32, i32 } } %B, 1
+ // This can be changed into
+ // %A = insertvalue {i32, i32 } undef, i32 10, 0
+ // %C = insertvalue {i32, i32 } %A, i32 11, 1
+ // which allows the unused 0,0 element from the nested struct to be
+ // removed.
+ return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+ else
+ // We can't handle this without inserting insertvalues
+ return 0;
+ }
+
+      // This insertvalue inserts something other than what we are looking for.
+      // See if the (aggregate) value it was inserted into has the value we are
+      // looking for.
+ if (*req_idx != *i)
+ return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
+ InsertBefore);
+ }
+ // If we end up here, the indices of the insertvalue match with those
+ // requested (though possibly only partially). Now we recursively look at
+ // the inserted value, passing any remaining indices.
+ return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
+ InsertBefore);
+ } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+    // If we're extracting a value from an aggregate that was extracted from
+ // something else, we can extract from that something else directly instead.
+ // However, we will need to chain I's indices with the requested indices.
+
+ // Calculate the number of indices required
+ unsigned size = I->getNumIndices() + (idx_end - idx_begin);
+ // Allocate some space to put the new indices in
+ SmallVector<unsigned, 5> Idxs;
+ Idxs.reserve(size);
+ // Add indices from the extract value instruction
+ for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+ i != e; ++i)
+ Idxs.push_back(*i);
+
+ // Add requested indices
+ for (const unsigned *i = idx_begin, *e = idx_end; i != e; ++i)
+ Idxs.push_back(*i);
+
+ assert(Idxs.size() == size
+ && "Number of indices added not correct?");
+
+ return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
+ InsertBefore);
+ }
+  // Otherwise, we don't know (such as extracting from a function return
+  // value or a load instruction).
+ return 0;
+}
+
+/// GetConstantStringInfo - This function extracts the bytes of the
+/// null-terminated C string pointed to by V. If successful, it returns true
+/// and returns the string in Str. If unsuccessful, it returns false.
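+///
+/// For example (a sketch): for a global @s = constant [4 x i8] c"abc\00",
+/// calling this with Offset 0 and StopAtNul true returns true and sets
+/// Str to "abc".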
+bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
+ bool StopAtNul) {
+  // If V is NULL then return false.
+ if (V == NULL) return false;
+
+ // Look through bitcast instructions.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
+
+  // If the value is neither a GEP instruction nor a constant expression that
+  // performs a GEP, then return false, because a ConstantArray can't be
+  // reached any other way.
+ User *GEP = 0;
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::BitCast)
+ return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return false;
+ GEP = CE;
+ }
+
+ if (GEP) {
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return false;
+
+    // Make sure the GEP's pointer operand is a pointer to an array of i8.
+ const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+ const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+ if (AT == 0 || AT->getElementType() != Type::Int8Ty)
+ return false;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (FirstIdx == 0 || !FirstIdx->isZero())
+ return false;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return false;
+ return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
+ StopAtNul);
+ }
+
+  // Whether reached directly or through a GEP (constant expression or
+  // instruction), V must be a global variable that is a constant and has an
+  // initializer. The referenced constant initializer is the array that we'll
+  // use for the optimization.
+ GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+ if (!GV || !GV->isConstant() || !GV->hasInitializer())
+ return false;
+ Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case
+ if (isa<ConstantAggregateZero>(GlobalInit)) {
+ // This is a degenerate case. The initializer is constant zero so the
+ // length of the string must be zero.
+ Str.clear();
+ return true;
+ }
+
+  // Must be a ConstantArray
+ ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+ if (Array == 0 || Array->getType()->getElementType() != Type::Int8Ty)
+ return false;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ if (Offset > NumElts)
+ return false;
+
+ // Traverse the constant array from 'Offset' which is the place the GEP refers
+ // to in the array.
+ Str.reserve(NumElts-Offset);
+ for (unsigned i = Offset; i != NumElts; ++i) {
+ Constant *Elt = Array->getOperand(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return false;
+ if (StopAtNul && CI->isZero())
+ return true; // we found end of string, success!
+ Str += (char)CI->getZExtValue();
+ }
+
+ // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
+ return true;
+}
diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp
new file mode 100644
index 0000000..c6c89d2
--- /dev/null
+++ b/lib/Archive/Archive.cpp
@@ -0,0 +1,266 @@
+//===-- Archive.cpp - Generic LLVM archive functions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the Archive and ArchiveMember
+// classes that is common to both reading and writing archives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/System/Process.h"
+#include <memory>
+#include <cstring>
+using namespace llvm;
+
+// getMemberSize - compute the actual physical size of the file member as seen
+// on disk. This isn't the size of the member's payload. Use getSize() for
+// that.
+unsigned
+ArchiveMember::getMemberSize() const {
+  // Basically it's the file size plus the header size
+ unsigned result = info.fileSize + sizeof(ArchiveMemberHeader);
+
+ // If it has a long filename, include the name length
+ if (hasLongFilename())
+ result += path.toString().length() + 1;
+
+  // If it's now odd-length, include the padding byte
+  if (result % 2 != 0)
+ result++;
+
+ return result;
+}
+
+// This default constructor is only used by the ilist when it creates its
+// sentinel node. We give it specific static values to make it stand out a bit.
+ArchiveMember::ArchiveMember()
+ : parent(0), path("--invalid--"), flags(0), data(0)
+{
+ info.user = sys::Process::GetCurrentUserId();
+ info.group = sys::Process::GetCurrentGroupId();
+ info.mode = 0777;
+ info.fileSize = 0;
+ info.modTime = sys::TimeValue::now();
+}
+
+// This is the constructor that the Archive class uses when it is building or
+// reading an archive. It just defaults a few things and ensures the parent is
+// set for the iplist. The Archive class fills in the ArchiveMember's data.
+// This is required because correctly setting the data may depend on other
+// things in the Archive.
+ArchiveMember::ArchiveMember(Archive* PAR)
+ : parent(PAR), path(), flags(0), data(0)
+{
+}
+
+// This method allows an ArchiveMember to be replaced with the data for a
+// different file, presumably as an update to the member. It also makes sure
+// the flags are reset correctly.
+bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
+ if (!newFile.exists()) {
+ if (ErrMsg)
+ *ErrMsg = "Can not replace an archive member with a non-existent file";
+ return true;
+ }
+
+ data = 0;
+ path = newFile;
+
+ // SVR4 symbol tables have an empty name
+ if (path.toString() == ARFILE_SVR4_SYMTAB_NAME)
+ flags |= SVR4SymbolTableFlag;
+ else
+ flags &= ~SVR4SymbolTableFlag;
+
+ // BSD4.4 symbol tables have a special name
+ if (path.toString() == ARFILE_BSD4_SYMTAB_NAME)
+ flags |= BSD4SymbolTableFlag;
+ else
+ flags &= ~BSD4SymbolTableFlag;
+
+ // LLVM symbol tables have a very specific name
+ if (path.toString() == ARFILE_LLVM_SYMTAB_NAME)
+ flags |= LLVMSymbolTableFlag;
+ else
+ flags &= ~LLVMSymbolTableFlag;
+
+ // String table name
+ if (path.toString() == ARFILE_STRTAB_NAME)
+ flags |= StringTableFlag;
+ else
+ flags &= ~StringTableFlag;
+
+ // If it has a slash then it has a path
+ bool hasSlash = path.toString().find('/') != std::string::npos;
+ if (hasSlash)
+ flags |= HasPathFlag;
+ else
+ flags &= ~HasPathFlag;
+
+ // If it has a slash or its over 15 chars then its a long filename format
+ if (hasSlash || path.toString().length() > 15)
+ flags |= HasLongFilenameFlag;
+ else
+ flags &= ~HasLongFilenameFlag;
+
+ // Get the signature and status info
+ const char* signature = (const char*) data;
+ std::string magic;
+ if (!signature) {
+ path.getMagicNumber(magic,4);
+ signature = magic.c_str();
+ const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg);
+ if (FSinfo)
+ info = *FSinfo;
+ else
+ return true;
+ }
+
+  // Determine what kind of file it is.
+  switch (sys::IdentifyFileType(signature,4)) {
+    case sys::Bitcode_FileType:
+      flags |= BitcodeFlag;
+      break;
+    default:
+      flags &= ~BitcodeFlag;
+      break;
+  }
+ return false;
+}
+
+// Archive constructor - this is the only constructor that gets used for the
+// Archive class. Everything else (default,copy) is deprecated. This just
+// initializes and maps the file into memory, if requested.
+Archive::Archive(const sys::Path& filename)
+ : archPath(filename), members(), mapfile(0), base(0), symTab(), strtab(),
+ symTabSize(0), firstFileOffset(0), modules(), foreignST(0) {
+}
+
+bool
+Archive::mapToMemory(std::string* ErrMsg) {
+ mapfile = MemoryBuffer::getFile(archPath.c_str(), ErrMsg);
+ if (mapfile == 0)
+ return true;
+ base = mapfile->getBufferStart();
+ return false;
+}
+
+void Archive::cleanUpMemory() {
+ // Shutdown the file mapping
+ delete mapfile;
+ mapfile = 0;
+ base = 0;
+
+ // Forget the entire symbol table
+ symTab.clear();
+ symTabSize = 0;
+
+ firstFileOffset = 0;
+
+ // Free the foreign symbol table member
+ if (foreignST) {
+ delete foreignST;
+ foreignST = 0;
+ }
+
+  // Delete any ModuleProviders and ArchiveMembers we've allocated as a result
+ // of symbol table searches.
+ for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) {
+ delete I->second.first;
+ delete I->second.second;
+ }
+}
+
+// Archive destructor - just clean up memory
+Archive::~Archive() {
+ cleanUpMemory();
+}
+
+
+
+static void getSymbols(Module*M, std::vector<std::string>& symbols) {
+ // Loop over global variables
+  for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+       GI != GE; ++GI)
+ if (!GI->isDeclaration() && !GI->hasLocalLinkage())
+ if (!GI->getName().empty())
+ symbols.push_back(GI->getName());
+
+ // Loop over functions
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+ if (!FI->isDeclaration() && !FI->hasLocalLinkage())
+ if (!FI->getName().empty())
+ symbols.push_back(FI->getName());
+
+ // Loop over aliases
+ for (Module::alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ if (AI->hasName())
+ symbols.push_back(AI->getName());
+ }
+}
+
+// Get just the externally visible defined symbols from the bitcode
+bool llvm::GetBitcodeSymbols(const sys::Path& fName,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg) {
+ std::auto_ptr<MemoryBuffer> Buffer(
+ MemoryBuffer::getFileOrSTDIN(fName.c_str()));
+ if (!Buffer.get()) {
+ if (ErrMsg) *ErrMsg = "Could not open file '" + fName.toString() + "'";
+ return true;
+ }
+
+ ModuleProvider *MP = getBitcodeModuleProvider(Buffer.get(), ErrMsg);
+ if (!MP)
+ return true;
+
+ // Get the module from the provider
+ Module* M = MP->materializeModule();
+ if (M == 0) {
+ delete MP;
+ return true;
+ }
+
+ // Get the symbols
+ getSymbols(M, symbols);
+
+  // Done with the module. Returning true indicates an error in this API (as
+  // in the paths above), so success returns false.
+  delete MP;
+  return false;
+}
+
+ModuleProvider*
+llvm::GetBitcodeSymbols(const unsigned char *BufPtr, unsigned Length,
+ const std::string& ModuleID,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg) {
+ // Get the module provider
+  MemoryBuffer *Buffer =
+    MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str());
+ memcpy((char*)Buffer->getBufferStart(), BufPtr, Length);
+
+ ModuleProvider *MP = getBitcodeModuleProvider(Buffer, ErrMsg);
+ if (!MP)
+ return 0;
+
+ // Get the module from the provider
+ Module* M = MP->materializeModule();
+ if (M == 0) {
+ delete MP;
+ return 0;
+ }
+
+ // Get the symbols
+ getSymbols(M, symbols);
+
+ // Done with the module. Note that ModuleProvider will delete the
+  // Module when it is deleted. Also note that it's the caller's responsibility
+ // to delete the ModuleProvider.
+ return MP;
+}
diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h
new file mode 100644
index 0000000..7ba3024
--- /dev/null
+++ b/lib/Archive/ArchiveInternals.h
@@ -0,0 +1,85 @@
+//===-- lib/Archive/ArchiveInternals.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Internal implementation header for LLVM Archive files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H
+#define LIB_ARCHIVE_ARCHIVEINTERNALS_H
+
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/System/TimeValue.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include <cstring>
+
+#define ARFILE_MAGIC "!<arch>\n" ///< magic string
+#define ARFILE_MAGIC_LEN (sizeof(ARFILE_MAGIC)-1) ///< length of magic string
+#define ARFILE_SVR4_SYMTAB_NAME "/ " ///< SVR4 symtab entry name
+#define ARFILE_LLVM_SYMTAB_NAME "#_LLVM_SYM_TAB_#" ///< LLVM symtab entry name
+#define ARFILE_BSD4_SYMTAB_NAME "__.SYMDEF SORTED" ///< BSD4 symtab entry name
+#define ARFILE_STRTAB_NAME "// " ///< Name of string table
+#define ARFILE_PAD "\n" ///< inter-file align padding
+#define ARFILE_MEMBER_MAGIC "`\n" ///< fmag field magic #
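+
+// A bitcode archive is thus the 8-byte ARFILE_MAGIC string followed by the
+// members, each a 60-byte ArchiveMemberHeader plus its data, with a single
+// ARFILE_PAD byte after any member of odd length.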
+
+namespace llvm {
+
+ /// The ArchiveMemberHeader structure is used internally for bitcode
+ /// archives.
+ /// The header precedes each file member in the archive. This structure is
+ /// defined using character arrays for direct and correct interpretation
+  /// regardless of the endianness of the machine that produced it.
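+  /// On disk the header occupies 60 bytes: name[16], date[12], uid[6],
+  /// gid[6], mode[8], size[10] and fmag[2], all stored as blank-padded ASCII.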
+ /// @brief Archive File Member Header
+ class ArchiveMemberHeader {
+ /// @name Data
+ /// @{
+ public:
+ char name[16]; ///< Name of the file member.
+ char date[12]; ///< File date, decimal seconds since Epoch
+ char uid[6]; ///< user id in ASCII decimal
+ char gid[6]; ///< group id in ASCII decimal
+ char mode[8]; ///< file mode in ASCII octal
+ char size[10]; ///< file size in ASCII decimal
+    char fmag[2];  ///< Always contains ARFILE_MEMBER_MAGIC
+
+ /// @}
+ /// @name Methods
+ /// @{
+ public:
+ void init() {
+ memset(name,' ',16);
+ memset(date,' ',12);
+ memset(uid,' ',6);
+ memset(gid,' ',6);
+ memset(mode,' ',8);
+ memset(size,' ',10);
+ fmag[0] = '`';
+ fmag[1] = '\n';
+ }
+
+ bool checkSignature() {
+ return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2);
+ }
+ };
+
+ // Get just the externally visible defined symbols from the bitcode
+ bool GetBitcodeSymbols(const sys::Path& fName,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg);
+
+ ModuleProvider* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length,
+ const std::string& ModuleID,
+ std::vector<std::string>& symbols,
+ std::string* ErrMsg);
+}
+
+#endif
+
+// vim: sw=2 ai
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
new file mode 100644
index 0000000..b07e884
--- /dev/null
+++ b/lib/Archive/ArchiveReader.cpp
@@ -0,0 +1,627 @@
+//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reads standard Unix archive files (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Module.h"
+#include <cstdlib>
+#include <memory>
+using namespace llvm;
+
+/// Read a variable-bit-rate encoded unsigned integer
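+/// (VBR): each byte carries seven payload bits, least-significant group
+/// first, with the high bit set on every byte except the last. For example,
+/// 300 (0x12C) is the byte sequence 0xAC 0x02.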
+static inline unsigned readInteger(const char*&At, const char*End) {
+ unsigned Shift = 0;
+ unsigned Result = 0;
+
+ do {
+ if (At == End)
+ return Result;
+ Result |= (unsigned)((*At++) & 0x7F) << Shift;
+ Shift += 7;
+ } while (At[-1] & 0x80);
+ return Result;
+}
+
+// Completely parse the Archive's symbol table and populate symTab member var.
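+// The table is a sequence of (offset, length, name) records with both
+// integers VBR-encoded; e.g. a record for the symbol "main" in the member at
+// offset 68 is the six bytes 0x44 0x04 'm' 'a' 'i' 'n'.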
+bool
+Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) {
+ const char* At = (const char*) data;
+ const char* End = At + size;
+ while (At < End) {
+ unsigned offset = readInteger(At, End);
+ if (At == End) {
+ if (error)
+ *error = "Ran out of data reading vbr_uint for symtab offset!";
+ return false;
+ }
+ unsigned length = readInteger(At, End);
+ if (At == End) {
+ if (error)
+ *error = "Ran out of data reading vbr_uint for symtab length!";
+ return false;
+ }
+ if (At + length > End) {
+ if (error)
+ *error = "Malformed symbol table: length not consistent with size";
+ return false;
+ }
+ // we don't care if it can't be inserted (duplicate entry)
+ symTab.insert(std::make_pair(std::string(At, length), offset));
+ At += length;
+ }
+ symTabSize = size;
+ return true;
+}
+
+// This method parses an ArchiveMemberHeader that is presumed to be pointed to
+// by At. The At pointer is updated to the byte just after the header, which
+// can be variable in size.
+ArchiveMember*
+Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
+{
+ if (At + sizeof(ArchiveMemberHeader) >= End) {
+ if (error)
+ *error = "Unexpected end of file";
+ return 0;
+ }
+
+ // Cast archive member header
+ ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At;
+ At += sizeof(ArchiveMemberHeader);
+
+ // Extract the size and determine if the file is
+ // compressed or not (negative length).
+ int flags = 0;
+ int MemberSize = atoi(Hdr->size);
+ if (MemberSize < 0) {
+ flags |= ArchiveMember::CompressedFlag;
+ MemberSize = -MemberSize;
+ }
+
+ // Check the size of the member for sanity
+ if (At + MemberSize > End) {
+ if (error)
+ *error = "invalid member length in archive file";
+ return 0;
+ }
+
+ // Check the member signature
+ if (!Hdr->checkSignature()) {
+ if (error)
+ *error = "invalid file member signature";
+ return 0;
+ }
+
+  // Convert and check the member name.
+  // The empty name ('/' and 15 blanks) is for a foreign (non-LLVM) symbol
+  // table. The special name "//" and 14 blanks is for a string table, used
+  // for long file names. This library doesn't generate either of those but
+  // it will accept them. If the name starts with #1/ and the remainder is
+  // digits, then those digits specify the length of the name that is
+  // stored immediately following the header. The special name
+  // #_LLVM_SYM_TAB_# identifies the symbol table for LLVM bitcode.
+  // Anything else is a regular, short filename that is terminated with
+  // a '/' and blanks.
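+  // For example: "foo.bc/        " is a short name, "#1/14" is followed by a
+  // 14-character name at the start of the member's data, and "/123" refers
+  // to the name at offset 123 in the string table.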
+
+ std::string pathname;
+ switch (Hdr->name[0]) {
+ case '#':
+    if (Hdr->name[1] == '1' && Hdr->name[2] == '/') {
+      // The member uses the long file name (>15 chars) format that is
+      // standard for 4.4BSD and Mac OS X: the digits after #1/ give the
+      // length of the name, which occupies the first bytes of the member's
+      // data.
+ if (isdigit(Hdr->name[3])) {
+ unsigned len = atoi(&Hdr->name[3]);
+ pathname.assign(At, len);
+ At += len;
+ MemberSize -= len;
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ } else {
+ if (error)
+ *error = "invalid long filename";
+ return 0;
+ }
+ } else if (Hdr->name[1] == '_' &&
+ (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) {
+        // This is the LLVM symbol table member. Its name is always the
+        // fixed string #_LLVM_SYM_TAB_#.
+ pathname.assign(ARFILE_LLVM_SYMTAB_NAME);
+ flags |= ArchiveMember::LLVMSymbolTableFlag;
+ }
+ break;
+ case '/':
+ if (Hdr->name[1]== '/') {
+ if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) {
+ pathname.assign(ARFILE_STRTAB_NAME);
+ flags |= ArchiveMember::StringTableFlag;
+ } else {
+ if (error)
+ *error = "invalid string table name";
+ return 0;
+ }
+ } else if (Hdr->name[1] == ' ') {
+ if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) {
+ pathname.assign(ARFILE_SVR4_SYMTAB_NAME);
+ flags |= ArchiveMember::SVR4SymbolTableFlag;
+ } else {
+ if (error)
+ *error = "invalid SVR4 symbol table name";
+ return 0;
+ }
+ } else if (isdigit(Hdr->name[1])) {
+ unsigned index = atoi(&Hdr->name[1]);
+ if (index < strtab.length()) {
+ const char* namep = strtab.c_str() + index;
+ const char* endp = strtab.c_str() + strtab.length();
+ const char* p = namep;
+ const char* last_p = p;
+ while (p < endp) {
+ if (*p == '\n' && *last_p == '/') {
+ pathname.assign(namep, last_p - namep);
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ break;
+ }
+ last_p = p;
+ p++;
+ }
+ if (p >= endp) {
+ if (error)
+ *error = "missing name termiantor in string table";
+ return 0;
+ }
+ } else {
+ if (error)
+ *error = "name index beyond string table";
+ return 0;
+ }
+ }
+ break;
+ case '_':
+ if (Hdr->name[1] == '_' &&
+ (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) {
+ pathname.assign(ARFILE_BSD4_SYMTAB_NAME);
+ flags |= ArchiveMember::BSD4SymbolTableFlag;
+ break;
+ }
+ /* FALL THROUGH */
+
+ default:
+ char* slash = (char*) memchr(Hdr->name, '/', 16);
+ if (slash == 0)
+ slash = Hdr->name + 16;
+ pathname.assign(Hdr->name, slash - Hdr->name);
+ break;
+ }
+
+ // Determine if this is a bitcode file
+ switch (sys::IdentifyFileType(At, 4)) {
+ case sys::Bitcode_FileType:
+ flags |= ArchiveMember::BitcodeFlag;
+ break;
+ default:
+ flags &= ~ArchiveMember::BitcodeFlag;
+ break;
+ }
+
+ // Instantiate the ArchiveMember to be filled
+ ArchiveMember* member = new ArchiveMember(this);
+
+ // Fill in fields of the ArchiveMember
+ member->parent = this;
+ member->path.set(pathname);
+ member->info.fileSize = MemberSize;
+ member->info.modTime.fromEpochTime(atoi(Hdr->date));
+ unsigned int mode;
+ sscanf(Hdr->mode, "%o", &mode);
+ member->info.mode = mode;
+ member->info.user = atoi(Hdr->uid);
+ member->info.group = atoi(Hdr->gid);
+ member->flags = flags;
+ member->data = At;
+
+ return member;
+}
+
+bool
+Archive::checkSignature(std::string* error) {
+ // Check the magic string at file's header
+ if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) {
+ if (error)
+ *error = "invalid signature for an archive file";
+ return false;
+ }
+ return true;
+}
+
+// This function loads the entire archive and fully populates its ilist with
+// the members of the archive file. This is typically used in preparation for
+// editing the contents of the archive.
+bool
+Archive::loadArchive(std::string* error) {
+
+ // Set up parsing
+ members.clear();
+ symTab.clear();
+ const char *At = base;
+ const char *End = mapfile->getBufferEnd();
+
+ if (!checkSignature(error))
+ return false;
+
+ At += 8; // Skip the magic string.
+
+ bool seenSymbolTable = false;
+ bool foundFirstFile = false;
+ while (At < End) {
+ // parse the member header
+ const char* Save = At;
+ ArchiveMember* mbr = parseMemberHeader(At, End, error);
+ if (!mbr)
+ return false;
+
+ // check if this is the foreign symbol table
+ if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+ // We just save this but don't do anything special
+ // with it. It doesn't count as the "first file".
+ if (foreignST) {
+ // What? Multiple foreign symbol tables? Just chuck it
+ // and retain the last one found.
+ delete foreignST;
+ }
+ foreignST = mbr;
+ At += mbr->getSize();
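+      // Members are 2-byte aligned; skip the pad byte if present.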
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ } else if (mbr->isStringTable()) {
+ // Simply suck the entire string table into a string
+ // variable. This will be used to get the names of the
+ // members that use the "/ddd" format for their names
+ // (SVR4 style long names).
+ strtab.assign(At, mbr->getSize());
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ } else if (mbr->isLLVMSymbolTable()) {
+ // This is the LLVM symbol table for the archive. If we've seen it
+      // already, it's an error. Otherwise, parse the symbol table and move on.
+ if (seenSymbolTable) {
+ if (error)
+ *error = "invalid archive: multiple symbol tables";
+ return false;
+ }
+ if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error))
+ return false;
+ seenSymbolTable = true;
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr; // We don't need this member in the list of members.
+ } else {
+      // This is just a regular file. If it's the first one, save its offset.
+ // Otherwise just push it on the list and move on to the next file.
+ if (!foundFirstFile) {
+ firstFileOffset = Save - base;
+ foundFirstFile = true;
+ }
+ members.push_back(mbr);
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ }
+ }
+ return true;
+}
+
+// Open and completely load the archive file.
+Archive*
+Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage)
+{
+ std::auto_ptr<Archive> result ( new Archive(file));
+ if (result->mapToMemory(ErrorMessage))
+ return 0;
+ if (!result->loadArchive(ErrorMessage))
+ return 0;
+ return result.release();
+}
+
+// Get all the bitcode modules from the archive
+bool
+Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) {
+
+ for (iterator I=begin(), E=end(); I != E; ++I) {
+ if (I->isBitcode()) {
+ std::string FullMemberName = archPath.toString() +
+ "(" + I->getPath().toString() + ")";
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str());
+ memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize());
+
+ Module *M = ParseBitcodeFile(Buffer, ErrMessage);
+ delete Buffer;
+ if (!M)
+ return true;
+
+ Modules.push_back(M);
+ }
+ }
+ return false;
+}
+
+// Load just the symbol table from the archive file
+bool
+Archive::loadSymbolTable(std::string* ErrorMsg) {
+
+ // Set up parsing
+ members.clear();
+ symTab.clear();
+ const char *At = base;
+ const char *End = mapfile->getBufferEnd();
+
+ // Make sure we're dealing with an archive
+ if (!checkSignature(ErrorMsg))
+ return false;
+
+ At += 8; // Skip signature
+
+ // Parse the first file member header
+ const char* FirstFile = At;
+ ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg);
+ if (!mbr)
+ return false;
+
+ if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+ // Skip the foreign symbol table, we don't do anything with it
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+
+ // Read the next one
+ FirstFile = At;
+ mbr = parseMemberHeader(At, End, ErrorMsg);
+    if (!mbr)
+      return false;
+ }
+
+ if (mbr->isStringTable()) {
+ // Process the string table entry
+ strtab.assign((const char*)mbr->getData(), mbr->getSize());
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ // Get the next one
+ FirstFile = At;
+ mbr = parseMemberHeader(At, End, ErrorMsg);
+    if (!mbr)
+      return false;
+ }
+
+  // See if it's the symbol table
+ if (mbr->isLLVMSymbolTable()) {
+ if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) {
+ delete mbr;
+ return false;
+ }
+
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ delete mbr;
+ // Can't be any more symtab headers so just advance
+ FirstFile = At;
+ } else {
+ // There's no symbol table in the file. We have to rebuild it from scratch
+ // because the intent of this method is to get the symbol table loaded so
+ // it can be searched efficiently.
+ // Add the member to the members list
+ members.push_back(mbr);
+ }
+
+ firstFileOffset = FirstFile - base;
+ return true;
+}
+
+// Open the archive and load just the symbol tables
+Archive*
+Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) {
+ std::auto_ptr<Archive> result ( new Archive(file) );
+ if (result->mapToMemory(ErrorMessage))
+ return 0;
+ if (!result->loadSymbolTable(ErrorMessage))
+ return 0;
+ return result.release();
+}
+
+// Look up one symbol in the symbol table and return a ModuleProvider for the
+// module that defines that symbol.
+ModuleProvider*
+Archive::findModuleDefiningSymbol(const std::string& symbol,
+ std::string* ErrMsg) {
+ SymTabType::iterator SI = symTab.find(symbol);
+ if (SI == symTab.end())
+ return 0;
+
+  // The symbol table was constructed as if the members were written without a
+  // symbol table header. The recorded offsets could not be fixed up after the
+  // fact: adjusting an offset can change its VBR-encoded length, which would
+  // in turn change the size of the symbol table itself. So we compensate here
+  // by adding the offset of the first "real" member, which accounts for the
+  // symbol table member and its header.
+ unsigned fileOffset =
+ SI->second + // offset in symbol-table-less file
+ firstFileOffset; // add offset to first "real" file in archive
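+  // (For example, with a firstFileOffset of 76, a symbol recorded at offset
+  // 0 resolves to the member header at base + 76.)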
+
+ // See if the module is already loaded
+ ModuleMap::iterator MI = modules.find(fileOffset);
+ if (MI != modules.end())
+ return MI->second.first;
+
+ // Module hasn't been loaded yet, we need to load it
+ const char* modptr = base + fileOffset;
+ ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(),
+ ErrMsg);
+ if (!mbr)
+ return 0;
+
+ // Now, load the bitcode module to get the ModuleProvider
+ std::string FullMemberName = archPath.toString() + "(" +
+ mbr->getPath().toString() + ")";
+  MemoryBuffer *Buffer = MemoryBuffer::getNewMemBuffer(mbr->getSize(),
+                                                       FullMemberName.c_str());
+ memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize());
+
+ ModuleProvider *mp = getBitcodeModuleProvider(Buffer, ErrMsg);
+ if (!mp)
+ return 0;
+
+ modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr)));
+
+ return mp;
+}
+
+// Look up multiple symbols in the symbol table and return a set of
+// ModuleProviders that define those symbols.
+bool
+Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
+ std::set<ModuleProvider*>& result,
+ std::string* error) {
+ if (!mapfile || !base) {
+ if (error)
+ *error = "Empty archive invalid for finding modules defining symbols";
+ return false;
+ }
+
+ if (symTab.empty()) {
+    // We don't have a symbol table, so we must build it now. We also populate
+    // the modules table as we go, to ensure that members aren't loaded twice
+    // when findModuleDefiningSymbol is called below.
+
+ // Get a pointer to the first file
+ const char* At = base + firstFileOffset;
+ const char* End = mapfile->getBufferEnd();
+
+ while ( At < End) {
+ // Compute the offset to be put in the symbol table
+ unsigned offset = At - base - firstFileOffset;
+
+ // Parse the file's header
+ ArchiveMember* mbr = parseMemberHeader(At, End, error);
+ if (!mbr)
+ return false;
+
+      // If it's a bitcode member, index the symbols it defines
+ if (mbr->isBitcode()) {
+ // Get the symbols
+ std::vector<std::string> symbols;
+ std::string FullMemberName = archPath.toString() + "(" +
+ mbr->getPath().toString() + ")";
+ ModuleProvider* MP =
+ GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(),
+ FullMemberName, symbols, error);
+
+ if (MP) {
+ // Insert the module's symbols into the symbol table
+ for (std::vector<std::string>::iterator I = symbols.begin(),
+ E=symbols.end(); I != E; ++I ) {
+ symTab.insert(std::make_pair(*I, offset));
+ }
+ // Insert the ModuleProvider and the ArchiveMember into the table of
+ // modules.
+ modules.insert(std::make_pair(offset, std::make_pair(MP, mbr)));
+ } else {
+ if (error)
+ *error = "Can't parse bitcode member: " +
+ mbr->getPath().toString() + ": " + *error;
+ delete mbr;
+ return false;
+ }
+ }
+
+ // Go to the next file location
+ At += mbr->getSize();
+ if ((intptr_t(At) & 1) == 1)
+ At++;
+ }
+ }
+
+ // At this point we have a valid symbol table (one way or another) so we
+ // just use it to quickly find the symbols requested.
+
+ for (std::set<std::string>::iterator I=symbols.begin(),
+ E=symbols.end(); I != E;) {
+ // See if this symbol exists
+ ModuleProvider* mp = findModuleDefiningSymbol(*I,error);
+ if (mp) {
+      // The symbol exists; insert the ModuleProvider into our result.
+      // Duplicates will be ignored.
+ result.insert(mp);
+
+      // Remove the symbol now that it's been resolved, being careful to
+ // post-increment the iterator.
+ symbols.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ return true;
+}
+
+bool Archive::isBitcodeArchive() {
+ // Make sure the symTab has been loaded. In most cases this should have been
+ // done when the archive was constructed, but still, this is just in case.
+ if (symTab.empty())
+ if (!loadSymbolTable(0))
+ return false;
+
+  // Now that we know it's been loaded, return true if it has a size.
+ if (symTab.size()) return true;
+
+ // We still can't be sure it isn't a bitcode archive
+ if (!loadArchive(0))
+ return false;
+
+ std::vector<Module *> Modules;
+ std::string ErrorMessage;
+
+ // Scan the archive, trying to load a bitcode member. We only load one to
+ // see if this works.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (!I->isBitcode())
+ continue;
+
+ std::string FullMemberName =
+ archPath.toString() + "(" + I->getPath().toString() + ")";
+
+ MemoryBuffer *Buffer =
+ MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str());
+ memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize());
+ Module *M = ParseBitcodeFile(Buffer);
+ delete Buffer;
+ if (!M)
+ return false; // Couldn't parse bitcode, not a bitcode archive.
+ delete M;
+ return true;
+ }
+
+ return false;
+}
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
new file mode 100644
index 0000000..336a2bd
--- /dev/null
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -0,0 +1,482 @@
+//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Builds up an LLVM archive file (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/System/Signals.h"
+#include "llvm/System/Process.h"
+#include "llvm/ModuleProvider.h"
+#include <fstream>
+#include <ostream>
+#include <iomanip>
+using namespace llvm;
+
+// Write an integer using variable bit rate encoding. This saves a few bytes
+// per entry in the symbol table.
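+// For example, 300 (0x12C) is written as the two bytes 0xAC 0x02: the low
+// seven bits with the continuation bit set, then the remaining bits.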
+static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
+ while (1) {
+ if (num < 0x80) { // done?
+ ARFile << (unsigned char)num;
+ return;
+ }
+
+ // Nope, we are bigger than a character, output the next 7 bits and set the
+ // high bit to say that there is more coming...
+ ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F));
+ num >>= 7; // Shift out 7 bits now...
+ }
+}
+
+// Compute how many bytes are taken by a given VBR encoded value. This is needed
+// to pre-compute the size of the symbol table.
+static inline unsigned numVbrBytes(unsigned num) {
+
+  // Note that the following nested ifs are somewhat equivalent to a binary
+  // search. We split the range in half by comparing against 2^14 first, so
+  // most reasonable values take two comparisons rather than one for small
+  // values and four for large ones. We expect to see file offsets in the
+  // 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range, so this
+  // approach is reasonable.
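+  // For example, numVbrBytes(300) == 2 and numVbrBytes(1<<20) == 3.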
+ if (num < 1<<14) {
+ if (num < 1<<7)
+ return 1;
+ else
+ return 2;
+ }
+ if (num < 1<<21)
+ return 3;
+
+ if (num < 1<<28)
+ return 4;
+ return 5; // anything >= 2^28 takes 5 bytes
+}
+
+// Create an empty archive.
+Archive*
+Archive::CreateEmpty(const sys::Path& FilePath ) {
+ Archive* result = new Archive(FilePath);
+ return result;
+}
+
+// Fill the ArchiveMemberHeader with the information from a member. If
+// TruncateNames is true, names are flattened to 15 chars or less. The sz field
+// is provided here instead of coming from the mbr because the member might be
+// stored compressed and the compressed size is not the ArchiveMember's size.
+// Furthermore compressed files have negative size fields to identify them as
+// compressed.
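+// For example, a compressed member whose payload occupies 100 bytes arrives
+// here with sz == -100, and its size field is written as "-100".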
+bool
+Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr,
+ int sz, bool TruncateNames) const {
+
+ // Set the permissions mode, uid and gid
+ hdr.init();
+ char buffer[32];
+ sprintf(buffer, "%-8o", mbr.getMode());
+ memcpy(hdr.mode,buffer,8);
+ sprintf(buffer, "%-6u", mbr.getUser());
+ memcpy(hdr.uid,buffer,6);
+ sprintf(buffer, "%-6u", mbr.getGroup());
+ memcpy(hdr.gid,buffer,6);
+
+ // Set the last modification date
+ uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime();
+ sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
+ memcpy(hdr.date,buffer,12);
+
+ // Get rid of trailing blanks in the name
+ std::string mbrPath = mbr.getPath().toString();
+ size_t mbrLen = mbrPath.length();
+ while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') {
+ mbrPath.erase(mbrLen-1,1);
+ mbrLen--;
+ }
+
+ // Set the name field in one of its various flavors.
+ bool writeLongName = false;
+ if (mbr.isStringTable()) {
+ memcpy(hdr.name,ARFILE_STRTAB_NAME,16);
+ } else if (mbr.isSVR4SymbolTable()) {
+ memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16);
+ } else if (mbr.isBSD4SymbolTable()) {
+ memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16);
+ } else if (mbr.isLLVMSymbolTable()) {
+ memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
+ } else if (TruncateNames) {
+ const char* nm = mbrPath.c_str();
+ unsigned len = mbrPath.length();
+ size_t slashpos = mbrPath.rfind('/');
+ if (slashpos != std::string::npos) {
+ nm += slashpos + 1;
+      len -= slashpos + 1;
+ }
+ if (len > 15)
+ len = 15;
+ memcpy(hdr.name,nm,len);
+ hdr.name[len] = '/';
+ } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) {
+ memcpy(hdr.name,mbrPath.c_str(),mbrPath.length());
+ hdr.name[mbrPath.length()] = '/';
+ } else {
+ std::string nm = "#1/";
+ nm += utostr(mbrPath.length());
+ memcpy(hdr.name,nm.data(),nm.length());
+ if (sz < 0)
+ sz -= mbrPath.length();
+ else
+ sz += mbrPath.length();
+ writeLongName = true;
+ }
+
+ // Set the size field
+ if (sz < 0) {
+ buffer[0] = '-';
+ sprintf(&buffer[1],"%-9u",(unsigned)-sz);
+ } else {
+ sprintf(buffer, "%-10u", (unsigned)sz);
+ }
+ memcpy(hdr.size,buffer,10);
+
+ return writeLongName;
+}
+
+// Insert a file into the archive before some other member. This also takes care
+// of extracting the necessary flags and information from the file.
+bool
+Archive::addFileBefore(const sys::Path& filePath, iterator where,
+ std::string* ErrMsg) {
+ if (!filePath.exists()) {
+ if (ErrMsg)
+ *ErrMsg = "Can not add a non-existent file to archive";
+ return true;
+ }
+
+ ArchiveMember* mbr = new ArchiveMember(this);
+
+ mbr->data = 0;
+ mbr->path = filePath;
+ const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
+ if (FSInfo)
+ mbr->info = *FSInfo;
+ else
+ return true;
+
+ unsigned flags = 0;
+ bool hasSlash = filePath.toString().find('/') != std::string::npos;
+ if (hasSlash)
+ flags |= ArchiveMember::HasPathFlag;
+ if (hasSlash || filePath.toString().length() > 15)
+ flags |= ArchiveMember::HasLongFilenameFlag;
+ std::string magic;
+ mbr->path.getMagicNumber(magic,4);
+ switch (sys::IdentifyFileType(magic.c_str(),4)) {
+ case sys::Bitcode_FileType:
+ flags |= ArchiveMember::BitcodeFlag;
+ break;
+ default:
+ break;
+ }
+ mbr->flags = flags;
+ members.insert(where,mbr);
+ return false;
+}
+
+// Write one member out to the file.
+bool
+Archive::writeMember(
+ const ArchiveMember& member,
+ std::ofstream& ARFile,
+ bool CreateSymbolTable,
+ bool TruncateNames,
+ bool ShouldCompress,
+ std::string* ErrMsg
+) {
+
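+  // Symbol table offsets are recorded relative to the end of the 8-byte
+  // archive magic, so compute the member's position with the magic excluded.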
+ unsigned filepos = ARFile.tellp();
+ filepos -= 8;
+
+ // Get the data and its size either from the
+ // member's in-memory data or directly from the file.
+ size_t fSize = member.getSize();
+ const char *data = (const char*)member.getData();
+ MemoryBuffer *mFile = 0;
+ if (!data) {
+ mFile = MemoryBuffer::getFile(member.getPath().c_str(), ErrMsg);
+ if (mFile == 0)
+ return true;
+ data = mFile->getBufferStart();
+ fSize = mFile->getBufferSize();
+ }
+
+ // Now that we have the data in memory, update the
+  // symbol table if it's a bitcode file.
+ if (CreateSymbolTable && member.isBitcode()) {
+ std::vector<std::string> symbols;
+ std::string FullMemberName = archPath.toString() + "(" +
+ member.getPath().toString()
+ + ")";
+ ModuleProvider* MP =
+ GetBitcodeSymbols((const unsigned char*)data,fSize,
+ FullMemberName, symbols, ErrMsg);
+
+ // If the bitcode parsed successfully
+  if (MP) {
+ for (std::vector<std::string>::iterator SI = symbols.begin(),
+ SE = symbols.end(); SI != SE; ++SI) {
+
+ std::pair<SymTabType::iterator,bool> Res =
+ symTab.insert(std::make_pair(*SI,filepos));
+
+ if (Res.second) {
+ symTabSize += SI->length() +
+ numVbrBytes(SI->length()) +
+ numVbrBytes(filepos);
+ }
+ }
+ // We don't need this module any more.
+ delete MP;
+ } else {
+ delete mFile;
+ if (ErrMsg)
+ *ErrMsg = "Can't parse bitcode member: " + member.getPath().toString()
+ + ": " + *ErrMsg;
+ return true;
+ }
+ }
+
+ int hdrSize = fSize;
+
+ // Compute the fields of the header
+ ArchiveMemberHeader Hdr;
+ bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames);
+
+ // Write header to archive file
+ ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+  // Write the long filename if this member uses one
+ if (writeLongName) {
+ ARFile.write(member.getPath().toString().data(),
+ member.getPath().toString().length());
+ }
+
+ // Write the (possibly compressed) member's content to the file.
+ ARFile.write(data,fSize);
+
+ // Make sure the member is an even length
+ if ((ARFile.tellp() & 1) == 1)
+ ARFile << ARFILE_PAD;
+
+ // Close the mapped file if it was opened
+ delete mFile;
+ return false;
+}
+
+// Write out the LLVM symbol table as an archive member to the file.
+void
+Archive::writeSymbolTable(std::ofstream& ARFile) {
+
+ // Construct the symbol table's header
+ ArchiveMemberHeader Hdr;
+ Hdr.init();
+ memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
+ uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime();
+ char buffer[32];
+ sprintf(buffer, "%-8o", 0644);
+ memcpy(Hdr.mode,buffer,8);
+ sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId());
+ memcpy(Hdr.uid,buffer,6);
+ sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId());
+ memcpy(Hdr.gid,buffer,6);
+ sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
+ memcpy(Hdr.date,buffer,12);
+ sprintf(buffer,"%-10u",symTabSize);
+ memcpy(Hdr.size,buffer,10);
+
+ // Write the header
+ ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+#ifndef NDEBUG
+  // Save the starting position of the symbol table's data content.
+ unsigned startpos = ARFile.tellp();
+#endif
+
+ // Write out the symbols sequentially
+ for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end();
+ I != E; ++I)
+ {
+ // Write out the file index
+ writeInteger(I->second, ARFile);
+ // Write out the length of the symbol
+ writeInteger(I->first.length(), ARFile);
+ // Write out the symbol
+ ARFile.write(I->first.data(), I->first.length());
+ }
+
+#ifndef NDEBUG
+ // Now that we're done with the symbol table, get the ending file position
+ unsigned endpos = ARFile.tellp();
+#endif
+
+ // Make sure that the amount we wrote is what we pre-computed. This is
+ // critical for file integrity purposes.
+ assert(endpos - startpos == symTabSize && "Invalid symTabSize computation");
+
+ // Make sure the symbol table is even sized
+ if (symTabSize % 2 != 0 )
+ ARFile << ARFILE_PAD;
+}
+
+// Write the entire archive to the file specified when the archive was created.
+// This writes to a temporary file first. Options are for creating a symbol
+// table, flattening the file names (no directories, 15 chars max) and
+// compressing each archive member.
+bool
+Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
+ std::string* ErrMsg)
+{
+  // Make sure the caller hasn't opened the archive without loading it and is
+  // now trying to write it, which would wipe out the existing file.
+ if (members.empty() && mapfile && mapfile->getBufferSize() > 8) {
+ if (ErrMsg)
+ *ErrMsg = "Can't write an archive not opened for writing";
+ return true;
+ }
+
+ // Create a temporary file to store the archive in
+ sys::Path TmpArchive = archPath;
+ if (TmpArchive.createTemporaryFileOnDisk(ErrMsg))
+ return true;
+
+ // Make sure the temporary gets removed if we crash
+ sys::RemoveFileOnSignal(TmpArchive);
+
+ // Create archive file for output.
+ std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
+ std::ios::binary;
+ std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode);
+
+ // Check for errors opening or creating archive file.
+ if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
+ if (TmpArchive.exists())
+ TmpArchive.eraseFromDisk();
+ if (ErrMsg)
+ *ErrMsg = "Error opening archive file: " + archPath.toString();
+ return true;
+ }
+
+ // If we're creating a symbol table, reset it now
+ if (CreateSymbolTable) {
+ symTabSize = 0;
+ symTab.clear();
+ }
+
+ // Write magic string to archive.
+ ArchiveFile << ARFILE_MAGIC;
+
+ // Loop over all member files, and write them out. Note that this also
+ // builds the symbol table, symTab.
+ for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
+ if (writeMember(*I, ArchiveFile, CreateSymbolTable,
+ TruncateNames, Compress, ErrMsg)) {
+ if (TmpArchive.exists())
+ TmpArchive.eraseFromDisk();
+ ArchiveFile.close();
+ return true;
+ }
+ }
+
+ // Close archive file.
+ ArchiveFile.close();
+
+ // Write the symbol table
+ if (CreateSymbolTable) {
+ // At this point we have written a file that is a legal archive but it
+ // doesn't have a symbol table in it. To aid in faster reading and to
+ // ensure compatibility with other archivers we need to put the symbol
+ // table first in the file. Unfortunately, this means mapping the file
+ // we just wrote back in and copying it to the destination file.
+ sys::Path FinalFilePath = archPath;
+
+ // Map in the archive we just wrote.
+ {
+ OwningPtr<MemoryBuffer> arch(MemoryBuffer::getFile(TmpArchive.c_str()));
+ if (arch == 0) return true;
+ const char* base = arch->getBufferStart();
+
+ // Open another temporary file in order to avoid invalidating the
+ // mmapped data
+ if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
+ return true;
+ sys::RemoveFileOnSignal(FinalFilePath);
+
+ std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
+ if (!FinalFile.is_open() || FinalFile.bad()) {
+ if (TmpArchive.exists())
+ TmpArchive.eraseFromDisk();
+ if (ErrMsg)
+ *ErrMsg = "Error opening archive file: " + FinalFilePath.toString();
+ return true;
+ }
+
+ // Write the file magic number
+ FinalFile << ARFILE_MAGIC;
+
+ // If there is a foreign symbol table, put it into the file now. Most
+ // ar(1) implementations require the symbol table to be first but llvm-ar
+ // can deal with it being after a foreign symbol table. This ensures
+ // compatibility with other ar(1) implementations as well as allowing the
+ // archive to store both native .o and LLVM .bc files, both indexed.
+ if (foreignST) {
+ if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
+ FinalFile.close();
+ if (TmpArchive.exists())
+ TmpArchive.eraseFromDisk();
+ return true;
+ }
+ }
+
+ // Put out the LLVM symbol table now.
+ writeSymbolTable(FinalFile);
+
+ // Copy the temporary file contents being sure to skip the file's magic
+ // number.
+ FinalFile.write(base + sizeof(ARFILE_MAGIC)-1,
+ arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1);
+
+ // Close up shop
+ FinalFile.close();
+ } // free arch.
+
+ // Move the final file over top of TmpArchive
+ if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
+ return true;
+ }
+
+ // Before we replace the actual archive, we need to forget all the
+ // members, since they point to data in that old archive. We need to do
+ // this because we cannot replace an open file on Windows.
+ cleanUpMemory();
+
+ if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
+ return true;
+
+ // Set correct read and write permissions after temporary file is moved
+ // to final destination path.
+ if (archPath.makeReadableOnDisk(ErrMsg))
+ return true;
+ if (archPath.makeWriteableOnDisk(ErrMsg))
+ return true;
+
+ return false;
+}
diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt
new file mode 100644
index 0000000..27698cb
--- /dev/null
+++ b/lib/Archive/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMArchive
+ Archive.cpp
+ ArchiveReader.cpp
+ ArchiveWriter.cpp
+  )
\ No newline at end of file
diff --git a/lib/Archive/Makefile b/lib/Archive/Makefile
new file mode 100644
index 0000000..da97804
--- /dev/null
+++ b/lib/Archive/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Archive/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMArchive
+
+# We only want an archive so only those modules actually used by a tool are
+# included.
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/AsmParser/CMakeLists.txt b/lib/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..985ebe2
--- /dev/null
+++ b/lib/AsmParser/CMakeLists.txt
@@ -0,0 +1,6 @@
+# AsmParser
+add_llvm_library(LLVMAsmParser
+ LLLexer.cpp
+ LLParser.cpp
+ Parser.cpp
+ )
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
new file mode 100644
index 0000000..f2e6890
--- /dev/null
+++ b/lib/AsmParser/LLLexer.cpp
@@ -0,0 +1,835 @@
+//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLLexer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instruction.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Assembly/Parser.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const {
+ // Scan backward to find the start of the line.
+ const char *LineStart = ErrorLoc;
+ while (LineStart != CurBuf->getBufferStart() &&
+ LineStart[-1] != '\n' && LineStart[-1] != '\r')
+ --LineStart;
+ // Get the end of the line.
+ const char *LineEnd = ErrorLoc;
+ while (LineEnd != CurBuf->getBufferEnd() &&
+ LineEnd[0] != '\n' && LineEnd[0] != '\r')
+ ++LineEnd;
+
+ unsigned LineNo = 1;
+ for (const char *FP = CurBuf->getBufferStart(); FP != ErrorLoc; ++FP)
+ if (*FP == '\n') ++LineNo;
+
+ std::string LineContents(LineStart, LineEnd);
+ ErrorInfo.setError(Msg, LineNo, ErrorLoc-LineStart, LineContents);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions.
+//===----------------------------------------------------------------------===//
+
+// atoull - Convert an ASCII string of decimal digits into the unsigned long
+// long representation. This does not have to do input error checking,
+// because we know that the input will be matched by a suitable regex.
+//
+uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
+ uint64_t Result = 0;
+ for (; Buffer != End; Buffer++) {
+ uint64_t OldRes = Result;
+ Result *= 10;
+ Result += *Buffer-'0';
+ if (Result < OldRes) { // Uh, oh, overflow detected!!!
+ Error("constant bigger than 64 bits detected!");
+ return 0;
+ }
+ }
+ return Result;
+}
+
+uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
+ uint64_t Result = 0;
+ for (; Buffer != End; ++Buffer) {
+ uint64_t OldRes = Result;
+ Result *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Result += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Result += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Result += C-'a'+10;
+
+ if (Result < OldRes) { // Uh, oh, overflow detected!!!
+ Error("constant bigger than 64 bits detected!");
+ return 0;
+ }
+ }
+ return Result;
+}
+
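+/// HexToIntPair - translate up to 32 hexits into { low64, high64 }: the
+/// first 16 hexits fill Pair[0] and any remaining hexits fill Pair[1].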
+void LLLexer::HexToIntPair(const char *Buffer, const char *End,
+ uint64_t Pair[2]) {
+ Pair[0] = 0;
+ for (int i=0; i<16; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[0] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[0] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[0] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[0] += C-'a'+10;
+ }
+ Pair[1] = 0;
+ for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
+ Pair[1] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[1] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[1] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[1] += C-'a'+10;
+ }
+ if (Buffer != End)
+ Error("constant bigger than 128 bits detected!");
+}
+
+/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
+/// { low64, high16 } as usual for an APInt.
+void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
+ uint64_t Pair[2]) {
+ Pair[1] = 0;
+ for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[1] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[1] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[1] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[1] += C-'a'+10;
+ }
+ Pair[0] = 0;
+ for (int i=0; i<16; i++, Buffer++) {
+ Pair[0] *= 16;
+ char C = *Buffer;
+ if (C >= '0' && C <= '9')
+ Pair[0] += C-'0';
+ else if (C >= 'A' && C <= 'F')
+ Pair[0] += C-'A'+10;
+ else if (C >= 'a' && C <= 'f')
+ Pair[0] += C-'a'+10;
+ }
+ if (Buffer != End)
+ Error("constant bigger than 128 bits detected!");
+}
+
+// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
+// appropriate character.
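+// For example, the three characters "\5A" become the single character 'Z',
+// and "\\" collapses to one backslash.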
+static void UnEscapeLexed(std::string &Str) {
+ if (Str.empty()) return;
+
+ char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
+ char *BOut = Buffer;
+ for (char *BIn = Buffer; BIn != EndBuffer; ) {
+ if (BIn[0] == '\\') {
+ if (BIn < EndBuffer-1 && BIn[1] == '\\') {
+ *BOut++ = '\\'; // Two \ becomes one
+ BIn += 2;
+ } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
+ char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
+ *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
+ BIn[3] = Tmp; // Restore character
+ BIn += 3; // Skip over handled chars
+ ++BOut;
+ } else {
+ *BOut++ = *BIn++;
+ }
+ } else {
+ *BOut++ = *BIn++;
+ }
+ }
+ Str.resize(BOut-Buffer);
+}
+
+/// isLabelChar - Return true for [-a-zA-Z$._0-9].
+static bool isLabelChar(char C) {
+ return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_';
+}
+
+
+/// isLabelTail - Return true if this pointer points to a valid end of a label.
+static const char *isLabelTail(const char *CurPtr) {
+ while (1) {
+ if (CurPtr[0] == ':') return CurPtr+1;
+ if (!isLabelChar(CurPtr[0])) return 0;
+ ++CurPtr;
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Lexer definition.
+//===----------------------------------------------------------------------===//
+
+LLLexer::LLLexer(MemoryBuffer *StartBuf, ParseError &Err)
+ : CurBuf(StartBuf), ErrorInfo(Err), APFloatVal(0.0) {
+ CurPtr = CurBuf->getBufferStart();
+}
+
+std::string LLLexer::getFilename() const {
+ return CurBuf->getBufferIdentifier();
+}
+
+int LLLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default: return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+}
+
+
+lltok::Kind LLLexer::LexToken() {
+ TokStart = CurPtr;
+
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ default:
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
+ return LexIdentifier();
+
+ return lltok::Error;
+ case EOF: return lltok::Eof;
+ case 0:
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Ignore whitespace.
+ return LexToken();
+ case '+': return LexPositive();
+ case '@': return LexAt();
+ case '%': return LexPercent();
+ case '"': return LexQuote();
+ case '.':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ if (CurPtr[0] == '.' && CurPtr[1] == '.') {
+ CurPtr += 2;
+ return lltok::dotdotdot;
+ }
+ return lltok::Error;
+ case '$':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ return lltok::Error;
+ case ';':
+ SkipLineComment();
+ return LexToken();
+ case '!': return lltok::Metadata;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '-':
+ return LexDigitOrNegative();
+ case '=': return lltok::equal;
+ case '[': return lltok::lsquare;
+ case ']': return lltok::rsquare;
+ case '{': return lltok::lbrace;
+ case '}': return lltok::rbrace;
+ case '<': return lltok::less;
+ case '>': return lltok::greater;
+ case '(': return lltok::lparen;
+ case ')': return lltok::rparen;
+ case ',': return lltok::comma;
+ case '*': return lltok::star;
+ case '\\': return lltok::backslash;
+ }
+}
+
+void LLLexer::SkipLineComment() {
+ while (1) {
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
+ return;
+ }
+}
+
+/// LexAt - Lex all tokens that start with an @ character:
+/// GlobalVar @\"[^\"]*\"
+/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// GlobalVarID @[0-9]+
+lltok::Kind LLLexer::LexAt() {
+ // Handle AtStringConstant: @\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in global variable name");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(TokStart+2, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return lltok::GlobalVar;
+ }
+ }
+ }
+
+ // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_') {
+ ++CurPtr;
+ while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_')
+ ++CurPtr;
+
+ StrVal.assign(TokStart+1, CurPtr); // Skip @
+ return lltok::GlobalVar;
+ }
+
+ // Handle GlobalVarID: @[0-9]+
+ if (isdigit(CurPtr[0])) {
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::GlobalID;
+ }
+
+ return lltok::Error;
+}
+
+
+/// LexPercent - Lex all tokens that start with a % character:
+/// LocalVar ::= %\"[^\"]*\"
+/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// LocalVarID ::= %[0-9]+
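+///
+/// Illustrative examples (added commentary): %tmp lexes to lltok::LocalVar
+/// with StrVal == "tmp", and %7 lexes to lltok::LocalVarID with
+/// UIntVal == 7.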
+lltok::Kind LLLexer::LexPercent() {
+ // Handle LocalVarName: %\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in string constant");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(TokStart+2, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return lltok::LocalVar;
+ }
+ }
+ }
+
+ // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_') {
+ ++CurPtr;
+ while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_')
+ ++CurPtr;
+
+ StrVal.assign(TokStart+1, CurPtr); // Skip %
+ return lltok::LocalVar;
+ }
+
+ // Handle LocalVarID: %[0-9]+
+ if (isdigit(CurPtr[0])) {
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::LocalVarID;
+ }
+
+ return lltok::Error;
+}
+
+/// LexQuote - Lex all tokens that start with a " character:
+/// QuoteLabel "[^"]+":
+/// StringConstant "[^"]*"
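+///
+/// Illustrative examples (added commentary): "hello" lexes to
+/// lltok::StringConstant, while "my block": (with the trailing colon)
+/// lexes to lltok::LabelStr with the quotes and colon stripped.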
+lltok::Kind LLLexer::LexQuote() {
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in quoted string");
+ return lltok::Error;
+ }
+
+ if (CurChar != '"') continue;
+
+ if (CurPtr[0] != ':') {
+ StrVal.assign(TokStart+1, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ return lltok::StringConstant;
+ }
+
+ ++CurPtr;
+ StrVal.assign(TokStart+1, CurPtr-2);
+ UnEscapeLexed(StrVal);
+ return lltok::LabelStr;
+ }
+}
+
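+/// JustWhitespaceNewLine - Return true if the characters at Ptr are only
+/// spaces or tabs followed by a newline, advancing Ptr to the newline if so.
+/// Used below when auto-upgrading bare "sext"/"zext" attributes.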
+static bool JustWhitespaceNewLine(const char *&Ptr) {
+ const char *ThisPtr = Ptr;
+ while (*ThisPtr == ' ' || *ThisPtr == '\t')
+ ++ThisPtr;
+ if (*ThisPtr == '\n' || *ThisPtr == '\r') {
+ Ptr = ThisPtr;
+ return true;
+ }
+ return false;
+}
+
+
+/// LexIdentifier: Handle several related productions:
+/// Label [-a-zA-Z$._0-9]+:
+/// IntegerType i[0-9]+
+/// Keyword sdiv, float, ...
+/// HexIntConstant [us]0x[0-9A-Fa-f]+
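+///
+/// Illustrative examples (added commentary): "entry:" lexes to
+/// lltok::LabelStr, "i32" to lltok::Type, and "add" to the kw_add
+/// instruction keyword.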
+lltok::Kind LLLexer::LexIdentifier() {
+ const char *StartChar = CurPtr;
+ const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
+ const char *KeywordEnd = 0;
+
+ for (; isLabelChar(*CurPtr); ++CurPtr) {
+ // If we decide this is an integer, remember the end of the sequence.
+ if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
+ if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
+ }
+
+ // If we stopped due to a colon, this really is a label.
+ if (*CurPtr == ':') {
+ StrVal.assign(StartChar-1, CurPtr++);
+ return lltok::LabelStr;
+ }
+
+ // Otherwise, this wasn't a label. If this was valid as an integer type,
+ // return it.
+ if (IntEnd == 0) IntEnd = CurPtr;
+ if (IntEnd != StartChar) {
+ CurPtr = IntEnd;
+ uint64_t NumBits = atoull(StartChar, CurPtr);
+ if (NumBits < IntegerType::MIN_INT_BITS ||
+ NumBits > IntegerType::MAX_INT_BITS) {
+ Error("bitwidth for integer type out of range!");
+ return lltok::Error;
+ }
+ TyVal = IntegerType::get(NumBits);
+ return lltok::Type;
+ }
+
+ // Otherwise, this was a letter sequence. See which keyword this is.
+ if (KeywordEnd == 0) KeywordEnd = CurPtr;
+ CurPtr = KeywordEnd;
+ --StartChar;
+ unsigned Len = CurPtr-StartChar;
+#define KEYWORD(STR) \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
+ return lltok::kw_##STR;
+
+ KEYWORD(begin); KEYWORD(end);
+ KEYWORD(true); KEYWORD(false);
+ KEYWORD(declare); KEYWORD(define);
+ KEYWORD(global); KEYWORD(constant);
+
+ KEYWORD(private);
+ KEYWORD(internal);
+ KEYWORD(available_externally);
+ KEYWORD(linkonce);
+ KEYWORD(linkonce_odr);
+ KEYWORD(weak);
+ KEYWORD(weak_odr);
+ KEYWORD(appending);
+ KEYWORD(dllimport);
+ KEYWORD(dllexport);
+ KEYWORD(common);
+ KEYWORD(default);
+ KEYWORD(hidden);
+ KEYWORD(protected);
+ KEYWORD(extern_weak);
+ KEYWORD(external);
+ KEYWORD(thread_local);
+ KEYWORD(zeroinitializer);
+ KEYWORD(undef);
+ KEYWORD(null);
+ KEYWORD(to);
+ KEYWORD(tail);
+ KEYWORD(target);
+ KEYWORD(triple);
+ KEYWORD(deplibs);
+ KEYWORD(datalayout);
+ KEYWORD(volatile);
+ KEYWORD(align);
+ KEYWORD(addrspace);
+ KEYWORD(section);
+ KEYWORD(alias);
+ KEYWORD(module);
+ KEYWORD(asm);
+ KEYWORD(sideeffect);
+ KEYWORD(gc);
+
+ KEYWORD(ccc);
+ KEYWORD(fastcc);
+ KEYWORD(coldcc);
+ KEYWORD(x86_stdcallcc);
+ KEYWORD(x86_fastcallcc);
+ KEYWORD(cc);
+ KEYWORD(c);
+
+ KEYWORD(signext);
+ KEYWORD(zeroext);
+ KEYWORD(inreg);
+ KEYWORD(sret);
+ KEYWORD(nounwind);
+ KEYWORD(noreturn);
+ KEYWORD(noalias);
+ KEYWORD(nocapture);
+ KEYWORD(byval);
+ KEYWORD(nest);
+ KEYWORD(readnone);
+ KEYWORD(readonly);
+
+ KEYWORD(noinline);
+ KEYWORD(alwaysinline);
+ KEYWORD(optsize);
+ KEYWORD(ssp);
+ KEYWORD(sspreq);
+
+ KEYWORD(type);
+ KEYWORD(opaque);
+
+ KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
+ KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
+ KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
+ KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
+
+ KEYWORD(x);
+#undef KEYWORD
+
+ // Keywords for types.
+#define TYPEKEYWORD(STR, LLVMTY) \
+ if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
+ TyVal = LLVMTY; return lltok::Type; }
+ TYPEKEYWORD("void", Type::VoidTy);
+ TYPEKEYWORD("float", Type::FloatTy);
+ TYPEKEYWORD("double", Type::DoubleTy);
+ TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty);
+ TYPEKEYWORD("fp128", Type::FP128Ty);
+ TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty);
+ TYPEKEYWORD("label", Type::LabelTy);
+ TYPEKEYWORD("metadata", Type::MetadataTy);
+#undef TYPEKEYWORD
+
+ // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is
+ // to avoid conflicting with the sext/zext instructions, below.
+ if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
+ // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
+ if (JustWhitespaceNewLine(CurPtr))
+ return lltok::kw_signext;
+ } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
+ // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
+ if (JustWhitespaceNewLine(CurPtr))
+ return lltok::kw_zeroext;
+ }
+
+ // Keywords for instructions.
+#define INSTKEYWORD(STR, Enum) \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
+ UIntVal = Instruction::Enum; return lltok::kw_##STR; }
+
+ INSTKEYWORD(add, Add); INSTKEYWORD(sub, Sub); INSTKEYWORD(mul, Mul);
+ INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
+ INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
+ INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
+ INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
+ INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
+ INSTKEYWORD(vicmp, VICmp); INSTKEYWORD(vfcmp, VFCmp);
+
+ INSTKEYWORD(phi, PHI);
+ INSTKEYWORD(call, Call);
+ INSTKEYWORD(trunc, Trunc);
+ INSTKEYWORD(zext, ZExt);
+ INSTKEYWORD(sext, SExt);
+ INSTKEYWORD(fptrunc, FPTrunc);
+ INSTKEYWORD(fpext, FPExt);
+ INSTKEYWORD(uitofp, UIToFP);
+ INSTKEYWORD(sitofp, SIToFP);
+ INSTKEYWORD(fptoui, FPToUI);
+ INSTKEYWORD(fptosi, FPToSI);
+ INSTKEYWORD(inttoptr, IntToPtr);
+ INSTKEYWORD(ptrtoint, PtrToInt);
+ INSTKEYWORD(bitcast, BitCast);
+ INSTKEYWORD(select, Select);
+ INSTKEYWORD(va_arg, VAArg);
+ INSTKEYWORD(ret, Ret);
+ INSTKEYWORD(br, Br);
+ INSTKEYWORD(switch, Switch);
+ INSTKEYWORD(invoke, Invoke);
+ INSTKEYWORD(unwind, Unwind);
+ INSTKEYWORD(unreachable, Unreachable);
+
+ INSTKEYWORD(malloc, Malloc);
+ INSTKEYWORD(alloca, Alloca);
+ INSTKEYWORD(free, Free);
+ INSTKEYWORD(load, Load);
+ INSTKEYWORD(store, Store);
+ INSTKEYWORD(getelementptr, GetElementPtr);
+
+ INSTKEYWORD(extractelement, ExtractElement);
+ INSTKEYWORD(insertelement, InsertElement);
+ INSTKEYWORD(shufflevector, ShuffleVector);
+ INSTKEYWORD(getresult, ExtractValue);
+ INSTKEYWORD(extractvalue, ExtractValue);
+ INSTKEYWORD(insertvalue, InsertValue);
+#undef INSTKEYWORD
+
+  // Check for [us]0x[0-9A-Fa-f]+, hexadecimal integer constants generated by
+  // the CFE to avoid forcing it to deal with 64-bit numbers.
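+  // For example (added commentary): "u0xFF" lexes to the unsigned 8-bit
+  // APSInt value 255.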
+ if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
+ TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
+ int len = CurPtr-TokStart-3;
+ uint32_t bits = len * 4;
+ APInt Tmp(bits, TokStart+3, len, 16);
+ uint32_t activeBits = Tmp.getActiveBits();
+ if (activeBits > 0 && activeBits < bits)
+ Tmp.trunc(activeBits);
+ APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
+ return lltok::APSInt;
+ }
+
+ // If this is "cc1234", return this as just "cc".
+ if (TokStart[0] == 'c' && TokStart[1] == 'c') {
+ CurPtr = TokStart+2;
+ return lltok::kw_cc;
+ }
+
+ // If this starts with "call", return it as CALL. This is to support old
+ // broken .ll files. FIXME: remove this with LLVM 3.0.
+ if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
+ CurPtr = TokStart+4;
+ UIntVal = Instruction::Call;
+ return lltok::kw_call;
+ }
+
+ // Finally, if this isn't known, return an error.
+ CurPtr = TokStart+1;
+ return lltok::Error;
+}
+
+
+/// Lex0x: Handle productions that start with 0x, knowing that it matches and
+/// that this is not a label:
+/// HexFPConstant 0x[0-9A-Fa-f]+
+/// HexFP80Constant 0xK[0-9A-Fa-f]+
+/// HexFP128Constant 0xL[0-9A-Fa-f]+
+/// HexPPC128Constant 0xM[0-9A-Fa-f]+
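+///
+/// For example (added commentary): 0x3FF0000000000000 is the IEEE bit
+/// pattern of the double 1.0, and the K/L/M prefixes select the x87,
+/// IEEE-128, and PPC-128 formats handled below.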
+lltok::Kind LLLexer::Lex0x() {
+ CurPtr = TokStart + 2;
+
+ char Kind;
+ if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
+ Kind = *CurPtr++;
+ } else {
+ Kind = 'J';
+ }
+
+ if (!isxdigit(CurPtr[0])) {
+ // Bad token, return it as an error.
+ CurPtr = TokStart+1;
+ return lltok::Error;
+ }
+
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ if (Kind == 'J') {
+ // HexFPConstant - Floating point constant represented in IEEE format as a
+ // hexadecimal number for when exponential notation is not precise enough.
+ // Float and double only.
+ APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
+ return lltok::APFloat;
+ }
+
+ uint64_t Pair[2];
+ switch (Kind) {
+ default: assert(0 && "Unknown kind!");
+ case 'K':
+ // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
+ FP80HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(80, 2, Pair));
+ return lltok::APFloat;
+ case 'L':
+ // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
+ HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(128, 2, Pair), true);
+ return lltok::APFloat;
+ case 'M':
+ // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
+ HexToIntPair(TokStart+3, CurPtr, Pair);
+ APFloatVal = APFloat(APInt(128, 2, Pair));
+ return lltok::APFloat;
+ }
+}
+
+/// LexDigitOrNegative: Handle several related productions:
+/// Label [-a-zA-Z$._0-9]+:
+/// NInteger -[0-9]+
+/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+/// PInteger [0-9]+
+/// HexFPConstant 0x[0-9A-Fa-f]+
+/// HexFP80Constant 0xK[0-9A-Fa-f]+
+/// HexFP128Constant 0xL[0-9A-Fa-f]+
+/// HexPPC128Constant 0xM[0-9A-Fa-f]+
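+///
+/// Illustrative examples (added commentary): "123" and "-4" lex to
+/// lltok::APSInt, "1.5e3" to lltok::APFloat, and "-1:" to lltok::LabelStr.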
+lltok::Kind LLLexer::LexDigitOrNegative() {
+  // If the character after the '-' is not a digit, this is probably a label.
+ if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
+ // Okay, this is not a number after the -, it's probably a label.
+ if (const char *End = isLabelTail(CurPtr)) {
+ StrVal.assign(TokStart, End-1);
+ CurPtr = End;
+ return lltok::LabelStr;
+ }
+
+ return lltok::Error;
+ }
+
+ // At this point, it is either a label, int or fp constant.
+
+ // Skip digits, we have at least one.
+ for (; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+  // Check to see if this really is a label after all, e.g. "-1:".
+ if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
+ if (const char *End = isLabelTail(CurPtr)) {
+ StrVal.assign(TokStart, End-1);
+ CurPtr = End;
+ return lltok::LabelStr;
+ }
+ }
+
+  // If the next character is a '.', then this is a floating point value;
+  // otherwise it is an integer.
+ if (CurPtr[0] != '.') {
+ if (TokStart[0] == '0' && TokStart[1] == 'x')
+ return Lex0x();
+ unsigned Len = CurPtr-TokStart;
+ uint32_t numBits = ((Len * 64) / 19) + 2;
+ APInt Tmp(numBits, TokStart, Len, 10);
+ if (TokStart[0] == '-') {
+ uint32_t minBits = Tmp.getMinSignedBits();
+ if (minBits > 0 && minBits < numBits)
+ Tmp.trunc(minBits);
+ APSIntVal = APSInt(Tmp, false);
+ } else {
+ uint32_t activeBits = Tmp.getActiveBits();
+ if (activeBits > 0 && activeBits < numBits)
+ Tmp.trunc(activeBits);
+ APSIntVal = APSInt(Tmp, true);
+ }
+ return lltok::APSInt;
+ }
+
+ ++CurPtr;
+
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(CurPtr[0])) ++CurPtr;
+
+ if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+ if (isdigit(CurPtr[1]) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+ CurPtr += 2;
+ while (isdigit(CurPtr[0])) ++CurPtr;
+ }
+ }
+
+ APFloatVal = APFloat(atof(TokStart));
+ return lltok::APFloat;
+}
+
+/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
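+///
+/// For example (added commentary): "+1.5" and "+0.5e-2" lex to
+/// lltok::APFloat, while a bare "+1" is rejected because the '.' is
+/// mandatory here.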
+lltok::Kind LLLexer::LexPositive() {
+  // If the character after the '+' is not a digit, this cannot be a
+  // floating point constant; reject it.
+ if (!isdigit(CurPtr[0]))
+ return lltok::Error;
+
+ // Skip digits.
+ for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ /*empty*/;
+
+ // At this point, we need a '.'.
+ if (CurPtr[0] != '.') {
+ CurPtr = TokStart+1;
+ return lltok::Error;
+ }
+
+ ++CurPtr;
+
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(CurPtr[0])) ++CurPtr;
+
+ if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
+ if (isdigit(CurPtr[1]) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+ CurPtr += 2;
+ while (isdigit(CurPtr[0])) ++CurPtr;
+ }
+ }
+
+ APFloatVal = APFloat(atof(TokStart));
+ return lltok::APFloat;
+}
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
new file mode 100644
index 0000000..995aa4e
--- /dev/null
+++ b/lib/AsmParser/LLLexer.h
@@ -0,0 +1,84 @@
+//===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Lexer for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ASMPARSER_LLLEXER_H
+#define LIB_ASMPARSER_LLLEXER_H
+
+#include "LLToken.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/APFloat.h"
+#include <string>
+
+namespace llvm {
+ class MemoryBuffer;
+ class Type;
+ class ParseError;
+
+ class LLLexer {
+ const char *CurPtr;
+ MemoryBuffer *CurBuf;
+ ParseError &ErrorInfo;
+
+ // Information about the current token.
+ const char *TokStart;
+ lltok::Kind CurKind;
+ std::string StrVal;
+ unsigned UIntVal;
+ const Type *TyVal;
+ APFloat APFloatVal;
+ APSInt APSIntVal;
+
+ std::string TheError;
+ public:
+ explicit LLLexer(MemoryBuffer *StartBuf, ParseError &);
+ ~LLLexer() {}
+
+ lltok::Kind Lex() {
+ return CurKind = LexToken();
+ }
+
+ typedef const char* LocTy;
+ LocTy getLoc() const { return TokStart; }
+ lltok::Kind getKind() const { return CurKind; }
+    const std::string &getStrVal() const { return StrVal; }
+ const Type *getTyVal() const { return TyVal; }
+ unsigned getUIntVal() const { return UIntVal; }
+ const APSInt &getAPSIntVal() const { return APSIntVal; }
+ const APFloat &getAPFloatVal() const { return APFloatVal; }
+
+
+ bool Error(LocTy L, const std::string &Msg) const;
+ bool Error(const std::string &Msg) const { return Error(CurPtr, Msg); }
+ std::string getFilename() const;
+
+ private:
+ lltok::Kind LexToken();
+
+ int getNextChar();
+ void SkipLineComment();
+ lltok::Kind LexIdentifier();
+ lltok::Kind LexDigitOrNegative();
+ lltok::Kind LexPositive();
+ lltok::Kind LexAt();
+ lltok::Kind LexPercent();
+ lltok::Kind LexQuote();
+ lltok::Kind Lex0x();
+
+ uint64_t atoull(const char *Buffer, const char *End);
+ uint64_t HexIntToVal(const char *Buffer, const char *End);
+ void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
+ void FP80HexToIntPair(const char *Buff, const char *End, uint64_t Pair[2]);
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
new file mode 100644
index 0000000..8db4c71
--- /dev/null
+++ b/lib/AsmParser/LLParser.cpp
@@ -0,0 +1,3279 @@
+//===-- LLParser.cpp - Parser Class ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLParser.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace llvm {
+  /// ValID - Represents an untyped reference to a definition of some sort.
+ /// There are several cases where we have to parse the value but where the
+ /// type can depend on later context. This may either be a numeric reference
+ /// or a symbolic (%var) reference. This is just a discriminated union.
+ struct ValID {
+ enum {
+ t_LocalID, t_GlobalID, // ID in UIntVal.
+ t_LocalName, t_GlobalName, // Name in StrVal.
+ t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
+ t_Null, t_Undef, t_Zero, // No value.
+ t_EmptyArray, // No value: []
+ t_Constant, // Value in ConstantVal.
+ t_InlineAsm // Value in StrVal/StrVal2/UIntVal.
+ } Kind;
+
+ LLParser::LocTy Loc;
+ unsigned UIntVal;
+ std::string StrVal, StrVal2;
+ APSInt APSIntVal;
+ APFloat APFloatVal;
+ Constant *ConstantVal;
+ ValID() : APFloatVal(0.0) {}
+ };
+}
+
+/// Run: module ::= toplevelentity*
+bool LLParser::Run() {
+ // Prime the lexer.
+ Lex.Lex();
+
+ return ParseTopLevelEntities() ||
+ ValidateEndOfModule();
+}
+
+/// ValidateEndOfModule - Do final validity and sanity checks at the end of the
+/// module.
+bool LLParser::ValidateEndOfModule() {
+ if (!ForwardRefTypes.empty())
+ return Error(ForwardRefTypes.begin()->second.second,
+ "use of undefined type named '" +
+ ForwardRefTypes.begin()->first + "'");
+ if (!ForwardRefTypeIDs.empty())
+ return Error(ForwardRefTypeIDs.begin()->second.second,
+ "use of undefined type '%" +
+ utostr(ForwardRefTypeIDs.begin()->first) + "'");
+
+ if (!ForwardRefVals.empty())
+ return Error(ForwardRefVals.begin()->second.second,
+ "use of undefined value '@" + ForwardRefVals.begin()->first +
+ "'");
+
+ if (!ForwardRefValIDs.empty())
+ return Error(ForwardRefValIDs.begin()->second.second,
+ "use of undefined value '@" +
+ utostr(ForwardRefValIDs.begin()->first) + "'");
+
+  // Look for intrinsic functions and CallInsts that need to be upgraded.
+  for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
+    UpgradeCallsToIntrinsic(FI++); // must be post-increment, as the upgrade
+                                   // may remove the old function here
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level Entities
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ParseTopLevelEntities() {
+ while (1) {
+ switch (Lex.getKind()) {
+ default: return TokError("expected top-level entity");
+ case lltok::Eof: return false;
+ case lltok::kw_declare: if (ParseDeclare()) return true; break;
+ case lltok::kw_define: if (ParseDefine()) return true; break;
+ case lltok::kw_module: if (ParseModuleAsm()) return true; break;
+ case lltok::kw_target: if (ParseTargetDefinition()) return true; break;
+ case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
+ case lltok::kw_type: if (ParseUnnamedType()) return true; break;
+ case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0
+ case lltok::LocalVar: if (ParseNamedType()) return true; break;
+ case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
+
+ // The Global variable production with no name can have many different
+ // optional leading prefixes, the production is:
+ // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+ // OptionalAddrSpace ('constant'|'global') ...
+ case lltok::kw_private: // OptionalLinkage
+ case lltok::kw_internal: // OptionalLinkage
+ case lltok::kw_weak: // OptionalLinkage
+ case lltok::kw_weak_odr: // OptionalLinkage
+ case lltok::kw_linkonce: // OptionalLinkage
+ case lltok::kw_linkonce_odr: // OptionalLinkage
+ case lltok::kw_appending: // OptionalLinkage
+ case lltok::kw_dllexport: // OptionalLinkage
+ case lltok::kw_common: // OptionalLinkage
+ case lltok::kw_dllimport: // OptionalLinkage
+ case lltok::kw_extern_weak: // OptionalLinkage
+ case lltok::kw_external: { // OptionalLinkage
+ unsigned Linkage, Visibility;
+ if (ParseOptionalLinkage(Linkage) ||
+ ParseOptionalVisibility(Visibility) ||
+ ParseGlobal("", 0, Linkage, true, Visibility))
+ return true;
+ break;
+ }
+ case lltok::kw_default: // OptionalVisibility
+ case lltok::kw_hidden: // OptionalVisibility
+ case lltok::kw_protected: { // OptionalVisibility
+ unsigned Visibility;
+ if (ParseOptionalVisibility(Visibility) ||
+ ParseGlobal("", 0, 0, false, Visibility))
+ return true;
+ break;
+ }
+
+ case lltok::kw_thread_local: // OptionalThreadLocal
+ case lltok::kw_addrspace: // OptionalAddrSpace
+ case lltok::kw_constant: // GlobalType
+ case lltok::kw_global: // GlobalType
+ if (ParseGlobal("", 0, 0, false, 0)) return true;
+ break;
+ }
+ }
+}
+
+
+/// toplevelentity
+/// ::= 'module' 'asm' STRINGCONSTANT
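+///
+/// For example (added commentary):
+///   module asm ".globl my_sym"
+/// appends the quoted string to the module-level inline asm, one blob per
+/// line.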
+bool LLParser::ParseModuleAsm() {
+ assert(Lex.getKind() == lltok::kw_module);
+ Lex.Lex();
+
+ std::string AsmStr;
+ if (ParseToken(lltok::kw_asm, "expected 'module asm'") ||
+ ParseStringConstant(AsmStr)) return true;
+
+ const std::string &AsmSoFar = M->getModuleInlineAsm();
+ if (AsmSoFar.empty())
+ M->setModuleInlineAsm(AsmStr);
+ else
+ M->setModuleInlineAsm(AsmSoFar+"\n"+AsmStr);
+ return false;
+}
+
+/// toplevelentity
+/// ::= 'target' 'triple' '=' STRINGCONSTANT
+/// ::= 'target' 'datalayout' '=' STRINGCONSTANT
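+///
+/// For example (added commentary, with illustrative string values):
+///   target triple = "x86_64-unknown-linux-gnu"
+///   target datalayout = "e-p:64:64"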
+bool LLParser::ParseTargetDefinition() {
+ assert(Lex.getKind() == lltok::kw_target);
+ std::string Str;
+ switch (Lex.Lex()) {
+ default: return TokError("unknown target property");
+ case lltok::kw_triple:
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after target triple") ||
+ ParseStringConstant(Str))
+ return true;
+ M->setTargetTriple(Str);
+ return false;
+ case lltok::kw_datalayout:
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after target datalayout") ||
+ ParseStringConstant(Str))
+ return true;
+ M->setDataLayout(Str);
+ return false;
+ }
+}
+
+/// toplevelentity
+/// ::= 'deplibs' '=' '[' ']'
+/// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']'
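+///
+/// For example (added commentary): deplibs = [ "m", "c" ] records the m and
+/// c libraries as module dependencies via M->addLibrary.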
+bool LLParser::ParseDepLibs() {
+ assert(Lex.getKind() == lltok::kw_deplibs);
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after deplibs") ||
+ ParseToken(lltok::lsquare, "expected '=' after deplibs"))
+ return true;
+
+ if (EatIfPresent(lltok::rsquare))
+ return false;
+
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+ M->addLibrary(Str);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseStringConstant(Str)) return true;
+ M->addLibrary(Str);
+ }
+
+ return ParseToken(lltok::rsquare, "expected ']' at end of list");
+}
+
+/// toplevelentity
+/// ::= 'type' type
+bool LLParser::ParseUnnamedType() {
+ assert(Lex.getKind() == lltok::kw_type);
+ LocTy TypeLoc = Lex.getLoc();
+ Lex.Lex(); // eat kw_type
+
+ PATypeHolder Ty(Type::VoidTy);
+ if (ParseType(Ty)) return true;
+
+ unsigned TypeID = NumberedTypes.size();
+
+ // See if this type was previously referenced.
+ std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
+ FI = ForwardRefTypeIDs.find(TypeID);
+ if (FI != ForwardRefTypeIDs.end()) {
+ if (FI->second.first.get() == Ty)
+ return Error(TypeLoc, "self referential type is invalid");
+
+ cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
+ Ty = FI->second.first.get();
+ ForwardRefTypeIDs.erase(FI);
+ }
+
+ NumberedTypes.push_back(Ty);
+
+ return false;
+}
+
+/// toplevelentity
+/// ::= LocalVar '=' 'type' type
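+///
+/// For example (added commentary): %pair = type { i32, i32 } binds the name
+/// "pair" to an anonymous struct type in the module symbol table.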
+bool LLParser::ParseNamedType() {
+ std::string Name = Lex.getStrVal();
+ LocTy NameLoc = Lex.getLoc();
+ Lex.Lex(); // eat LocalVar.
+
+ PATypeHolder Ty(Type::VoidTy);
+
+ if (ParseToken(lltok::equal, "expected '=' after name") ||
+ ParseToken(lltok::kw_type, "expected 'type' after name") ||
+ ParseType(Ty))
+ return true;
+
+ // Set the type name, checking for conflicts as we do so.
+ bool AlreadyExists = M->addTypeName(Name, Ty);
+ if (!AlreadyExists) return false;
+
+ // See if this type is a forward reference. We need to eagerly resolve
+ // types to allow recursive type redefinitions below.
+ std::map<std::string, std::pair<PATypeHolder, LocTy> >::iterator
+ FI = ForwardRefTypes.find(Name);
+ if (FI != ForwardRefTypes.end()) {
+ if (FI->second.first.get() == Ty)
+ return Error(NameLoc, "self referential type is invalid");
+
+ cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
+ Ty = FI->second.first.get();
+ ForwardRefTypes.erase(FI);
+ }
+
+  // We tried to insert a name that is already defined; get the existing type.
+ const Type *Existing = M->getTypeByName(Name);
+ assert(Existing && "Conflict but no matching type?!");
+
+ // Otherwise, this is an attempt to redefine a type. That's okay if
+ // the redefinition is identical to the original.
+ // FIXME: REMOVE REDEFINITIONS IN LLVM 3.0
+ if (Existing == Ty) return false;
+
+ // Any other kind of (non-equivalent) redefinition is an error.
+ return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" +
+ Ty->getDescription() + "'");
+}
+
+
+/// toplevelentity
+/// ::= 'declare' FunctionHeader
+bool LLParser::ParseDeclare() {
+ assert(Lex.getKind() == lltok::kw_declare);
+ Lex.Lex();
+
+ Function *F;
+ return ParseFunctionHeader(F, false);
+}
+
+/// toplevelentity
+/// ::= 'define' FunctionHeader '{' ...
+bool LLParser::ParseDefine() {
+ assert(Lex.getKind() == lltok::kw_define);
+ Lex.Lex();
+
+ Function *F;
+ return ParseFunctionHeader(F, true) ||
+ ParseFunctionBody(*F);
+}
+
+/// ParseGlobalType
+/// ::= 'constant'
+/// ::= 'global'
+bool LLParser::ParseGlobalType(bool &IsConstant) {
+ if (Lex.getKind() == lltok::kw_constant)
+ IsConstant = true;
+ else if (Lex.getKind() == lltok::kw_global)
+ IsConstant = false;
+ else {
+ IsConstant = false;
+ return TokError("expected 'global' or 'constant'");
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseNamedGlobal:
+/// GlobalVar '=' OptionalVisibility ALIAS ...
+/// GlobalVar '=' OptionalLinkage OptionalVisibility ... -> global variable
+bool LLParser::ParseNamedGlobal() {
+ assert(Lex.getKind() == lltok::GlobalVar);
+ LocTy NameLoc = Lex.getLoc();
+ std::string Name = Lex.getStrVal();
+ Lex.Lex();
+
+ bool HasLinkage;
+ unsigned Linkage, Visibility;
+ if (ParseToken(lltok::equal, "expected '=' in global variable") ||
+ ParseOptionalLinkage(Linkage, HasLinkage) ||
+ ParseOptionalVisibility(Visibility))
+ return true;
+
+ if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+ return ParseAlias(Name, NameLoc, Visibility);
+}
+
+/// ParseAlias:
+/// ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee
+/// Aliasee
+/// ::= TypeAndValue
+/// ::= 'bitcast' '(' TypeAndValue 'to' Type ')'
+/// ::= 'getelementptr' '(' ... ')'
+///
+/// Everything through visibility has already been parsed.
+///
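+/// For example (added commentary):
+///   @a = alias weak i32* @g
+/// makes @a a weak alias of the global @g; the aliasee must have pointer
+/// type.
+///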
+bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
+ unsigned Visibility) {
+ assert(Lex.getKind() == lltok::kw_alias);
+ Lex.Lex();
+ unsigned Linkage;
+ LocTy LinkageLoc = Lex.getLoc();
+ if (ParseOptionalLinkage(Linkage))
+ return true;
+
+ if (Linkage != GlobalValue::ExternalLinkage &&
+ Linkage != GlobalValue::WeakAnyLinkage &&
+ Linkage != GlobalValue::WeakODRLinkage &&
+ Linkage != GlobalValue::InternalLinkage &&
+ Linkage != GlobalValue::PrivateLinkage)
+ return Error(LinkageLoc, "invalid linkage type for alias");
+
+ Constant *Aliasee;
+ LocTy AliaseeLoc = Lex.getLoc();
+ if (Lex.getKind() != lltok::kw_bitcast &&
+ Lex.getKind() != lltok::kw_getelementptr) {
+ if (ParseGlobalTypeAndValue(Aliasee)) return true;
+ } else {
+ // The bitcast dest type is not present, it is implied by the dest type.
+ ValID ID;
+ if (ParseValID(ID)) return true;
+ if (ID.Kind != ValID::t_Constant)
+ return Error(AliaseeLoc, "invalid aliasee");
+ Aliasee = ID.ConstantVal;
+ }
+
+ if (!isa<PointerType>(Aliasee->getType()))
+ return Error(AliaseeLoc, "alias must have pointer type");
+
+ // Okay, create the alias but do not insert it into the module yet.
+ GlobalAlias* GA = new GlobalAlias(Aliasee->getType(),
+ (GlobalValue::LinkageTypes)Linkage, Name,
+ Aliasee);
+ GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+
+ // See if this value already exists in the symbol table. If so, it is either
+ // a redefinition or a definition of a forward reference.
+ if (GlobalValue *Val =
+ cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(Name))) {
+ // See if this was a redefinition. If so, there is no entry in
+ // ForwardRefVals.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I == ForwardRefVals.end())
+ return Error(NameLoc, "redefinition of global named '@" + Name + "'");
+
+ // Otherwise, this was a definition of forward ref. Verify that types
+ // agree.
+ if (Val->getType() != GA->getType())
+ return Error(NameLoc,
+ "forward reference and definition of alias have different types");
+
+ // If they agree, just RAUW the old value with the alias and remove the
+ // forward ref info.
+ Val->replaceAllUsesWith(GA);
+ Val->eraseFromParent();
+ ForwardRefVals.erase(I);
+ }
+
+ // Insert into the module, we know its name won't collide now.
+ M->getAliasList().push_back(GA);
+ assert(GA->getNameStr() == Name && "Should not be a name conflict!");
+
+ return false;
+}
+
+/// ParseGlobal
+/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal
+/// OptionalAddrSpace GlobalType Type Const
+/// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+/// OptionalAddrSpace GlobalType Type Const
+///
+/// Everything through visibility has been parsed already.
+///
+bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
+ unsigned Linkage, bool HasLinkage,
+ unsigned Visibility) {
+ unsigned AddrSpace;
+ bool ThreadLocal, IsConstant;
+ LocTy TyLoc;
+
+ PATypeHolder Ty(Type::VoidTy);
+ if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
+ ParseOptionalAddrSpace(AddrSpace) ||
+ ParseGlobalType(IsConstant) ||
+ ParseType(Ty, TyLoc))
+ return true;
+
+ // If the linkage is specified and is external, then no initializer is
+ // present.
+ Constant *Init = 0;
+ if (!HasLinkage || (Linkage != GlobalValue::DLLImportLinkage &&
+ Linkage != GlobalValue::ExternalWeakLinkage &&
+ Linkage != GlobalValue::ExternalLinkage)) {
+ if (ParseGlobalValue(Ty, Init))
+ return true;
+ }
+
+ if (isa<FunctionType>(Ty) || Ty == Type::LabelTy)
+ return Error(TyLoc, "invalid type for global variable");
+
+ GlobalVariable *GV = 0;
+
+ // See if the global was forward referenced, if so, use the global.
+ if (!Name.empty()) {
+ if ((GV = M->getGlobalVariable(Name, true)) &&
+ !ForwardRefVals.erase(Name))
+ return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ } else {
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ GV = cast<GlobalVariable>(I->second.first);
+ ForwardRefValIDs.erase(I);
+ }
+ }
+
+ if (GV == 0) {
+ GV = new GlobalVariable(Ty, false, GlobalValue::ExternalLinkage, 0, Name,
+ M, false, AddrSpace);
+ } else {
+ if (GV->getType()->getElementType() != Ty)
+ return Error(TyLoc,
+ "forward reference and definition of global have different types");
+
+ // Move the forward-reference to the correct spot in the module.
+ M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV);
+ }
+
+ if (Name.empty())
+ NumberedVals.push_back(GV);
+
+ // Set the parsed properties on the global.
+ if (Init)
+ GV->setInitializer(Init);
+ GV->setConstant(IsConstant);
+ GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
+ GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ GV->setThreadLocal(ThreadLocal);
+
+ // Parse attributes on the global.
+ while (Lex.getKind() == lltok::comma) {
+ Lex.Lex();
+
+ if (Lex.getKind() == lltok::kw_section) {
+ Lex.Lex();
+ GV->setSection(Lex.getStrVal());
+ if (ParseToken(lltok::StringConstant, "expected global section string"))
+ return true;
+ } else if (Lex.getKind() == lltok::kw_align) {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment)) return true;
+ GV->setAlignment(Alignment);
+ } else {
+      return TokError("unknown global variable property!");
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Reference/Resolution Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetGlobalVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed. This can return null if the value
+/// exists but does not have the right type.
+GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
+ LocTy Loc) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (PTy == 0) {
+ Error(Loc, "global variable reference must have pointer type");
+ return 0;
+ }
+
+ // Look this name up in the normal function symbol table.
+ GlobalValue *Val =
+ cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(Name));
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I != ForwardRefVals.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ Error(Loc, "'@" + Name + "' defined with type '" +
+ Val->getType()->getDescription() + "'");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ GlobalValue *FwdVal;
+ if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
+ // Function types can return opaque but functions can't.
+ if (isa<OpaqueType>(FT->getReturnType())) {
+ Error(Loc, "function may not return opaque type");
+ return 0;
+ }
+
+ FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
+ } else {
+ FwdVal = new GlobalVariable(PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, Name, M);
+ }
+
+ ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ if (PTy == 0) {
+ Error(Loc, "global variable reference must have pointer type");
+ return 0;
+ }
+
+ GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(ID);
+ if (I != ForwardRefValIDs.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ Error(Loc, "'@" + utostr(ID) + "' defined with type '" +
+ Val->getType()->getDescription() + "'");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ GlobalValue *FwdVal;
+ if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
+ // Function types can return opaque but functions can't.
+ if (isa<OpaqueType>(FT->getReturnType())) {
+ Error(Loc, "function may not return opaque type");
+ return 0;
+ }
+ FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
+ } else {
+ FwdVal = new GlobalVariable(PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, 0, "", M);
+ }
+
+ ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Routines.
+//===----------------------------------------------------------------------===//
+
+/// ParseToken - If the current token has the specified kind, eat it and return
+/// success. Otherwise, emit the specified error and return failure.
+bool LLParser::ParseToken(lltok::Kind T, const char *ErrMsg) {
+ if (Lex.getKind() != T)
+ return TokError(ErrMsg);
+ Lex.Lex();
+ return false;
+}
+
+/// ParseStringConstant
+/// ::= StringConstant
+bool LLParser::ParseStringConstant(std::string &Result) {
+ if (Lex.getKind() != lltok::StringConstant)
+ return TokError("expected string constant");
+ Result = Lex.getStrVal();
+ Lex.Lex();
+ return false;
+}
+
+/// ParseUInt32
+/// ::= uint32
+bool LLParser::ParseUInt32(unsigned &Val) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
+ return TokError("expected integer");
+ uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(0xFFFFFFFFULL+1);
+ if (Val64 != unsigned(Val64))
+ return TokError("expected 32-bit integer (too large)");
+ Val = Val64;
+ Lex.Lex();
+ return false;
+}
+
+
+/// ParseOptionalAddrSpace
+/// := /*empty*/
+/// := 'addrspace' '(' uint32 ')'
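+///
+/// For example (added commentary): "addrspace(1)" sets AddrSpace to 1;
+/// when absent, AddrSpace defaults to 0.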
+bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
+ AddrSpace = 0;
+ if (!EatIfPresent(lltok::kw_addrspace))
+ return false;
+ return ParseToken(lltok::lparen, "expected '(' in address space") ||
+ ParseUInt32(AddrSpace) ||
+ ParseToken(lltok::rparen, "expected ')' in address space");
+}
+
+/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
+/// indicates what kind of attribute list this is: 0: function arg, 1: result,
+/// 2: function attr, 3: function arg after value (FIXME: REMOVE IN LLVM 3.0).
+bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
+ Attrs = Attribute::None;
+ LocTy AttrLoc = Lex.getLoc();
+
+ while (1) {
+ switch (Lex.getKind()) {
+ case lltok::kw_sext:
+ case lltok::kw_zext:
+ // Treat these as signext/zeroext if they occur in the argument list after
+ // the value, as in "call i8 @foo(i8 10 sext)". If they occur before the
+ // value, as in "call i8 @foo(i8 sext (" then it is part of a constant
+ // expr.
+ // FIXME: REMOVE THIS IN LLVM 3.0
+ if (AttrKind == 3) {
+ if (Lex.getKind() == lltok::kw_sext)
+ Attrs |= Attribute::SExt;
+ else
+ Attrs |= Attribute::ZExt;
+ break;
+ }
+ // FALL THROUGH.
+ default: // End of attributes.
+ if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
+ return Error(AttrLoc, "invalid use of function-only attribute");
+
+ if (AttrKind != 0 && AttrKind != 3 && (Attrs & Attribute::ParameterOnly))
+ return Error(AttrLoc, "invalid use of parameter-only attribute");
+
+ return false;
+ case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break;
+ case lltok::kw_signext: Attrs |= Attribute::SExt; break;
+ case lltok::kw_inreg: Attrs |= Attribute::InReg; break;
+ case lltok::kw_sret: Attrs |= Attribute::StructRet; break;
+ case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break;
+ case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break;
+ case lltok::kw_byval: Attrs |= Attribute::ByVal; break;
+ case lltok::kw_nest: Attrs |= Attribute::Nest; break;
+
+ case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break;
+ case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break;
+ case lltok::kw_noinline: Attrs |= Attribute::NoInline; break;
+ case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break;
+ case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break;
+ case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break;
+ case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break;
+ case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break;
+ case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break;
+
+
+ case lltok::kw_align: {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment))
+ return true;
+ Attrs |= Attribute::constructAlignmentFromInt(Alignment);
+ continue;
+ }
+ }
+ Lex.Lex();
+ }
+}
+
+/// ParseOptionalLinkage
+/// ::= /*empty*/
+/// ::= 'private'
+/// ::= 'internal'
+/// ::= 'weak'
+/// ::= 'weak_odr'
+/// ::= 'linkonce'
+/// ::= 'linkonce_odr'
+/// ::= 'appending'
+/// ::= 'dllexport'
+/// ::= 'common'
+/// ::= 'dllimport'
+/// ::= 'extern_weak'
+/// ::= 'external'
+bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
+ HasLinkage = false;
+ switch (Lex.getKind()) {
+ default: Res = GlobalValue::ExternalLinkage; return false;
+ case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break;
+ case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
+ case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
+ case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
+ case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break;
+ case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
+ case lltok::kw_available_externally:
+ Res = GlobalValue::AvailableExternallyLinkage;
+ break;
+ case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break;
+ case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break;
+ case lltok::kw_common: Res = GlobalValue::CommonLinkage; break;
+ case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break;
+ case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break;
+ case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break;
+ }
+ Lex.Lex();
+ HasLinkage = true;
+ return false;
+}
+
+/// ParseOptionalVisibility
+/// ::= /*empty*/
+/// ::= 'default'
+/// ::= 'hidden'
+/// ::= 'protected'
+///
+bool LLParser::ParseOptionalVisibility(unsigned &Res) {
+ switch (Lex.getKind()) {
+ default: Res = GlobalValue::DefaultVisibility; return false;
+ case lltok::kw_default: Res = GlobalValue::DefaultVisibility; break;
+ case lltok::kw_hidden: Res = GlobalValue::HiddenVisibility; break;
+ case lltok::kw_protected: Res = GlobalValue::ProtectedVisibility; break;
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseOptionalCallingConv
+/// ::= /*empty*/
+/// ::= 'ccc'
+/// ::= 'fastcc'
+/// ::= 'coldcc'
+/// ::= 'x86_stdcallcc'
+/// ::= 'x86_fastcallcc'
+/// ::= 'cc' UINT
+///
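+/// For example (added commentary): "fastcc" selects CallingConv::Fast, and
+/// "cc 10" selects the numbered convention 10 via ParseUInt32.
+///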
+bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
+ switch (Lex.getKind()) {
+ default: CC = CallingConv::C; return false;
+ case lltok::kw_ccc: CC = CallingConv::C; break;
+ case lltok::kw_fastcc: CC = CallingConv::Fast; break;
+ case lltok::kw_coldcc: CC = CallingConv::Cold; break;
+ case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
+ case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_cc: Lex.Lex(); return ParseUInt32(CC);
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// ParseOptionalAlignment
+/// ::= /* empty */
+/// ::= 'align' 4
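+///
+/// For example (added commentary): "align 8" yields Alignment == 8, while a
+/// non-power-of-two operand such as "align 6" is rejected below.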
+bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::kw_align))
+ return false;
+ LocTy AlignLoc = Lex.getLoc();
+ if (ParseUInt32(Alignment)) return true;
+ if (!isPowerOf2_32(Alignment))
+ return Error(AlignLoc, "alignment is not a power of two");
+ return false;
+}
+
+/// ParseOptionalCommaAlignment
+/// ::= /* empty */
+/// ::= ',' 'align' 4
+bool LLParser::ParseOptionalCommaAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::comma))
+ return false;
+ return ParseToken(lltok::kw_align, "expected 'align'") ||
+ ParseUInt32(Alignment);
+}
+
+/// ParseIndexList
+/// ::= (',' uint32)+
+bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
+ if (Lex.getKind() != lltok::comma)
+ return TokError("expected ',' as start of index list");
+
+ while (EatIfPresent(lltok::comma)) {
+ unsigned Idx;
+ if (ParseUInt32(Idx)) return true;
+ Indices.push_back(Idx);
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Type Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseType - Parse and resolve a full type.
+bool LLParser::ParseType(PATypeHolder &Result, bool AllowVoid) {
+ LocTy TypeLoc = Lex.getLoc();
+ if (ParseTypeRec(Result)) return true;
+
+ // Verify no unresolved uprefs.
+ if (!UpRefs.empty())
+ return Error(UpRefs.back().Loc, "invalid unresolved type up reference");
+
+ if (!AllowVoid && Result.get() == Type::VoidTy)
+ return Error(TypeLoc, "void type only allowed for function results");
+
+ return false;
+}
+
+/// HandleUpRefs - Every time we finish a new layer of types, this function is
+/// called. It loops through the UpRefs vector, which is a list of the
+/// currently active types. For each type, if the up-reference is contained in
+/// the newly completed type, we decrement the level count. When the level
+/// count reaches zero, the up-referenced type is the type that is passed in:
+/// thus we can complete the cycle.
+///
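+/// For example (added commentary): in "{ \2* }" the up-reference \2 starts
+/// at nesting level 2; completing the pointer type drops it to 1 and
+/// completing the struct drops it to 0, so the opaque placeholder resolves
+/// to the struct itself, producing a recursive type.
+///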
+PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
+ // If Ty isn't abstract, or if there are no up-references in it, then there is
+ // nothing to resolve here.
+ if (!ty->isAbstract() || UpRefs.empty()) return ty;
+
+ PATypeHolder Ty(ty);
+#if 0
+ errs() << "Type '" << Ty->getDescription()
+ << "' newly formed. Resolving upreferences.\n"
+ << UpRefs.size() << " upreferences active!\n";
+#endif
+
+ // If we find any resolvable upreferences (i.e., those whose NestingLevel goes
+ // to zero), we resolve them all together before we resolve them to Ty. At
+ // the end of the loop, if there is anything to resolve to Ty, it will be in
+ // this variable.
+ OpaqueType *TypeToResolve = 0;
+
+ for (unsigned i = 0; i != UpRefs.size(); ++i) {
+    // Determine if 'Ty' directly contains this up-reference's
+    // 'LastContainedTy'.
+ bool ContainsType =
+ std::find(Ty->subtype_begin(), Ty->subtype_end(),
+ UpRefs[i].LastContainedTy) != Ty->subtype_end();
+
+#if 0
+ errs() << " UR#" << i << " - TypeContains(" << Ty->getDescription() << ", "
+ << UpRefs[i].LastContainedTy->getDescription() << ") = "
+ << (ContainsType ? "true" : "false")
+ << " level=" << UpRefs[i].NestingLevel << "\n";
+#endif
+ if (!ContainsType)
+ continue;
+
+ // Decrement level of upreference
+ unsigned Level = --UpRefs[i].NestingLevel;
+ UpRefs[i].LastContainedTy = Ty;
+
+ // If the Up-reference has a non-zero level, it shouldn't be resolved yet.
+ if (Level != 0)
+ continue;
+
+#if 0
+ errs() << " * Resolving upreference for " << UpRefs[i].UpRefTy << "\n";
+#endif
+ if (!TypeToResolve)
+ TypeToResolve = UpRefs[i].UpRefTy;
+ else
+ UpRefs[i].UpRefTy->refineAbstractTypeTo(TypeToResolve);
+ UpRefs.erase(UpRefs.begin()+i); // Remove from upreference list.
+ --i; // Do not skip the next element.
+ }
+
+ if (TypeToResolve)
+ TypeToResolve->refineAbstractTypeTo(Ty);
+
+ return Ty;
+}
+
+
+/// ParseTypeRec - The recursive function used to process the internal
+/// implementation details of types.
+bool LLParser::ParseTypeRec(PATypeHolder &Result) {
+ switch (Lex.getKind()) {
+ default:
+ return TokError("expected type");
+ case lltok::Type:
+ // TypeRec ::= 'float' | 'void' (etc)
+ Result = Lex.getTyVal();
+ Lex.Lex();
+ break;
+ case lltok::kw_opaque:
+ // TypeRec ::= 'opaque'
+ Result = OpaqueType::get();
+ Lex.Lex();
+ break;
+ case lltok::lbrace:
+ // TypeRec ::= '{' ... '}'
+ if (ParseStructType(Result, false))
+ return true;
+ break;
+ case lltok::lsquare:
+ // TypeRec ::= '[' ... ']'
+ Lex.Lex(); // eat the lsquare.
+ if (ParseArrayVectorType(Result, false))
+ return true;
+ break;
+ case lltok::less: // Either vector or packed struct.
+ // TypeRec ::= '<' ... '>'
+ Lex.Lex();
+ if (Lex.getKind() == lltok::lbrace) {
+ if (ParseStructType(Result, true) ||
+ ParseToken(lltok::greater, "expected '>' at end of packed struct"))
+ return true;
+ } else if (ParseArrayVectorType(Result, true))
+ return true;
+ break;
+ case lltok::LocalVar:
+ case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0
+ // TypeRec ::= %foo
+ if (const Type *T = M->getTypeByName(Lex.getStrVal())) {
+ Result = T;
+ } else {
+ Result = OpaqueType::get();
+ ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(),
+ std::make_pair(Result,
+ Lex.getLoc())));
+ M->addTypeName(Lex.getStrVal(), Result.get());
+ }
+ Lex.Lex();
+ break;
+
+ case lltok::LocalVarID:
+ // TypeRec ::= %4
+ if (Lex.getUIntVal() < NumberedTypes.size())
+ Result = NumberedTypes[Lex.getUIntVal()];
+ else {
+ std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
+ I = ForwardRefTypeIDs.find(Lex.getUIntVal());
+ if (I != ForwardRefTypeIDs.end())
+ Result = I->second.first;
+ else {
+ Result = OpaqueType::get();
+ ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(),
+ std::make_pair(Result,
+ Lex.getLoc())));
+ }
+ }
+ Lex.Lex();
+ break;
+ case lltok::backslash: {
+ // TypeRec ::= '\' 4
+ Lex.Lex();
+ unsigned Val;
+ if (ParseUInt32(Val)) return true;
+ OpaqueType *OT = OpaqueType::get(); // Use temporary placeholder.
+ UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT));
+ Result = OT;
+ break;
+ }
+ }
+
+ // Parse the type suffixes.
+ while (1) {
+ switch (Lex.getKind()) {
+ // End of type.
+ default: return false;
+
+ // TypeRec ::= TypeRec '*'
+ case lltok::star:
+ if (Result.get() == Type::LabelTy)
+ return TokError("basic block pointers are invalid");
+ if (Result.get() == Type::VoidTy)
+ return TokError("pointers to void are invalid; use i8* instead");
+ Result = HandleUpRefs(PointerType::getUnqual(Result.get()));
+ Lex.Lex();
+ break;
+
+ // TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*'
+ case lltok::kw_addrspace: {
+ if (Result.get() == Type::LabelTy)
+ return TokError("basic block pointers are invalid");
+ if (Result.get() == Type::VoidTy)
+ return TokError("pointers to void are invalid; use i8* instead");
+ unsigned AddrSpace;
+ if (ParseOptionalAddrSpace(AddrSpace) ||
+ ParseToken(lltok::star, "expected '*' in address space"))
+ return true;
+
+ Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace));
+ break;
+ }
+
+ /// Types '(' ArgTypeListI ')' OptFuncAttrs
+ case lltok::lparen:
+ if (ParseFunctionType(Result))
+ return true;
+ break;
+ }
+ }
+}
+
+/// ParseParameterList
+/// ::= '(' ')'
+/// ::= '(' Arg (',' Arg)* ')'
+/// Arg
+/// ::= Type OptionalAttributes Value OptionalAttributes
+bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS) {
+ if (ParseToken(lltok::lparen, "expected '(' in call"))
+ return true;
+
+ while (Lex.getKind() != lltok::rparen) {
+ // If this isn't the first argument, we need a comma.
+ if (!ArgList.empty() &&
+ ParseToken(lltok::comma, "expected ',' in argument list"))
+ return true;
+
+ // Parse the argument.
+ LocTy ArgLoc;
+ PATypeHolder ArgTy(Type::VoidTy);
+ unsigned ArgAttrs1, ArgAttrs2;
+ Value *V;
+ if (ParseType(ArgTy, ArgLoc) ||
+ ParseOptionalAttrs(ArgAttrs1, 0) ||
+ ParseValue(ArgTy, V, PFS) ||
+ // FIXME: Should not allow attributes after the argument, remove this in
+ // LLVM 3.0.
+ ParseOptionalAttrs(ArgAttrs2, 3))
+ return true;
+ ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2));
+ }
+
+ Lex.Lex(); // Lex the ')'.
+ return false;
+}
+
+
+
+/// ParseArgumentList - Parse the argument list for a function type or function
+/// prototype. If 'inType' is true then we are parsing a FunctionType.
+/// ::= '(' ArgTypeListI ')'
+/// ArgTypeListI
+/// ::= /*empty*/
+/// ::= '...'
+/// ::= ArgTypeList ',' '...'
+/// ::= ArgType (',' ArgType)*
+///
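+/// For example (added commentary): "(i32, i8* nocapture, ...)" declares two
+/// typed arguments, an attribute on the second, and a vararg ellipsis.
+///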
+bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
+ bool &isVarArg, bool inType) {
+ isVarArg = false;
+ assert(Lex.getKind() == lltok::lparen);
+ Lex.Lex(); // eat the (.
+
+ if (Lex.getKind() == lltok::rparen) {
+ // empty
+ } else if (Lex.getKind() == lltok::dotdotdot) {
+ isVarArg = true;
+ Lex.Lex();
+ } else {
+ LocTy TypeLoc = Lex.getLoc();
+ PATypeHolder ArgTy(Type::VoidTy);
+ unsigned Attrs;
+ std::string Name;
+
+ // If we're parsing a type, use ParseTypeRec, because we allow recursive
+ // types (such as a function returning a pointer to itself). If parsing a
+ // function prototype, we require fully resolved types.
+ if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
+ ParseOptionalAttrs(Attrs, 0)) return true;
+
+ if (ArgTy == Type::VoidTy)
+      return Error(TypeLoc, "argument cannot have void type");
+
+ if (Lex.getKind() == lltok::LocalVar ||
+ Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ }
+
+ if (!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy))
+ return Error(TypeLoc, "invalid type for function argument");
+
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+
+ while (EatIfPresent(lltok::comma)) {
+ // Handle ... at end of arg list.
+ if (EatIfPresent(lltok::dotdotdot)) {
+ isVarArg = true;
+ break;
+ }
+
+ // Otherwise must be an argument type.
+ TypeLoc = Lex.getLoc();
+ if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
+ ParseOptionalAttrs(Attrs, 0)) return true;
+
+ if (ArgTy == Type::VoidTy)
+      return Error(TypeLoc, "argument cannot have void type");
+
+ if (Lex.getKind() == lltok::LocalVar ||
+ Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ } else {
+ Name = "";
+ }
+
+ if (!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy))
+ return Error(TypeLoc, "invalid type for function argument");
+
+ ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+ }
+ }
+
+ return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+}
+
+/// ParseFunctionType
+/// ::= Type ArgumentList OptionalAttrs
+bool LLParser::ParseFunctionType(PATypeHolder &Result) {
+ assert(Lex.getKind() == lltok::lparen);
+
+ if (!FunctionType::isValidReturnType(Result))
+ return TokError("invalid function return type");
+
+ std::vector<ArgInfo> ArgList;
+ bool isVarArg;
+ unsigned Attrs;
+ if (ParseArgumentList(ArgList, isVarArg, true) ||
+ // FIXME: Allow, but ignore attributes on function types!
+ // FIXME: Remove in LLVM 3.0
+ ParseOptionalAttrs(Attrs, 2))
+ return true;
+
+ // Reject names on the arguments lists.
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ if (!ArgList[i].Name.empty())
+ return Error(ArgList[i].Loc, "argument name invalid in function type");
+    if (ArgList[i].Attrs != 0) {
+ // Allow but ignore attributes on function types; this permits
+ // auto-upgrade.
+ // FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0
+ }
+ }
+
+ std::vector<const Type*> ArgListTy;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ArgListTy.push_back(ArgList[i].Type);
+
+ Result = HandleUpRefs(FunctionType::get(Result.get(), ArgListTy, isVarArg));
+ return false;
+}
+
+/// ParseStructType: Handles packed and unpacked types. </> parsed elsewhere.
+/// TypeRec
+/// ::= '{' '}'
+/// ::= '{' TypeRec (',' TypeRec)* '}'
+/// ::= '<' '{' '}' '>'
+/// ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
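+///
+/// For example (added commentary): "{ i32, float }" is a normal struct and
+/// "<{ i8, i32 }>" its packed variant with no inter-element padding.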
+bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex(); // Consume the '{'
+
+ if (EatIfPresent(lltok::rbrace)) {
+ Result = StructType::get(std::vector<const Type*>(), Packed);
+ return false;
+ }
+
+ std::vector<PATypeHolder> ParamsList;
+ LocTy EltTyLoc = Lex.getLoc();
+ if (ParseTypeRec(Result)) return true;
+ ParamsList.push_back(Result);
+
+ if (Result == Type::VoidTy)
+    return Error(EltTyLoc, "struct element cannot have void type");
+
+ while (EatIfPresent(lltok::comma)) {
+ EltTyLoc = Lex.getLoc();
+ if (ParseTypeRec(Result)) return true;
+
+ if (Result == Type::VoidTy)
+      return Error(EltTyLoc, "struct element cannot have void type");
+
+ ParamsList.push_back(Result);
+ }
+
+ if (ParseToken(lltok::rbrace, "expected '}' at end of struct"))
+ return true;
+
+ std::vector<const Type*> ParamsListTy;
+ for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
+ ParamsListTy.push_back(ParamsList[i].get());
+ Result = HandleUpRefs(StructType::get(ParamsListTy, Packed));
+ return false;
+}
+
+/// ParseArrayVectorType - Parse an array or vector type, assuming the first
+/// token has already been consumed.
+/// TypeRec
+/// ::= '[' APSINTVAL 'x' Types ']'
+/// ::= '<' APSINTVAL 'x' Types '>'
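+/// e.g. "[4 x i32]" is an array type and "<4 x float>" is a vector type.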
+bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
+ Lex.getAPSIntVal().getBitWidth() > 64)
+ return TokError("expected number in address space");
+
+ LocTy SizeLoc = Lex.getLoc();
+ uint64_t Size = Lex.getAPSIntVal().getZExtValue();
+ Lex.Lex();
+
+ if (ParseToken(lltok::kw_x, "expected 'x' after element count"))
+ return true;
+
+ LocTy TypeLoc = Lex.getLoc();
+ PATypeHolder EltTy(Type::VoidTy);
+ if (ParseTypeRec(EltTy)) return true;
+
+ if (EltTy == Type::VoidTy)
+ return Error(TypeLoc, "array and vector element type cannot be void");
+
+ if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
+ "expected end of sequential type"))
+ return true;
+
+ if (isVector) {
+ if (Size == 0)
+ return Error(SizeLoc, "zero element vector is illegal");
+ if ((unsigned)Size != Size)
+ return Error(SizeLoc, "size too large for vector");
+ if (!EltTy->isFloatingPoint() && !EltTy->isInteger())
+ return Error(TypeLoc, "vector element type must be fp or integer");
+ Result = VectorType::get(EltTy, unsigned(Size));
+ } else {
+ if (!EltTy->isFirstClassType() && !isa<OpaqueType>(EltTy))
+ return Error(TypeLoc, "invalid array element type");
+ Result = HandleUpRefs(ArrayType::get(EltTy, Size));
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Function Semantic Analysis.
+//===----------------------------------------------------------------------===//
+
+LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f)
+ : P(p), F(f) {
+
+ // Insert unnamed arguments into the NumberedVals list.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI)
+ if (!AI->hasName())
+ NumberedVals.push_back(AI);
+}
+
+LLParser::PerFunctionState::~PerFunctionState() {
+ // If there were any forward referenced non-basicblock values, delete them.
+ for (std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I)
+ if (!isa<BasicBlock>(I->second.first)) {
+ I->second.first->replaceAllUsesWith(UndefValue::get(I->second.first
+ ->getType()));
+ delete I->second.first;
+ I->second.first = 0;
+ }
+
+ for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I)
+ if (!isa<BasicBlock>(I->second.first)) {
+ I->second.first->replaceAllUsesWith(UndefValue::get(I->second.first
+ ->getType()));
+ delete I->second.first;
+ I->second.first = 0;
+ }
+}
+
+bool LLParser::PerFunctionState::VerifyFunctionComplete() {
+ if (!ForwardRefVals.empty())
+ return P.Error(ForwardRefVals.begin()->second.second,
+ "use of undefined value '%" + ForwardRefVals.begin()->first +
+ "'");
+ if (!ForwardRefValIDs.empty())
+ return P.Error(ForwardRefValIDs.begin()->second.second,
+ "use of undefined value '%" +
+ utostr(ForwardRefValIDs.begin()->first) + "'");
+ return false;
+}
+
+
+/// GetVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed. This can return null if the value
+/// exists but does not have the right type.
+Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
+ const Type *Ty, LocTy Loc) {
+ // Look this name up in the normal function symbol table.
+ Value *Val = F.getValueSymbolTable().lookup(Name);
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefVals.find(Name);
+ if (I != ForwardRefVals.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ if (Ty == Type::LabelTy)
+ P.Error(Loc, "'%" + Name + "' is not a basic block");
+ else
+ P.Error(Loc, "'%" + Name + "' defined with type '" +
+ Val->getType()->getDescription() + "'");
+ return 0;
+ }
+
+ // Don't make placeholders with invalid type.
+ if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && Ty != Type::LabelTy) {
+ P.Error(Loc, "invalid use of a non-first-class type");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Value *FwdVal;
+ if (Ty == Type::LabelTy)
+ FwdVal = BasicBlock::Create(Name, &F);
+ else
+ FwdVal = new Argument(Ty, Name);
+
+ ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
+ LocTy Loc) {
+ // Look this name up in the normal function symbol table.
+ Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+ // If this is a forward reference for the value, see if we already created a
+ // forward ref record.
+ if (Val == 0) {
+ std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+ I = ForwardRefValIDs.find(ID);
+ if (I != ForwardRefValIDs.end())
+ Val = I->second.first;
+ }
+
+ // If we have the value in the symbol table or fwd-ref table, return it.
+ if (Val) {
+ if (Val->getType() == Ty) return Val;
+ if (Ty == Type::LabelTy)
+ P.Error(Loc, "'%" + utostr(ID) + "' is not a basic block");
+ else
+ P.Error(Loc, "'%" + utostr(ID) + "' defined with type '" +
+ Val->getType()->getDescription() + "'");
+ return 0;
+ }
+
+ if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && Ty != Type::LabelTy) {
+ P.Error(Loc, "invalid use of a non-first-class type");
+ return 0;
+ }
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Value *FwdVal;
+ if (Ty == Type::LabelTy)
+ FwdVal = BasicBlock::Create("", &F);
+ else
+ FwdVal = new Argument(Ty);
+
+ ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+ return FwdVal;
+}
+
+/// SetInstName - After an instruction is parsed and inserted into its
+/// basic block, this installs its name.
+bool LLParser::PerFunctionState::SetInstName(int NameID,
+ const std::string &NameStr,
+ LocTy NameLoc, Instruction *Inst) {
+ // If this instruction has void type, it cannot have a name or ID specified.
+ if (Inst->getType() == Type::VoidTy) {
+ if (NameID != -1 || !NameStr.empty())
+ return P.Error(NameLoc, "instructions returning void cannot have a name");
+ return false;
+ }
+
+ // If this was a numbered instruction, verify that the instruction is the
+ // expected value and resolve any forward references.
+ if (NameStr.empty()) {
+ // If neither a name nor an ID was specified, just use the next ID.
+ if (NameID == -1)
+ NameID = NumberedVals.size();
+
+ if (unsigned(NameID) != NumberedVals.size())
+ return P.Error(NameLoc, "instruction expected to be numbered '%" +
+ utostr(NumberedVals.size()) + "'");
+
+ std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
+ ForwardRefValIDs.find(NameID);
+ if (FI != ForwardRefValIDs.end()) {
+ if (FI->second.first->getType() != Inst->getType())
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
+ FI->second.first->getType()->getDescription() + "'");
+ FI->second.first->replaceAllUsesWith(Inst);
+ ForwardRefValIDs.erase(FI);
+ }
+
+ NumberedVals.push_back(Inst);
+ return false;
+ }
+
+ // Otherwise, the instruction had a name. Resolve forward refs and set it.
+ std::map<std::string, std::pair<Value*, LocTy> >::iterator
+ FI = ForwardRefVals.find(NameStr);
+ if (FI != ForwardRefVals.end()) {
+ if (FI->second.first->getType() != Inst->getType())
+ return P.Error(NameLoc, "instruction forward referenced with type '" +
+ FI->second.first->getType()->getDescription() + "'");
+ FI->second.first->replaceAllUsesWith(Inst);
+ ForwardRefVals.erase(FI);
+ }
+
+ // Set the name on the instruction.
+ Inst->setName(NameStr);
+
+ if (Inst->getNameStr() != NameStr)
+ return P.Error(NameLoc, "multiple definition of local value named '" +
+ NameStr + "'");
+ return false;
+}
+
+/// GetBB - Get a basic block with the specified name or ID, creating a
+/// forward reference record if needed.
+BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
+ LocTy Loc) {
+ return cast_or_null<BasicBlock>(GetVal(Name, Type::LabelTy, Loc));
+}
+
+BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
+ return cast_or_null<BasicBlock>(GetVal(ID, Type::LabelTy, Loc));
+}
+
+/// DefineBB - Define the specified basic block, which is either named or
+/// unnamed. If there is an error, this returns null; otherwise it returns
+/// the block being defined.
+BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
+ LocTy Loc) {
+ BasicBlock *BB;
+ if (Name.empty())
+ BB = GetBB(NumberedVals.size(), Loc);
+ else
+ BB = GetBB(Name, Loc);
+ if (BB == 0) return 0; // Already diagnosed error.
+
+ // Move the block to the end of the function. Forward ref'd blocks are
+ // inserted wherever they happen to be referenced.
+ F.getBasicBlockList().splice(F.end(), F.getBasicBlockList(), BB);
+
+ // Remove the block from forward ref sets.
+ if (Name.empty()) {
+ ForwardRefValIDs.erase(NumberedVals.size());
+ NumberedVals.push_back(BB);
+ } else {
+ // BB forward references are already in the function symbol table.
+ ForwardRefVals.erase(Name);
+ }
+
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Constants.
+//===----------------------------------------------------------------------===//
+
+/// ParseValID - Parse an abstract value that doesn't necessarily have a
+/// type implied. For example, if we parse "4" we don't know what integer type
+/// it has. The value will later be combined with its type and checked for
+/// sanity.
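+/// e.g. "42", "3.5", "null", and "@foo" are all ValIDs.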
+bool LLParser::ParseValID(ValID &ID) {
+ ID.Loc = Lex.getLoc();
+ switch (Lex.getKind()) {
+ default: return TokError("expected value token");
+ case lltok::GlobalID: // @42
+ ID.UIntVal = Lex.getUIntVal();
+ ID.Kind = ValID::t_GlobalID;
+ break;
+ case lltok::GlobalVar: // @foo
+ ID.StrVal = Lex.getStrVal();
+ ID.Kind = ValID::t_GlobalName;
+ break;
+ case lltok::LocalVarID: // %42
+ ID.UIntVal = Lex.getUIntVal();
+ ID.Kind = ValID::t_LocalID;
+ break;
+ case lltok::LocalVar: // %foo
+ case lltok::StringConstant: // "foo" - FIXME: REMOVE IN LLVM 3.0
+ ID.StrVal = Lex.getStrVal();
+ ID.Kind = ValID::t_LocalName;
+ break;
+ case lltok::Metadata: { // !{...} MDNode, !"foo" MDString
+ ID.Kind = ValID::t_Constant;
+ Lex.Lex();
+ if (Lex.getKind() == lltok::lbrace) {
+ SmallVector<Value*, 16> Elts;
+ if (ParseMDNodeVector(Elts) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ ID.ConstantVal = MDNode::get(Elts.data(), Elts.size());
+ return false;
+ }
+
+ // MDString:
+ // ::= '!' STRINGCONSTANT
+ std::string Str;
+ if (ParseStringConstant(Str)) return true;
+
+ ID.ConstantVal = MDString::get(Str.data(), Str.data() + Str.size());
+ return false;
+ }
+ case lltok::APSInt:
+ ID.APSIntVal = Lex.getAPSIntVal();
+ ID.Kind = ValID::t_APSInt;
+ break;
+ case lltok::APFloat:
+ ID.APFloatVal = Lex.getAPFloatVal();
+ ID.Kind = ValID::t_APFloat;
+ break;
+ case lltok::kw_true:
+ ID.ConstantVal = ConstantInt::getTrue();
+ ID.Kind = ValID::t_Constant;
+ break;
+ case lltok::kw_false:
+ ID.ConstantVal = ConstantInt::getFalse();
+ ID.Kind = ValID::t_Constant;
+ break;
+ case lltok::kw_null: ID.Kind = ValID::t_Null; break;
+ case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
+ case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
+
+ case lltok::lbrace: {
+ // ValID ::= '{' ConstVector '}'
+ Lex.Lex();
+ SmallVector<Constant*, 16> Elts;
+ if (ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rbrace, "expected end of struct constant"))
+ return true;
+
+ ID.ConstantVal = ConstantStruct::get(Elts.data(), Elts.size(), false);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::less: {
+ // ValID ::= '<' ConstVector '>' --> Vector.
+ // ValID ::= '<' '{' ConstVector '}' '>' --> Packed Struct.
+ Lex.Lex();
+ bool isPackedStruct = EatIfPresent(lltok::lbrace);
+
+ SmallVector<Constant*, 16> Elts;
+ LocTy FirstEltLoc = Lex.getLoc();
+ if (ParseGlobalValueVector(Elts) ||
+ (isPackedStruct &&
+ ParseToken(lltok::rbrace, "expected end of packed struct")) ||
+ ParseToken(lltok::greater, "expected end of constant"))
+ return true;
+
+ if (isPackedStruct) {
+ ID.ConstantVal = ConstantStruct::get(Elts.data(), Elts.size(), true);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ if (Elts.empty())
+ return Error(ID.Loc, "constant vector must not be empty");
+
+ if (!Elts[0]->getType()->isInteger() &&
+ !Elts[0]->getType()->isFloatingPoint())
+ return Error(FirstEltLoc,
+ "vector elements must have integer or floating point type");
+
+ // Verify that all the vector elements have the same type.
+    for (unsigned i = 1, e = Elts.size(); i != e; ++i)
+      if (Elts[i]->getType() != Elts[0]->getType())
+        return Error(FirstEltLoc,
+                     "vector element #" + utostr(i) +
+                     " is not of type '" +
+                     Elts[0]->getType()->getDescription() + "'");
+
+ ID.ConstantVal = ConstantVector::get(Elts.data(), Elts.size());
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::lsquare: { // Array Constant
+ Lex.Lex();
+ SmallVector<Constant*, 16> Elts;
+ LocTy FirstEltLoc = Lex.getLoc();
+ if (ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rsquare, "expected end of array constant"))
+ return true;
+
+ // Handle empty element.
+ if (Elts.empty()) {
+ // Use undef instead of an array because it's inconvenient to determine
+ // the element type at this point, there being no elements to examine.
+ ID.Kind = ValID::t_EmptyArray;
+ return false;
+ }
+
+ if (!Elts[0]->getType()->isFirstClassType())
+ return Error(FirstEltLoc, "invalid array element type: " +
+ Elts[0]->getType()->getDescription());
+
+ ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
+
+ // Verify all elements are correct type!
+    for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
+      if (Elts[i]->getType() != Elts[0]->getType())
+        return Error(FirstEltLoc,
+                     "array element #" + utostr(i) +
+                     " is not of type '" +
+                     Elts[0]->getType()->getDescription() + "'");
+    }
+
+ ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size());
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_c: // c "foo"
+ Lex.Lex();
+ ID.ConstantVal = ConstantArray::get(Lex.getStrVal(), false);
+ if (ParseToken(lltok::StringConstant, "expected string")) return true;
+ ID.Kind = ValID::t_Constant;
+ return false;
+
+ case lltok::kw_asm: {
+ // ValID ::= 'asm' SideEffect? STRINGCONSTANT ',' STRINGCONSTANT
+ bool HasSideEffect;
+ Lex.Lex();
+ if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
+ ParseStringConstant(ID.StrVal) ||
+ ParseToken(lltok::comma, "expected comma in inline asm expression") ||
+ ParseToken(lltok::StringConstant, "expected constraint string"))
+ return true;
+ ID.StrVal2 = Lex.getStrVal();
+ ID.UIntVal = HasSideEffect;
+ ID.Kind = ValID::t_InlineAsm;
+ return false;
+ }
+
+ case lltok::kw_trunc:
+ case lltok::kw_zext:
+ case lltok::kw_sext:
+ case lltok::kw_fptrunc:
+ case lltok::kw_fpext:
+ case lltok::kw_bitcast:
+ case lltok::kw_uitofp:
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
+ case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoint: {
+ unsigned Opc = Lex.getUIntVal();
+ PATypeHolder DestTy(Type::VoidTy);
+ Constant *SrcVal;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
+ ParseGlobalTypeAndValue(SrcVal) ||
+ ParseToken(lltok::kw_to, "expected 'to' int constantexpr cast") ||
+ ParseType(DestTy) ||
+ ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
+ return true;
+ if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
+ return Error(ID.Loc, "invalid cast opcode for cast from '" +
+ SrcVal->getType()->getDescription() + "' to '" +
+ DestTy->getDescription() + "'");
+ ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc, SrcVal,
+ DestTy);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_extractvalue: {
+ Lex.Lex();
+ Constant *Val;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseToken(lltok::lparen, "expected '(' in extractvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val) ||
+ ParseIndexList(Indices) ||
+ ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
+ return true;
+ if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
+ return Error(ID.Loc, "extractvalue operand must be array or struct");
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
+ Indices.end()))
+ return Error(ID.Loc, "invalid indices for extractvalue");
+ ID.ConstantVal =
+ ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size());
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_insertvalue: {
+ Lex.Lex();
+ Constant *Val0, *Val1;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseToken(lltok::lparen, "expected '(' in insertvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in insertvalue constantexpr")||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseIndexList(Indices) ||
+ ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
+ return true;
+ if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
+ return Error(ID.Loc, "extractvalue operand must be array or struct");
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
+ Indices.end()))
+ return Error(ID.Loc, "invalid indices for insertvalue");
+ ID.ConstantVal =
+ ConstantExpr::getInsertValue(Val0, Val1, Indices.data(), Indices.size());
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ case lltok::kw_icmp:
+ case lltok::kw_fcmp:
+ case lltok::kw_vicmp:
+ case lltok::kw_vfcmp: {
+ unsigned PredVal, Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseCmpPredicate(PredVal, Opc) ||
+ ParseToken(lltok::lparen, "expected '(' in compare constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in compare constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
+ return true;
+
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "compare operands must have the same type");
+
+ CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
+
+ if (Opc == Instruction::FCmp) {
+ if (!Val0->getType()->isFPOrFPVector())
+ return Error(ID.Loc, "fcmp requires floating point operands");
+ ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
+ } else if (Opc == Instruction::ICmp) {
+ if (!Val0->getType()->isIntOrIntVector() &&
+ !isa<PointerType>(Val0->getType()))
+ return Error(ID.Loc, "icmp requires pointer or integer operands");
+ ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
+ } else if (Opc == Instruction::VFCmp) {
+ // FIXME: REMOVE VFCMP Support
+ if (!Val0->getType()->isFPOrFPVector() ||
+ !isa<VectorType>(Val0->getType()))
+ return Error(ID.Loc, "vfcmp requires vector floating point operands");
+ ID.ConstantVal = ConstantExpr::getVFCmp(Pred, Val0, Val1);
+ } else if (Opc == Instruction::VICmp) {
+ // FIXME: REMOVE VICMP Support
+ if (!Val0->getType()->isIntOrIntVector() ||
+ !isa<VectorType>(Val0->getType()))
+ return Error(ID.Loc, "vicmp requires vector floating point operands");
+ ID.ConstantVal = ConstantExpr::getVICmp(Pred, Val0, Val1);
+ }
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // Binary Operators.
+ case lltok::kw_add:
+ case lltok::kw_sub:
+ case lltok::kw_mul:
+ case lltok::kw_udiv:
+ case lltok::kw_sdiv:
+ case lltok::kw_fdiv:
+ case lltok::kw_urem:
+ case lltok::kw_srem:
+ case lltok::kw_frem: {
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in binary constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in binary constantexpr"))
+ return true;
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVector() &&
+ !Val0->getType()->isFPOrFPVector())
+ return Error(ID.Loc,"constexpr requires integer, fp, or vector operands");
+ ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // Logical Operations
+ case lltok::kw_shl:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr:
+ case lltok::kw_and:
+ case lltok::kw_or:
+ case lltok::kw_xor: {
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val0, *Val1;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' in logical constantexpr") ||
+ ParseGlobalTypeAndValue(Val0) ||
+ ParseToken(lltok::comma, "expected comma in logical constantexpr") ||
+ ParseGlobalTypeAndValue(Val1) ||
+ ParseToken(lltok::rparen, "expected ')' in logical constantexpr"))
+ return true;
+ if (Val0->getType() != Val1->getType())
+ return Error(ID.Loc, "operands of constexpr must have same type");
+ if (!Val0->getType()->isIntOrIntVector())
+ return Error(ID.Loc,
+ "constexpr requires integer or integer vector operands");
+ ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ case lltok::kw_getelementptr:
+ case lltok::kw_shufflevector:
+ case lltok::kw_insertelement:
+ case lltok::kw_extractelement:
+ case lltok::kw_select: {
+ unsigned Opc = Lex.getUIntVal();
+ SmallVector<Constant*, 16> Elts;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
+ ParseGlobalValueVector(Elts) ||
+ ParseToken(lltok::rparen, "expected ')' in constantexpr"))
+ return true;
+
+ if (Opc == Instruction::GetElementPtr) {
+ if (Elts.size() == 0 || !isa<PointerType>(Elts[0]->getType()))
+ return Error(ID.Loc, "getelementptr requires pointer operand");
+
+ if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
+ (Value**)&Elts[1], Elts.size()-1))
+ return Error(ID.Loc, "invalid indices for getelementptr");
+ ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0],
+ &Elts[1], Elts.size()-1);
+ } else if (Opc == Instruction::Select) {
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to select");
+ if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
+ Elts[2]))
+ return Error(ID.Loc, Reason);
+ ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
+ } else if (Opc == Instruction::ShuffleVector) {
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to shufflevector");
+ if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+ return Error(ID.Loc, "invalid operands to shufflevector");
+ ID.ConstantVal = ConstantExpr::getShuffleVector(Elts[0], Elts[1],Elts[2]);
+ } else if (Opc == Instruction::ExtractElement) {
+ if (Elts.size() != 2)
+ return Error(ID.Loc, "expected two operands to extractelement");
+ if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1]))
+ return Error(ID.Loc, "invalid extractelement operands");
+ ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]);
+ } else {
+ assert(Opc == Instruction::InsertElement && "Unknown opcode");
+ if (Elts.size() != 3)
+ return Error(ID.Loc, "expected three operands to insertelement");
+ if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+ return Error(ID.Loc, "invalid insertelement operands");
+ ID.ConstantVal = ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]);
+ }
+
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+ }
+
+ Lex.Lex();
+ return false;
+}
+
+/// ParseGlobalValue - Parse a global value with the specified type.
+bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&V) {
+ V = 0;
+ ValID ID;
+ return ParseValID(ID) ||
+ ConvertGlobalValIDToValue(Ty, ID, V);
+}
+
+/// ConvertGlobalValIDToValue - Apply a type to a ValID to get a fully resolved
+/// constant.
+bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
+ Constant *&V) {
+ if (isa<FunctionType>(Ty))
+ return Error(ID.Loc, "functions are not values, refer to them as pointers");
+
+ switch (ID.Kind) {
+ default: assert(0 && "Unknown ValID!");
+ case ValID::t_LocalID:
+ case ValID::t_LocalName:
+ return Error(ID.Loc, "invalid use of function-local name");
+ case ValID::t_InlineAsm:
+ return Error(ID.Loc, "inline asm can only be an operand of call/invoke");
+ case ValID::t_GlobalName:
+ V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
+ return V == 0;
+ case ValID::t_GlobalID:
+ V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
+ return V == 0;
+ case ValID::t_APSInt:
+ if (!isa<IntegerType>(Ty))
+ return Error(ID.Loc, "integer constant must have integer type");
+ ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
+ V = ConstantInt::get(ID.APSIntVal);
+ return false;
+ case ValID::t_APFloat:
+ if (!Ty->isFloatingPoint() ||
+ !ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
+ return Error(ID.Loc, "floating point constant invalid for type");
+
+    // The lexer has no type info, so it builds all float and double FP
+    // constants as double. Convert to float here if necessary; long doubles
+    // need no conversion.
+ if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble &&
+ Ty == Type::FloatTy) {
+ bool Ignored;
+ ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &Ignored);
+ }
+ V = ConstantFP::get(ID.APFloatVal);
+
+ if (V->getType() != Ty)
+ return Error(ID.Loc, "floating point constant does not have type '" +
+ Ty->getDescription() + "'");
+
+ return false;
+ case ValID::t_Null:
+ if (!isa<PointerType>(Ty))
+ return Error(ID.Loc, "null must be a pointer type");
+ V = ConstantPointerNull::get(cast<PointerType>(Ty));
+ return false;
+ case ValID::t_Undef:
+ // FIXME: LabelTy should not be a first-class type.
+ if ((!Ty->isFirstClassType() || Ty == Type::LabelTy) &&
+ !isa<OpaqueType>(Ty))
+ return Error(ID.Loc, "invalid type for undef constant");
+ V = UndefValue::get(Ty);
+ return false;
+ case ValID::t_EmptyArray:
+ if (!isa<ArrayType>(Ty) || cast<ArrayType>(Ty)->getNumElements() != 0)
+ return Error(ID.Loc, "invalid empty array initializer");
+ V = UndefValue::get(Ty);
+ return false;
+ case ValID::t_Zero:
+ // FIXME: LabelTy should not be a first-class type.
+ if (!Ty->isFirstClassType() || Ty == Type::LabelTy)
+ return Error(ID.Loc, "invalid type for null constant");
+ V = Constant::getNullValue(Ty);
+ return false;
+ case ValID::t_Constant:
+ if (ID.ConstantVal->getType() != Ty)
+ return Error(ID.Loc, "constant expression type mismatch");
+ V = ID.ConstantVal;
+ return false;
+ }
+}
+
+bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
+ PATypeHolder Type(Type::VoidTy);
+ return ParseType(Type) ||
+ ParseGlobalValue(Type, V);
+}
+
+/// ParseGlobalValueVector
+/// ::= /*empty*/
+/// ::= TypeAndValue (',' TypeAndValue)*
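+/// e.g. "i32 1, i32 2, i32 3" as the body of an array or struct constant.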
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
+ // Empty list.
+ if (Lex.getKind() == lltok::rbrace ||
+ Lex.getKind() == lltok::rsquare ||
+ Lex.getKind() == lltok::greater ||
+ Lex.getKind() == lltok::rparen)
+ return false;
+
+ Constant *C;
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Function Parsing.
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState &PFS) {
+ if (ID.Kind == ValID::t_LocalID)
+ V = PFS.GetVal(ID.UIntVal, Ty, ID.Loc);
+ else if (ID.Kind == ValID::t_LocalName)
+ V = PFS.GetVal(ID.StrVal, Ty, ID.Loc);
+ else if (ID.Kind == ValID::t_InlineAsm) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ const FunctionType *FTy =
+ PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+ if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
+ return Error(ID.Loc, "invalid type for inline asm constraint string");
+ V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal);
+ return false;
+ } else {
+ Constant *C;
+ if (ConvertGlobalValIDToValue(Ty, ID, C)) return true;
+ V = C;
+ return false;
+ }
+
+ return V == 0;
+}
+
+bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+ V = 0;
+ ValID ID;
+ return ParseValID(ID) ||
+ ConvertValIDToValue(Ty, ID, V, PFS);
+}
+
+bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
+ PATypeHolder T(Type::VoidTy);
+ return ParseType(T) ||
+ ParseValue(T, V, PFS);
+}
+
+/// FunctionHeader
+/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
+/// Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
+/// OptionalAlign OptGC
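+/// e.g. "internal fastcc i32 @foo(i32 %x) nounwind".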
+bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
+ // Parse the linkage.
+ LocTy LinkageLoc = Lex.getLoc();
+ unsigned Linkage;
+
+ unsigned Visibility, CC, RetAttrs;
+ PATypeHolder RetType(Type::VoidTy);
+ LocTy RetTypeLoc = Lex.getLoc();
+ if (ParseOptionalLinkage(Linkage) ||
+ ParseOptionalVisibility(Visibility) ||
+ ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/))
+ return true;
+
+ // Verify that the linkage is ok.
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::ExternalLinkage:
+ break; // always ok.
+ case GlobalValue::DLLImportLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ if (isDefine)
+ return Error(LinkageLoc, "invalid linkage for function definition");
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::DLLExportLinkage:
+ if (!isDefine)
+ return Error(LinkageLoc, "invalid linkage for function declaration");
+ break;
+ case GlobalValue::AppendingLinkage:
+ case GlobalValue::GhostLinkage:
+ case GlobalValue::CommonLinkage:
+ return Error(LinkageLoc, "invalid function linkage type");
+ }
+
+ if (!FunctionType::isValidReturnType(RetType) ||
+ isa<OpaqueType>(RetType))
+ return Error(RetTypeLoc, "invalid function return type");
+
+ LocTy NameLoc = Lex.getLoc();
+
+ std::string FunctionName;
+ if (Lex.getKind() == lltok::GlobalVar) {
+ FunctionName = Lex.getStrVal();
+ } else if (Lex.getKind() == lltok::GlobalID) { // @42 is ok.
+ unsigned NameID = Lex.getUIntVal();
+
+ if (NameID != NumberedVals.size())
+ return TokError("function expected to be numbered '%" +
+ utostr(NumberedVals.size()) + "'");
+ } else {
+ return TokError("expected function name");
+ }
+
+ Lex.Lex();
+
+ if (Lex.getKind() != lltok::lparen)
+ return TokError("expected '(' in function argument list");
+
+ std::vector<ArgInfo> ArgList;
+ bool isVarArg;
+ unsigned FuncAttrs;
+ std::string Section;
+ unsigned Alignment;
+ std::string GC;
+
+ if (ParseArgumentList(ArgList, isVarArg, false) ||
+ ParseOptionalAttrs(FuncAttrs, 2) ||
+ (EatIfPresent(lltok::kw_section) &&
+ ParseStringConstant(Section)) ||
+ ParseOptionalAlignment(Alignment) ||
+ (EatIfPresent(lltok::kw_gc) &&
+ ParseStringConstant(GC)))
+ return true;
+
+ // If the alignment was parsed as an attribute, move to the alignment field.
+ if (FuncAttrs & Attribute::Alignment) {
+ Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs);
+ FuncAttrs &= ~Attribute::Alignment;
+ }
+
+ // Okay, if we got here, the function is syntactically valid. Convert types
+ // and do semantic checks.
+ std::vector<const Type*> ParamTypeList;
+ SmallVector<AttributeWithIndex, 8> Attrs;
+  // FIXME: In 3.0, stop accepting zext, sext and inreg as optional function
+ // attributes.
+ unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
+ if (FuncAttrs & ObsoleteFuncAttrs) {
+ RetAttrs |= FuncAttrs & ObsoleteFuncAttrs;
+ FuncAttrs &= ~ObsoleteFuncAttrs;
+ }
+
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ ParamTypeList.push_back(ArgList[i].Type);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (FuncAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs));
+
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ if (PAL.paramHasAttr(1, Attribute::StructRet) &&
+ RetType != Type::VoidTy)
+ return Error(RetTypeLoc, "functions with 'sret' argument must return void");
+
+ const FunctionType *FT = FunctionType::get(RetType, ParamTypeList, isVarArg);
+ const PointerType *PFT = PointerType::getUnqual(FT);
+
+ Fn = 0;
+ if (!FunctionName.empty()) {
+ // If this was a definition of a forward reference, remove the definition
+ // from the forward reference table and fill in the forward ref.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator FRVI =
+ ForwardRefVals.find(FunctionName);
+ if (FRVI != ForwardRefVals.end()) {
+ Fn = M->getFunction(FunctionName);
+ ForwardRefVals.erase(FRVI);
+ } else if ((Fn = M->getFunction(FunctionName))) {
+ // If this function already exists in the symbol table, then it is
+ // multiply defined. We accept a few cases for old backwards compat.
+ // FIXME: Remove this stuff for LLVM 3.0.
+ if (Fn->getType() != PFT || Fn->getAttributes() != PAL ||
+ (!Fn->isDeclaration() && isDefine)) {
+ // If the redefinition has different type or different attributes,
+ // reject it. If both have bodies, reject it.
+ return Error(NameLoc, "invalid redefinition of function '" +
+ FunctionName + "'");
+ } else if (Fn->isDeclaration()) {
+ // Make sure to strip off any argument names so we can't get conflicts.
+ for (Function::arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
+ AI != AE; ++AI)
+ AI->setName("");
+ }
+ }
+
+  } else {
+ // If this is a definition of a forward referenced function, make sure the
+ // types agree.
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
+ = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ Fn = cast<Function>(I->second.first);
+ if (Fn->getType() != PFT)
+ return Error(NameLoc, "type of definition and forward reference of '@" +
+ utostr(NumberedVals.size()) +"' disagree");
+ ForwardRefValIDs.erase(I);
+ }
+ }
+
+ if (Fn == 0)
+ Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M);
+ else // Move the forward-reference to the correct spot in the module.
+ M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn);
+
+ if (FunctionName.empty())
+ NumberedVals.push_back(Fn);
+
+ Fn->setLinkage((GlobalValue::LinkageTypes)Linkage);
+ Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ Fn->setCallingConv(CC);
+ Fn->setAttributes(PAL);
+ Fn->setAlignment(Alignment);
+ Fn->setSection(Section);
+ if (!GC.empty()) Fn->setGC(GC.c_str());
+
+ // Add all of the arguments we parsed to the function.
+ Function::arg_iterator ArgIt = Fn->arg_begin();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
+ // If the argument has a name, insert it into the argument symbol table.
+ if (ArgList[i].Name.empty()) continue;
+
+ // Set the name, if it conflicted, it will be auto-renamed.
+ ArgIt->setName(ArgList[i].Name);
+
+ if (ArgIt->getNameStr() != ArgList[i].Name)
+ return Error(ArgList[i].Loc, "redefinition of argument '%" +
+ ArgList[i].Name + "'");
+ }
+
+ return false;
+}
+
+
+/// ParseFunctionBody
+/// ::= '{' BasicBlock+ '}'
+/// ::= 'begin' BasicBlock+ 'end' // FIXME: remove in LLVM 3.0
+///
+bool LLParser::ParseFunctionBody(Function &Fn) {
+ if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin)
+ return TokError("expected '{' in function body");
+ Lex.Lex(); // eat the {.
+
+ PerFunctionState PFS(*this, Fn);
+
+ while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
+ if (ParseBasicBlock(PFS)) return true;
+
+ // Eat the }.
+ Lex.Lex();
+
+ // Verify function is ok.
+ return PFS.VerifyFunctionComplete();
+}
+
+/// ParseBasicBlock
+/// ::= LabelStr? Instruction*
+bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
+ // If this basic block starts out with a name, remember it.
+ std::string Name;
+ LocTy NameLoc = Lex.getLoc();
+ if (Lex.getKind() == lltok::LabelStr) {
+ Name = Lex.getStrVal();
+ Lex.Lex();
+ }
+
+ BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
+ if (BB == 0) return true;
+
+ std::string NameStr;
+
+ // Parse the instructions in this block until we get a terminator.
+ Instruction *Inst;
+ do {
+ // This instruction may have three possibilities for a name: a) none
+ // specified, b) name specified "%foo =", c) number specified: "%4 =".
+ LocTy NameLoc = Lex.getLoc();
+ int NameID = -1;
+ NameStr = "";
+
+ if (Lex.getKind() == lltok::LocalVarID) {
+ NameID = Lex.getUIntVal();
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after instruction id"))
+ return true;
+ } else if (Lex.getKind() == lltok::LocalVar ||
+ // FIXME: REMOVE IN LLVM 3.0
+ Lex.getKind() == lltok::StringConstant) {
+ NameStr = Lex.getStrVal();
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' after instruction name"))
+ return true;
+ }
+
+ if (ParseInstruction(Inst, BB, PFS)) return true;
+
+ BB->getInstList().push_back(Inst);
+
+ // Set the name on the instruction.
+ if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
+ } while (!isa<TerminatorInst>(Inst));
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseInstruction - Parse one of the many different instructions.
+///
+bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ lltok::Kind Token = Lex.getKind();
+ if (Token == lltok::Eof)
+ return TokError("found end of file when expecting more instructions");
+ LocTy Loc = Lex.getLoc();
+ unsigned KeywordVal = Lex.getUIntVal();
+ Lex.Lex(); // Eat the keyword.
+
+ switch (Token) {
+ default: return Error(Loc, "expected instruction opcode");
+ // Terminator Instructions.
+ case lltok::kw_unwind: Inst = new UnwindInst(); return false;
+ case lltok::kw_unreachable: Inst = new UnreachableInst(); return false;
+ case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
+ case lltok::kw_br: return ParseBr(Inst, PFS);
+ case lltok::kw_switch: return ParseSwitch(Inst, PFS);
+ case lltok::kw_invoke: return ParseInvoke(Inst, PFS);
+ // Binary Operators.
+ case lltok::kw_add:
+ case lltok::kw_sub:
+ case lltok::kw_mul: return ParseArithmetic(Inst, PFS, KeywordVal, 0);
+
+ case lltok::kw_udiv:
+ case lltok::kw_sdiv:
+ case lltok::kw_urem:
+ case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
+ case lltok::kw_fdiv:
+ case lltok::kw_frem: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+ case lltok::kw_shl:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr:
+ case lltok::kw_and:
+ case lltok::kw_or:
+ case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
+ case lltok::kw_icmp:
+ case lltok::kw_fcmp:
+ case lltok::kw_vicmp:
+ case lltok::kw_vfcmp: return ParseCompare(Inst, PFS, KeywordVal);
+ // Casts.
+ case lltok::kw_trunc:
+ case lltok::kw_zext:
+ case lltok::kw_sext:
+ case lltok::kw_fptrunc:
+ case lltok::kw_fpext:
+ case lltok::kw_bitcast:
+ case lltok::kw_uitofp:
+ case lltok::kw_sitofp:
+ case lltok::kw_fptoui:
+ case lltok::kw_fptosi:
+ case lltok::kw_inttoptr:
+ case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal);
+ // Other.
+ case lltok::kw_select: return ParseSelect(Inst, PFS);
+ case lltok::kw_va_arg: return ParseVA_Arg(Inst, PFS);
+ case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
+ case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);
+ case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS);
+ case lltok::kw_phi: return ParsePHI(Inst, PFS);
+ case lltok::kw_call: return ParseCall(Inst, PFS, false);
+ case lltok::kw_tail: return ParseCall(Inst, PFS, true);
+ // Memory.
+ case lltok::kw_alloca:
+ case lltok::kw_malloc: return ParseAlloc(Inst, PFS, KeywordVal);
+ case lltok::kw_free: return ParseFree(Inst, PFS);
+ case lltok::kw_load: return ParseLoad(Inst, PFS, false);
+ case lltok::kw_store: return ParseStore(Inst, PFS, false);
+ case lltok::kw_volatile:
+ if (EatIfPresent(lltok::kw_load))
+ return ParseLoad(Inst, PFS, true);
+ else if (EatIfPresent(lltok::kw_store))
+ return ParseStore(Inst, PFS, true);
+ else
+ return TokError("expected 'load' or 'store'");
+ case lltok::kw_getresult: return ParseGetResult(Inst, PFS);
+ case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
+ case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
+ case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
+ }
+}
+
+/// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
+bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
+ // FIXME: REMOVE vicmp/vfcmp!
+ if (Opc == Instruction::FCmp || Opc == Instruction::VFCmp) {
+ switch (Lex.getKind()) {
+ default: TokError("expected fcmp predicate (e.g. 'oeq')");
+ case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
+ case lltok::kw_one: P = CmpInst::FCMP_ONE; break;
+ case lltok::kw_olt: P = CmpInst::FCMP_OLT; break;
+ case lltok::kw_ogt: P = CmpInst::FCMP_OGT; break;
+ case lltok::kw_ole: P = CmpInst::FCMP_OLE; break;
+ case lltok::kw_oge: P = CmpInst::FCMP_OGE; break;
+ case lltok::kw_ord: P = CmpInst::FCMP_ORD; break;
+ case lltok::kw_uno: P = CmpInst::FCMP_UNO; break;
+ case lltok::kw_ueq: P = CmpInst::FCMP_UEQ; break;
+ case lltok::kw_une: P = CmpInst::FCMP_UNE; break;
+ case lltok::kw_ult: P = CmpInst::FCMP_ULT; break;
+ case lltok::kw_ugt: P = CmpInst::FCMP_UGT; break;
+ case lltok::kw_ule: P = CmpInst::FCMP_ULE; break;
+ case lltok::kw_uge: P = CmpInst::FCMP_UGE; break;
+ case lltok::kw_true: P = CmpInst::FCMP_TRUE; break;
+ case lltok::kw_false: P = CmpInst::FCMP_FALSE; break;
+ }
+ } else {
+ switch (Lex.getKind()) {
+ default: TokError("expected icmp predicate (e.g. 'eq')");
+ case lltok::kw_eq: P = CmpInst::ICMP_EQ; break;
+ case lltok::kw_ne: P = CmpInst::ICMP_NE; break;
+ case lltok::kw_slt: P = CmpInst::ICMP_SLT; break;
+ case lltok::kw_sgt: P = CmpInst::ICMP_SGT; break;
+ case lltok::kw_sle: P = CmpInst::ICMP_SLE; break;
+ case lltok::kw_sge: P = CmpInst::ICMP_SGE; break;
+ case lltok::kw_ult: P = CmpInst::ICMP_ULT; break;
+ case lltok::kw_ugt: P = CmpInst::ICMP_UGT; break;
+ case lltok::kw_ule: P = CmpInst::ICMP_ULE; break;
+ case lltok::kw_uge: P = CmpInst::ICMP_UGE; break;
+ }
+ }
+ Lex.Lex();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Terminator Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseRet - Parse a return instruction.
+/// ::= 'ret' void
+/// ::= 'ret' TypeAndValue
+/// ::= 'ret' TypeAndValue (',' TypeAndValue)+ [[obsolete: LLVM 3.0]]
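+/// e.g. "ret void", "ret i32 0", or the obsolete "ret i32 1, i32 2".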
+bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ PATypeHolder Ty(Type::VoidTy);
+ if (ParseType(Ty, true /*void allowed*/)) return true;
+
+ if (Ty == Type::VoidTy) {
+ Inst = ReturnInst::Create();
+ return false;
+ }
+
+ Value *RV;
+ if (ParseValue(Ty, RV, PFS)) return true;
+
+ // The normal case is one return value.
+ if (Lex.getKind() == lltok::comma) {
+ // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring use
+ // of 'ret {i32,i32} {i32 1, i32 2}'
+ SmallVector<Value*, 8> RVs;
+ RVs.push_back(RV);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseTypeAndValue(RV, PFS)) return true;
+ RVs.push_back(RV);
+ }
+
+ RV = UndefValue::get(PFS.getFunction().getReturnType());
+ for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
+ Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
+ BB->getInstList().push_back(I);
+ RV = I;
+ }
+ }
+ Inst = ReturnInst::Create(RV);
+ return false;
+}
+
+
+/// ParseBr
+/// ::= 'br' TypeAndValue
+/// ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
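+/// e.g. "br label %out" or "br i1 %cond, label %then, label %else".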
+bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc, Loc2;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
+ Inst = BranchInst::Create(BB);
+ return false;
+ }
+
+ if (Op0->getType() != Type::Int1Ty)
+ return Error(Loc, "branch condition must have 'i1' type");
+
+ if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
+ ParseTypeAndValue(Op1, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after true destination") ||
+ ParseTypeAndValue(Op2, Loc2, PFS))
+ return true;
+
+ if (!isa<BasicBlock>(Op1))
+ return Error(Loc, "true destination of branch must be a basic block");
+ if (!isa<BasicBlock>(Op2))
+ return Error(Loc2, "true destination of branch must be a basic block");
+
+ Inst = BranchInst::Create(cast<BasicBlock>(Op1), cast<BasicBlock>(Op2), Op0);
+ return false;
+}
+
+/// ParseSwitch
+/// Instruction
+/// ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']'
+/// JumpTable
+/// ::= (TypeAndValue ',' TypeAndValue)*
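+/// e.g. "switch i32 %val, label %otherwise [ i32 0, label %zero ]".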
+bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CondLoc, BBLoc;
+ Value *Cond, *DefaultBB;
+ if (ParseTypeAndValue(Cond, CondLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after switch condition") ||
+ ParseTypeAndValue(DefaultBB, BBLoc, PFS) ||
+ ParseToken(lltok::lsquare, "expected '[' with switch table"))
+ return true;
+
+ if (!isa<IntegerType>(Cond->getType()))
+ return Error(CondLoc, "switch condition must have integer type");
+ if (!isa<BasicBlock>(DefaultBB))
+ return Error(BBLoc, "default destination must be a basic block");
+
+ // Parse the jump table pairs.
+ SmallPtrSet<Value*, 32> SeenCases;
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
+ while (Lex.getKind() != lltok::rsquare) {
+ Value *Constant, *DestBB;
+
+ if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after case value") ||
+ ParseTypeAndValue(DestBB, BBLoc, PFS))
+ return true;
+
+ if (!SeenCases.insert(Constant))
+ return Error(CondLoc, "duplicate case value in switch");
+ if (!isa<ConstantInt>(Constant))
+ return Error(CondLoc, "case value is not a constant integer");
+ if (!isa<BasicBlock>(DestBB))
+ return Error(BBLoc, "case destination is not a basic block");
+
+ Table.push_back(std::make_pair(cast<ConstantInt>(Constant),
+ cast<BasicBlock>(DestBB)));
+ }
+
+ Lex.Lex(); // Eat the ']'.
+
+ SwitchInst *SI = SwitchInst::Create(Cond, cast<BasicBlock>(DefaultBB),
+ Table.size());
+ for (unsigned i = 0, e = Table.size(); i != e; ++i)
+ SI->addCase(Table[i].first, Table[i].second);
+ Inst = SI;
+ return false;
+}
+
+/// ParseInvoke
+/// ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList
+/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
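+/// e.g. "invoke void @f() to label %normal unwind label %handler".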
+bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CallLoc = Lex.getLoc();
+ unsigned CC, RetAttrs, FnAttrs;
+ PATypeHolder RetType(Type::VoidTy);
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+
+ Value *NormalBB, *UnwindBB;
+ if (ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) ||
+ ParseParameterList(ArgList, PFS) ||
+ ParseOptionalAttrs(FnAttrs, 2) ||
+ ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
+ ParseTypeAndValue(NormalBB, PFS) ||
+ ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
+ ParseTypeAndValue(UnwindBB, PFS))
+ return true;
+
+ if (!isa<BasicBlock>(NormalBB))
+ return Error(CallLoc, "normal destination is not a basic block");
+ if (!isa<BasicBlock>(UnwindBB))
+ return Error(CallLoc, "unwind destination is not a basic block");
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ const PointerType *PFTy = 0;
+ const FunctionType *Ty = 0;
+ if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+ !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ // Pull out the types of all of the arguments...
+ std::vector<const Type*> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
+ }
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
+
+ // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
+ // function attributes.
+ unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
+ if (FnAttrs & ObsoleteFuncAttrs) {
+ RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
+ FnAttrs &= ~ObsoleteFuncAttrs;
+ }
+
+ // Set up the Attributes for the function.
+ SmallVector<AttributeWithIndex, 8> Attrs;
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ SmallVector<Value*, 8> Args;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ const Type *ExpectedTy = 0;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ ExpectedTy->getDescription() + "'");
+ Args.push_back(ArgList[i].V);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Finish off the Attributes and check them
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ InvokeInst *II = InvokeInst::Create(Callee, cast<BasicBlock>(NormalBB),
+ cast<BasicBlock>(UnwindBB),
+ Args.begin(), Args.end());
+ II->setCallingConv(CC);
+ II->setAttributes(PAL);
+ Inst = II;
+ return false;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Binary Operators.
+//===----------------------------------------------------------------------===//
+
+/// ParseArithmetic
+/// ::= ArithmeticOps TypeAndValue ',' Value
+///
+/// If OperandType is 0, any FP or integer operand is allowed; if it is 1,
+/// any integer operand is allowed; if it is 2, any fp operand is allowed.
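+/// e.g. "add i32 %a, 1" or "fdiv double %x, 2.0".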
+bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc, unsigned OperandType) {
+ LocTy Loc; Value *LHS, *RHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' in arithmetic operation") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ bool Valid;
+ switch (OperandType) {
+ default: assert(0 && "Unknown operand type!");
+ case 0: // int or FP.
+ Valid = LHS->getType()->isIntOrIntVector() ||
+ LHS->getType()->isFPOrFPVector();
+ break;
+ case 1: Valid = LHS->getType()->isIntOrIntVector(); break;
+ case 2: Valid = LHS->getType()->isFPOrFPVector(); break;
+ }
+
+ if (!Valid)
+ return Error(Loc, "invalid operand type for instruction");
+
+ Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ return false;
+}
+
+/// ParseLogical
+///  ::= ArithmeticOps TypeAndValue ',' Value
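+/// e.g. "and i32 %a, 255" or "shl i32 %b, 4".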
+bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ LocTy Loc; Value *LHS, *RHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' in logical operation") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ if (!LHS->getType()->isIntOrIntVector())
+ return Error(Loc,"instruction requires integer or integer vector operands");
+
+ Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ return false;
+}
+
+
+/// ParseCompare
+/// ::= 'icmp' IPredicates TypeAndValue ',' Value
+/// ::= 'fcmp' FPredicates TypeAndValue ',' Value
+/// ::= 'vicmp' IPredicates TypeAndValue ',' Value
+/// ::= 'vfcmp' FPredicates TypeAndValue ',' Value
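+/// e.g. "icmp eq i32 %a, %b" or "fcmp olt double %x, 0.0".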
+bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ // Parse the integer/fp comparison predicate.
+ LocTy Loc;
+ unsigned Pred;
+ Value *LHS, *RHS;
+ if (ParseCmpPredicate(Pred, Opc) ||
+ ParseTypeAndValue(LHS, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after compare value") ||
+ ParseValue(LHS->getType(), RHS, PFS))
+ return true;
+
+ if (Opc == Instruction::FCmp) {
+ if (!LHS->getType()->isFPOrFPVector())
+ return Error(Loc, "fcmp requires floating point operands");
+ Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ } else if (Opc == Instruction::ICmp) {
+ if (!LHS->getType()->isIntOrIntVector() &&
+ !isa<PointerType>(LHS->getType()))
+ return Error(Loc, "icmp requires integer operands");
+ Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ } else if (Opc == Instruction::VFCmp) {
+ if (!LHS->getType()->isFPOrFPVector() || !isa<VectorType>(LHS->getType()))
+ return Error(Loc, "vfcmp requires vector floating point operands");
+ Inst = new VFCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ } else if (Opc == Instruction::VICmp) {
+ if (!LHS->getType()->isIntOrIntVector() || !isa<VectorType>(LHS->getType()))
+ return Error(Loc, "vicmp requires vector floating point operands");
+ Inst = new VICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Instructions.
+//===----------------------------------------------------------------------===//
+
+
+/// ParseCast
+/// ::= CastOpc TypeAndValue 'to' Type
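+/// e.g. "zext i8 %x to i32" or "bitcast i8* %p to i32*".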
+bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ LocTy Loc; Value *Op;
+ PATypeHolder DestTy(Type::VoidTy);
+ if (ParseTypeAndValue(Op, Loc, PFS) ||
+ ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
+ ParseType(DestTy))
+ return true;
+
+  if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy))
+    return Error(Loc, "invalid cast opcode for cast from '" +
+                 Op->getType()->getDescription() + "' to '" +
+                 DestTy->getDescription() + "'");
+ Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
+ return false;
+}
+
+/// ParseSelect
+/// ::= 'select' TypeAndValue ',' TypeAndValue ',' TypeAndValue
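+/// e.g. "select i1 %cond, i32 %a, i32 %b".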
+bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after select condition") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after select value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
+ return Error(Loc, Reason);
+
+ Inst = SelectInst::Create(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParseVA_Arg
+/// ::= 'va_arg' TypeAndValue ',' Type
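+/// e.g. "va_arg i8** %ap, i32".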
+bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Op;
+ PATypeHolder EltTy(Type::VoidTy);
+ LocTy TypeLoc;
+ if (ParseTypeAndValue(Op, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
+ ParseType(EltTy, TypeLoc))
+ return true;
+
+ if (!EltTy->isFirstClassType())
+ return Error(TypeLoc, "va_arg requires operand with first class type");
+
+ Inst = new VAArgInst(Op, EltTy);
+ return false;
+}
+
+/// ParseExtractElement
+/// ::= 'extractelement' TypeAndValue ',' TypeAndValue
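+/// e.g. "extractelement <4 x i32> %v, i32 0".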
+bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after extract value") ||
+ ParseTypeAndValue(Op1, PFS))
+ return true;
+
+ if (!ExtractElementInst::isValidOperands(Op0, Op1))
+ return Error(Loc, "invalid extractelement operands");
+
+ Inst = new ExtractElementInst(Op0, Op1);
+ return false;
+}
+
+/// ParseInsertElement
+/// ::= 'insertelement' TypeAndValue ',' TypeAndValue ',' TypeAndValue
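+/// e.g. "insertelement <4 x i32> %v, i32 1, i32 0".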
+bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
+ return Error(Loc, "invalid extractelement operands");
+
+ Inst = InsertElementInst::Create(Op0, Op1, Op2);
+ return false;
+}
+
+/// ParseShuffleVector
+/// ::= 'shufflevector' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy Loc;
+ Value *Op0, *Op1, *Op2;
+ if (ParseTypeAndValue(Op0, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after shuffle mask") ||
+ ParseTypeAndValue(Op1, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after shuffle value") ||
+ ParseTypeAndValue(Op2, PFS))
+ return true;
+
+ if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
+ return Error(Loc, "invalid extractelement operands");
+
+ Inst = new ShuffleVectorInst(Op0, Op1, Op2);
+ return false;
+}
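+
+// For example (hypothetical input):
+//   %s = shufflevector <2 x i32> %a, <2 x i32> %b,
+//                      <4 x i32> <i32 0, i32 1, i32 2, i32 3>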
+
+/// ParsePHI
+/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
+bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
+ PATypeHolder Ty(Type::VoidTy);
+ Value *Op0, *Op1;
+ LocTy TypeLoc = Lex.getLoc();
+
+ if (ParseType(Ty) ||
+ ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ ParseValue(Ty, Op0, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseValue(Type::LabelTy, Op1, PFS) ||
+ ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ return true;
+
+ SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
+ while (1) {
+ PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
+
+ if (!EatIfPresent(lltok::comma))
+ break;
+
+ if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+ ParseValue(Ty, Op0, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+ ParseValue(Type::LabelTy, Op1, PFS) ||
+ ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+ return true;
+ }
+
+ if (!Ty->isFirstClassType())
+ return Error(TypeLoc, "phi node must have first class type");
+
+ PHINode *PN = PHINode::Create(Ty);
+ PN->reserveOperandSpace(PHIVals.size());
+ for (unsigned i = 0, e = PHIVals.size(); i != e; ++i)
+ PN->addIncoming(PHIVals[i].first, PHIVals[i].second);
+ Inst = PN;
+ return false;
+}
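+
+// For example (hypothetical input):
+//   %r = phi i32 [ %a, %entry ], [ %b, %loop ]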
+
+/// ParseCall
+/// ::= 'tail'? 'call' OptionalCallingConv OptionalAttrs Type Value
+/// ParameterList OptionalAttrs
+bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
+ bool isTail) {
+ unsigned CC, RetAttrs, FnAttrs;
+ PATypeHolder RetType(Type::VoidTy);
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+ LocTy CallLoc = Lex.getLoc();
+
+ if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
+ ParseOptionalCallingConv(CC) ||
+ ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) ||
+ ParseParameterList(ArgList, PFS) ||
+ ParseOptionalAttrs(FnAttrs, 2))
+ return true;
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ const PointerType *PFTy = 0;
+ const FunctionType *Ty = 0;
+ if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+ !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ // Pull out the types of all of the arguments...
+ std::vector<const Type*> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ PFTy = PointerType::getUnqual(Ty);
+ }
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
+
+ // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
+ // function attributes.
+ unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
+ if (FnAttrs & ObsoleteFuncAttrs) {
+ RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
+ FnAttrs &= ~ObsoleteFuncAttrs;
+ }
+
+ // Set up the Attributes for the function.
+ SmallVector<AttributeWithIndex, 8> Attrs;
+ if (RetAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+ SmallVector<Value*, 8> Args;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ const Type *ExpectedTy = 0;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ ExpectedTy->getDescription() + "'");
+ Args.push_back(ArgList[i].V);
+ if (ArgList[i].Attrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Finish off the Attributes and check them
+ AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+ CallInst *CI = CallInst::Create(Callee, Args.begin(), Args.end());
+ CI->setTailCall(isTail);
+ CI->setCallingConv(CC);
+ CI->setAttributes(PAL);
+ Inst = CI;
+ return false;
+}
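+
+// For example, both call forms are accepted (hypothetical input):
+//   %r = call i32 @f(i32 %x)                  ; short form, signature inferred
+//   %n = call i32 (i8*, ...)* @printf(i8* %s) ; explicit function pointer type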
+
+//===----------------------------------------------------------------------===//
+// Memory Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseAlloc
+/// ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalAlignment)?
+/// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalAlignment)?
+bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc) {
+ PATypeHolder Ty(Type::VoidTy);
+ Value *Size = 0;
+ LocTy SizeLoc = 0;
+ unsigned Alignment = 0;
+ if (ParseType(Ty)) return true;
+
+ if (EatIfPresent(lltok::comma)) {
+ if (Lex.getKind() == lltok::kw_align) {
+ if (ParseOptionalAlignment(Alignment)) return true;
+ } else if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
+ ParseOptionalCommaAlignment(Alignment)) {
+ return true;
+ }
+ }
+
+ if (Size && Size->getType() != Type::Int32Ty)
+ return Error(SizeLoc, "element count must be i32");
+
+ if (Opc == Instruction::Malloc)
+ Inst = new MallocInst(Ty, Size, Alignment);
+ else
+ Inst = new AllocaInst(Ty, Size, Alignment);
+ return false;
+}
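+
+// For example (hypothetical input):
+//   %p = alloca i32, i32 %n, align 8
+//   %m = malloc [16 x i8]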
+
+/// ParseFree
+/// ::= 'free' TypeAndValue
+bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val; LocTy Loc;
+ if (ParseTypeAndValue(Val, Loc, PFS)) return true;
+ if (!isa<PointerType>(Val->getType()))
+ return Error(Loc, "operand to free must be a pointer");
+ Inst = new FreeInst(Val);
+ return false;
+}
+
+/// ParseLoad
+/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' uint)?
+bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
+ bool isVolatile) {
+ Value *Val; LocTy Loc;
+ unsigned Alignment;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseOptionalCommaAlignment(Alignment))
+ return true;
+
+ if (!isa<PointerType>(Val->getType()) ||
+ !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
+ return Error(Loc, "load operand must be a pointer to a first class type");
+
+ Inst = new LoadInst(Val, "", isVolatile, Alignment);
+ return false;
+}
+
+/// ParseStore
+/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' uint)?
+bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
+ bool isVolatile) {
+ Value *Val, *Ptr; LocTy Loc, PtrLoc;
+ unsigned Alignment;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after store operand") ||
+ ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+ ParseOptionalCommaAlignment(Alignment))
+ return true;
+
+ if (!isa<PointerType>(Ptr->getType()))
+ return Error(PtrLoc, "store operand must be a pointer");
+ if (!Val->getType()->isFirstClassType())
+ return Error(Loc, "store operand must be a first class value");
+ if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
+ return Error(Loc, "stored value and pointer type do not match");
+
+ Inst = new StoreInst(Val, Ptr, isVolatile, Alignment);
+ return false;
+}
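+
+// For example (hypothetical input):
+//   store i32 %v, i32* %p, align 4
+//   volatile store i32 %v, i32* %p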
+
+/// ParseGetResult
+/// ::= 'getresult' TypeAndValue ',' uint
+/// FIXME: Remove support for getresult in LLVM 3.0
+bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val; LocTy ValLoc, EltLoc;
+ unsigned Element;
+ if (ParseTypeAndValue(Val, ValLoc, PFS) ||
+ ParseToken(lltok::comma, "expected ',' after getresult operand") ||
+ ParseUInt32(Element, EltLoc))
+ return true;
+
+ if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
+ return Error(ValLoc, "getresult inst requires an aggregate operand");
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
+ return Error(EltLoc, "invalid getresult index for value");
+ Inst = ExtractValueInst::Create(Val, Element);
+ return false;
+}
+
+/// ParseGetElementPtr
+/// ::= 'getelementptr' TypeAndValue (',' TypeAndValue)*
+bool LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Ptr, *Val; LocTy Loc, EltLoc;
+ if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
+
+ if (!isa<PointerType>(Ptr->getType()))
+ return Error(Loc, "base of getelementptr must be a pointer");
+
+ SmallVector<Value*, 16> Indices;
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
+ if (!isa<IntegerType>(Val->getType()))
+ return Error(EltLoc, "getelementptr index must be an integer");
+ Indices.push_back(Val);
+ }
+
+ if (!GetElementPtrInst::getIndexedType(Ptr->getType(),
+ Indices.begin(), Indices.end()))
+ return Error(Loc, "invalid getelementptr indices");
+ Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end());
+ return false;
+}
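+
+// For example (hypothetical input):
+//   %f = getelementptr { i32, [4 x i8] }* %s, i32 0, i32 1, i32 2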
+
+/// ParseExtractValue
+/// ::= 'extractvalue' TypeAndValue (',' uint32)+
+bool LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val; LocTy Loc;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseTypeAndValue(Val, Loc, PFS) ||
+ ParseIndexList(Indices))
+ return true;
+
+ if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
+ return Error(Loc, "extractvalue operand must be array or struct");
+
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
+ Indices.end()))
+ return Error(Loc, "invalid indices for extractvalue");
+ Inst = ExtractValueInst::Create(Val, Indices.begin(), Indices.end());
+ return false;
+}
+
+/// ParseInsertValue
+/// ::= 'insertvalue' TypeAndValue ',' TypeAndValue (',' uint32)+
+bool LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *Val0, *Val1; LocTy Loc0, Loc1;
+ SmallVector<unsigned, 4> Indices;
+ if (ParseTypeAndValue(Val0, Loc0, PFS) ||
+ ParseToken(lltok::comma, "expected comma after insertvalue operand") ||
+ ParseTypeAndValue(Val1, Loc1, PFS) ||
+ ParseIndexList(Indices))
+ return true;
+
+ if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
+ return Error(Loc0, "extractvalue operand must be array or struct");
+
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
+ Indices.end()))
+ return Error(Loc0, "invalid indices for insertvalue");
+ Inst = InsertValueInst::Create(Val0, Val1, Indices.begin(), Indices.end());
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Embedded metadata.
+//===----------------------------------------------------------------------===//
+
+/// ParseMDNodeVector
+/// ::= Element (',' Element)*
+/// Element
+/// ::= 'null' | TypeAndValue
+bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex();
+ do {
+ Value *V;
+ if (Lex.getKind() == lltok::kw_null) {
+ Lex.Lex();
+ V = 0;
+ } else {
+ Constant *C;
+ if (ParseGlobalTypeAndValue(C)) return true;
+ V = C;
+ }
+ Elts.push_back(V);
+ } while (EatIfPresent(lltok::comma));
+
+ return false;
+}
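+
+// For example (hypothetical input): !{i32 4, null}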
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
new file mode 100644
index 0000000..7106689
--- /dev/null
+++ b/lib/AsmParser/LLParser.h
@@ -0,0 +1,276 @@
+//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASMPARSER_LLPARSER_H
+#define LLVM_ASMPARSER_LLPARSER_H
+
+#include "LLLexer.h"
+#include "llvm/Type.h"
+#include <map>
+
+namespace llvm {
+ class Module;
+ class OpaqueType;
+ class Function;
+ class Value;
+ class BasicBlock;
+ class Instruction;
+ class Constant;
+ class GlobalValue;
+ class MDString;
+ class MDNode;
+ struct ValID;
+
+ class LLParser {
+ public:
+ typedef LLLexer::LocTy LocTy;
+ private:
+
+ LLLexer Lex;
+ Module *M;
+
+ // Type resolution handling data structures.
+ std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes;
+ std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
+ std::vector<PATypeHolder> NumberedTypes;
+
+ struct UpRefRecord {
+ /// Loc - This is the location of the upref.
+ LocTy Loc;
+
+ /// NestingLevel - The number of nesting levels that need to be popped
+ /// before this type is resolved.
+ unsigned NestingLevel;
+
+ /// LastContainedTy - This is the type at the current binding level for
+ /// the type. Every time we reduce the nesting level, this gets updated.
+ const Type *LastContainedTy;
+
+ /// UpRefTy - This is the actual opaque type that the upreference is
+ /// represented with.
+ OpaqueType *UpRefTy;
+
+ UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy)
+ : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy),
+ UpRefTy(URTy) {}
+ };
+ std::vector<UpRefRecord> UpRefs;
+
+ // Global Value reference information.
+ std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
+ std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
+ std::vector<GlobalValue*> NumberedVals;
+ public:
+ LLParser(MemoryBuffer *F, ParseError &Err, Module *m) : Lex(F, Err), M(m) {}
+ bool Run();
+
+ private:
+
+ bool Error(LocTy L, const std::string &Msg) const {
+ return Lex.Error(L, Msg);
+ }
+ bool TokError(const std::string &Msg) const {
+ return Error(Lex.getLoc(), Msg);
+ }
+
+ /// GetGlobalVal - Get a value with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// exists but does not have the right type.
+ GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc);
+ GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc);
+
+ // Helper Routines.
+ bool ParseToken(lltok::Kind T, const char *ErrMsg);
+ bool EatIfPresent(lltok::Kind T) {
+ if (Lex.getKind() != T) return false;
+ Lex.Lex();
+ return true;
+ }
+ bool ParseOptionalToken(lltok::Kind T, bool &Present) {
+ if (Lex.getKind() != T) {
+ Present = false;
+ } else {
+ Lex.Lex();
+ Present = true;
+ }
+ return false;
+ }
+ bool ParseStringConstant(std::string &Result);
+ bool ParseUInt32(unsigned &Val);
+ bool ParseUInt32(unsigned &Val, LocTy &Loc) {
+ Loc = Lex.getLoc();
+ return ParseUInt32(Val);
+ }
+ bool ParseOptionalAddrSpace(unsigned &AddrSpace);
+ bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
+ bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
+ bool ParseOptionalLinkage(unsigned &Linkage) {
+ bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
+ }
+ bool ParseOptionalVisibility(unsigned &Visibility);
+ bool ParseOptionalCallingConv(unsigned &CC);
+ bool ParseOptionalAlignment(unsigned &Alignment);
+ bool ParseOptionalCommaAlignment(unsigned &Alignment);
+ bool ParseIndexList(SmallVectorImpl<unsigned> &Indices);
+
+ // Top-Level Entities
+ bool ParseTopLevelEntities();
+ bool ValidateEndOfModule();
+ bool ParseTargetDefinition();
+ bool ParseDepLibs();
+ bool ParseModuleAsm();
+ bool ParseUnnamedType();
+ bool ParseNamedType();
+ bool ParseDeclare();
+ bool ParseDefine();
+
+ bool ParseGlobalType(bool &IsConstant);
+ bool ParseNamedGlobal();
+ bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
+ bool HasLinkage, unsigned Visibility);
+ bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
+
+ // Type Parsing.
+ bool ParseType(PATypeHolder &Result, bool AllowVoid = false);
+ bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) {
+ Loc = Lex.getLoc();
+ return ParseType(Result, AllowVoid);
+ }
+ bool ParseTypeRec(PATypeHolder &H);
+ bool ParseStructType(PATypeHolder &H, bool Packed);
+ bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
+ bool ParseFunctionType(PATypeHolder &Result);
+ PATypeHolder HandleUpRefs(const Type *Ty);
+
+ // Constants.
+ bool ParseValID(ValID &ID);
+ bool ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, Constant *&V);
+ bool ParseGlobalValue(const Type *Ty, Constant *&V);
+ bool ParseGlobalTypeAndValue(Constant *&V);
+ bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseMDNodeVector(SmallVectorImpl<Value*> &);
+
+
+ // Function Semantic Analysis.
+ class PerFunctionState {
+ LLParser &P;
+ Function &F;
+ std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
+ std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
+ std::vector<Value*> NumberedVals;
+ public:
+ PerFunctionState(LLParser &p, Function &f);
+ ~PerFunctionState();
+
+ Function &getFunction() const { return F; }
+
+ bool VerifyFunctionComplete();
+
+ /// GetVal - Get a value with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// exists but does not have the right type.
+ Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc);
+ Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc);
+
+ /// SetInstName - After an instruction is parsed and inserted into its
+ /// basic block, this installs its name.
+ bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
+ Instruction *Inst);
+
+ /// GetBB - Get a basic block with the specified name or ID, creating a
+ /// forward reference record if needed. This can return null if the value
+ /// is not a BasicBlock.
+ BasicBlock *GetBB(const std::string &Name, LocTy Loc);
+ BasicBlock *GetBB(unsigned ID, LocTy Loc);
+
+ /// DefineBB - Define the specified basic block, which is either named or
+ /// unnamed. If there is an error, this returns null otherwise it returns
+ /// the block being defined.
+ BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
+ };
+
+ bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState &PFS);
+
+ bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS);
+ bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
+ PerFunctionState &PFS) {
+ Loc = Lex.getLoc();
+ return ParseValue(Ty, V, PFS);
+ }
+
+ bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS);
+ bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
+ Loc = Lex.getLoc();
+ return ParseTypeAndValue(V, PFS);
+ }
+
+ struct ParamInfo {
+ LocTy Loc;
+ Value *V;
+ unsigned Attrs;
+ ParamInfo(LocTy loc, Value *v, unsigned attrs)
+ : Loc(loc), V(v), Attrs(attrs) {}
+ };
+ bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+ PerFunctionState &PFS);
+
+ // Function Parsing.
+ struct ArgInfo {
+ LocTy Loc;
+ PATypeHolder Type;
+ unsigned Attrs;
+ std::string Name;
+ ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N)
+ : Loc(L), Type(Ty), Attrs(Attr), Name(N) {}
+ };
+ bool ParseArgumentList(std::vector<ArgInfo> &ArgList,
+ bool &isVarArg, bool inType);
+ bool ParseFunctionHeader(Function *&Fn, bool isDefine);
+ bool ParseFunctionBody(Function &Fn);
+ bool ParseBasicBlock(PerFunctionState &PFS);
+
+ // Instruction Parsing.
+ bool ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS);
+ bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
+
+ bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
+ bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
+
+ bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
+ unsigned OperandType);
+ bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
+ bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
+ bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
+ bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
+ bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
+ bool ParsePHI(Instruction *&I, PerFunctionState &PFS);
+ bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
+ bool ParseAlloc(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
+ bool ParseFree(Instruction *&I, PerFunctionState &PFS);
+ bool ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ bool ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ bool ParseGetResult(Instruction *&I, PerFunctionState &PFS);
+ bool ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
+ bool ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
+ bool ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
new file mode 100644
index 0000000..d8bd38a
--- /dev/null
+++ b/lib/AsmParser/LLToken.h
@@ -0,0 +1,130 @@
+//===- LLToken.h - Token Codes for LLVM Assembly Files ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the enums for the .ll lexer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBS_ASMPARSER_LLTOKEN_H
+#define LIBS_ASMPARSER_LLTOKEN_H
+
+namespace llvm {
+namespace lltok {
+ enum Kind {
+ // Markers
+ Eof, Error,
+
+ // Tokens with no info.
+ dotdotdot, // ...
+ equal, comma, // = ,
+ star, // *
+ lsquare, rsquare, // [ ]
+ lbrace, rbrace, // { }
+ less, greater, // < >
+ lparen, rparen, // ( )
+ backslash, // \ (not /)
+
+ kw_x,
+ kw_begin, kw_end,
+ kw_true, kw_false,
+ kw_declare, kw_define,
+ kw_global, kw_constant,
+
+ kw_private, kw_internal, kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr,
+ kw_appending, kw_dllimport, kw_dllexport, kw_common,
+ kw_available_externally,
+ kw_default, kw_hidden, kw_protected,
+ kw_extern_weak,
+ kw_external, kw_thread_local,
+ kw_zeroinitializer,
+ kw_undef, kw_null,
+ kw_to,
+ kw_tail,
+ kw_target,
+ kw_triple,
+ kw_deplibs,
+ kw_datalayout,
+ kw_volatile,
+ kw_align,
+ kw_addrspace,
+ kw_section,
+ kw_alias,
+ kw_module,
+ kw_asm,
+ kw_sideeffect,
+ kw_gc,
+ kw_c,
+
+ kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_x86_stdcallcc, kw_x86_fastcallcc,
+
+ kw_signext,
+ kw_zeroext,
+ kw_inreg,
+ kw_sret,
+ kw_nounwind,
+ kw_noreturn,
+ kw_noalias,
+ kw_nocapture,
+ kw_byval,
+ kw_nest,
+ kw_readnone,
+ kw_readonly,
+
+ kw_noinline,
+ kw_alwaysinline,
+ kw_optsize,
+ kw_ssp,
+ kw_sspreq,
+
+ kw_type,
+ kw_opaque,
+
+ kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
+ kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
+ kw_ueq, kw_une,
+
+ // Instruction Opcodes (Opcode in UIntVal).
+ kw_add, kw_sub, kw_mul, kw_udiv, kw_sdiv, kw_fdiv,
+ kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr,
+ kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp, kw_vicmp, kw_vfcmp,
+
+ kw_phi, kw_call,
+ kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp,
+ kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast,
+ kw_select, kw_va_arg,
+
+ kw_ret, kw_br, kw_switch, kw_invoke, kw_unwind, kw_unreachable,
+
+ kw_malloc, kw_alloca, kw_free, kw_load, kw_store, kw_getelementptr,
+
+ kw_extractelement, kw_insertelement, kw_shufflevector, kw_getresult,
+ kw_extractvalue, kw_insertvalue,
+
+ // Unsigned Valued tokens (UIntVal).
+ GlobalID, // @42
+ LocalVarID, // %42
+
+ // String valued tokens (StrVal).
+ LabelStr, // foo:
+ GlobalVar, // @foo @"foo"
+ LocalVar, // %foo %"foo"
+ StringConstant, // "foo"
+
+ // Metadata valued tokens.
+ Metadata, // !"foo" !{i8 42}
+
+ // Type valued tokens (TyVal).
+ Type,
+
+ APFloat, // APFloatVal
+ APSInt // APSInt
+ };
+} // end namespace lltok
+} // end namespace llvm
+
+#endif
diff --git a/lib/AsmParser/Makefile b/lib/AsmParser/Makefile
new file mode 100644
index 0000000..995bb0e
--- /dev/null
+++ b/lib/AsmParser/Makefile
@@ -0,0 +1,14 @@
+##===- lib/AsmParser/Makefile ------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME := LLVMAsmParser
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
new file mode 100644
index 0000000..759e00e
--- /dev/null
+++ b/lib/AsmParser/Parser.cpp
@@ -0,0 +1,87 @@
+//===- Parser.cpp - Main dispatch module for the Parser library -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Parser.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Parser.h"
+#include "LLParser.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+using namespace llvm;
+
+Module *llvm::ParseAssemblyFile(const std::string &Filename, ParseError &Err) {
+ Err.setFilename(Filename);
+
+ std::string ErrorStr;
+ OwningPtr<MemoryBuffer>
+ F(MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr));
+ if (F == 0) {
+ Err.setError("Could not open input file '" + Filename + "'");
+ return 0;
+ }
+
+ OwningPtr<Module> M(new Module(Filename));
+ if (LLParser(F.get(), Err, M.get()).Run())
+ return 0;
+ return M.take();
+}
+
+Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
+ ParseError &Err) {
+ Err.setFilename("<string>");
+
+ OwningPtr<MemoryBuffer>
+ F(MemoryBuffer::getMemBuffer(AsmString, AsmString+strlen(AsmString),
+ "<string>"));
+
+ // If we are parsing into an existing module, do it.
+ if (M)
+ return LLParser(F.get(), Err, M).Run() ? 0 : M;
+
+ // Otherwise create a new module.
+ OwningPtr<Module> M2(new Module("<string>"));
+ if (LLParser(F.get(), Err, M2.get()).Run())
+ return 0;
+ return M2.take();
+}
+
+
+//===----------------------------------------------------------------------===//
+// ParseError Class
+//===----------------------------------------------------------------------===//
+
+void ParseError::PrintError(const char *ProgName, raw_ostream &S) {
+ S << ProgName << ": ";
+ if (Filename == "-")
+ S << "<stdin>";
+ else
+ S << Filename;
+
+ if (LineNo != -1) {
+ S << ':' << LineNo;
+ if (ColumnNo != -1)
+ S << ':' << (ColumnNo+1);
+ }
+
+ S << ": " << Message << '\n';
+
+ if (LineNo != -1 && ColumnNo != -1) {
+ S << LineContents << '\n';
+
+ // Print out spaces/tabs before the caret.
+ for (unsigned i = 0; i != unsigned(ColumnNo); ++i)
+ S << (LineContents[i] == '\t' ? '\t' : ' ');
+ S << "^\n";
+ }
+}
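+
+// Sample diagnostic for a hypothetical parse failure in 'bad.ll', as printed
+// by a client such as llvm-as:
+//   llvm-as: bad.ll:3:14: expected ',' after store operand
+//   store i32 %v i32* %p
+//                ^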
diff --git a/lib/Bitcode/Makefile b/lib/Bitcode/Makefile
new file mode 100644
index 0000000..2d6b5ad
--- /dev/null
+++ b/lib/Bitcode/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Bitcode/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+PARALLEL_DIRS = Reader Writer
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
new file mode 100644
index 0000000..52851cd
--- /dev/null
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -0,0 +1,51 @@
+//===-- BitReader.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitReader.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <string>
+#include <cstring>
+
+using namespace llvm;
+
+/* Builds a module from the bitcode in the specified memory buffer, returning a
+ reference to the module via the OutModule parameter. Returns 0 on success.
+ Optionally returns a human-readable error message via OutMessage. */
+int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule, char **OutMessage) {
+ std::string Message;
+
+ *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), &Message));
+ if (!*OutModule) {
+ if (OutMessage)
+ *OutMessage = strdup(Message.c_str());
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Builds a lazily-deserializing module provider from the bitcode in the
+   specified memory buffer, returning a reference to it via the OutMP
+   parameter. Returns 0 on success. Optionally returns a human-readable
+   error message via OutMessage. */
+int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ std::string Message;
+
+ *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), &Message));
+ if (!*OutMP) {
+ if (OutMessage)
+ *OutMessage = strdup(Message.c_str());
+ return 1;
+ }
+
+ return 0;
+}
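+
+/* A minimal usage sketch from C (error handling abbreviated; assumes MemBuf
+   was created with, e.g., LLVMCreateMemoryBufferWithContentsOfFile):
+
+     LLVMModuleRef M;
+     char *Msg;
+     if (LLVMParseBitcode(MemBuf, &M, &Msg)) {
+       fprintf(stderr, "bitcode error: %s\n", Msg);
+       free(Msg);
+     }
+*/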
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
new file mode 100644
index 0000000..1dad04b
--- /dev/null
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -0,0 +1,2126 @@
+//===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "BitcodeReader.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/OperandTraits.h"
+using namespace llvm;
+
+void BitcodeReader::FreeState() {
+ delete Buffer;
+ Buffer = 0;
+ std::vector<PATypeHolder>().swap(TypeList);
+ ValueList.clear();
+
+ std::vector<AttrListPtr>().swap(MAttributes);
+ std::vector<BasicBlock*>().swap(FunctionBBs);
+ std::vector<Function*>().swap(FunctionsWithBodies);
+ DeferredFunctionInfo.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions to implement forward reference resolution, etc.
+//===----------------------------------------------------------------------===//
+
+/// ConvertToString - Append the characters of a record to a string object
+/// (e.g. std::string or SmallString), returning true on failure.
+template<typename StrTy>
+static bool ConvertToString(SmallVector<uint64_t, 64> &Record, unsigned Idx,
+ StrTy &Result) {
+ if (Idx > Record.size())
+ return true;
+
+ for (unsigned i = Idx, e = Record.size(); i != e; ++i)
+ Result += (char)Record[i];
+ return false;
+}
+
+static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown/new linkages to external
+ case 0: return GlobalValue::ExternalLinkage;
+ case 1: return GlobalValue::WeakAnyLinkage;
+ case 2: return GlobalValue::AppendingLinkage;
+ case 3: return GlobalValue::InternalLinkage;
+ case 4: return GlobalValue::LinkOnceAnyLinkage;
+ case 5: return GlobalValue::DLLImportLinkage;
+ case 6: return GlobalValue::DLLExportLinkage;
+ case 7: return GlobalValue::ExternalWeakLinkage;
+ case 8: return GlobalValue::CommonLinkage;
+ case 9: return GlobalValue::PrivateLinkage;
+ case 10: return GlobalValue::WeakODRLinkage;
+ case 11: return GlobalValue::LinkOnceODRLinkage;
+ case 12: return GlobalValue::AvailableExternallyLinkage;
+ }
+}
+
+static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown visibilities to default.
+ case 0: return GlobalValue::DefaultVisibility;
+ case 1: return GlobalValue::HiddenVisibility;
+ case 2: return GlobalValue::ProtectedVisibility;
+ }
+}
+
+static int GetDecodedCastOpcode(unsigned Val) {
+ switch (Val) {
+ default: return -1;
+ case bitc::CAST_TRUNC : return Instruction::Trunc;
+ case bitc::CAST_ZEXT : return Instruction::ZExt;
+ case bitc::CAST_SEXT : return Instruction::SExt;
+ case bitc::CAST_FPTOUI : return Instruction::FPToUI;
+ case bitc::CAST_FPTOSI : return Instruction::FPToSI;
+ case bitc::CAST_UITOFP : return Instruction::UIToFP;
+ case bitc::CAST_SITOFP : return Instruction::SIToFP;
+ case bitc::CAST_FPTRUNC : return Instruction::FPTrunc;
+ case bitc::CAST_FPEXT : return Instruction::FPExt;
+ case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
+ case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
+ case bitc::CAST_BITCAST : return Instruction::BitCast;
+ }
+}
+static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
+ switch (Val) {
+ default: return -1;
+ case bitc::BINOP_ADD: return Instruction::Add;
+ case bitc::BINOP_SUB: return Instruction::Sub;
+ case bitc::BINOP_MUL: return Instruction::Mul;
+ case bitc::BINOP_UDIV: return Instruction::UDiv;
+ case bitc::BINOP_SDIV:
+ return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv;
+ case bitc::BINOP_UREM: return Instruction::URem;
+ case bitc::BINOP_SREM:
+ return Ty->isFPOrFPVector() ? Instruction::FRem : Instruction::SRem;
+ case bitc::BINOP_SHL: return Instruction::Shl;
+ case bitc::BINOP_LSHR: return Instruction::LShr;
+ case bitc::BINOP_ASHR: return Instruction::AShr;
+ case bitc::BINOP_AND: return Instruction::And;
+ case bitc::BINOP_OR: return Instruction::Or;
+ case bitc::BINOP_XOR: return Instruction::Xor;
+ }
+}
+
+namespace llvm {
+namespace {
+ /// @brief A placeholder constant that occupies a slot until the actual
+ /// definition of a forward-referenced constant has been read.
+ class ConstantPlaceHolder : public ConstantExpr {
+ ConstantPlaceHolder(); // DO NOT IMPLEMENT
+ void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT
+ public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ explicit ConstantPlaceHolder(const Type *Ty)
+ : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
+ Op<0>() = UndefValue::get(Type::Int32Ty);
+ }
+
+ /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const ConstantPlaceHolder *) { return true; }
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
+ }
+
+
+ /// Provide fast operand accessors
+ //DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+ };
+}
+
+// FIXME: can we inherit this from ConstantExpr?
+template <>
+struct OperandTraits<ConstantPlaceHolder> : FixedNumOperandTraits<1> {
+};
+}
+
+
+void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx+1);
+
+ WeakVH &OldV = ValuePtrs[Idx];
+ if (OldV == 0) {
+ OldV = V;
+ return;
+ }
+
+ // Handle constants and non-constants (e.g. instrs) differently for
+ // efficiency.
+ if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
+ ResolveConstants.push_back(std::make_pair(PHC, Idx));
+ OldV = V;
+ } else {
+ // If there was a forward reference to this value, replace it.
+ Value *PrevVal = OldV;
+ OldV->replaceAllUsesWith(V);
+ delete PrevVal;
+ }
+}
+
+
+Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
+ const Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ assert(Ty == V->getType() && "Type mismatch in constant table!");
+ return cast<Constant>(V);
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Constant *C = new ConstantPlaceHolder(Ty);
+ ValuePtrs[Idx] = C;
+ return C;
+}
+
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!");
+ return V;
+ }
+
+ // No type specified, must be invalid reference.
+ if (Ty == 0) return 0;
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = new Argument(Ty);
+ ValuePtrs[Idx] = V;
+ return V;
+}
+
+/// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+/// resolves any forward references. The idea behind this is that we sometimes
+/// get constants (such as large arrays) which reference *many* forward ref
+/// constants. Replacing each of these causes a lot of thrashing when
+/// building/reuniquing the constant. Instead of doing this, we look at all the
+/// uses and rewrite all the place holders at once for any constant that uses
+/// a placeholder.
+void BitcodeReaderValueList::ResolveConstantForwardRefs() {
+ // Sort the values by-pointer so that they are efficient to look up with a
+ // binary search.
+ std::sort(ResolveConstants.begin(), ResolveConstants.end());
+
+ SmallVector<Constant*, 64> NewOps;
+
+ while (!ResolveConstants.empty()) {
+ Value *RealVal = operator[](ResolveConstants.back().second);
+ Constant *Placeholder = ResolveConstants.back().first;
+ ResolveConstants.pop_back();
+
+ // Loop over all users of the placeholder, updating them to reference the
+ // new value. If they reference more than one placeholder, update them all
+ // at once.
+ while (!Placeholder->use_empty()) {
+ Value::use_iterator UI = Placeholder->use_begin();
+
+ // If the using object isn't uniqued, just update the operands. This
+ // handles instructions and initializers for global variables.
+ if (!isa<Constant>(*UI) || isa<GlobalValue>(*UI)) {
+ UI.getUse().set(RealVal);
+ continue;
+ }
+
+ // Otherwise, we have a constant that uses the placeholder. Replace that
+ // constant with a new constant that has *all* placeholder uses updated.
+ Constant *UserC = cast<Constant>(*UI);
+ for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end();
+ I != E; ++I) {
+ Value *NewOp;
+ if (!isa<ConstantPlaceHolder>(*I)) {
+ // Not a placeholder reference.
+ NewOp = *I;
+ } else if (*I == Placeholder) {
+ // Common case is that it just references this one placeholder.
+ NewOp = RealVal;
+ } else {
+ // Otherwise, look up the placeholder in ResolveConstants.
+ ResolveConstantsTy::iterator It =
+ std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
+ std::pair<Constant*, unsigned>(cast<Constant>(*I),
+ 0));
+ assert(It != ResolveConstants.end() && It->first == *I);
+ NewOp = operator[](It->second);
+ }
+
+ NewOps.push_back(cast<Constant>(NewOp));
+ }
+
+ // Make the new constant.
+ Constant *NewC;
+ if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
+ NewC = ConstantArray::get(UserCA->getType(), &NewOps[0], NewOps.size());
+ } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
+ NewC = ConstantStruct::get(&NewOps[0], NewOps.size(),
+ UserCS->getType()->isPacked());
+ } else if (isa<ConstantVector>(UserC)) {
+ NewC = ConstantVector::get(&NewOps[0], NewOps.size());
+ } else {
+ assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
+ NewC = cast<ConstantExpr>(UserC)->getWithOperands(&NewOps[0],
+ NewOps.size());
+ }
+
+ UserC->replaceAllUsesWith(NewC);
+ UserC->destroyConstant();
+ NewOps.clear();
+ }
+
+ // Update all ValueHandles, they should be the only users at this point.
+ Placeholder->replaceAllUsesWith(RealVal);
+ delete Placeholder;
+ }
+}
+
+
+const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) {
+ // If the TypeID is in range, return it.
+ if (ID < TypeList.size())
+ return TypeList[ID].get();
+ if (!isTypeTable) return 0;
+
+ // The type table allows forward references. Push as many Opaque types as
+ // needed to get up to ID.
+ while (TypeList.size() <= ID)
+ TypeList.push_back(OpaqueType::get());
+ return TypeList.back().get();
+}
+
+//===----------------------------------------------------------------------===//
+// Functions for parsing blocks from the bitcode file
+//===----------------------------------------------------------------------===//
+
+bool BitcodeReader::ParseAttributeBlock() {
+ if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
+ return Error("Malformed block record");
+
+ if (!MAttributes.empty())
+ return Error("Multiple PARAMATTR blocks found!");
+
+ SmallVector<uint64_t, 64> Record;
+
+ SmallVector<AttributeWithIndex, 8> Attrs;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of PARAMATTR block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...]
+ if (Record.size() & 1)
+ return Error("Invalid ENTRY record");
+
+ // FIXME: Remove this autoupgrade code in LLVM 3.0.
+ // If function attributes were stored at index 0, transfer them to
+ // index ~0: index 0 is now used for return-value attributes but was
+ // formerly used for function attributes.
+ Attributes RetAttribute = Attribute::None;
+ Attributes FnAttribute = Attribute::None;
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ // FIXME: remove in LLVM 3.0
+ // The alignment is stored as a 16-bit raw value in bits 31-16.
+ // Attribute bits 47-32 are shifted down by 11 bits.
+
+ unsigned Alignment = (Record[i+1] & (0xffffull << 16)) >> 16;
+ if (Alignment && !isPowerOf2_32(Alignment))
+ return Error("Alignment is not a power of two.");
+
+ Attributes ReconstitutedAttr = Record[i+1] & 0xffff;
+ if (Alignment)
+ ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment);
+ ReconstitutedAttr |= (Record[i+1] & (0xffffull << 32)) >> 11;
+ Record[i+1] = ReconstitutedAttr;
+
+ if (Record[i] == 0)
+ RetAttribute = Record[i+1];
+ else if (Record[i] == ~0U)
+ FnAttribute = Record[i+1];
+ }
+
+ unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn|
+ Attribute::ReadOnly|Attribute::ReadNone);
+
+ if (FnAttribute == Attribute::None && RetAttribute != Attribute::None &&
+ (RetAttribute & OldRetAttrs) != 0) {
+ // Add a function-attribute slot so the transferred bits get added below.
+ Record.push_back(~0U);
+ Record.push_back(0);
+
+ FnAttribute |= RetAttribute & OldRetAttrs;
+ RetAttribute &= ~OldRetAttrs;
+ }
+
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ if (Record[i] == 0) {
+ if (RetAttribute != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(0, RetAttribute));
+ } else if (Record[i] == ~0U) {
+ if (FnAttribute != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute));
+ } else if (Record[i+1] != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(Record[i], Record[i+1]));
+ }
+
+ MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end()));
+ Attrs.clear();
+ break;
+ }
+ }
+ }
+}
+
+
+bool BitcodeReader::ParseTypeTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ if (!TypeList.empty())
+ return Error("Multiple TYPE_BLOCKs found!");
+
+ SmallVector<uint64_t, 64> Record;
+ unsigned NumRecords = 0;
+
+ // Read all the records for this type table.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (NumRecords != TypeList.size())
+ return Error("Invalid type forward reference in TYPE_BLOCK");
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ const Type *ResultTy = 0;
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ ResultTy = 0;
+ break;
+ case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
+ // TYPE_CODE_NUMENTRY contains a count of the number of types in the
+ // type list. This allows us to reserve space.
+ if (Record.size() < 1)
+ return Error("Invalid TYPE_CODE_NUMENTRY record");
+ TypeList.reserve(Record[0]);
+ continue;
+ case bitc::TYPE_CODE_VOID: // VOID
+ ResultTy = Type::VoidTy;
+ break;
+ case bitc::TYPE_CODE_FLOAT: // FLOAT
+ ResultTy = Type::FloatTy;
+ break;
+ case bitc::TYPE_CODE_DOUBLE: // DOUBLE
+ ResultTy = Type::DoubleTy;
+ break;
+ case bitc::TYPE_CODE_X86_FP80: // X86_FP80
+ ResultTy = Type::X86_FP80Ty;
+ break;
+ case bitc::TYPE_CODE_FP128: // FP128
+ ResultTy = Type::FP128Ty;
+ break;
+ case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
+ ResultTy = Type::PPC_FP128Ty;
+ break;
+ case bitc::TYPE_CODE_LABEL: // LABEL
+ ResultTy = Type::LabelTy;
+ break;
+ case bitc::TYPE_CODE_OPAQUE: // OPAQUE
+ ResultTy = 0;
+ break;
+ case bitc::TYPE_CODE_METADATA: // METADATA
+ ResultTy = Type::MetadataTy;
+ break;
+ case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
+ if (Record.size() < 1)
+ return Error("Invalid Integer type record");
+
+ ResultTy = IntegerType::get(Record[0]);
+ break;
+ case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+ // [pointee type, address space]
+ if (Record.size() < 1)
+ return Error("Invalid POINTER type record");
+ unsigned AddressSpace = 0;
+ if (Record.size() == 2)
+ AddressSpace = Record[1];
+ ResultTy = PointerType::get(getTypeByID(Record[0], true), AddressSpace);
+ break;
+ }
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FIXME: attrid is dead, remove it in LLVM 3.0
+ // FUNCTION: [vararg, attrid, retty, paramty x N]
+ if (Record.size() < 3)
+ return Error("Invalid FUNCTION type record");
+ std::vector<const Type*> ArgTys;
+ for (unsigned i = 3, e = Record.size(); i != e; ++i)
+ ArgTys.push_back(getTypeByID(Record[i], true));
+
+ ResultTy = FunctionType::get(getTypeByID(Record[2], true), ArgTys,
+ Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_STRUCT: { // STRUCT: [ispacked, eltty x N]
+ if (Record.size() < 1)
+ return Error("Invalid STRUCT type record");
+ std::vector<const Type*> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i)
+ EltTys.push_back(getTypeByID(Record[i], true));
+ ResultTy = StructType::get(EltTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid ARRAY type record");
+ ResultTy = ArrayType::get(getTypeByID(Record[1], true), Record[0]);
+ break;
+ case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid VECTOR type record");
+ ResultTy = VectorType::get(getTypeByID(Record[1], true), Record[0]);
+ break;
+ }
+
+ if (NumRecords == TypeList.size()) {
+ // If this is a new type slot, just append it.
+ TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get());
+ ++NumRecords;
+ } else if (ResultTy == 0) {
+ // Otherwise, this was forward referenced, so an opaque type was created,
+ // but the result type is actually just an opaque. Leave the one we
+ // created previously.
+ ++NumRecords;
+ } else {
+ // Otherwise, this was forward referenced, so an opaque type was created.
+ // Resolve the opaque type to the real type now.
+ assert(NumRecords < TypeList.size() && "Typelist imbalance");
+ const OpaqueType *OldTy = cast<OpaqueType>(TypeList[NumRecords++].get());
+
+ // Don't directly push the new type on the Tab. Instead we want to replace
+ // the opaque type we previously inserted with the new concrete value. The
+ // refinement from the abstract (opaque) type to the new type causes all
+ // uses of the abstract type to use the concrete type (NewTy). This will
+ // also cause the opaque type to be deleted.
+ const_cast<OpaqueType*>(OldTy)->refineAbstractTypeTo(ResultTy);
+
+ // This should have replaced the old opaque type with the new type in the
+ // value table... or with a preexisting type that was already in the
+ // system. Let's just make sure it did.
+ assert(TypeList[NumRecords-1].get() != OldTy &&
+ "refineAbstractType didn't work!");
+ }
+ }
+}
+
+
+bool BitcodeReader::ParseTypeSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this type table.
+ std::string TypeName;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type symbol table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N]
+ if (ConvertToString(Record, 1, TypeName))
+ return Error("Invalid TST_ENTRY record");
+ unsigned TypeID = Record[0];
+ if (TypeID >= TypeList.size())
+ return Error("Invalid Type ID in TST_ENTRY record");
+
+ TheModule->addTypeName(TypeName, TypeList[TypeID].get());
+ TypeName.clear();
+ break;
+ }
+ }
+}
+
+bool BitcodeReader::ParseValueSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ SmallString<128> ValueName;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of value symbol table block");
+ return false;
+ }
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
+ if (ConvertToString(Record, 1, ValueName))
+ return Error("Invalid VST_ENTRY record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size())
+ return Error("Invalid Value ID in VST_ENTRY record");
+ Value *V = ValueList[ValueID];
+
+ V->setName(&ValueName[0], ValueName.size());
+ ValueName.clear();
+ break;
+ }
+ case bitc::VST_CODE_BBENTRY: {
+ if (ConvertToString(Record, 1, ValueName))
+ return Error("Invalid VST_BBENTRY record");
+ BasicBlock *BB = getBasicBlock(Record[0]);
+ if (BB == 0)
+ return Error("Invalid BB ID in VST_BBENTRY record");
+
+ BB->setName(&ValueName[0], ValueName.size());
+ ValueName.clear();
+ break;
+ }
+ }
+ }
+}
+
+/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in
+/// the LSB for dense VBR encoding.
+static uint64_t DecodeSignRotatedValue(uint64_t V) {
+ if ((V & 1) == 0)
+ return V >> 1;
+ if (V != 1)
+ return -(V >> 1);
+ // There is no such thing as -0 with integers. "-0" really means MININT.
+ return 1ULL << 63;
+}
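+
+// Worked examples: 0 -> 0, 2 -> 1, 4 -> 2 (even encodings are non-negative);
+// 3 -> -1, 5 -> -2 (odd encodings decode to negatives); the reserved
+// encoding 1 decodes to the minimum value, 1ULL << 63.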
+
+/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
+/// values and aliases that we can.
+bool BitcodeReader::ResolveGlobalAndAliasInits() {
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
+
+ GlobalInitWorklist.swap(GlobalInits);
+ AliasInitWorklist.swap(AliasInits);
+
+ while (!GlobalInitWorklist.empty()) {
+ unsigned ValID = GlobalInitWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+ // Not ready to resolve this yet, it requires something later in the file.
+ GlobalInits.push_back(GlobalInitWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ GlobalInitWorklist.back().first->setInitializer(C);
+ else
+ return Error("Global variable initializer is not a constant!");
+ }
+ GlobalInitWorklist.pop_back();
+ }
+
+ while (!AliasInitWorklist.empty()) {
+ unsigned ValID = AliasInitWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+ AliasInits.push_back(AliasInitWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ AliasInitWorklist.back().first->setAliasee(C);
+ else
+ return Error("Alias initializer is not a constant!");
+ }
+ AliasInitWorklist.pop_back();
+ }
+ return false;
+}
+
+
+bool BitcodeReader::ParseConstants() {
+ if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ const Type *CurTy = Type::Int32Ty;
+ unsigned NextCstNo = ValueList.size();
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK)
+ break;
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Value *V = 0;
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown constant
+ case bitc::CST_CODE_UNDEF: // UNDEF
+ V = UndefValue::get(CurTy);
+ break;
+ case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
+ if (Record.empty())
+ return Error("Malformed CST_SETTYPE record");
+ if (Record[0] >= TypeList.size())
+ return Error("Invalid Type ID in CST_SETTYPE record");
+ CurTy = TypeList[Record[0]];
+ continue; // Skip the ValueList manipulation.
+ case bitc::CST_CODE_NULL: // NULL
+ V = Constant::getNullValue(CurTy);
+ break;
+ case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
+ if (!isa<IntegerType>(CurTy) || Record.empty())
+ return Error("Invalid CST_INTEGER record");
+ V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0]));
+ break;
+ case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
+ if (!isa<IntegerType>(CurTy) || Record.empty())
+ return Error("Invalid WIDE_INTEGER record");
+
+ unsigned NumWords = Record.size();
+ SmallVector<uint64_t, 8> Words;
+ Words.resize(NumWords);
+ for (unsigned i = 0; i != NumWords; ++i)
+ Words[i] = DecodeSignRotatedValue(Record[i]);
+ V = ConstantInt::get(APInt(cast<IntegerType>(CurTy)->getBitWidth(),
+ NumWords, &Words[0]));
+ break;
+ }
+ case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
+ if (Record.empty())
+ return Error("Invalid FLOAT record");
+ if (CurTy == Type::FloatTy)
+ V = ConstantFP::get(APFloat(APInt(32, (uint32_t)Record[0])));
+ else if (CurTy == Type::DoubleTy)
+ V = ConstantFP::get(APFloat(APInt(64, Record[0])));
+ else if (CurTy == Type::X86_FP80Ty) {
+ // Bits are not stored the same way as a normal i80 APInt, compensate.
+ uint64_t Rearrange[2];
+ Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
+ Rearrange[1] = Record[0] >> 48;
+ V = ConstantFP::get(APFloat(APInt(80, 2, Rearrange)));
+ } else if (CurTy == Type::FP128Ty)
+ V = ConstantFP::get(APFloat(APInt(128, 2, &Record[0]), true));
+ else if (CurTy == Type::PPC_FP128Ty)
+ V = ConstantFP::get(APFloat(APInt(128, 2, &Record[0])));
+ else
+ V = UndefValue::get(CurTy);
+ break;
+ }
+
+ case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
+ if (Record.empty())
+ return Error("Invalid CST_AGGREGATE record");
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+
+ if (const StructType *STy = dyn_cast<StructType>(CurTy)) {
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i],
+ STy->getElementType(i)));
+ V = ConstantStruct::get(STy, Elts);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+ V = ConstantArray::get(ATy, Elts);
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
+ const Type *EltTy = VTy->getElementType();
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+ V = ConstantVector::get(Elts);
+ } else {
+ V = UndefValue::get(CurTy);
+ }
+ break;
+ }
+ case bitc::CST_CODE_STRING: { // STRING: [values]
+ if (Record.empty())
+        return Error("Invalid CST_STRING record");
+
+ const ArrayType *ATy = cast<ArrayType>(CurTy);
+ const Type *EltTy = ATy->getElementType();
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ConstantInt::get(EltTy, Record[i]));
+ V = ConstantArray::get(ATy, Elts);
+ break;
+ }
+ case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
+ if (Record.empty())
+        return Error("Invalid CST_CSTRING record");
+
+ const ArrayType *ATy = cast<ArrayType>(CurTy);
+ const Type *EltTy = ATy->getElementType();
+
+ unsigned Size = Record.size();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(ConstantInt::get(EltTy, Record[i]));
+ Elts.push_back(Constant::getNullValue(EltTy));
+ V = ConstantArray::get(ATy, Elts);
+ break;
+ }
+ case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
+ if (Record.size() < 3) return Error("Invalid CE_BINOP record");
+ int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown binop.
+ } else {
+ Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
+ Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy);
+ V = ConstantExpr::get(Opc, LHS, RHS);
+ }
+ break;
+ }
+ case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
+ if (Record.size() < 3) return Error("Invalid CE_CAST record");
+ int Opc = GetDecodedCastOpcode(Record[0]);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown cast.
+ } else {
+ const Type *OpTy = getTypeByID(Record[1]);
+ if (!OpTy) return Error("Invalid CE_CAST record");
+ Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
+ V = ConstantExpr::getCast(Opc, Op, CurTy);
+ }
+ break;
+ }
+ case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
+ if (Record.size() & 1) return Error("Invalid CE_GEP record");
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ const Type *ElTy = getTypeByID(Record[i]);
+ if (!ElTy) return Error("Invalid CE_GEP record");
+ Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
+ }
+ V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1], Elts.size()-1);
+ break;
+ }
+ case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
+ if (Record.size() < 3) return Error("Invalid CE_SELECT record");
+ V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
+ Type::Int1Ty),
+ ValueList.getConstantFwdRef(Record[1],CurTy),
+ ValueList.getConstantFwdRef(Record[2],CurTy));
+ break;
+ case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
+ if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
+ const VectorType *OpTy =
+ dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+ if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::Int32Ty);
+ V = ConstantExpr::getExtractElement(Op0, Op1);
+ break;
+ }
+ case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval]
+ const VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ if (Record.size() < 3 || OpTy == 0)
+ return Error("Invalid CE_INSERTELT record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
+ OpTy->getElementType());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::Int32Ty);
+ V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
+ const VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+ if (Record.size() < 3 || OpTy == 0)
+ return Error("Invalid CE_SHUFFLEVEC record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ const Type *ShufTy=VectorType::get(Type::Int32Ty, OpTy->getNumElements());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy);
+ V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval]
+ const VectorType *RTy = dyn_cast<VectorType>(CurTy);
+ const VectorType *OpTy = dyn_cast<VectorType>(getTypeByID(Record[0]));
+ if (Record.size() < 4 || RTy == 0 || OpTy == 0)
+ return Error("Invalid CE_SHUFVEC_EX record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+ const Type *ShufTy=VectorType::get(Type::Int32Ty, RTy->getNumElements());
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy);
+ V = ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ break;
+ }
+ case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
+ if (Record.size() < 4) return Error("Invalid CE_CMP record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ if (OpTy == 0) return Error("Invalid CE_CMP record");
+ Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+
+ if (OpTy->isFloatingPoint())
+ V = ConstantExpr::getFCmp(Record[3], Op0, Op1);
+ else if (!isa<VectorType>(OpTy))
+ V = ConstantExpr::getICmp(Record[3], Op0, Op1);
+ else if (OpTy->isFPOrFPVector())
+ V = ConstantExpr::getVFCmp(Record[3], Op0, Op1);
+ else
+ V = ConstantExpr::getVICmp(Record[3], Op0, Op1);
+ break;
+ }
+ case bitc::CST_CODE_INLINEASM: {
+ if (Record.size() < 2) return Error("Invalid INLINEASM record");
+ std::string AsmStr, ConstrStr;
+ bool HasSideEffects = Record[0];
+ unsigned AsmStrSize = Record[1];
+ if (2+AsmStrSize >= Record.size())
+ return Error("Invalid INLINEASM record");
+ unsigned ConstStrSize = Record[2+AsmStrSize];
+ if (3+AsmStrSize+ConstStrSize > Record.size())
+ return Error("Invalid INLINEASM record");
+
+ for (unsigned i = 0; i != AsmStrSize; ++i)
+ AsmStr += (char)Record[2+i];
+ for (unsigned i = 0; i != ConstStrSize; ++i)
+ ConstrStr += (char)Record[3+AsmStrSize+i];
+ const PointerType *PTy = cast<PointerType>(CurTy);
+ V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
+ AsmStr, ConstrStr, HasSideEffects);
+ break;
+ }
+ case bitc::CST_CODE_MDSTRING: {
+ if (Record.size() < 2) return Error("Invalid MDSTRING record");
+ unsigned MDStringLength = Record.size();
+ SmallString<8> String;
+ String.resize(MDStringLength);
+ for (unsigned i = 0; i != MDStringLength; ++i)
+ String[i] = Record[i];
+ V = MDString::get(String.c_str(), String.c_str() + MDStringLength);
+ break;
+ }
+ case bitc::CST_CODE_MDNODE: {
+ if (Record.empty() || Record.size() % 2 == 1)
+ return Error("Invalid CST_MDNODE record");
+
+ unsigned Size = Record.size();
+ SmallVector<Value*, 8> Elts;
+ for (unsigned i = 0; i != Size; i += 2) {
+ const Type *Ty = getTypeByID(Record[i], false);
+ if (Ty != Type::VoidTy)
+ Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
+ else
+ Elts.push_back(NULL);
+ }
+ V = MDNode::get(&Elts[0], Elts.size());
+ break;
+ }
+ }
+
+ ValueList.AssignValue(V, NextCstNo);
+ ++NextCstNo;
+ }
+
+ if (NextCstNo != ValueList.size())
+ return Error("Invalid constant reference!");
+
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of constants block");
+
+ // Once all the constants have been read, go through and resolve forward
+ // references.
+ ValueList.ResolveConstantForwardRefs();
+ return false;
+}
+
+/// RememberAndSkipFunctionBody - When we see the block for a function body,
+/// remember where it is and then skip it. This lets us lazily deserialize the
+/// functions.
+bool BitcodeReader::RememberAndSkipFunctionBody() {
+ // Get the function we are talking about.
+ if (FunctionsWithBodies.empty())
+ return Error("Insufficient function protos");
+
+ Function *Fn = FunctionsWithBodies.back();
+ FunctionsWithBodies.pop_back();
+
+ // Save the current stream state.
+ uint64_t CurBit = Stream.GetCurrentBitNo();
+ DeferredFunctionInfo[Fn] = std::make_pair(CurBit, Fn->getLinkage());
+
+  // Set the function's linkage to GhostLinkage so we know it is lazily
+ // deserialized.
+ Fn->setLinkage(GlobalValue::GhostLinkage);
+
+ // Skip over the function block for now.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ return false;
+}
+
+bool BitcodeReader::ParseModule(const std::string &ModuleID) {
+ // Reject multiple MODULE_BLOCK's in a single bitstream.
+ if (TheModule)
+ return Error("Multiple MODULE_BLOCKs in same stream");
+
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ // Otherwise, create the module.
+ TheModule = new Module(ModuleID);
+
+ SmallVector<uint64_t, 64> Record;
+ std::vector<std::string> SectionTable;
+ std::vector<std::string> GCTable;
+
+ // Read all the records for this module.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of module block");
+
+ // Patch the initializers for globals and aliases up.
+ ResolveGlobalAndAliasInits();
+ if (!GlobalInits.empty() || !AliasInits.empty())
+ return Error("Malformed global initializer set");
+ if (!FunctionsWithBodies.empty())
+ return Error("Too few function bodies found");
+
+ // Look for intrinsic functions which need to be upgraded at some point
+ for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ Function* NewFn;
+ if (UpgradeIntrinsicFunction(FI, NewFn))
+ UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
+ }
+
+      // Force deallocation of memory for these vectors to favor clients that
+      // want lazy deserialization.
+ std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
+ std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
+ std::vector<Function*>().swap(FunctionsWithBodies);
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::PARAMATTR_BLOCK_ID:
+ if (ParseAttributeBlock())
+ return true;
+ break;
+ case bitc::TYPE_BLOCK_ID:
+ if (ParseTypeTable())
+ return true;
+ break;
+ case bitc::TYPE_SYMTAB_BLOCK_ID:
+ if (ParseTypeSymbolTable())
+ return true;
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (ParseValueSymbolTable())
+ return true;
+ break;
+ case bitc::CONSTANTS_BLOCK_ID:
+ if (ParseConstants() || ResolveGlobalAndAliasInits())
+ return true;
+ break;
+ case bitc::FUNCTION_BLOCK_ID:
+ // If this is the first function body we've seen, reverse the
+ // FunctionsWithBodies list.
+ if (!HasReversedFunctionsWithBodies) {
+ std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
+ HasReversedFunctionsWithBodies = true;
+ }
+
+ if (RememberAndSkipFunctionBody())
+ return true;
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
+ if (Record.size() < 1)
+ return Error("Malformed MODULE_CODE_VERSION");
+ // Only version #0 is supported so far.
+ if (Record[0] != 0)
+ return Error("Unknown bitstream version!");
+ break;
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_TRIPLE record");
+ TheModule->setTargetTriple(S);
+ break;
+ }
+ case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_DATALAYOUT record");
+ TheModule->setDataLayout(S);
+ break;
+ }
+ case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_ASM record");
+ TheModule->setModuleInlineAsm(S);
+ break;
+ }
+ case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_DEPLIB record");
+ TheModule->addLibrary(S);
+ break;
+ }
+ case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_SECTIONNAME record");
+ SectionTable.push_back(S);
+ break;
+ }
+    case bitc::MODULE_CODE_GCNAME: {  // GCNAME: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_GCNAME record");
+ GCTable.push_back(S);
+ break;
+ }
+ // GLOBALVAR: [pointer type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal]
+ case bitc::MODULE_CODE_GLOBALVAR: {
+ if (Record.size() < 6)
+ return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!isa<PointerType>(Ty))
+ return Error("Global not a pointer type!");
+ unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ bool isConstant = Record[1];
+ GlobalValue::LinkageTypes Linkage = GetDecodedLinkage(Record[3]);
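+      // The record stores log2(alignment) + 1, with 0 meaning "no alignment
+      // specified"; (1 << N) >> 1 decodes it (0 -> 0, 1 -> 1, 3 -> 4, ...).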
+ unsigned Alignment = (1 << Record[4]) >> 1;
+ std::string Section;
+ if (Record[5]) {
+ if (Record[5]-1 >= SectionTable.size())
+ return Error("Invalid section ID");
+ Section = SectionTable[Record[5]-1];
+ }
+ GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
+ if (Record.size() > 6)
+ Visibility = GetDecodedVisibility(Record[6]);
+ bool isThreadLocal = false;
+ if (Record.size() > 7)
+ isThreadLocal = Record[7];
+
+ GlobalVariable *NewGV =
+ new GlobalVariable(Ty, isConstant, Linkage, 0, "", TheModule,
+ isThreadLocal, AddressSpace);
+ NewGV->setAlignment(Alignment);
+ if (!Section.empty())
+ NewGV->setSection(Section);
+ NewGV->setVisibility(Visibility);
+ NewGV->setThreadLocal(isThreadLocal);
+
+ ValueList.push_back(NewGV);
+
+ // Remember which value to use for the global initializer.
+ if (unsigned InitID = Record[2])
+ GlobalInits.push_back(std::make_pair(NewGV, InitID-1));
+ break;
+ }
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattr,
+ // alignment, section, visibility, gc]
+ case bitc::MODULE_CODE_FUNCTION: {
+ if (Record.size() < 8)
+ return Error("Invalid MODULE_CODE_FUNCTION record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!isa<PointerType>(Ty))
+ return Error("Function not a pointer type!");
+ const FunctionType *FTy =
+ dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
+ if (!FTy)
+ return Error("Function not a pointer to function type!");
+
+ Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ "", TheModule);
+
+ Func->setCallingConv(Record[1]);
+ bool isProto = Record[2];
+ Func->setLinkage(GetDecodedLinkage(Record[3]));
+ Func->setAttributes(getAttributes(Record[4]));
+
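+      // Same log2(alignment) + 1 encoding as in the GLOBALVAR record above.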
+ Func->setAlignment((1 << Record[5]) >> 1);
+ if (Record[6]) {
+ if (Record[6]-1 >= SectionTable.size())
+ return Error("Invalid section ID");
+ Func->setSection(SectionTable[Record[6]-1]);
+ }
+ Func->setVisibility(GetDecodedVisibility(Record[7]));
+ if (Record.size() > 8 && Record[8]) {
+        if (Record[8]-1 >= GCTable.size())
+ return Error("Invalid GC ID");
+ Func->setGC(GCTable[Record[8]-1].c_str());
+ }
+ ValueList.push_back(Func);
+
+ // If this is a function with a body, remember the prototype we are
+ // creating now, so that we can match up the body with them later.
+ if (!isProto)
+ FunctionsWithBodies.push_back(Func);
+ break;
+ }
+ // ALIAS: [alias type, aliasee val#, linkage]
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
+ case bitc::MODULE_CODE_ALIAS: {
+ if (Record.size() < 3)
+        return Error("Invalid MODULE_CODE_ALIAS record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!isa<PointerType>(Ty))
+        return Error("Alias not a pointer type!");
+
+ GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
+ "", 0, TheModule);
+ // Old bitcode files didn't have visibility field.
+ if (Record.size() > 3)
+ NewGA->setVisibility(GetDecodedVisibility(Record[3]));
+ ValueList.push_back(NewGA);
+ AliasInits.push_back(std::make_pair(NewGA, Record[1]));
+ break;
+ }
+ /// MODULE_CODE_PURGEVALS: [numvals]
+ case bitc::MODULE_CODE_PURGEVALS:
+ // Trim down the value list to the specified size.
+ if (Record.size() < 1 || Record[0] > ValueList.size())
+        return Error("Invalid MODULE_CODE_PURGEVALS record");
+ ValueList.shrinkTo(Record[0]);
+ break;
+ }
+ Record.clear();
+ }
+
+ return Error("Premature end of bitstream");
+}
+
+bool BitcodeReader::ParseBitcode() {
+ TheModule = 0;
+
+ if (Buffer->getBufferSize() & 3)
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+
+ unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+ unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
+ return Error("Invalid bitcode wrapper header");
+
+ StreamFile.init(BufPtr, BufEnd);
+ Stream.init(StreamFile);
+
+ // Sniff for the signature.
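+  // The two trailing magic bytes 0xC0 0xDE are read as 4-bit fields, low
+  // nibble first, hence the sequence 0x0, 0xC, 0xE, 0xD below.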
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return Error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+
+ if (Code != bitc::ENTER_SUBBLOCK)
+ return Error("Invalid record at top-level");
+
+ unsigned BlockID = Stream.ReadSubBlockID();
+
+ // We only know the MODULE subblock ID.
+ switch (BlockID) {
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::MODULE_BLOCK_ID:
+ if (ParseModule(Buffer->getBufferIdentifier()))
+ return true;
+ break;
+ default:
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ }
+
+ return false;
+}
+
+
+/// ParseFunctionBody - Lazily parse the specified function body block.
+bool BitcodeReader::ParseFunctionBody(Function *F) {
+ if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
+ return Error("Malformed block record");
+
+ unsigned ModuleValueListSize = ValueList.size();
+
+ // Add all the function arguments to the value table.
+ for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ValueList.push_back(I);
+
+ unsigned NextValueNo = ValueList.size();
+ BasicBlock *CurBB = 0;
+ unsigned CurBBNo = 0;
+
+ // Read all the records.
+ SmallVector<uint64_t, 64> Record;
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of function block");
+ break;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ case bitc::CONSTANTS_BLOCK_ID:
+ if (ParseConstants()) return true;
+ NextValueNo = ValueList.size();
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (ParseValueSymbolTable()) return true;
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ Instruction *I = 0;
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: reject
+ return Error("Unknown instruction");
+ case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks]
+ if (Record.size() < 1 || Record[0] == 0)
+ return Error("Invalid DECLAREBLOCKS record");
+ // Create all the basic blocks for the function.
+ FunctionBBs.resize(Record[0]);
+ for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
+ FunctionBBs[i] = BasicBlock::Create("", F);
+ CurBB = FunctionBBs[0];
+ continue;
+
+ case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode]
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ getValue(Record, OpNum, LHS->getType(), RHS) ||
+ OpNum+1 != Record.size())
+ return Error("Invalid BINOP record");
+
+ int Opc = GetDecodedBinaryOpcode(Record[OpNum], LHS->getType());
+ if (Opc == -1) return Error("Invalid BINOP record");
+ I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid CAST record");
+
+ const Type *ResTy = getTypeByID(Record[OpNum]);
+ int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
+ if (Opc == -1 || ResTy == 0)
+ return Error("Invalid CAST record");
+ I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands]
+ unsigned OpNum = 0;
+ Value *BasePtr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
+ return Error("Invalid GEP record");
+
+ SmallVector<Value*, 16> GEPIdx;
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid GEP record");
+ GEPIdx.push_back(Op);
+ }
+
+ I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end());
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_EXTRACTVAL: {
+ // EXTRACTVAL: [opty, opval, n x indices]
+ unsigned OpNum = 0;
+ Value *Agg;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ return Error("Invalid EXTRACTVAL record");
+
+ SmallVector<unsigned, 4> EXTRACTVALIdx;
+ for (unsigned RecSize = Record.size();
+ OpNum != RecSize; ++OpNum) {
+ uint64_t Index = Record[OpNum];
+ if ((unsigned)Index != Index)
+ return Error("Invalid EXTRACTVAL index");
+ EXTRACTVALIdx.push_back((unsigned)Index);
+ }
+
+ I = ExtractValueInst::Create(Agg,
+ EXTRACTVALIdx.begin(), EXTRACTVALIdx.end());
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INSERTVAL: {
+ // INSERTVAL: [opty, opval, opty, opval, n x indices]
+ unsigned OpNum = 0;
+ Value *Agg;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ return Error("Invalid INSERTVAL record");
+ Value *Val;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val))
+ return Error("Invalid INSERTVAL record");
+
+ SmallVector<unsigned, 4> INSERTVALIdx;
+ for (unsigned RecSize = Record.size();
+ OpNum != RecSize; ++OpNum) {
+ uint64_t Index = Record[OpNum];
+ if ((unsigned)Index != Index)
+ return Error("Invalid INSERTVAL index");
+ INSERTVALIdx.push_back((unsigned)Index);
+ }
+
+ I = InsertValueInst::Create(Agg, Val,
+ INSERTVALIdx.begin(), INSERTVALIdx.end());
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval]
+ // obsolete form of select
+ // handles select i1 ... in old bitcode
+ unsigned OpNum = 0;
+ Value *TrueVal, *FalseVal, *Cond;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
+ getValue(Record, OpNum, Type::Int1Ty, Cond))
+ return Error("Invalid SELECT record");
+
+ I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred]
+ // new form of select
+      // handles select i1 ... in old bitcode
+ unsigned OpNum = 0;
+ Value *TrueVal, *FalseVal, *Cond;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ getValue(Record, OpNum, TrueVal->getType(), FalseVal) ||
+ getValueTypePair(Record, OpNum, NextValueNo, Cond))
+ return Error("Invalid SELECT record");
+
+      // select condition can be either i1 or <N x i1>
+ if (const VectorType* vector_type =
+ dyn_cast<const VectorType>(Cond->getType())) {
+ // expect <n x i1>
+ if (vector_type->getElementType() != Type::Int1Ty)
+ return Error("Invalid SELECT condition type");
+ } else {
+ // expect i1
+ if (Cond->getType() != Type::Int1Ty)
+ return Error("Invalid SELECT condition type");
+ }
+
+ I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval]
+ unsigned OpNum = 0;
+ Value *Vec, *Idx;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ getValue(Record, OpNum, Type::Int32Ty, Idx))
+ return Error("Invalid EXTRACTELT record");
+ I = new ExtractElementInst(Vec, Idx);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
+ unsigned OpNum = 0;
+ Value *Vec, *Elt, *Idx;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ getValue(Record, OpNum,
+ cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
+ getValue(Record, OpNum, Type::Int32Ty, Idx))
+ return Error("Invalid INSERTELT record");
+ I = InsertElementInst::Create(Vec, Elt, Idx);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval]
+ unsigned OpNum = 0;
+ Value *Vec1, *Vec2, *Mask;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
+ getValue(Record, OpNum, Vec1->getType(), Vec2))
+ return Error("Invalid SHUFFLEVEC record");
+
+ if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
+ return Error("Invalid SHUFFLEVEC record");
+ I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_CMP: { // CMP: [opty, opval, opval, pred]
+ // VFCmp/VICmp
+ // or old form of ICmp/FCmp returning bool
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ getValue(Record, OpNum, LHS->getType(), RHS) ||
+ OpNum+1 != Record.size())
+ return Error("Invalid CMP record");
+
+ if (LHS->getType()->isFloatingPoint())
+ I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
+ else if (!isa<VectorType>(LHS->getType()))
+ I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
+ else if (LHS->getType()->isFPOrFPVector())
+ I = new VFCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
+ else
+ I = new VICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred]
+ // Fcmp/ICmp returning bool or vector of bool
+ unsigned OpNum = 0;
+ Value *LHS, *RHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ getValue(Record, OpNum, LHS->getType(), RHS) ||
+ OpNum+1 != Record.size())
+ return Error("Invalid CMP2 record");
+
+ if (LHS->getType()->isFPOrFPVector())
+ I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
+ else
+ I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n]
+ if (Record.size() != 2)
+ return Error("Invalid GETRESULT record");
+ unsigned OpNum = 0;
+ Value *Op;
+      if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+        return Error("Invalid GETRESULT record");
+ unsigned Index = Record[1];
+ I = ExtractValueInst::Create(Op, Index);
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>]
+ {
+ unsigned Size = Record.size();
+ if (Size == 0) {
+ I = ReturnInst::Create();
+ break;
+ }
+
+ unsigned OpNum = 0;
+ SmallVector<Value *,4> Vs;
+ do {
+ Value *Op = NULL;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid RET record");
+ Vs.push_back(Op);
+ } while(OpNum != Record.size());
+
+ const Type *ReturnType = F->getReturnType();
+ if (Vs.size() > 1 ||
+ (isa<StructType>(ReturnType) &&
+ (Vs.empty() || Vs[0]->getType() != ReturnType))) {
+ Value *RV = UndefValue::get(ReturnType);
+ for (unsigned i = 0, e = Vs.size(); i != e; ++i) {
+ I = InsertValueInst::Create(RV, Vs[i], i, "mrv");
+ CurBB->getInstList().push_back(I);
+ ValueList.AssignValue(I, NextValueNo++);
+ RV = I;
+ }
+ I = ReturnInst::Create(RV);
+ break;
+ }
+
+ I = ReturnInst::Create(Vs[0]);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#]
+ if (Record.size() != 1 && Record.size() != 3)
+ return Error("Invalid BR record");
+ BasicBlock *TrueDest = getBasicBlock(Record[0]);
+ if (TrueDest == 0)
+ return Error("Invalid BR record");
+
+ if (Record.size() == 1)
+ I = BranchInst::Create(TrueDest);
+ else {
+ BasicBlock *FalseDest = getBasicBlock(Record[1]);
+ Value *Cond = getFnValueByID(Record[2], Type::Int1Ty);
+ if (FalseDest == 0 || Cond == 0)
+ return Error("Invalid BR record");
+ I = BranchInst::Create(TrueDest, FalseDest, Cond);
+ }
+ break;
+ }
+ case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, opval, n, n x ops]
+ if (Record.size() < 3 || (Record.size() & 1) == 0)
+ return Error("Invalid SWITCH record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Cond = getFnValueByID(Record[1], OpTy);
+ BasicBlock *Default = getBasicBlock(Record[2]);
+ if (OpTy == 0 || Cond == 0 || Default == 0)
+ return Error("Invalid SWITCH record");
+ unsigned NumCases = (Record.size()-3)/2;
+ SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
+ for (unsigned i = 0, e = NumCases; i != e; ++i) {
+ ConstantInt *CaseVal =
+ dyn_cast_or_null<ConstantInt>(getFnValueByID(Record[3+i*2], OpTy));
+ BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
+ if (CaseVal == 0 || DestBB == 0) {
+ delete SI;
+ return Error("Invalid SWITCH record!");
+ }
+ SI->addCase(CaseVal, DestBB);
+ }
+ I = SI;
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_INVOKE: {
+ // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
+ if (Record.size() < 4) return Error("Invalid INVOKE record");
+ AttrListPtr PAL = getAttributes(Record[0]);
+ unsigned CCInfo = Record[1];
+ BasicBlock *NormalBB = getBasicBlock(Record[2]);
+ BasicBlock *UnwindBB = getBasicBlock(Record[3]);
+
+ unsigned OpNum = 4;
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ return Error("Invalid INVOKE record");
+
+ const PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = !CalleeTy ? 0 :
+ dyn_cast<FunctionType>(CalleeTy->getElementType());
+
+ // Check that the right number of fixed parameters are here.
+ if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 ||
+ Record.size() < OpNum+FTy->getNumParams())
+ return Error("Invalid INVOKE record");
+
+ SmallVector<Value*, 16> Ops;
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ if (Ops.back() == 0) return Error("Invalid INVOKE record");
+ }
+
+ if (!FTy->isVarArg()) {
+ if (Record.size() != OpNum)
+ return Error("Invalid INVOKE record");
+ } else {
+ // Read type/value pairs for varargs params.
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid INVOKE record");
+ Ops.push_back(Op);
+ }
+ }
+
+ I = InvokeInst::Create(Callee, NormalBB, UnwindBB,
+ Ops.begin(), Ops.end());
+ cast<InvokeInst>(I)->setCallingConv(CCInfo);
+ cast<InvokeInst>(I)->setAttributes(PAL);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_UNWIND: // UNWIND
+ I = new UnwindInst();
+ break;
+ case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
+ I = new UnreachableInst();
+ break;
+ case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
+ if (Record.size() < 1 || ((Record.size()-1)&1))
+ return Error("Invalid PHI record");
+ const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid PHI record");
+
+ PHINode *PN = PHINode::Create(Ty);
+ PN->reserveOperandSpace((Record.size()-1)/2);
+
+ for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
+ Value *V = getFnValueByID(Record[1+i], Ty);
+ BasicBlock *BB = getBasicBlock(Record[2+i]);
+ if (!V || !BB) return Error("Invalid PHI record");
+ PN->addIncoming(V, BB);
+ }
+ I = PN;
+ break;
+ }
+
+ case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align]
+ if (Record.size() < 3)
+ return Error("Invalid MALLOC record");
+ const PointerType *Ty =
+ dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+ Value *Size = getFnValueByID(Record[1], Type::Int32Ty);
+ unsigned Align = Record[2];
+ if (!Ty || !Size) return Error("Invalid MALLOC record");
+ I = new MallocInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum != Record.size())
+ return Error("Invalid FREE record");
+ I = new FreeInst(Op);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, op, align]
+ if (Record.size() < 3)
+ return Error("Invalid ALLOCA record");
+ const PointerType *Ty =
+ dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+ Value *Size = getFnValueByID(Record[1], Type::Int32Ty);
+ unsigned Align = Record[2];
+ if (!Ty || !Size) return Error("Invalid ALLOCA record");
+ I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol]
+ unsigned OpNum = 0;
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid LOAD record");
+
+ I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol]
+ unsigned OpNum = 0;
+ Value *Val, *Ptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ getValue(Record, OpNum,
+ cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
+ OpNum+2 != Record.size())
+ return Error("Invalid STORE record");
+
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol]
+ // FIXME: Legacy form of store instruction. Should be removed in LLVM 3.0.
+ unsigned OpNum = 0;
+ Value *Val, *Ptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val) ||
+ getValue(Record, OpNum, PointerType::getUnqual(Val->getType()), Ptr)||
+ OpNum+2 != Record.size())
+ return Error("Invalid STORE record");
+
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CALL: {
+ // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
+ if (Record.size() < 3)
+ return Error("Invalid CALL record");
+
+ AttrListPtr PAL = getAttributes(Record[0]);
+ unsigned CCInfo = Record[1];
+
+ unsigned OpNum = 2;
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ return Error("Invalid CALL record");
+
+ const PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = 0;
+ if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
+ if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
+ return Error("Invalid CALL record");
+
+ SmallVector<Value*, 16> Args;
+ // Read the fixed params.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ if (FTy->getParamType(i)->getTypeID()==Type::LabelTyID)
+ Args.push_back(getBasicBlock(Record[OpNum]));
+ else
+ Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
+ if (Args.back() == 0) return Error("Invalid CALL record");
+ }
+
+ // Read type/value pairs for varargs params.
+ if (!FTy->isVarArg()) {
+ if (OpNum != Record.size())
+ return Error("Invalid CALL record");
+ } else {
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid CALL record");
+ Args.push_back(Op);
+ }
+ }
+
+ I = CallInst::Create(Callee, Args.begin(), Args.end());
+ cast<CallInst>(I)->setCallingConv(CCInfo>>1);
+ cast<CallInst>(I)->setTailCall(CCInfo & 1);
+ cast<CallInst>(I)->setAttributes(PAL);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
+ if (Record.size() < 3)
+ return Error("Invalid VAARG record");
+ const Type *OpTy = getTypeByID(Record[0]);
+ Value *Op = getFnValueByID(Record[1], OpTy);
+ const Type *ResTy = getTypeByID(Record[2]);
+ if (!OpTy || !Op || !ResTy)
+ return Error("Invalid VAARG record");
+ I = new VAArgInst(Op, ResTy);
+ break;
+ }
+ }
+
+ // Add instruction to end of current BB. If there is no current BB, reject
+ // this file.
+ if (CurBB == 0) {
+ delete I;
+ return Error("Invalid instruction with no BB");
+ }
+ CurBB->getInstList().push_back(I);
+
+ // If this was a terminator instruction, move to the next block.
+ if (isa<TerminatorInst>(I)) {
+ ++CurBBNo;
+ CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0;
+ }
+
+ // Non-void values get registered in the value table for future use.
+ if (I && I->getType() != Type::VoidTy)
+ ValueList.AssignValue(I, NextValueNo++);
+ }
+
+ // Check the function list for unresolved values.
+ if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
+ if (A->getParent() == 0) {
+ // We found at least one unresolved value. Nuke them all to avoid leaks.
+ for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){
+        if ((A = dyn_cast_or_null<Argument>(ValueList[i])) && A->getParent() == 0) {
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ delete A;
+ }
+ }
+ return Error("Never resolved value found in function!");
+ }
+ }
+
+ // Trim the value list down to the size it was before we parsed this function.
+ ValueList.shrinkTo(ModuleValueListSize);
+ std::vector<BasicBlock*>().swap(FunctionBBs);
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// ModuleProvider implementation
+//===----------------------------------------------------------------------===//
+
+
+bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) {
+ // If it already is material, ignore the request.
+ if (!F->hasNotBeenReadFromBitcode()) return false;
+
+ DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator DFII =
+ DeferredFunctionInfo.find(F);
+ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+
+ // Move the bit stream to the saved position of the deferred function body and
+ // restore the real linkage type for the function.
+ Stream.JumpToBit(DFII->second.first);
+ F->setLinkage((GlobalValue::LinkageTypes)DFII->second.second);
+
+ if (ParseFunctionBody(F)) {
+ if (ErrInfo) *ErrInfo = ErrorString;
+ return true;
+ }
+
+ // Upgrade any old intrinsic calls in the function.
+ for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
+ E = UpgradedIntrinsics.end(); I != E; ++I) {
+ if (I->first != I->second) {
+ for (Value::use_iterator UI = I->first->use_begin(),
+ UE = I->first->use_end(); UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, I->second);
+ }
+ }
+ }
+
+ return false;
+}
+
+void BitcodeReader::dematerializeFunction(Function *F) {
+ // If this function isn't materialized, or if it is a proto, this is a noop.
+ if (F->hasNotBeenReadFromBitcode() || F->isDeclaration())
+ return;
+
+ assert(DeferredFunctionInfo.count(F) && "No info to read function later?");
+
+ // Just forget the function body, we can remat it later.
+ F->deleteBody();
+ F->setLinkage(GlobalValue::GhostLinkage);
+}
+
+
+Module *BitcodeReader::materializeModule(std::string *ErrInfo) {
+ for (DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator I =
+ DeferredFunctionInfo.begin(), E = DeferredFunctionInfo.end(); I != E;
+ ++I) {
+ Function *F = I->first;
+ if (F->hasNotBeenReadFromBitcode() &&
+ materializeFunction(F, ErrInfo))
+ return 0;
+ }
+
+ // Upgrade any intrinsic calls that slipped through (should not happen!) and
+ // delete the old functions to clean up. We can't do this unless the entire
+ // module is materialized because there could always be another function body
+ // with calls to the old function.
+ for (std::vector<std::pair<Function*, Function*> >::iterator I =
+ UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) {
+ if (I->first != I->second) {
+ for (Value::use_iterator UI = I->first->use_begin(),
+ UE = I->first->use_end(); UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, I->second);
+ }
+ if (!I->first->use_empty())
+ I->first->replaceAllUsesWith(I->second);
+ I->first->eraseFromParent();
+ }
+ }
+ std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
+
+ return TheModule;
+}
+
+
+/// This method is provided by the parent ModuleProvider class and overridden
+/// here. It simply releases the module from its provider and frees up our
+/// state.
+/// @brief Release our hold on the generated module.
+Module *BitcodeReader::releaseModule(std::string *ErrInfo) {
+ // Since we're losing control of this Module, we must hand it back complete
+ Module *M = ModuleProvider::releaseModule(ErrInfo);
+ FreeState();
+ return M;
+}
+
+
+//===----------------------------------------------------------------------===//
+// External interface
+//===----------------------------------------------------------------------===//
+
+/// getBitcodeModuleProvider - lazy function-at-a-time loading from a file.
+///
+ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer,
+ std::string *ErrMsg) {
+ BitcodeReader *R = new BitcodeReader(Buffer);
+ if (R->ParseBitcode()) {
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+
+ // Don't let the BitcodeReader dtor delete 'Buffer'.
+ R->releaseMemoryBuffer();
+ delete R;
+ return 0;
+ }
+ return R;
+}
+
+/// ParseBitcodeFile - Read the specified bitcode file, returning the module.
+/// If an error occurs, return null and fill in *ErrMsg if non-null.
+Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, std::string *ErrMsg){
+ BitcodeReader *R;
+ R = static_cast<BitcodeReader*>(getBitcodeModuleProvider(Buffer, ErrMsg));
+ if (!R) return 0;
+
+ // Read in the entire module.
+ Module *M = R->materializeModule(ErrMsg);
+
+ // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether
+ // there was an error.
+ R->releaseMemoryBuffer();
+
+ // If there was no error, tell ModuleProvider not to delete it when its dtor
+ // is run.
+ if (M)
+ M = R->releaseModule(ErrMsg);
+
+ delete R;
+ return M;
+}
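+
+// Illustrative usage (not part of the original source), assuming 'Buf' is a
+// MemoryBuffer obtained elsewhere, e.g. from MemoryBuffer::getFile():
+//
+//   std::string Err;
+//   if (Module *M = ParseBitcodeFile(Buf, &Err)) {
+//     // ... use M; the caller now owns the Module ...
+//   } else {
+//     // ... report Err ...
+//   }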
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
new file mode 100644
index 0000000..0dc470b
--- /dev/null
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -0,0 +1,214 @@
+//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITCODE_READER_H
+#define BITCODE_READER_H
+
+#include "llvm/ModuleProvider.h"
+#include "llvm/Attributes.h"
+#include "llvm/Type.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+
+namespace llvm {
+ class MemoryBuffer;
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderValueList {
+ std::vector<WeakVH> ValuePtrs;
+
+ /// ResolveConstants - As we resolve forward-referenced constants, we add
+ /// information about them to this vector. This allows us to resolve them in
+ /// bulk instead of resolving each reference at a time. See the code in
+ /// ResolveConstantForwardRefs for more information about this.
+ ///
+ /// The key of this vector is the placeholder constant, the value is the slot
+ /// number that holds the resolved value.
+ typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
+ ResolveConstantsTy ResolveConstants;
+public:
+ BitcodeReaderValueList() {}
+ ~BitcodeReaderValueList() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ }
+
+ // vector compatibility methods
+ unsigned size() const { return ValuePtrs.size(); }
+ void resize(unsigned N) { ValuePtrs.resize(N); }
+ void push_back(Value *V) {
+ ValuePtrs.push_back(V);
+ }
+
+ void clear() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ ValuePtrs.clear();
+ }
+
+ Value *operator[](unsigned i) const {
+ assert(i < ValuePtrs.size());
+ return ValuePtrs[i];
+ }
+
+ Value *back() const { return ValuePtrs.back(); }
+ void pop_back() { ValuePtrs.pop_back(); }
+ bool empty() const { return ValuePtrs.empty(); }
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ ValuePtrs.resize(N);
+ }
+
+ Constant *getConstantFwdRef(unsigned Idx, const Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, const Type *Ty);
+
+ void AssignValue(Value *V, unsigned Idx);
+
+ /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+ /// resolves any forward references.
+ void ResolveConstantForwardRefs();
+};
+
+class BitcodeReader : public ModuleProvider {
+ MemoryBuffer *Buffer;
+ BitstreamReader StreamFile;
+ BitstreamCursor Stream;
+
+ const char *ErrorString;
+
+ std::vector<PATypeHolder> TypeList;
+ BitcodeReaderValueList ValueList;
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+
+ /// MAttributes - The set of attributes by index. Index zero in the
+ /// file is for null, and is thus not represented here. As such all indices
+ /// are off by one.
+ std::vector<AttrListPtr> MAttributes;
+
+ /// FunctionBBs - While parsing a function body, this is a list of the basic
+ /// blocks for the function.
+ std::vector<BasicBlock*> FunctionBBs;
+
+ // When reading the module header, this list is populated with functions that
+ // have bodies later in the file.
+ std::vector<Function*> FunctionsWithBodies;
+
+ // When intrinsic functions are encountered which require upgrading they are
+ // stored here with their replacement function.
+ typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+ UpgradedIntrinsicMap UpgradedIntrinsics;
+
+ // After the module header has been read, the FunctionsWithBodies list is
+ // reversed. This keeps track of whether we've done this yet.
+ bool HasReversedFunctionsWithBodies;
+
+ /// DeferredFunctionInfo - When function bodies are initially scanned, this
+  /// map contains info about where to find the deferred function body (in the
+ /// stream) and what linkage the original function had.
+ DenseMap<Function*, std::pair<uint64_t, unsigned> > DeferredFunctionInfo;
+public:
+ explicit BitcodeReader(MemoryBuffer *buffer)
+ : Buffer(buffer), ErrorString(0) {
+ HasReversedFunctionsWithBodies = false;
+ }
+ ~BitcodeReader() {
+ FreeState();
+ }
+
+ void FreeState();
+
+ /// releaseMemoryBuffer - This causes the reader to completely forget about
+  /// the memory buffer it contains, which prevents the buffer from being
+  /// destroyed when the reader is deleted.
+ void releaseMemoryBuffer() {
+ Buffer = 0;
+ }
+
+ virtual bool materializeFunction(Function *F, std::string *ErrInfo = 0);
+ virtual Module *materializeModule(std::string *ErrInfo = 0);
+ virtual void dematerializeFunction(Function *F);
+ virtual Module *releaseModule(std::string *ErrInfo = 0);
+
+ bool Error(const char *Str) {
+ ErrorString = Str;
+ return true;
+ }
+ const char *getErrorString() const { return ErrorString; }
+
+ /// @brief Main interface to parsing a bitcode buffer.
+ /// @returns true if an error occurred.
+ bool ParseBitcode();
+private:
+ const Type *getTypeByID(unsigned ID, bool isTypeTable = false);
+ Value *getFnValueByID(unsigned ID, const Type *Ty) {
+ return ValueList.getValueFwdRef(ID, Ty);
+ }
+ BasicBlock *getBasicBlock(unsigned ID) const {
+ if (ID >= FunctionBBs.size()) return 0; // Invalid ID
+ return FunctionBBs[ID];
+ }
+ AttrListPtr getAttributes(unsigned i) const {
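+    // Attribute index 0 means "no attributes"; for i == 0 the unsigned wrap
+    // makes i-1 huge, so the range check below rejects it as well.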
+ if (i-1 < MAttributes.size())
+ return MAttributes[i-1];
+ return AttrListPtr();
+ }
+
+ /// getValueTypePair - Read a value/type pair out of the specified record from
+ /// slot 'Slot'. Increment Slot past the number of slots used in the record.
+ /// Return true on failure.
+ bool getValueTypePair(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ unsigned InstNum, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ if (ValNo < InstNum) {
+ // If this is not a forward reference, just return the value we already
+ // have.
+ ResVal = getFnValueByID(ValNo, 0);
+ return ResVal == 0;
+ } else if (Slot == Record.size()) {
+ return true;
+ }
+
+ unsigned TypeNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+ return ResVal == 0;
+ }
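+  // Note (added for clarity): a back-reference is encoded as a bare value
+  // number, while a forward reference carries an explicit [valno, typeno]
+  // pair, because the type of a not-yet-seen value cannot be inferred.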
+ bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+ const Type *Ty, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, Ty);
+ return ResVal == 0;
+ }
+
+
+ bool ParseModule(const std::string &ModuleID);
+ bool ParseAttributeBlock();
+ bool ParseTypeTable();
+ bool ParseTypeSymbolTable();
+ bool ParseValueSymbolTable();
+ bool ParseConstants();
+ bool RememberAndSkipFunctionBody();
+ bool ParseFunctionBody(Function *F);
+ bool ResolveGlobalAndAliasInits();
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt
new file mode 100644
index 0000000..a19c79a
--- /dev/null
+++ b/lib/Bitcode/Reader/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMBitReader
+ BitReader.cpp
+ BitcodeReader.cpp
+ Deserialize.cpp
+ DeserializeAPFloat.cpp
+ DeserializeAPInt.cpp
+  )
\ No newline at end of file
diff --git a/lib/Bitcode/Reader/Deserialize.cpp b/lib/Bitcode/Reader/Deserialize.cpp
new file mode 100644
index 0000000..06da6ce
--- /dev/null
+++ b/lib/Bitcode/Reader/Deserialize.cpp
@@ -0,0 +1,454 @@
+//==- Deserialize.cpp - Generic Object Serialization to Bitcode --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the internal methods used for object serialization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/Deserialize.h"
+
+#ifdef DEBUG_BACKPATCH
+#include "llvm/Support/Streams.h"
+#endif
+
+using namespace llvm;
+
+Deserializer::Deserializer(BitstreamReader& stream)
+ : Stream(stream), RecIdx(0), FreeList(NULL), AbbrevNo(0), RecordCode(0) {
+
+ StreamStart = Stream.GetCurrentBitNo();
+}
+
+Deserializer::~Deserializer() {
+ assert (RecIdx >= Record.size() &&
+ "Still scanning bitcode record when deserialization completed.");
+
+#ifdef DEBUG_BACKPATCH
+ for (MapTy::iterator I=BPatchMap.begin(), E=BPatchMap.end(); I!=E; ++I)
+ assert (I->first.hasFinalPtr() &&
+ "Some pointers were not backpatched.");
+#endif
+}
+
+
+bool Deserializer::inRecord() {
+ if (Record.size() > 0) {
+ if (RecIdx >= Record.size()) {
+ RecIdx = 0;
+ Record.clear();
+ AbbrevNo = 0;
+ return false;
+ }
+ else
+ return true;
+ }
+
+ return false;
+}
+
+bool Deserializer::AdvanceStream() {
+ assert (!inRecord() &&
+ "Cannot advance stream. Still processing a record.");
+
+ if (AbbrevNo == bitc::ENTER_SUBBLOCK ||
+ AbbrevNo >= bitc::UNABBREV_RECORD)
+ return true;
+
+ while (!Stream.AtEndOfStream()) {
+
+ uint64_t Pos = Stream.GetCurrentBitNo();
+ AbbrevNo = Stream.ReadCode();
+
+ switch (AbbrevNo) {
+ case bitc::ENTER_SUBBLOCK: {
+ unsigned id = Stream.ReadSubBlockID();
+
+ // Determine the extent of the block. This is useful for jumping around
+      // the stream. This is a hack: we read the header of the block, save
+ // the length, and then revert the bitstream to a location just before
+ // the block is entered.
+ uint64_t BPos = Stream.GetCurrentBitNo();
+ Stream.ReadVBR(bitc::CodeLenWidth); // Skip the code size.
+ Stream.SkipToWord();
+ unsigned NumWords = Stream.Read(bitc::BlockSizeWidth);
+ Stream.JumpToBit(BPos);
+
+ BlockStack.push_back(Location(Pos,id,NumWords));
+ break;
+ }
+
+ case bitc::END_BLOCK: {
+ bool x = Stream.ReadBlockEnd();
+      assert(!x && "Error at block end."); (void) x;
+ BlockStack.pop_back();
+ continue;
+ }
+
+ case bitc::DEFINE_ABBREV:
+ Stream.ReadAbbrevRecord();
+ continue;
+
+ default:
+ break;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+void Deserializer::ReadRecord() {
+
+ while (AdvanceStream() && AbbrevNo == bitc::ENTER_SUBBLOCK) {
+ assert (!BlockStack.empty());
+ Stream.EnterSubBlock(BlockStack.back().BlockID);
+ AbbrevNo = 0;
+ }
+
+ if (Stream.AtEndOfStream())
+ return;
+
+ assert (Record.empty());
+ assert (AbbrevNo >= bitc::UNABBREV_RECORD);
+ RecordCode = Stream.ReadRecord(AbbrevNo,Record);
+ assert (Record.size() > 0);
+}
+
+void Deserializer::SkipBlock() {
+ assert (!inRecord());
+
+ if (AtEnd())
+ return;
+
+ AdvanceStream();
+
+ assert (AbbrevNo == bitc::ENTER_SUBBLOCK);
+ BlockStack.pop_back();
+ Stream.SkipBlock();
+
+ AbbrevNo = 0;
+ AdvanceStream();
+}
+
+bool Deserializer::SkipToBlock(unsigned BlockID) {
+ assert (!inRecord());
+
+ AdvanceStream();
+ assert (AbbrevNo == bitc::ENTER_SUBBLOCK);
+
+ unsigned BlockLevel = BlockStack.size();
+
+ while (!AtEnd() &&
+ BlockLevel == BlockStack.size() &&
+ getCurrentBlockID() != BlockID)
+ SkipBlock();
+
+ return !(AtEnd() || BlockLevel != BlockStack.size());
+}
+
+Deserializer::Location Deserializer::getCurrentBlockLocation() {
+ if (!inRecord())
+ AdvanceStream();
+
+ return BlockStack.back();
+}
+
+bool Deserializer::JumpTo(const Location& Loc) {
+
+ assert (!inRecord());
+
+ AdvanceStream();
+
+ assert (!BlockStack.empty() || AtEnd());
+
+ uint64_t LastBPos = StreamStart;
+
+ while (!BlockStack.empty()) {
+
+ LastBPos = BlockStack.back().BitNo;
+
+    // Determine whether the current block contains the location of the block
+ // we are looking for.
+ if (BlockStack.back().contains(Loc)) {
+ // We found the enclosing block. We must first POP it off to
+ // destroy any accumulated context within the block scope. We then
+ // jump to the position of the block and enter it.
+ Stream.JumpToBit(LastBPos);
+
+ if (BlockStack.size() == Stream.BlockScope.size())
+ Stream.PopBlockScope();
+
+ BlockStack.pop_back();
+
+ AbbrevNo = 0;
+ AdvanceStream();
+ assert (AbbrevNo == bitc::ENTER_SUBBLOCK);
+
+ Stream.EnterSubBlock(BlockStack.back().BlockID);
+ break;
+ }
+
+ // This block does not contain the block we are looking for. Pop it.
+ if (BlockStack.size() == Stream.BlockScope.size())
+ Stream.PopBlockScope();
+
+ BlockStack.pop_back();
+
+ }
+
+ // Check if we have popped our way to the outermost scope. If so,
+ // we need to adjust our position.
+ if (BlockStack.empty()) {
+ assert (Stream.BlockScope.empty());
+
+ Stream.JumpToBit(Loc.BitNo < LastBPos ? StreamStart : LastBPos);
+ AbbrevNo = 0;
+ AdvanceStream();
+ }
+
+ assert (AbbrevNo == bitc::ENTER_SUBBLOCK);
+ assert (!BlockStack.empty());
+
+ while (!AtEnd() && BlockStack.back() != Loc) {
+ if (BlockStack.back().contains(Loc)) {
+ Stream.EnterSubBlock(BlockStack.back().BlockID);
+ AbbrevNo = 0;
+ AdvanceStream();
+ continue;
+ }
+ else
+ SkipBlock();
+ }
+
+ if (AtEnd())
+ return false;
+
+ assert (BlockStack.back() == Loc);
+
+ return true;
+}
+
+void Deserializer::Rewind() {
+ while (!Stream.BlockScope.empty())
+ Stream.PopBlockScope();
+
+ while (!BlockStack.empty())
+ BlockStack.pop_back();
+
+ Stream.JumpToBit(StreamStart);
+ AbbrevNo = 0;
+}
+
+
+unsigned Deserializer::getCurrentBlockID() {
+ if (!inRecord())
+ AdvanceStream();
+
+ return BlockStack.back().BlockID;
+}
+
+unsigned Deserializer::getRecordCode() {
+ if (!inRecord()) {
+ AdvanceStream();
+ assert (AbbrevNo >= bitc::UNABBREV_RECORD);
+ ReadRecord();
+ }
+
+ return RecordCode;
+}
+
+bool Deserializer::FinishedBlock(Location BlockLoc) {
+ if (!inRecord())
+ AdvanceStream();
+
+ for (llvm::SmallVector<Location,8>::reverse_iterator
+ I=BlockStack.rbegin(), E=BlockStack.rend(); I!=E; ++I)
+ if (*I == BlockLoc)
+ return false;
+
+ return true;
+}
+
+unsigned Deserializer::getAbbrevNo() {
+ if (!inRecord())
+ AdvanceStream();
+
+ return AbbrevNo;
+}
+
+bool Deserializer::AtEnd() {
+ if (inRecord())
+ return false;
+
+ if (!AdvanceStream())
+ return true;
+
+ return false;
+}
+
+uint64_t Deserializer::ReadInt() {
+ // FIXME: Any error recovery/handling with incomplete or bad files?
+ if (!inRecord())
+ ReadRecord();
+
+ return Record[RecIdx++];
+}
+
+int64_t Deserializer::ReadSInt() {
+ uint64_t x = ReadInt();
+ int64_t magnitude = x >> 1;
+ return x & 0x1 ? -magnitude : magnitude;
+}
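+// Note: ReadSInt assumes the writer stored (magnitude << 1) | sign-bit, so,
+// for example, 10 (binary 1010) decodes to +5 and 11 (1011) decodes to -5.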
+
+char* Deserializer::ReadCStr(char* cstr, unsigned MaxLen, bool isNullTerm) {
+ if (cstr == NULL)
+ MaxLen = 0; // Zero this just in case someone does something funny.
+
+ unsigned len = ReadInt();
+
+ assert (MaxLen == 0 || (len + (isNullTerm ? 1 : 0)) <= MaxLen);
+
+ if (!cstr)
+ cstr = new char[len + (isNullTerm ? 1 : 0)];
+
+ assert (cstr != NULL);
+
+ for (unsigned i = 0; i < len; ++i)
+ cstr[i] = (char) ReadInt();
+
+ if (isNullTerm)
+ cstr[len] = '\0';
+
+ return cstr;
+}
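+// Example (hypothetical caller): passing a null buffer makes ReadCStr
+// heap-allocate the result, which the caller then owns:
+//   char* s = D.ReadCStr(NULL, 0, true);
+//   ... use s ...
+//   delete [] s;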
+
+void Deserializer::ReadCStr(std::vector<char>& buff, bool isNullTerm,
+ unsigned Idx) {
+
+ unsigned len = ReadInt();
+
+  // If Idx is beyond the current buffer size, clamp it so that it refers to
+  // the position just past the last element.
+ if (Idx > buff.size())
+ Idx = buff.size();
+
+ buff.reserve(len+Idx);
+ buff.resize(Idx);
+
+ for (unsigned i = 0; i < len; ++i)
+ buff.push_back((char) ReadInt());
+
+ if (isNullTerm)
+ buff.push_back('\0');
+}
+
+void Deserializer::RegisterPtr(const SerializedPtrID& PtrId,
+ const void* Ptr) {
+
+ MapTy::value_type& E = BPatchMap.FindAndConstruct(BPKey(PtrId));
+
+ assert (!HasFinalPtr(E) && "Pointer already registered.");
+
+#ifdef DEBUG_BACKPATCH
+ llvm::cerr << "RegisterPtr: " << PtrId << " => " << Ptr << "\n";
+#endif
+
+ SetPtr(E,Ptr);
+}
+
+void Deserializer::ReadUIntPtr(uintptr_t& PtrRef,
+ const SerializedPtrID& PtrId,
+ bool AllowBackpatch) {
+ if (PtrId == 0) {
+ PtrRef = 0;
+ return;
+ }
+
+ MapTy::value_type& E = BPatchMap.FindAndConstruct(BPKey(PtrId));
+
+ if (HasFinalPtr(E)) {
+ PtrRef = GetFinalPtr(E);
+
+#ifdef DEBUG_BACKPATCH
+ llvm::cerr << "ReadUintPtr: " << PtrId
+ << " <-- " << (void*) GetFinalPtr(E) << '\n';
+#endif
+ }
+ else {
+ assert (AllowBackpatch &&
+ "Client forbids backpatching for this pointer.");
+
+#ifdef DEBUG_BACKPATCH
+ llvm::cerr << "ReadUintPtr: " << PtrId << " (NO PTR YET)\n";
+#endif
+
+ // Register backpatch. Check the freelist for a BPNode.
+ BPNode* N;
+
+ if (FreeList) {
+ N = FreeList;
+ FreeList = FreeList->Next;
+ }
+ else // No available BPNode. Allocate one.
+ N = (BPNode*) Allocator.Allocate<BPNode>();
+
+ new (N) BPNode(GetBPNode(E),PtrRef);
+ SetBPNode(E,N);
+ }
+}
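+// Illustration of the backpatch flow above: if object #7 is referenced before
+// it has been deserialized, ReadUIntPtr chains a BPNode recording &PtrRef;
+// when RegisterPtr(7, Ptr) later runs, BPEntry::SetPtr (below) walks that
+// chain, fills in every recorded reference, and returns the nodes to the
+// freelist.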
+
+uintptr_t Deserializer::ReadInternalRefPtr() {
+ SerializedPtrID PtrId = ReadPtrID();
+
+  assert (PtrId != 0 && "References cannot refer to the NULL address.");
+
+ MapTy::value_type& E = BPatchMap.FindAndConstruct(BPKey(PtrId));
+
+  assert (HasFinalPtr(E) &&
+          "Cannot backpatch references. Object must already be deserialized.");
+
+ return GetFinalPtr(E);
+}
+
+void Deserializer::BPEntry::SetPtr(BPNode*& FreeList, void* P) {
+ BPNode* Last = NULL;
+
+ for (BPNode* N = Head; N != NULL; N=N->Next) {
+ Last = N;
+ N->PtrRef |= reinterpret_cast<uintptr_t>(P);
+ }
+
+ if (Last) {
+ Last->Next = FreeList;
+ FreeList = Head;
+ }
+
+ Ptr = const_cast<void*>(P);
+}
+
+
+#define INT_READ(TYPE)\
+void SerializeTrait<TYPE>::Read(Deserializer& D, TYPE& X) {\
+ X = (TYPE) D.ReadInt(); }
+
+INT_READ(bool)
+INT_READ(unsigned char)
+INT_READ(unsigned short)
+INT_READ(unsigned int)
+INT_READ(unsigned long)
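+// For instance, INT_READ(bool) above expands to:
+//   void SerializeTrait<bool>::Read(Deserializer& D, bool& X) {
+//     X = (bool) D.ReadInt(); }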
+
+#define SINT_READ(TYPE)\
+void SerializeTrait<TYPE>::Read(Deserializer& D, TYPE& X) {\
+ X = (TYPE) D.ReadSInt(); }
+
+// Signed types must go through SINT_READ so that ReadSInt undoes the
+// sign-bit encoding used when they were written.
+SINT_READ(signed char)
+SINT_READ(signed short)
+SINT_READ(signed int)
+SINT_READ(signed long)
diff --git a/lib/Bitcode/Reader/DeserializeAPFloat.cpp b/lib/Bitcode/Reader/DeserializeAPFloat.cpp
new file mode 100644
index 0000000..ee24b68
--- /dev/null
+++ b/lib/Bitcode/Reader/DeserializeAPFloat.cpp
@@ -0,0 +1,24 @@
+//===-- DeserializeAPFloat.cpp - Deserialization for APFloat ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements deserialization of APFloat.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Bitcode/Deserialize.h"
+
+using namespace llvm;
+
+APFloat APFloat::ReadVal(Deserializer& D) {
+ APInt x;
+ D.Read(x);
+ return APFloat(x);
+}
+
diff --git a/lib/Bitcode/Reader/DeserializeAPInt.cpp b/lib/Bitcode/Reader/DeserializeAPInt.cpp
new file mode 100644
index 0000000..1b5b2bf
--- /dev/null
+++ b/lib/Bitcode/Reader/DeserializeAPInt.cpp
@@ -0,0 +1,33 @@
+//===-- DeserializeAPInt.cpp - Deserialization for APInts ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements deserialization of APInts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/Bitcode/Deserialize.h"
+#include <cassert>
+
+using namespace llvm;
+
+void APInt::Read(Deserializer& D) {
+ BitWidth = D.ReadInt();
+
+ if (isSingleWord())
+ VAL = D.ReadInt();
+ else {
+ uint32_t NumWords = D.ReadInt();
+ assert (NumWords > 1);
+ pVal = new uint64_t[NumWords];
+ assert (pVal && "Allocation in deserialization of APInt failed.");
+ for (unsigned i = 0; i < NumWords; ++i)
+ pVal[i] = D.ReadInt();
+ }
+}
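+// Wire layout consumed above: [BitWidth, VAL] for widths <= 64 bits, else
+// [BitWidth, NumWords, word0, word1, ...]; e.g. a 128-bit APInt arrives as
+// two 64-bit words after the two leading integers.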
diff --git a/lib/Bitcode/Reader/Makefile b/lib/Bitcode/Reader/Makefile
new file mode 100644
index 0000000..59af8d53
--- /dev/null
+++ b/lib/Bitcode/Reader/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Bitcode/Reader/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMBitReader
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
new file mode 100644
index 0000000..8834964
--- /dev/null
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -0,0 +1,58 @@
+//===-- BitWriter.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include <fstream>
+
+using namespace llvm;
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
+ std::ofstream OS(Path, std::ios_base::out|std::ios::trunc|std::ios::binary);
+
+ if (!OS.fail())
+ WriteBitcodeToFile(unwrap(M), OS);
+
+ if (OS.fail())
+ return -1;
+
+ return 0;
+}
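+/* Example (hypothetical C client); a zero return indicates success:
+ *   if (LLVMWriteBitcodeToFile(Mod, "out.bc") != 0)
+ *     fprintf(stderr, "bitcode write failed\n");
+ */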
+
+#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#include <ext/stdio_filebuf.h>
+
+// FIXME: Control this with configure? Provide some portable abstraction in
+// libSystem? As is, the user will just get a linker error if they use this on
+// non-GCC. Some C++ stdlibs even have ofstream::ofstream(int fd).
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
+ __gnu_cxx::stdio_filebuf<char> Buffer(FileHandle, std::ios_base::out |
+ std::ios::trunc |
+ std::ios::binary);
+ std::ostream OS(&Buffer);
+
+ if (!OS.fail())
+ WriteBitcodeToFile(unwrap(M), OS);
+
+ if (OS.fail())
+ return -1;
+
+ return 0;
+}
+
+#else
+
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
+ return -1; // Not supported.
+}
+
+#endif
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
new file mode 100644
index 0000000..bfc029c
--- /dev/null
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -0,0 +1,1449 @@
+//===--- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Bitcode writer implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "ValueEnumerator.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Program.h"
+using namespace llvm;
+
+/// These are manifest constants used by the bitcode writer. They do not need to
+/// be kept in sync with the reader, but need to be consistent within this file.
+enum {
+ CurVersion = 0,
+
+ // VALUE_SYMTAB_BLOCK abbrev id's.
+ VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ VST_ENTRY_7_ABBREV,
+ VST_ENTRY_6_ABBREV,
+ VST_BBENTRY_6_ABBREV,
+
+ // CONSTANTS_BLOCK abbrev id's.
+ CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ CONSTANTS_INTEGER_ABBREV,
+ CONSTANTS_CE_CAST_Abbrev,
+ CONSTANTS_NULL_Abbrev,
+
+ // FUNCTION_BLOCK abbrev id's.
+ FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ FUNCTION_INST_BINOP_ABBREV,
+ FUNCTION_INST_CAST_ABBREV,
+ FUNCTION_INST_RET_VOID_ABBREV,
+ FUNCTION_INST_RET_VAL_ABBREV,
+ FUNCTION_INST_UNREACHABLE_ABBREV
+};
+
+
+static unsigned GetEncodedCastOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: assert(0 && "Unknown cast instruction!");
+ case Instruction::Trunc : return bitc::CAST_TRUNC;
+ case Instruction::ZExt : return bitc::CAST_ZEXT;
+ case Instruction::SExt : return bitc::CAST_SEXT;
+ case Instruction::FPToUI : return bitc::CAST_FPTOUI;
+ case Instruction::FPToSI : return bitc::CAST_FPTOSI;
+ case Instruction::UIToFP : return bitc::CAST_UITOFP;
+ case Instruction::SIToFP : return bitc::CAST_SITOFP;
+ case Instruction::FPTrunc : return bitc::CAST_FPTRUNC;
+ case Instruction::FPExt : return bitc::CAST_FPEXT;
+ case Instruction::PtrToInt: return bitc::CAST_PTRTOINT;
+ case Instruction::IntToPtr: return bitc::CAST_INTTOPTR;
+ case Instruction::BitCast : return bitc::CAST_BITCAST;
+ }
+}
+
+static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: assert(0 && "Unknown binary instruction!");
+ case Instruction::Add: return bitc::BINOP_ADD;
+ case Instruction::Sub: return bitc::BINOP_SUB;
+ case Instruction::Mul: return bitc::BINOP_MUL;
+ case Instruction::UDiv: return bitc::BINOP_UDIV;
+ case Instruction::FDiv:
+ case Instruction::SDiv: return bitc::BINOP_SDIV;
+ case Instruction::URem: return bitc::BINOP_UREM;
+ case Instruction::FRem:
+ case Instruction::SRem: return bitc::BINOP_SREM;
+ case Instruction::Shl: return bitc::BINOP_SHL;
+ case Instruction::LShr: return bitc::BINOP_LSHR;
+ case Instruction::AShr: return bitc::BINOP_ASHR;
+ case Instruction::And: return bitc::BINOP_AND;
+ case Instruction::Or: return bitc::BINOP_OR;
+ case Instruction::Xor: return bitc::BINOP_XOR;
+ }
+}
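+// Note: FDiv and FRem fall through to the SDIV/SREM codes above; the reader
+// is expected to disambiguate them by the operand type (FP vs. integer).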
+
+
+
+static void WriteStringRecord(unsigned Code, const std::string &Str,
+ unsigned AbbrevToUse, BitstreamWriter &Stream) {
+ SmallVector<unsigned, 64> Vals;
+
+ // Code: [strchar x N]
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ Vals.push_back(Str[i]);
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+}
+
+// Emit information about parameter attributes.
+static void WriteAttributeTable(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const std::vector<AttrListPtr> &Attrs = VE.getAttributes();
+ if (Attrs.empty()) return;
+
+ Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+ const AttrListPtr &A = Attrs[i];
+ for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) {
+ const AttributeWithIndex &PAWI = A.getSlot(i);
+ Record.push_back(PAWI.Index);
+
+ // FIXME: remove in LLVM 3.0
+ // Store the alignment in the bitcode as a 16-bit raw value instead of a
+ // 5-bit log2 encoded value. Shift the bits above the alignment up by
+ // 11 bits.
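+      // Concretely, (1ull<<16) << (field-1) plants 2^(field-1) into the
+      // 16-bit slot at bit 16; assuming the usual log2(align)+1 field
+      // encoding, a field value of 5 becomes a raw alignment of 16.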
+ uint64_t FauxAttr = PAWI.Attrs & 0xffff;
+ if (PAWI.Attrs & Attribute::Alignment)
+ FauxAttr |= (1ull<<16)<<(((PAWI.Attrs & Attribute::Alignment)-1) >> 16);
+ FauxAttr |= (PAWI.Attrs & (0x3FFull << 21)) << 11;
+
+ Record.push_back(FauxAttr);
+ }
+
+ Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+/// WriteTypeTable - Write out the type table for a module.
+static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ const ValueEnumerator::TypeList &TypeList = VE.getTypes();
+
+ Stream.EnterSubblock(bitc::TYPE_BLOCK_ID, 4 /*count from # abbrevs */);
+ SmallVector<uint64_t, 64> TypeVals;
+
+ // Abbrev for TYPE_CODE_POINTER.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
+ unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_FUNCTION.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
+ Abbv->Add(BitCodeAbbrevOp(0)); // FIXME: DEAD value, remove in LLVM 3.0
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_ARRAY.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Emit an entry count so the reader can reserve space.
+ TypeVals.push_back(TypeList.size());
+ Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
+ TypeVals.clear();
+
+ // Loop over all of the types, emitting each in turn.
+ for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
+ const Type *T = TypeList[i].first;
+ int AbbrevToUse = 0;
+ unsigned Code = 0;
+
+ switch (T->getTypeID()) {
+ default: assert(0 && "Unknown type!");
+ case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
+ case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
+ case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
+ case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
+ case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
+ case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
+ case Type::OpaqueTyID: Code = bitc::TYPE_CODE_OPAQUE; break;
+ case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+ case Type::IntegerTyID:
+ // INTEGER: [width]
+ Code = bitc::TYPE_CODE_INTEGER;
+ TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
+ break;
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(T);
+ // POINTER: [pointee type, address space]
+ Code = bitc::TYPE_CODE_POINTER;
+ TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
+ unsigned AddressSpace = PTy->getAddressSpace();
+ TypeVals.push_back(AddressSpace);
+ if (AddressSpace == 0) AbbrevToUse = PtrAbbrev;
+ break;
+ }
+ case Type::FunctionTyID: {
+ const FunctionType *FT = cast<FunctionType>(T);
+ // FUNCTION: [isvararg, attrid, retty, paramty x N]
+ Code = bitc::TYPE_CODE_FUNCTION;
+ TypeVals.push_back(FT->isVarArg());
+ TypeVals.push_back(0); // FIXME: DEAD: remove in llvm 3.0
+ TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
+ TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
+ AbbrevToUse = FunctionAbbrev;
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType *ST = cast<StructType>(T);
+ // STRUCT: [ispacked, eltty x N]
+ Code = bitc::TYPE_CODE_STRUCT;
+ TypeVals.push_back(ST->isPacked());
+ // Output all of the element types.
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ TypeVals.push_back(VE.getTypeID(*I));
+ AbbrevToUse = StructAbbrev;
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType *AT = cast<ArrayType>(T);
+ // ARRAY: [numelts, eltty]
+ Code = bitc::TYPE_CODE_ARRAY;
+ TypeVals.push_back(AT->getNumElements());
+ TypeVals.push_back(VE.getTypeID(AT->getElementType()));
+ AbbrevToUse = ArrayAbbrev;
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType *VT = cast<VectorType>(T);
+ // VECTOR [numelts, eltty]
+ Code = bitc::TYPE_CODE_VECTOR;
+ TypeVals.push_back(VT->getNumElements());
+ TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+ break;
+ }
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
+ TypeVals.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static unsigned getEncodedLinkage(const GlobalValue *GV) {
+ switch (GV->getLinkage()) {
+ default: assert(0 && "Invalid linkage!");
+ case GlobalValue::GhostLinkage: // Map ghost linkage onto external.
+ case GlobalValue::ExternalLinkage: return 0;
+ case GlobalValue::WeakAnyLinkage: return 1;
+ case GlobalValue::AppendingLinkage: return 2;
+ case GlobalValue::InternalLinkage: return 3;
+ case GlobalValue::LinkOnceAnyLinkage: return 4;
+ case GlobalValue::DLLImportLinkage: return 5;
+ case GlobalValue::DLLExportLinkage: return 6;
+ case GlobalValue::ExternalWeakLinkage: return 7;
+ case GlobalValue::CommonLinkage: return 8;
+ case GlobalValue::PrivateLinkage: return 9;
+ case GlobalValue::WeakODRLinkage: return 10;
+ case GlobalValue::LinkOnceODRLinkage: return 11;
+ case GlobalValue::AvailableExternallyLinkage: return 12;
+ }
+}
+
+static unsigned getEncodedVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ default: assert(0 && "Invalid visibility!");
+ case GlobalValue::DefaultVisibility: return 0;
+ case GlobalValue::HiddenVisibility: return 1;
+ case GlobalValue::ProtectedVisibility: return 2;
+ }
+}
+
+// Emit top-level description of module, including target triple, inline asm,
+// descriptors for global variables, and function prototype info.
+static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ // Emit the list of dependent libraries for the Module.
+ for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+ WriteStringRecord(bitc::MODULE_CODE_DEPLIB, *I, 0/*TODO*/, Stream);
+
+ // Emit various pieces of data attached to a module.
+ if (!M->getTargetTriple().empty())
+ WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
+ 0/*TODO*/, Stream);
+ if (!M->getDataLayout().empty())
+ WriteStringRecord(bitc::MODULE_CODE_DATALAYOUT, M->getDataLayout(),
+ 0/*TODO*/, Stream);
+ if (!M->getModuleInlineAsm().empty())
+ WriteStringRecord(bitc::MODULE_CODE_ASM, M->getModuleInlineAsm(),
+ 0/*TODO*/, Stream);
+
+ // Emit information about sections and GC, computing how many there are. Also
+ // compute the maximum alignment value.
+ std::map<std::string, unsigned> SectionMap;
+ std::map<std::string, unsigned> GCMap;
+ unsigned MaxAlignment = 0;
+ unsigned MaxGlobalType = 0;
+ for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+ GV != E; ++GV) {
+ MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
+ MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
+
+ if (!GV->hasSection()) continue;
+ // Give section names unique ID's.
+ unsigned &Entry = SectionMap[GV->getSection()];
+ if (Entry != 0) continue;
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ MaxAlignment = std::max(MaxAlignment, F->getAlignment());
+ if (F->hasSection()) {
+ // Give section names unique ID's.
+ unsigned &Entry = SectionMap[F->getSection()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(),
+ 0/*TODO*/, Stream);
+ Entry = SectionMap.size();
+ }
+ }
+ if (F->hasGC()) {
+ // Same for GC names.
+ unsigned &Entry = GCMap[F->getGC()];
+ if (!Entry) {
+ WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(),
+ 0/*TODO*/, Stream);
+ Entry = GCMap.size();
+ }
+ }
+ }
+
+ // Emit abbrev for globals, now that we know # sections and max alignment.
+ unsigned SimpleGVarAbbrev = 0;
+ if (!M->global_empty()) {
+ // Add an abbrev for common globals with no visibility or thread localness.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxGlobalType+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Constant.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Linkage.
+ if (MaxAlignment == 0) // Alignment.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else {
+ unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxEncAlignment+1)));
+ }
+ if (SectionMap.empty()) // Section.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(SectionMap.size()+1)));
+ // Don't bother emitting vis + thread local.
+ SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ // Emit the global variable information.
+ SmallVector<unsigned, 64> Vals;
+ for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+ GV != E; ++GV) {
+ unsigned AbbrevToUse = 0;
+
+ // GLOBALVAR: [type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal]
+ Vals.push_back(VE.getTypeID(GV->getType()));
+ Vals.push_back(GV->isConstant());
+ Vals.push_back(GV->isDeclaration() ? 0 :
+ (VE.getValueID(GV->getInitializer()) + 1));
+ Vals.push_back(getEncodedLinkage(GV));
+ Vals.push_back(Log2_32(GV->getAlignment())+1);
+ Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
+ if (GV->isThreadLocal() ||
+ GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ Vals.push_back(getEncodedVisibility(GV));
+ Vals.push_back(GV->isThreadLocal());
+ } else {
+ AbbrevToUse = SimpleGVarAbbrev;
+ }
+
+ Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the function proto information.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ // FUNCTION: [type, callingconv, isproto, paramattr,
+ // linkage, alignment, section, visibility, gc]
+ Vals.push_back(VE.getTypeID(F->getType()));
+ Vals.push_back(F->getCallingConv());
+ Vals.push_back(F->isDeclaration());
+ Vals.push_back(getEncodedLinkage(F));
+ Vals.push_back(VE.getAttributeID(F->getAttributes()));
+ Vals.push_back(Log2_32(F->getAlignment())+1);
+ Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
+ Vals.push_back(getEncodedVisibility(F));
+ Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
+
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+
+ // Emit the alias information.
+ for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
+ AI != E; ++AI) {
+ Vals.push_back(VE.getTypeID(AI->getType()));
+ Vals.push_back(VE.getValueID(AI->getAliasee()));
+ Vals.push_back(getEncodedLinkage(AI));
+ Vals.push_back(getEncodedVisibility(AI));
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+}
+
+
+static void WriteConstants(unsigned FirstVal, unsigned LastVal,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream, bool isGlobal) {
+ if (FirstVal == LastVal) return;
+
+ Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
+
+ unsigned AggregateAbbrev = 0;
+ unsigned String8Abbrev = 0;
+ unsigned CString7Abbrev = 0;
+ unsigned CString6Abbrev = 0;
+ unsigned MDString8Abbrev = 0;
+ unsigned MDString6Abbrev = 0;
+ // If this is a constant pool for the module, emit module-specific abbrevs.
+ if (isGlobal) {
+ // Abbrev for CST_CODE_AGGREGATE.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
+ AggregateAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for CST_CODE_STRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ String8Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_CSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ CString7Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_CSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ CString6Abbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for CST_CODE_MDSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ MDString8Abbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for CST_CODE_MDSTRING.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ MDString6Abbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ SmallVector<uint64_t, 64> Record;
+
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+ const Type *LastTy = 0;
+ for (unsigned i = FirstVal; i != LastVal; ++i) {
+ const Value *V = Vals[i].first;
+ // If we need to switch types, do so now.
+ if (V->getType() != LastTy) {
+ LastTy = V->getType();
+ Record.push_back(VE.getTypeID(LastTy));
+ Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record,
+ CONSTANTS_SETTYPE_ABBREV);
+ Record.clear();
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Record.push_back(unsigned(IA->hasSideEffects()));
+
+ // Add the asm string.
+ const std::string &AsmStr = IA->getAsmString();
+ Record.push_back(AsmStr.size());
+ for (unsigned i = 0, e = AsmStr.size(); i != e; ++i)
+ Record.push_back(AsmStr[i]);
+
+ // Add the constraint string.
+ const std::string &ConstraintStr = IA->getConstraintString();
+ Record.push_back(ConstraintStr.size());
+ for (unsigned i = 0, e = ConstraintStr.size(); i != e; ++i)
+ Record.push_back(ConstraintStr[i]);
+ Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
+ Record.clear();
+ continue;
+ }
+ const Constant *C = cast<Constant>(V);
+ unsigned Code = -1U;
+ unsigned AbbrevToUse = 0;
+ if (C->isNullValue()) {
+ Code = bitc::CST_CODE_NULL;
+ } else if (isa<UndefValue>(C)) {
+ Code = bitc::CST_CODE_UNDEF;
+ } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
+ if (IV->getBitWidth() <= 64) {
+ int64_t V = IV->getSExtValue();
+ if (V >= 0)
+ Record.push_back(V << 1);
+ else
+ Record.push_back((-V << 1) | 1);
+ Code = bitc::CST_CODE_INTEGER;
+ AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+ } else { // Wide integers, > 64 bits in size.
+ // We have an arbitrary precision integer value to write whose
+ // bit width is > 64. However, in canonical unsigned integer
+ // format it is likely that the high bits are going to be zero.
+ // So, we only write the number of active words.
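+        // Each active word is emitted below with the same sign-bit-in-LSB
+        // scheme used for CST_CODE_INTEGER, so the reader can reassemble
+        // the full-width value.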
+ unsigned NWords = IV->getValue().getActiveWords();
+ const uint64_t *RawWords = IV->getValue().getRawData();
+ for (unsigned i = 0; i != NWords; ++i) {
+ int64_t V = RawWords[i];
+ if (V >= 0)
+ Record.push_back(V << 1);
+ else
+ Record.push_back((-V << 1) | 1);
+ }
+ Code = bitc::CST_CODE_WIDE_INTEGER;
+ }
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ Code = bitc::CST_CODE_FLOAT;
+ const Type *Ty = CFP->getType();
+ if (Ty == Type::FloatTy || Ty == Type::DoubleTy) {
+ Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ } else if (Ty == Type::X86_FP80Ty) {
+        // Keep the APInt in a local ("api") so the storage it owns stays
+        // alive while we read its raw words. The bits are not in the same
+        // order as a normal i80 APInt; compensate below.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back((p[1] << 48) | (p[0] >> 16));
+ Record.push_back(p[0] & 0xffffLL);
+ } else if (Ty == Type::FP128Ty || Ty == Type::PPC_FP128Ty) {
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back(p[0]);
+ Record.push_back(p[1]);
+ } else {
+ assert (0 && "Unknown FP type!");
+ }
+ } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+ // Emit constant strings specially.
+ unsigned NumOps = C->getNumOperands();
+ // If this is a null-terminated string, use the denser CSTRING encoding.
+ if (C->getOperand(NumOps-1)->isNullValue()) {
+ Code = bitc::CST_CODE_CSTRING;
+ --NumOps; // Don't encode the null, which isn't allowed by char6.
+ } else {
+ Code = bitc::CST_CODE_STRING;
+ AbbrevToUse = String8Abbrev;
+ }
+ bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
+ bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
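+      // Char6 can only pack [a-zA-Z0-9._]; 7-bit handles any ASCII byte.
+      // Scan the characters below and fall back to a wider encoding if
+      // needed.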
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned char V = cast<ConstantInt>(C->getOperand(i))->getZExtValue();
+ Record.push_back(V);
+ isCStr7 &= (V & 128) == 0;
+ if (isCStrChar6)
+ isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
+ }
+
+ if (isCStrChar6)
+ AbbrevToUse = CString6Abbrev;
+ else if (isCStr7)
+ AbbrevToUse = CString7Abbrev;
+ } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
+ isa<ConstantVector>(V)) {
+ Code = bitc::CST_CODE_AGGREGATE;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ AbbrevToUse = AggregateAbbrev;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ default:
+ if (Instruction::isCast(CE->getOpcode())) {
+ Code = bitc::CST_CODE_CE_CAST;
+ Record.push_back(GetEncodedCastOpcode(CE->getOpcode()));
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ AbbrevToUse = CONSTANTS_CE_CAST_Abbrev;
+ } else {
+ assert(CE->getNumOperands() == 2 && "Unknown constant expr!");
+ Code = bitc::CST_CODE_CE_BINOP;
+ Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ }
+ break;
+ case Instruction::GetElementPtr:
+ Code = bitc::CST_CODE_CE_GEP;
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ }
+ break;
+ case Instruction::Select:
+ Code = bitc::CST_CODE_CE_SELECT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::CST_CODE_CE_EXTRACTELT;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::CST_CODE_CE_INSERTELT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ShuffleVector:
+ // If the return type and argument types are the same, this is a
+ // standard shufflevector instruction. If the types are different,
+ // then the shuffle is widening or truncating the input vectors, and
+ // the argument type must also be encoded.
+ if (C->getType() == C->getOperand(0)->getType()) {
+ Code = bitc::CST_CODE_CE_SHUFFLEVEC;
+ } else {
+ Code = bitc::CST_CODE_CE_SHUFVEC_EX;
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ }
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::VICmp:
+ case Instruction::VFCmp:
+ if (isa<VectorType>(C->getOperand(0)->getType())
+ && (CE->getOpcode() == Instruction::ICmp
+ || CE->getOpcode() == Instruction::FCmp)) {
+ // compare returning vector of Int1Ty
+ assert(0 && "Unsupported constant!");
+ } else {
+ Code = bitc::CST_CODE_CE_CMP;
+ }
+ Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(CE->getPredicate());
+ break;
+ }
+ } else if (const MDString *S = dyn_cast<MDString>(C)) {
+ Code = bitc::CST_CODE_MDSTRING;
+ AbbrevToUse = MDString6Abbrev;
+ for (unsigned i = 0, e = S->size(); i != e; ++i) {
+ char V = S->begin()[i];
+ Record.push_back(V);
+
+ if (!BitCodeAbbrevOp::isChar6(V))
+ AbbrevToUse = MDString8Abbrev;
+ }
+ } else if (const MDNode *N = dyn_cast<MDNode>(C)) {
+ Code = bitc::CST_CODE_MDNODE;
+ for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) {
+ if (N->getElement(i)) {
+ Record.push_back(VE.getTypeID(N->getElement(i)->getType()));
+ Record.push_back(VE.getValueID(N->getElement(i)));
+ } else {
+ Record.push_back(VE.getTypeID(Type::VoidTy));
+ Record.push_back(0);
+ }
+ }
+ } else {
+ assert(0 && "Unknown constant!");
+ }
+ Stream.EmitRecord(Code, Record, AbbrevToUse);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+static void WriteModuleConstants(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+
+ // Find the first constant to emit, which is the first non-globalvalue value.
+ // We know globalvalues have been emitted by WriteModuleInfo.
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ if (!isa<GlobalValue>(Vals[i].first)) {
+ WriteConstants(i, Vals.size(), VE, Stream, true);
+ return;
+ }
+ }
+}
+
+/// PushValueAndType - The file has to encode both the value and type id for
+/// many values, because we need to know what type to create for forward
+/// references. However, most operands are not forward references, so this type
+/// field is not needed.
+///
+/// This function adds V's value ID to Vals. If the value ID is greater than
+/// or equal to the instruction ID, then it is a forward reference, and the
+/// type ID is included as well.
+static bool PushValueAndType(const Value *V, unsigned InstID,
+ SmallVector<unsigned, 64> &Vals,
+ ValueEnumerator &VE) {
+ unsigned ValID = VE.getValueID(V);
+ Vals.push_back(ValID);
+ if (ValID >= InstID) {
+ Vals.push_back(VE.getTypeID(V->getType()));
+ return true;
+ }
+ return false;
+}
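+// Example: while emitting instruction #10, an operand with value ID 12 is not
+// yet defined, so its type ID is pushed as well; an operand with ID 7 is a
+// backward reference and needs no type.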
+
+/// WriteInstruction - Emit an instruction to the specified stream.
+static void WriteInstruction(const Instruction &I, unsigned InstID,
+ ValueEnumerator &VE, BitstreamWriter &Stream,
+ SmallVector<unsigned, 64> &Vals) {
+ unsigned Code = 0;
+ unsigned AbbrevToUse = 0;
+ switch (I.getOpcode()) {
+ default:
+ if (Instruction::isCast(I.getOpcode())) {
+ Code = bitc::FUNC_CODE_INST_CAST;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_CAST_ABBREV;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(GetEncodedCastOpcode(I.getOpcode()));
+ } else {
+ assert(isa<BinaryOperator>(I) && "Unknown instruction!");
+ Code = bitc::FUNC_CODE_INST_BINOP;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ Code = bitc::FUNC_CODE_INST_GEP;
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+ break;
+ case Instruction::ExtractValue: {
+ Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Vals.push_back(*i);
+ break;
+ }
+ case Instruction::InsertValue: {
+ Code = bitc::FUNC_CODE_INST_INSERTVAL;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+ const InsertValueInst *IVI = cast<InsertValueInst>(&I);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Vals.push_back(*i);
+ break;
+ }
+ case Instruction::Select:
+ Code = bitc::FUNC_CODE_INST_VSELECT;
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::FUNC_CODE_INST_EXTRACTELT;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::FUNC_CODE_INST_INSERTELT;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ break;
+ case Instruction::ShuffleVector:
+ Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(VE.getValueID(I.getOperand(2)));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::VICmp:
+ case Instruction::VFCmp:
+ if (I.getOpcode() == Instruction::ICmp
+ || I.getOpcode() == Instruction::FCmp) {
+ // compare returning Int1Ty or vector of Int1Ty
+ Code = bitc::FUNC_CODE_INST_CMP2;
+ } else {
+ Code = bitc::FUNC_CODE_INST_CMP;
+ }
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(I.getOperand(1)));
+ Vals.push_back(cast<CmpInst>(I).getPredicate());
+ break;
+
+ case Instruction::Ret:
+ {
+ Code = bitc::FUNC_CODE_INST_RET;
+ unsigned NumOperands = I.getNumOperands();
+ if (NumOperands == 0)
+ AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV;
+ else if (NumOperands == 1) {
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+ AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV;
+ } else {
+ for (unsigned i = 0, e = NumOperands; i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+ }
+ }
+ break;
+ case Instruction::Br:
+ {
+ Code = bitc::FUNC_CODE_INST_BR;
+ BranchInst &II(cast<BranchInst>(I));
+ Vals.push_back(VE.getValueID(II.getSuccessor(0)));
+ if (II.isConditional()) {
+ Vals.push_back(VE.getValueID(II.getSuccessor(1)));
+ Vals.push_back(VE.getValueID(II.getCondition()));
+ }
+ }
+ break;
+ case Instruction::Switch:
+ Code = bitc::FUNC_CODE_INST_SWITCH;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+ case Instruction::Invoke: {
+ const InvokeInst *II = cast<InvokeInst>(&I);
+ const Value *Callee(II->getCalledValue());
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Code = bitc::FUNC_CODE_INST_INVOKE;
+
+ Vals.push_back(VE.getAttributeID(II->getAttributes()));
+ Vals.push_back(II->getCallingConv());
+ Vals.push_back(VE.getValueID(II->getNormalDest()));
+ Vals.push_back(VE.getValueID(II->getUnwindDest()));
+ PushValueAndType(Callee, InstID, Vals, VE);
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i+3))); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = 3+FTy->getNumParams(), e = I.getNumOperands();
+ i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE); // vararg
+ }
+ break;
+ }
+ case Instruction::Unwind:
+ Code = bitc::FUNC_CODE_INST_UNWIND;
+ break;
+ case Instruction::Unreachable:
+ Code = bitc::FUNC_CODE_INST_UNREACHABLE;
+ AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
+ break;
+
+ case Instruction::PHI:
+ Code = bitc::FUNC_CODE_INST_PHI;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+
+ case Instruction::Malloc:
+ Code = bitc::FUNC_CODE_INST_MALLOC;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
+ Vals.push_back(Log2_32(cast<MallocInst>(I).getAlignment())+1);
+ break;
+
+ case Instruction::Free:
+ Code = bitc::FUNC_CODE_INST_FREE;
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+ break;
+
+ case Instruction::Alloca:
+ Code = bitc::FUNC_CODE_INST_ALLOCA;
+ Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
+ Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
+ break;
+
+ case Instruction::Load:
+ Code = bitc::FUNC_CODE_INST_LOAD;
+ if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr
+ AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
+
+ Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
+ Vals.push_back(cast<LoadInst>(I).isVolatile());
+ break;
+ case Instruction::Store:
+ Code = bitc::FUNC_CODE_INST_STORE2;
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // val.
+ Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
+ Vals.push_back(cast<StoreInst>(I).isVolatile());
+ break;
+ case Instruction::Call: {
+ const PointerType *PTy = cast<PointerType>(I.getOperand(0)->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ Code = bitc::FUNC_CODE_INST_CALL;
+
+ const CallInst *CI = cast<CallInst>(&I);
+ Vals.push_back(VE.getAttributeID(CI->getAttributes()));
+ Vals.push_back((CI->getCallingConv() << 1) | unsigned(CI->isTailCall()));
+ PushValueAndType(CI->getOperand(0), InstID, Vals, VE); // Callee
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i+1))); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ unsigned NumVarargs = I.getNumOperands()-1-FTy->getNumParams();
+ for (unsigned i = I.getNumOperands()-NumVarargs, e = I.getNumOperands();
+ i != e; ++i)
+ PushValueAndType(I.getOperand(i), InstID, Vals, VE); // varargs
+ }
+ break;
+ }
+ case Instruction::VAArg:
+ Code = bitc::FUNC_CODE_INST_VAARG;
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // valist.
+ Vals.push_back(VE.getTypeID(I.getType())); // restype.
+ break;
+ }
+
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+ Vals.clear();
+}
+
+// Emit names for globals/functions etc.
+static void WriteValueSymbolTable(const ValueSymbolTable &VST,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ if (VST.empty()) return;
+ Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+ // FIXME: Set up the abbrev, we know how many values there are!
+ // FIXME: We know if the type names can use 7-bit ascii.
+ SmallVector<unsigned, 64> NameVals;
+
+ for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
+ SI != SE; ++SI) {
+
+ const ValueName &Name = *SI;
+
+ // Figure out the encoding to use for the name.
+ bool is7Bit = true;
+ bool isChar6 = true;
+ for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength();
+ C != E; ++C) {
+ if (isChar6)
+ isChar6 = BitCodeAbbrevOp::isChar6(*C);
+ if ((unsigned char)*C & 128) {
+ is7Bit = false;
+ break; // don't bother scanning the rest.
+ }
+ }
+
+ unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+
+ // VST_ENTRY: [valueid, namechar x N]
+ // VST_BBENTRY: [bbid, namechar x N]
+ unsigned Code;
+ if (isa<BasicBlock>(SI->getValue())) {
+ Code = bitc::VST_CODE_BBENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_BBENTRY_6_ABBREV;
+ } else {
+ Code = bitc::VST_CODE_ENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_ENTRY_6_ABBREV;
+ else if (is7Bit)
+ AbbrevToUse = VST_ENTRY_7_ABBREV;
+ }
+
+ NameVals.push_back(VE.getValueID(SI->getValue()));
+ for (const char *P = Name.getKeyData(),
+ *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
+ NameVals.push_back((unsigned char)*P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, NameVals, AbbrevToUse);
+ NameVals.clear();
+ }
+ Stream.ExitBlock();
+}
+
+/// WriteFunction - Emit a function body to the module stream.
+static void WriteFunction(const Function &F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
+ VE.incorporateFunction(F);
+
+ SmallVector<unsigned, 64> Vals;
+
+ // Emit the number of basic blocks, so the reader can create them ahead of
+ // time.
+ Vals.push_back(VE.getBasicBlocks().size());
+ Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
+ Vals.clear();
+
+ // If there are function-local constants, emit them now.
+ unsigned CstStart, CstEnd;
+ VE.getFunctionConstantRange(CstStart, CstEnd);
+ WriteConstants(CstStart, CstEnd, VE, Stream, false);
+
+  // Keep a running counter of the current instruction ID.
+ unsigned InstID = CstEnd;
+
+ // Finally, emit all the instructions, in order.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ WriteInstruction(*I, InstID, VE, Stream, Vals);
+ if (I->getType() != Type::VoidTy)
+ ++InstID;
+ }
+
+ // Emit names for all the instructions etc.
+ WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream);
+
+ VE.purgeFunction();
+ Stream.ExitBlock();
+}
+
+/// WriteTypeSymbolTable - Emit a block for the specified type symtab.
+static void WriteTypeSymbolTable(const TypeSymbolTable &TST,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ if (TST.empty()) return;
+
+ Stream.EnterSubblock(bitc::TYPE_SYMTAB_BLOCK_ID, 3);
+
+ // 7-bit fixed width VST_CODE_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ unsigned V7Abbrev = Stream.EmitAbbrev(Abbv);
+
+ SmallVector<unsigned, 64> NameVals;
+
+ for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+ TI != TE; ++TI) {
+ // TST_ENTRY: [typeid, namechar x N]
+ NameVals.push_back(VE.getTypeID(TI->second));
+
+ const std::string &Str = TI->first;
+ bool is7Bit = true;
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ NameVals.push_back((unsigned char)Str[i]);
+ if (Str[i] & 128)
+ is7Bit = false;
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::VST_CODE_ENTRY, NameVals, is7Bit ? V7Abbrev : 0);
+ NameVals.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+// Emit blockinfo, which defines the standard abbreviations etc.
+static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ // We only want to emit block info records for blocks that have multiple
+ // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other
+  // blocks can define their abbrevs inline.
+ Stream.EnterBlockInfoBlock(2);
+
+ { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_8_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+
+ { // 7-bit fixed width VST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_7_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_ENTRY_6_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_BBENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ Abbv) != VST_BBENTRY_6_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+
+
+
+ { // SETTYPE abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_SETTYPE_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+
+ { // INTEGER abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_INTEGER_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+
+ { // CE_CAST abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_CE_CAST_Abbrev)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+ { // NULL abbrev for CONSTANTS_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+ Abbv) != CONSTANTS_NULL_Abbrev)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+
+ // FIXME: This should only use space for first class types!
+
+ { // INST_LOAD abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_LOAD_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_BINOP_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+ { // INST_CAST abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_CAST_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_RET_VOID_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_RET_VAL_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+ { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
+ Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV)
+ assert(0 && "Unexpected abbrev ordering!");
+ }
+
+ Stream.ExitBlock();
+}
+
+
+/// WriteModule - Emit the specified module to the bitstream.
+static void WriteModule(const Module *M, BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+ // Emit the version number if it is non-zero.
+ if (CurVersion) {
+ SmallVector<unsigned, 1> Vals;
+ Vals.push_back(CurVersion);
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
+ }
+
+ // Analyze the module, enumerating globals, functions, etc.
+ ValueEnumerator VE(M);
+
+ // Emit blockinfo, which defines the standard abbreviations etc.
+ WriteBlockInfo(VE, Stream);
+
+ // Emit information about parameter attributes.
+ WriteAttributeTable(VE, Stream);
+
+ // Emit information describing all of the types in the module.
+ WriteTypeTable(VE, Stream);
+
+ // Emit top-level description of module, including target triple, inline asm,
+ // descriptors for global variables, and function prototype info.
+ WriteModuleInfo(M, VE, Stream);
+
+ // Emit constants.
+ WriteModuleConstants(VE, Stream);
+
+ // If we have any aggregate values in the value table, purge them - these can
+ // only be used to initialize global variables. Doing so makes the value
+ // namespace smaller for code in functions.
+ int NumNonAggregates = VE.PurgeAggregateValues();
+ if (NumNonAggregates != -1) {
+ SmallVector<unsigned, 1> Vals;
+ Vals.push_back(NumNonAggregates);
+ Stream.EmitRecord(bitc::MODULE_CODE_PURGEVALS, Vals);
+ }
+
+ // Emit function bodies.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+ if (!I->isDeclaration())
+ WriteFunction(*I, VE, Stream);
+
+ // Emit the type symbol table information.
+ WriteTypeSymbolTable(M->getTypeSymbolTable(), VE, Stream);
+
+ // Emit names for globals/functions etc.
+ WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+
+ Stream.ExitBlock();
+}
+
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver. To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.
+///
+/// struct bc_header {
+/// uint32_t Magic; // 0x0B17C0DE
+/// uint32_t Version; // Version, currently always 0.
+/// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+/// uint32_t BitcodeSize; // Size of traditional bitcode file.
+/// uint32_t CPUType; // CPU specifier.
+/// ... potentially more later ...
+/// };
+enum {
+ DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size.
+ DarwinBCHeaderSize = 5*4
+};
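+// (The size field is the fourth 32-bit word of the header, hence the 3*4
+// byte offset; the five words above give the 20-byte header size.)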
+
+static void EmitDarwinBCHeader(BitstreamWriter &Stream,
+ const std::string &TT) {
+ unsigned CPUType = ~0U;
+
+ // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a
+ // magic number from /usr/include/mach/machine.h. It is ok to reproduce the
+ // specific constants here because they are implicitly part of the Darwin ABI.
+ enum {
+ DARWIN_CPU_ARCH_ABI64 = 0x01000000,
+ DARWIN_CPU_TYPE_X86 = 7,
+ DARWIN_CPU_TYPE_POWERPC = 18
+ };
+
+ if (TT.find("x86_64-") == 0)
+ CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+  else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+           TT[4] == '-' && TT[1] >= '3' && TT[1] <= '9')
+ CPUType = DARWIN_CPU_TYPE_X86;
+ else if (TT.find("powerpc-") == 0)
+ CPUType = DARWIN_CPU_TYPE_POWERPC;
+ else if (TT.find("powerpc64-") == 0)
+ CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
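+
+  // For example, "x86_64-apple-darwin9" yields
+  // DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64 = 0x01000007; an unmatched
+  // triple leaves CPUType as ~0U.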
+
+ // Traditional Bitcode starts after header.
+ unsigned BCOffset = DarwinBCHeaderSize;
+
+ Stream.Emit(0x0B17C0DE, 32);
+ Stream.Emit(0 , 32); // Version.
+ Stream.Emit(BCOffset , 32);
+ Stream.Emit(0 , 32); // Filled in later.
+ Stream.Emit(CPUType , 32);
+}
+
+/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
+/// finalize the header.
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+ // Update the size field in the header.
+ Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+
+ // If the file is not a multiple of 16 bytes, insert dummy padding.
+ while (BufferSize & 15) {
+ Stream.Emit(0, 8);
+ ++BufferSize;
+ }
+}
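+// For example, a 43-byte buffer gets its size field backpatched to 23
+// (43 minus the 20-byte header) and is then padded with five zero bytes to
+// reach 48, the next multiple of 16.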
+
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
+ raw_os_ostream RawOut(Out);
+ // If writing to stdout, set binary mode.
+ if (llvm::cout == Out)
+ sys::Program::ChangeStdoutToBinary();
+ WriteBitcodeToFile(M, RawOut);
+}
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+ std::vector<unsigned char> Buffer;
+ BitstreamWriter Stream(Buffer);
+
+ Buffer.reserve(256*1024);
+
+  WriteBitcodeToStream(M, Stream);
+
+ // If writing to stdout, set binary mode.
+ if (&llvm::outs() == &Out)
+ sys::Program::ChangeStdoutToBinary();
+
+ // Write the generated bitstream to "Out".
+ Out.write((char*)&Buffer.front(), Buffer.size());
+
+ // Make sure it hits disk now.
+ Out.flush();
+}
+
+/// WriteBitcodeToStream - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
+ // If this is darwin, emit a file header and trailer if needed.
+ bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
+ if (isDarwin)
+ EmitDarwinBCHeader(Stream, M->getTargetTriple());
+
+ // Emit the file header.
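+  // The two chars and four nibbles below pack (low nibble first) into the
+  // 4-byte bitcode magic "BC\xC0\xDE".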
+ Stream.Emit((unsigned)'B', 8);
+ Stream.Emit((unsigned)'C', 8);
+ Stream.Emit(0x0, 4);
+ Stream.Emit(0xC, 4);
+ Stream.Emit(0xE, 4);
+ Stream.Emit(0xD, 4);
+
+ // Emit the module.
+ WriteModule(M, Stream);
+
+ if (isDarwin)
+ EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
+}
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
new file mode 100644
index 0000000..209cf09
--- /dev/null
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -0,0 +1,56 @@
+//===--- Bitcode/Writer/BitcodeWriterPass.cpp - Bitcode Writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BitcodeWriterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ class WriteBitcodePass : public ModulePass {
+ // FIXME: Kill off std::ostream
+ std::ostream *Out;
+ raw_ostream *RawOut; // raw_ostream to print on
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit WriteBitcodePass(std::ostream &o)
+ : ModulePass(&ID), Out(&o), RawOut(0) {}
+ explicit WriteBitcodePass(raw_ostream &o)
+ : ModulePass(&ID), Out(0), RawOut(&o) {}
+
+ const char *getPassName() const { return "Bitcode Writer"; }
+
+ bool runOnModule(Module &M) {
+ if (Out) {
+ WriteBitcodeToFile(&M, *Out);
+ } else {
+ WriteBitcodeToFile(&M, *RawOut);
+ }
+ return false;
+ }
+ };
+}
+
+char WriteBitcodePass::ID = 0;
+
+/// CreateBitcodeWriterPass - Create and return a pass that writes the module
+/// to the specified ostream.
+ModulePass *llvm::CreateBitcodeWriterPass(std::ostream &Str) {
+ return new WriteBitcodePass(Str);
+}
+
+
+/// createBitcodeWriterPass - Create and return a pass that writes the module
+/// to the specified ostream.
+ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
+ return new WriteBitcodePass(Str);
+}
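+
+// Typical usage is to schedule the pass on a PassManager (a sketch; PM, OS,
+// and M are assumed to exist):
+//
+//   PassManager PM;
+//   PM.add(createBitcodeWriterPass(OS)); // OS is a raw_ostream
+//   PM.run(M);                           // serializes module M to OS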
diff --git a/lib/Bitcode/Writer/CMakeLists.txt b/lib/Bitcode/Writer/CMakeLists.txt
new file mode 100644
index 0000000..ac5bb99
--- /dev/null
+++ b/lib/Bitcode/Writer/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_llvm_library(LLVMBitWriter
+ BitWriter.cpp
+ BitcodeWriter.cpp
+ BitcodeWriterPass.cpp
+ Serialize.cpp
+ SerializeAPFloat.cpp
+ SerializeAPInt.cpp
+ ValueEnumerator.cpp
+ )
diff --git a/lib/Bitcode/Writer/Makefile b/lib/Bitcode/Writer/Makefile
new file mode 100644
index 0000000..7b0bd72
--- /dev/null
+++ b/lib/Bitcode/Writer/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Bitcode/Writer/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMBitWriter
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bitcode/Writer/Serialize.cpp b/lib/Bitcode/Writer/Serialize.cpp
new file mode 100644
index 0000000..79464a6
--- /dev/null
+++ b/lib/Bitcode/Writer/Serialize.cpp
@@ -0,0 +1,118 @@
+//==- Serialize.cpp - Generic Object Serialization to Bitcode ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the internal methods used for object serialization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/Serialize.h"
+#include "string.h"
+
+#ifdef DEBUG_BACKPATCH
+#include "llvm/Support/Streams.h"
+#endif
+
+using namespace llvm;
+
+Serializer::Serializer(BitstreamWriter& stream)
+ : Stream(stream), BlockLevel(0) {}
+
+Serializer::~Serializer() {
+ if (inRecord())
+ EmitRecord();
+
+ while (BlockLevel > 0)
+ Stream.ExitBlock();
+
+ Stream.FlushToWord();
+}
+
+void Serializer::EmitRecord() {
+ assert(Record.size() > 0 && "Cannot emit empty record.");
+  Stream.EmitRecord(8, Record);
+ Record.clear();
+}
+
+void Serializer::EnterBlock(unsigned BlockID, unsigned CodeLen) {
+  FlushRecord();
+  Stream.EnterSubblock(BlockID, CodeLen);
+ ++BlockLevel;
+}
+
+void Serializer::ExitBlock() {
+ assert (BlockLevel > 0);
+ --BlockLevel;
+ FlushRecord();
+ Stream.ExitBlock();
+}
+
+void Serializer::EmitInt(uint64_t X) {
+ assert (BlockLevel > 0);
+ Record.push_back(X);
+}
+
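+// EmitSInt folds the sign into the low bit: for example, X = 3 is recorded
+// as 6 and X = -3 as 7, so small magnitudes stay small in the encoded record.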
+void Serializer::EmitSInt(int64_t X) {
+ if (X >= 0)
+ EmitInt(X << 1);
+ else
+ EmitInt((-X << 1) | 1);
+}
+
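+// Strings are recorded length-prefixed: the character count first, then one
+// record element per character.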
+void Serializer::EmitCStr(const char* s, const char* end) {
+ Record.push_back(end - s);
+
+ while(s != end) {
+ Record.push_back(*s);
+ ++s;
+ }
+}
+
+void Serializer::EmitCStr(const char* s) {
+  EmitCStr(s, s + strlen(s));
+}
+
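+// Pointer IDs are handed out densely starting at 1; ID 0 is reserved for
+// null pointers.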
+SerializedPtrID Serializer::getPtrId(const void* ptr) {
+ if (!ptr)
+ return 0;
+
+ MapTy::iterator I = PtrMap.find(ptr);
+
+ if (I == PtrMap.end()) {
+ unsigned id = PtrMap.size()+1;
+#ifdef DEBUG_BACKPATCH
+ llvm::cerr << "Registered PTR: " << ptr << " => " << id << "\n";
+#endif
+ PtrMap[ptr] = id;
+ return id;
+ }
+ else return I->second;
+}
+
+bool Serializer::isRegistered(const void* ptr) const {
+ MapTy::const_iterator I = PtrMap.find(ptr);
+ return I != PtrMap.end();
+}
+
+
+#define INT_EMIT(TYPE)\
+void SerializeTrait<TYPE>::Emit(Serializer&S, TYPE X) { S.EmitInt(X); }
+
+INT_EMIT(bool)
+INT_EMIT(unsigned char)
+INT_EMIT(unsigned short)
+INT_EMIT(unsigned int)
+INT_EMIT(unsigned long)
+
+#define SINT_EMIT(TYPE)\
+void SerializeTrait<TYPE>::Emit(Serializer&S, TYPE X) { S.EmitSInt(X); }
+
+SINT_EMIT(signed char)
+SINT_EMIT(signed short)
+SINT_EMIT(signed int)
+SINT_EMIT(signed long)
diff --git a/lib/Bitcode/Writer/SerializeAPFloat.cpp b/lib/Bitcode/Writer/SerializeAPFloat.cpp
new file mode 100644
index 0000000..25d954f
--- /dev/null
+++ b/lib/Bitcode/Writer/SerializeAPFloat.cpp
@@ -0,0 +1,21 @@
+//===-- SerializeAPFloat.cpp - Serialization for APFloat -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements serialization of APFloat.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Bitcode/Serialize.h"
+
+using namespace llvm;
+
+void APFloat::Emit(Serializer& S) const {
+ S.Emit(bitcastToAPInt());
+}
diff --git a/lib/Bitcode/Writer/SerializeAPInt.cpp b/lib/Bitcode/Writer/SerializeAPInt.cpp
new file mode 100644
index 0000000..47792c7
--- /dev/null
+++ b/lib/Bitcode/Writer/SerializeAPInt.cpp
@@ -0,0 +1,31 @@
+//===-- SerializeAPInt.cpp - Serialization for APInts ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements serialization of APInts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/Bitcode/Serialize.h"
+#include <cassert>
+
+using namespace llvm;
+
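+// The wire format is: the bit width, then either the single 64-bit word (for
+// widths of 64 bits or less) or a word count followed by that many words.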
+void APInt::Emit(Serializer& S) const {
+ S.EmitInt(BitWidth);
+
+ if (isSingleWord())
+ S.EmitInt(VAL);
+ else {
+ uint32_t NumWords = getNumWords();
+ S.EmitInt(NumWords);
+ for (unsigned i = 0; i < NumWords; ++i)
+ S.EmitInt(pVal[i]);
+ }
+}
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
new file mode 100644
index 0000000..8002a36
--- /dev/null
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -0,0 +1,347 @@
+//===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueEnumerator class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueEnumerator.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Instructions.h"
+#include <algorithm>
+using namespace llvm;
+
+static bool isSingleValueType(const std::pair<const llvm::Type*,
+ unsigned int> &P) {
+ return P.first->isSingleValueType();
+}
+
+static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
+ return isa<IntegerType>(V.first->getType());
+}
+
+static bool CompareByFrequency(const std::pair<const llvm::Type*,
+ unsigned int> &P1,
+ const std::pair<const llvm::Type*,
+ unsigned int> &P2) {
+ return P1.second > P2.second;
+}
+
+/// ValueEnumerator - Enumerate module-level information.
+ValueEnumerator::ValueEnumerator(const Module *M) {
+ // Enumerate the global variables.
+ for (Module::const_global_iterator I = M->global_begin(),
+ E = M->global_end(); I != E; ++I)
+ EnumerateValue(I);
+
+ // Enumerate the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ EnumerateValue(I);
+ EnumerateAttributes(cast<Function>(I)->getAttributes());
+ }
+
+ // Enumerate the aliases.
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ EnumerateValue(I);
+
+  // Remember the cutoff between global values and other constants.
+ unsigned FirstConstant = Values.size();
+
+ // Enumerate the global variable initializers.
+ for (Module::const_global_iterator I = M->global_begin(),
+ E = M->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ EnumerateValue(I->getInitializer());
+
+ // Enumerate the aliasees.
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ EnumerateValue(I->getAliasee());
+
+ // Enumerate types used by the type symbol table.
+ EnumerateTypeSymbolTable(M->getTypeSymbolTable());
+
+ // Insert constants that are named at module level into the slot pool so that
+ // the module symbol table can refer to them...
+ EnumerateValueSymbolTable(M->getValueSymbolTable());
+
+ // Enumerate types used by function bodies and argument lists.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ EnumerateType(I->getType());
+
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI)
+ EnumerateOperandType(*OI);
+ EnumerateType(I->getType());
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ EnumerateAttributes(CI->getAttributes());
+ else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
+ EnumerateAttributes(II->getAttributes());
+ }
+ }
+
+ // Optimize constant ordering.
+ OptimizeConstants(FirstConstant, Values.size());
+
+  // Sort the type table by frequency so that the most commonly used types get
+  // the smallest IDs (and thus encode in the fewest bits).
+ std::stable_sort(Types.begin(), Types.end(), CompareByFrequency);
+
+ // Partition the Type ID's so that the single-value types occur before the
+ // aggregate types. This allows the aggregate types to be dropped from the
+ // type table after parsing the global variable initializers.
+ std::partition(Types.begin(), Types.end(), isSingleValueType);
+
+ // Now that we rearranged the type table, rebuild TypeMap.
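+  // (IDs in TypeMap are biased by one so that zero means "not yet seen".)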
+ for (unsigned i = 0, e = Types.size(); i != e; ++i)
+ TypeMap[Types[i].first] = i+1;
+}
+
+// Optimize constant ordering.
+namespace {
+ struct CstSortPredicate {
+ ValueEnumerator &VE;
+ explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
+ bool operator()(const std::pair<const Value*, unsigned> &LHS,
+ const std::pair<const Value*, unsigned> &RHS) {
+ // Sort by plane.
+ if (LHS.first->getType() != RHS.first->getType())
+ return VE.getTypeID(LHS.first->getType()) <
+ VE.getTypeID(RHS.first->getType());
+ // Then by frequency.
+ return LHS.second > RHS.second;
+ }
+ };
+}
+
+/// OptimizeConstants - Reorder constant pool for denser encoding.
+void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
+ if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
+
+ CstSortPredicate P(*this);
+ std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
+
+ // Ensure that integer constants are at the start of the constant pool. This
+ // is important so that GEP structure indices come before gep constant exprs.
+ std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
+ isIntegerValue);
+
+ // Rebuild the modified portion of ValueMap.
+ for (; CstStart != CstEnd; ++CstStart)
+ ValueMap[Values[CstStart].first] = CstStart+1;
+}
+
+
+/// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol
+/// table.
+void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) {
+ for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+ TI != TE; ++TI)
+ EnumerateType(TI->second);
+}
+
+/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
+/// table into the values table.
+void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
+ for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
+ VI != VE; ++VI)
+ EnumerateValue(VI->getValue());
+}
+
+void ValueEnumerator::EnumerateValue(const Value *V) {
+ assert(V->getType() != Type::VoidTy && "Can't insert void values!");
+
+ // Check to see if it's already in!
+ unsigned &ValueID = ValueMap[V];
+ if (ValueID) {
+ // Increment use count.
+ Values[ValueID-1].second++;
+ return;
+ }
+
+ // Enumerate the type of this value.
+ EnumerateType(V->getType());
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<GlobalValue>(C)) {
+ // Initializers for globals are handled explicitly elsewhere.
+ } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+ // Do not enumerate the initializers for an array of simple characters.
+      // The initializers just pollute the value table, and we emit the strings
+ // specially.
+ } else if (C->getNumOperands()) {
+ // If a constant has operands, enumerate them. This makes sure that if a
+ // constant has uses (for example an array of const ints), that they are
+ // inserted also.
+
+ // We prefer to enumerate them with values before we enumerate the user
+ // itself. This makes it more likely that we can avoid forward references
+ // in the reader. We know that there can be no cycles in the constants
+ // graph that don't go through a global variable.
+ for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
+ I != E; ++I)
+ EnumerateValue(*I);
+
+      // Finally, add the value. Doing this could invalidate the ValueID
+      // reference, so don't reuse it.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueMap[V] = Values.size();
+ return;
+ } else if (const MDNode *N = dyn_cast<MDNode>(C)) {
+ for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end();
+ I != E; ++I) {
+ if (*I)
+ EnumerateValue(*I);
+ else
+ EnumerateType(Type::VoidTy);
+ }
+
+ Values.push_back(std::make_pair(V, 1U));
+ ValueMap[V] = Values.size();
+ return;
+ }
+ }
+
+ // Add the value.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueID = Values.size();
+}
+
+
+void ValueEnumerator::EnumerateType(const Type *Ty) {
+ unsigned &TypeID = TypeMap[Ty];
+
+ if (TypeID) {
+ // If we've already seen this type, just increase its occurrence count.
+ Types[TypeID-1].second++;
+ return;
+ }
+
+ // First time we saw this type, add it.
+ Types.push_back(std::make_pair(Ty, 1U));
+ TypeID = Types.size();
+
+ // Enumerate subtypes.
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ EnumerateType(*I);
+}
+
+// Enumerate the types for the specified value. If the value is a constant,
+// walk through it, enumerating the types of the constant.
+void ValueEnumerator::EnumerateOperandType(const Value *V) {
+ EnumerateType(V->getType());
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ // If this constant is already enumerated, ignore it, we know its type must
+ // be enumerated.
+ if (ValueMap.count(V)) return;
+
+ // This constant may have operands, make sure to enumerate the types in
+ // them.
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ EnumerateOperandType(C->getOperand(i));
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ for (unsigned i = 0, e = N->getNumElements(); i != e; ++i)
+ EnumerateOperandType(N->getElement(i));
+ }
+ }
+}
+
+void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
+ if (PAL.isEmpty()) return; // null is always 0.
+ // Do a lookup.
+ unsigned &Entry = AttributeMap[PAL.getRawPointer()];
+ if (Entry == 0) {
+ // Never saw this before, add it.
+ Attributes.push_back(PAL);
+ Entry = Attributes.size();
+ }
+}
+
+
+/// PurgeAggregateValues - If there are any aggregate values at the end of the
+/// value list, remove them and return the count of the remaining values. If
+/// there are none, return -1.
+int ValueEnumerator::PurgeAggregateValues() {
+ // If there are no aggregate values at the end of the list, return -1.
+ if (Values.empty() || Values.back().first->getType()->isSingleValueType())
+ return -1;
+
+ // Otherwise, remove aggregate values...
+  while (!Values.empty() &&
+         !Values.back().first->getType()->isSingleValueType())
+ Values.pop_back();
+
+ // ... and return the new size.
+ return Values.size();
+}
+
+void ValueEnumerator::incorporateFunction(const Function &F) {
+ NumModuleValues = Values.size();
+
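+  // Function-local IDs continue after the module-level values: arguments
+  // first, then function-local constants, then instructions that produce
+  // values.
+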
+  // Add the function arguments to the value table.
+  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I)
+ EnumerateValue(I);
+
+ FirstFuncConstantID = Values.size();
+
+ // Add all function-level constants to the value table.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ EnumerateValue(*OI);
+ }
+ BasicBlocks.push_back(BB);
+ ValueMap[BB] = BasicBlocks.size();
+ }
+
+ // Optimize the constant layout.
+ OptimizeConstants(FirstFuncConstantID, Values.size());
+
+ // Add the function's parameter attributes so they are available for use in
+ // the function's instruction.
+ EnumerateAttributes(F.getAttributes());
+
+ FirstInstID = Values.size();
+
+ // Add all of the instructions.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+ if (I->getType() != Type::VoidTy)
+ EnumerateValue(I);
+ }
+ }
+}
+
+void ValueEnumerator::purgeFunction() {
+  // Remove purged values from the ValueMap.
+ for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
+ ValueMap.erase(Values[i].first);
+ for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
+ ValueMap.erase(BasicBlocks[i]);
+
+ Values.resize(NumModuleValues);
+ BasicBlocks.clear();
+}
+
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
new file mode 100644
index 0000000..bb0324b
--- /dev/null
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -0,0 +1,127 @@
+//===-- Bitcode/Writer/ValueEnumerator.h - Number values --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class gives values and types Unique ID's.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef VALUE_ENUMERATOR_H
+#define VALUE_ENUMERATOR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Attributes.h"
+#include <vector>
+
+namespace llvm {
+
+class Type;
+class Value;
+class BasicBlock;
+class Function;
+class Module;
+class AttrListPtr;
+class TypeSymbolTable;
+class ValueSymbolTable;
+
+class ValueEnumerator {
+public:
+ // For each type, we remember its Type* and occurrence frequency.
+ typedef std::vector<std::pair<const Type*, unsigned> > TypeList;
+
+ // For each value, we remember its Value* and occurrence frequency.
+ typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+private:
+ typedef DenseMap<const Type*, unsigned> TypeMapType;
+ TypeMapType TypeMap;
+ TypeList Types;
+
+ typedef DenseMap<const Value*, unsigned> ValueMapType;
+ ValueMapType ValueMap;
+ ValueList Values;
+
+ typedef DenseMap<void*, unsigned> AttributeMapType;
+ AttributeMapType AttributeMap;
+ std::vector<AttrListPtr> Attributes;
+
+ /// BasicBlocks - This contains all the basic blocks for the currently
+ /// incorporated function. Their reverse mapping is stored in ValueMap.
+ std::vector<const BasicBlock*> BasicBlocks;
+
+ /// When a function is incorporated, this is the size of the Values list
+ /// before incorporation.
+ unsigned NumModuleValues;
+ unsigned FirstFuncConstantID;
+ unsigned FirstInstID;
+
+ ValueEnumerator(const ValueEnumerator &); // DO NOT IMPLEMENT
+ void operator=(const ValueEnumerator &); // DO NOT IMPLEMENT
+public:
+ ValueEnumerator(const Module *M);
+
+ unsigned getValueID(const Value *V) const {
+ ValueMapType::const_iterator I = ValueMap.find(V);
+ assert(I != ValueMap.end() && "Value not in slotcalculator!");
+ return I->second-1;
+ }
+
+ unsigned getTypeID(const Type *T) const {
+ TypeMapType::const_iterator I = TypeMap.find(T);
+ assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
+ return I->second-1;
+ }
+
+ unsigned getAttributeID(const AttrListPtr &PAL) const {
+ if (PAL.isEmpty()) return 0; // Null maps to zero.
+ AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer());
+ assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
+ return I->second;
+ }
+
+ /// getFunctionConstantRange - Return the range of values that corresponds to
+ /// function-local constants.
+ void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
+ Start = FirstFuncConstantID;
+ End = FirstInstID;
+ }
+
+ const ValueList &getValues() const { return Values; }
+ const TypeList &getTypes() const { return Types; }
+ const std::vector<const BasicBlock*> &getBasicBlocks() const {
+ return BasicBlocks;
+ }
+ const std::vector<AttrListPtr> &getAttributes() const {
+ return Attributes;
+ }
+
+ /// PurgeAggregateValues - If there are any aggregate values at the end of the
+ /// value list, remove them and return the count of the remaining values. If
+ /// there are none, return -1.
+ int PurgeAggregateValues();
+
+ /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
+ /// use these two methods to get its data into the ValueEnumerator!
+ ///
+ void incorporateFunction(const Function &F);
+ void purgeFunction();
+
+private:
+ void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+
+ void EnumerateValue(const Value *V);
+ void EnumerateType(const Type *T);
+ void EnumerateOperandType(const Value *V);
+ void EnumerateAttributes(const AttrListPtr &PAL);
+
+ void EnumerateTypeSymbolTable(const TypeSymbolTable &ST);
+ void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 0000000..45462da
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,1724 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cerrno>
+using namespace llvm;
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+ cl::init(cl::BOU_UNSET));
+
+char AsmPrinter::ID = 0;
+AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL, bool VDef)
+ : MachineFunctionPass(&ID), FunctionNumber(0), OptLevel(OL), O(o),
+ TM(tm), TAI(T), TRI(tm.getRegisterInfo()),
+ IsInTextSection(false)
+{
+ switch (AsmVerbose) {
+ case cl::BOU_UNSET: VerboseAsm = VDef; break;
+ case cl::BOU_TRUE: VerboseAsm = true; break;
+ case cl::BOU_FALSE: VerboseAsm = false; break;
+ }
+}
+
+AsmPrinter::~AsmPrinter() {
+ for (gcp_iterator I = GCMetadataPrinters.begin(),
+ E = GCMetadataPrinters.end(); I != E; ++I)
+ delete I->second;
+}
+
+/// SwitchToTextSection - Switch to the specified text section of the executable
+/// if we are not already in it!
+///
+void AsmPrinter::SwitchToTextSection(const char *NewSection,
+ const GlobalValue *GV) {
+ std::string NS;
+ if (GV && GV->hasSection())
+ NS = TAI->getSwitchToSectionDirective() + GV->getSection();
+ else
+ NS = NewSection;
+
+ // If we're already in this section, we're done.
+ if (CurrentSection == NS) return;
+
+ // Close the current section, if applicable.
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+ O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
+
+ CurrentSection = NS;
+
+ if (!CurrentSection.empty())
+ O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n';
+
+ IsInTextSection = true;
+}
+
+/// SwitchToDataSection - Switch to the specified data section of the executable
+/// if we are not already in it!
+///
+void AsmPrinter::SwitchToDataSection(const char *NewSection,
+ const GlobalValue *GV) {
+ std::string NS;
+ if (GV && GV->hasSection())
+ NS = TAI->getSwitchToSectionDirective() + GV->getSection();
+ else
+ NS = NewSection;
+
+ // If we're already in this section, we're done.
+ if (CurrentSection == NS) return;
+
+ // Close the current section, if applicable.
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+ O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
+
+ CurrentSection = NS;
+
+ if (!CurrentSection.empty())
+ O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n';
+
+ IsInTextSection = false;
+}
+
+/// SwitchToSection - Switch to the specified section of the executable if we
+/// are not already in it!
+void AsmPrinter::SwitchToSection(const Section* NS) {
+ const std::string& NewSection = NS->getName();
+
+ // If we're already in this section, we're done.
+ if (CurrentSection == NewSection) return;
+
+ // Close the current section, if applicable.
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+ O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
+
+ // FIXME: Make CurrentSection a Section* in the future
+ CurrentSection = NewSection;
+ CurrentSection_ = NS;
+
+ if (!CurrentSection.empty()) {
+    // If the section is named, we need to switch into it via the special
+    // '.section' directive and also append the section flags. Otherwise the
+    // section name is itself a magic assembler directive.
+ if (NS->isNamed())
+ O << TAI->getSwitchToSectionDirective()
+ << CurrentSection
+ << TAI->getSectionFlags(NS->getFlags());
+ else
+ O << CurrentSection;
+ O << TAI->getDataSectionStartSuffix() << '\n';
+ }
+
+ IsInTextSection = (NS->getFlags() & SectionFlags::Code);
+}
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix());
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+
+ if (TAI->hasSingleParameterDotFile()) {
+    /* Very minimal debug info. It is ignored if we emit actual
+       debug info. If we don't, this at least helps the user find where
+       a function came from. */
+ O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n";
+ }
+
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(O, *this, *TAI);
+
+ if (!M.getModuleInlineAsm().empty())
+ O << TAI->getCommentString() << " Start of file scope inline assembly\n"
+ << M.getModuleInlineAsm()
+ << '\n' << TAI->getCommentString()
+ << " End of file scope inline assembly\n";
+
+ SwitchToDataSection(""); // Reset back to no section.
+
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ if (MMI) MMI->AnalyzeModule(M);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ return false;
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ if (TAI->getWeakRefDirective()) {
+ if (!ExtWeakSymbols.empty())
+ SwitchToDataSection("");
+
+ for (std::set<const GlobalValue*>::iterator i = ExtWeakSymbols.begin(),
+ e = ExtWeakSymbols.end(); i != e; ++i)
+ O << TAI->getWeakRefDirective() << Mang->getValueName(*i) << '\n';
+ }
+
+ if (TAI->getSetDirective()) {
+ if (!M.alias_empty())
+ SwitchToSection(TAI->getTextSection());
+
+ O << '\n';
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I!=E; ++I) {
+ std::string Name = Mang->getValueName(I);
+ std::string Target;
+
+ const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
+ Target = Mang->getValueName(GV);
+
+ if (I->hasExternalLinkage() || !TAI->getWeakRefDirective())
+ O << "\t.globl\t" << Name << '\n';
+ else if (I->hasWeakLinkage())
+ O << TAI->getWeakRefDirective() << Name << '\n';
+ else if (!I->hasLocalLinkage())
+ assert(0 && "Invalid alias linkage");
+
+ printVisibility(Name, I->getVisibility());
+
+ O << TAI->getSetDirective() << ' ' << Name << ", " << Target << '\n';
+ }
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ MP->finishAssembly(O, *this, *TAI);
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function* InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (TAI->getNonexecutableStackDirective())
+ O << TAI->getNonexecutableStackDirective() << '\n';
+
+ delete Mang; Mang = 0;
+ return false;
+}
+
+const std::string &
+AsmPrinter::getCurrentFunctionEHName(const MachineFunction *MF,
+ std::string &Name) const {
+ assert(MF && "No machine function?");
+ Name = MF->getFunction()->getName();
+ if (Name.empty())
+ Name = Mang->getValueName(MF->getFunction());
+ Name = Mang->makeNameProper(TAI->getEHGlobalPrefix() +
+ Name + ".eh", TAI->getGlobalPrefix());
+ return Name;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ // What's my mangled name?
+ CurrentFnName = Mang->getValueName(MF.getFunction());
+ IncrementFunctionNumber();
+}
+
+namespace {
+  // SectionCPs - Keep track of the alignment and the constant pool entries
+  // for each Section.
+ struct SectionCPs {
+ const Section *S;
+ unsigned Alignment;
+ SmallVector<unsigned, 4> CPEs;
+    SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {}
+ };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+  // Calculate sections for constant pool entries. We collect entries destined
+  // for the same section together to reduce the number of section switches.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+ const Section* S = TAI->SelectSectionForMachineConst(CPE.getType());
+    // The number of sections is small, so just do a linear search from the
+ // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ SwitchToSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+ unsigned Offset = 0;
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
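+      // This rounds Offset up, e.g. offset 5 with 4-byte alignment pads to 8.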
+ EmitZeros(NewOffset - Offset);
+
+ const Type *Ty = CPE.getType();
+ Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << CPI << ":\t\t\t\t\t";
+ if (VerboseAsm) {
+ O << TAI->getCommentString() << ' ';
+ WriteTypeSymbolic(O, CPE.getType(), 0);
+ }
+ O << '\n';
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CPE.Val.ConstVal);
+ }
+ }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
+ MachineFunction &MF) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ TargetLowering *LoweringInfo = TM.getTargetLowering();
+
+ const char* JumpTableDataSection = TAI->getJumpTableDataSection();
+ const Function *F = MF.getFunction();
+ unsigned SectionFlags = TAI->SectionFlagsForGlobal(F);
+ if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) ||
+ !JumpTableDataSection ||
+ SectionFlags & SectionFlags::Linkonce) {
+ // In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+    // We should also do this if the section name is NULL or the function is
+    // declared in a discardable section.
+ SwitchToSection(TAI->SectionForGlobal(F));
+ } else {
+ SwitchToDataSection(JumpTableDataSection);
+ }
+
+ EmitAlignment(Log2_32(MJTI->getAlignment()));
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[i].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For PIC codegen, if possible we want to use the SetDirective to reduce
+ // the number of relocations the assembler will generate for the jump table.
+ // Set directives are all printed before the jump table itself.
+ SmallPtrSet<MachineBasicBlock*, 16> EmittedSets;
+ if (TAI->getSetDirective() && IsPic)
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ if (EmittedSets.insert(JTBBs[ii]))
+ printPICJumpTableSetLabel(i, JTBBs[ii]);
+
+    // On some targets (e.g. darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
+ O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << i << ":\n";
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ printPICJumpTableEntry(MJTI, JTBBs[ii], i);
+ O << '\n';
+ }
+ }
+}
+
+void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const {
+ bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+
+ // Use JumpTableDirective otherwise honor the entry size from the jump table
+ // info.
+ const char *JTEntryDirective = TAI->getJumpTableDirective();
+ bool HadJTEntryDirective = JTEntryDirective != NULL;
+ if (!HadJTEntryDirective) {
+ JTEntryDirective = MJTI->getEntrySize() == 4 ?
+ TAI->getData32bitsDirective() : TAI->getData64bitsDirective();
+ }
+
+ O << JTEntryDirective << ' ';
+
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ // If we're emitting non-PIC code, then emit the entries as direct
+ // references to the target basic blocks.
+ if (IsPic) {
+ if (TAI->getSetDirective()) {
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << uid << "_set_" << MBB->getNumber();
+ } else {
+ printBasicBlockLabel(MBB, false, false, false);
+      // If the arch uses custom jump table directives, don't calculate the
+      // entry relative to the jump table.
+ if (!HadJTEntryDirective)
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid;
+ }
+ } else {
+ printBasicBlockLabel(MBB, false, false, false);
+ }
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+ if (TAI->getUsedDirective() != 0) // No need to emit this at all.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ // Ignore debug and non-emitted data.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ if (GV->getName() == "llvm.global_ctors") {
+ SwitchToDataSection(TAI->getStaticCtorsSection());
+ EmitAlignment(Align, 0);
+ EmitXXStructorList(GV->getInitializer());
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ SwitchToDataSection(TAI->getStaticDtorsSection());
+ EmitAlignment(Align, 0);
+ EmitXXStructorList(GV->getInitializer());
+ return true;
+ }
+
+ return false;
+}
+
+/// findGlobalValue - if CV is an expression equivalent to a single
+/// global value, return that value.
+const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return GV;
+ else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Opcode = CE->getOpcode();
+ switch (Opcode) {
+ case Instruction::GetElementPtr: {
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ if (TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], idxVec.size()))
+ return 0;
+ return findGlobalValue(ptrVal);
+ }
+ case Instruction::BitCast:
+ return findGlobalValue(CE->getOperand(0));
+ default:
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which emitUsedDirectiveFor
+/// is true, as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+ const char *Directive = TAI->getUsedDirective();
+
+ // Should be an array of 'sbyte*'.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV = findGlobalValue(InitList->getOperand(i));
+ if (TAI->emitUsedDirectiveFor(GV, Mang)) {
+ O << Directive;
+ EmitConstantValueOnly(InitList->getOperand(i));
+ O << '\n';
+ }
+ }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(Constant *List) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
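+  // (Roughly: @llvm.global_ctors = appending global [...], with entries like
+  // { i32 65535, void ()* @ctor }.)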
+  ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  if (!InitList) return;
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1));
+ }
+}
+
+/// getGlobalLinkName - Returns the asm/link name of the specified
+/// global variable. Should be overridden by each target asm printer to
+/// generate the appropriate value.
+const std::string &AsmPrinter::getGlobalLinkName(const GlobalVariable *GV,
+ std::string &LinkName) const {
+ if (isa<Function>(GV)) {
+ LinkName += TAI->getFunctionAddrPrefix();
+ LinkName += Mang->getValueName(GV);
+ LinkName += TAI->getFunctionAddrSuffix();
+ } else {
+ LinkName += TAI->getGlobalVarAddrPrefix();
+ LinkName += Mang->getValueName(GV);
+ LinkName += TAI->getGlobalVarAddrSuffix();
+ }
+
+ return LinkName;
+}
+
+/// EmitExternalGlobal - Emit the external reference to a global variable.
+/// Should be overridden if an indirect reference should be used.
+void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
+ std::string GLN;
+ O << getGlobalLinkName(GV, GLN);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+/// LEB 128 number encoding.
+
+/// PrintULEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing an unsigned leb128 value.
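+/// For example, PrintULEB128(624485) prints "0xe5, 0x8e, 0x26".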
+void AsmPrinter::PrintULEB128(unsigned Value) const {
+ char Buffer[20];
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ if (Value) Byte |= 0x80;
+ O << "0x" << utohex_buffer(Byte, Buffer+20);
+ if (Value) O << ", ";
+ } while (Value);
+}
+
+/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing a signed leb128 value.
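+/// For example, PrintSLEB128(-624485) prints "0x9b, 0xf1, 0x59".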
+void AsmPrinter::PrintSLEB128(int Value) const {
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+ char Buffer[20];
+
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ if (IsMore) Byte |= 0x80;
+ O << "0x" << utohex_buffer(Byte, Buffer+20);
+ if (IsMore) O << ", ";
+ } while (IsMore);
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// PrintHex - Print a value as a hexadecimal value.
+///
+void AsmPrinter::PrintHex(int Value) const {
+ char Buffer[20];
+ O << "0x" << utohex_buffer(static_cast<unsigned>(Value), Buffer+20);
+}
+
+/// EOL - Print a newline character to asm stream. If a comment is present
+/// then it will be printed first. Comments should not contain '\n'.
+void AsmPrinter::EOL() const {
+ O << '\n';
+}
+
+void AsmPrinter::EOL(const std::string &Comment) const {
+ if (VerboseAsm && !Comment.empty()) {
+ O << '\t'
+ << TAI->getCommentString()
+ << ' '
+ << Comment;
+ }
+ O << '\n';
+}
+
+void AsmPrinter::EOL(const char* Comment) const {
+ if (VerboseAsm && *Comment) {
+ O << '\t'
+ << TAI->getCommentString()
+ << ' '
+ << Comment;
+ }
+ O << '\n';
+}
+
+/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an
+/// unsigned leb128 value.
+void AsmPrinter::EmitULEB128Bytes(unsigned Value) const {
+ if (TAI->hasLEB128()) {
+ O << "\t.uleb128\t"
+ << Value;
+ } else {
+ O << TAI->getData8bitsDirective();
+ PrintULEB128(Value);
+ }
+}
+
+/// EmitSLEB128Bytes - Emit an assembler byte data directive to compose a
+/// signed leb128 value.
+void AsmPrinter::EmitSLEB128Bytes(int Value) const {
+ if (TAI->hasLEB128()) {
+ O << "\t.sleb128\t"
+ << Value;
+ } else {
+ O << TAI->getData8bitsDirective();
+ PrintSLEB128(Value);
+ }
+}
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ O << TAI->getData8bitsDirective();
+ PrintHex(Value & 0xFF);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ O << TAI->getData16bitsDirective();
+ PrintHex(Value & 0xFFFF);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ O << TAI->getData32bitsDirective();
+ PrintHex(Value);
+}
+
+/// EmitInt64 - Emit a long long directive and value.
+///
+void AsmPrinter::EmitInt64(uint64_t Value) const {
+ if (TAI->getData64bitsDirective()) {
+ O << TAI->getData64bitsDirective();
+ PrintHex(Value);
+ } else {
+ if (TM.getTargetData()->isBigEndian()) {
+ EmitInt32(unsigned(Value >> 32)); O << '\n';
+ EmitInt32(unsigned(Value));
+ } else {
+ EmitInt32(unsigned(Value)); O << '\n';
+ EmitInt32(unsigned(Value >> 32));
+ }
+ }
+}
+
+/// toOctal - Convert the low order bits of X into an octal digit.
+///
+static inline char toOctal(int X) {
+ return (X&7)+'0';
+}
+
+/// printStringChar - Print a char, escaped if necessary.
+///
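+/// Non-printing characters become octal escapes, e.g. 0x01 prints as "\001".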
+static void printStringChar(raw_ostream &O, unsigned char C) {
+ if (C == '"') {
+ O << "\\\"";
+ } else if (C == '\\') {
+ O << "\\\\";
+ } else if (isprint((unsigned char)C)) {
+ O << C;
+ } else {
+ switch(C) {
+ case '\b': O << "\\b"; break;
+ case '\f': O << "\\f"; break;
+ case '\n': O << "\\n"; break;
+ case '\r': O << "\\r"; break;
+ case '\t': O << "\\t"; break;
+ default:
+ O << '\\';
+ O << toOctal(C >> 6);
+ O << toOctal(C >> 3);
+ O << toOctal(C >> 0);
+ break;
+ }
+ }
+}
+
+/// EmitString - Emit a string with quotes and a null terminator.
+/// Special characters are emitted properly.
+/// \literal (Eg. '\t') \endliteral
+void AsmPrinter::EmitString(const std::string &String) const {
+ EmitString(String.c_str(), String.size());
+}
+
+void AsmPrinter::EmitString(const char *String, unsigned Size) const {
+ const char* AscizDirective = TAI->getAscizDirective();
+ if (AscizDirective)
+ O << AscizDirective;
+ else
+ O << TAI->getAsciiDirective();
+ O << '\"';
+ for (unsigned i = 0; i < Size; ++i)
+ printStringChar(O, String[i]);
+ if (AscizDirective)
+ O << '\"';
+ else
+ O << "\\0\"";
+}
+
+
+/// EmitFile - Emit a .file directive.
+void AsmPrinter::EmitFile(unsigned Number, const std::string &Name) const {
+ O << "\t.file\t" << Number << " \"";
+ for (unsigned i = 0, N = Name.size(); i < N; ++i)
+ printStringChar(O, Name[i]);
+ O << '\"';
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will unconditionally override the
+// alignment request. However, if ForcedAlignBits is specified, this value
+// has final say: the ultimate alignment will be the max of ForcedAlignBits
+// and the alignment computed with NumBits and the global.
+//
+// The algorithm is:
+// Align = NumBits;
+// if (GV && GV->hasalignment) Align = GV->getalignment();
+// Align = std::max(Align, ForcedAlignBits);
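+//
+// For example, EmitAlignment(3, GV) where GV requests 16-byte alignment emits
+// a 2^4 = 16 byte alignment, not 2^3 = 8.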
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
+ unsigned ForcedAlignBits,
+ bool UseFillExpr) const {
+ if (GV && GV->getAlignment())
+ NumBits = Log2_32(GV->getAlignment());
+ NumBits = std::max(NumBits, ForcedAlignBits);
+
+ if (NumBits == 0) return; // No need to emit alignment.
+ if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits;
+ O << TAI->getAlignDirective() << NumBits;
+
+ unsigned FillValue = TAI->getTextAlignFillValue();
+ UseFillExpr &= IsInTextSection && FillValue;
+ if (UseFillExpr) {
+ O << ',';
+ PrintHex(FillValue);
+ }
+ O << '\n';
+}
+
+
+/// EmitZeros - Emit a block of zeros.
+///
+void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const {
+ if (NumZeros) {
+ if (TAI->getZeroDirective()) {
+ O << TAI->getZeroDirective() << NumZeros;
+ if (TAI->getZeroDirectiveSuffix())
+ O << TAI->getZeroDirectiveSuffix();
+ O << '\n';
+ } else {
+ for (; NumZeros; --NumZeros)
+ O << TAI->getData8bitsDirective(AddrSpace) << "0\n";
+ }
+ }
+}
+
+// Print out the specified constant, without a storage class. Only the
+// constants valid in constant expressions can occur here.
+void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ O << '0';
+ else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ O << CI->getZExtValue();
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ // This is a constant address for a global variable or function. Use the
+ // name of the variable or function as the address value, possibly
+ // decorating it with GlobalVarAddrPrefix/Suffix or
+ // FunctionAddrPrefix/Suffix (these all default to "" )
+ if (isa<Function>(GV)) {
+ O << TAI->getFunctionAddrPrefix()
+ << Mang->getValueName(GV)
+ << TAI->getFunctionAddrSuffix();
+ } else {
+ O << TAI->getGlobalVarAddrPrefix()
+ << Mang->getValueName(GV)
+ << TAI->getGlobalVarAddrSuffix();
+ }
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Opcode = CE->getOpcode();
+ switch (Opcode) {
+ case Instruction::GetElementPtr: {
+ // generate a symbolic expression for the byte address
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+ idxVec.size())) {
+ // Truncate/sext the offset to the pointer size.
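+        // E.g., with 32-bit pointers an offset whose low 32 bits are all
+        // ones sign-extends to -1, printing "(sym) - 1" rather than a huge
+        // positive constant.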
+ if (TD->getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD->getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
+ }
+
+ if (Offset)
+ O << '(';
+ EmitConstantValueOnly(ptrVal);
+ if (Offset > 0)
+ O << ") + " << Offset;
+ else if (Offset < 0)
+ O << ") - " << -Offset;
+ } else {
+ EmitConstantValueOnly(ptrVal);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ assert(0 && "FIXME: Don't yet support this kind of constant cast expr");
+ break;
+ case Instruction::BitCast:
+ return EmitConstantValueOnly(CE->getOperand(0));
+
+ case Instruction::IntToPtr: {
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/);
+ return EmitConstantValueOnly(Op);
+ }
+
+
+ case Instruction::PtrToInt: {
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot greater or equal to the size of the pointer.
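+    // Otherwise, mask the value down to the slot's width; e.g. a 64-bit
+    // pointer emitted into an i32 slot prints as "((sym) & 4294967295)".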
+ if (TD->getTypeAllocSize(Ty) >= TD->getTypeAllocSize(Op->getType()))
+ return EmitConstantValueOnly(Op);
+
+ O << "((";
+ EmitConstantValueOnly(Op);
+ APInt ptrMask = APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Ty));
+
+ SmallString<40> S;
+ ptrMask.toStringUnsigned(S);
+ O << ") & " << S.c_str() << ')';
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ O << '(';
+ EmitConstantValueOnly(CE->getOperand(0));
+ O << ')';
+ switch (Opcode) {
+ case Instruction::Add:
+ O << " + ";
+ break;
+ case Instruction::Sub:
+ O << " - ";
+ break;
+ case Instruction::And:
+ O << " & ";
+ break;
+ case Instruction::Or:
+ O << " | ";
+ break;
+ case Instruction::Xor:
+ O << " ^ ";
+ break;
+ default:
+ break;
+ }
+ O << '(';
+ EmitConstantValueOnly(CE->getOperand(1));
+ O << ')';
+ break;
+ default:
+ assert(0 && "Unsupported operator!");
+ }
+ } else {
+ assert(0 && "Unknown constant value!");
+ }
+}
+
+/// printAsCString - Print the specified array as a C-compatible string; the
+/// array must satisfy the isString predicate.
+///
+static void printAsCString(raw_ostream &O, const ConstantArray *CVA,
+ unsigned LastElt) {
+ assert(CVA->isString() && "Array is not string compatible!");
+
+ O << '\"';
+ for (unsigned i = 0; i != LastElt; ++i) {
+ unsigned char C =
+ (unsigned char)cast<ConstantInt>(CVA->getOperand(i))->getZExtValue();
+ printStringChar(O, C);
+ }
+ O << '\"';
+}
+
+/// EmitString - Emit a zero-byte-terminated string constant.
+///
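+/// For example, the NUL-terminated bytes "hi\0" emit '.asciz "hi"' on
+/// targets that provide an asciz directive, and something like
+/// '.ascii "hi\000"' otherwise.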
+void AsmPrinter::EmitString(const ConstantArray *CVA) const {
+ unsigned NumElts = CVA->getNumOperands();
+ if (TAI->getAscizDirective() && NumElts &&
+ cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) {
+ O << TAI->getAscizDirective();
+ printAsCString(O, CVA, NumElts-1);
+ } else {
+ O << TAI->getAsciiDirective();
+ printAsCString(O, CVA, NumElts);
+ }
+ O << '\n';
+}
+
+void AsmPrinter::EmitGlobalConstantArray(const ConstantArray *CVA,
+ unsigned AddrSpace) {
+ if (CVA->isString()) {
+ EmitString(CVA);
+ } else { // Not a string. Print the values in successive locations
+ for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstant(CVA->getOperand(i), AddrSpace);
+ }
+}
+
+void AsmPrinter::EmitGlobalConstantVector(const ConstantVector *CP) {
+ const VectorType *PTy = CP->getType();
+
+ for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+ EmitGlobalConstant(CP->getOperand(I));
+}
+
+void AsmPrinter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ unsigned AddrSpace) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CVS->getType());
+ const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+ uint64_t sizeSoFar = 0;
+ for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+ const Constant* field = CVS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+ uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+ - cvsLayout->getElementOffset(i)) - fieldSize;
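+    // E.g., in { i8, i32 } with the i32 aligned to 4 bytes, the i8 field
+    // is followed by padSize == 3 bytes of zeros.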
+ sizeSoFar += fieldSize + padSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstant(field, AddrSpace);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ EmitZeros(padSize, AddrSpace);
+ }
+ assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP,
+ unsigned AddrSpace) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision...
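+  // E.g., the double 1.0 has bit pattern 0x3FF0000000000000, so a target
+  // with a 64-bit data directive such as ".quad" emits
+  // ".quad 4607182418800017408".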
+ const TargetData *TD = TM.getTargetData();
+ if (CFP->getType() == Type::DoubleTy) {
+ double Val = CFP->getValueAPF().convertToDouble(); // for comment only
+ uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ if (TAI->getData64bitsDirective(AddrSpace)) {
+ O << TAI->getData64bitsDirective(AddrSpace) << i;
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " double value: " << Val;
+ O << '\n';
+ } else if (TD->isBigEndian()) {
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " double most significant word " << Val;
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " double least significant word " << Val;
+ O << '\n';
+ } else {
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " double least significant word " << Val;
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " double most significant word " << Val;
+ O << '\n';
+ }
+ return;
+ } else if (CFP->getType() == Type::FloatTy) {
+ float Val = CFP->getValueAPF().convertToFloat(); // for comment only
+ O << TAI->getData32bitsDirective(AddrSpace)
+ << CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " float " << Val;
+ O << '\n';
+ return;
+ } else if (CFP->getType() == Type::X86_FP80Ty) {
+    // All long double variants are printed as hex.
+    // api is a local copy, keeping the raw-data pointer below valid.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ // Convert to double so we can print the approximate val as a comment.
+ APFloat DoubleVal = CFP->getValueAPF();
+ bool ignored;
+ DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ if (TD->isBigEndian()) {
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double most significant halfword of ~"
+ << DoubleVal.convertToDouble();
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double least significant halfword";
+ O << '\n';
+ } else {
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double least significant halfword of ~"
+ << DoubleVal.convertToDouble();
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double most significant halfword";
+ O << '\n';
+ }
+ EmitZeros(TD->getTypeAllocSize(Type::X86_FP80Ty) -
+ TD->getTypeStoreSize(Type::X86_FP80Ty), AddrSpace);
+ return;
+ } else if (CFP->getType() == Type::PPC_FP128Ty) {
+    // All long double variants are printed as hex.
+    // api is a local copy, keeping the raw-data pointer below valid.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ if (TD->isBigEndian()) {
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double most significant word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double least significant word";
+ O << '\n';
+ } else {
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double least significant word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double most significant word";
+ O << '\n';
+ }
+ return;
+ } else assert(0 && "Floating point constant type not handled");
+}
+
+void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ unsigned AddrSpace) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert(isPowerOf2_32(BitWidth) &&
+ "Non-power-of-2-sized integers not handled!");
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
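+  // E.g., an i128 on a little-endian target with a 64-bit data directive
+  // is emitted as two 64-bit words, least significant word first.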
+ const uint64_t *RawData = CI->getValue().getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val;
+ if (TD->isBigEndian())
+ Val = RawData[e - i - 1];
+ else
+ Val = RawData[i];
+
+ if (TAI->getData64bitsDirective(AddrSpace))
+ O << TAI->getData64bitsDirective(AddrSpace) << Val << '\n';
+ else if (TD->isBigEndian()) {
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " Double-word most significant word " << Val;
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " Double-word least significant word " << Val;
+ O << '\n';
+ } else {
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " Double-word least significant word " << Val;
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " Double-word most significant word " << Val;
+ O << '\n';
+ }
+ }
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ const TargetData *TD = TM.getTargetData();
+ const Type *type = CV->getType();
+ unsigned Size = TD->getTypeAllocSize(type);
+
+ if (CV->isNullValue() || isa<UndefValue>(CV)) {
+ EmitZeros(Size, AddrSpace);
+ return;
+ } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+ EmitGlobalConstantArray(CVA , AddrSpace);
+ return;
+ } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+ EmitGlobalConstantStruct(CVS, AddrSpace);
+ return;
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ EmitGlobalConstantFP(CFP, AddrSpace);
+ return;
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ // Small integers are handled below; large integers are handled here.
+ if (Size > 4) {
+ EmitGlobalConstantLargeInt(CI, AddrSpace);
+ return;
+ }
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ EmitGlobalConstantVector(CP);
+ return;
+ }
+
+ printDataDirective(type, AddrSpace);
+ EmitConstantValueOnly(CV);
+ if (VerboseAsm) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ SmallString<40> S;
+ CI->getValue().toStringUnsigned(S, 16);
+ O << "\t\t\t" << TAI->getCommentString() << " 0x" << S.c_str();
+ }
+ }
+ O << '\n';
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ abort();
+}
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
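+/// For example, "${:comment}" expands to the target's comment string, and
+/// "${:uid}" to a counter that is unique per machine instruction.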
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ O << TAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ if (VerboseAsm)
+ O << TAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Assign a unique ID to this machine instruction.
+ static const MachineInstr *LastMI = 0;
+ static const Function *F = 0;
+ static unsigned Counter = 0U-1;
+
+    // Comparing the address of MI isn't sufficient, because MachineInstrs
+    // may be allocated at the same address across functions.
+ const Function *ThisF = MI->getParent()->getParent()->getFunction();
+
+ // If this is a new machine instruction, bump the counter.
+ if (LastMI != MI || F != ThisF) {
+ ++Counter;
+ LastMI = MI;
+ F = ThisF;
+ }
+ O << Counter;
+ } else {
+ cerr << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ exit(1);
+ }
+}
+
+/// processDebugLoc - Processes the debug information of each machine
+/// instruction's DebugLoc.
+void AsmPrinter::processDebugLoc(DebugLoc DL) {
+ if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
+ if (!DL.isUnknown()) {
+ static DebugLocTuple PrevDLT(0, ~0U, ~0U);
+ DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
+
+ if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT)
+ printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
+ DICompileUnit(CurDLT.CompileUnit)));
+
+ PrevDLT = CurDLT;
+ }
+ }
+}
+
+/// printInlineAsm - This method formats and prints the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
+ unsigned NumOperands = MI->getNumOperands();
+
+ // Count the number of register definitions.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != NumOperands-1 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+  // These are useful for seeing where empty asm blocks ended up.
+ if (AsmStr[0] == 0) {
+ O << TAI->getInlineAsmStart() << "\n\t" << TAI->getInlineAsmEnd() << '\n';
+ return;
+ }
+
+ O << TAI->getInlineAsmStart() << "\n\t";
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = TAI->getAssemblerDialect();
+
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+
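+  // Variant regions use GCC's {a|b} syntax, written here as $(a$|b$); e.g.
+  // in "$(movl $1, $0$|mov $0, $1$)" only the alternative selected by
+  // AsmPrinterVariant is emitted.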
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ O.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ O << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ O << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1) {
+ cerr << "Nested variants found in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ O << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ O << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (StrEnd == 0) {
+ cerr << "Unterminated ${:foo} operand in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+
+ std::string Val(StrStart, StrEnd);
+ PrintSpecial(MI, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ char *IDEnd;
+ errno = 0;
+ long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs.
+ if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) {
+ cerr << "Bad $ operand number in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+      // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0) {
+ cerr << "Bad ${:} expression in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}') {
+ cerr << "Bad ${} expression in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if ((unsigned)Val >= NumOperands-1) {
+ cerr << "Invalid $ operand number in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = 1;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ if (OpNo >= MI->getNumOperands()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0]=='l') // labels are target independent
+ printBasicBlockLabel(MI->getOperand(OpNo).getMBB(),
+ false, false, false);
+ else {
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if ((OpFlags & 7) == 4) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0);
+ }
+ }
+ }
+ if (Error) {
+ cerr << "Invalid operand found in inline asm: '"
+ << AsmStr << "'\n";
+ MI->dump();
+ exit(1);
+ }
+ }
+ break;
+ }
+ }
+ }
+ O << "\n\t" << TAI->getInlineAsmEnd() << '\n';
+}
+
+/// printImplicitDef - This method prints the specified machine instruction
+/// that is an implicit def.
+void AsmPrinter::printImplicitDef(const MachineInstr *MI) const {
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " implicit-def: "
+ << TRI->getAsmName(MI->getOperand(0).getReg()) << '\n';
+}
+
+/// printLabel - This method prints a local label used by debug and
+/// exception handling tables.
+void AsmPrinter::printLabel(const MachineInstr *MI) const {
+ printLabel(MI->getOperand(0).getImm());
+}
+
+void AsmPrinter::printLabel(unsigned Id) const {
+ O << TAI->getPrivateGlobalPrefix() << "label" << Id << ":\n";
+}
+
+/// printDeclare - This method prints a local variable declaration used by
+/// debug tables.
+/// FIXME: It doesn't really print anything; rather, it inserts a
+/// DebugVariable entry into the DWARF table.
+void AsmPrinter::printDeclare(const MachineInstr *MI) const {
+ unsigned FI = MI->getOperand(0).getIndex();
+ GlobalValue *GV = MI->getOperand(1).getGlobal();
+ DW->RecordVariable(cast<GlobalVariable>(GV), FI, MI);
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+/// printBasicBlockLabel - This method prints the label for the specified
+/// MachineBasicBlock.
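+/// (e.g. ".LBB7_3" for function number 7, block number 3, given a ".L"
+/// private global prefix).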
+void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB,
+ bool printAlign,
+ bool printColon,
+ bool printComment) const {
+ if (printAlign) {
+ unsigned Align = MBB->getAlignment();
+ if (Align)
+ EmitAlignment(Log2_32(Align));
+ }
+
+ O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << '_'
+ << MBB->getNumber();
+ if (printColon)
+ O << ':';
+ if (printComment && MBB->getBasicBlock())
+ O << '\t' << TAI->getCommentString() << ' '
+ << MBB->getBasicBlock()->getNameStart();
+}
+
+/// printPICJumpTableSetLabel - This method prints a set label for the
+/// specified MachineBasicBlock for a jumptable entry.
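+/// E.g., with a ".L" private prefix, function number 7, uid 0, and block
+/// number 3, this emits ".set .L7_0_set_3,.LBB7_3-.LJTI7_0".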
+void AsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false, false);
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '\n';
+}
+
+void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << '_' << uid2
+ << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false, false);
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '_' << uid2 << '\n';
+}
+
+/// printDataDirective - This method prints the asm directive for the
+/// specified type.
+void AsmPrinter::printDataDirective(const Type *type, unsigned AddrSpace) {
+ const TargetData *TD = TM.getTargetData();
+ switch (type->getTypeID()) {
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(type)->getBitWidth();
+ if (BitWidth <= 8)
+ O << TAI->getData8bitsDirective(AddrSpace);
+ else if (BitWidth <= 16)
+ O << TAI->getData16bitsDirective(AddrSpace);
+ else if (BitWidth <= 32)
+ O << TAI->getData32bitsDirective(AddrSpace);
+ else if (BitWidth <= 64) {
+ assert(TAI->getData64bitsDirective(AddrSpace) &&
+ "Target cannot handle 64-bit constant exprs!");
+ O << TAI->getData64bitsDirective(AddrSpace);
+ } else {
+ assert(0 && "Target cannot handle given data directive width!");
+ }
+ break;
+ }
+ case Type::PointerTyID:
+ if (TD->getPointerSize() == 8) {
+ assert(TAI->getData64bitsDirective(AddrSpace) &&
+ "Target cannot handle 64-bit pointer exprs!");
+ O << TAI->getData64bitsDirective(AddrSpace);
+ } else if (TD->getPointerSize() == 2) {
+ O << TAI->getData16bitsDirective(AddrSpace);
+ } else if (TD->getPointerSize() == 1) {
+ O << TAI->getData8bitsDirective(AddrSpace);
+ } else {
+ O << TAI->getData32bitsDirective(AddrSpace);
+ }
+ break;
+ case Type::FloatTyID: case Type::DoubleTyID:
+ case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID:
+ assert (0 && "Should have already output floating point constant.");
+ default:
+ assert (0 && "Can't handle printing this type of thing");
+ break;
+ }
+}
+
+void AsmPrinter::printSuffixedName(const char *Name, const char *Suffix,
+ const char *Prefix) {
+ if (Name[0]=='\"')
+ O << '\"';
+ O << TAI->getPrivateGlobalPrefix();
+ if (Prefix) O << Prefix;
+  if (Name[0]=='\"') // Strip the name's own quotes; we re-quote the result.
+    O << std::string(Name + 1, strlen(Name) - 2);
+  else
+    O << Name;
+ O << Suffix;
+ if (Name[0]=='\"')
+ O << '\"';
+}
+
+void AsmPrinter::printSuffixedName(const std::string &Name, const char* Suffix) {
+ printSuffixedName(Name.c_str(), Suffix);
+}
+
+void AsmPrinter::printVisibility(const std::string& Name,
+ unsigned Visibility) const {
+ if (Visibility == GlobalValue::HiddenVisibility) {
+ if (const char *Directive = TAI->getHiddenDirective())
+ O << Directive << Name << '\n';
+ } else if (Visibility == GlobalValue::ProtectedVisibility) {
+ if (const char *Directive = TAI->getProtectedDirective())
+ O << Directive << Name << '\n';
+ }
+}
+
+void AsmPrinter::printOffset(int64_t Offset) const {
+ if (Offset > 0)
+ O << '+' << Offset;
+ else if (Offset < 0)
+ O << Offset;
+}
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+ if (!S->usesMetadata())
+ return 0;
+
+ gcp_iterator GCPI = GCMetadataPrinters.find(S);
+ if (GCPI != GCMetadataPrinters.end())
+ return GCPI->second;
+
+ const char *Name = S->getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ GCMetadataPrinter *GMP = I->instantiate();
+ GMP->S = S;
+ GCMetadataPrinters.insert(std::make_pair(S, GMP));
+ return GMP;
+ }
+
+ cerr << "no GCMetadataPrinter registered for GC: " << Name << "\n";
+ abort();
+}
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..066aaab
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_llvm_library(LLVMAsmPrinter
+ AsmPrinter.cpp
+ DIE.cpp
+ DwarfDebug.cpp
+ DwarfException.cpp
+ DwarfLabel.cpp
+ DwarfPrinter.cpp
+ DwarfWriter.cpp
+ OcamlGCPrinter.cpp
+ )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 0000000..dc149cf
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,518 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(const AsmPrinter *Asm) const {
+ // Emit its Dwarf tag type.
+ Asm->EmitULEB128Bytes(Tag);
+ Asm->EOL(dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ Asm->EmitULEB128Bytes(ChildrenFlag);
+ Asm->EOL(dwarf::ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ Asm->EmitULEB128Bytes(AttrData.getAttribute());
+ Asm->EOL(dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ Asm->EmitULEB128Bytes(AttrData.getForm());
+ Asm->EOL(dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(1)");
+ Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(std::ostream &O) {
+ O << "Abbreviation @"
+ << std::hex << (intptr_t)this << std::dec
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(ChildrenFlag)
+ << "\n";
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << "\n";
+ }
+}
+void DIEAbbrev::dump() { print(cerr); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+void DIE::AddSiblingOffset() {
+ DIEInteger *DI = new DIEInteger(0);
+ Values.insert(Values.begin(), DI);
+ Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIE::Profile(FoldingSetNodeID &ID) {
+ Abbrev.Profile(ID);
+
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ ID.AddPointer(Children[i]);
+
+ for (unsigned j = 0, M = Values.size(); j < M; ++j)
+ ID.AddPointer(Values[j]);
+}
+
+#ifndef NDEBUG
+void DIE::print(std::ostream &O, unsigned IncIndent) {
+ static unsigned IndentCount = 0;
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << "0x" << std::hex << (intptr_t)this << std::dec
+ << ", Offset: " << Offset
+ << ", Size: " << Size
+ << "\n";
+
+ O << Indent
+ << dwarf::TagString(Abbrev.getTag())
+ << " "
+ << dwarf::ChildrenString(Abbrev.getChildrenFlag());
+ } else {
+ O << "Size: " << Size;
+ }
+ O << "\n";
+
+ const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+
+ if (!isBlock)
+ O << dwarf::AttributeString(Data[i].getAttribute());
+ else
+ O << "Blk[" << i << "]";
+
+ O << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(cerr);
+}
+#endif
+
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(cerr);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(Dwarf *D, unsigned Form) const {
+ const AsmPrinter *Asm = D->getAsm();
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Asm->EmitInt8(Integer); break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Asm->EmitInt16(Integer); break;
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Asm->EmitInt32(Integer); break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: Asm->EmitInt64(Integer); break;
+ case dwarf::DW_FORM_udata: Asm->EmitULEB128Bytes(Integer); break;
+ case dwarf::DW_FORM_sdata: Asm->EmitSLEB128Bytes(Integer); break;
+ default: assert(0 && "DIE Value form not supported yet"); break;
+ }
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_udata: return TargetAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return TargetAsmInfo::getSLEB128Size(Integer);
+ default: assert(0 && "DIE Value form not supported yet"); break;
+ }
+ return 0;
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEInteger::Profile(FoldingSetNodeID &ID, unsigned Int) {
+ ID.AddInteger(isInteger);
+ ID.AddInteger(Int);
+}
+void DIEInteger::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Integer);
+}
+
+#ifndef NDEBUG
+void DIEInteger::print(std::ostream &O) {
+ O << "Int: " << (int64_t)Integer
+ << " 0x" << std::hex << Integer << std::dec;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(Dwarf *D, unsigned Form) const {
+ D->getAsm()->EmitString(Str);
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEString::Profile(FoldingSetNodeID &ID, const std::string &Str) {
+ ID.AddInteger(isString);
+ ID.AddString(Str);
+}
+void DIEString::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Str);
+}
+
+#ifndef NDEBUG
+void DIEString::print(std::ostream &O) {
+ O << "Str: \"" << Str << "\"";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDwarfLabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEDwarfLabel::EmitValue(Dwarf *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitReference(Label, false, IsSmall);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEDwarfLabel::Profile(FoldingSetNodeID &ID, const DWLabel &Label) {
+ ID.AddInteger(isLabel);
+ Label.Profile(ID);
+}
+void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Label);
+}
+
+#ifndef NDEBUG
+void DIEDwarfLabel::print(std::ostream &O) {
+ O << "Lbl: ";
+ Label.print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEObjectLabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEObjectLabel::EmitValue(Dwarf *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitReference(Label, false, IsSmall);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEObjectLabel::Profile(FoldingSetNodeID &ID, const std::string &Label) {
+ ID.AddInteger(isAsIsLabel);
+ ID.AddString(Label);
+}
+void DIEObjectLabel::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Label.c_str());
+}
+
+#ifndef NDEBUG
+void DIEObjectLabel::print(std::ostream &O) {
+ O << "Obj: " << Label;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIESectionOffset Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIESectionOffset::EmitValue(Dwarf *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitSectionOffset(Label.getTag(), Section.getTag(),
+ Label.getNumber(), Section.getNumber(),
+ IsSmall, IsEH, UseSet);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIESectionOffset::Profile(FoldingSetNodeID &ID, const DWLabel &Label,
+ const DWLabel &Section) {
+ ID.AddInteger(isSectionOffset);
+ Label.Profile(ID);
+ Section.Profile(ID);
+ // IsEH and UseSet are specific to the Label/Section that we will emit the
+ // offset for; so Label/Section are enough for uniqueness.
+}
+void DIESectionOffset::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Label, Section);
+}
+
+#ifndef NDEBUG
+void DIESectionOffset::print(std::ostream &O) {
+ O << "Off: ";
+ Label.print(O);
+ O << "-";
+ Section.print(O);
+ O << "-" << IsEH << "-" << UseSet;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(Dwarf *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitDifference(LabelHi, LabelLo, IsSmall);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEDelta::Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+ const DWLabel &LabelLo) {
+ ID.AddInteger(isDelta);
+ LabelHi.Profile(ID);
+ LabelLo.Profile(ID);
+}
+void DIEDelta::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, LabelHi, LabelLo);
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(std::ostream &O) {
+ O << "Del: ";
+ LabelHi.print(O);
+ O << "-";
+ LabelLo.print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(Dwarf *D, unsigned Form) const {
+ D->getAsm()->EmitInt32(Entry->getOffset());
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEEntry::Profile(FoldingSetNodeID &ID, DIE *Entry) {
+ ID.AddInteger(isEntry);
+ ID.AddPointer(Entry);
+}
+void DIEEntry::Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(isEntry);
+
+ if (Entry)
+ ID.AddPointer(Entry);
+ else
+ ID.AddPointer(this);
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(std::ostream &O) {
+ O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - Calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(const TargetData *TD) {
+ if (!Size) {
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Size += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(Dwarf *D, unsigned Form) const {
+ const AsmPrinter *Asm = D->getAsm();
+ switch (Form) {
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: Asm->EmitULEB128Bytes(Size); break;
+ default: assert(0 && "Improper form for block"); break;
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ Asm->EOL();
+ Values[i]->EmitValue(D, AbbrevData[i].getForm());
+ }
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + TargetAsmInfo::getULEB128Size(Size);
+ default: assert(0 && "Improper form for block"); break;
+ }
+ return 0;
+}
+
+void DIEBlock::Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(isBlock);
+ DIE::Profile(ID);
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(std::ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 0000000..b14d91c
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,549 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+#include <iosfwd>
+
+namespace llvm {
+ class AsmPrinter;
+ class Dwarf;
+ class TargetData;
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
+ /// Dwarf abbreviation.
+ class VISIBILITY_HIDDEN DIEAbbrevData {
+ /// Attribute - Dwarf attribute code.
+ ///
+ unsigned Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ unsigned Form;
+ public:
+ DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+ // Accessors.
+ unsigned getAttribute() const { return Attribute; }
+ unsigned getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+ /// information object.
+ class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode {
+ /// Tag - Dwarf tag code.
+ ///
+ unsigned Tag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ unsigned ChildrenFlag;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ SmallVector<DIEAbbrevData, 8> Data;
+ public:
+ DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+ virtual ~DIEAbbrev() {}
+
+ // Accessors.
+ unsigned getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ unsigned getChildrenFlag() const { return ChildrenFlag; }
+ const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+ void setTag(unsigned T) { Tag = T; }
+ void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(unsigned Attribute, unsigned Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+ /// Emit - Print the abbreviation using the specified asm printer.
+ ///
+ void Emit(const AsmPrinter *Asm) const;
+
+#ifndef NDEBUG
+ void print(std::ostream *O) {
+ if (O) print(*O);
+ }
+ void print(std::ostream &O);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIE - A structured debug information entry. Has an abbreviation which
+  /// describes its organization.
+ class CompileUnit;
+ class DIEValue;
+
+ class VISIBILITY_HIDDEN DIE : public FoldingSetNode {
+ protected:
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ /// Attributes values.
+ ///
+ SmallVector<DIEValue*, 32> Values;
+
+ /// Abstract compile unit.
+ CompileUnit *AbstractCU;
+ public:
+    explicit DIE(unsigned Tag)
+      : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), Size(0),
+        AbstractCU(0) {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ SmallVector<DIEValue*, 32> &getValues() { return Values; }
+ CompileUnit *getAbstractCompileUnit() const { return AbstractCU; }
+
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+ void setAbstractCompileUnit(CompileUnit *CU) { AbstractCU = CU; }
+
+ /// AddValue - Add a value and attributes to a DIE.
+ ///
+ void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// SiblingOffset - Return the offset of the debug information entry's
+ /// sibling.
+ unsigned SiblingOffset() const { return Offset + Size; }
+
+ /// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+ ///
+ void AddSiblingOffset();
+
+ /// AddChild - Add a child to the DIE.
+ ///
+ void AddChild(DIE *Child) {
+ Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+ Children.push_back(Child);
+ }
+
+ /// Detach - Detaches objects connected to it after copying.
+ ///
+ void Detach() {
+ Children.clear();
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+    void Profile(FoldingSetNodeID &ID);
+
+#ifndef NDEBUG
+ void print(std::ostream *O, unsigned IncIndent = 0) {
+ if (O) print(*O, IncIndent);
+ }
+ void print(std::ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEValue - A debug information entry value.
+ ///
+ class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode {
+ public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isAsIsLabel,
+ isSectionOffset,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+ protected:
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+ public:
+ explicit DIEValue(unsigned T) : Type(T) {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ virtual void Profile(FoldingSetNodeID &ID) = 0;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *) { return true; }
+
+#ifndef NDEBUG
+ void print(std::ostream *O) {
+ if (O) print(*O);
+ }
+ virtual void print(std::ostream &O) = 0;
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEInteger - An integer value DIE.
+ ///
+ class VISIBILITY_HIDDEN DIEInteger : public DIEValue {
+ uint64_t Integer;
+ public:
+ explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ /// BestForm - Choose the best form for integer.
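+    /// E.g., BestForm(false, 300) returns DW_FORM_data2, since 300 needs
+    /// more than 8 but at most 16 bits.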
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Int) {
+ if (IsSigned) {
+ if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
+ if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
+ if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ } else {
+ if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
+ if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
+ if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4;
+ }
+ return dwarf::DW_FORM_data8;
+ }
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, unsigned Int);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEInteger *) { return true; }
+ static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEString - A string value DIE.
+ ///
+ class VISIBILITY_HIDDEN DIEString : public DIEValue {
+ const std::string Str;
+ public:
+ explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {}
+
+ /// EmitValue - Emit string value.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of string value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *, unsigned /*Form*/) const {
+ return Str.size() + sizeof(char); // sizeof('\0');
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const std::string &Str);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEString *) { return true; }
+ static bool classof(const DIEValue *S) { return S->getType() == isString; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDwarfLabel - A Dwarf internal label expression DIE.
+ //
+ class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue {
+ const DWLabel Label;
+ public:
+ explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const DWLabel &Label);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDwarfLabel *) { return true; }
+ static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEObjectLabel - A label to an object in code or data.
+ //
+ class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue {
+ const std::string Label;
+ public:
+ explicit DIEObjectLabel(const std::string &L)
+ : DIEValue(isAsIsLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const std::string &Label);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEObjectLabel *) { return true; }
+ static bool classof(const DIEValue *L) {
+ return L->getType() == isAsIsLabel;
+ }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIESectionOffset - A section offset DIE.
+ ///
+ class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue {
+ const DWLabel Label;
+ const DWLabel Section;
+ bool IsEH : 1;
+ bool UseSet : 1;
+ public:
+ DIESectionOffset(const DWLabel &Lab, const DWLabel &Sec,
+ bool isEH = false, bool useSet = true)
+ : DIEValue(isSectionOffset), Label(Lab), Section(Sec),
+ IsEH(isEH), UseSet(useSet) {}
+
+ /// EmitValue - Emit section offset.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of section offset value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const DWLabel &Label,
+ const DWLabel &Section);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIESectionOffset *) { return true; }
+ static bool classof(const DIEValue *D) {
+ return D->getType() == isSectionOffset;
+ }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDelta - A simple label difference DIE.
+ ///
+ class VISIBILITY_HIDDEN DIEDelta : public DIEValue {
+ const DWLabel LabelHi;
+ const DWLabel LabelLo;
+ public:
+ DIEDelta(const DWLabel &Hi, const DWLabel &Lo)
+ : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+ /// EmitValue - Emit delta value.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of delta value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+ const DWLabel &LabelLo);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDelta *) { return true; }
+ static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEEntry - A pointer to another debug information entry. An instance of
+ /// this class can also be used as a proxy for a debug information entry not
+ /// yet defined (i.e. types).
+ class VISIBILITY_HIDDEN DIEEntry : public DIEValue {
+ DIE *Entry;
+ public:
+ explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+ DIE *getEntry() const { return Entry; }
+ void setEntry(DIE *E) { Entry = E; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, DIE *Entry);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEEntry *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEBlock - A block of values. Primarily used for location expressions.
+ ///
+ class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE {
+ unsigned Size; // Size in bytes excluding size header.
+ public:
+ DIEBlock()
+ : DIEValue(isBlock), DIE(0), Size(0) {}
+ virtual ~DIEBlock() {}
+
+ /// ComputeSize - calculate the size of the block.
+ ///
+ unsigned ComputeSize(const TargetData *TD);
+
+ /// BestForm - Choose the best form for data.
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
+ if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+ if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
+ return dwarf::DW_FORM_block;
+ }
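+
+ // For example (illustrative): a 9-byte location expression fits in an
+ // unsigned char and gets DW_FORM_block1 (one-byte length prefix); a
+ // 300-byte block needs DW_FORM_block2. Since Size is itself an unsigned
+ // int, the ULEB128-prefixed DW_FORM_block case is only a fallback and is
+ // not normally reached.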
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEBlock *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 0000000..25217b0
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,2610 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfDebug.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/System/Path.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include <ostream>
+using namespace llvm;
+
+static TimerGroup &getDwarfTimerGroup() {
+ static TimerGroup DwarfTimerGroup("Dwarf Debugging");
+ return DwarfTimerGroup;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitDiesSetSize = 9; // log2(512)
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+static const unsigned InitValuesSetSize = 9; // log2(512)
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class VISIBILITY_HIDDEN CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ DIE *Die;
+
+ /// GVToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ std::map<GlobalVariable *, DIE *> GVToDieMap;
+
+ /// GVToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ std::map<GlobalVariable *, DIEEntry *> GVToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// DiesSet - Used to uniquely define dies within the compile unit.
+ ///
+ FoldingSet<DIE> DiesSet;
+public:
+ CompileUnit(unsigned I, DIE *D)
+ : ID(I), Die(D), DiesSet(InitDiesSetSize) {}
+ ~CompileUnit() { delete Die; }
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getDie() const { return Die; }
+ StringMap<DIE*> &getGlobals() { return Globals; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !Die->getChildren().empty(); }
+
+ /// AddGlobal - Add a new global entity to the compile unit.
+ ///
+ void AddGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// getDieMapSlotFor - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *&getDieMapSlotFor(GlobalVariable *GV) { return GVToDieMap[GV]; }
+
+ /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for the
+ /// specified debug variable.
+ DIEEntry *&getDIEEntrySlotFor(GlobalVariable *GV) {
+ return GVToDIEEntryMap[GV];
+ }
+
+ /// AddDie - Adds or interns the DIE to the compile unit.
+ ///
+ DIE *AddDie(DIE &Buffer) {
+ FoldingSetNodeID ID;
+ Buffer.Profile(ID);
+ void *Where;
+ DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Die) {
+ Die = new DIE(Buffer);
+ DiesSet.InsertNode(Die, Where);
+ this->Die->AddChild(Die);
+ Buffer.Detach();
+ }
+
+ return Die;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class VISIBILITY_HIDDEN DbgVariable {
+ DIVariable Var; // Variable Descriptor.
+ unsigned FrameIndex; // Variable frame index.
+ bool InlinedFnVar; // Variable for an inlined function.
+public:
+ DbgVariable(DIVariable V, unsigned I, bool IFV)
+ : Var(V), FrameIndex(I), InlinedFnVar(IFV) {}
+
+ // Accessors.
+ DIVariable getVariable() const { return Var; }
+ unsigned getFrameIndex() const { return FrameIndex; }
+ bool isInlinedFnVar() const { return InlinedFnVar; }
+};
+
+class DbgConcreteScope;
+
+//===----------------------------------------------------------------------===//
+/// DbgScope - This class is used to track scope information.
+///
+class VISIBILITY_HIDDEN DbgScope {
+ DbgScope *Parent; // Parent to this scope.
+ DIDescriptor Desc; // Debug info descriptor for scope.
+ // Either subprogram or block.
+ unsigned StartLabelID; // Label ID of the beginning of scope.
+ unsigned EndLabelID; // Label ID of the end of scope.
+ SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope.
+ SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
+ SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs.
+public:
+ DbgScope(DbgScope *P, DIDescriptor D)
+ : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0) {}
+ virtual ~DbgScope();
+
+ // Accessors.
+ DbgScope *getParent() const { return Parent; }
+ DIDescriptor getDesc() const { return Desc; }
+ unsigned getStartLabelID() const { return StartLabelID; }
+ unsigned getEndLabelID() const { return EndLabelID; }
+ SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+ SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
+ SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; }
+ void setStartLabelID(unsigned S) { StartLabelID = S; }
+ void setEndLabelID(unsigned E) { EndLabelID = E; }
+
+ /// AddScope - Add a scope to the scope.
+ ///
+ void AddScope(DbgScope *S) { Scopes.push_back(S); }
+
+ /// AddVariable - Add a variable to the scope.
+ ///
+ void AddVariable(DbgVariable *V) { Variables.push_back(V); }
+
+ /// AddConcreteInst - Add a concrete instance to the scope.
+ ///
+ void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+#ifndef NDEBUG
+void DbgScope::dump() const {
+ static unsigned IndentLevel = 0;
+ std::string Indent(IndentLevel, ' ');
+
+ cerr << Indent; Desc.dump();
+ cerr << " [" << StartLabelID << ", " << EndLabelID << "]\n";
+
+ IndentLevel += 2;
+
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
+ if (Scopes[i] != this)
+ Scopes[i]->dump();
+
+ IndentLevel -= 2;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+/// DbgConcreteScope - This class is used to track a scope that holds concrete
+/// instance information.
+///
+class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope {
+ CompileUnit *Unit;
+ DIE *Die; // Debug info for this concrete scope.
+public:
+ explicit DbgConcreteScope(DIDescriptor D)
+ : DbgScope(NULL, D), Unit(NULL), Die(NULL) {}
+
+ // Accessors.
+ DIE *getDie() const { return Die; }
+ void setDie(DIE *D) { Die = D; }
+};
+
+DbgScope::~DbgScope() {
+ for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
+ delete Scopes[i];
+ for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+ delete Variables[j];
+ for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k)
+ delete ConcreteInsts[k];
+}
+
+} // end llvm namespace
+
+DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+ : Dwarf(OS, A, T, "dbg"), MainCU(0),
+ AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
+ ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(),
+ SectionSourceLines(), didInitial(false), shouldEmit(false),
+ FunctionDbgScope(0), DebugTimer(0) {
+ if (TimePassesIsEnabled)
+ DebugTimer = new Timer("Dwarf Debug Writer",
+ getDwarfTimerGroup());
+}
+
+DwarfDebug::~DwarfDebug() {
+ for (unsigned j = 0, M = Values.size(); j < M; ++j)
+ delete Values[j];
+
+ for (DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ I = AbstractInstanceRootMap.begin(),
+ E = AbstractInstanceRootMap.end(); I != E;++I)
+ delete I->second;
+
+ delete DebugTimer;
+}
+
+/// AssignAbbrevNumber - Define a unique number for the abbreviation.
+///
+void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations.push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations.size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
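+
+// For example: the first distinct abbreviation profiled receives number 1,
+// the next distinct one number 2, and so on; an abbreviation that folds onto
+// an existing node simply reuses that node's number, so each distinct layout
+// appears exactly once in the emitted abbreviation table.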
+
+/// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *DwarfDebug::CreateDIEEntry(DIE *Entry) {
+ DIEEntry *Value;
+
+ if (Entry) {
+ FoldingSetNodeID ID;
+ DIEEntry::Profile(ID, Entry);
+ void *Where;
+ Value = static_cast<DIEEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where));
+
+ if (Value) return Value;
+
+ Value = new DIEEntry(Entry);
+ ValuesSet.InsertNode(Value, Where);
+ } else {
+ Value = new DIEEntry(Entry);
+ }
+
+ Values.push_back(Value);
+ return Value;
+}
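+
+// Note: a DIEEntry created with a null Entry is deliberately left out of the
+// folding set; SetDIEEntry below patches in the target DIE (and interns the
+// proxy) once the referenced entry has been constructed. This is how forward
+// references to not-yet-built types are resolved.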
+
+/// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
+///
+void DwarfDebug::SetDIEEntry(DIEEntry *Value, DIE *Entry) {
+ Value->setEntry(Entry);
+
+ // Add to values set if not already there. If it is, we merely have a
+ // duplicate in the values list (no harm.)
+ ValuesSet.GetOrInsertNode(Value);
+}
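+
+// The Add* helpers below all follow the same interning pattern: profile the
+// prospective value, reuse the node already present in ValuesSet if there is
+// one, and otherwise allocate a new node, insert it, and record it in Values
+// so that it is freed in ~DwarfDebug.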
+
+/// AddUInt - Add an unsigned integer attribute data and value.
+///
+void DwarfDebug::AddUInt(DIE *Die, unsigned Attribute,
+ unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+
+ FoldingSetNodeID ID;
+ DIEInteger::Profile(ID, Integer);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEInteger(Integer);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddSInt - Add a signed integer attribute data and value.
+///
+void DwarfDebug::AddSInt(DIE *Die, unsigned Attribute,
+ unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+
+ FoldingSetNodeID ID;
+ DIEInteger::Profile(ID, (uint64_t)Integer);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEInteger(Integer);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddString - Add a string attribute data and value.
+///
+void DwarfDebug::AddString(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &String) {
+ FoldingSetNodeID ID;
+ DIEString::Profile(ID, String);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEString(String);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddLabel - Add a Dwarf label attribute data and value.
+///
+void DwarfDebug::AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label) {
+ FoldingSetNodeID ID;
+ DIEDwarfLabel::Profile(ID, Label);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEDwarfLabel(Label);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+///
+void DwarfDebug::AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &Label) {
+ FoldingSetNodeID ID;
+ DIEObjectLabel::Profile(ID, Label);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEObjectLabel(Label);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddSectionOffset - Add a section offset label attribute data and value.
+///
+void DwarfDebug::AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label, const DWLabel &Section,
+ bool isEH, bool useSet) {
+ FoldingSetNodeID ID;
+ DIESectionOffset::Profile(ID, Label, Section);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIESectionOffset(Label, Section, isEH, useSet);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddDelta - Add a label delta attribute data and value.
+///
+void DwarfDebug::AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Hi, const DWLabel &Lo) {
+ FoldingSetNodeID ID;
+ DIEDelta::Profile(ID, Hi, Lo);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEDelta(Hi, Lo);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddBlock - Add block data.
+///
+void DwarfDebug::AddBlock(DIE *Die, unsigned Attribute, unsigned Form,
+ DIEBlock *Block) {
+ Block->ComputeSize(TD);
+ FoldingSetNodeID ID;
+ Block->Profile(ID);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = Block;
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ } else {
+ // Already exists, reuse the previous one.
+ delete Block;
+ Block = cast<DIEBlock>(Value);
+ }
+
+ Die->AddValue(Attribute, Block->BestForm(), Value);
+}
+
+/// AddSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DIVariable *V) {
+ // If there is no compile unit specified, don't add a line #.
+ if (V->getCompileUnit().isNull())
+ return;
+
+ unsigned Line = V->getLineNumber();
+ unsigned FileID = FindCompileUnit(V->getCompileUnit()).getID();
+ assert(FileID && "Invalid file id");
+ AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) {
+ // If there is no compile unit specified, don't add a line #.
+ if (G->getCompileUnit().isNull())
+ return;
+
+ unsigned Line = G->getLineNumber();
+ unsigned FileID = FindCompileUnit(G->getCompileUnit()).getID();
+ assert(FileID && "Invalid file id");
+ AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
+ // If there is no compile unit specified, don't add a line #.
+ DICompileUnit CU = Ty->getCompileUnit();
+ if (CU.isNull())
+ return;
+
+ unsigned Line = Ty->getLineNumber();
+ unsigned FileID = FindCompileUnit(CU).getID();
+ assert(FileID && "Invalid file id");
+ AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddAddress - Add an address attribute to a die based on the location
+/// provided.
+void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ } else {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ } else {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ AddBlock(Die, Attribute, 0, Block);
+}
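+
+// For example (illustrative register numbers): a value live in DWARF register
+// 5 is encoded as the single byte DW_OP_reg5; register 40 needs the two-part
+// DW_OP_regx 40; and a frame slot 8 bytes below register 6 becomes
+// DW_OP_breg6 -8, with the offset emitted as a signed LEB128 (DW_FORM_sdata).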
+
+/// AddType - Add a new type attribute to the specified entity.
+void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
+ if (Ty.isNull())
+ return;
+
+ // Check for pre-existence.
+ DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getGV());
+
+ // If it exists then use the existing value.
+ if (Slot) {
+ Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+ return;
+ }
+
+ // Set up proxy.
+ Slot = CreateDIEEntry();
+
+ // Construct type.
+ DIE Buffer(dwarf::DW_TAG_base_type);
+ if (Ty.isBasicType(Ty.getTag()))
+ ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getGV()));
+ else if (Ty.isDerivedType(Ty.getTag()))
+ ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getGV()));
+ else {
+ assert(Ty.isCompositeType(Ty.getTag()) && "Unknown kind of DIType");
+ ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getGV()));
+ }
+
+ // Add debug information entry to entity and appropriate context.
+ DIE *Die = NULL;
+ DIDescriptor Context = Ty.getContext();
+ if (!Context.isNull())
+ Die = DW_Unit->getDieMapSlotFor(Context.getGV());
+
+ if (Die) {
+ DIE *Child = new DIE(Buffer);
+ Die->AddChild(Child);
+ Buffer.Detach();
+ SetDIEEntry(Slot, Child);
+ } else {
+ Die = DW_Unit->AddDie(Buffer);
+ SetDIEEntry(Slot, Die);
+ }
+
+ Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+}
+
+/// ConstructTypeDIE - Construct basic type die from DIBasicType.
+void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DIBasicType BTy) {
+ // Get core information.
+ std::string Name;
+ BTy.getName(Name);
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ uint64_t Size = BTy.getSizeInBits() >> 3;
+ AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// ConstructTypeDIE - Construct derived type die from DIDerivedType.
+void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DIDerivedType DTy) {
+ // Get core information.
+ std::string Name;
+ DTy.getName(Name);
+ uint64_t Size = DTy.getSizeInBits() >> 3;
+ unsigned Tag = DTy.getTag();
+
+ // FIXME - Workaround for templates.
+ if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+ Buffer.setTag(Tag);
+
+ // Map to the main type; void will not have a type.
+ DIType FromTy = DTy.getTypeDerivedFrom();
+ AddType(DW_Unit, &Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+ // Add source line info if available and TyDesc is not a forward declaration.
+ if (!DTy.isForwardDecl())
+ AddSourceLine(&Buffer, &DTy);
+}
+
+/// ConstructTypeDIE - Construct type DIE from DICompositeType.
+void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DICompositeType CTy) {
+ // Get core information.
+ std::string Name;
+ CTy.getName(Name);
+
+ uint64_t Size = CTy.getSizeInBits() >> 3;
+ unsigned Tag = CTy.getTag();
+ Buffer.setTag(Tag);
+
+ switch (Tag) {
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_array_type:
+ ConstructArrayTypeDIE(DW_Unit, Buffer, &CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type: {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *ElemDie = NULL;
+ DIEnumerator Enum(Elements.getElement(i).getGV());
+ ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum);
+ Buffer.AddChild(ElemDie);
+ }
+ }
+ break;
+ case dwarf::DW_TAG_subroutine_type: {
+ // Add return type.
+ DIArray Elements = CTy.getTypeArray();
+ DIDescriptor RTy = Elements.getElement(0);
+ AddType(DW_Unit, &Buffer, DIType(RTy.getGV()));
+
+ // Add prototype flag.
+ AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments.
+ for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIDescriptor Ty = Elements.getElement(i);
+ AddType(DW_Unit, Arg, DIType(Ty.getGV()));
+ Buffer.AddChild(Arg);
+ }
+ }
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Get the elements of the structure type.
+ DIArray Elements = CTy.getTypeArray();
+
+ // A forward-declared struct type may not have its elements available.
+ if (Elements.isNull())
+ break;
+
+ // Add elements to structure type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIE *ElemDie = NULL;
+ if (Element.getTag() == dwarf::DW_TAG_subprogram)
+ ElemDie = CreateSubprogramDIE(DW_Unit,
+ DISubprogram(Element.getGV()));
+ else if (Element.getTag() == dwarf::DW_TAG_variable) // ??
+ ElemDie = CreateGlobalVariableDIE(DW_Unit,
+ DIGlobalVariable(Element.getGV()));
+ else
+ ElemDie = CreateMemberDIE(DW_Unit,
+ DIDerivedType(Element.getGV()));
+ Buffer.AddChild(ElemDie);
+ }
+
+ // FIXME: We'd like an API to register additional attributes for the
+ // frontend to use while synthesizing, and then we'd use that api in clang
+ // instead of this.
+ if (Name == "__block_literal_generic")
+ AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ AddUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ else {
+ // Add zero size if it is not a forward declaration.
+ if (CTy.isForwardDecl())
+ AddUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ else
+ AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ }
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ AddSourceLine(&Buffer, &CTy);
+ }
+}
+
+/// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
+void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+
+ if (L != H) {
+ AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ if (L)
+ AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ }
+
+ Buffer.AddChild(DW_Subrange);
+}
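+
+// For example: a C array 'int a[10]' arrives as a subrange with Lo = 0 and
+// Hi = 9, producing DW_AT_upper_bound 9 (the zero lower bound is implicit and
+// omitted); when Lo == Hi, as for an incomplete array type, no bounds are
+// emitted at all.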
+
+/// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ AddUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ AddType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Construct an anonymous type for index type.
+ DIE IdxBuffer(dwarf::DW_TAG_base_type);
+ AddUInt(&IdxBuffer, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ AddUInt(&IdxBuffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ DIE *IndexTy = DW_Unit->AddDie(IdxBuffer);
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ ConstructSubrangeDIE(Buffer, DISubrange(Element.getGV()), IndexTy);
+ }
+}
+
+/// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ std::string Name;
+ ETy->getName(Name);
+ AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy->getEnumValue();
+ AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// CreateGlobalVariableDIE - Create new DIE using GV.
+DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+ const DIGlobalVariable &GV) {
+ DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
+ std::string Name;
+ GV.getDisplayName(Name);
+ AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ std::string LinkageName;
+ GV.getLinkageName(LinkageName);
+ if (!LinkageName.empty())
+ AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ LinkageName);
+ AddType(DW_Unit, GVDie, GV.getType());
+ if (!GV.isLocalToUnit())
+ AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ AddSourceLine(GVDie, &GV);
+ return GVDie;
+}
+
+/// CreateMemberDIE - Create new member DIE.
+DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
+ DIE *MemberDie = new DIE(DT.getTag());
+ std::string Name;
+ DT.getName(Name);
+ if (!Name.empty())
+ AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
+
+ AddSourceLine(MemberDie, &DT);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ AddUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ AddUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t FieldOffset = Offset;
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
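+
+ // Worked example (illustrative): a 1-bit field whose container is a 32-bit,
+ // 32-bit-aligned word, located 34 bits from the start of the struct:
+ // HiMark = (34 + 32) & ~31 = 64, FieldOffset = 64 - 32 = 32, and Offset
+ // becomes 34 - 32 = 2; on a little-endian target the adjustment below
+ // rewrites this to 32 - (2 + 1) = 29.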
+
+ // Maybe we need to work from the other end.
+ if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
+ AddUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+ }
+
+ DIEBlock *Block = new DIEBlock();
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+ AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, Block);
+
+ if (DT.isProtected())
+ AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+ dwarf::DW_ACCESS_private);
+
+ return MemberDie;
+}
+
+/// CreateSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
+ const DISubprogram &SP,
+ bool IsConstructor,
+ bool IsInlined) {
+ DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ std::string Name;
+ SP.getName(Name);
+ AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ std::string LinkageName;
+ SP.getLinkageName(LinkageName);
+
+ if (!LinkageName.empty())
+ AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ LinkageName);
+
+ AddSourceLine(SPDie, &SP);
+
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+
+ // Add prototyped tag, if C or ObjC.
+ unsigned Lang = SP.getCompileUnit().getLanguage();
+ if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
+ Lang == dwarf::DW_LANG_ObjC)
+ AddUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add Return Type.
+ unsigned SPTag = SPTy.getTag();
+ if (!IsConstructor) {
+ if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
+ AddType(DW_Unit, SPDie, SPTy);
+ else
+ AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getGV()));
+ }
+
+ if (!SP.isDefinition()) {
+ AddUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled through RecordVariable.
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ AddType(DW_Unit, Arg, DIType(Args.getElement(i).getGV()));
+ AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
+ SPDie->AddChild(Arg);
+ }
+ }
+
+ if (!SP.isLocalToUnit() && !IsInlined)
+ AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getGV());
+ Slot = SPDie;
+ return SPDie;
+}
+
+/// FindCompileUnit - Get the compile unit for the given descriptor.
+///
+CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const {
+ DenseMap<Value *, CompileUnit *>::const_iterator I =
+ CompileUnitMap.find(Unit.getGV());
+ assert(I != CompileUnitMap.end() && "Missing compile unit.");
+ return *I->second;
+}
+
+/// CreateDbgScopeVariable - Create a new scope variable.
+///
+DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
+ // Get the descriptor.
+ const DIVariable &VD = DV->getVariable();
+
+ // Translate tag to proper Dwarf tag. The result variable is dropped for
+ // now.
+ unsigned Tag;
+ switch (VD.getTag()) {
+ case dwarf::DW_TAG_return_variable:
+ return NULL;
+ case dwarf::DW_TAG_arg_variable:
+ Tag = dwarf::DW_TAG_formal_parameter;
+ break;
+ case dwarf::DW_TAG_auto_variable: // fall thru
+ default:
+ Tag = dwarf::DW_TAG_variable;
+ break;
+ }
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ std::string Name;
+ VD.getName(Name);
+ AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ // Add source line info if available.
+ AddSourceLine(VariableDie, &VD);
+
+ // Add variable type.
+ AddType(Unit, VariableDie, VD.getType());
+
+ // Add variable address.
+ if (!DV->isInlinedFnVar()) {
+ // Variables for abstract instances of inlined functions don't get a
+ // location.
+ MachineLocation Location;
+ Location.set(RI->getFrameRegister(*MF),
+ RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+ }
+
+ return VariableDie;
+}
+
+/// getOrCreateScope - Returns the scope associated with the given descriptor.
+///
+DbgScope *DwarfDebug::getOrCreateScope(GlobalVariable *V) {
+ DbgScope *&Slot = DbgScopeMap[V];
+ if (Slot) return Slot;
+
+ DbgScope *Parent = NULL;
+ DIBlock Block(V);
+
+ // Don't create a new scope if we already created one for an inlined function.
+ DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ II = AbstractInstanceRootMap.find(V);
+ if (II != AbstractInstanceRootMap.end())
+ return LexicalScopeStack.back();
+
+ if (!Block.isNull()) {
+ DIDescriptor ParentDesc = Block.getContext();
+ Parent =
+ ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getGV());
+ }
+
+ Slot = new DbgScope(Parent, DIDescriptor(V));
+
+ if (Parent)
+ Parent->AddScope(Slot);
+ else
+ // First function is top level function.
+ FunctionDbgScope = Slot;
+
+ return Slot;
+}
+
+/// ConstructDbgScope - Construct the components of a scope.
+///
+void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope,
+ unsigned ParentStartID,
+ unsigned ParentEndID,
+ DIE *ParentDie, CompileUnit *Unit) {
+ // Add variables to scope.
+ SmallVector<DbgVariable *, 8> &Variables = ParentScope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit);
+ if (VariableDie) ParentDie->AddChild(VariableDie);
+ }
+
+ // Add concrete instances to scope.
+ SmallVector<DbgConcreteScope *, 8> &ConcreteInsts =
+ ParentScope->getConcreteInsts();
+ for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) {
+ DbgConcreteScope *ConcreteInst = ConcreteInsts[i];
+ DIE *Die = ConcreteInst->getDie();
+
+ unsigned StartID = ConcreteInst->getStartLabelID();
+ unsigned EndID = ConcreteInst->getEndLabelID();
+
+ // Add the scope bounds.
+ if (StartID)
+ AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", StartID));
+ else
+ AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+
+ if (EndID)
+ AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", EndID));
+ else
+ AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+
+ ParentDie->AddChild(Die);
+ }
+
+ // Add nested scopes.
+ SmallVector<DbgScope *, 4> &Scopes = ParentScope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+ // Define the Scope debug information entry.
+ DbgScope *Scope = Scopes[j];
+
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0) continue;
+
+ // Ignore scopes with nothing to emit. Inlined scopes are not ignored here:
+ // a scope holding concrete instances is kept even if it has no variables or
+ // nested scopes.
+ if (Scope->getScopes().empty() && Scope->getVariables().empty() &&
+ Scope->getConcreteInsts().empty())
+ continue;
+
+ if (StartID == ParentStartID && EndID == ParentEndID) {
+ // Just add stuff to the parent scope.
+ ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
+ } else {
+ DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block);
+
+ // Add the scope bounds.
+ if (StartID)
+ AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", StartID));
+ else
+ AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+
+ if (EndID)
+ AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", EndID));
+ else
+ AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+
+ // Add the scope's contents.
+ ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit);
+ ParentDie->AddChild(ScopeDie);
+ }
+ }
+}
+
+/// ConstructFunctionDbgScope - Construct the scope for the subprogram.
+///
+void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope,
+ bool AbstractScope) {
+ // Exit if there is no root scope.
+ if (!RootScope) return;
+ DIDescriptor Desc = RootScope->getDesc();
+ if (Desc.isNull())
+ return;
+
+ // Get the subprogram debug information entry.
+ DISubprogram SPD(Desc.getGV());
+
+ // Get the compile unit context.
+ CompileUnit *Unit = MainCU;
+ if (!Unit)
+ Unit = &FindCompileUnit(SPD.getCompileUnit());
+
+ // Get the subprogram die.
+ DIE *SPDie = Unit->getDieMapSlotFor(SPD.getGV());
+ assert(SPDie && "Missing subprogram descriptor");
+
+ if (!AbstractScope) {
+ // Add the function bounds.
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ }
+
+ ConstructDbgScope(RootScope, 0, 0, SPDie, Unit);
+}
+
+/// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
+///
+void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) {
+ const char *FnName = MF->getFunction()->getNameStart();
+ if (MainCU) {
+ StringMap<DIE*> &Globals = MainCU->getGlobals();
+ StringMap<DIE*>::iterator GI = Globals.find(FnName);
+ if (GI != Globals.end()) {
+ DIE *SPDie = GI->second;
+
+ // Add the function bounds.
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ return;
+ }
+ } else {
+ for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) {
+ CompileUnit *Unit = CompileUnits[i];
+ StringMap<DIE*> &Globals = Unit->getGlobals();
+ StringMap<DIE*>::iterator GI = Globals.find(FnName);
+ if (GI != Globals.end()) {
+ DIE *SPDie = GI->second;
+
+ // Add the function bounds.
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ return;
+ }
+ }
+ }
+
+#if 0
+ // FIXME: This is causing an abort because C++ mangled names are compared with
+ // their unmangled counterparts. See PR2885. Don't do this assert.
+ assert(0 && "Couldn't find DIE for machine function!");
+#endif
+}
+
+/// GetOrCreateSourceID - Look up the source id with the given directory and
+/// source file names. If none currently exists, create a new id and insert it
+/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
+/// maps as well.
+unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName) {
+ unsigned DId;
+ StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
+ if (DI != DirectoryIdMap.end()) {
+ DId = DI->getValue();
+ } else {
+ DId = DirectoryNames.size() + 1;
+ DirectoryIdMap[DirName] = DId;
+ DirectoryNames.push_back(DirName);
+ }
+
+ unsigned FId;
+ StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName);
+ if (FI != SourceFileIdMap.end()) {
+ FId = FI->getValue();
+ } else {
+ FId = SourceFileNames.size() + 1;
+ SourceFileIdMap[FileName] = FId;
+ SourceFileNames.push_back(FileName);
+ }
+
+ DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI =
+ SourceIdMap.find(std::make_pair(DId, FId));
+ if (SI != SourceIdMap.end())
+ return SI->second;
+
+ unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0.
+ SourceIdMap[std::make_pair(DId, FId)] = SrcId;
+ SourceIds.push_back(std::make_pair(DId, FId));
+
+ return SrcId;
+}
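+
+// For example: the first file seen, directory "/tmp" with file "a.c", gets
+// directory id 1, file id 1, and source id 1; a later "/tmp" + "b.c" reuses
+// directory id 1 and receives file id 2 and source id 2. Ids start at 1
+// because DW_AT_decl_file reserves 0 to mean "no file".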
+
+void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) {
+ DICompileUnit DIUnit(GV);
+ std::string Dir, FN, Prod;
+ unsigned ID = GetOrCreateSourceID(DIUnit.getDirectory(Dir),
+ DIUnit.getFilename(FN));
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ DWLabel("section_line", 0), DWLabel("section_line", 0),
+ false);
+ AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+ DIUnit.getProducer(Prod));
+ AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+ DIUnit.getLanguage());
+ AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+
+ if (!Dir.empty())
+ AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (DIUnit.isOptimized())
+ AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ std::string Flags;
+ DIUnit.getFlags(Flags);
+ if (!Flags.empty())
+ AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+ unsigned RVer = DIUnit.getRunTimeVersion();
+ if (RVer)
+ AddUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+
+ CompileUnit *Unit = new CompileUnit(ID, Die);
+ if (DIUnit.isMain()) {
+ assert(!MainCU && "Multiple main compile units are found!");
+ MainCU = Unit;
+ }
+
+ CompileUnitMap[DIUnit.getGV()] = Unit;
+ CompileUnits.push_back(Unit);
+}
+
+/// ConstructCompileUnits - Create compile unit DIEs.
+void DwarfDebug::ConstructCompileUnits() {
+ GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.compile_units");
+ if (!Root)
+ return;
+ assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+ "Malformed compile unit descriptor anchor type");
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+ assert(RootC->hasNUsesOrMore(1) &&
+ "Malformed compile unit descriptor anchor type");
+
+ for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI)
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI) {
+ GlobalVariable *GV = cast<GlobalVariable>(*UUI);
+ ConstructCompileUnit(GV);
+ }
+}
+
+bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
+ DIGlobalVariable DI_GV(GV);
+ CompileUnit *DW_Unit = MainCU;
+ if (!DW_Unit)
+ DW_Unit = &FindCompileUnit(DI_GV.getCompileUnit());
+
+ // Check for pre-existence.
+ DIE *&Slot = DW_Unit->getDieMapSlotFor(DI_GV.getGV());
+ if (Slot)
+ return false;
+
+ DIE *VariableDie = CreateGlobalVariableDIE(DW_Unit, DI_GV);
+
+ // Add address.
+ DIEBlock *Block = new DIEBlock();
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ std::string GLN;
+ AddObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->getGlobalLinkName(DI_GV.getGlobal(), GLN));
+ AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
+
+ // Add to map.
+ Slot = VariableDie;
+
+ // Add to context owner.
+ DW_Unit->getDie()->AddChild(VariableDie);
+
+ // Expose as global. FIXME - need to check external flag.
+ std::string Name;
+ DW_Unit->AddGlobal(DI_GV.getName(Name), VariableDie);
+ return true;
+}
+
+/// ConstructGlobalVariableDIEs - Create DIEs for each of the externally visible
+/// global variables. Return true if at least one global DIE is created.
+bool DwarfDebug::ConstructGlobalVariableDIEs() {
+ GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.global_variables");
+ if (!Root)
+ return false;
+
+ assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+ "Malformed global variable descriptor anchor type");
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+ assert(RootC->hasNUsesOrMore(1) &&
+ "Malformed global variable descriptor anchor type");
+
+ bool Result = false;
+ for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI)
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI)
+ Result |= ConstructGlobalVariableDIE(cast<GlobalVariable>(*UUI));
+
+ return Result;
+}
+
+bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
+ DISubprogram SP(GV);
+ CompileUnit *Unit = MainCU;
+ if (!Unit)
+ Unit = &FindCompileUnit(SP.getCompileUnit());
+
+ // Check for pre-existence.
+ DIE *&Slot = Unit->getDieMapSlotFor(GV);
+ if (Slot)
+ return false;
+
+ if (!SP.isDefinition())
+ // This is a method declaration which will be handled while constructing
+ // class type.
+ return false;
+
+ DIE *SubprogramDie = CreateSubprogramDIE(Unit, SP);
+
+ // Add to map.
+ Slot = SubprogramDie;
+
+ // Add to context owner.
+ Unit->getDie()->AddChild(SubprogramDie);
+
+ // Expose as global.
+ std::string Name;
+ Unit->AddGlobal(SP.getName(Name), SubprogramDie);
+ return true;
+}
+
+/// ConstructSubprograms - Create DIEs for each of the externally visible
+/// subprograms. Return true if at least one subprogram DIE is created.
+bool DwarfDebug::ConstructSubprograms() {
+ GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.subprograms");
+ if (!Root)
+ return false;
+
+ assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+ "Malformed subprogram descriptor anchor type");
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+ assert(RootC->hasNUsesOrMore(1) &&
+ "Malformed subprogram descriptor anchor type");
+
+ bool Result = false;
+ for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI)
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI)
+ Result |= ConstructSubprogram(cast<GlobalVariable>(*UUI));
+
+ return Result;
+}
+
+/// SetDebugInfo - Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::SetDebugInfo(MachineModuleInfo *mmi) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Create all the compile unit DIEs.
+ ConstructCompileUnits();
+
+ if (CompileUnits.empty()) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return;
+ }
+
+ // Create DIEs for each of the externally visible global variables.
+ bool globalDIEs = ConstructGlobalVariableDIEs();
+
+ // Create DIEs for each of the externally visible subprograms.
+ bool subprogramDIEs = ConstructSubprograms();
+
+ // If no debug info is available for any global variable or any subprogram,
+ // then there is nothing to emit.
+ if (!globalDIEs && !subprogramDIEs) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return;
+ }
+
+ MMI = mmi;
+ shouldEmit = true;
+ MMI->setDebugInfoAvailability(true);
+
+ // Prime section data.
+ SectionMap.insert(TAI->getTextSection());
+
+ // Print out .file directives to specify files for .loc directives. These are
+ // printed out early so that they precede any .loc directives.
+ if (TAI->hasDotLocAndDotFile()) {
+ for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
+ // Remember source id starts at 1.
+ std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
+ sys::Path FullPath(getSourceDirectoryName(Id.first));
+ bool AppendOk =
+ FullPath.appendComponent(getSourceFileName(Id.second));
+ assert(AppendOk && "Could not append filename to directory!");
+ AppendOk = false; // Quiet an unused-variable warning in no-assert builds.
+ Asm->EmitFile(i, FullPath.toString());
+ Asm->EOL();
+ }
+ }
+
+ // Emit initial sections
+ EmitInitial();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::EndModule() {
+ if (!ShouldEmitDwarfDebug())
+ return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Standard sections final addresses.
+ Asm->SwitchToSection(TAI->getTextSection());
+ EmitLabel("text_end", 0);
+ Asm->SwitchToSection(TAI->getDataSection());
+ EmitLabel("data_end", 0);
+
+ // End text sections.
+ for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+ Asm->SwitchToSection(SectionMap[i]);
+ EmitLabel("section_end", i);
+ }
+
+ // Emit common frame information.
+ EmitCommonDebugFrame();
+
+ // Emit function debug frame information
+ for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
+ E = DebugFrames.end(); I != E; ++I)
+ EmitFunctionDebugFrame(*I);
+
+ // Compute DIE offsets and sizes.
+ SizeAndOffsets();
+
+ // Emit all the DIEs into a debug info section
+ EmitDebugInfo();
+
+ // Emit the corresponding abbreviations into an abbrev section.
+ EmitAbbreviations();
+
+ // Emit source line correspondence into a debug line section.
+ EmitDebugLines();
+
+ // Emit info into a debug pubnames section.
+ EmitDebugPubNames();
+
+ // Emit info into a debug str section.
+ EmitDebugStr();
+
+ // Emit info into a debug loc section.
+ EmitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ EmitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ EmitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ EmitDebugMacInfo();
+
+ // Emit inline info.
+ EmitDebugInlineInfo();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// BeginFunction - Gather pre-function debug information. Assumed to be
+/// emitted immediately after the function entry point.
+void DwarfDebug::BeginFunction(MachineFunction *MF) {
+ this->MF = MF;
+
+ if (!ShouldEmitDwarfDebug()) return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Begin accumulating function debug information.
+ MMI->BeginFunction(MF);
+
+ // Assumes we are in the correct section after the entry point.
+ EmitLabel("func_begin", ++SubprogramCount);
+
+ // Emit label for the implicitly defined dbg.stoppoint at the start of the
+ // function.
+ DebugLoc FDL = MF->getDefaultDebugLoc();
+ if (!FDL.isUnknown()) {
+ DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
+ unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col,
+ DICompileUnit(DLT.CompileUnit));
+ Asm->printLabel(LabelID);
+ }
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::EndFunction(MachineFunction *MF) {
+ if (!ShouldEmitDwarfDebug()) return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Define end label for subprogram.
+ EmitLabel("func_end", SubprogramCount);
+
+ // Get function line info.
+ if (!Lines.empty()) {
+ // Get section line info.
+ unsigned ID = SectionMap.insert(Asm->CurrentSection_);
+ if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+ std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+ // Append the function info to section info.
+ SectionLineInfos.insert(SectionLineInfos.end(),
+ Lines.begin(), Lines.end());
+ }
+
+ // Construct the DbgScope for abstract instances.
+ for (SmallVector<DbgScope *, 32>::iterator
+ I = AbstractInstanceRootList.begin(),
+ E = AbstractInstanceRootList.end(); I != E; ++I)
+ ConstructFunctionDbgScope(*I);
+
+ // Construct scopes for subprogram.
+ if (FunctionDbgScope)
+ ConstructFunctionDbgScope(FunctionDbgScope);
+ else
+ // FIXME: This is wrong. We are essentially getting past a problem with
+ // debug information not being able to handle unreachable blocks that have
+ // debug information in them. In particular, those unreachable blocks that
+ // have "region end" info in them. That situation results in the "root
+ // scope" not being created. If that's the case, then emit a "default"
+ // scope, i.e., one that encompasses the whole function. This isn't
+ // desirable. And a better way of handling this (and all of the debugging
+ // information) needs to be explored.
+ ConstructDefaultDbgScope(MF);
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+ MMI->getFrameMoves()));
+
+ // Clear debug info
+ if (FunctionDbgScope) {
+ delete FunctionDbgScope;
+ DbgScopeMap.clear();
+ DbgAbstractScopeMap.clear();
+ DbgConcreteScopeMap.clear();
+ InlinedVariableScopes.clear();
+ FunctionDbgScope = NULL;
+ LexicalScopeStack.clear();
+ AbstractInstanceRootList.clear();
+ }
+
+ Lines.clear();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// RecordSourceLine - Records location information and associates it with a
+/// label. Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ CompileUnit *Unit = CompileUnitMap[V];
+ assert(Unit && "Unable to find CompileUnit");
+ unsigned ID = MMI->NextLabelID();
+ Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID));
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// RecordSourceLine - Records location information and associates it with a
+/// label. Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col,
+ DICompileUnit CU) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ std::string Dir, Fn;
+ unsigned Src = GetOrCreateSourceID(CU.getDirectory(Dir),
+ CU.getFilename(Fn));
+ unsigned ID = MMI->NextLabelID();
+ Lines.push_back(SrcLineInfo(Line, Col, Src, ID));
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
+/// timed. Look up the source id with the given directory and source file
+/// names. If none currently exists, create a new id and insert it in the
+/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
+/// well.
+unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ unsigned SrcId = GetOrCreateSourceID(DirName, FileName);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return SrcId;
+}
+
+/// RecordRegionStart - Indicate the start of a region.
+unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ DbgScope *Scope = getOrCreateScope(V);
+ unsigned ID = MMI->NextLabelID();
+ if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
+ LexicalScopeStack.push_back(Scope);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ DbgScope *Scope = getOrCreateScope(V);
+ unsigned ID = MMI->NextLabelID();
+ Scope->setEndLabelID(ID);
+ if (!LexicalScopeStack.empty())
+ LexicalScopeStack.pop_back();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+void DwarfDebug::RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
+ const MachineInstr *MI) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ DIDescriptor Desc(GV);
+ DbgScope *Scope = NULL;
+ bool InlinedFnVar = false;
+
+ if (Desc.getTag() == dwarf::DW_TAG_variable) {
+ // GV is a global variable.
+ DIGlobalVariable DG(GV);
+ Scope = getOrCreateScope(DG.getContext().getGV());
+ } else {
+ DenseMap<const MachineInstr *, DbgScope *>::iterator
+ SI = InlinedVariableScopes.find(MI);
+
+ if (SI != InlinedVariableScopes.end()) {
+ // or GV is an inlined local variable.
+ Scope = SI->second;
+ } else {
+ DIVariable DV(GV);
+ GlobalVariable *V = DV.getContext().getGV();
+
+ // FIXME: The code that checks for the inlined local variable is a hack!
+ DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ AI = AbstractInstanceRootMap.find(V);
+
+ if (AI != AbstractInstanceRootMap.end()) {
+ // This method is called each time a DECLARE node is encountered. For an
+ // inlined function, this could be many, many times. We don't want to
+        // re-add variables to that DIE each time; we just want to add them
+ // once. Check to make sure that we haven't added them already.
+ DenseMap<const GlobalVariable *,
+ SmallSet<const GlobalVariable *, 32> >::iterator
+ IP = InlinedParamMap.find(V);
+
+ if (IP != InlinedParamMap.end() && IP->second.count(GV) > 0) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+ return;
+ }
+
+ // or GV is an inlined local variable.
+ Scope = AI->second;
+ InlinedParamMap[V].insert(GV);
+ InlinedFnVar = true;
+ } else {
+ // or GV is a local variable.
+ Scope = getOrCreateScope(V);
+ }
+ }
+ }
+
+ assert(Scope && "Unable to find the variable's scope");
+ DbgVariable *DV = new DbgVariable(DIVariable(GV), FrameIndex, InlinedFnVar);
+ Scope->AddVariable(DV);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// RecordInlinedFnStart - Indicate the start of an inlined subroutine.
+unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
+ unsigned Line, unsigned Col) {
+ unsigned LabelID = MMI->NextLabelID();
+
+ if (!TAI->doesDwarfUsesInlineInfoSection())
+ return LabelID;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ GlobalVariable *GV = SP.getGV();
+ DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ II = AbstractInstanceRootMap.find(GV);
+
+ if (II == AbstractInstanceRootMap.end()) {
+ // Create an abstract instance entry for this inlined function if it doesn't
+ // already exist.
+ DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV));
+
+ // Get the compile unit context.
+ CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
+ DIE *SPDie = Unit->getDieMapSlotFor(GV);
+ if (!SPDie)
+ SPDie = CreateSubprogramDIE(Unit, SP, false, true);
+
+ // Mark as being inlined. This makes this subprogram entry an abstract
+ // instance root.
+ // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only
+ // that it's defined. That probably won't change in the future. However,
+ // this could be more elegant.
+ AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined);
+
+ // Keep track of the abstract scope for this function.
+ DbgAbstractScopeMap[GV] = Scope;
+
+ AbstractInstanceRootMap[GV] = Scope;
+ AbstractInstanceRootList.push_back(Scope);
+ }
+
+ // Create a concrete inlined instance for this inlined function.
+ DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV));
+ DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
+ CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
+ ScopeDie->setAbstractCompileUnit(Unit);
+
+ DIE *Origin = Unit->getDieMapSlotFor(GV);
+ AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, Origin);
+ AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, Unit->getID());
+ AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line);
+ AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col);
+
+ ConcreteScope->setDie(ScopeDie);
+ ConcreteScope->setStartLabelID(LabelID);
+ MMI->RecordUsedDbgLabel(LabelID);
+
+ LexicalScopeStack.back()->AddConcreteInst(ConcreteScope);
+
+ // Keep track of the concrete scope that's inlined into this function.
+ DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator
+ SI = DbgConcreteScopeMap.find(GV);
+
+ if (SI == DbgConcreteScopeMap.end())
+ DbgConcreteScopeMap[GV].push_back(ConcreteScope);
+ else
+ SI->second.push_back(ConcreteScope);
+
+ // Track the start label for this inlined function.
+ DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
+ I = InlineInfo.find(GV);
+
+ if (I == InlineInfo.end())
+ InlineInfo[GV].push_back(LabelID);
+ else
+ I->second.push_back(LabelID);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return LabelID;
+}
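+
+// Net effect of RecordInlinedFnStart, sketched with a made-up layout (the
+// real offsets and forms come from the emitter): one abstract instance root
+// per inlined function, plus one concrete DW_TAG_inlined_subroutine per
+// inlining site pointing back at it:
+//
+//   DW_TAG_subprogram               // abstract root, created once
+//     DW_AT_inline (DW_INL_declared_not_inlined)
+//   ...
+//   DW_TAG_inlined_subroutine       // one per call site
+//     DW_AT_abstract_origin -> the DW_TAG_subprogram above
+//     DW_AT_call_file / DW_AT_call_line / DW_AT_call_column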
+
+/// RecordInlinedFnEnd - Indicate the end of an inlined subroutine.
+unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
+ if (!TAI->doesDwarfUsesInlineInfoSection())
+ return 0;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ GlobalVariable *GV = SP.getGV();
+ DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator
+ I = DbgConcreteScopeMap.find(GV);
+
+ if (I == DbgConcreteScopeMap.end()) {
+ // FIXME: Can this situation actually happen? And if so, should it?
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return 0;
+ }
+
+ SmallVector<DbgScope *, 8> &Scopes = I->second;
+ assert(!Scopes.empty() && "We should have at least one debug scope!");
+ DbgScope *Scope = Scopes.back(); Scopes.pop_back();
+ unsigned ID = MMI->NextLabelID();
+ MMI->RecordUsedDbgLabel(ID);
+ Scope->setEndLabelID(ID);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// RecordVariableScope - Record the scope for the variable declared by
+/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Scopes are
+/// recorded here only for inlined subroutine variables; other variables'
+/// scopes are determined during RecordVariable().
+void DwarfDebug::RecordVariableScope(DIVariable &DV,
+ const MachineInstr *DeclareMI) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ DISubprogram SP(DV.getContext().getGV());
+
+ if (SP.isNull()) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return;
+ }
+
+ DenseMap<GlobalVariable *, DbgScope *>::iterator
+ I = DbgAbstractScopeMap.find(SP.getGV());
+ if (I != DbgAbstractScopeMap.end())
+ InlinedVariableScopes[DeclareMI] = I->second;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// SizeAndOffsetDie - Compute the size and offset of a DIE.
+///
+unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // If not last sibling and has children then add sibling offset attribute.
+ if (!Last && !Children.empty()) Die->AddSiblingOffset();
+
+ // Record the abbreviation.
+ AssignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of abbreviation code.
+ Offset += TargetAsmInfo::getULEB128Size(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+}
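+
+// Worked example (illustrative numbers, assuming Last is true so no
+// DW_AT_sibling is added): a DIE at Offset 11 with a one-byte abbrev code,
+// two 4-byte attribute values, and a single child that sizes to 6 bytes lays
+// out as
+//
+//   11: abbrev code            (+1)
+//   12: attribute values       (+8)
+//   20: child DIE              (+6)
+//   26: end-of-children marker (+1)
+//
+// so the call returns 27 and the DIE's size is 27 - 11 = 16.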
+
+/// SizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::SizeAndOffsets() {
+ // Compute size of compile unit header.
+ static unsigned Offset =
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ // Process base compile unit.
+ if (MainCU) {
+ SizeAndOffsetDie(MainCU->getDie(), Offset, true);
+ CompileUnitOffsets[MainCU] = 0;
+ return;
+ }
+
+ // Process all compile units.
+ unsigned PrevOffset = 0;
+
+ for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) {
+ CompileUnit *Unit = CompileUnits[i];
+ CompileUnitOffsets[Unit] = PrevOffset;
+ PrevOffset += SizeAndOffsetDie(Unit->getDie(), Offset, true)
+ + sizeof(int32_t); // FIXME - extra pad for gdb bug.
+ }
+}
+
+/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+/// tools to recognize that the object file contains Dwarf information.
+void DwarfDebug::EmitInitial() {
+  // Check to see if we already emitted initial headers.
+ if (didInitial) return;
+ didInitial = true;
+
+ // Dwarf sections base addresses.
+ if (TAI->doesDwarfRequireFrameSection()) {
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+ EmitLabel("section_debug_frame", 0);
+ }
+
+ Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+ EmitLabel("section_info", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+ EmitLabel("section_abbrev", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+ EmitLabel("section_aranges", 0);
+
+ if (TAI->doesSupportMacInfoSection()) {
+ Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ EmitLabel("section_macinfo", 0);
+ }
+
+ Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+ EmitLabel("section_line", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+ EmitLabel("section_loc", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+ EmitLabel("section_pubnames", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+ EmitLabel("section_str", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+ EmitLabel("section_ranges", 0);
+
+ Asm->SwitchToSection(TAI->getTextSection());
+ EmitLabel("text_begin", 0);
+ Asm->SwitchToSection(TAI->getDataSection());
+ EmitLabel("data_begin", 0);
+}
+
+/// EmitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::EmitDIE(DIE *Die) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ Asm->EOL();
+
+ // Emit the code (index) for the abbreviation.
+ Asm->EmitULEB128Bytes(AbbrevNumber);
+
+ if (Asm->isVerbose())
+ Asm->EOL(std::string("Abbrev [" +
+ utostr(AbbrevNumber) +
+ "] 0x" + utohexstr(Die->getOffset()) +
+ ":0x" + utohexstr(Die->getSize()) + " " +
+ dwarf::TagString(Abbrev->getTag())));
+ else
+ Asm->EOL();
+
+ SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ switch (Attr) {
+ case dwarf::DW_AT_sibling:
+ Asm->EmitInt32(Die->SiblingOffset());
+ break;
+ case dwarf::DW_AT_abstract_origin: {
+ DIEEntry *E = cast<DIEEntry>(Values[i]);
+ DIE *Origin = E->getEntry();
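+      // The origin DIE's offset is relative to its own compile unit, so bias
+      // it by that unit's offset within the debug info section.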
+ unsigned Addr =
+ CompileUnitOffsets[Die->getAbstractCompileUnit()] +
+ Origin->getOffset();
+
+ Asm->EmitInt32(Addr);
+ break;
+ }
+ default:
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(this, Form);
+ break;
+ }
+
+ Asm->EOL(dwarf::AttributeString(Attr));
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ EmitDIE(Children[j]);
+
+ Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
+ }
+}
+
+/// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+///
+void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
+ DIE *Die = Unit->getDie();
+
+  // Emit the compile unit's header.
+ EmitLabel("info_begin", Unit->getID());
+
+ // Emit size of content not including length itself
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int32_t); // FIXME - extra pad for gdb bug.
+
+ Asm->EmitInt32(ContentSize); Asm->EOL("Length of Compilation Unit Info");
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF version number");
+ EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
+ Asm->EOL("Offset Into Abbrev. Section");
+ Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
+
+ EmitDIE(Die);
+ // FIXME - extra padding for gdb bug.
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ EmitLabel("info_end", Unit->getID());
+
+ Asm->EOL();
+}
+
+void DwarfDebug::EmitDebugInfo() {
+ // Start debug info section.
+ Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+
+ if (MainCU) {
+ EmitDebugInfoPerCU(MainCU);
+ return;
+ }
+
+ for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i)
+ EmitDebugInfoPerCU(CompileUnits[i]);
+}
+
+/// EmitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::EmitAbbreviations() const {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+
+ EmitLabel("abbrev_begin", 0);
+
+    // For each abbreviation.
+ for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ // Get abbreviation data
+ const DIEAbbrev *Abbrev = Abbreviations[i];
+
+      // Emit the abbreviation's code (base 1 index).
+ Asm->EmitULEB128Bytes(Abbrev->getNumber());
+ Asm->EOL("Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(Asm);
+
+ Asm->EOL();
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(3)");
+
+ EmitLabel("abbrev_end", 0);
+ Asm->EOL();
+ }
+}
+
+/// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
+ // Define last address of section.
+ Asm->EmitInt8(0); Asm->EOL("Extended Op");
+ Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+ EmitReference("section_end", SectionEnd); Asm->EOL("Section end label");
+
+ // Mark end of matrix.
+ Asm->EmitInt8(0); Asm->EOL("DW_LNE_end_sequence");
+ Asm->EmitULEB128Bytes(1); Asm->EOL();
+ Asm->EmitInt8(1); Asm->EOL();
+}
+
+/// EmitDebugLines - Emit source line information.
+///
+void DwarfDebug::EmitDebugLines() {
+ // If the target is using .loc/.file, the assembler will be emitting the
+ // .debug_line table automatically.
+ if (TAI->hasDotLocAndDotFile())
+ return;
+
+ // Minimum line delta, thus ranging from -10..(255-10).
+ const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1);
+ // Maximum line delta, thus ranging from -10..(255-10).
+ const int MaxLineDelta = 255 + MinLineDelta;
+
+ // Start the dwarf line section.
+ Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+
+ // Construct the section header.
+ EmitDifference("line_end", 0, "line_begin", 0, true);
+ Asm->EOL("Length of Source Line Info");
+ EmitLabel("line_begin", 0);
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF version number");
+
+ EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true);
+ Asm->EOL("Prolog Length");
+ EmitLabel("line_prolog_begin", 0);
+
+ Asm->EmitInt8(1); Asm->EOL("Minimum Instruction Length");
+
+ Asm->EmitInt8(1); Asm->EOL("Default is_stmt_start flag");
+
+ Asm->EmitInt8(MinLineDelta); Asm->EOL("Line Base Value (Special Opcodes)");
+
+ Asm->EmitInt8(MaxLineDelta); Asm->EOL("Line Range Value (Special Opcodes)");
+
+ Asm->EmitInt8(-MinLineDelta); Asm->EOL("Special Opcode Base");
+
+ // Line number standard opcode encodings argument count
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_copy arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_pc arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_line arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_file arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_column arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_negate_stmt arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_set_basic_block arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_const_add_pc arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_fixed_advance_pc arg count");
+
+ // Emit directories.
+ for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) {
+ Asm->EmitString(getSourceDirectoryName(DI));
+ Asm->EOL("Directory");
+ }
+
+ Asm->EmitInt8(0); Asm->EOL("End of directories");
+
+ // Emit files.
+ for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) {
+ // Remember source id starts at 1.
+ std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI);
+ Asm->EmitString(getSourceFileName(Id.second));
+ Asm->EOL("Source");
+ Asm->EmitULEB128Bytes(Id.first);
+ Asm->EOL("Directory #");
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("Mod date");
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("File size");
+ }
+
+ Asm->EmitInt8(0); Asm->EOL("End of files");
+
+ EmitLabel("line_prolog_end", 0);
+
+ // A sequence for each text section.
+ unsigned SecSrcLinesSize = SectionSourceLines.size();
+
+ for (unsigned j = 0; j < SecSrcLinesSize; ++j) {
+    // Isolate the current section's line info.
+ const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
+
+ if (Asm->isVerbose()) {
+ const Section* S = SectionMap[j + 1];
+      O << '\t' << TAI->getCommentString() << " Section "
+ << S->getName() << '\n';
+ } else {
+ Asm->EOL();
+ }
+
+ // Dwarf assumes we start with first line of first source file.
+ unsigned Source = 1;
+ unsigned Line = 1;
+
+ // Construct rows of the address, source, line, column matrix.
+ for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
+ const SrcLineInfo &LineInfo = LineInfos[i];
+ unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID());
+ if (!LabelID) continue;
+
+ if (!Asm->isVerbose())
+ Asm->EOL();
+ else {
+ std::pair<unsigned, unsigned> SourceID =
+ getSourceDirectoryAndFileIds(LineInfo.getSourceID());
+ O << '\t' << TAI->getCommentString() << ' '
+ << getSourceDirectoryName(SourceID.first) << ' '
+ << getSourceFileName(SourceID.second)
+ <<" :" << utostr_32(LineInfo.getLine()) << '\n';
+ }
+
+ // Define the line address.
+ Asm->EmitInt8(0); Asm->EOL("Extended Op");
+ Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+ EmitReference("label", LabelID); Asm->EOL("Location label");
+
+ // If change of source, then switch to the new source.
+ if (Source != LineInfo.getSourceID()) {
+ Source = LineInfo.getSourceID();
+ Asm->EmitInt8(dwarf::DW_LNS_set_file); Asm->EOL("DW_LNS_set_file");
+ Asm->EmitULEB128Bytes(Source); Asm->EOL("New Source");
+ }
+
+ // If change of line.
+ if (Line != LineInfo.getLine()) {
+ // Determine offset.
+ int Offset = LineInfo.getLine() - Line;
+ int Delta = Offset - MinLineDelta;
+
+ // Update line.
+ Line = LineInfo.getLine();
+
+ // If delta is small enough and in range...
+ if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
+ // ... then use fast opcode.
+ Asm->EmitInt8(Delta - MinLineDelta); Asm->EOL("Line Delta");
+ } else {
+ // ... otherwise use long hand.
+ Asm->EmitInt8(dwarf::DW_LNS_advance_line);
+ Asm->EOL("DW_LNS_advance_line");
+ Asm->EmitSLEB128Bytes(Offset); Asm->EOL("Line Offset");
+ Asm->EmitInt8(dwarf::DW_LNS_copy); Asm->EOL("DW_LNS_copy");
+ }
+ } else {
+ // Copy the previous row (different address or source)
+ Asm->EmitInt8(dwarf::DW_LNS_copy); Asm->EOL("DW_LNS_copy");
+ }
+ }
+
+ EmitEndOfLineMatrix(j + 1);
+ }
+
+ if (SecSrcLinesSize == 0)
+ // Because we're emitting a debug_line section, we still need a line
+ // table. The linker and friends expect it to exist. If there's nothing to
+ // put into it, emit an empty table.
+ EmitEndOfLineMatrix(1);
+
+ EmitLabel("line_end", 0);
+ Asm->EOL();
+}
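+
+// Worked example for the delta encoding above (illustrative): with
+// MinLineDelta = -10, the emitted opcode base is -MinLineDelta = 10. A line
+// advance of +3 gives Delta = 3 - (-10) = 13, which is within
+// [0, MaxLineDelta - 1), so the fast path emits Delta - MinLineDelta = 23,
+// i.e. the DWARF special opcode (advance - line_base) + opcode_base.
+// Advances outside that window fall back to DW_LNS_advance_line followed by
+// DW_LNS_copy.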
+
+/// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+///
+void DwarfDebug::EmitCommonDebugFrame() {
+ if (!TAI->doesDwarfRequireFrameSection())
+ return;
+
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
+
+ // Start the dwarf frame section.
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+
+ EmitLabel("debug_frame_common", 0);
+ EmitDifference("debug_frame_common_end", 0,
+ "debug_frame_common_begin", 0, true);
+ Asm->EOL("Length of Common Information Entry");
+
+ EmitLabel("debug_frame_common_begin", 0);
+ Asm->EmitInt32((int)dwarf::DW_CIE_ID);
+ Asm->EOL("CIE Identifier Tag");
+ Asm->EmitInt8(dwarf::DW_CIE_VERSION);
+ Asm->EOL("CIE Version");
+ Asm->EmitString("");
+ Asm->EOL("CIE Augmentation");
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("CIE Code Alignment Factor");
+ Asm->EmitSLEB128Bytes(stackGrowth);
+ Asm->EOL("CIE Data Alignment Factor");
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false));
+ Asm->EOL("CIE RA Column");
+
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+
+ EmitFrameMoves(NULL, 0, Moves, false);
+
+ Asm->EmitAlignment(2, 0, 0, false);
+ EmitLabel("debug_frame_common_end", 0);
+
+ Asm->EOL();
+}
+
+/// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+/// section.
+void
+DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
+ if (!TAI->doesDwarfRequireFrameSection())
+ return;
+
+ // Start the dwarf frame section.
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+
+ EmitDifference("debug_frame_end", DebugFrameInfo.Number,
+ "debug_frame_begin", DebugFrameInfo.Number, true);
+ Asm->EOL("Length of Frame Information Entry");
+
+ EmitLabel("debug_frame_begin", DebugFrameInfo.Number);
+
+ EmitSectionOffset("debug_frame_common", "section_debug_frame",
+ 0, 0, true, false);
+ Asm->EOL("FDE CIE offset");
+
+ EmitReference("func_begin", DebugFrameInfo.Number);
+ Asm->EOL("FDE initial location");
+ EmitDifference("func_end", DebugFrameInfo.Number,
+ "func_begin", DebugFrameInfo.Number);
+ Asm->EOL("FDE address range");
+
+ EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves,
+ false);
+
+ Asm->EmitAlignment(2, 0, 0, false);
+ EmitLabel("debug_frame_end", DebugFrameInfo.Number);
+
+ Asm->EOL();
+}
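+
+// Resulting .debug_frame shape (a sketch; the lengths are whatever the label
+// differences above resolve to):
+//
+//   debug_frame_common:   one shared CIE -- version, empty augmentation,
+//                         alignment factors, RA column, initial frame moves.
+//   debug_frame_begin_N:  one FDE per function; its "FDE CIE offset" points
+//                         back at debug_frame_common, and its location/range
+//                         are func_begin_N and func_end_N - func_begin_N.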
+
+void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
+ EmitDifference("pubnames_end", Unit->getID(),
+ "pubnames_begin", Unit->getID(), true);
+ Asm->EOL("Length of Public Names Info");
+
+ EmitLabel("pubnames_begin", Unit->getID());
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version");
+
+ EmitSectionOffset("info_begin", "section_info",
+ Unit->getID(), 0, true, false);
+ Asm->EOL("Offset of Compilation Unit Info");
+
+ EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(),
+ true);
+ Asm->EOL("Compilation Unit Length");
+
+ StringMap<DIE*> &Globals = Unit->getGlobals();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE * Entity = GI->second;
+
+ Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset");
+ Asm->EmitString(Name, strlen(Name)); Asm->EOL("External Name");
+ }
+
+ Asm->EmitInt32(0); Asm->EOL("End Mark");
+ EmitLabel("pubnames_end", Unit->getID());
+
+ Asm->EOL();
+}
+
+/// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::EmitDebugPubNames() {
+ // Start the dwarf pubnames section.
+ Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+
+ if (MainCU) {
+ EmitDebugPubNamesPerCU(MainCU);
+ return;
+ }
+
+ for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i)
+ EmitDebugPubNamesPerCU(CompileUnits[i]);
+}
+
+/// EmitDebugStr - Emit visible names into a debug str section.
+///
+void DwarfDebug::EmitDebugStr() {
+ // Check to see if it is worth the effort.
+ if (!StringPool.empty()) {
+ // Start the dwarf str section.
+ Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+
+    // For each string in the string pool.
+ for (unsigned StringID = 1, N = StringPool.size();
+ StringID <= N; ++StringID) {
+ // Emit a label for reference from debug information entries.
+ EmitLabel("string", StringID);
+
+ // Emit the string itself.
+ const std::string &String = StringPool[StringID];
+ Asm->EmitString(String); Asm->EOL();
+ }
+
+ Asm->EOL();
+ }
+}
+
+/// EmitDebugLoc - Emit visible names into a debug loc section.
+///
+void DwarfDebug::EmitDebugLoc() {
+ // Start the dwarf loc section.
+ Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+ Asm->EOL();
+}
+
+/// EmitDebugARanges - Emit visible names into a debug aranges section.
+///
+void DwarfDebug::EmitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+
+ // FIXME - Mock up
+#if 0
+ CompileUnit *Unit = GetBaseCompileUnit();
+
+ // Don't include size of length
+ Asm->EmitInt32(0x1c); Asm->EOL("Length of Address Ranges Info");
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
+
+ EmitReference("info_begin", Unit->getID());
+ Asm->EOL("Offset of Compilation Unit Info");
+
+ Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Size of Address");
+
+ Asm->EmitInt8(0); Asm->EOL("Size of Segment Descriptor");
+
+ Asm->EmitInt16(0); Asm->EOL("Pad (1)");
+ Asm->EmitInt16(0); Asm->EOL("Pad (2)");
+
+ // Range 1
+ EmitReference("text_begin", 0); Asm->EOL("Address");
+ EmitDifference("text_end", 0, "text_begin", 0, true); Asm->EOL("Length");
+
+ Asm->EmitInt32(0); Asm->EOL("EOM (1)");
+ Asm->EmitInt32(0); Asm->EOL("EOM (2)");
+#endif
+
+ Asm->EOL();
+}
+
+/// EmitDebugRanges - Emit visible names into a debug ranges section.
+///
+void DwarfDebug::EmitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+ Asm->EOL();
+}
+
+/// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
+///
+void DwarfDebug::EmitDebugMacInfo() {
+ if (TAI->doesSupportMacInfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ Asm->EOL();
+ }
+}
+
+/// EmitDebugInlineInfo - Emit inline info using following format.
+/// Section Header:
+/// 1. length of section
+/// 2. Dwarf version number
+/// 3. address size.
+///
+/// Entries (one "entry" for each function that was inlined):
+///
+/// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+/// otherwise offset into __debug_str for regular function name.
+/// 2. offset into __debug_str section for regular function name.
+/// 3. an unsigned LEB128 number indicating the number of distinct inlining
+/// instances for the function.
+///
+/// The rest of the entry consists of a {die_offset, low_pc} pair for each
+/// inlined instance; the die_offset points to the inlined_subroutine die in the
+/// __debug_info section, and the low_pc is the starting address for the
+/// inlining instance.
+void DwarfDebug::EmitDebugInlineInfo() {
+ if (!TAI->doesDwarfUsesInlineInfoSection())
+ return;
+
+ if (!MainCU)
+ return;
+
+ Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection());
+ Asm->EOL();
+ EmitDifference("debug_inlined_end", 1,
+ "debug_inlined_begin", 1, true);
+ Asm->EOL("Length of Debug Inlined Information Entry");
+
+ EmitLabel("debug_inlined_begin", 1);
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
+ Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
+
+ for (DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
+ I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ SmallVector<unsigned, 4> &Labels = I->second;
+ DISubprogram SP(GV);
+ std::string Name;
+ std::string LName;
+
+ SP.getLinkageName(LName);
+ SP.getName(Name);
+
+ Asm->EmitString(LName.empty() ? Name : LName);
+ Asm->EOL("MIPS linkage name");
+
+ Asm->EmitString(Name); Asm->EOL("Function name");
+
+ Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count");
+
+ for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(),
+ LE = Labels.end(); LI != LE; ++LI) {
+ DIE *SP = MainCU->getDieMapSlotFor(GV);
+ Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset");
+
+ if (TD->getPointerSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ PrintLabelName("label", *LI); Asm->EOL("low_pc");
+ }
+ }
+
+ EmitLabel("debug_inlined_end", 1);
+ Asm->EOL();
+}
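+
+// Shape of the emitted debug_inlined section, following the format comment
+// above (a sketch with placeholder values):
+//
+//   length, DWARF version, address size           // section header
+//   "linkage name\0" "name\0" ULEB(#instances)    // one entry per function
+//   { die_offset, low_pc } x #instances           // its inlined instances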
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 0000000..9824566
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,561 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+
+namespace llvm {
+
+class CompileUnit;
+class DbgVariable;
+class DbgScope;
+class DbgConcreteScope;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class TargetAsmInfo;
+class Timer;
+
+//===----------------------------------------------------------------------===//
+/// SrcLineInfo - This class is used to record source line correspondence.
+///
+class VISIBILITY_HIDDEN SrcLineInfo {
+ unsigned Line; // Source line number.
+ unsigned Column; // Source column.
+ unsigned SourceID; // Source ID number.
+ unsigned LabelID; // Label in code ID number.
+public:
+ SrcLineInfo(unsigned L, unsigned C, unsigned S, unsigned I)
+ : Line(L), Column(C), SourceID(S), LabelID(I) {}
+
+ // Accessors
+ unsigned getLine() const { return Line; }
+ unsigned getColumn() const { return Column; }
+ unsigned getSourceID() const { return SourceID; }
+ unsigned getLabelID() const { return LabelID; }
+};
+
+class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
+ //===--------------------------------------------------------------------===//
+ // Attributes used to construct specific Dwarf sections.
+ //
+
+ /// CompileUnitMap - A map of global variables representing compile units to
+ /// compile units.
+ DenseMap<Value *, CompileUnit *> CompileUnitMap;
+
+ /// CompileUnits - All the compile units in this module.
+ ///
+ SmallVector<CompileUnit *, 8> CompileUnits;
+
+  /// MainCU - Some platforms prefer one compile unit per .o file. In such
+  /// cases, all DIEs are inserted into MainCU.
+ CompileUnit *MainCU;
+
+ /// AbbreviationsSet - Used to uniquely define abbreviations.
+ ///
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ /// Abbreviations - A list of all the unique abbreviations in use.
+ ///
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ /// DirectoryIdMap - Directory name to directory id map.
+ ///
+ StringMap<unsigned> DirectoryIdMap;
+
+ /// DirectoryNames - A list of directory names.
+ SmallVector<std::string, 8> DirectoryNames;
+
+ /// SourceFileIdMap - Source file name to source file id map.
+ ///
+ StringMap<unsigned> SourceFileIdMap;
+
+ /// SourceFileNames - A list of source file names.
+ SmallVector<std::string, 8> SourceFileNames;
+
+ /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+ /// id mapped to a unique id.
+ DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
+
+ /// SourceIds - Reverse map from source id to directory id + file id pair.
+ ///
+ SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
+
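+  // Example of the id scheme (illustrative values): directory "/tmp" gets
+  // directory id 1, file "foo.c" gets file id 1, and the pair (1, 1) maps to
+  // source id 1; SourceIds[0] then stores (1, 1) so the mapping can be
+  // reversed.
+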
+  /// Lines - List of source line correspondences.
+ std::vector<SrcLineInfo> Lines;
+
+ /// ValuesSet - Used to uniquely define values.
+ ///
+ FoldingSet<DIEValue> ValuesSet;
+
+ /// Values - A list of all the unique values in use.
+ ///
+ std::vector<DIEValue *> Values;
+
+ /// StringPool - A UniqueVector of strings used by indirect references.
+ ///
+ UniqueVector<std::string> StringPool;
+
+ /// SectionMap - Provides a unique id per text section.
+ ///
+ UniqueVector<const Section*> SectionMap;
+
+ /// SectionSourceLines - Tracks line numbers per text section.
+ ///
+ std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
+
+ /// didInitial - Flag to indicate if initial emission has been done.
+ ///
+ bool didInitial;
+
+ /// shouldEmit - Flag to indicate if debug information should be emitted.
+ ///
+ bool shouldEmit;
+
+ // FunctionDbgScope - Top level scope for the current function.
+ //
+ DbgScope *FunctionDbgScope;
+
+ /// DbgScopeMap - Tracks the scopes in the current function.
+ DenseMap<GlobalVariable *, DbgScope *> DbgScopeMap;
+
+ /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
+ /// function.
+ DenseMap<GlobalVariable *, DbgScope *> DbgAbstractScopeMap;
+
+ /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
+ /// function.
+ DenseMap<GlobalVariable *,
+ SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
+
+ /// InlineInfo - Keep track of inlined functions and their location. This
+ /// information is used to populate debug_inlined section.
+ DenseMap<GlobalVariable *, SmallVector<unsigned, 4> > InlineInfo;
+
+  /// InlinedVariableScopes - Scope information for the inlined subroutine
+  /// variables.
+ DenseMap<const MachineInstr *, DbgScope *> InlinedVariableScopes;
+
+ /// AbstractInstanceRootMap - Map of abstract instance roots of inlined
+ /// functions. These are subroutine entries that contain a DW_AT_inline
+ /// attribute.
+ DenseMap<const GlobalVariable *, DbgScope *> AbstractInstanceRootMap;
+
+ /// InlinedParamMap - A map keeping track of which parameters are assigned to
+ /// which abstract instance.
+ DenseMap<const GlobalVariable *,
+ SmallSet<const GlobalVariable *, 32> > InlinedParamMap;
+
+ /// AbstractInstanceRootList - List of abstract instance roots of inlined
+ /// functions. These are subroutine entries that contain a DW_AT_inline
+ /// attribute.
+ SmallVector<DbgScope *, 32> AbstractInstanceRootList;
+
+ /// LexicalScopeStack - A stack of lexical scopes. The top one is the current
+ /// scope.
+ SmallVector<DbgScope *, 16> LexicalScopeStack;
+
+  /// CompileUnitOffsets - A map of the offsets of the compile units. This is
+ /// used when calculating the "origin" of a concrete instance of an inlined
+ /// function.
+ DenseMap<CompileUnit *, unsigned> CompileUnitOffsets;
+
+ /// DebugTimer - Timer for the Dwarf debug writer.
+ Timer *DebugTimer;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+ : Number(Num), Moves(M) {}
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+ /// getSourceDirectoryAndFileIds - Return the directory and file ids that
+  /// map to the source id. Source ids start at 1.
+ std::pair<unsigned, unsigned>
+ getSourceDirectoryAndFileIds(unsigned SId) const {
+ return SourceIds[SId-1];
+ }
+
+ /// getNumSourceDirectories - Return the number of source directories in the
+ /// debug info.
+ unsigned getNumSourceDirectories() const {
+ return DirectoryNames.size();
+ }
+
+ /// getSourceDirectoryName - Return the name of the directory corresponding
+ /// to the id.
+ const std::string &getSourceDirectoryName(unsigned Id) const {
+ return DirectoryNames[Id - 1];
+ }
+
+ /// getSourceFileName - Return the name of the source file corresponding
+ /// to the id.
+ const std::string &getSourceFileName(unsigned Id) const {
+ return SourceFileNames[Id - 1];
+ }
+
+ /// getNumSourceIds - Return the number of unique source ids.
+ unsigned getNumSourceIds() const {
+ return SourceIds.size();
+ }
+
+ /// AssignAbbrevNumber - Define a unique number for the abbreviation.
+ ///
+ void AssignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *CreateDIEEntry(DIE *Entry = NULL);
+
+ /// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
+ ///
+ void SetDIEEntry(DIEEntry *Value, DIE *Entry);
+
+ /// AddUInt - Add an unsigned integer attribute data and value.
+ ///
+ void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+  /// AddSInt - Add a signed integer attribute data and value.
+ ///
+ void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// AddString - Add a string attribute data and value.
+ ///
+ void AddString(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &String);
+
+ /// AddLabel - Add a Dwarf label attribute data and value.
+ ///
+ void AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label);
+
+  /// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+ ///
+ void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &Label);
+
+ /// AddSectionOffset - Add a section offset label attribute data and value.
+ ///
+ void AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label, const DWLabel &Section,
+ bool isEH = false, bool useSet = true);
+
+ /// AddDelta - Add a label delta attribute data and value.
+ ///
+ void AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Hi, const DWLabel &Lo);
+
+ /// AddDIEEntry - Add a DIE attribute data and value.
+ ///
+ void AddDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+ Die->AddValue(Attribute, Form, CreateDIEEntry(Entry));
+ }
+
+ /// AddBlock - Add block data.
+ ///
+ void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// AddSourceLine - Add location information to specified debug information
+ /// entry.
+ void AddSourceLine(DIE *Die, const DIVariable *V);
+
+ /// AddSourceLine - Add location information to specified debug information
+ /// entry.
+ void AddSourceLine(DIE *Die, const DIGlobal *G);
+
+ void AddSourceLine(DIE *Die, const DIType *Ty);
+
+ /// AddAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void AddAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// AddType - Add a new type attribute to the specified entity.
+ void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+
+ /// ConstructTypeDIE - Construct basic type die from DIBasicType.
+ void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DIBasicType BTy);
+
+ /// ConstructTypeDIE - Construct derived type die from DIDerivedType.
+ void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// ConstructTypeDIE - Construct type DIE from DICompositeType.
+ void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DICompositeType CTy);
+
+ /// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
+
+ /// CreateGlobalVariableDIE - Create new DIE using GV.
+ DIE *CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+ const DIGlobalVariable &GV);
+
+ /// CreateMemberDIE - Create new member DIE.
+ DIE *CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
+
+ /// CreateSubprogramDIE - Create new DIE using SP.
+ DIE *CreateSubprogramDIE(CompileUnit *DW_Unit,
+ const DISubprogram &SP,
+ bool IsConstructor = false,
+ bool IsInlined = false);
+
+ /// FindCompileUnit - Get the compile unit for the given descriptor.
+ ///
+ CompileUnit &FindCompileUnit(DICompileUnit Unit) const;
+
+ /// CreateDbgScopeVariable - Create a new scope variable.
+ ///
+ DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
+
+ /// getOrCreateScope - Returns the scope associated with the given descriptor.
+ ///
+ DbgScope *getOrCreateScope(GlobalVariable *V);
+
+ /// ConstructDbgScope - Construct the components of a scope.
+ ///
+ void ConstructDbgScope(DbgScope *ParentScope,
+ unsigned ParentStartID, unsigned ParentEndID,
+ DIE *ParentDie, CompileUnit *Unit);
+
+ /// ConstructFunctionDbgScope - Construct the scope for the subprogram.
+ ///
+ void ConstructFunctionDbgScope(DbgScope *RootScope,
+ bool AbstractScope = false);
+
+ /// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
+ ///
+ void ConstructDefaultDbgScope(MachineFunction *MF);
+
+ /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+  /// tools to recognize that the object file contains Dwarf information.
+ void EmitInitial();
+
+  /// EmitDIE - Recursively emits a debug information entry.
+ ///
+ void EmitDIE(DIE *Die);
+
+ /// SizeAndOffsetDie - Compute the size and offset of a DIE.
+ ///
+ unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last);
+
+ /// SizeAndOffsets - Compute the size and offset of all the DIEs.
+ ///
+ void SizeAndOffsets();
+
+ /// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+ ///
+ void EmitDebugInfoPerCU(CompileUnit *Unit);
+
+ void EmitDebugInfo();
+
+ /// EmitAbbreviations - Emit the abbreviation section.
+ ///
+ void EmitAbbreviations() const;
+
+ /// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// the line matrix.
+ ///
+ void EmitEndOfLineMatrix(unsigned SectionEnd);
+
+ /// EmitDebugLines - Emit source line information.
+ ///
+ void EmitDebugLines();
+
+ /// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+ ///
+ void EmitCommonDebugFrame();
+
+ /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+ /// section.
+ void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
+
+ void EmitDebugPubNamesPerCU(CompileUnit *Unit);
+
+ /// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+ ///
+ void EmitDebugPubNames();
+
+ /// EmitDebugStr - Emit visible names into a debug str section.
+ ///
+ void EmitDebugStr();
+
+ /// EmitDebugLoc - Emit visible names into a debug loc section.
+ ///
+ void EmitDebugLoc();
+
+ /// EmitDebugARanges - Emit visible names into a debug aranges section.
+ ///
+ void EmitDebugARanges();
+
+ /// EmitDebugRanges - Emit visible names into a debug ranges section.
+ ///
+ void EmitDebugRanges();
+
+ /// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
+ ///
+ void EmitDebugMacInfo();
+
+ /// EmitDebugInlineInfo - Emit inline info using following format.
+ /// Section Header:
+ /// 1. length of section
+ /// 2. Dwarf version number
+ /// 3. address size.
+ ///
+ /// Entries (one "entry" for each function that was inlined):
+ ///
+  /// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+ /// otherwise offset into __debug_str for regular function name.
+ /// 2. offset into __debug_str section for regular function name.
+ /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+ /// instances for the function.
+ ///
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// inlined instance; the die_offset points to the inlined_subroutine die in
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
+ void EmitDebugInlineInfo();
+
+ /// GetOrCreateSourceID - Look up the source id with the given directory and
+ /// source file names. If none currently exists, create a new id and insert it
+ /// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps
+ /// as well.
+ unsigned GetOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName);
+
+ void ConstructCompileUnit(GlobalVariable *GV);
+
+  /// ConstructCompileUnits - Create compile unit DIEs.
+ void ConstructCompileUnits();
+
+ bool ConstructGlobalVariableDIE(GlobalVariable *GV);
+
+ /// ConstructGlobalVariableDIEs - Create DIEs for each of the externally
+ /// visible global variables. Return true if at least one global DIE is
+ /// created.
+ bool ConstructGlobalVariableDIEs();
+
+ bool ConstructSubprogram(GlobalVariable *GV);
+
+ /// ConstructSubprograms - Create DIEs for each of the externally visible
+ /// subprograms. Return true if at least one subprogram DIE is created.
+ bool ConstructSubprograms();
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+ virtual ~DwarfDebug();
+
+ /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
+ /// be emitted.
+ bool ShouldEmitDwarfDebug() const { return shouldEmit; }
+
+ /// SetDebugInfo - Create global DIEs and emit initial debug info sections.
+  /// This is invoked by the target AsmPrinter.
+ void SetDebugInfo(MachineModuleInfo *mmi);
+
+ /// BeginModule - Emit all Dwarf sections that should come prior to the
+ /// content.
+ void BeginModule(Module *M) {
+ this->M = M;
+ }
+
+ /// EndModule - Emit all Dwarf sections that should come after the content.
+ ///
+ void EndModule();
+
+  /// BeginFunction - Gather pre-function debug information. Assumes it is
+  /// emitted immediately after the function entry point.
+ void BeginFunction(MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function debug information.
+ ///
+ void EndFunction(MachineFunction *MF);
+
+ /// RecordSourceLine - Records location information and associates it with a
+ /// label. Returns a unique label ID used to generate a label and provide
+ /// correspondence to the source line list.
+ unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col);
+
+ /// RecordSourceLine - Records location information and associates it with a
+ /// label. Returns a unique label ID used to generate a label and provide
+ /// correspondence to the source line list.
+ unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU);
+
+ /// getRecordSourceLineCount - Return the number of source lines in the debug
+ /// info.
+ unsigned getRecordSourceLineCount() const {
+ return Lines.size();
+ }
+
+ /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
+ /// timed. Look up the source id with the given directory and source file
+ /// names. If none currently exists, create a new id and insert it in the
+ /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
+ /// well.
+ unsigned getOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName);
+
+ /// RecordRegionStart - Indicate the start of a region.
+ unsigned RecordRegionStart(GlobalVariable *V);
+
+ /// RecordRegionEnd - Indicate the end of a region.
+ unsigned RecordRegionEnd(GlobalVariable *V);
+
+ /// RecordVariable - Indicate the declaration of a local variable.
+ void RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
+ const MachineInstr *MI);
+
+  /// RecordInlinedFnStart - Indicate the start of an inlined subroutine.
+ unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
+ unsigned Line, unsigned Col);
+
+  /// RecordInlinedFnEnd - Indicate the end of an inlined subroutine.
+ unsigned RecordInlinedFnEnd(DISubprogram &SP);
+
+  /// RecordVariableScope - Record the scope for the variable declared by
+  /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Scopes are
+  /// recorded here only for inlined subroutine variables; other variables'
+  /// scopes are determined during RecordVariable().
+ void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI);
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 0000000..37466ab
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,706 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+static TimerGroup &getDwarfTimerGroup() {
+ static TimerGroup DwarfTimerGroup("Dwarf Exception");
+ return DwarfTimerGroup;
+}
+
+DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A,
+ const TargetAsmInfo *T)
+ : Dwarf(OS, A, T, "eh"), shouldEmitTable(false), shouldEmitMoves(false),
+ shouldEmitTableModule(false), shouldEmitMovesModule(false),
+ ExceptionTimer(0) {
+ if (TimePassesIsEnabled)
+ ExceptionTimer = new Timer("Dwarf Exception Writer",
+ getDwarfTimerGroup());
+}
+
+DwarfException::~DwarfException() {
+ delete ExceptionTimer;
+}
+
+void DwarfException::EmitCommonEHFrame(const Function *Personality,
+ unsigned Index) {
+ // Size and sign of stack growth.
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
+
+ // Begin eh frame section.
+ Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+
+ if (!TAI->doesRequireNonLocalEHFrameLabel())
+ O << TAI->getEHGlobalPrefix();
+
+ O << "EH_frame" << Index << ":\n";
+ EmitLabel("section_eh_frame", Index);
+
+ // Define base labels.
+ EmitLabel("eh_frame_common", Index);
+
+ // Define the eh frame length.
+ EmitDifference("eh_frame_common_end", Index,
+ "eh_frame_common_begin", Index, true);
+ Asm->EOL("Length of Common Information Entry");
+
+ // EH frame header.
+ EmitLabel("eh_frame_common_begin", Index);
+ Asm->EmitInt32((int)0);
+ Asm->EOL("CIE Identifier Tag");
+ Asm->EmitInt8(dwarf::DW_CIE_VERSION);
+ Asm->EOL("CIE Version");
+
+ // The personality presence indicates that language specific information will
+ // show up in the eh frame.
+ Asm->EmitString(Personality ? "zPLR" : "zR");
+ Asm->EOL("CIE Augmentation");
+
+  // Code and data alignment factors, and the return address column.
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("CIE Code Alignment Factor");
+ Asm->EmitSLEB128Bytes(stackGrowth);
+ Asm->EOL("CIE Data Alignment Factor");
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
+ Asm->EOL("CIE Return Address Column");
+
+  // If there is a personality, we need to indicate the function's location.
+ if (Personality) {
+ Asm->EmitULEB128Bytes(7);
+ Asm->EOL("Augmentation Size");
+
+ if (TAI->getNeedsIndirectEncoding()) {
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 |
+ dwarf::DW_EH_PE_indirect);
+ Asm->EOL("Personality (pcrel sdata4 indirect)");
+ } else {
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ Asm->EOL("Personality (pcrel sdata4)");
+ }
+
+ PrintRelDirective(true);
+ O << TAI->getPersonalityPrefix();
+ Asm->EmitExternalGlobal((const GlobalVariable *)(Personality));
+ O << TAI->getPersonalitySuffix();
+ if (strcmp(TAI->getPersonalitySuffix(), "+4@GOTPCREL"))
+ O << "-" << TAI->getPCSymbol();
+ Asm->EOL("Personality");
+
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ Asm->EOL("LSDA Encoding (pcrel sdata4)");
+
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ Asm->EOL("FDE Encoding (pcrel sdata4)");
+ } else {
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("Augmentation Size");
+
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ Asm->EOL("FDE Encoding (pcrel sdata4)");
+ }
+
+ // Indicate locations of general callee saved registers in frame.
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+ EmitFrameMoves(NULL, 0, Moves, true);
+
+  // On Darwin the linker honors the alignment of eh_frame, which means it
+  // must be 8-byte aligned on 64-bit targets to match what gcc does.
+  // Otherwise you get holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3,
+ 0, 0, false);
+ EmitLabel("eh_frame_common_end", Index);
+
+ Asm->EOL();
+}
+
+/// EmitEHFrame - Emit function exception frame information.
+///
+void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
+ assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+ "Should not emit 'available externally' functions at all");
+
+ Function::LinkageTypes linkage = EHFrameInfo.function->getLinkage();
+ Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+
+  // Externally visible entry into the function's eh frame info. If the
+ // corresponding function is static, this should not be externally visible.
+ if (linkage != Function::InternalLinkage &&
+ linkage != Function::PrivateLinkage) {
+ if (const char *GlobalEHDirective = TAI->getGlobalEHDirective())
+ O << GlobalEHDirective << EHFrameInfo.FnName << "\n";
+ }
+
+  // If the corresponding function is a weak definition, this should be too.
+ if ((linkage == Function::WeakAnyLinkage ||
+ linkage == Function::WeakODRLinkage ||
+ linkage == Function::LinkOnceAnyLinkage ||
+ linkage == Function::LinkOnceODRLinkage) &&
+ TAI->getWeakDefDirective())
+ O << TAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n";
+
+ // If there are no calls then you can't unwind. This may mean we can omit the
+ // EH Frame, but some environments do not handle weak absolute symbols. If
+ // UnwindTablesMandatory is set we cannot do this optimization; the unwind
+ // info is to be available for non-EH uses.
+ if (!EHFrameInfo.hasCalls &&
+ !UnwindTablesMandatory &&
+ ((linkage != Function::WeakAnyLinkage &&
+ linkage != Function::WeakODRLinkage &&
+ linkage != Function::LinkOnceAnyLinkage &&
+ linkage != Function::LinkOnceODRLinkage) ||
+ !TAI->getWeakDefDirective() ||
+ TAI->getSupportsWeakOmittedEHFrame())) {
+ O << EHFrameInfo.FnName << " = 0\n";
+ // This name has no connection to the function, so it might get
+ // dead-stripped when the function is not, erroneously. Prohibit
+ // dead-stripping unconditionally.
+ if (const char *UsedDirective = TAI->getUsedDirective())
+ O << UsedDirective << EHFrameInfo.FnName << "\n\n";
+ } else {
+ O << EHFrameInfo.FnName << ":\n";
+
+ // EH frame header.
+ EmitDifference("eh_frame_end", EHFrameInfo.Number,
+ "eh_frame_begin", EHFrameInfo.Number, true);
+ Asm->EOL("Length of Frame Information Entry");
+
+ EmitLabel("eh_frame_begin", EHFrameInfo.Number);
+
+ if (TAI->doesRequireNonLocalEHFrameLabel()) {
+ PrintRelDirective(true, true);
+ PrintLabelName("eh_frame_begin", EHFrameInfo.Number);
+
+ if (!TAI->isAbsoluteEHSectionOffsets())
+ O << "-EH_frame" << EHFrameInfo.PersonalityIndex;
+ } else {
+ EmitSectionOffset("eh_frame_begin", "eh_frame_common",
+ EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
+ true, true, false);
+ }
+
+ Asm->EOL("FDE CIE offset");
+
+ EmitReference("eh_func_begin", EHFrameInfo.Number, true, true);
+ Asm->EOL("FDE initial location");
+ EmitDifference("eh_func_end", EHFrameInfo.Number,
+ "eh_func_begin", EHFrameInfo.Number, true);
+ Asm->EOL("FDE address range");
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (EHFrameInfo.PersonalityIndex) {
+ Asm->EmitULEB128Bytes(4);
+ Asm->EOL("Augmentation size");
+
+ if (EHFrameInfo.hasLandingPads)
+ EmitReference("exception", EHFrameInfo.Number, true, true);
+ else
+ Asm->EmitInt32((int)0);
+ Asm->EOL("Language Specific Data Area");
+ } else {
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("Augmentation size");
+ }
+
+    // Indicate locations of function-specific callee-saved registers in the
+    // frame.
+ EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves,
+ true);
+
+    // On Darwin the linker honors the alignment of eh_frame, which means it
+    // must be 8-byte aligned on 64-bit targets to match what gcc does.
+    // Otherwise you get holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3,
+ 0, 0, false);
+ EmitLabel("eh_frame_end", EHFrameInfo.Number);
+
+    // If the function is marked used, this table should be also. We cannot
+    // make the mark unconditional here, since retaining the table also
+    // retains the function, and there is code around that depends on unused
+    // functions (calling undefined externals) being dead-stripped to link
+    // correctly. Yes, there really is.
+ if (MMI->getUsedFunctions().count(EHFrameInfo.function))
+ if (const char *UsedDirective = TAI->getUsedDirective())
+ O << UsedDirective << EHFrameInfo.FnName << "\n\n";
+ }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+///    the try. In our case it's an accumulation of the ranges covered by the
+///    invokes in the try. There is also a reference to the landing pad that
+///    handles the exception once processed. Finally, there is an index into
+///    the actions table.
+/// 2. The action table, in our case, is composed of pairs of type ids and next
+///    action offset. Starting with the action index from the landing pad
+///    site, each type id is checked for a match to the current exception. If
+///    it matches then the exception and type id are passed on to the landing
+///    pad. Otherwise the next action is looked up. This chain is terminated
+///    with a next action of zero. If no type id is found then the frame is
+///    unwound and handling continues.
+/// 3. The type id table contains references to all the C++ typeinfo for all
+///    catches in the function. This table is reverse indexed base 1.
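+///
+/// As an illustrative sketch (not emitted verbatim), the table laid out by
+/// this routine reads roughly as:
+///
+///   GCC_except_table<N>:
+///     LPStart format          (DW_EH_PE_omit)
+///     TType format            (DW_EH_PE_absptr)
+///     TType base offset       (ULEB128)
+///     Call site format        (DW_EH_PE_udata4)
+///     Call-site table length  (ULEB128)
+///     call-site entries:      {region start, region length, landing pad, action}
+///     action entries:         {type id, next action}
+///     type info pointers      (reverse indexed)
+///     filter type ids         (ULEB128)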
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
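+/// For example, pads with type ids {1, 2, 3} and {1, 2, 7} share two leading
+/// ids.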
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
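+/// For example, {1, 2} orders before {1, 3}, and {1, 2} before {1, 2, 5}.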
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+void DwarfException::EmitExceptionTable() {
+ const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Negative type ids index into FilterIds, positive type ids index into
+ // TypeInfos. The value written for a positive type id is just the type id
+ // itself. For a negative type id, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type id, because the FilterIds entries are
+ // written using a variable width encoding which outputs one byte per entry as
+ // long as the value written is not too large, but can differ. This kind of
+ // complication does not occur for positive type ids because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
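+  // Illustrative example: FilterIds = {128, 1} yields FilterOffsets =
+  // {-1, -3}, because the ULEB128 encoding of 128 occupies two bytes.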
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+ for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+ E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= TargetAsmInfo::getULEB128Size(*I);
+ }
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LP = LandingPads[i];
+ const std::vector<int> &TypeIds = LP->TypeIds;
+ const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -=
+ TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+ int TypeID = TypeIds[I];
+ assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ Actions.push_back(Action);
+
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to the size.
+ SizeActions += SizeSiteActions;
+ }
+
+ // Compute the call-site table. The entry for an invoke has a try-range
+ // containing the call, a non-zero landing pad and an appropriate action. The
+ // entry for an ordinary call has a try-range containing the call and zero for
+ // the landing pad and the action. Calls marked 'nounwind' have no entry and
+ // must not be contained in the try-range of any entry - they form gaps in the
+ // table. Entries must be ordered by try-range address.
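+  // Illustrative example (hypothetical label numbers): an invoke bracketed by
+  // labels 5..6 with landing pad 9 and first action 1 produces {5, 6, 9, 1};
+  // a plain call region produces {begin, end, 0, 0}; a nounwind call appears
+  // in no entry at all.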
+ SmallVector<CallSiteEntry, 64> CallSites;
+
+ RangeMapType PadMap;
+
+  // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+  // by try-range labels when lowered). Ordinary calls do not, so appropriate
+  // try-ranges for them need to be deduced.
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ unsigned BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // The end label of the previous invoke or nounwind try-range.
+ unsigned LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last callsite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ SawPotentiallyThrowing |= MI->getDesc().isCall();
+ continue;
+ }
+
+ unsigned BeginLabel = MI->getOperand(0).getImm();
+ assert(BeginLabel && "Invalid label!");
+
+ // End of the previous try-range?
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ PadRange P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (SawPotentiallyThrowing) {
+ CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (LandingPad->LandingPadLabel) {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {BeginLabel, LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]};
+
+ // Try to merge with the previous call-site.
+ if (PreviousIsInvoke) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ CallSites.push_back(Site);
+ PreviousIsInvoke = true;
+ } else {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing) {
+ CallSiteEntry Site = {LastLabel, 0, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ // Final tallies.
+
+ // Call sites.
+ const unsigned SiteStartSize = sizeof(int32_t); // DW_EH_PE_udata4
+ const unsigned SiteLengthSize = sizeof(int32_t); // DW_EH_PE_udata4
+ const unsigned LandingPadSize = sizeof(int32_t); // DW_EH_PE_udata4
+ unsigned SizeSites = CallSites.size() * (SiteStartSize +
+ SiteLengthSize +
+ LandingPadSize);
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
+ SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action);
+
+ // Type infos.
+ const unsigned TypeInfoSize = TD->getPointerSize(); // DW_EH_PE_absptr
+ unsigned SizeTypes = TypeInfos.size() * TypeInfoSize;
+
+ unsigned TypeOffset = sizeof(int8_t) + // Call site format
+ TargetAsmInfo::getULEB128Size(SizeSites) + // Call-site table length
+ SizeSites + SizeActions + SizeTypes;
+
+ unsigned TotalSize = sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
+ TypeOffset;
+
+ unsigned SizeAlign = (4 - TotalSize) & 3;
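+  // The padding rounds TotalSize up to a multiple of 4; e.g. TotalSize = 9
+  // gives SizeAlign = 3, and 9 + 3 = 12 is 4-byte aligned.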
+
+ // Begin the exception table.
+ Asm->SwitchToDataSection(TAI->getDwarfExceptionSection());
+ Asm->EmitAlignment(2, 0, 0, false);
+ O << "GCC_except_table" << SubprogramCount << ":\n";
+
+ for (unsigned i = 0; i != SizeAlign; ++i) {
+ Asm->EmitInt8(0);
+ Asm->EOL("Padding");
+ }
+
+ EmitLabel("exception", SubprogramCount);
+
+ // Emit the header.
+ Asm->EmitInt8(dwarf::DW_EH_PE_omit);
+ Asm->EOL("LPStart format (DW_EH_PE_omit)");
+ Asm->EmitInt8(dwarf::DW_EH_PE_absptr);
+ Asm->EOL("TType format (DW_EH_PE_absptr)");
+ Asm->EmitULEB128Bytes(TypeOffset);
+ Asm->EOL("TType base offset");
+ Asm->EmitInt8(dwarf::DW_EH_PE_udata4);
+ Asm->EOL("Call site format (DW_EH_PE_udata4)");
+ Asm->EmitULEB128Bytes(SizeSites);
+ Asm->EOL("Call-site table length");
+
+ // Emit the landing pad site information.
+ for (unsigned i = 0; i < CallSites.size(); ++i) {
+ CallSiteEntry &S = CallSites[i];
+ const char *BeginTag;
+ unsigned BeginNumber;
+
+ if (!S.BeginLabel) {
+ BeginTag = "eh_func_begin";
+ BeginNumber = SubprogramCount;
+ } else {
+ BeginTag = "label";
+ BeginNumber = S.BeginLabel;
+ }
+
+ EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount,
+ true, true);
+ Asm->EOL("Region start");
+
+ if (!S.EndLabel)
+ EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber,
+ true);
+ else
+ EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true);
+
+ Asm->EOL("Region length");
+
+ if (!S.PadLabel)
+ Asm->EmitInt32(0);
+ else
+ EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount,
+ true, true);
+
+ Asm->EOL("Landing pad");
+
+ Asm->EmitULEB128Bytes(S.Action);
+ Asm->EOL("Action");
+ }
+
+ // Emit the actions.
+ for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+ ActionEntry &Action = Actions[I];
+
+ Asm->EmitSLEB128Bytes(Action.ValueForTypeID);
+ Asm->EOL("TypeInfo index");
+ Asm->EmitSLEB128Bytes(Action.NextAction);
+ Asm->EOL("Next action");
+ }
+
+ // Emit the type ids.
+ for (unsigned M = TypeInfos.size(); M; --M) {
+ GlobalVariable *GV = TypeInfos[M - 1];
+ PrintRelDirective();
+
+ if (GV) {
+ std::string GLN;
+ O << Asm->getGlobalLinkName(GV, GLN);
+ } else {
+ O << "0";
+ }
+
+ Asm->EOL("TypeInfo");
+ }
+
+ // Emit the filter typeids.
+ for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+ unsigned TypeID = FilterIds[j];
+ Asm->EmitULEB128Bytes(TypeID);
+ Asm->EOL("Filter TypeInfo index");
+ }
+
+ Asm->EmitAlignment(2, 0, 0, false);
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ if (shouldEmitMovesModule || shouldEmitTableModule) {
+ const std::vector<Function *> Personalities = MMI->getPersonalities();
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ EmitCommonEHFrame(Personalities[i], i);
+
+ for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(),
+ E = EHFrames.end(); I != E; ++I)
+ EmitEHFrame(*I);
+ }
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfException::BeginFunction(MachineFunction *MF) {
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ this->MF = MF;
+ shouldEmitTable = shouldEmitMoves = false;
+
+ if (MMI && TAI->doesSupportExceptionHandling()) {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ // If any landing pads survive, we need an EH table.
+ if (MMI->getLandingPads().size())
+ shouldEmitTable = true;
+
+ // See if we need frame move info.
+ if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
+ shouldEmitMoves = true;
+
+ if (shouldEmitMoves || shouldEmitTable)
+ // Assumes in correct section after the entry point.
+ EmitLabel("eh_func_begin", ++SubprogramCount);
+ }
+
+ shouldEmitTableModule |= shouldEmitTable;
+ shouldEmitMovesModule |= shouldEmitMoves;
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ if (shouldEmitMoves || shouldEmitTable) {
+ EmitLabel("eh_func_end", SubprogramCount);
+ EmitExceptionTable();
+
+ // Save EH frame information
+ std::string Name;
+ EHFrames.push_back(
+ FunctionEHFrameInfo(getAsm()->getCurrentFunctionEHName(MF, Name),
+ SubprogramCount,
+ MMI->getPersonalityIndex(),
+ MF->getFrameInfo()->hasCalls(),
+ !MMI->getLandingPads().empty(),
+ MMI->getFrameMoves(),
+ MF->getFunction()));
+ }
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 0000000..4479af2
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,178 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__
+#define CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/DenseMap.h"
+#include <string>
+
+namespace llvm {
+
+struct LandingPadInfo;
+class MachineModuleInfo;
+class TargetAsmInfo;
+class Timer;
+class raw_ostream;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class VISIBILITY_HIDDEN DwarfException : public Dwarf {
+ struct FunctionEHFrameInfo {
+ std::string FnName;
+ unsigned Number;
+ unsigned PersonalityIndex;
+ bool hasCalls;
+ bool hasLandingPads;
+ std::vector<MachineMove> Moves;
+ const Function * function;
+
+ FunctionEHFrameInfo(const std::string &FN, unsigned Num, unsigned P,
+ bool hC, bool hL,
+ const std::vector<MachineMove> &M,
+ const Function *f):
+ FnName(FN), Number(Num), PersonalityIndex(P),
+ hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { }
+ };
+
+ std::vector<FunctionEHFrameInfo> EHFrames;
+
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+
+  /// shouldEmitMovesModule - Per-module flag to indicate if frame moves
+ /// should be emitted.
+ bool shouldEmitMovesModule;
+
+ /// ExceptionTimer - Timer for the Dwarf exception writer.
+ Timer *ExceptionTimer;
+
+ /// EmitCommonEHFrame - Emit the common eh unwind frame.
+ ///
+ void EmitCommonEHFrame(const Function *Personality, unsigned Index);
+
+ /// EmitEHFrame - Emit function exception frame information.
+ ///
+ void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo);
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+  /// 1. The landing pad site information describes the range of code covered
+  ///    by the try. In our case it's an accumulation of the ranges covered
+  ///    by the invokes in the try. There is also a reference to the landing
+  ///    pad that handles the exception once processed. Finally, there is an
+  ///    index into the actions table.
+  /// 2. The action table, in our case, is composed of pairs of type ids
+  ///    and next action offset. Starting with the action index from the
+  ///    landing pad site, each type id is checked for a match to the current
+  ///    exception. If it matches then the exception and type id are passed
+  ///    on to the landing pad. Otherwise the next action is looked up. This
+  ///    chain is terminated with a next action of zero. If no type id is
+  ///    found then the frame is unwound and handling continues.
+  /// 3. The type id table contains references to all the C++ typeinfo for all
+  ///    catches in the function. This table is reverse indexed base 1.
+
+ /// SharedTypeIds - How many leading type ids two landing pads have in common.
+ static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// PadLT - Order landing pads lexicographically by type id.
+ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
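+  /// KeyInfo - DenseMap key traits for label IDs. Label numbers are small
+  /// positive integers, so ~0U and ~1U are safe sentinel keys.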
+ struct KeyInfo {
+ static inline unsigned getEmptyKey() { return -1U; }
+ static inline unsigned getTombstoneKey() { return -2U; }
+ static unsigned getHashValue(const unsigned &Key) { return Key; }
+ static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+ static bool isPod() { return true; }
+ };
+
+ /// ActionEntry - Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+ };
+
+ /// PadRange - Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+
+ /// CallSiteEntry - Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ unsigned BeginLabel; // zero indicates the start of the function.
+ unsigned EndLabel; // zero indicates the end of the function.
+ // The landing pad starts at PadLabel.
+ unsigned PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ void EmitExceptionTable();
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+ virtual ~DwarfException();
+
+  /// SetModuleInfo - Set machine module information when it's known that the
+  /// pass manager has created it. Set by the target AsmPrinter.
+ void SetModuleInfo(MachineModuleInfo *mmi) {
+ MMI = mmi;
+ }
+
+ /// BeginModule - Emit all exception information that should come prior to the
+ /// content.
+ void BeginModule(Module *M) {
+ this->M = M;
+ }
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ void BeginFunction(MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
new file mode 100644
index 0000000..8021b7c
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
@@ -0,0 +1,35 @@
+//===--- lib/CodeGen/DwarfLabel.cpp - Dwarf Label -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DWARF Labels
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include <ostream>
+
+using namespace llvm;
+
+/// Profile - Used to gather unique data for the folding set.
+///
+void DWLabel::Profile(FoldingSetNodeID &ID) const {
+ ID.AddString(Tag);
+ ID.AddInteger(Number);
+}
+
+#ifndef NDEBUG
+void DWLabel::print(std::ostream *O) const {
+ if (O) print(*O);
+}
+void DWLabel::print(std::ostream &O) const {
+ O << "." << Tag;
+ if (Number) O << Number;
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h
new file mode 100644
index 0000000..b493903
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h
@@ -0,0 +1,56 @@
+//===--- lib/CodeGen/DwarfLabel.h - Dwarf Label -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DWARF Labels.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__
+#define CODEGEN_ASMPRINTER_DWARFLABEL_H__
+
+#include "llvm/Support/Compiler.h"
+#include <iosfwd>
+#include <vector>
+
+namespace llvm {
+ class FoldingSetNodeID;
+
+ //===--------------------------------------------------------------------===//
+ /// DWLabel - Labels are used to track locations in the assembler file.
+ /// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim,
+  /// where the tag is a category of label (e.g. location) and number is a
+  /// value unique in that category.
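+  /// For example (hypothetical prefix), tag "label" with number 42 prints as
+  /// "Llabel42" when the private global prefix is "L".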
+ class VISIBILITY_HIDDEN DWLabel {
+ /// Tag - Label category tag. Should always be a statically declared C
+ /// string.
+ ///
+ const char *Tag;
+
+ /// Number - Value to make label unique.
+ ///
+ unsigned Number;
+ public:
+ DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {}
+
+ // Accessors.
+ const char *getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+
+ /// Profile - Used to gather unique data for the folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+#ifndef NDEBUG
+ void print(std::ostream *O) const;
+ void print(std::ostream &O) const;
+#endif
+ };
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
new file mode 100644
index 0000000..45e7dd3
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -0,0 +1,235 @@
+//===--- lib/CodeGen/DwarfPrinter.cpp - Dwarf Printer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit general DWARF directives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <ostream>
+
+using namespace llvm;
+
+Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T,
+ const char *flavor)
+: O(OS), Asm(A), TAI(T), TD(Asm->TM.getTargetData()),
+ RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL),
+ SubprogramCount(0), Flavor(flavor), SetCounter(1) {}
+
+void Dwarf::PrintRelDirective(bool Force32Bit, bool isInSection) const {
+ if (isInSection && TAI->getDwarfSectionOffsetDirective())
+ O << TAI->getDwarfSectionOffsetDirective();
+ else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+}
+
+/// PrintLabelName - Print label name in form used by Dwarf writer.
+///
+void Dwarf::PrintLabelName(const char *Tag, unsigned Number) const {
+ O << TAI->getPrivateGlobalPrefix() << Tag;
+ if (Number) O << Number;
+}
+void Dwarf::PrintLabelName(const char *Tag, unsigned Number,
+ const char *Suffix) const {
+ O << TAI->getPrivateGlobalPrefix() << Tag;
+ if (Number) O << Number;
+ O << Suffix;
+}
+
+/// EmitLabel - Emit location label for internal use by Dwarf.
+///
+void Dwarf::EmitLabel(const char *Tag, unsigned Number) const {
+ PrintLabelName(Tag, Number);
+ O << ":\n";
+}
+
+/// EmitReference - Emit a reference to a label.
+///
+void Dwarf::EmitReference(const char *Tag, unsigned Number,
+ bool IsPCRelative, bool Force32Bit) const {
+ PrintRelDirective(Force32Bit);
+ PrintLabelName(Tag, Number);
+ if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+}
+void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative,
+ bool Force32Bit) const {
+ PrintRelDirective(Force32Bit);
+ O << Name;
+ if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+}
+
+/// EmitDifference - Emit the difference between two labels. Some assemblers
+/// do not behave correctly with absolute expressions in data directives, so
+/// there is an option (needsSet) to use an intermediary set expression.
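+/// For illustration (hypothetical label names and directives), with
+/// needsSet() the output resembles:
+///   .set Lset1debug, Llabel2-Llabel1
+///   .long Lset1debug
+/// and without it simply:
+///   .long Llabel2-Llabel1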
+void Dwarf::EmitDifference(const char *TagHi, unsigned NumberHi,
+ const char *TagLo, unsigned NumberLo,
+ bool IsSmall) {
+ if (TAI->needsSet()) {
+ O << "\t.set\t";
+ PrintLabelName("set", SetCounter, Flavor);
+ O << ",";
+ PrintLabelName(TagHi, NumberHi);
+ O << "-";
+ PrintLabelName(TagLo, NumberLo);
+ O << "\n";
+
+ PrintRelDirective(IsSmall);
+ PrintLabelName("set", SetCounter, Flavor);
+ ++SetCounter;
+ } else {
+ PrintRelDirective(IsSmall);
+ PrintLabelName(TagHi, NumberHi);
+ O << "-";
+ PrintLabelName(TagLo, NumberLo);
+ }
+}
+
+void Dwarf::EmitSectionOffset(const char* Label, const char* Section,
+ unsigned LabelNumber, unsigned SectionNumber,
+ bool IsSmall, bool isEH,
+ bool useSet) {
+ bool printAbsolute = false;
+ if (isEH)
+ printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+ else
+ printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+
+ if (TAI->needsSet() && useSet) {
+ O << "\t.set\t";
+ PrintLabelName("set", SetCounter, Flavor);
+ O << ",";
+ PrintLabelName(Label, LabelNumber);
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+
+ O << "\n";
+ PrintRelDirective(IsSmall);
+ PrintLabelName("set", SetCounter, Flavor);
+ ++SetCounter;
+ } else {
+ PrintRelDirective(IsSmall, true);
+ PrintLabelName(Label, LabelNumber);
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+ }
+}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
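+/// For illustration, a prologue move that sets the CFA to an offset from the
+/// stack pointer lowers to DW_CFA_def_cfa / DW_CFA_def_cfa_offset, and a
+/// callee-saved register spill lowers to one of the DW_CFA_offset forms (a
+/// sketch of the cases handled below, not an exhaustive list).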
+void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+ const std::vector<MachineMove> &Moves, bool isEH) {
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
+ bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ unsigned LabelID = Move.getLabelID();
+
+ if (LabelID) {
+ LabelID = MMI->MappedLabel(LabelID);
+
+ // Throw out move if the label is invalid.
+ if (!LabelID) continue;
+ }
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+ Asm->EmitInt8(dwarf::DW_CFA_advance_loc4);
+ Asm->EOL("DW_CFA_advance_loc4");
+ EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true);
+ Asm->EOL();
+
+ BaseLabelID = LabelID;
+ BaseLabel = "label";
+ IsLocal = true;
+ }
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (!Src.isReg()) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ Asm->EmitInt8(dwarf::DW_CFA_def_cfa_offset);
+ Asm->EOL("DW_CFA_def_cfa_offset");
+ } else {
+ Asm->EmitInt8(dwarf::DW_CFA_def_cfa);
+ Asm->EOL("DW_CFA_def_cfa");
+ Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), isEH));
+ Asm->EOL("Register");
+ }
+
+ int Offset = -Src.getOffset();
+
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else {
+        assert(0 && "Machine move not supported yet.");
+ }
+ } else if (Src.isReg() &&
+ Src.getReg() == MachineLocation::VirtualFP) {
+ if (Dst.isReg()) {
+ Asm->EmitInt8(dwarf::DW_CFA_def_cfa_register);
+ Asm->EOL("DW_CFA_def_cfa_register");
+ Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
+ Asm->EOL("Register");
+ } else {
+        assert(0 && "Machine move not supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ Asm->EmitInt8(dwarf::DW_CFA_offset_extended_sf);
+ Asm->EOL("DW_CFA_offset_extended_sf");
+ Asm->EmitULEB128Bytes(Reg);
+ Asm->EOL("Reg");
+ Asm->EmitSLEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else if (Reg < 64) {
+ Asm->EmitInt8(dwarf::DW_CFA_offset + Reg);
+ if (Asm->isVerbose())
+ Asm->EOL("DW_CFA_offset + Reg (" + utostr(Reg) + ")");
+ else
+ Asm->EOL();
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else {
+ Asm->EmitInt8(dwarf::DW_CFA_offset_extended);
+ Asm->EOL("DW_CFA_offset_extended");
+ Asm->EmitULEB128Bytes(Reg);
+ Asm->EOL("Reg");
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ }
+ }
+ }
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
new file mode 100644
index 0000000..6e75992
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -0,0 +1,153 @@
+//===--- lib/CodeGen/DwarfPrinter.h - Dwarf Printer -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit general DWARF directives.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+#define CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+
+#include "DwarfLabel.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+namespace llvm {
+ class AsmPrinter;
+ class MachineFunction;
+ class MachineModuleInfo;
+ class Module;
+ class TargetAsmInfo;
+ class TargetData;
+ class TargetRegisterInfo;
+
+ class VISIBILITY_HIDDEN Dwarf {
+ protected:
+    //===----------------------------------------------------------------===//
+ // Core attributes used by the DWARF printer.
+ //
+
+ /// O - Stream to .s file.
+ ///
+ raw_ostream &O;
+
+ /// Asm - Target of Dwarf emission.
+ ///
+ AsmPrinter *Asm;
+
+ /// TAI - Target asm information.
+ ///
+ const TargetAsmInfo *TAI;
+
+ /// TD - Target data.
+ ///
+ const TargetData *TD;
+
+ /// RI - Register Information.
+ ///
+ const TargetRegisterInfo *RI;
+
+ /// M - Current module.
+ ///
+ Module *M;
+
+ /// MF - Current machine function.
+ ///
+ MachineFunction *MF;
+
+ /// MMI - Collected machine module information.
+ ///
+ MachineModuleInfo *MMI;
+
+ /// SubprogramCount - The running count of functions being compiled.
+ ///
+ unsigned SubprogramCount;
+
+    /// Flavor - A unique string indicating what dwarf producer this is, used
+    /// to make labels unique.
+ ///
+ const char * const Flavor;
+
+ /// SetCounter - A unique number for each '.set' directive.
+ ///
+ unsigned SetCounter;
+
+ Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T,
+ const char *flavor);
+ public:
+ //===------------------------------------------------------------------===//
+ // Accessors.
+ //
+ const AsmPrinter *getAsm() const { return Asm; }
+ MachineModuleInfo *getMMI() const { return MMI; }
+ const TargetAsmInfo *getTargetAsmInfo() const { return TAI; }
+ const TargetData *getTargetData() const { return TD; }
+
+ void PrintRelDirective(bool Force32Bit = false,
+ bool isInSection = false) const;
+
+
+ /// PrintLabelName - Print label name in form used by Dwarf writer.
+ ///
+ void PrintLabelName(const DWLabel &Label) const {
+ PrintLabelName(Label.getTag(), Label.getNumber());
+ }
+ void PrintLabelName(const char *Tag, unsigned Number) const;
+ void PrintLabelName(const char *Tag, unsigned Number,
+ const char *Suffix) const;
+
+ /// EmitLabel - Emit location label for internal use by Dwarf.
+ ///
+ void EmitLabel(const DWLabel &Label) const {
+ EmitLabel(Label.getTag(), Label.getNumber());
+ }
+ void EmitLabel(const char *Tag, unsigned Number) const;
+
+ /// EmitReference - Emit a reference to a label.
+ ///
+ void EmitReference(const DWLabel &Label, bool IsPCRelative = false,
+ bool Force32Bit = false) const {
+ EmitReference(Label.getTag(), Label.getNumber(),
+ IsPCRelative, Force32Bit);
+ }
+ void EmitReference(const char *Tag, unsigned Number,
+ bool IsPCRelative = false,
+ bool Force32Bit = false) const;
+ void EmitReference(const std::string &Name, bool IsPCRelative = false,
+ bool Force32Bit = false) const;
+
+    /// EmitDifference - Emit the difference between two labels. Some
+    /// assemblers do not behave correctly with absolute expressions in data
+    /// directives, so there is an option (needsSet) to use an intermediary
+    /// set expression.
+ void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo,
+ bool IsSmall = false) {
+ EmitDifference(LabelHi.getTag(), LabelHi.getNumber(),
+ LabelLo.getTag(), LabelLo.getNumber(),
+ IsSmall);
+ }
+ void EmitDifference(const char *TagHi, unsigned NumberHi,
+ const char *TagLo, unsigned NumberLo,
+ bool IsSmall = false);
+
+ void EmitSectionOffset(const char* Label, const char* Section,
+ unsigned LabelNumber, unsigned SectionNumber,
+ bool IsSmall = false, bool isEH = false,
+ bool useSet = true);
+
+ /// EmitFrameMoves - Emit frame instructions to describe the layout of the
+ /// frame.
+ void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+ const std::vector<MachineMove> &Moves, bool isEH);
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
new file mode 100644
index 0000000..483ee559
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -0,0 +1,129 @@
+//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+static RegisterPass<DwarfWriter>
+X("dwarfwriter", "DWARF Information Writer");
+char DwarfWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+/// DwarfWriter Implementation
+///
+
+DwarfWriter::DwarfWriter()
+ : ImmutablePass(&ID), DD(0), DE(0) {}
+
+DwarfWriter::~DwarfWriter() {
+ delete DE;
+ delete DD;
+}
+
+/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// content.
+void DwarfWriter::BeginModule(Module *M,
+ MachineModuleInfo *MMI,
+ raw_ostream &OS, AsmPrinter *A,
+ const TargetAsmInfo *T) {
+ DE = new DwarfException(OS, A, T);
+ DD = new DwarfDebug(OS, A, T);
+ DE->BeginModule(M);
+ DD->BeginModule(M);
+ DD->SetDebugInfo(MMI);
+ DE->SetModuleInfo(MMI);
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfWriter::EndModule() {
+ DE->EndModule();
+ DD->EndModule();
+}
+
+/// BeginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfWriter::BeginFunction(MachineFunction *MF) {
+ DE->BeginFunction(MF);
+ DD->BeginFunction(MF);
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfWriter::EndFunction(MachineFunction *MF) {
+ DD->EndFunction(MF);
+ DE->EndFunction();
+
+ if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI())
+ // Clear function debug information.
+ MMI->EndFunction();
+}
+
+/// RecordSourceLine - Records location information and associates it with a
+/// label. Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
+ DICompileUnit CU) {
+ return DD->RecordSourceLine(Line, Col, CU);
+}
+
+/// RecordRegionStart - Indicate the start of a region.
+unsigned DwarfWriter::RecordRegionStart(GlobalVariable *V) {
+ return DD->RecordRegionStart(V);
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+unsigned DwarfWriter::RecordRegionEnd(GlobalVariable *V) {
+ return DD->RecordRegionEnd(V);
+}
+
+/// getRecordSourceLineCount - Count source lines.
+unsigned DwarfWriter::getRecordSourceLineCount() {
+ return DD->getRecordSourceLineCount();
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+///
+void DwarfWriter::RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
+ const MachineInstr *MI) {
+ DD->RecordVariable(GV, FrameIndex, MI);
+}
+
+/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
+/// be emitted.
+bool DwarfWriter::ShouldEmitDwarfDebug() const {
+ return DD && DD->ShouldEmitDwarfDebug();
+}
+
+/// RecordInlinedFnStart - Indicate that subprogram SP is inlined at the
+/// source location given by Line and Col within compile unit CU.
+unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU,
+ unsigned Line, unsigned Col) {
+ return DD->RecordInlinedFnStart(SP, CU, Line, Col);
+}
+
+/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
+unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) {
+ return DD->RecordInlinedFnEnd(SP);
+}
+
+/// RecordVariableScope - Record scope for the variable declared by
+/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE.
+void DwarfWriter::RecordVariableScope(DIVariable &DV,
+ const MachineInstr *DeclareMI) {
+ DD->RecordVariableScope(DV, DeclareMI);
+}
diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile
new file mode 100644
index 0000000..cb5b3f6
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/AsmPrinter/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMAsmPrinter
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 0000000..8ba903a
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,160 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI);
+
+ void finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
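+// Illustrative example (the prefix is target-dependent): a module identifier
+// of "foo.ml" with Id "code_begin" and global prefix "_" mangles to
+// "_camlFoo__code_begin".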
+static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string Mangled;
+ Mangled += TAI.getGlobalPrefix();
+ Mangled += "caml";
+ size_t Letter = Mangled.size();
+ Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ Mangled += "__";
+ Mangled += Id;
+
+ // Capitalize the first letter of the module name.
+ Mangled[Letter] = toupper(Mangled[Letter]);
+
+ if (const char *GlobalDirective = TAI.getGlobalDirective())
+ OS << GlobalDirective << Mangled << "\n";
+ OS << Mangled << ":\n";
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ AP.SwitchToSection(TAI.getTextSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin");
+
+ AP.SwitchToSection(TAI.getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from using stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). The printer aborts if either
+/// condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ const char *AddressDirective;
+ int AddressAlignLog;
+ if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
+ AddressDirective = TAI.getData32bitsDirective();
+ AddressAlignLog = 2;
+ } else {
+ AddressDirective = TAI.getData64bitsDirective();
+ AddressAlignLog = 3;
+ }
+
+ AP.SwitchToSection(TAI.getTextSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end");
+
+ AP.SwitchToSection(TAI.getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end");
+
+ OS << AddressDirective << 0; // FIXME: Why does ocaml emit this??
+ AP.EOL();
+
+ AP.SwitchToSection(TAI.getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable");
+
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1<<16) {
+ cerr << "Function '" << FI.getFunction().getNameStart()
+ << "' is too large for the ocaml GC! "
+ << "Frame size " << FrameSize << " >= 65536.\n";
+ cerr << "(" << uintptr_t(&FI) << ")\n";
+ abort(); // Very rude!
+ }
+
+ OS << "\t" << TAI.getCommentString() << " live roots for "
+ << FI.getFunction().getNameStart() << "\n";
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1<<16) {
+ cerr << "Function '" << FI.getFunction().getNameStart()
+ << "' is too large for the ocaml GC! "
+ << "Live root count " << LiveCount << " >= 65536.\n";
+ abort(); // Very rude!
+ }
+
+ OS << AddressDirective
+ << TAI.getPrivateGlobalPrefix() << "label" << J->Num;
+ AP.EOL("call return address");
+
+ AP.EmitInt16(FrameSize);
+ AP.EOL("stack frame size");
+
+ AP.EmitInt16(LiveCount);
+ AP.EOL("live root count");
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J); K != KE; ++K) {
+ assert(K->StackOffset < 1<<16 &&
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+
+ OS << "\t.word\t" << K->StackOffset;
+ AP.EOL("stack offset");
+ }
+
+ AP.EmitAlignment(AddressAlignLog);
+ }
+ }
+}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..2635303
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1204 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBBs, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
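+// cl::BOU_UNSET defers to the defaultEnableTailMerge argument passed to the
+// BranchFolder constructor; an explicit command-line value overrides it.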
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+namespace {
+ struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass {
+ static char ID;
+ explicit BranchFolder(bool defaultEnableTailMerge) :
+ MachineFunctionPass(&ID) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ const TargetInstrInfo *TII;
+ MachineModuleInfo *MMI;
+ bool MadeChange;
+ private:
+ // Tail Merging.
+ bool EnableTailMerge;
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength);
+
+ typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt;
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+
+ typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
+ std::vector<SameTailElt> SameTails;
+
+ const TargetRegisterInfo *RegInfo;
+ RegScavenger *RS;
+    // Branch optimization.
+ bool OptimizeBranches(MachineFunction &MF);
+ void OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool CanFallThrough(MachineBasicBlock *CurBB);
+ bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
+ MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond);
+ };
+ char BranchFolder::ID = 0;
+}
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+  return new BranchFolder(DefaultEnableTailMerge);
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DOUT << "\nRemoving MBB: " << *MBB;
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // If there are any labels in the basic block, unregister them from
+ // MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->isLabel())
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+
+ // Remove the block.
+ MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+ SmallSet<unsigned, 4> ImpDefRegs;
+ MachineBasicBlock::iterator I = MBB->begin();
+ while (I != MBB->end()) {
+ if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+ break;
+ unsigned Reg = I->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ ImpDefRegs.insert(SubReg);
+ ++I;
+ }
+ if (ImpDefRegs.empty())
+ return false;
+
+ MachineBasicBlock::iterator FirstTerm = I;
+ while (I != MBB->end()) {
+ if (!TII->isUnpredicatedTerminator(I))
+ return false;
+ // See if it uses any of the implicitly defined registers.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (ImpDefRegs.count(Reg))
+ return false;
+ }
+ ++I;
+ }
+
+ I = MBB->begin();
+ while (I != FirstTerm) {
+ MachineInstr *ImpDefMI = &*I;
+ ++I;
+ MBB->erase(ImpDefMI);
+ }
+
+ return true;
+}
+
+bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ if (!TII) return false;
+
+ RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool EverMadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+ EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ EverMadeChange |= OptimizeImpDefsBlock(MBB);
+ }
+
+ RS = RegInfo->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = false;
+ MadeChangeThisIteration |= TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ EverMadeChange |= MadeChangeThisIteration;
+ }
+
+  // See if any jump tables have become mergeable or dead as the code
+  // generator did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
+ if (!JTs.empty()) {
+ // Figure out how these jump tables should be merged.
+ std::vector<unsigned> JTMapping;
+ JTMapping.reserve(JTs.size());
+
+ // We always keep the 0th jump table.
+ JTMapping.push_back(0);
+
+ // Scan the jump tables, seeing if there are any duplicates. Note that this
+ // is N^2, which should be fixed someday.
+ for (unsigned i = 1, e = JTs.size(); i != e; ++i)
+ JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
+
+    // If a jump table was merged with another one, walk the function
+    // rewriting references to jump tables to reference the new JT IDs.
+    // Keep track of whether we see a jump table idx; if not, we can delete
+    // the JT.
+ BitVector JTIsLive(JTs.size());
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I)
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ MachineOperand &Op = I->getOperand(op);
+ if (!Op.isJTI()) continue;
+ unsigned NewIdx = JTMapping[Op.getIndex()];
+ Op.setIndex(NewIdx);
+
+ // Remember that this JT is live.
+ JTIsLive.set(NewIdx);
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens either because the
+ // indirect jump was unreachable (and thus deleted) or because the jump
+ // table was merged with some other one.
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ EverMadeChange = true;
+ }
+ }
+
+ delete RS;
+ return EverMadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+ unsigned Hash = MI->getOpcode();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+
+ // Merge in bits from the operand if easy.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
+ case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+      // Global addresses and external symbols are too hard to hash well;
+      // don't bother, but do pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default: break;
+ }
+
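+    // Fold the operand kind into the low bits and shift by the operand index
+    // (masked to 31 so the shift amount stays well defined) so that the same
+    // operands in a different order produce different hashes.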
+ Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
+/// branch must be inserted). For blocks with a successor, one of the
+/// two blocks to be tail-merged will end with a branch already, so
+/// it is profitable to cross-jump even for a single instruction.
+
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
+ unsigned minCommonTailLength) {
+ MachineBasicBlock::const_iterator I = MBB->end();
+ if (I == MBB->begin())
+ return 0; // Empty MBB.
+
+ --I;
+ unsigned Hash = HashMachineInstr(I);
+
+ if (I == MBB->begin() || minCommonTailLength == 1)
+ return Hash; // Single instr MBB.
+
+ --I;
+ // Hash in the second-to-last instruction.
+ Hash ^= HashMachineInstr(I) << 2;
+ return Hash;
+}
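+
+// The hash is only used to bucket candidate blocks; blocks that collide are
+// still compared instruction-by-instruction (via ComputeCommonTailLength
+// below), so a collision costs compile time, never correctness.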
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their ends. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ if (!I1->isIdenticalTo(I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->getOpcode() == TargetInstrInfo::INLINEASM) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ return TailLen;
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ MachineBasicBlock *OldBB = OldInst->getParent();
+
+ // Remove all the old successors of OldBB from the CFG.
+ while (!OldBB->succ_empty())
+ OldBB->removeSuccessor(OldBB->succ_begin());
+
+ // Remove all the dead instructions from the end of OldBB.
+ OldBB->erase(OldInst, OldBB->end());
+
+  // If NewDest isn't immediately after OldBB, insert a branch to it.
+ if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
+ TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>());
+ OldBB->addSuccessor(NewDest);
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls through into the part
+/// starting at the iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1) {
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = &CurMBB;
+  MachineBasicBlock *NewMBB =
+    MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ if (RS) {
+ RS->enterBasicBlock(&CurMBB);
+ if (!CurMBB.empty())
+ RS->forward(prior(CurMBB.end()));
+ BitVector RegsLiveAtExit(RegInfo->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isCall())
+ Time += 10;
+ else if (TID.mayLoad() || TID.mayStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
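+
+// The weights above (10 for a call, 2 for a load/store, 1 otherwise) are a
+// crude static cost model; CreateCommonTailOnlyBlock only uses them to rank
+// the candidate blocks against each other, not to predict real cycle counts.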
+
+// FixTail - CurMBB needs an unconditional branch to SuccBB added (we removed
+// these branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccBB instead.
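+// E.g. if CurMBB ends in "jcc NextBB" with no other branch, appending the
+// needed "jmp SuccBB" would leave two branches; instead the test is reversed
+// to "jncc SuccBB" and control falls through to NextBB.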
+
+static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (I != MF->end() &&
+ !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
+}
+
+static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
+ const std::pair<unsigned,MachineBasicBlock*> &q) {
+ if (p.first < q.first)
+ return true;
+ else if (p.first > q.first)
+ return false;
+ else if (p.second->getNumber() < q.second->getNumber())
+ return true;
+ else if (p.second->getNumber() > q.second->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ assert(0 && "Predecessor appears twice");
+#endif
+ return false;
+ }
+}
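+
+// MergeCompare is the predicate passed to std::stable_sort in TryMergeBlocks:
+// sorting by hash first groups blocks with identical tails together, and the
+// block-number tie-break keeps the order deterministic from run to run.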
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element). Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks. SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = prior(MergePotentials.end());
+ for (MPIterator CurMPIter = prior(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter!=B && CurMPIter->first==CurHash;
+ --CurMPIter) {
+ for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
+ unsigned CommonTailLen = ComputeCommonTailLength(
+ CurMPIter->second,
+ I->second,
+ TrialBBI1, TrialBBI2);
+ // If we will have to split a block, there should be at least
+ // minCommonTailLength instructions in common; if not, at worst
+ // we will be replacing a fallthrough into the common tail with a
+ // branch, which at worst breaks even with falling through into
+ // the duplicated common tail, so 1 instruction in common is enough.
+ // We will always pick a block we do not have to split as the common
+ // tail if there is one.
+ // (Empty blocks will get forwarded and need not be considered.)
+ if (CommonTailLen >= minCommonTailLength ||
+ (CommonTailLen > 0 &&
+ (TrialBBI1==CurMPIter->second->begin() ||
+ TrialBBI2==I->second->begin()))) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1));
+ }
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(std::make_pair(I, TrialBBI2));
+ }
+ if (I==B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->first==CurHash;
+ --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->second;
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter==B)
+ break;
+ }
+ if (CurMPIter->first!=CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail. Create a block that does by splitting one.
+unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength) {
+ unsigned i, commonTailIndex;
+ unsigned TimeEstimate = ~0U;
+ for (i=0, commonTailIndex=0; i<SameTails.size(); i++) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].first->second==PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+    // Otherwise, make a (fairly bogus) choice based on an estimate of how
+    // long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].first->second->begin(),
+ SameTails[i].second);
+ if (t<=TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+
+ DOUT << "\nSplitting " << MBB->getNumber() << ", size " <<
+ maxCommonTailLength;
+
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ SameTails[commonTailIndex].first->second = newMBB;
+ SameTails[commonTailIndex].second = newMBB->begin();
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB==MBB)
+ PredBB = newMBB;
+
+ return commonTailIndex;
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock* PredBB) {
+ // It doesn't make sense to save a single instruction since tail merging
+ // will add a jump.
+ // FIXME: Ask the target to provide the threshold?
+ unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1;
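+  // This works out to a threshold of 2 when the blocks share a successor (one
+  // of them already ends in a branch) and 3 when they do not (a fresh jump
+  // must be inserted). Shorter tails can still merge when no block needs to
+  // be split; see ComputeSameTails.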
+ MadeChange = false;
+
+ DOUT << "\nTryMergeBlocks " << MergePotentials.size() << '\n';
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+  std::stable_sort(MergePotentials.begin(), MergePotentials.end(),
+                   MergeCompare);
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = prior(MergePotentials.end())->first;
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength);
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and not the entry
+ // block, which we can't jump to), we can treat all blocks with this same
+ // tail at once. Use PredBB if that is one of the possibilities, as that
+ // will not introduce any extra branches.
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->second->
+ getParent()->begin();
+ unsigned int commonTailIndex, i;
+ for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) {
+ MachineBasicBlock *MBB = SameTails[i].first->second;
+ if (MBB->begin() == SameTails[i].second && MBB != EntryBB) {
+ commonTailIndex = i;
+ if (MBB==PredBB)
+ break;
+ }
+ }
+
+ if (commonTailIndex==SameTails.size()) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+ // MBB is common tail. Adjust all other BB's to jump to this one.
+ // Traversal must be forwards so erases work.
+ DOUT << "\nUsing common tail " << MBB->getNumber() << " for ";
+ for (unsigned int i=0; i<SameTails.size(); ++i) {
+ if (commonTailIndex==i)
+ continue;
+ DOUT << SameTails[i].first->second->getNumber() << ",";
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ ReplaceTailWithBranchTo(SameTails[i].second, MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].first);
+ }
+ DOUT << "\n";
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+ if (!EnableTailMerge) return false;
+
+ MadeChange = false;
+
+ // First find blocks with no successors.
+ MergePotentials.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (I->succ_empty())
+ MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
+ }
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() < TailMergeThreshold &&
+ MergePotentials.size() >= 2)
+ MadeChange |= TryMergeBlocks(NULL, NULL);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) alter conditional branches so they branch to the other block
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2; ++P) {
+ MachineBasicBlock* PBB = *P;
+        // Skip blocks that loop to themselves; we can't tail-merge these.
+ if (PBB==IBB)
+ continue;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and
+ // we cannot reverse the branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB==IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = next(MachineFunction::iterator(PBB));
+ }
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice
+ // to have a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock* PredNextBB = NULL;
+ if (IP!=MF.end())
+ PredNextBB = IP;
+ if (TBB==NULL) {
+ if (IBB!=PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB!=IBB && FBB!=IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB!=IBB) // ubr
+ continue;
+ } else {
+ if (TBB!=IBB && IBB!=PredNextBB) // cbr
+ continue;
+ }
+ }
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+ }
+ MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
+ }
+ }
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryMergeBlocks(I, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can occur as a result of removing blocks in TryMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryMergeBlocks
+ if (MergePotentials.size()==1 &&
+ MergePotentials.begin()->second != PredBB)
+ FixTail(MergePotentials.begin()->second, I, TII);
+ }
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+
+/// CanFallThrough - Return true if the specified block (with the specified
+/// branch condition) can implicitly transfer control to the block after it by
+/// falling off the end of it. This should return false if it can reach the
+/// block after it, but it uses an explicit branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
+ bool BranchUnAnalyzable,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) {
+ MachineFunction::iterator Fallthrough = CurBB;
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == CurBB->getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
+ if (!CurBB->isSuccessor(Fallthrough))
+ return false;
+
+ // If we couldn't analyze the branch, assume it could fall through.
+ if (BranchUnAnalyzable) return true;
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
+/// CanFallThrough - Return true if the specified block can implicitly transfer
+/// control to the block after it by falling off the end of it. This should
+/// return false if it can reach the block after it, but it uses an explicit
+/// branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true);
+ return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to be a
+/// strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1)
+/// would result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ if (MBB1->empty() || MBB2->empty()) return false;
+
+  // If there is a clear successor ordering we make sure that one block
+  // will fall through to the next.
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ MachineInstr *MBB1I = --MBB1->end();
+ MachineInstr *MBB2I = --MBB2->end();
+ return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator FallThrough = MBB;
+ ++FallThrough;
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block.
+ if (MBB->empty() && !MBB->isLandingPad()) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return;
+
+ if (FallThrough == MBB->getParent()->end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ }
+
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ MBB->getParent()->getJumpTableInfo()->
+ ReplaceMBBInJumpTables(MBB, FallThrough);
+ MadeChange = true;
+ }
+ return;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not) remove the branch.
+ if (PriorTBB == MBB && PriorFBB == 0) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
+ if (PriorFBB == MBB) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+ // If this block doesn't fall through (e.g. it ends with an uncond branch or
+ // has no successors) and if the pred falls through into this block, and if
+ // it would otherwise fall through into the block after this, move this
+ // block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+  // due to loops) than it is to exit it. This helps with cases such as asserts
+  // in loops by moving the assert condition out of the loop body.
+ if (!PriorCond.empty() && PriorFBB == 0 &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !CanFallThrough(MBB)) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PredBB aren't both no-successor
+ // blocks. If neither have successors and if PredBB is the second from
+    // last block in the function, we'd just keep swapping the two blocks into
+    // the last position. Only do the swap if one is clearly better to fall
+    // through than the other.
+ if (FallThrough == --MBB->getParent()->end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ // We don't want to do this transformation if we have control flow like:
+ // br cond BB2
+ // BB1:
+ // ..
+ // jmp BBX
+ // BB2:
+ // ..
+ // ret
+ //
+ // In this case, we could actually be moving the return block *into* a
+ // loop!
+ if (DoTransform && !MBB->succ_empty() &&
+ (!CanFallThrough(PriorTBB) || PriorTBB->empty()))
+ DoTransform = false;
+
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DOUT << "\nMoving MBB: " << *MBB;
+ DOUT << "To make fallthrough to: " << *PriorTBB << "\n";
+
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(--MBB->getParent()->end());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ if (!CurUnAnalyzable) {
+    // If the CFG for this block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ MBB->begin()->getDesc().isBranch() && CurTBB != MBB) {
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB);
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (PriorTBB == 0) {
+ assert(PriorCond.empty() && PriorFBB == 0 &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(PriorFBB == 0 && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+ }
+
+          // Iterate through all the predecessors, revectoring each in turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+          while (PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch (and fix the CFG).
+ MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+ NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ TII->RemoveBranch(*PMBB);
+ NewCurCond.clear();
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false);
+ }
+ }
+ }
+
+          // Change any jump tables to go to the new MBB.
+ MBB->getParent()->getJumpTableInfo()->
+ ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
+ PriorTBB, PriorFBB, PriorCond)) {
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
+ CurCond);
+
+ if (!MBB->isLandingPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredBB = *PI;
+ MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+ if (PredBB != MBB && !CanFallThrough(PredBB)
+ && (!CurFallsThru || !CurTBB || !CurFBB)
+ && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ return OptimizeBlock(MBB);
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineBasicBlock *SuccBB = *SI;
+ MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+ std::vector<MachineOperand> SuccPrevCond;
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+ !SuccBB->isLandingPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ if (FallThrough != MBB->getParent()->end() &&
+ PrevBB.isSuccessor(FallThrough)) {
+ MBB->moveAfter(--MBB->getParent()->end());
+ MadeChange = true;
+ return;
+ }
+ }
+ }
+}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
new file mode 100644
index 0000000..ca4b31c
--- /dev/null
+++ b/lib/CodeGen/CMakeLists.txt
@@ -0,0 +1,62 @@
+add_llvm_library(LLVMCodeGen
+ BranchFolding.cpp
+ CodePlacementOpt.cpp
+ DeadMachineInstructionElim.cpp
+ DwarfEHPrepare.cpp
+ ELFWriter.cpp
+ GCMetadata.cpp
+ GCMetadataPrinter.cpp
+ GCStrategy.cpp
+ IfConversion.cpp
+ IntrinsicLowering.cpp
+ LLVMTargetMachine.cpp
+ LatencyPriorityQueue.cpp
+ LiveInterval.cpp
+ LiveIntervalAnalysis.cpp
+ LiveStackAnalysis.cpp
+ LiveVariables.cpp
+ LowerSubregs.cpp
+ MachOWriter.cpp
+ MachineBasicBlock.cpp
+ MachineDominators.cpp
+ MachineFunction.cpp
+ MachineInstr.cpp
+ MachineLICM.cpp
+ MachineLoopInfo.cpp
+ MachineModuleInfo.cpp
+ MachinePassRegistry.cpp
+ MachineRegisterInfo.cpp
+ MachineSink.cpp
+ MachineVerifier.cpp
+ OcamlGC.cpp
+ PBQP.cpp
+ PHIElimination.cpp
+ Passes.cpp
+ PostRASchedulerList.cpp
+ PreAllocSplitting.cpp
+ PrologEpilogInserter.cpp
+ PseudoSourceValue.cpp
+ RegAllocBigBlock.cpp
+ RegAllocLinearScan.cpp
+ RegAllocLocal.cpp
+ RegAllocPBQP.cpp
+ RegAllocSimple.cpp
+ RegisterCoalescer.cpp
+ RegisterScavenging.cpp
+ ScheduleDAG.cpp
+ ScheduleDAGEmit.cpp
+ ScheduleDAGInstrs.cpp
+ ScheduleDAGPrinter.cpp
+ ShadowStackGC.cpp
+ ShrinkWrapping.cpp
+ SimpleRegisterCoalescing.cpp
+ Spiller.cpp
+ StackProtector.cpp
+ StackSlotColoring.cpp
+ StrongPHIElimination.cpp
+ TargetInstrInfoImpl.cpp
+ TwoAddressInstructionPass.cpp
+ UnreachableBlockElim.cpp
+ VirtRegMap.cpp
+ VirtRegRewriter.cpp
+ )
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
new file mode 100644
index 0000000..383098e
--- /dev/null
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -0,0 +1,358 @@
+//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to a target-specific alignment boundary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "code-placement"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumHeaderAligned, "Number of loop headers aligned");
+STATISTIC(NumIntraElim, "Number of intra-loop branches eliminated");
+STATISTIC(NumIntraMoved, "Number of intra-loop branches moved");
+
+namespace {
+ class CodePlacementOpt : public MachineFunctionPass {
+ const MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
+
+ /// ChangedMBBs - BBs which are modified by OptimizeIntraLoopEdges.
+ SmallPtrSet<MachineBasicBlock*, 8> ChangedMBBs;
+
+ /// UncondJmpMBBs - A list of BBs which are in loops and end with
+ /// unconditional branches.
+ SmallVector<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 4>
+ UncondJmpMBBs;
+
+ /// LoopHeaders - A list of BBs which are loop headers.
+ SmallVector<MachineBasicBlock*, 4> LoopHeaders;
+
+ public:
+ static char ID;
+ CodePlacementOpt() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const {
+ return "Code Placement Optimizater";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool OptimizeIntraLoopEdges();
+ bool HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+ SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign);
+ bool AlignLoops(MachineFunction &MF);
+ };
+
+ char CodePlacementOpt::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createCodePlacementOptPass() {
+ return new CodePlacementOpt();
+}
+
+/// OptimizeIntraLoopEdges - Place loop back edges to move unconditional
+/// branches out of the loop.
+///
+/// A:
+/// ...
+/// <fallthrough to B>
+///
+/// B: --> loop header
+/// ...
+/// jcc <cond> C, [exit]
+///
+/// C:
+/// ...
+/// jmp B
+///
+/// ==>
+///
+/// A:
+/// ...
+/// jmp B
+///
+/// C: --> new loop header
+/// ...
+/// <fallthrough to B>
+///
+/// B:
+/// ...
+/// jcc <cond> C, [exit]
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdges() {
+ if (!TLI->shouldOptimizeCodePlacement())
+ return false;
+
+ bool Changed = false;
+ for (unsigned i = 0, e = UncondJmpMBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = UncondJmpMBBs[i].first;
+ MachineBasicBlock *SuccMBB = UncondJmpMBBs[i].second;
+ MachineLoop *L = MLI->getLoopFor(MBB);
+ assert(L && "BB is expected to be in a loop!");
+
+ if (ChangedMBBs.count(MBB)) {
+ // BB has been modified, re-analyze.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
+ continue;
+ if (MLI->getLoopFor(TBB) != L || TBB->isLandingPad())
+ continue;
+ SuccMBB = TBB;
+ } else {
+ assert(MLI->getLoopFor(SuccMBB) == L &&
+ "Successor is not in the same loop!");
+ }
+
+ if (MBB->isLayoutSuccessor(SuccMBB)) {
+ // Successor is right after MBB, just eliminate the unconditional jmp.
+ // Can this happen?
+ TII->RemoveBranch(*MBB);
+ ChangedMBBs.insert(MBB);
+ ++NumIntraElim;
+ Changed = true;
+ continue;
+ }
+
+    // Now check whether SuccMBB is reached by fallthrough from some BB. If it
+    // is, that BB should be from outside the loop, since the edge will become
+    // a jmp.
+ bool OkToMove = true;
+ MachineBasicBlock *FtMBB = 0, *FtTBB = 0, *FtFBB = 0;
+ SmallVector<MachineOperand, 4> FtCond;
+ for (MachineBasicBlock::pred_iterator PI = SuccMBB->pred_begin(),
+ PE = SuccMBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredMBB = *PI;
+ if (PredMBB->isLayoutSuccessor(SuccMBB)) {
+ if (TII->AnalyzeBranch(*PredMBB, FtTBB, FtFBB, FtCond)) {
+ OkToMove = false;
+ break;
+ }
+ if (!FtTBB)
+ FtTBB = SuccMBB;
+        else if (!FtFBB) {
+          assert(FtTBB != SuccMBB && "Unexpected control flow!");
+ FtFBB = SuccMBB;
+ }
+
+ // A fallthrough.
+ FtMBB = PredMBB;
+ MachineLoop *PL = MLI->getLoopFor(PredMBB);
+ if (PL && (PL == L || PL->getLoopDepth() >= L->getLoopDepth()))
+ OkToMove = false;
+
+ break;
+ }
+ }
+
+ if (!OkToMove)
+ continue;
+
+    // Is it profitable? If SuccMBB falls through to its own layout successor,
+    // moving SuccMBB would turn that fallthrough into a jmp.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*SuccMBB, TBB, FBB, Cond))
+ continue;
+ if (!TBB && Cond.empty())
+ TBB = next(MachineFunction::iterator(SuccMBB));
+ else if (!FBB && !Cond.empty())
+ FBB = next(MachineFunction::iterator(SuccMBB));
+
+    // This calculates the cost of the transformation. It also finds the *only*
+    // intra-loop edge if there is one.
+ int Cost = 0;
+ bool HasOneIntraSucc = true;
+ MachineBasicBlock *IntraSucc = 0;
+ for (MachineBasicBlock::succ_iterator SI = SuccMBB->succ_begin(),
+ SE = SuccMBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SSMBB = *SI;
+ if (MLI->getLoopFor(SSMBB) == L) {
+ if (!IntraSucc)
+ IntraSucc = SSMBB;
+ else
+ HasOneIntraSucc = false;
+ }
+
+ if (SuccMBB->isLayoutSuccessor(SSMBB))
+ // This will become a jmp.
+ ++Cost;
+ else if (MBB->isLayoutSuccessor(SSMBB)) {
+        // One of the successors will become the new fallthrough.
+ if (SSMBB == FBB) {
+ FBB = 0;
+ --Cost;
+ } else if (!FBB && SSMBB == TBB && Cond.empty()) {
+ TBB = 0;
+ --Cost;
+ } else if (!Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
+ assert(SSMBB == TBB);
+ TBB = FBB;
+ FBB = 0;
+ --Cost;
+ }
+ }
+ }
+ if (Cost)
+ continue;
+
+ // Now, let's move the successor to below the BB to eliminate the jmp.
+ SuccMBB->moveAfter(MBB);
+ TII->RemoveBranch(*MBB);
+ TII->RemoveBranch(*SuccMBB);
+ if (TBB)
+ TII->InsertBranch(*SuccMBB, TBB, FBB, Cond);
+ ChangedMBBs.insert(MBB);
+ ChangedMBBs.insert(SuccMBB);
+ if (FtMBB) {
+ TII->RemoveBranch(*FtMBB);
+ TII->InsertBranch(*FtMBB, FtTBB, FtFBB, FtCond);
+ ChangedMBBs.insert(FtMBB);
+ }
+ Changed = true;
+
+    // If MBB is the loop latch, we may have a new loop header.
+ if (MBB == L->getLoopLatch()) {
+ assert(MLI->isLoopHeader(SuccMBB) &&
+ "Only succ of loop latch is not the header?");
+ if (HasOneIntraSucc && IntraSucc)
+        std::replace(LoopHeaders.begin(), LoopHeaders.end(),
+                     SuccMBB, IntraSucc);
+ }
+ }
+
+ ++NumIntraMoved;
+ return Changed;
+}
+
+/// HeaderShouldBeAligned - Return true if the specified loop header block
+/// should be aligned. For now, we will not align it if all the predecessors
+/// (i.e. loop back edges) are laid out above the header. FIXME: Do not
+/// align small loops.
+bool
+CodePlacementOpt::HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+ SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign) {
+ if (DoNotAlign.count(MBB))
+ return false;
+
+ bool BackEdgeBelow = false;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredMBB = *PI;
+ if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) {
+ BackEdgeBelow = true;
+ break;
+ }
+ }
+
+ if (!BackEdgeBelow)
+ return false;
+
+ // Ok, we are going to align this loop header. If it's an inner loop,
+ // do not align its outer loop.
+ MachineBasicBlock *PreHeader = L->getLoopPreheader();
+ if (PreHeader) {
+ MachineLoop *L = MLI->getLoopFor(PreHeader);
+ if (L) {
+ MachineBasicBlock *HeaderBlock = L->getHeader();
+ HeaderBlock->setAlignment(0);
+ DoNotAlign.insert(HeaderBlock);
+ }
+ }
+ return true;
+}
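+
+// For instance, with the layout [preheader, header, body, latch] the latch's
+// backward branch to the header comes from below, so the header qualifies;
+// if every predecessor sat above the header, it would not be reached by a
+// backward jump and padding it would mostly cost code size.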
+
+/// AlignLoops - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return false; // Don't care about loop alignment.
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ bool Changed = false;
+ SmallPtrSet<MachineBasicBlock*, 4> DoNotAlign;
+ for (unsigned i = 0, e = LoopHeaders.size(); i != e; ++i) {
+ MachineBasicBlock *HeaderMBB = LoopHeaders[i];
+ MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(HeaderMBB));
+ MachineLoop *L = MLI->getLoopFor(HeaderMBB);
+ if (L == MLI->getLoopFor(PredMBB))
+      // If the previous BB is in the same loop, don't align this BB. We want
+      // to avoid inserting nops inside a loop.
+ continue;
+ if (HeaderShouldBeAligned(HeaderMBB, L, DoNotAlign)) {
+ HeaderMBB->setAlignment(Align);
+ Changed = true;
+ ++NumHeaderAligned;
+ }
+ }
+
+ return Changed;
+}
+
+bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
+ MLI = &getAnalysis<MachineLoopInfo>();
+ if (MLI->empty())
+ return false; // No loops.
+
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Analyze the BBs first and keep track of loop headers and BBs that
+ // end with an unconditional jmp to another block in the same loop.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ if (MBB->isLandingPad())
+ continue;
+ MachineLoop *L = MLI->getLoopFor(MBB);
+ if (!L)
+ continue;
+ if (MLI->isLoopHeader(MBB))
+ LoopHeaders.push_back(MBB);
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
+ continue;
+ if (MLI->getLoopFor(TBB) == L && !TBB->isLandingPad())
+ UncondJmpMBBs.push_back(std::make_pair(MBB, TBB));
+ }
+
+ bool Changed = OptimizeIntraLoopEdges();
+
+ Changed |= AlignLoops(MF);
+
+ ChangedMBBs.clear();
+ UncondJmpMBBs.clear();
+ LoopHeaders.clear();
+
+ return Changed;
+}
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 0000000..4832a5e
--- /dev/null
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,161 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN DeadMachineInstructionElim :
+ public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ BitVector LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(&ID) {}
+
+ private:
+ bool isDead(MachineInstr *MI) const;
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+
+static RegisterPass<DeadMachineInstructionElim>
+Y("dead-mi-elimination",
+ "Remove dead machine instructions");
+
+FunctionPass *llvm::createDeadMachineInstructionElimPass() {
+ return new DeadMachineInstructionElim();
+}
+
+bool DeadMachineInstructionElim::isDead(MachineInstr *MI) const {
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(TII, SawStore))
+ return false;
+
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
+ LivePhysRegs[Reg] : !MRI->use_empty(Reg)) {
+ // This def has a use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
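+
+// For example, a "%reg1024 = add %reg1025, 4" whose result has no uses is
+// dead; a def of a physical register is only dead if the register is not
+// live at this point of the bottom-up scan (tracked in LivePhysRegs).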
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ bool AnyChanges = false;
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Compute a bitvector to represent all non-allocatable physregs.
+ BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF);
+ NonAllocatableRegs.flip();
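+  // Non-allocatable registers (e.g. the stack pointer) may be read by code
+  // this pass cannot see, so they are treated as live by default.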
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
+ for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ // Start out assuming that all non-allocatable registers are live
+ // out of this block.
+ LivePhysRegs = NonAllocatableRegs;
+
+ // Also add any explicit live-out physregs for this block.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
+ LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
+ unsigned Reg = *LOI;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ LivePhysRegs.set(Reg);
+ }
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // If the instruction is dead, delete it!
+ if (isDead(MI)) {
+ DOUT << "DeadMachineInstructionElim: DELETING: " << *MI;
+ AnyChanges = true;
+ MI->eraseFromParent();
+ MIE = MBB->rend();
+ // MII is now pointing to the next instruction to process,
+ // so don't increment it.
+ continue;
+ }
+
+ // Record the physreg defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.reset(Reg);
+ // Check the subreg set, not the alias set, because a def
+ // of a super-register may still be partially live after
+ // this def.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs)
+ LivePhysRegs.reset(*SubRegs);
+ }
+ }
+ }
+ // Record the physreg uses, after the defs, in case a physreg is
+ // both defined and used in the same instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.set(Reg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
+ *AliasSet; ++AliasSet)
+ LivePhysRegs.set(*AliasSet);
+ }
+ }
+ }
+
+ // We didn't delete the current instruction, so increment MII to
+ // the next one.
+ ++MII;
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 0000000..720e3d1
--- /dev/null
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,397 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+using namespace llvm;
+
+STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
+STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
+STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced");
+
+namespace {
+ class VISIBILITY_HIDDEN DwarfEHPrepare : public FunctionPass {
+ const TargetLowering *TLI;
+ bool CompileFast;
+
+ // The eh.exception intrinsic.
+ Function *ExceptionValueIntrinsic;
+
+ // _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ // Dominator info is used when turning stack temporaries into registers.
+ DominatorTree *DT;
+ DominanceFrontier *DF;
+
+ // The function we are running on.
+ Function *F;
+
+ // The landing pads for this function.
+ typedef SmallPtrSet<BasicBlock*, 8> BBSet;
+ BBSet LandingPads;
+
+ // Stack temporary used to hold eh.exception values.
+ AllocaInst *ExceptionValueVar;
+
+ bool NormalizeLandingPads();
+ bool LowerUnwinds();
+ bool MoveExceptionValueCalls();
+ bool FinishStackTemporaries();
+ bool PromoteStackTemporaries();
+
+ Instruction *CreateExceptionValueCall(BasicBlock *BB);
+ Instruction *CreateValueLoad(BasicBlock *BB);
+
+ /// CreateReadOfExceptionValue - Return the result of the eh.exception
+ /// intrinsic by calling the intrinsic if in a landing pad, or loading
+ /// it from the exception value variable otherwise.
+ Instruction *CreateReadOfExceptionValue(BasicBlock *BB) {
+ return LandingPads.count(BB) ?
+ CreateExceptionValueCall(BB) : CreateValueLoad(BB);
+ }
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ DwarfEHPrepare(const TargetLowering *tli, bool fast) :
+ FunctionPass(&ID), TLI(tli), CompileFast(fast),
+ ExceptionValueIntrinsic(0), RewindFunction(0) {}
+
+ virtual bool runOnFunction(Function &Fn);
+
+ // getAnalysisUsage - We need dominance frontiers for memory promotion.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (!CompileFast)
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ if (!CompileFast)
+ AU.addRequired<DominanceFrontier>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+
+ const char *getPassName() const {
+ return "Exception handling preparation";
+ }
+
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) {
+ return new DwarfEHPrepare(tli, fast);
+}
+
+/// NormalizeLandingPads - Normalize and discover landing pads, noting them
+/// in the LandingPads set. A landing pad is normal if the only CFG edges
+/// that end at it are unwind edges from invoke instructions.
+/// Abnormal landing pads are fixed up by redirecting all unwind edges to
+/// a new basic block which falls through to the original.
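+/// For example, if block %lpad is reached both by an invoke's unwind edge and
+/// by a normal branch, a new block %lpad_unwind_edge is created, all unwind
+/// edges are redirected to it, and it falls through into %lpad.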
+bool DwarfEHPrepare::NormalizeLandingPads() {
+ bool Changed = false;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (!isa<InvokeInst>(TI))
+ continue;
+ BasicBlock *LPad = TI->getSuccessor(1);
+ // Skip landing pads that have already been normalized.
+ if (LandingPads.count(LPad))
+ continue;
+
+ // Check that only invoke unwind edges end at the landing pad.
+ bool OnlyUnwoundTo = true;
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
+ PI != PE; ++PI) {
+ TerminatorInst *PT = (*PI)->getTerminator();
+ if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
+ OnlyUnwoundTo = false;
+ break;
+ }
+ }
+ if (OnlyUnwoundTo) {
+ // Only unwind edges lead to the landing pad. Remember the landing pad.
+ LandingPads.insert(LPad);
+ continue;
+ }
+
+ // At least one normal edge ends at the landing pad. Redirect the unwind
+ // edges to a new basic block which falls through into this one.
+
+ // Create the new basic block.
+ BasicBlock *NewBB = BasicBlock::Create(LPad->getName() + "_unwind_edge");
+
+ // Insert it into the function right before the original landing pad.
+ LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
+
+ // Redirect unwind edges from the original landing pad to NewBB.
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
+ TerminatorInst *PT = (*PI++)->getTerminator();
+ if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
+ // Unwind to the new block.
+ PT->setSuccessor(1, NewBB);
+ }
+
+ // If there are any PHI nodes in LPad, we need to update them so that they
+ // merge incoming values from NewBB instead.
+ for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
+ PHINode *PN = cast<PHINode>(II);
+ pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
+
+ // Check to see if all of the values coming in via unwind edges are the
+ // same. If so, we don't need to create a new PHI node.
+ Value *InVal = PN->getIncomingValueForBlock(*PB);
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal == 0) {
+ // Different unwind edges have different values. Create a new PHI node
+ // in NewBB.
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind",
+ NewBB);
+ // Add an entry for each unwind edge, using the value from the old PHI.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
+
+ // Now use this new PHI as the common incoming value for NewBB in PN.
+ InVal = NewPN;
+ }
+
+ // Revector exactly one entry in the PHI node to come from NewBB
+ // and delete all other entries that come from unwind edges. If
+ // there are both normal and unwind edges from the same predecessor,
+ // this leaves an entry for the normal edge.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ PN->removeIncomingValue(*PI);
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ // Add a fallthrough from NewBB to the original landing pad.
+ BranchInst::Create(LPad, NewBB);
+
+ // Now update DominatorTree and DominanceFrontier analysis information.
+ if (DT)
+ DT->splitBlock(NewBB);
+ if (DF)
+ DF->splitBlock(NewBB);
+
+ // Remember the newly constructed landing pad. The original landing pad
+ // LPad is no longer a landing pad now that all unwind edges have been
+ // revectored to NewBB.
+ LandingPads.insert(NewBB);
+ ++NumLandingPadsSplit;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume,
+/// rethrowing any previously caught exception. This will crash horribly
+/// at runtime if there is no such exception: using unwind to throw a new
+/// exception is currently not supported.
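+/// As a sketch (hypothetical IR), a lone "unwind" terminator becomes roughly
+///
+///   call void @_Unwind_Resume(i8* %exn)
+///   unreachable
+///
+/// where %exn is the current exception value read via eh.exception (or, when
+/// outside a landing pad, loaded from the exception stack temporary).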
+bool DwarfEHPrepare::LowerUnwinds() {
+ bool Changed = false;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (!isa<UnwindInst>(TI))
+ continue;
+
+ // Replace the unwind instruction with a call to _Unwind_Resume (or the
+ // appropriate target equivalent) followed by an UnreachableInst.
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ std::vector<const Type*> Params(1, PointerType::getUnqual(Type::Int8Ty));
+ FunctionType *FTy = FunctionType::get(Type::VoidTy, Params, false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ // Create the call...
+ CallInst::Create(RewindFunction, CreateReadOfExceptionValue(I), "", TI);
+ // ...followed by an UnreachableInst.
+ new UnreachableInst(TI);
+
+ // Nuke the unwind instruction.
+ TI->eraseFromParent();
+ ++NumUnwindsLowered;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
+/// landing pads by replacing calls outside of landing pads with loads from a
+/// stack temporary. Move eh.exception calls inside landing pads to the start
+/// of the landing pad (optional, but may make things simpler for later passes).
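+/// For example, a call to eh.exception in an ordinary (non-landing-pad)
+/// block is replaced by a load of the "eh.value" stack temporary, which
+/// every landing pad initializes with its exception value (see
+/// FinishStackTemporaries).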
+bool DwarfEHPrepare::MoveExceptionValueCalls() {
+ // If the eh.exception intrinsic is not declared in the module then there is
+ // nothing to do. Speed up compilation by checking for this common case.
+ if (!ExceptionValueIntrinsic &&
+ !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
+ return false;
+
+ bool Changed = false;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception) {
+ if (!CI->use_empty()) {
+ Value *ExceptionValue = CreateReadOfExceptionValue(BB);
+ if (CI == ExceptionValue) {
+ // The call was at the start of a landing pad - leave it alone.
+ assert(LandingPads.count(BB) &&
+ "Created eh.exception call outside landing pad!");
+ continue;
+ }
+ CI->replaceAllUsesWith(ExceptionValue);
+ }
+ CI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// FinishStackTemporaries - If we introduced a stack variable to hold the
+/// exception value then initialize it in each landing pad.
+bool DwarfEHPrepare::FinishStackTemporaries() {
+ if (!ExceptionValueVar)
+ // Nothing to do.
+ return false;
+
+ bool Changed = false;
+
+ // Make sure that there is a store of the exception value at the start of
+ // each landing pad.
+ for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI) {
+ Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI);
+ Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar);
+ Store->insertAfter(ExceptionValue);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// PromoteStackTemporaries - Turn any stack temporaries we introduced into
+/// registers if possible.
+bool DwarfEHPrepare::PromoteStackTemporaries() {
+ if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) {
+ // Turn the exception temporary into registers and phi nodes if possible.
+ std::vector<AllocaInst*> Allocas(1, ExceptionValueVar);
+ PromoteMemToReg(Allocas, *DT, *DF);
+ return true;
+ }
+ return false;
+}
+
+/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
+/// the start of the basic block (unless there already is one, in which case
+/// the existing call is returned).
+Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHI();
+ // Is this a call to eh.exception?
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception)
+ // Reuse the existing call.
+ return Start;
+
+ // Find the eh.exception intrinsic if we didn't already.
+ if (!ExceptionValueIntrinsic)
+ ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::eh_exception);
+
+ // Create the call.
+ return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+}
+
+/// CreateValueLoad - Insert a load of the exception value stack variable
+/// (creating it if necessary) at the start of the basic block (unless
+/// there already is a load, in which case the existing load is returned).
+Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHI();
+ // Is this a load of the exception temporary?
+ if (ExceptionValueVar)
+ if (LoadInst* LI = dyn_cast<LoadInst>(Start))
+ if (LI->getPointerOperand() == ExceptionValueVar)
+ // Reuse the existing load.
+ return Start;
+
+ // Create the temporary if we didn't already.
+ if (!ExceptionValueVar) {
+ ExceptionValueVar = new AllocaInst(PointerType::getUnqual(Type::Int8Ty),
+ "eh.value", F->begin()->begin());
+ ++NumStackTempsIntroduced;
+ }
+
+ // Load the value.
+ return new LoadInst(ExceptionValueVar, "eh.value.load", Start);
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+
+ // Initialize internal state.
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ DF = getAnalysisIfAvailable<DominanceFrontier>();
+ ExceptionValueVar = 0;
+ F = &Fn;
+
+ // Ensure that only unwind edges end at landing pads (a landing pad is a
+ // basic block where an invoke unwind edge ends).
+ Changed |= NormalizeLandingPads();
+
+ // Turn unwind instructions into libcalls.
+ Changed |= LowerUnwinds();
+
+ // TODO: Move eh.selector calls to landing pads and combine them.
+
+ // Move eh.exception calls to landing pads.
+ Changed |= MoveExceptionValueCalls();
+
+ // Initialize any stack temporaries we introduced.
+ Changed |= FinishStackTemporaries();
+
+ // Turn any stack temporaries into registers if possible.
+ if (!CompileFast)
+ Changed |= PromoteStackTemporaries();
+
+ LandingPads.clear();
+
+ return Changed;
+}
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 0000000..7cc1162
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,575 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes out
+// the ELF file in the following order:
+//
+// #1. ELF Header
+// #2. '.text' section
+// #3. '.data' section
+// #4. '.bss' section (conceptual position in file)
+// ...
+// #X. '.shstrtab' section
+// #Y. Section Table
+//
+// The entries in the section table are laid out as:
+// #0. Null entry [required]
+// #1. ".text" entry - the program code
+// #2. ".data" entry - global variables with initializers. [ if needed ]
+// #3. ".bss" entry - global variables without initializers. [ if needed ]
+// ...
+// #N. ".shstrtab" entry - String table for the section names.
+//
+// NOTE: This code should eventually be extended to support 64-bit ELF (this
+// won't be hard), but we haven't done so yet!
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFWriter.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include <list>
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+/// AddELFWriter - Concrete function to add the ELF writer to the function pass
+/// manager.
+MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
+ raw_ostream &O,
+ TargetMachine &TM) {
+ ELFWriter *EW = new ELFWriter(O, TM);
+ PM.add(EW);
+ return &EW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ /// ELFCodeEmitter - This class is used by the ELFWriter to emit the code for
+ /// functions to the ELF file.
+ class ELFCodeEmitter : public MachineCodeEmitter {
+ ELFWriter &EW;
+ TargetMachine &TM;
+ ELFWriter::ELFSection *ES; // Section to write to.
+ std::vector<unsigned char> *OutBuffer;
+ size_t FnStart;
+ public:
+ explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {}
+
+ void startFunction(MachineFunction &F);
+ bool finishFunction(MachineFunction &F);
+
+ void addRelocation(const MachineRelocation &MR) {
+ assert(0 && "relo not handled yet!");
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ }
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+ assert(0 && "CP not implementated yet!");
+ return 0;
+ }
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+ assert(0 && "JT not implementated yet!");
+ return 0;
+ }
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(0 && "JT not implementated yet!");
+ return 0;
+ }
+
+ virtual uintptr_t getLabelAddress(uint64_t Label) const {
+ assert(0 && "Label address not implementated yet!");
+ abort();
+ return 0;
+ }
+
+ virtual void emitLabel(uint64_t LabelID) {
+ assert(0 && "emit Label not implementated yet!");
+ abort();
+ }
+
+    virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+ /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+ void startGVStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment = 1) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ void startGVStub(const GlobalValue* F, void *Buffer, unsigned StubSize) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ void *finishGVStub(const GlobalValue *F) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ return 0;
+ }
+ };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &F) {
+ // Align the output buffer to the appropriate alignment.
+ unsigned Align = 16; // FIXME: GENERICIZE!!
+ // Get the ELF Section that this function belongs in.
+ ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS,
+ ELFWriter::ELFSection::SHF_EXECINSTR |
+ ELFWriter::ELFSection::SHF_ALLOC);
+ OutBuffer = &ES->SectionData;
+ cerr << "FIXME: This code needs to be updated for changes in the "
+ << "CodeEmitter interfaces. In particular, this should set "
+ << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!";
+ abort();
+
+ // Upgrade the section alignment if required.
+ if (ES->Align < Align) ES->Align = Align;
+
+ // Add padding zeros to the end of the buffer to make sure that the
+ // function will start on the correct byte alignment within the section.
+ OutputBuffer OB(*OutBuffer,
+ TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian());
+ OB.align(Align);
+ FnStart = OutBuffer->size();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &F) {
+ // We now know the size of the function, add a symbol to represent it.
+ ELFWriter::ELFSym FnSym(F.getFunction());
+
+ // Figure out the binding (linkage) of the symbol.
+ switch (F.getFunction()->getLinkage()) {
+ default:
+ // appending linkage is illegal for functions.
+ assert(0 && "Unknown linkage type!");
+ case GlobalValue::ExternalLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL);
+ break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK);
+ break;
+ case GlobalValue::PrivateLinkage:
+ assert (0 && "PrivateLinkage should not be in the symbol table.");
+ case GlobalValue::InternalLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL);
+ break;
+ }
+
+ ES->Size = OutBuffer->size();
+
+ FnSym.SetType(ELFWriter::ELFSym::STT_FUNC);
+ FnSym.SectionIdx = ES->SectionIdx;
+ FnSym.Value = FnStart; // Value = Offset from start of Section.
+ FnSym.Size = OutBuffer->size()-FnStart;
+
+ // Finally, add it to the symtab.
+ EW.SymbolTable.push_back(FnSym);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
+ : MachineFunctionPass(&ID), O(o), TM(tm) {
+ e_flags = 0; // e_flags defaults to 0, no flags.
+
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+ // Create the machine code emitter object for this target.
+ MCE = new ELFCodeEmitter(*this);
+ NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+ delete MCE;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+ Mang = new Mangler(M);
+
+  // Local alias to shorten the code that follows.
+ std::vector<unsigned char> &FH = FileHeader;
+ OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+ FHOut.outbyte(0x7F); // EI_MAG0
+ FHOut.outbyte('E'); // EI_MAG1
+ FHOut.outbyte('L'); // EI_MAG2
+ FHOut.outbyte('F'); // EI_MAG3
+ FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS
+ FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA
+ FHOut.outbyte(1); // EI_VERSION
+ FH.resize(16); // EI_PAD up to 16 bytes.
+
+ // This should change for shared objects.
+ FHOut.outhalf(1); // e_type = ET_REL
+ FHOut.outhalf(TM.getELFWriterInfo()->getEMachine()); // target-defined
+ FHOut.outword(1); // e_version = 1
+ FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file
+ FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o
+
+ ELFHeader_e_shoff_Offset = FH.size();
+ FHOut.outaddr(0); // e_shoff
+ FHOut.outword(e_flags); // e_flags = whatever the target wants
+
+ FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size
+ FHOut.outhalf(0); // e_phentsize = prog header entry size
+ FHOut.outhalf(0); // e_phnum = # prog header entries = 0
+ FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size
+
+ ELFHeader_e_shnum_Offset = FH.size();
+ FHOut.outhalf(0); // e_shnum = # of section header ents
+ ELFHeader_e_shstrndx_Offset = FH.size();
+ FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab'
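+
+  // Note: e_shoff, e_shnum and e_shstrndx are emitted as placeholders here
+  // and backpatched (via fixaddr/fixhalf) in EmitSectionTableStringTable and
+  // OutputSectionsAndSectionTable once their final values are known.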
+
+ // Add the null section, which is required to be first in the file.
+ getSection("", 0, 0);
+
+ // Start up the symbol table. The first entry in the symtab is the null
+ // entry.
+ SymbolTable.push_back(ELFSym(0));
+
+ return false;
+}
+
+void ELFWriter::EmitGlobal(GlobalVariable *GV) {
+ // If this is an external global, emit it now. TODO: Note that it would be
+ // better to ignore the symbol here and only add it to the symbol table if
+ // referenced.
+ if (!GV->hasInitializer()) {
+ ELFSym ExternalSym(GV);
+ ExternalSym.SetBind(ELFSym::STB_GLOBAL);
+ ExternalSym.SetType(ELFSym::STT_NOTYPE);
+ ExternalSym.SectionIdx = ELFSection::SHN_UNDEF;
+ SymbolTable.push_back(ExternalSym);
+ return;
+ }
+
+ unsigned Align = TM.getTargetData()->getPreferredAlignment(GV);
+ unsigned Size =
+ TM.getTargetData()->getTypeAllocSize(GV->getType()->getElementType());
+
+ // If this global has a zero initializer, it is part of the .bss or common
+ // section.
+ if (GV->getInitializer()->isNullValue()) {
+ // If this global is part of the common block, add it now. Variables are
+ // part of the common block if they are zero initialized and allowed to be
+ // merged with other symbols.
+ if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
+ GV->hasCommonLinkage()) {
+ ELFSym CommonSym(GV);
+ // Value for common symbols is the alignment required.
+ CommonSym.Value = Align;
+ CommonSym.Size = Size;
+ CommonSym.SetBind(ELFSym::STB_GLOBAL);
+ CommonSym.SetType(ELFSym::STT_OBJECT);
+ // TODO SOMEDAY: add ELF visibility.
+ CommonSym.SectionIdx = ELFSection::SHN_COMMON;
+ SymbolTable.push_back(CommonSym);
+ return;
+ }
+
+ // Otherwise, this symbol is part of the .bss section. Emit it now.
+
+ // Handle alignment. Ensure section is aligned at least as much as required
+ // by this symbol.
+ ELFSection &BSSSection = getBSSSection();
+ BSSSection.Align = std::max(BSSSection.Align, Align);
+
+ // Within the section, emit enough virtual padding to get us to an alignment
+ // boundary.
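+  // For example, Size = 13 with Align = 8 rounds up to 16; the masking
+  // trick requires Align to be a power of two.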
+ if (Align)
+ BSSSection.Size = (BSSSection.Size + Align - 1) & ~(Align-1);
+
+ ELFSym BSSSym(GV);
+ BSSSym.Value = BSSSection.Size;
+ BSSSym.Size = Size;
+ BSSSym.SetType(ELFSym::STT_OBJECT);
+
+ switch (GV->getLinkage()) {
+ default: // weak/linkonce/common handled above
+ assert(0 && "Unexpected linkage type!");
+ case GlobalValue::AppendingLinkage: // FIXME: This should be improved!
+ case GlobalValue::ExternalLinkage:
+ BSSSym.SetBind(ELFSym::STB_GLOBAL);
+ break;
+ case GlobalValue::InternalLinkage:
+ BSSSym.SetBind(ELFSym::STB_LOCAL);
+ break;
+ }
+
+ // Set the idx of the .bss section
+ BSSSym.SectionIdx = BSSSection.SectionIdx;
+ if (!GV->hasPrivateLinkage())
+ SymbolTable.push_back(BSSSym);
+
+ // Reserve space in the .bss section for this symbol.
+ BSSSection.Size += Size;
+ return;
+ }
+
+ // FIXME: handle .rodata
+ //assert(!GV->isConstant() && "unimp");
+
+ // FIXME: handle .data
+ //assert(0 && "unimp");
+}
+
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the MCE object above.
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+ // Okay, the ELF header and .text sections have been completed, build the
+ // .data, .bss, and "common" sections next.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit the symbol table now, if non-empty.
+ EmitSymbolTable();
+
+ // FIXME: Emit the relocations now.
+
+ // Emit the string table for the sections in the ELF file we have.
+ EmitSectionTableStringTable();
+
+ // Emit the sections to the .o file, and emit the section table for the file.
+ OutputSectionsAndSectionTable();
+
+ // We are done with the abstract symbols.
+ SectionList.clear();
+ NumSections = 0;
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+ return false;
+}
+
+/// EmitSymbolTable - If the current symbol table is non-empty, emit the string
+/// table for it and then the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+ if (SymbolTable.size() == 1) return; // Only the null entry.
+
+ // FIXME: compact all local symbols to the start of the symtab.
+ unsigned FirstNonLocalSymbol = 1;
+
+ ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0);
+ StrTab.Align = 1;
+
+ DataBuffer &StrTabBuf = StrTab.SectionData;
+ OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian);
+
+  // The string table must begin with a null byte, which serves as the
+  // (empty) name of the null symbol.
+ StrTabOut.outbyte(0);
+ SymbolTable[0].NameIdx = 0;
+ unsigned Index = 1;
+ for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) {
+ // Use the name mangler to uniquify the LLVM symbol.
+ std::string Name = Mang->getValueName(SymbolTable[i].GV);
+
+ if (Name.empty()) {
+ SymbolTable[i].NameIdx = 0;
+ } else {
+ SymbolTable[i].NameIdx = Index;
+
+ // Add the name to the output buffer, including the null terminator.
+ StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end());
+
+ // Add a null terminator.
+ StrTabBuf.push_back(0);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += Name.size()+1;
+ }
+ }
+ assert(Index == StrTabBuf.size());
+ StrTab.Size = Index;
+
+ // Now that we have emitted the string table and know the offset into the
+ // string table of each symbol, emit the symbol table itself.
+ ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+ SymTab.Align = is64Bit ? 8 : 4;
+  SymTab.Link = StrTab.SectionIdx;      // sh_link = section index of .strtab.
+ SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol.
+  SymTab.EntSize = is64Bit ? 24 : 16;   // Size of each symtab entry.
+ DataBuffer &SymTabBuf = SymTab.SectionData;
+ OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian);
+
+ if (!is64Bit) { // 32-bit and 64-bit formats are shuffled a bit.
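+    // ELF32 symbol entries are laid out as: st_name, st_value, st_size,
+    // st_info, st_other, st_shndx (16 bytes each).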
+ for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+ ELFSym &Sym = SymbolTable[i];
+ SymTabOut.outword(Sym.NameIdx);
+ SymTabOut.outaddr32(Sym.Value);
+ SymTabOut.outword(Sym.Size);
+ SymTabOut.outbyte(Sym.Info);
+ SymTabOut.outbyte(Sym.Other);
+ SymTabOut.outhalf(Sym.SectionIdx);
+ }
+ } else {
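+    // ELF64 symbol entries are laid out as: st_name, st_info, st_other,
+    // st_shndx, st_value, st_size (24 bytes each).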
+ for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+ ELFSym &Sym = SymbolTable[i];
+ SymTabOut.outword(Sym.NameIdx);
+ SymTabOut.outbyte(Sym.Info);
+ SymTabOut.outbyte(Sym.Other);
+ SymTabOut.outhalf(Sym.SectionIdx);
+ SymTabOut.outaddr64(Sym.Value);
+ SymTabOut.outxword(Sym.Size);
+ }
+ }
+
+ SymTab.Size = SymTabBuf.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+ // First step: add the section for the string table to the list of sections:
+ ELFSection &SHStrTab = getSection(".shstrtab", ELFSection::SHT_STRTAB, 0);
+
+ // Now that we know which section number is the .shstrtab section, update the
+ // e_shstrndx entry in the ELF header.
+ OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+ FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset);
+
+ // Set the NameIdx of each section in the string table and emit the bytes for
+ // the string table.
+ unsigned Index = 0;
+ DataBuffer &Buf = SHStrTab.SectionData;
+
+ for (std::list<ELFSection>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ // Set the index into the table. Note if we have lots of entries with
+ // common suffixes, we could memoize them here if we cared.
+ I->NameIdx = Index;
+
+ // Add the name to the output buffer, including the null terminator.
+ Buf.insert(Buf.end(), I->Name.begin(), I->Name.end());
+
+ // Add a null terminator.
+ Buf.push_back(0);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += I->Name.size()+1;
+ }
+
+ // Set the size of .shstrtab now that we know what it is.
+ assert(Index == Buf.size());
+ SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+ // Pass #1: Compute the file offset for each section.
+ size_t FileOff = FileHeader.size(); // File header first.
+
+  // Compute the file offset of each section's data, in order.
+ for (std::list<ELFSection>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (I->Align)
+ FileOff = (FileOff+I->Align-1) & ~(I->Align-1);
+ I->Offset = FileOff;
+ FileOff += I->SectionData.size();
+ }
+
+  // Align the file offset for the section header table.
+ unsigned TableAlign = is64Bit ? 8 : 4;
+ FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+ // Now that we know where all of the sections will be emitted, set the e_shnum
+ // entry in the ELF header.
+ OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+ FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset);
+
+ // Now that we know the offset in the file of the section table, update the
+ // e_shoff address in the ELF header.
+ FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset);
+
+ // Now that we know all of the data in the file header, emit it and all of the
+ // sections!
+ O.write((char*)&FileHeader[0], FileHeader.size());
+ FileOff = FileHeader.size();
+ DataBuffer().swap(FileHeader);
+
+ DataBuffer Table;
+ OutputBuffer TableOut(Table, is64Bit, isLittleEndian);
+
+ // Emit all of the section data and build the section table itself.
+ while (!SectionList.empty()) {
+ const ELFSection &S = *SectionList.begin();
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (S.Align)
+ for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+ O.write((char*)&S.SectionData[0], S.SectionData.size());
+ FileOff += S.SectionData.size();
+
+ TableOut.outword(S.NameIdx); // sh_name - Symbol table name idx
+ TableOut.outword(S.Type); // sh_type - Section contents & semantics
+ TableOut.outword(S.Flags); // sh_flags - Section flags.
+ TableOut.outaddr(S.Addr); // sh_addr - The mem addr this section is in.
+ TableOut.outaddr(S.Offset); // sh_offset - Offset from the file start.
+ TableOut.outword(S.Size); // sh_size - The section size.
+ TableOut.outword(S.Link); // sh_link - Section header table index link.
+    TableOut.outword(S.Info);    // sh_info - Auxiliary information.
+ TableOut.outword(S.Align); // sh_addralign - Alignment of section.
+ TableOut.outword(S.EntSize); // sh_entsize - Size of entries in the section
+
+ SectionList.pop_front();
+ }
+
+ // Align output for the section table.
+ for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+
+ // Emit the section table itself.
+ O.write((char*)&Table[0], Table.size());
+}
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
new file mode 100644
index 0000000..31aa05a
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,230 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <list>
+#include <map>
+
+namespace llvm {
+ class GlobalVariable;
+ class Mangler;
+ class MachineCodeEmitter;
+ class ELFCodeEmitter;
+ class raw_ostream;
+
+ /// ELFWriter - This class implements the common target-independent code for
+ /// writing ELF files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class ELFWriter : public MachineFunctionPass {
+ friend class ELFCodeEmitter;
+ public:
+ static char ID;
+
+ MachineCodeEmitter &getMachineCodeEmitter() const {
+ return *(MachineCodeEmitter*)MCE;
+ }
+
+ ELFWriter(raw_ostream &O, TargetMachine &TM);
+ ~ELFWriter();
+
+ typedef std::vector<unsigned char> DataBuffer;
+
+ protected:
+ /// Output stream to send the resultant object file to.
+ ///
+ raw_ostream &O;
+
+ /// Target machine description.
+ ///
+ TargetMachine &TM;
+
+ /// Mang - The object used to perform name mangling for this module.
+ ///
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ ELFCodeEmitter *MCE;
+
+ //===------------------------------------------------------------------===//
+ // Properties to be set by the derived class ctor, used to configure the
+ // ELFWriter.
+
+ // e_machine - This field is the target specific value to emit as the
+ // e_machine member of the ELF header.
+ unsigned short e_machine;
+
+ // e_flags - The machine flags for the target. This defaults to zero.
+ unsigned e_flags;
+
+ //===------------------------------------------------------------------===//
+ // Properties inferred automatically from the target machine.
+ //
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the ELF file.
+ bool doInitialization(Module &M);
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the ELF file to 'O'.
+ bool doFinalization(Module &M);
+
+ private:
+ // The buffer we accumulate the file header into. Note that this should be
+ // changed into something much more efficient later (and the bitcode writer
+ // as well!).
+ DataBuffer FileHeader;
+
+ /// ELFSection - This struct contains information about each section that is
+ /// emitted to the file. This is eventually turned into the section header
+ /// table at the end of the file.
+ struct ELFSection {
+ std::string Name; // Name of the section.
+ unsigned NameIdx; // Index in .shstrtab of name, once emitted.
+ unsigned Type;
+ unsigned Flags;
+ uint64_t Addr;
+ unsigned Offset;
+ unsigned Size;
+ unsigned Link;
+ unsigned Info;
+ unsigned Align;
+ unsigned EntSize;
+
+ /// SectionIdx - The number of the section in the Section Table.
+ ///
+ unsigned short SectionIdx;
+
+ /// SectionData - The actual data for this section which we are building
+ /// up for emission to the file.
+ DataBuffer SectionData;
+
+ enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3,
+ SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7,
+ SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 };
+ enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 };
+ enum { // SHF - ELF Section Header Flags
+ SHF_WRITE = 1 << 0, // Writable
+ SHF_ALLOC = 1 << 1, // Mapped into the process addr space
+ SHF_EXECINSTR = 1 << 2, // Executable
+ SHF_MERGE = 1 << 4, // Might be merged if equal
+ SHF_STRINGS = 1 << 5, // Contains null-terminated strings
+ SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index
+ SHF_LINK_ORDER = 1 << 7, // Preserve order after combining
+ SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
+ SHF_GROUP = 1 << 9, // Section is a member of a group
+ SHF_TLS = 1 << 10 // Section holds thread-local data
+ };
+
+ ELFSection(const std::string &name)
+ : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
+ Link(0), Info(0), Align(0), EntSize(0) {
+ }
+ };
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the section header table
+ /// is constructed from this info.
+ std::list<ELFSection> SectionList;
+ unsigned NumSections; // Always = SectionList.size()
+
+ /// SectionLookup - This is a mapping from section name to section number in
+ /// the SectionList.
+ std::map<std::string, ELFSection*> SectionLookup;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ ELFSection &getSection(const std::string &Name,
+ unsigned Type, unsigned Flags = 0) {
+ ELFSection *&SN = SectionLookup[Name];
+ if (SN) return *SN;
+
+ SectionList.push_back(Name);
+ SN = &SectionList.back();
+ SN->SectionIdx = NumSections++;
+ SN->Type = Type;
+ SN->Flags = Flags;
+ return *SN;
+ }
+
+ ELFSection &getDataSection() {
+ return getSection(".data", ELFSection::SHT_PROGBITS,
+ ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
+ }
+ ELFSection &getBSSSection() {
+ return getSection(".bss", ELFSection::SHT_NOBITS,
+ ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
+ }
+
+ /// ELFSym - This struct contains information about each symbol that is
+ /// added to logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct ELFSym {
+ const GlobalValue *GV; // The global value this corresponds to.
+ unsigned NameIdx; // Index in .strtab of name, once emitted.
+ uint64_t Value;
+ unsigned Size;
+ unsigned char Info;
+ unsigned char Other;
+ unsigned short SectionIdx;
+
+ enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
+ enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3,
+ STT_FILE = 4 };
+ ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0),
+ Other(0), SectionIdx(0) {}
+
+ void SetBind(unsigned X) {
+ assert(X == (X & 0xF) && "Bind value out of range!");
+ Info = (Info & 0x0F) | (X << 4);
+ }
+ void SetType(unsigned X) {
+ assert(X == (X & 0xF) && "Type value out of range!");
+ Info = (Info & 0xF0) | X;
+ }
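+
+      // For example, a global function symbol packs as
+      // Info = (STB_GLOBAL << 4) | STT_FUNC = (1 << 4) | 2 = 0x12,
+      // the standard ELF st_info encoding.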
+ };
+
+ /// SymbolTable - This is the list of symbols we have emitted to the file.
+ /// This actually gets rearranged before emission to the file (to put the
+ /// local symbols first in the list).
+ std::vector<ELFSym> SymbolTable;
+
+ // As we complete the ELF file, we need to update fields in the ELF header
+ // (e.g. the location of the section table). These members keep track of
+ // the offset in ELFHeader of these various pieces to update and other
+ // locations in the file.
+ unsigned ELFHeader_e_shoff_Offset; // e_shoff in ELF header.
+ unsigned ELFHeader_e_shstrndx_Offset; // e_shstrndx in ELF header.
+ unsigned ELFHeader_e_shnum_Offset; // e_shnum in ELF header.
+ private:
+ void EmitGlobal(GlobalVariable *GV);
+
+ void EmitSymbolTable();
+
+ void EmitSectionTableStringTable();
+ void OutputSectionsAndSectionTable();
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 0000000..cf2ebb3
--- /dev/null
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,212 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN Printer : public FunctionPass {
+ static char ID;
+ std::ostream &OS;
+
+ public:
+ explicit Printer(std::ostream &OS = *cerr);
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ };
+
+ class VISIBILITY_HIDDEN Deleter : public FunctionPass {
+ static char ID;
+
+ public:
+ Deleter();
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+
+}
+
+static RegisterPass<GCModuleInfo>
+X("collector-metadata", "Create Garbage Collector Module Metadata");
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+ : ImmutablePass(&ID) {}
+
+GCModuleInfo::~GCModuleInfo() {
+ clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+ const std::string &Name) {
+ const char *Start = Name.c_str();
+
+ strategy_map_type::iterator NMI =
+ StrategyMap.find(Start, Start + Name.size());
+ if (NMI != StrategyMap.end())
+ return NMI->getValue();
+
+ for (GCRegistry::iterator I = GCRegistry::begin(),
+ E = GCRegistry::end(); I != E; ++I) {
+ if (strcmp(Start, I->getName()) == 0) {
+ GCStrategy *S = I->instantiate();
+ S->M = M;
+ S->Name = Name;
+ StrategyMap.GetOrCreateValue(Start, Start + Name.size()).setValue(S);
+ StrategyList.push_back(S);
+ return S;
+ }
+ }
+
+ cerr << "unsupported GC: " << Name << "\n";
+ abort();
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+ GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ FInfoMap.clear();
+ StrategyMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(std::ostream &OS) {
+ return new Printer(OS);
+}
+
+Printer::Printer(std::ostream &OS)
+ : FunctionPass(&ID), OS(OS) {}
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ default: assert(0 && "Unknown GC point kind");
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+}
+
+bool Printer::runOnFunction(Function &F) {
+  if (F.hasGC()) {
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getNameStart() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\tlabel " << PI->Num << ": " << DescKind(PI->Kind) << ", live = {";
+
+      // Print the live roots, comma-separated; tolerate an empty live set.
+      for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+                                         RE = FD->live_end(PI); RI != RE;) {
+        OS << " " << RI->Num;
+        if (++RI != RE)
+          OS << ",";
+      }
+
+ OS << " }\n";
+ }
+ }
+
+ return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+ return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(&ID) {}
+
+const char *Deleter::getPassName() const {
+ return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &MF) {
+ return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Deleter didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 0000000..5a5ef84
--- /dev/null
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,30 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ // Default is no action.
+}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 0000000..ad7421a
--- /dev/null
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,392 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the GCStrategy. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
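+  /// For example (sketching the default lowering below): a call to
+  /// llvm.gcwrite(%v, %obj, %slot) becomes "store %v, %slot", and a call
+  /// to llvm.gcread(%obj, %slot) becomes a load of %slot.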
+ class VISIBILITY_HIDDEN LowerIntrinsics : public FunctionPass {
+ static bool NeedsDefaultLoweringPass(const GCStrategy &C);
+ static bool NeedsCustomLoweringPass(const GCStrategy &C);
+ static bool CouldBecomeSafePoint(Instruction *I);
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ static bool InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+
+ /// MachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// GCMetadata record for each function.
+ class VISIBILITY_HIDDEN MachineCodeAnalysis : public MachineFunctionPass {
+ const TargetMachine *TM;
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ unsigned InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ MachineCodeAnalysis();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ InitRoots(true),
+ UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+ cerr << "gc " << getName() << " must override performCustomLowering.\n";
+ abort();
+ return 0;
+}
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+ GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+ Functions.push_back(FI);
+ return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass(&ID) {}
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ bool MadeChange = false;
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I, IP);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+  // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ if (NeedsCustomLoweringPass(S))
+ MadeChange |= S.performCustomLowering(F);
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst*,32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getOperand(2), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ CI->getOperand(1)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
+ return new MachineCodeAnalysis();
+}
+
+char MachineCodeAnalysis::ID = 0;
+
+MachineCodeAnalysis::MachineCodeAnalysis()
+ : MachineFunctionPass(&ID) {}
+
+const char *MachineCodeAnalysis::getPassName() const {
+ return "Analyze Machine Code For Garbage Collection";
+}
+
+void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ unsigned Label = MMI->NextLabelID();
+ // N.B. we assume that MI is *not* equal to the "end()" iterator.
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label);
+ return Label;
+}
+
+void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
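+  // For example, when the strategy requests both kinds, the call at CI ends
+  // up bracketed: a GC_LABEL is inserted immediately before it (PreCall) and
+  // another immediately after it (PostCall).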
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall))
+ FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI));
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall))
+ FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI));
+}
+
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->getDesc().isCall())
+ VisitCallPoint(MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ assert(TRI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+ RE = FI->roots_end(); RI != RE; ++RI)
+ RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num);
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ FindSafePoints(MF);
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 0000000..1d0887f
--- /dev/null
+++ b/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1229 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
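+// If-conversion eliminates short conditional branches by predicating the
+// instructions on the branch paths; e.g. on a fully predicated target such
+// as ARM, "beq L; mov r0, #1; L:" can become "movne r0, #1", trading a
+// branch for conditionally executed instructions.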
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// Hidden options to help with debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+
+namespace {
+ class VISIBILITY_HIDDEN IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+    ICSimple,        // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
+
+    /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+    /// of the if-conversion feasibility analysis. This includes results from
+    /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), the
+    /// block's classification, the common tail block of its successors (if
+    /// it's a diamond shape), its size, whether it's predicable, and whether
+    /// any instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+    /// IsBrAnalyzable - True if AnalyzeBranch() returns false, i.e. the
+    ///                  block's branches were successfully analyzed.
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+  /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// Roots - Basic blocks that do not have successors. These are the starting
+    /// points of the graph traversal.
+ std::vector<MachineBasicBlock*> Roots;
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+ bool MadeChange;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "If Converter"; }
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ bool AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI);
+
+ bool MeetIfcvtSizeLimit(unsigned Size) const {
+ return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit();
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
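+    // Tokens are sorted with this comparator and then processed from the
+    // back of the vector, so on ties the later-sorting kinds (diamond over
+    // triangle over simple) are attempted first.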
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+        if (!C1->NeedSubsumption && C2->NeedSubsumption)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+static RegisterPass<IfConverter>
+X("if-converter", "If Converter");
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ if (!TII) return false;
+
+ static int FnNum = -1;
+ DOUT << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getFunction()->getName() << "\'";
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DOUT << " skipped\n";
+ return false;
+ }
+ DOUT << "\n";
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ // Look for root nodes, i.e. blocks without successors.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ if (I->succ_empty())
+ Roots.push_back(I);
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+      // If the block has been evicted from the queue or it has already been
+      // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: assert(false && "Unexpected!");
+ break;
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ";
+ RetVal = IfConvertSimple(BBI, Kind);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal) {
+ if (isFalse) NumSimpleFalse++;
+ else NumSimple++;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DOUT << "Ifcvt (Triangle";
+ if (isFalse)
+ DOUT << " false";
+ if (isRev)
+ DOUT << " rev";
+ DOUT << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ";
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) NumTriangleFRev++;
+ else NumTriangleFalse++;
+ } else {
+ if (isRev) NumTriangleRev++;
+ else NumTriangle++;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ";
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal) NumDiamonds++;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ Roots.clear();
+ BBAnalysis.clear();
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the end of the block
+/// branch. Swap block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// 'Dups', the number of instructions that the ifcvt would need to
+/// duplicate if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit())
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks whether the 'true' block's false
+/// branch branches to the 'false' block rather than the other way around.
+/// It also returns, in 'Dups', the number of instructions that the ifcvt
+/// would need to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (Size > TLI->getIfCvtDupBlockSizeLimit())
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+static
+MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
+ const TargetInstrInfo *TII) {
+ MachineBasicBlock::iterator I = BB->end();
+ while (I != BB->begin()) {
+ --I;
+ if (!I->getDesc().isBranch())
+ break;
+ }
+ return I;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+ while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups1;
+ ++TI;
+ ++FI;
+ }
+
+ TI = firstNonBranchInst(TrueBBI.BB, TII);
+ FI = firstNonBranchInst(FalseBBI.BB, TII);
+ while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups2;
+ --TI;
+ --FI;
+ }
+
+ return true;
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless it's the last instruction.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ClobbersPred = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isNotDuplicable())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
+
+ if (!isCondBr) {
+ if (!isPredicated)
+ BBI.NonPredSize++;
+ else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+      // A predicate-modifying instruction should end the block (except for
+      // already-predicated instructions and end-of-block branches).
+ if (isCondBr) {
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+      // The predicate may have been modified; the subsequent (currently)
+      // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+    // FIXME: Make use of PredDefs? e.g. ADDC, SUBC set predicates but are
+    // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TID.isPredicable()) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable or ends with fallthrough or unconditional branch.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+  // Do not ifcvt if either path is a back edge to the entry block of this
+  // sub-CFG, i.e. to BB itself.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = TrueBBI.Predicate.size() > 0;
+ bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool Enqueued = false;
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates. It returns true if any CFG restructuring is done to expose more
+/// if-conversion opportunities.
+bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ bool Change = false;
+ std::set<MachineBasicBlock*> Visited;
+ for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
+ for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
+ E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+ AnalyzeBlock(BB, Tokens);
+ }
+ }
+
+  // Sort to favor more complex ifcvt schemes.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+
+ return Change;
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or
+/// if all the intervening blocks are empty (given that BB can fall through
+/// to its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (++I != TI)
+ if (I == E || !I->empty())
+ return false;
+ return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+    // The ifcvt'd block will now look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+    // We cannot further ifcvt this block because the unconditional branch
+    // will have to be predicated on the new condition, which will not be
+    // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ bool DupBB = CvtBBI->BB->pred_size() > 1;
+ if (DupBB) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+  // Merge the 'true' and 'false' blocks by copying the instructions
+  // from the 'false' block to the 'true' block; unless the true block
+  // would clobber the predicate, in which case we do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+ while (NumDups1 != 0) {
+ ++DI1;
+ ++DI2;
+ --NumDups1;
+ }
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Predicate the 'true' block after removing its branch.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ++i)
+ --DI1;
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+
+ // Predicate the 'false' block.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ --DI2;
+ --NumDups2;
+ }
+ PredicateBlock(*BBI2, DI2, *Cond2);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1);
+ MergeBlocks(BBI, *BBI2);
+
+  // If the if-converted block falls through or unconditionally branches into
+  // the tail block, and the tail block does not have other predecessors, then
+  // fold the tail block in as well. Otherwise, unless it falls through to the
+  // tail, add an unconditional branch to it.
+ if (TailBB) {
+    // Take a reference so the IsDone update below actually sticks.
+    BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+ if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (TII->isPredicated(I))
+ continue;
+ if (!TII->PredicateInstruction(I, Cond)) {
+ cerr << "Unable to predicate " << *I << "!\n";
+ abort();
+ }
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ NumIfConvBBs++;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ bool isPredicated = TII->isPredicated(I);
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && !isPredicated && TID.isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+
+ if (!isPredicated)
+ if (!TII->PredicateInstruction(MI, Cond)) {
+ cerr << "Unable to predicate " << *MI << "!\n";
+ abort();
+ }
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ NumDupBBs++;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+///
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ // Redirect all branches to FromBB to ToBB.
+ std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(),
+ FromBBI.BB->pred_end());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ MachineBasicBlock *Pred = Preds[i];
+ if (Pred == ToBBI.BB)
+ continue;
+ Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB);
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ FromBBI.NonPredSize = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..e6912b8
--- /dev/null
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,892 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ Type::FloatTy);
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ Type::DoubleTy);
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+ Fn->arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases, such as
+/// when there was already a prototype for the external function but that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy, Constant *&FCache) {
+ if (!FCache) {
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+ }
+
+ IRBuilder<> Builder(CI->getParent(), CI);
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
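+// For instance, lowering llvm.setjmp to the libc function could look like
+// this (illustrative sketch; in this IR the call's operand 0 is the callee,
+// so the actual arguments begin at op_begin()+1):
+//   static Constant *SetjmpFCache = 0;
+//   ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(),
+//                   Type::Int32Ty, SetjmpFCache);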
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::Int32Ty);
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::VoidTy);
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::VoidTy);
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ Type::Int32Ty,
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
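+///
+/// For the 16-bit case the expansion reduces to (V << 8) | (V >> 8) with a
+/// logical right shift, turning e.g. 0xABCD into 0xCDAB; the wider cases
+/// shift and mask each byte lane into place the same way.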
+static Value *LowerBSWAP(Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ switch(BitSize) {
+ default: assert(0 && "Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::Int64Ty,
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::Int64Ty,
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
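+///
+/// The expansion uses the classic parallel bit count: each step masks and
+/// adds adjacent bit groups, starting with
+///   V = (V & 0x5555...) + ((V >> 1) & 0x5555...),
+/// and doubles the group width each iteration; integers wider than 64 bits
+/// are processed one 64-bit word at a time.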
+static Value *LowerCTPOP(Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+      Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+      Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
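+///
+/// The expansion first smears the most significant set bit into all lower
+/// positions (V |= V >> 1; V |= V >> 2; ...), so the leading zero count
+/// becomes the population count of the complemented value.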
+static Value *LowerCTLZ(Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(V, IP);
+}
+
+/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
+/// three integer arguments. The first argument is the Value from which the
+/// bits will be selected. It may be of any bit width. The second and third
+/// arguments specify a range of bits to select, with the second argument
+/// specifying the low bit and the third argument specifying the high bit. Both
+/// must be type i32. The result is the corresponding selected bits from the
+/// Value in the same width as the Value (first argument). If the low bit index
+/// is higher than the high bit index then the inverse selection is done and
+/// the bits are returned in inverse order.
+/// @brief Lowering of llvm.part.select intrinsic.
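+///
+/// For example, with Lo = 8 and Hi = 15 applied to the i32 value 0xABCD1234,
+/// the result is 0x12 (bits 15..8 shifted down to the low bits); with the
+/// indices swapped, the same bits come back in reverse order.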
+static Instruction *LowerPartSelect(CallInst *CI) {
+ IRBuilder<> Builder;
+
+ // Make sure we're dealing with a part select intrinsic here
+ Function *F = CI->getCalledFunction();
+ const FunctionType *FT = F->getFunctionType();
+ if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+ FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
+ !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
+ return CI;
+
+ // Get the intrinsic implementation function by converting all the . to _
+ // in the intrinsic's function name and then reconstructing the function
+ // declaration.
+ std::string Name(F->getName());
+ for (unsigned i = 4; i < Name.length(); ++i)
+ if (Name[i] == '.')
+ Name[i] = '_';
+ Module* M = F->getParent();
+ F = cast<Function>(M->getOrInsertFunction(Name, FT));
+ F->setLinkage(GlobalValue::WeakAnyLinkage);
+
+ // If we haven't defined the impl function yet, do so now
+ if (F->isDeclaration()) {
+
+ // Get the arguments to the function
+ Function::arg_iterator args = F->arg_begin();
+ Value* Val = args++; Val->setName("Val");
+ Value* Lo = args++; Lo->setName("Lo");
+ Value* Hi = args++; Hi->setName("High");
+
+ // We want to select a range of bits here such that [Hi, Lo] is shifted
+ // down to the low bits. However, it is quite possible that Hi is smaller
+    // than Lo, in which case the bits have to be reversed.
+
+ // Create the blocks we will need for the two cases (forward, reverse)
+ BasicBlock* CurBB = BasicBlock::Create("entry", F);
+ BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent());
+ BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent());
+ BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent());
+ BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent());
+ BasicBlock *RsltBlk = BasicBlock::Create("result", CurBB->getParent());
+
+ Builder.SetInsertPoint(CurBB);
+
+ // Cast Hi and Lo to the size of Val so the widths are all the same
+ if (Hi->getType() != Val->getType())
+ Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false,
+ "tmp");
+ if (Lo->getType() != Val->getType())
+ Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false,
+ "tmp");
+
+ // Compute a few things that both cases will need, up front.
+ Constant* Zero = ConstantInt::get(Val->getType(), 0);
+ Constant* One = ConstantInt::get(Val->getType(), 1);
+ Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
+
+ // Compare the Hi and Lo bit positions. This is used to determine
+ // which case we have (forward or reverse)
+ Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less");
+ Builder.CreateCondBr(Cmp, RevSize, FwdSize);
+
+ // First, compute the number of bits in the forward case.
+ Builder.SetInsertPoint(FwdSize);
+ Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits");
+ Builder.CreateBr(Compute);
+
+ // Second, compute the number of bits in the reverse case.
+ Builder.SetInsertPoint(RevSize);
+ Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits");
+ Builder.CreateBr(Compute);
+
+ // Now, compute the bit range. Start by getting the bitsize and the shift
+ // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
+ // the number of bits we want in the range. We shift the bits down to the
+ // least significant bits, apply the mask to zero out unwanted high bits,
+ // and we have computed the "forward" result. It may still need to be
+ // reversed.
+ Builder.SetInsertPoint(Compute);
+
+ // Get the BitSize from one of the two subtractions
+ PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits");
+ BitSize->reserveOperandSpace(2);
+ BitSize->addIncoming(FBitSize, FwdSize);
+ BitSize->addIncoming(RBitSize, RevSize);
+
+ // Get the ShiftAmount as the smaller of Hi/Lo
+ PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt");
+ ShiftAmt->reserveOperandSpace(2);
+ ShiftAmt->addIncoming(Lo, FwdSize);
+ ShiftAmt->addIncoming(Hi, RevSize);
+
+ // Increment the bit size
+ Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits");
+
+ // Create a Mask to zero out the high order bits.
+ Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask");
+ Mask = Builder.CreateNot(Mask, "mask");
+
+ // Shift the bits down and apply the mask
+ Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres");
+ FRes = Builder.CreateAnd(FRes, Mask, "fres");
+ Builder.CreateCondBr(Cmp, Reverse, RsltBlk);
+
+  // In the Reverse block we already have the masked forward result in FRes,
+  // but we must reverse it by shifting bits out of FRes from the right and
+  // into RRes from the left.
+ Builder.SetInsertPoint(Reverse);
+
+ // First set up our loop counters
+ PHINode *Count = Builder.CreatePHI(Val->getType(), "count");
+ Count->reserveOperandSpace(2);
+ Count->addIncoming(BitSizePlusOne, Compute);
+
+ // Next, get the value that we are shifting.
+ PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
+ BitsToShift->reserveOperandSpace(2);
+ BitsToShift->addIncoming(FRes, Compute);
+
+ // Finally, get the result of the last computation
+ PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
+ RRes->reserveOperandSpace(2);
+ RRes->addIncoming(Zero, Compute);
+
+ // Decrement the counter
+ Value *Decr = Builder.CreateSub(Count, One, "decr");
+ Count->addIncoming(Decr, Reverse);
+
+ // Compute the Bit that we want to move
+ Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit");
+
+ // Compute the new value for next iteration.
+ Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift");
+ BitsToShift->addIncoming(NewVal, Reverse);
+
+ // Shift the bit into the low bits of the result.
+ Value *NewRes = Builder.CreateShl(RRes, One, "lshift");
+ NewRes = Builder.CreateOr(NewRes, Bit, "addbit");
+ RRes->addIncoming(NewRes, Reverse);
+
+ // Terminate loop if we've moved all the bits.
+ Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond");
+ Builder.CreateCondBr(Cond, RsltBlk, Reverse);
+
+  // Finally, in the result block, select one of the two results with a PHI
+  // node and return the result.
+ Builder.SetInsertPoint(RsltBlk);
+ PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select");
+ BitSelect->reserveOperandSpace(2);
+ BitSelect->addIncoming(FRes, Compute);
+ BitSelect->addIncoming(NewRes, Reverse);
+ Builder.CreateRet(BitSelect);
+ }
+
+ // Return a call to the implementation function
+ Builder.SetInsertPoint(CI->getParent(), CI);
+ CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1),
+ CI->getOperand(2), CI->getOperand(3));
+ NewCI->setName(CI->getName());
+ return NewCI;
+}
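
For reference, a minimal scalar sketch of what the expanded part.select
computes (illustrative C++ only, not part of the imported sources; the fixed
32-bit width and all names are assumed, and the selected field is taken to be
narrower than the full width so the mask shift stays defined):

    #include <cstdint>

    // Extract bits Lo..Hi (inclusive) from Val; when Hi < Lo, extract
    // Hi..Lo instead and return the bits in reverse order, mirroring the
    // FwdSize/RevSize/Compute/Reverse blocks built above.
    static uint32_t part_select32(uint32_t Val, uint32_t Lo, uint32_t Hi) {
      bool Rev = Hi < Lo;
      uint32_t Shift = Rev ? Hi : Lo;                 // smaller of Hi/Lo
      uint32_t Bits  = (Rev ? Lo - Hi : Hi - Lo) + 1; // field width
      uint32_t Mask  = ~(~0u << Bits);                // Bits low ones
      uint32_t Fwd   = (Val >> Shift) & Mask;         // "forward" result
      if (!Rev)
        return Fwd;
      uint32_t RRes = 0;           // shift bits out of Fwd, into RRes
      for (uint32_t i = 0; i != Bits; ++i) {
        RRes = (RRes << 1) | (Fwd & 1);
        Fwd >>= 1;
      }
      return RRes;
    }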
+
+/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes four
+/// integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High).
+/// The first two arguments can be any bit width. The result is the same width
+/// as %Value. The operation replaces bits between %Low and %High with the
+/// value in %Replacement. If %Replacement is not the same width, it is
+/// truncated or zero extended as appropriate to fit the bits being replaced.
+/// If %Low is greater than %High, the replacement bits are inserted in
+/// reverse order, as the expansion below implements.
+/// @brief Lowering of llvm.part.set intrinsic.
+static Instruction *LowerPartSet(CallInst *CI) {
+ IRBuilder<> Builder;
+
+  // Make sure we're dealing with a part set intrinsic here.
+ Function *F = CI->getCalledFunction();
+ const FunctionType *FT = F->getFunctionType();
+ if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+ FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
+ !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
+ !FT->getParamType(3)->isInteger())
+ return CI;
+
+ // Get the intrinsic implementation function by converting all the . to _
+ // in the intrinsic's function name and then reconstructing the function
+ // declaration.
+ std::string Name(F->getName());
+ for (unsigned i = 4; i < Name.length(); ++i)
+ if (Name[i] == '.')
+ Name[i] = '_';
+ Module* M = F->getParent();
+ F = cast<Function>(M->getOrInsertFunction(Name, FT));
+ F->setLinkage(GlobalValue::WeakAnyLinkage);
+
+ // If we haven't defined the impl function yet, do so now
+ if (F->isDeclaration()) {
+ // Get the arguments for the function.
+ Function::arg_iterator args = F->arg_begin();
+ Value* Val = args++; Val->setName("Val");
+ Value* Rep = args++; Rep->setName("Rep");
+ Value* Lo = args++; Lo->setName("Lo");
+ Value* Hi = args++; Hi->setName("Hi");
+
+ // Get some types we need
+ const IntegerType* ValTy = cast<IntegerType>(Val->getType());
+ const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
+ uint32_t RepBits = RepTy->getBitWidth();
+
+ // Constant Definitions
+ ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
+ ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
+ ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
+ ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
+ ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
+ ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
+ ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
+
+ // Basic blocks we fill in below.
+ BasicBlock* entry = BasicBlock::Create("entry", F, 0);
+ BasicBlock* large = BasicBlock::Create("large", F, 0);
+ BasicBlock* small = BasicBlock::Create("small", F, 0);
+ BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
+ BasicBlock* result = BasicBlock::Create("result", F, 0);
+
+ // BASIC BLOCK: entry
+ Builder.SetInsertPoint(entry);
+ // First, get the number of bits that we're placing as an i32
+ Value* is_forward = Builder.CreateICmpULT(Lo, Hi);
+ Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo);
+ Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi);
+ Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn);
+ NumBits = Builder.CreateAdd(NumBits, One);
+    // Now, convert Lo (the smaller bit position) to the bit width of ValTy.
+ Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false);
+ // Determine if the replacement bits are larger than the number of bits we
+ // are replacing and deal with it.
+ Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth);
+ Builder.CreateCondBr(is_large, large, small);
+
+ // BASIC BLOCK: large
+ Builder.SetInsertPoint(large);
+ Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits);
+ MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(),
+ /* isSigned */ false);
+ Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits);
+ Value* Rep2 = Builder.CreateAnd(Mask1, Rep);
+ Builder.CreateBr(small);
+
+ // BASIC BLOCK: small
+ Builder.SetInsertPoint(small);
+ PHINode* Rep3 = Builder.CreatePHI(RepTy);
+ Rep3->reserveOperandSpace(2);
+ Rep3->addIncoming(Rep2, large);
+ Rep3->addIncoming(Rep, entry);
+ Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false);
+ Builder.CreateCondBr(is_forward, result, reverse);
+
+ // BASIC BLOCK: reverse (reverses the bits of the replacement)
+ Builder.SetInsertPoint(reverse);
+ // Set up our loop counter as a PHI so we can decrement on each iteration.
+ // We will loop for the number of bits in the replacement value.
+ PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count");
+ Count->reserveOperandSpace(2);
+ Count->addIncoming(NumBits, small);
+
+ // Get the value that we are shifting bits out of as a PHI because
+ // we'll change this with each iteration.
+ PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
+ BitsToShift->reserveOperandSpace(2);
+ BitsToShift->addIncoming(Rep4, small);
+
+ // Get the result of the last computation or zero on first iteration
+ PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
+ RRes->reserveOperandSpace(2);
+ RRes->addIncoming(ValZero, small);
+
+ // Decrement the loop counter by one
+ Value *Decr = Builder.CreateSub(Count, One);
+ Count->addIncoming(Decr, reverse);
+
+ // Get the bit that we want to move into the result
+ Value *Bit = Builder.CreateAnd(BitsToShift, ValOne);
+
+ // Compute the new value of the bits to shift for the next iteration.
+ Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne);
+ BitsToShift->addIncoming(NewVal, reverse);
+
+ // Shift the bit we extracted into the low bit of the result.
+ Value *NewRes = Builder.CreateShl(RRes, ValOne);
+ NewRes = Builder.CreateOr(NewRes, Bit);
+ RRes->addIncoming(NewRes, reverse);
+
+ // Terminate loop if we've moved all the bits.
+ Value *Cond = Builder.CreateICmpEQ(Decr, Zero);
+ Builder.CreateCondBr(Cond, result, reverse);
+
+ // BASIC BLOCK: result
+ Builder.SetInsertPoint(result);
+ PHINode *Rplcmnt = Builder.CreatePHI(Val->getType());
+ Rplcmnt->reserveOperandSpace(2);
+ Rplcmnt->addIncoming(NewRes, reverse);
+ Rplcmnt->addIncoming(Rep4, small);
+ Value* t0 = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false);
+ Value* t1 = Builder.CreateShl(ValMask, Lo);
+ Value* t2 = Builder.CreateNot(t1);
+ Value* t3 = Builder.CreateShl(t1, t0);
+ Value* t4 = Builder.CreateOr(t2, t3);
+ Value* t5 = Builder.CreateAnd(t4, Val);
+ Value* t6 = Builder.CreateShl(Rplcmnt, Lo);
+ Value* Rslt = Builder.CreateOr(t5, t6, "part_set");
+ Builder.CreateRet(Rslt);
+ }
+
+ // Return a call to the implementation function
+ Builder.SetInsertPoint(CI->getParent(), CI);
+ CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1),
+ CI->getOperand(2), CI->getOperand(3),
+ CI->getOperand(4));
+ NewCI->setName(CI->getName());
+ return NewCI;
+}
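
Likewise, a scalar sketch of the part.set expansion (illustrative C++ only,
not part of the imported sources; 32-bit widths and all names are assumed).
The 64-bit intermediate keeps the field mask defined even when the field spans
the whole word:

    #include <cstdint>

    // Replace the field at bit positions min(Lo,Hi)..max(Lo,Hi) of Val
    // with the low bits of Rep; when Lo > Hi the replacement bits are
    // inserted in reverse order, as in the reverse loop above.
    static uint32_t part_set32(uint32_t Val, uint32_t Rep,
                               uint32_t Lo, uint32_t Hi) {
      bool Forward = Lo < Hi;
      uint32_t Pos  = Forward ? Lo : Hi;               // min(Lo, Hi)
      uint32_t Bits = (Forward ? Hi - Lo : Lo - Hi) + 1;
      uint64_t Field = ((1ull << Bits) - 1) << Pos;    // bits replaced
      Rep &= (uint32_t)((1ull << Bits) - 1);           // trim Rep to Bits
      if (!Forward) {                                  // insert reversed
        uint32_t R = 0;
        for (uint32_t i = 0; i != Bits; ++i) {
          R = (R << 1) | (Rep & 1);
          Rep >>= 1;
        }
        Rep = R;
      }
      return (Val & (uint32_t)~Field) | (Rep << Pos);
    }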
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, Constant *FCache,
+ Constant *DCache, Constant *LDCache,
+ const char *Fname, const char *Dname,
+ const char *LDname) {
+ switch (CI->getOperand(1)->getType()->getTypeID()) {
+ default: assert(0 && "Invalid type in intrinsic"); abort();
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
+ Type::FloatTy, FCache);
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
+ Type::DoubleTy, DCache);
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
+ CI->getOperand(1)->getType(), LDCache);
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+
+ Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ cerr << "Cannot lower a call to a non-intrinsic function '"
+ << Callee->getName() << "'!\n";
+ abort();
+ default:
+ cerr << "Error: Code generator does not support intrinsic function '"
+ << Callee->getName() << "'!\n";
+ abort();
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+  // never optimized (i.e., right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ static Constant *SetjmpFCache = 0;
+ Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ Type::Int32Ty, SetjmpFCache);
+ if (CI->getType() != Type::VoidTy)
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (CI->getType() != Type::VoidTy)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ static Constant *LongjmpFCache = 0;
+ ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ Type::VoidTy, LongjmpFCache);
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ static Constant *AbortFCache = 0;
+ ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
+ Type::VoidTy, AbortFCache);
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
+ Value *Src = CI->getOperand(1);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::part_select:
+ CI->replaceAllUsesWith(LowerPartSelect(CI));
+ break;
+
+ case Intrinsic::part_set:
+ CI->replaceAllUsesWith(LowerPartSet(CI));
+ break;
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ static bool Warned = false;
+ if (!Warned)
+ cerr << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ cerr << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ cerr << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
+ break;
+ }
+
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_exception:
+ case Intrinsic::eh_selector_i32:
+ case Intrinsic::eh_selector_i64:
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::eh_typeid_for_i32:
+ case Intrinsic::eh_typeid_for_i64:
+ // Return something different to eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy: {
+ static Constant *MemcpyFCache = 0;
+ const IntegerType *IntPtr = TD.getIntPtrType();
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ Ops[1] = CI->getOperand(2);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemcpyFCache);
+ break;
+ }
+ case Intrinsic::memmove: {
+ static Constant *MemmoveFCache = 0;
+ const IntegerType *IntPtr = TD.getIntPtrType();
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ Ops[1] = CI->getOperand(2);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemmoveFCache);
+ break;
+ }
+ case Intrinsic::memset: {
+ static Constant *MemsetFCache = 0;
+ const IntegerType *IntPtr = TD.getIntPtrType();
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ // Extend the amount to i32.
+ Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty,
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemsetFCache);
+ break;
+ }
+ case Intrinsic::sqrt: {
+ static Constant *sqrtFCache = 0;
+ static Constant *sqrtDCache = 0;
+ static Constant *sqrtLDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, sqrtFCache, sqrtDCache, sqrtLDCache,
+ "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ static Constant *logFCache = 0;
+ static Constant *logDCache = 0;
+ static Constant *logLDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, logFCache, logDCache, logLDCache,
+ "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ static Constant *log2FCache = 0;
+ static Constant *log2DCache = 0;
+ static Constant *log2LDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, log2FCache, log2DCache, log2LDCache,
+ "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ static Constant *log10FCache = 0;
+ static Constant *log10DCache = 0;
+ static Constant *log10LDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, log10FCache, log10DCache, log10LDCache,
+ "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ static Constant *expFCache = 0;
+ static Constant *expDCache = 0;
+ static Constant *expLDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, expFCache, expDCache, expLDCache,
+ "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ static Constant *exp2FCache = 0;
+ static Constant *exp2DCache = 0;
+ static Constant *exp2LDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, exp2FCache, exp2DCache, exp2LDCache,
+ "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ static Constant *powFCache = 0;
+ static Constant *powDCache = 0;
+ static Constant *powLDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, powFCache, powDCache, powLDCache,
+ "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (CI->getType() != Type::VoidTy)
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
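
As a side note on the cttz lowering earlier in this file: the identity
cttz(x) = ctpop(~x & (x-1)) works because ~x & (x-1) is all ones exactly in
the trailing-zero positions of x (and all ones when x is 0). A small
illustrative C++ check, not part of the imported sources:

    #include <cstdint>

    // Kernighan-style popcount: clears the lowest set bit per iteration.
    static unsigned popcount32(uint32_t v) {
      unsigned n = 0;
      for (; v; v &= v - 1)
        ++n;
      return n;
    }

    // Trailing-zero count via the identity used by LowerIntrinsicCall;
    // yields 32 for x == 0, matching a full-width result.
    static unsigned cttz32(uint32_t x) {
      return popcount32(~x & (x - 1));
    }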
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 0000000..b3c60e6
--- /dev/null
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,289 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace llvm {
+ bool EnableFastISel;
+}
+
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden,
+ cl::desc("Dump emitter generated instructions as assembly"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+// When this works it will be on by default.
+static cl::opt<bool>
+DisablePostRAScheduler("disable-post-RA-scheduler",
+ cl::desc("Disable scheduling after register allocation"),
+ cl::init(true));
+
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -fast.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the experimental \"fast\" instruction selector"));
+
+FileModel::Model
+LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel))
+ return FileModel::Error;
+
+ // Fold redundant debug labels.
+ PM.add(createDebugLabelFoldingPass());
+
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodePlacementOptPass());
+
+ switch (FileType) {
+ default:
+ break;
+ case TargetMachine::AssemblyFile:
+ if (addAssemblyEmitter(PM, OptLevel, getAsmVerbosityDefault(), Out))
+ return FileModel::Error;
+ return FileModel::AsmFile;
+ case TargetMachine::ObjectFile:
+ if (getMachOWriterInfo())
+ return FileModel::MachOFile;
+ else if (getELFWriterInfo())
+ return FileModel::ElfFile;
+ }
+
+ return FileModel::Error;
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
+ MachineCodeEmitter *MCE,
+ CodeGenOpt::Level OptLevel) {
+ if (MCE)
+ addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *MCE);
+
+ PM.add(createGCInfoDeleter());
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
+ JITCodeEmitter *JCE,
+ CodeGenOpt::Level OptLevel) {
+ if (JCE)
+ addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *JCE);
+
+ PM.add(createGCInfoDeleter());
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a MachineCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should return true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ MachineCodeEmitter &MCE,
+ CodeGenOpt::Level OptLevel) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel))
+ return true;
+
+ if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ addCodeEmitter(PM, OptLevel, PrintEmittedAsm, MCE);
+
+ PM.add(createGCInfoDeleter());
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a MachineCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should return true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ JITCodeEmitter &JCE,
+ CodeGenOpt::Level OptLevel) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel))
+ return true;
+
+ if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ addCodeEmitter(PM, OptLevel, PrintEmittedAsm, JCE);
+
+ PM.add(createGCInfoDeleter());
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+static void printAndVerify(PassManagerBase &PM,
+ bool allowDoubleDefs = false) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(allowDoubleDefs));
+}
+
+/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
+/// emitting to assembly files or machine code output.
+///
+bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Standard LLVM-Level Passes.
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs()));
+ }
+
+ // Turn exception handling constructs into something the code generators can
+ // handle.
+ if (!getTargetAsmInfo()->doesSupportExceptionHandling())
+ PM.add(createLowerInvokePass(getTargetLowering()));
+ else
+ PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &errs()));
+
+ // Standard Lower-Level Passes.
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
+ EnableFastISel = true;
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, /* allowDoubleDefs= */ true);
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, /* allowDoubleDefs= */ true);
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM);
+
+ // Perform register allocation.
+ PM.add(createRegisterAllocator());
+
+ // Perform stack slot coloring.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createStackSlotColoringPass(OptLevel >= CodeGenOpt::Aggressive));
+
+ printAndVerify(PM); // Print the register-allocated code
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM);
+
+ PM.add(createLowerSubregsPass());
+ printAndVerify(PM);
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM);
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None && !DisablePostRAScheduler) {
+ PM.add(createPostRAScheduler());
+ printAndVerify(PM);
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printAndVerify(PM);
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+ printAndVerify(PM);
+
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(*cerr));
+
+ return false;
+}
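
One subtlety in addCommonCodeGenPasses above is the tri-state -fast-isel
override. A toy model of the decision (illustrative C++ only; the enum and
function names are assumptions, not part of the imported sources):

    // -fast-isel forces the selector on or off; when left unset, fast
    // instruction selection defaults on only at CodeGenOpt::None.
    enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

    static bool shouldEnableFastISel(BoolOrDefault Opt, bool OptLevelIsNone) {
      return Opt == BOU_TRUE || (OptLevelIsNone && Opt != BOU_FALSE);
    }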
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 0000000..2e7b89c
--- /dev/null
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,114 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
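
Stripped of the SUnit machinery, the comparator above is a cascade of
tie-breakers; a toy model of the same ordering (illustrative C++ only, names
assumed, and the isScheduleHigh special case omitted):

    // Returning true marks LHS as lower priority, so the queue prefers
    // higher critical-path latency, then nodes that solely block more
    // successors, then falls back on the node number for a stable order.
    struct ToyNode { unsigned Num, Latency, SolelyBlocked; };

    static bool lowerPriorityThan(const ToyNode &L, const ToyNode &R) {
      if (L.Latency != R.Latency) return L.Latency < R.Latency;
      if (L.SolelyBlocked != R.SolelyBlocked)
        return L.SolelyBlocked < R.SolelyBlocked;
      return L.Num < R.Num;
    }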
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it; otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push_impl(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push(SU);
+}
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..67120b8
--- /dev/null
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,853 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of the machine instructions, an interval [i, j) is said to be a
+// live interval for register v if there is no instruction with number j' > j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <ostream>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(unsigned I) const {
+ Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ return r->contains(I);
+}
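
This upper_bound-then-step-back idiom recurs throughout the file
(liveBeforeAndAt, overlaps, FindLiveRangeContaining, and removeRange all use
it). In isolation, on a plain sorted vector (illustrative C++ only, names
assumed, not part of the imported sources):

    #include <algorithm>
    #include <iterator>
    #include <vector>

    struct ToyRange {
      unsigned start, end;                 // half-open [start, end)
      bool contains(unsigned I) const { return start <= I && I < end; }
    };

    // Ranges are sorted by start point; upper_bound finds the first range
    // starting after I, so only its predecessor can contain I.
    static bool toyLiveAt(const std::vector<ToyRange> &Ranges, unsigned I) {
      auto It = std::upper_bound(
          Ranges.begin(), Ranges.end(), I,
          [](unsigned V, const ToyRange &R) { return V < R.start; });
      return It != Ranges.begin() && std::prev(It)->contains(I);
    }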
+
+// liveBeforeAndAt - Check if the interval is live at the index and the index
+// just before it. If index is liveAt, check if it starts a new live range.
+// If it does, then check if the previous live range ends at index-1.
+bool LiveInterval::liveBeforeAndAt(unsigned I) const {
+ Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ if (!r->contains(I))
+ return false;
+ if (I != r->start)
+ return true;
+ // I is the start of a live range. Check if the previous live range ends
+ // at I-1.
+ if (r == ranges.begin())
+ return false;
+ return r->end == I;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+ const_iterator StartPos) const {
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != ranges.begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.ranges.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+/// overlaps - Return true if the live interval overlaps a range specified
+/// by [Start, End).
+bool LiveInterval::overlaps(unsigned Start, unsigned End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = begin();
+ const_iterator E = end();
+ const_iterator si = std::upper_bound(I, E, Start);
+ const_iterator ei = std::upper_bound(I, E, End);
+ if (si != ei)
+ return true;
+ if (si == I)
+ return false;
+ --si;
+ return si->contains(Start);
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+ unsigned OldEnd = I->end;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = next(I);
+ for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ }
+
+ // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+ // Erase any dead ranges.
+ ranges.erase(next(I), MergeTo);
+
+ // Update kill info.
+ removeKills(ValNo, OldEnd, I->end-1);
+
+ // If the newly formed range now touches the range after it and if they have
+ // the same value number, merge the two ranges into one range.
+ Ranges::iterator Next = next(I);
+ if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
+ I->end = Next->end;
+ ranges.erase(Next);
+ }
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = I;
+ do {
+ if (MergeTo == ranges.begin()) {
+ I->start = NewStart;
+ ranges.erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another interval, just delete a range and
+ // extend that interval.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ MergeTo->end = I->end;
+ } else {
+ // Otherwise, extend the interval right after.
+ ++MergeTo;
+ MergeTo->start = NewStart;
+ MergeTo->end = I->end;
+ }
+
+ ranges.erase(next(MergeTo), next(I));
+ return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+ unsigned Start = LR.start, End = LR.end;
+ iterator it = std::upper_bound(From, ranges.end(), Start);
+
+ // If the inserted interval starts in the middle or right at the end of
+ // another interval, just extend that interval to contain the range of LR.
+ if (it != ranges.begin()) {
+ iterator B = prior(it);
+ if (LR.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendIntervalEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two LiveRanges with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this range ends in the middle of, or right next to, another
+ // interval, merge it into that interval.
+ if (it != ranges.end()) {
+ if (LR.valno == it->valno) {
+ if (it->start <= End) {
+ it = extendIntervalStartTo(it, Start);
+
+ // If LR is a complete superset of an interval, we may need to grow its
+ // endpoint as well.
+ if (End > it->end)
+ extendIntervalEndTo(it, End);
+ else if (End < it->end)
+ // Overlapping intervals, there might have been a kill here.
+ removeKill(it->valno, End);
+ return it;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(it->start >= End &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new range that doesn't interact with anything.
+ // Insert it.
+ return ranges.insert(it, LR);
+}
+
+/// isInOneLiveRange - Return true if the specified range lies entirely within
+/// a single LiveRange of the live interval.
+bool LiveInterval::isInOneLiveRange(unsigned Start, unsigned End) {
+ Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+ if (I == ranges.begin())
+ return false;
+ --I;
+ return I->contains(Start) && I->contains(End-1);
+}
+
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
+void LiveInterval::removeRange(unsigned Start, unsigned End,
+ bool RemoveDeadValNo) {
+ // Find the LiveRange containing this span.
+ Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+ assert(I != ranges.begin() && "Range is not in interval!");
+ --I;
+ assert(I->contains(Start) && I->contains(End-1) &&
+ "Range is not entirely in interval!");
+
+ // If the span we are removing is at the start of the LiveRange, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ removeKills(I->valno, Start, End);
+ if (RemoveDeadValNo) {
+ // Check if val# is dead.
+ bool isDead = true;
+ for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+ if (II != I && II->valno == ValNo) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that ValNo is dead, remove it. If it is the largest value
+ // number, just nuke it (and any other deleted values neighboring it),
+ // otherwise mark it as ~1U so it can be nuked later.
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } else {
+ ValNo->def = ~1U;
+ }
+ }
+ }
+
+ ranges.erase(I); // Removed the whole LiveRange.
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the LiveRange,
+ // adjust the other way.
+ if (I->end == End) {
+ removeKills(ValNo, Start, End);
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the LiveRange into two pieces.
+ unsigned OldEnd = I->end;
+ I->end = Start; // Trim the old interval.
+
+ // Insert the new one.
+ ranges.insert(next(I), LiveRange(End, OldEnd, ValNo));
+}
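
The branches above reduce to three shapes of overlap. On a bare half-open
span the same case split looks like this (illustrative C++ only; the
value-number bookkeeping is omitted and all names are assumed):

    #include <vector>

    struct ToySpan { unsigned start, end; };   // half-open [start, end)

    // Removing [A, B) from a span trims its front, trims its back, or
    // splits it in two; an empty result means [A, B) covered it entirely.
    static std::vector<ToySpan> removeFromSpan(ToySpan S,
                                               unsigned A, unsigned B) {
      std::vector<ToySpan> Out;
      if (S.start < A)
        Out.push_back({S.start, A});   // left piece survives
      if (B < S.end)
        Out.push_back({B, S.end});     // right piece survives
      return Out;
    }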
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ Ranges::iterator I = ranges.end();
+ Ranges::iterator E = ranges.begin();
+ do {
+ --I;
+ if (I->valno == ValNo)
+ ranges.erase(I);
+ } while (I != E);
+ // Now that ValNo is dead, remove it. If it is the largest value
+ // number, just nuke it (and any other deleted values neighboring it),
+ // otherwise mark it as ~1U so it can be nuked later.
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } else {
+ ValNo->def = ~1U;
+ }
+}
+
+/// scaleNumbering - Renumber VNI and ranges to provide gaps for new
+/// instructions.
+void LiveInterval::scaleNumbering(unsigned factor) {
+ // Scale ranges.
+ for (iterator RI = begin(), RE = end(); RI != RE; ++RI) {
+ RI->start = InstrSlots::scale(RI->start, factor);
+ RI->end = InstrSlots::scale(RI->end, factor);
+ }
+
+ // Scale VNI info.
+ for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) {
+ VNInfo *vni = *VNI;
+ if (vni->def != ~0U && vni->def != ~1U) {
+ vni->def = InstrSlots::scale(vni->def, factor);
+ }
+
+ for (unsigned i = 0; i < vni->kills.size(); ++i) {
+ if (vni->kills[i] != 0)
+ vni->kills[i] = InstrSlots::scale(vni->kills[i], factor);
+ }
+ }
+}
+
+/// FindLiveRangeContaining - Return the live range that contains the
+/// specified index, or null if there is none.
+LiveInterval::const_iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) const {
+ const_iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != ranges.begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+LiveInterval::iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) {
+ iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+/// findDefinedVNInfo - Find the VNInfo that's defined at the specified index
+/// (register interval) or defined by the specified register (stack interval).
+VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const {
+ VNInfo *VNI = NULL;
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i)
+ if ((*i)->def == DefIdxOrReg) {
+ VNI = *i;
+ break;
+ }
+ return VNI;
+}
+
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVector<VNInfo*, 16> &NewVNInfo) {
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ MustMapCurValNos = true;
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos) {
+ // Map the first live range.
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ ++OutIt;
+ for (iterator I = OutIt, E = end(); I != E; ++I) {
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+ // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+ (OutIt-1)->end = OutIt->end;
+ } else {
+ if (I != OutIt) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+
+ // Didn't merge, on to the next one.
+ ++OutIt;
+ }
+ }
+
+ // If we merge some live ranges, chop off the end.
+ ranges.erase(OutIt, end());
+ }
+
+  // Remember assignments because val# IDs are changing.
+ SmallVector<unsigned, 16> OtherAssignments;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveInterval now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ iterator InsertPos = begin();
+ unsigned RangeNo = 0;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+ // Map the valno in the other live range to the current live range.
+ I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+ assert(I->valno && "Adding a dead range?");
+ InsertPos = addRangeFrom(*I, InsertPos);
+ }
+
+ weight += Other.weight;
+ if (Other.preference && !preference)
+ preference = Other.preference;
+}
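
The remap loop in join above also coalesces neighbors that end up with the
same value number; the core of that compaction on toy data (illustrative C++
only, names assumed, not part of the imported sources):

    #include <cstddef>
    #include <vector>

    struct ToyLR { unsigned start, end, valno; };

    // After remapping value numbers, merge each range into its
    // predecessor when they touch and now share a value number.
    static void coalesce(std::vector<ToyLR> &Ranges) {
      if (Ranges.empty())
        return;
      std::size_t Out = 0;
      for (std::size_t I = 1; I != Ranges.size(); ++I) {
        if (Ranges[Out].valno == Ranges[I].valno &&
            Ranges[Out].end == Ranges[I].start)
          Ranges[Out].end = Ranges[I].end;   // extend predecessor
        else
          Ranges[++Out] = Ranges[I];         // keep as a separate range
      }
      Ranges.resize(Out + 1);
    }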
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ VNInfo *LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ // Map the valno in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; the value numbers of any overlapped live ranges are
+/// replaced with the specified value number.
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+ const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+ SmallVector<VNInfo*, 4> ReplacedValNos;
+ iterator IP = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ if (I->valno != RHSValNo)
+ continue;
+ unsigned Start = I->start, End = I->end;
+ IP = std::upper_bound(IP, end(), Start);
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > Start) {
+ if (IP[-1].valno != LHSValNo) {
+ ReplacedValNos.push_back(IP[-1].valno);
+ IP[-1].valno = LHSValNo; // Update val#.
+ }
+ Start = IP[-1].end;
+ // Trimmed away the whole range?
+ if (Start >= End) continue;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && End > IP->start) {
+ if (IP->valno != LHSValNo) {
+ ReplacedValNos.push_back(IP->valno);
+ IP->valno = LHSValNo; // Update val#.
+ }
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) continue;
+ }
+
+ // Map the valno in the other live range to the current live range.
+ IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
+ }
+
+
+ SmallSet<VNInfo*, 4> Seen;
+ for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) {
+ VNInfo *V1 = ReplacedValNos[i];
+ if (Seen.insert(V1)) {
+ bool isDead = true;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (I->valno == V1) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that V1 is dead, remove it. If it is the largest value number,
+ // just nuke it (and any other deleted values neighboring it), otherwise
+ // mark it as ~1U so it can be nuked later.
+ if (V1->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } else {
+ V1->def = ~1U;
+ }
+ }
+ }
+ }
+}
+
+
+/// MergeInClobberRanges - For any live ranges that are not defined in the
+/// current interval, but are defined in the Clobbers interval, mark them
+/// used with an unknown definition value.
+void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
+ BumpPtrAllocator &VNInfoAllocator) {
+ if (Clobbers.empty()) return;
+
+ DenseMap<VNInfo*, VNInfo*> ValNoMaps;
+ VNInfo *UnusedValNo = 0;
+ iterator IP = begin();
+ for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+ // For every val# in the Clobbers interval, create a new "unknown" val#.
+ VNInfo *ClobberValNo = 0;
+ DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
+ if (VI != ValNoMaps.end())
+ ClobberValNo = VI->second;
+ else if (UnusedValNo)
+ ClobberValNo = UnusedValNo;
+ else {
+ UnusedValNo = ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+ ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
+ }
+
+ bool Done = false;
+ unsigned Start = I->start, End = I->end;
+ // If a clobber range starts before an existing range and ends after
+ // it, the clobber range will need to be split into multiple ranges.
+ // Loop until the entire clobber range is handled.
+ while (!Done) {
+ Done = true;
+ IP = std::upper_bound(IP, end(), Start);
+ unsigned SubRangeStart = Start;
+ unsigned SubRangeEnd = End;
+
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > SubRangeStart) {
+ SubRangeStart = IP[-1].end;
+ // Trimmed away the whole range?
+ if (SubRangeStart >= SubRangeEnd) continue;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && SubRangeEnd > IP->start) {
+ // If the clobber live range extends beyond the existing live range,
+ // it'll need at least another live range, so set the flag to keep
+ // iterating.
+ if (SubRangeEnd > IP->end) {
+ Start = IP->end;
+ Done = false;
+ }
+ SubRangeEnd = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (SubRangeStart == SubRangeEnd) continue;
+ }
+
+ // Insert the clobber interval.
+ IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
+ IP);
+ UnusedValNo = 0;
+ }
+ }
+
+ if (UnusedValNo) {
+ // Delete the last unused val#.
+ valnos.pop_back();
+ UnusedValNo->~VNInfo();
+ }
+}
+
+void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End,
+ BumpPtrAllocator &VNInfoAllocator) {
+ // Find a value # to use for the clobber ranges. If there is already a value#
+ // for unknown values, use it.
+ VNInfo *ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+
+ iterator IP = begin();
+ IP = std::upper_bound(IP, end(), Start);
+
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > Start) {
+ Start = IP[-1].end;
+ // Trimmed away the whole range?
+ if (Start >= End) return;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && End > IP->start) {
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) return;
+ }
+
+ // Insert the clobber interval.
+ addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ copyValNumInfo(V1, V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->valno != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->valno == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->valno = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->valno == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it. If it is the largest value number, just
+ // nuke it (and any other deleted values neighboring it), otherwise mark it as
+ // ~1U so it can be nuked later.
+ if (V1->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+    } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } else {
+ V1->def = ~1U;
+ }
+
+ return V2;
+}
+
+void LiveInterval::Copy(const LiveInterval &RHS,
+ BumpPtrAllocator &VNInfoAllocator) {
+ ranges.clear();
+ valnos.clear();
+ preference = RHS.preference;
+ weight = RHS.weight;
+ for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
+ const VNInfo *VNI = RHS.getValNumInfo(i);
+ VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator);
+ copyValNumInfo(NewVNI, VNI);
+ }
+ for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
+ const LiveRange &LR = RHS.ranges[i];
+ addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
+ }
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->end - I->start;
+ return Sum;
+}
+
+std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+void LiveRange::dump() const {
+ cerr << *this << "\n";
+}
+
+void LiveInterval::print(std::ostream &OS,
+ const TargetRegisterInfo *TRI) const {
+ if (isStackSlot())
+ OS << "SS#" << getStackSlotIndex();
+ else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
+ OS << TRI->getName(reg);
+ else
+ OS << "%reg" << reg;
+
+ OS << ',' << weight;
+
+ if (empty())
+ OS << " EMPTY";
+ else {
+ OS << " = ";
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I)
+ OS << *I;
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << " ";
+ OS << vnum << "@";
+ if (vni->def == ~1U) {
+ OS << "x";
+ } else {
+ if (vni->def == ~0U)
+ OS << "?";
+ else
+ OS << vni->def;
+ unsigned ee = vni->kills.size();
+ if (ee || vni->hasPHIKill) {
+ OS << "-(";
+ for (unsigned j = 0; j != ee; ++j) {
+ OS << vni->kills[j];
+ if (j != ee-1)
+ OS << " ";
+ }
+ if (vni->hasPHIKill) {
+ if (ee)
+ OS << " ";
+ OS << "phi";
+ }
+ OS << ")";
+ }
+ }
+ }
+ }
+}
+
+void LiveInterval::dump() const {
+ cerr << *this << "\n";
+}
+
+
+void LiveRange::print(std::ostream &os) const {
+ os << *this;
+}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 0000000..cf0a648
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,2298 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<bool> DisableReMat("disable-rematerialization",
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> SplitAtBB("split-intervals-at-bb",
+ cl::init(true), cl::Hidden);
+static cl::opt<int> SplitLimit("split-limit",
+ cl::init(-1), cl::Hidden);
+
+static cl::opt<bool> EnableAggressiveRemat("aggressive-remat", cl::Hidden);
+
+static cl::opt<bool> EnableFastSpilling("fast-spill",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(numIntervals, "Number of original intervals");
+STATISTIC(numFolds , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits , "Number of intervals split");
+
+char LiveIntervals::ID = 0;
+static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+
+ if (!StrongPHIElim) {
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequiredID(PHIEliminationID);
+ }
+
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
+ E = r2iMap_.end(); I != E; ++I)
+ delete I->second;
+
+ MBB2IdxMap.clear();
+ Idx2MBBMap.clear();
+ mi2iMap_.clear();
+ i2miMap_.clear();
+ r2iMap_.clear();
+  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+ VNInfoAllocator.Reset();
+ while (!ClonedMIs.empty()) {
+ MachineInstr *MI = ClonedMIs.back();
+ ClonedMIs.pop_back();
+ mf_->DeleteMachineInstr(MI);
+ }
+}
+
+void LiveIntervals::computeNumbering() {
+ Index2MiMap OldI2MI = i2miMap_;
+ std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap;
+
+ Idx2MBBMap.clear();
+ MBB2IdxMap.clear();
+ mi2iMap_.clear();
+ i2miMap_.clear();
+
+ FunctionSize = 0;
+
+ // Number MachineInstrs and MachineBasicBlocks.
+  // Initialize MBB indexes to a sentinel.
+ MBB2IdxMap.resize(mf_->getNumBlockIDs(), std::make_pair(~0U,~0U));
+
+ unsigned MIIndex = 0;
+ for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
+ MBB != E; ++MBB) {
+ unsigned StartIdx = MIIndex;
+
+ // Insert an empty slot at the beginning of each block.
+ MIIndex += InstrSlots::NUM;
+ i2miMap_.push_back(0);
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
+      assert(inserted && "multiple MachineInstr -> index mappings");
+      inserted = true; // Keep 'inserted' referenced when asserts compile out.
+ i2miMap_.push_back(I);
+ MIIndex += InstrSlots::NUM;
+ FunctionSize++;
+
+ // Insert max(1, numdefs) empty slots after every instruction.
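+      // As a hypothetical example, assuming the usual four-slot layout
+      // (LOAD, USE, DEF, STORE; InstrSlots::NUM == 4): an instruction at
+      // base index 8 with one def occupies [8,12), the empty group [12,16)
+      // follows it, and the next instruction is numbered 16.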
+ unsigned Slots = I->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+ MIIndex += InstrSlots::NUM * Slots;
+ while (Slots--)
+ i2miMap_.push_back(0);
+ }
+
+ // Set the MBB2IdxMap entry for this MBB.
+ MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, MIIndex - 1);
+ Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB));
+ }
+ std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
+
+ if (!OldI2MI.empty())
+ for (iterator OI = begin(), OE = end(); OI != OE; ++OI) {
+ for (LiveInterval::iterator LI = OI->second->begin(),
+ LE = OI->second->end(); LI != LE; ++LI) {
+
+ // Remap the start index of the live range to the corresponding new
+ // number, or our best guess at what it _should_ correspond to if the
+ // original instruction has been erased. This is either the following
+ // instruction or its predecessor.
+ unsigned index = LI->start / InstrSlots::NUM;
+ unsigned offset = LI->start % InstrSlots::NUM;
+ if (offset == InstrSlots::LOAD) {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start);
+ // Take the pair containing the index
+ std::vector<IdxMBBPair>::const_iterator J =
+ (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I;
+
+ LI->start = getMBBStartIdx(J->second);
+ } else {
+ LI->start = mi2iMap_[OldI2MI[index]] + offset;
+ }
+
+ // Remap the ending index in the same way that we remapped the start,
+ // except for the final step where we always map to the immediately
+ // following instruction.
+ index = (LI->end - 1) / InstrSlots::NUM;
+ offset = LI->end % InstrSlots::NUM;
+ if (offset == InstrSlots::LOAD) {
+ // VReg dies at end of block.
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end);
+ --I;
+
+ LI->end = getMBBEndIdx(I->second) + 1;
+ } else {
+ unsigned idx = index;
+ while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
+
+ if (index != OldI2MI.size())
+ LI->end = mi2iMap_[OldI2MI[index]] + (idx == index ? offset : 0);
+ else
+ LI->end = InstrSlots::NUM * i2miMap_.size();
+ }
+ }
+
+ for (LiveInterval::vni_iterator VNI = OI->second->vni_begin(),
+ VNE = OI->second->vni_end(); VNI != VNE; ++VNI) {
+ VNInfo* vni = *VNI;
+
+ // Remap the VNInfo def index, which works the same as the
+ // start indices above. VN's with special sentinel defs
+ // don't need to be remapped.
+ if (vni->def != ~0U && vni->def != ~1U) {
+ unsigned index = vni->def / InstrSlots::NUM;
+ unsigned offset = vni->def % InstrSlots::NUM;
+ if (offset == InstrSlots::LOAD) {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def);
+ // Take the pair containing the index
+ std::vector<IdxMBBPair>::const_iterator J =
+ (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I;
+
+ vni->def = getMBBStartIdx(J->second);
+ } else {
+ vni->def = mi2iMap_[OldI2MI[index]] + offset;
+ }
+ }
+
+ // Remap the VNInfo kill indices, which works the same as
+ // the end indices above.
+ for (size_t i = 0; i < vni->kills.size(); ++i) {
+ // PHI kills don't need to be remapped.
+ if (!vni->kills[i]) continue;
+
+ unsigned index = (vni->kills[i]-1) / InstrSlots::NUM;
+ unsigned offset = vni->kills[i] % InstrSlots::NUM;
+ if (offset == InstrSlots::LOAD) {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
+ --I;
+
+ vni->kills[i] = getMBBEndIdx(I->second);
+ } else {
+ unsigned idx = index;
+ while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
+
+ if (index != OldI2MI.size())
+ vni->kills[i] = mi2iMap_[OldI2MI[index]] +
+ (idx == index ? offset : 0);
+ else
+ vni->kills[i] = InstrSlots::NUM * i2miMap_.size();
+ }
+ }
+ }
+ }
+}
+
+void LiveIntervals::scaleNumbering(int factor) {
+  // Need to:
+  //   * Scale MBB begin and end points.
+  //   * Scale all ranges.
+  //   * Update VNI structures.
+  //   * Scale instruction numberings.
+
+ // Scale the MBB indices.
+ Idx2MBBMap.clear();
+ for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end();
+ MBB != MBBE; ++MBB) {
+ std::pair<unsigned, unsigned> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
+ mbbIndices.first = InstrSlots::scale(mbbIndices.first, factor);
+ mbbIndices.second = InstrSlots::scale(mbbIndices.second, factor);
+ Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB));
+ }
+ std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
+
+ // Scale the intervals.
+ for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
+ LI->second->scaleNumbering(factor);
+ }
+
+ // Scale MachineInstrs.
+ Mi2IndexMap oldmi2iMap = mi2iMap_;
+ unsigned highestSlot = 0;
+ for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end();
+ MI != ME; ++MI) {
+ unsigned newSlot = InstrSlots::scale(MI->second, factor);
+ mi2iMap_[MI->first] = newSlot;
+ highestSlot = std::max(highestSlot, newSlot);
+ }
+
+ i2miMap_.clear();
+ i2miMap_.resize(highestSlot + 1);
+ for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end();
+ MI != ME; ++MI) {
+ i2miMap_[MI->second] = MI->first;
+ }
+
+}
+
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &mf_->getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ aa_ = &getAnalysis<AliasAnalysis>();
+ lv_ = &getAnalysis<LiveVariables>();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+
+ computeNumbering();
+ computeIntervals();
+
+ numIntervals += getNumIntervals();
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(std::ostream &O, const Module* ) const {
+ O << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second->print(O, tri_);
+ O << "\n";
+ }
+
+ O << "********** MACHINEINSTRS **********\n";
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+ for (MachineBasicBlock::iterator mii = mbbi->begin(),
+ mie = mbbi->end(); mii != mie; ++mii) {
+ O << getInstructionIndex(mii) << '\t' << *mii;
+ }
+ }
+}
+
+/// conflictsWithPhysRegDef - Returns true if the specified register
+/// is defined during the specified interval.
+bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li,
+ VirtRegMap &vrm, unsigned reg) {
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ for (unsigned index = getBaseIndex(I->start),
+ end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
+ index += InstrSlots::NUM) {
+ // skip deleted instructions
+ while (index != end && !getInstructionFromIndex(index))
+ index += InstrSlots::NUM;
+ if (index == end) break;
+
+ MachineInstr *MI = getInstructionFromIndex(index);
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (SrcReg == li.reg || DstReg == li.reg)
+ continue;
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned PhysReg = mop.getReg();
+ if (PhysReg == 0 || PhysReg == li.reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+ if (!vrm.hasPhys(PhysReg))
+ continue;
+ PhysReg = vrm.getPhys(PhysReg);
+ }
+ if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegDef except
+/// it can check uses as well.
+bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
+ unsigned Reg, bool CheckUse,
+ SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ for (unsigned index = getBaseIndex(I->start),
+ end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
+ index += InstrSlots::NUM) {
+ // Skip deleted instructions.
+ MachineInstr *MI = 0;
+ while (index != end) {
+ MI = getInstructionFromIndex(index);
+ if (MI)
+ break;
+ index += InstrSlots::NUM;
+ }
+ if (index == end) break;
+
+ if (JoinedCopies.count(MI))
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse() && !CheckUse)
+ continue;
+ unsigned PhysReg = MO.getReg();
+ if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg))
+ continue;
+ if (tri_->isSubRegister(Reg, PhysReg))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+
+void LiveIntervals::printRegName(unsigned reg) const {
+ if (TargetRegisterInfo::isPhysicalRegister(reg))
+ cerr << tri_->getName(reg);
+ else
+ cerr << "%reg" << reg;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator mi,
+ unsigned MIIdx, MachineOperand& MO,
+ unsigned MOIdx,
+ LiveInterval &interval) {
+ DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+
+ if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+    DOUT << "is an implicit_def\n";
+ return;
+ }
+
+ // Virtual registers may be defined multiple times (due to phi
+ // elimination and 2-addr elimination). Much of what we do only has to be
+ // done once for the vreg. We use an empty interval to detect the first
+ // time we see a vreg.
+ if (interval.empty()) {
+ // Get the Idx of the defining instructions.
+ unsigned defIndex = getDefIndex(MIIdx);
+ // Earlyclobbers move back one.
+ if (MO.isEarlyClobber())
+ defIndex = getUseIndex(MIIdx);
+ VNInfo *ValNo;
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = mi;
+ // Earlyclobbers move back one.
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+ assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+ // Loop over all of the blocks that the vreg is defined in. There are
+ // two cases we have to handle here. The most common case is a vreg
+ // whose lifetime is contained within a basic block. In this case there
+ // will be a single kill, in MBB, which comes after the definition.
+ if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+ // FIXME: what about dead vars?
+ unsigned killIdx;
+ if (vi.Kills[0] != mi)
+ killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+ else
+ killIdx = defIndex+1;
+
+ // If the kill happens after the definition, we have an intra-block
+ // live range.
+ if (killIdx > defIndex) {
+ assert(vi.AliveBlocks.empty() &&
+ "Shouldn't be alive across any blocks!");
+ LiveRange LR(defIndex, killIdx, ValNo);
+ interval.addRange(LR);
+ DOUT << " +" << LR << "\n";
+ interval.addKill(ValNo, killIdx);
+ return;
+ }
+ }
+
+ // The other case we handle is when a virtual register lives to the end
+ // of the defining block, potentially live across some blocks, then is
+ // live into some number of blocks, but gets killed. Start by adding a
+ // range that goes from this definition to the end of the defining block.
+ LiveRange NewLR(defIndex, getMBBEndIdx(mbb)+1, ValNo);
+ DOUT << " +" << NewLR;
+ interval.addRange(NewLR);
+
+ // Iterate over all of the blocks that the variable is completely
+    // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+ E = vi.AliveBlocks.end(); I != E; ++I) {
+ LiveRange LR(getMBBStartIdx(*I),
+ getMBBEndIdx(*I)+1, // MBB ends at -1.
+ ValNo);
+ interval.addRange(LR);
+ DOUT << " +" << LR;
+ }
+
+ // Finally, this virtual register is live from the start of any killing
+ // block to the 'use' slot of the killing instruction.
+ for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+ MachineInstr *Kill = vi.Kills[i];
+ unsigned killIdx = getUseIndex(getInstructionIndex(Kill))+1;
+ LiveRange LR(getMBBStartIdx(Kill->getParent()),
+ killIdx, ValNo);
+ interval.addRange(LR);
+ interval.addKill(ValNo, killIdx);
+ DOUT << " +" << LR;
+ }
+
+ } else {
+ // If this is the second time we see a virtual register definition, it
+ // must be due to phi elimination or two addr elimination. If this is
+ // the result of two address elimination, then the vreg is one of the
+    // def-and-use register operands.
+ if (mi->isRegTiedToUseOperand(MOIdx)) {
+ // If this is a two-address definition, then we have already processed
+ // the live range. The only problem is that we didn't realize there
+ // are actually two values in the live interval. Because of this we
+      // need to take the live range that defines this register and split it
+ // into two values.
+ assert(interval.containsOneValue());
+ unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def);
+ unsigned RedefIndex = getDefIndex(MIIdx);
+ if (MO.isEarlyClobber())
+ RedefIndex = getUseIndex(MIIdx);
+
+ const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1);
+ VNInfo *OldValNo = OldLR->valno;
+
+ // Delete the initial value, which should be short and continuous,
+ // because the 2-addr copy must be in the same MBB as the redef.
+ interval.removeRange(DefIndex, RedefIndex);
+
+      // A two-address vreg should only be redefined once. This means
+ // that at this point, there should be exactly one value number in it.
+ assert(interval.containsOneValue() && "Unexpected 2-addr liveint!");
+
+ // The new value number (#1) is defined by the instruction we claimed
+ // defined value #0.
+ VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy,
+ VNInfoAllocator);
+
+ // Value#0 is now defined by the 2-addr instruction.
+ OldValNo->def = RedefIndex;
+ OldValNo->copy = 0;
+ if (MO.isEarlyClobber())
+ OldValNo->redefByEC = true;
+
+ // Add the new live interval which replaces the range for the input copy.
+ LiveRange LR(DefIndex, RedefIndex, ValNo);
+ DOUT << " replace range with " << LR;
+ interval.addRange(LR);
+ interval.addKill(ValNo, RedefIndex);
+
+ // If this redefinition is dead, we need to add a dummy unit live
+ // range covering the def slot.
+ if (MO.isDead())
+ interval.addRange(LiveRange(RedefIndex, RedefIndex+1, OldValNo));
+
+ DOUT << " RESULT: ";
+ interval.print(DOUT, tri_);
+
+ } else {
+ // Otherwise, this must be because of phi elimination. If this is the
+ // first redefinition of the vreg that we have seen, go back and change
+ // the live range in the PHI block to be a different value number.
+ if (interval.containsOneValue()) {
+ assert(vi.Kills.size() == 1 &&
+ "PHI elimination vreg should have one kill, the PHI itself!");
+
+ // Remove the old range that we now know has an incorrect number.
+ VNInfo *VNI = interval.getValNumInfo(0);
+ MachineInstr *Killer = vi.Kills[0];
+ unsigned Start = getMBBStartIdx(Killer->getParent());
+ unsigned End = getUseIndex(getInstructionIndex(Killer))+1;
+ DOUT << " Removing [" << Start << "," << End << "] from: ";
+ interval.print(DOUT, tri_); DOUT << "\n";
+ interval.removeRange(Start, End);
+ VNI->hasPHIKill = true;
+ DOUT << " RESULT: "; interval.print(DOUT, tri_);
+
+ // Replace the interval with one of a NEW value number. Note that this
+ // value number isn't actually defined by an instruction, weird huh? :)
+ LiveRange LR(Start, End, interval.getNextValue(~0, 0, VNInfoAllocator));
+ DOUT << " replace range with " << LR;
+ interval.addRange(LR);
+ interval.addKill(LR.valno, End);
+ DOUT << " RESULT: "; interval.print(DOUT, tri_);
+ }
+
+ // In the case of PHI elimination, each variable definition is only
+ // live until the end of the block. We've already taken care of the
+ // rest of the live range.
+ unsigned defIndex = getDefIndex(MIIdx);
+ if (MO.isEarlyClobber())
+ defIndex = getUseIndex(MIIdx);
+
+ VNInfo *ValNo;
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = mi;
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+ unsigned killIndex = getMBBEndIdx(mbb) + 1;
+ LiveRange LR(defIndex, killIndex, ValNo);
+ interval.addRange(LR);
+ interval.addKill(ValNo, killIndex);
+ ValNo->hasPHIKill = true;
+ DOUT << " +" << LR;
+ }
+ }
+
+ DOUT << '\n';
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator mi,
+ unsigned MIIdx,
+ MachineOperand& MO,
+ LiveInterval &interval,
+ MachineInstr *CopyMI) {
+  // A physical register cannot be live across basic blocks, so its
+ // lifetime must end somewhere in its defining basic block.
+ DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+
+ unsigned baseIndex = MIIdx;
+ unsigned start = getDefIndex(baseIndex);
+ // Earlyclobbers move back one.
+ if (MO.isEarlyClobber())
+ start = getUseIndex(MIIdx);
+ unsigned end = start;
+
+ // If it is not used after definition, it is considered dead at
+ // the instruction defining it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
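+  // For example, assuming the DEF slot is at offset 2 within the group,
+  // a dead def at base index 8 yields the unit range [10, 11).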
+ if (MO.isDead()) {
+ DOUT << " dead";
+ end = start + 1;
+ goto exit;
+ }
+
+ // If it is not dead on definition, it must be killed by a
+ // subsequent instruction. Hence its interval is:
+ // [defSlot(def), useSlot(kill)+1)
+ baseIndex += InstrSlots::NUM;
+ while (++mi != MBB->end()) {
+ while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(baseIndex) == 0)
+ baseIndex += InstrSlots::NUM;
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DOUT << " killed";
+ end = getUseIndex(baseIndex) + 1;
+ goto exit;
+ } else {
+ int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_);
+ if (DefIdx != -1) {
+ if (mi->isRegTiedToUseOperand(DefIdx)) {
+ // Two-address instruction.
+ end = getDefIndex(baseIndex);
+ if (mi->getOperand(DefIdx).isEarlyClobber())
+ end = getUseIndex(baseIndex);
+ } else {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DOUT << " dead";
+ end = start + 1;
+ }
+ goto exit;
+ }
+ }
+
+ baseIndex += InstrSlots::NUM;
+ }
+
+  // The only cases where we should see a dead physreg here with no killing
+  // instruction are when the register is live-in to the function and never
+  // used, or when its implicit use has been deleted by the two-address pass.
+ end = start + 1;
+
+exit:
+ assert(start < end && "did not find end of interval?");
+
+ // Already exists? Extend old live interval.
+ LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
+ bool Extend = OldLR != interval.end();
+ VNInfo *ValNo = Extend
+ ? OldLR->valno : interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ if (MO.isEarlyClobber() && Extend)
+ ValNo->redefByEC = true;
+ LiveRange LR(start, end, ValNo);
+ interval.addRange(LR);
+ interval.addKill(LR.valno, end);
+ DOUT << " +" << LR << '\n';
+}
+
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
+ getOrCreateInterval(MO.getReg()));
+ else if (allocatableRegs_[MO.getReg()]) {
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+ tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = MI;
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(MO.getReg()), CopyMI);
+ // Def of a register also defines its sub-registers.
+ for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
+ // If MI also modifies the sub-register explicitly, avoid processing it
+ // more than once. Do not pass in TRI here so it checks for exact match.
+ if (!MI->modifiesRegister(*AS))
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(*AS), 0);
+ }
+}
+
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+ unsigned MIIdx,
+ LiveInterval &interval, bool isAlias) {
+ DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg));
+
+  // Look for kills; if the value reaches a def before it is killed, it
+  // shouldn't be considered a live-in.
+ MachineBasicBlock::iterator mi = MBB->begin();
+ unsigned baseIndex = MIIdx;
+ unsigned start = baseIndex;
+ while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(baseIndex) == 0)
+ baseIndex += InstrSlots::NUM;
+ unsigned end = baseIndex;
+ bool SeenDefUse = false;
+
+ while (mi != MBB->end()) {
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DOUT << " killed";
+ end = getUseIndex(baseIndex) + 1;
+ SeenDefUse = true;
+ goto exit;
+ } else if (mi->modifiesRegister(interval.reg, tri_)) {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DOUT << " dead";
+ end = getDefIndex(start) + 1;
+ SeenDefUse = true;
+ goto exit;
+ }
+
+ baseIndex += InstrSlots::NUM;
+ ++mi;
+ if (mi != MBB->end()) {
+ while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(baseIndex) == 0)
+ baseIndex += InstrSlots::NUM;
+ }
+ }
+
+exit:
+ // Live-in register might not be used at all.
+ if (!SeenDefUse) {
+ if (isAlias) {
+ DOUT << " dead";
+ end = getDefIndex(MIIdx) + 1;
+ } else {
+ DOUT << " live through";
+ end = baseIndex;
+ }
+ }
+
+ LiveRange LR(start, end, interval.getNextValue(~0U, 0, VNInfoAllocator));
+ interval.addRange(LR);
+ interval.addKill(LR.valno, end);
+ DOUT << " +" << LR << '\n';
+}
+
+/// computeIntervals - Compute the live intervals for virtual
+/// registers. For some numbering of the machine instructions [1, N], a
+/// live interval is an interval [i, j), where 1 <= i <= j < N, during
+/// which a variable is live.
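+///
+/// For example, under such a numbering a virtual register defined at
+/// index i and last used at index j would be live over [i, j+1).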
+void LiveIntervals::computeIntervals() {
+
+ DOUT << "********** COMPUTING LIVE INTERVALS **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n';
+
+ for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ // Track the index of the current machine instr.
+ unsigned MIIndex = getMBBStartIdx(MBB);
+ DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+ MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+
+ // Create intervals for live-ins to this BB first.
+ for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end(); LI != LE; ++LI) {
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+ // Multiple live-ins can alias the same register.
+ for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
+ if (!hasInterval(*AS))
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+ true);
+ }
+
+ // Skip over empty initial indices.
+ while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(MIIndex) == 0)
+ MIIndex += InstrSlots::NUM;
+
+ for (; MI != miEnd; ++MI) {
+ DOUT << MIIndex << "\t" << *MI;
+
+ // Handle defs.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ // handle register defs - build intervals
+ if (MO.isReg() && MO.getReg() && MO.isDef()) {
+ handleRegisterDef(MBB, MI, MIIndex, MO, i);
+ }
+ }
+
+ // Skip over the empty slots after each instruction.
+ unsigned Slots = MI->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+ MIIndex += InstrSlots::NUM * Slots;
+
+ // Skip over empty indices.
+ while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(MIIndex) == 0)
+ MIIndex += InstrSlots::NUM;
+ }
+ }
+}
+
+bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End,
+ SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
+
+ bool ResVal = false;
+ while (I != Idx2MBBMap.end()) {
+ if (I->first >= End)
+ break;
+ MBBs.push_back(I->second);
+ ResVal = true;
+ ++I;
+ }
+ return ResVal;
+}
+
+bool LiveIntervals::findReachableMBBs(unsigned Start, unsigned End,
+ SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
+
+ bool ResVal = false;
+ while (I != Idx2MBBMap.end()) {
+ if (I->first > End)
+ break;
+ MachineBasicBlock *MBB = I->second;
+ if (getMBBEndIdx(MBB) > End)
+ break;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ MBBs.push_back(*SI);
+ ResVal = true;
+ ++I;
+ }
+ return ResVal;
+}
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+/// dupInterval - Duplicate a live interval. The caller is responsible for
+/// managing the allocated memory.
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+ LiveInterval *NewLI = createInterval(li->reg);
+ NewLI->Copy(*li, getVNInfoAllocator());
+ return NewLI;
+}
+
+/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
+/// copy field and returns the source register that defines it.
+unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
+ if (!VNI->copy)
+ return 0;
+
+ if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ // If it's extracting out of a physical register, return the sub-register.
+ unsigned Reg = VNI->copy->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm());
+ return Reg;
+ } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
+ return VNI->copy->getOperand(2).getReg();
+
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return SrcReg;
+ assert(0 && "Unrecognized copy instruction!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+ MachineInstr *MI) const {
+ unsigned RegOp = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == li.reg)
+ continue;
+ // FIXME: For now, only remat MI with at most one register operand.
+ assert(!RegOp &&
+ "Can't rematerialize instruction with multiple register operand!");
+ RegOp = MO.getReg();
+#ifndef NDEBUG
+ break;
+#endif
+ }
+ return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+ unsigned UseIdx) const {
+ unsigned Index = getInstructionIndex(MI);
+ VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
+ LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
+ return UI != li.end() && UI->valno == ValNo;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
+ if (DisableReMat)
+ return false;
+
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ return true;
+
+ int FrameIdx = 0;
+ if (tii_->isLoadFromStackSlot(MI, FrameIdx) &&
+ mf_->getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+    // FIXME: Let the target-specific isReallyTriviallyReMaterializable
+    // determine this, but remember it is not safe to fold into a
+    // two-address instruction.
+    // This is a load from a fixed stack slot. It can be rematerialized.
+ return true;
+
+ // If the target-specific rules don't identify an instruction as
+ // being trivially rematerializable, use some target-independent
+ // rules.
+ if (!MI->getDesc().isRematerializable() ||
+ !tii_->isTriviallyReMaterializable(MI)) {
+ if (!EnableAggressiveRemat)
+ return false;
+
+ // If the instruction accesses memory but the memoperands have been lost,
+ // we can't analyze it.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty())
+ return false;
+
+ // Avoid instructions obviously unsafe for remat.
+ if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable())
+ return false;
+
+ // If the instruction accesses memory and the memory could be non-constant,
+ // assume the instruction is not rematerializable.
+ for (std::list<MachineMemOperand>::const_iterator
+ I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I){
+ const MachineMemOperand &MMO = *I;
+ if (MMO.isVolatile() || MMO.isStore())
+ return false;
+ const Value *V = MMO.getValue();
+ if (!V)
+ return false;
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ if (!PSV->isConstant(mf_->getFrameInfo()))
+ return false;
+ } else if (!aa_->pointsToConstantMemory(V))
+ return false;
+ }
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ unsigned ImpUse = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ // Only allow one def, and that in the first operand.
+ if (MO.isDef() != (i == 0))
+ return false;
+
+ // Only allow constant-valued registers.
+ bool IsLiveIn = mri_->isLiveIn(Reg);
+ MachineRegisterInfo::def_iterator I = mri_->def_begin(Reg),
+ E = mri_->def_end();
+
+ // For the def, it should be the only def of that register.
+ if (MO.isDef() && (next(I) != E || IsLiveIn))
+ return false;
+
+ if (MO.isUse()) {
+          // Only allow one other register use, as that's all the remat
+          // mechanisms currently support.
+ if (Reg != li.reg) {
+ if (ImpUse == 0)
+ ImpUse = Reg;
+ else if (Reg != ImpUse)
+ return false;
+ }
+ // For the use, there should be only one associated def.
+ if (I != E && (next(I) != E || IsLiveIn))
+ return false;
+ }
+ }
+ }
+ }
+
+ unsigned ImpUse = getReMatImplicitUse(li, MI);
+ if (ImpUse) {
+ const LiveInterval &ImpLi = getInterval(ImpUse);
+ for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
+ re = mri_->use_end(); ri != re; ++ri) {
+ MachineInstr *UseMI = &*ri;
+ unsigned UseIdx = getInstructionIndex(UseMI);
+ if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+ continue;
+ if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+ return false;
+ }
+
+ // If a register operand of the re-materialized instruction is going to
+ // be spilled next, then it's not legal to re-materialize this instruction.
+ for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
+ if (ImpUse == SpillIs[i]->reg)
+ return false;
+ }
+ return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI) {
+ SmallVector<LiveInterval*, 4> Dummy1;
+ bool Dummy2;
+ return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+}
+
+/// isReMaterializable - Returns true if the definition MI of every val#
+/// of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
+ isLoad = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ unsigned DefIdx = VNI->def;
+ if (DefIdx == ~1U)
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ if (DefIdx == ~0u)
+ return false;
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(DefIdx);
+ bool DefIsLoad = false;
+ if (!ReMatDefMI ||
+ !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+ return false;
+ isLoad |= DefIsLoad;
+ }
+ return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ unsigned &MRInfo,
+ SmallVector<unsigned, 2> &FoldOps) {
+ MRInfo = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned OpIdx = Ops[i];
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ // FIXME: fold subreg use.
+ if (MO.getSubReg())
+ return true;
+ if (MO.isDef())
+ MRInfo |= (unsigned)VirtRegMap::isMod;
+ else {
+ // Filter out two-address use operand(s).
+ if (MI->isRegTiedToDefOperand(OpIdx)) {
+ MRInfo = VirtRegMap::isModRef;
+ continue;
+ }
+ MRInfo |= (unsigned)VirtRegMap::isRef;
+ }
+ FoldOps.push_back(OpIdx);
+ }
+ return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
+/// slot / to reg or any rematerialized load into the ith operand of the
+/// specified MI. If successful, MI is updated with the newly created MI
+/// and true is returned.
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+ VirtRegMap &vrm, MachineInstr *DefMI,
+ unsigned InstrIdx,
+ SmallVector<unsigned, 2> &Ops,
+ bool isSS, int Slot, unsigned Reg) {
+ // If it is an implicit def instruction, just delete it.
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++numFolds;
+ return true;
+ }
+
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // The only time it's safe to fold into a two address instruction is when
+ // it's folding reload and spill from / into a spill stack slot.
+ if (DefMI && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot)
+ : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI);
+ if (fmi) {
+ // Remember this instruction uses the spill slot.
+ if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+ // Attempt to fold the memory reference into the instruction. If
+ // we can do this, we don't need to insert spill code.
+ MachineBasicBlock &MBB = *MI->getParent();
+ if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+ vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+ vrm.transferSpillPts(MI, fmi);
+ vrm.transferRestorePts(MI, fmi);
+ vrm.transferEmergencySpills(MI, fmi);
+ mi2iMap_.erase(MI);
+ i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
+ mi2iMap_[fmi] = InstrIdx;
+ MI = MBB.insert(MBB.erase(MI), fmi);
+ ++numFolds;
+ return true;
+ }
+ return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ bool ReMat) const {
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // It's only legal to remat for a use, not a def.
+ if (ReMat && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+ SmallPtrSet<MachineBasicBlock*, 4> MBBs;
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ std::vector<IdxMBBPair>::const_iterator II =
+ std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), I->start);
+ if (II == Idx2MBBMap.end())
+ continue;
+ if (I->end > II->first) // crossing a MBB.
+ return false;
+ MBBs.insert(II->second);
+ if (MBBs.size() > 1)
+ return false;
+ }
+ return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
+/// interval on to-be re-materialized operands of MI) with the new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+ MachineInstr *MI, unsigned NewVReg,
+ VirtRegMap &vrm) {
+  // There is an implicit use. That means one of the other operands is
+  // being remat'ed and the remat'ed instruction has li.reg as a
+  // use operand. Make sure we rewrite that as well.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (!vrm.isReMaterialized(Reg))
+ continue;
+ MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
+ MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
+ if (UseMO)
+ UseMO->setReg(NewVReg);
+ }
+}
+
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+bool LiveIntervals::
+rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
+ bool TrySplit, unsigned index, unsigned end, MachineInstr *MI,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool CanFold = false;
+ RestartInstruction:
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned Reg = mop.getReg();
+ unsigned RegI = Reg;
+ if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (Reg != li.reg)
+ continue;
+
+ bool TryFold = !DefIsReMat;
+ bool FoldSS = true; // Default behavior unless it's a remat.
+ int FoldSlot = Slot;
+ if (DefIsReMat) {
+ // If this is the rematerializable definition MI itself and
+ // all of its uses are rematerialized, simply delete it.
+ if (MI == ReMatOrigDefMI && CanDelete) {
+        DOUT << "\t\t\t\tErasing re-materializable def: ";
+ DOUT << MI << '\n';
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+
+ // If def for this use can't be rematerialized, then try folding.
+ // If def is rematerializable and it's a load, also try folding.
+ TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
+ if (isLoad) {
+ // Try fold loads (from stack slot, constant pool, etc.) into uses.
+ FoldSS = isLoadSS;
+ FoldSlot = LdSlot;
+ }
+ }
+
+ // Scan all of the operands of this instruction rewriting operands
+ // to use NewVReg instead of li.reg as appropriate. We do this for
+ // two reasons:
+ //
+ // 1. If the instr reads the same spilled vreg multiple times, we
+ // want to reuse the NewVReg.
+ // 2. If the instr is a two-addr instruction, we are required to
+ // keep the src/dst regs pinned.
+ //
+ // Keep track of whether we replace a use and/or def so that we can
+ // create the spill interval with the appropriate range.
+
+ HasUse = mop.isUse();
+ HasDef = mop.isDef();
+ SmallVector<unsigned, 2> Ops;
+ Ops.push_back(i);
+ for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
+ const MachineOperand &MOj = MI->getOperand(j);
+ if (!MOj.isReg())
+ continue;
+ unsigned RegJ = MOj.getReg();
+ if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ))
+ continue;
+ if (RegJ == RegI) {
+ Ops.push_back(j);
+ HasUse |= MOj.isUse();
+ HasDef |= MOj.isDef();
+ }
+ }
+
+ if (HasUse && !li.liveAt(getUseIndex(index)))
+ // Must be defined by an implicit def. It should not be spilled. Note,
+      // this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled, you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and the two target registers would overlap.
+ HasUse = false;
+
+ // Create a new virtual register for the spill interval.
+ // Create the new register now so we can map the fold instruction
+ // to the new register so when it is unfolded we get the correct
+ // answer.
+ bool CreatedNewVReg = false;
+ if (NewVReg == 0) {
+ NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ CreatedNewVReg = true;
+ }
+
+ if (!TryFold)
+ CanFold = false;
+ else {
+ // Do not fold load / store here if we are splitting. We'll find an
+ // optimal point to insert a load / store later.
+ if (!TrySplit) {
+ if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, FoldSS, FoldSlot, NewVReg)) {
+ // Folding the load/store can completely change the instruction in
+ // unpredictable ways, rescan it from the beginning.
+
+ if (FoldSS) {
+ // We need to give the new vreg the same stack slot as the
+ // spilled interval.
+ vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
+ }
+
+ HasUse = false;
+ HasDef = false;
+ CanFold = false;
+ if (isNotInMIMap(MI))
+ break;
+ goto RestartInstruction;
+ }
+ } else {
+ // We'll try to fold it later if it's profitable.
+ CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
+ }
+ }
+
+ mop.setReg(NewVReg);
+ if (mop.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+
+ // Reuse NewVReg for other reads.
+ for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
+ MachineOperand &mopj = MI->getOperand(Ops[j]);
+ mopj.setReg(NewVReg);
+ if (mopj.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+ }
+
+ if (CreatedNewVReg) {
+ if (DefIsReMat) {
+ vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/);
+ if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
+ // Each valnum may have its own remat id.
+ ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
+ } else {
+ vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
+ }
+ if (!CanDelete || (HasUse && HasDef)) {
+ // If this is a two-addr instruction then its use operands are
+ // rematerializable but its def is not. It should be assigned a
+ // stack slot.
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else {
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else if (HasUse && HasDef &&
+ vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
+ // If this interval hasn't been assigned a stack slot (because earlier
+ // def is a deleted remat def), do it now.
+ assert(Slot != VirtRegMap::NO_STACK_SLOT);
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI.
+ if (DefIsReMat && ImpUse)
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+
+ // Create a new register interval for this spill / remat.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (CreatedNewVReg) {
+ NewLIs.push_back(&nI);
+ MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
+ if (TrySplit)
+ vrm.setIsSplitFromReg(NewVReg, li.reg);
+ }
+
+ if (HasUse) {
+ if (CreatedNewVReg) {
+ LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
+ nI.getNextValue(~0U, 0, VNInfoAllocator));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ } else {
+ // Extend the split live interval to this def / use.
+ unsigned End = getUseIndex(index)+1;
+ LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
+ nI.getValNumInfo(nI.getNumValNums()-1));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ }
+ }
+ if (HasDef) {
+ LiveRange LR(getDefIndex(index), getStoreIndex(index),
+ nI.getNextValue(~0U, 0, VNInfoAllocator));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ }
+
+ DOUT << "\t\t\t\tAdded new interval: ";
+ nI.print(DOUT, tri_);
+ DOUT << '\n';
+ }
+ return CanFold;
+}
+
+bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
+ const VNInfo *VNI,
+ MachineBasicBlock *MBB, unsigned Idx) const {
+ unsigned End = getMBBEndIdx(MBB);
+ for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
+ unsigned KillIdx = VNI->kills[j];
+ if (KillIdx > Idx && KillIdx < End)
+ return true;
+ }
+ return false;
+}
+
+/// RewriteInfo - Keep track of machine instrs that will be rewritten
+/// during spilling.
+namespace {
+ struct RewriteInfo {
+ unsigned Index;
+ MachineInstr *MI;
+ bool HasUse;
+ bool HasDef;
+ RewriteInfo(unsigned i, MachineInstr *mi, bool u, bool d)
+ : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+ };
+
+ struct RewriteInfoCompare {
+ bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+ return LHS.Index < RHS.Index;
+ }
+ };
+}
+
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+ LiveInterval::Ranges::const_iterator &I,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ BitVector &SpillMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool AllCanFold = true;
+ unsigned NewVReg = 0;
+ unsigned start = getBaseIndex(I->start);
+ unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM;
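+  // The scan window is widened to whole-instruction boundaries; e.g.
+  // assuming InstrSlots::NUM == 4, a range [10, 19) is scanned as [8, 20).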
+
+ // First collect all the def / use in this live range that will be rewritten.
+ // Make sure they are sorted according to instruction index.
+ std::vector<RewriteInfo> RewriteMIs;
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineInstr *MI = &*ri;
+ MachineOperand &O = ri.getOperand();
+ ++ri;
+ assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
+ unsigned index = getInstructionIndex(MI);
+ if (index < start || index >= end)
+ continue;
+ if (O.isUse() && !li.liveAt(getUseIndex(index)))
+ // Must be defined by an implicit def. It should not be spilled. Note,
+      // this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled, you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and the two target registers would overlap.
+ continue;
+ RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+ }
+ std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+ unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
+ // Now rewrite the defs and uses.
+ for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+ RewriteInfo &rwi = RewriteMIs[i];
+ ++i;
+ unsigned index = rwi.Index;
+ bool MIHasUse = rwi.HasUse;
+ bool MIHasDef = rwi.HasDef;
+ MachineInstr *MI = rwi.MI;
+    // If MI defines and/or uses the same register multiple times, then
+    // there are multiple entries.
+ unsigned NumUses = MIHasUse;
+ while (i != e && RewriteMIs[i].MI == MI) {
+ assert(RewriteMIs[i].Index == index);
+ bool isUse = RewriteMIs[i].HasUse;
+ if (isUse) ++NumUses;
+ MIHasUse |= isUse;
+ MIHasDef |= RewriteMIs[i].HasDef;
+ ++i;
+ }
+ MachineBasicBlock *MBB = MI->getParent();
+
+ if (ImpUse && MI != ReMatDefMI) {
+ // Re-matting an instruction with virtual register use. Update the
+ // register interval's spill weight to HUGE_VALF to prevent it from
+ // being spilled.
+ LiveInterval &ImpLi = getInterval(ImpUse);
+ ImpLi.weight = HUGE_VALF;
+ }
+
+ unsigned MBBId = MBB->getNumber();
+ unsigned ThisVReg = 0;
+ if (TrySplit) {
+ DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+ if (NVI != MBBVRegsMap.end()) {
+ ThisVReg = NVI->second;
+ // One common case:
+ // x = use
+ // ...
+ // ...
+ // def = ...
+ // = use
+        // It's better to start a new interval to avoid artificially
+        // extending the new interval.
+ if (MIHasDef && !MIHasUse) {
+ MBBVRegsMap.erase(MBB->getNumber());
+ ThisVReg = 0;
+ }
+ }
+ }
+
+ bool IsNew = ThisVReg == 0;
+ if (IsNew) {
+ // This ends the previous live interval. If all of its def / use
+ // can be folded, give it a low spill weight.
+ if (NewVReg && TrySplit && AllCanFold) {
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+ AllCanFold = true;
+ }
+ NewVReg = ThisVReg;
+
+ bool HasDef = false;
+ bool HasUse = false;
+ bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+ index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+ ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+ if (!HasDef && !HasUse)
+ continue;
+
+ AllCanFold &= CanFold;
+
+ // Update weight of spill interval.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (!TrySplit) {
+ // The spill weight is now infinity as it cannot be spilled again.
+ nI.weight = HUGE_VALF;
+ continue;
+ }
+
+ // Keep track of the last def and first use in each MBB.
+ if (HasDef) {
+ if (MI != ReMatOrigDefMI || !CanDelete) {
+ bool HasKill = false;
+ if (!HasUse)
+ HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index));
+ else {
+        // If this is a two-address instruction, then this index starts a
+        // new VNInfo.
+ const VNInfo *VNI = li.findDefinedVNInfo(getDefIndex(index));
+ if (VNI)
+ HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index));
+ }
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (!HasKill) {
+ if (SII == SpillIdxes.end()) {
+ std::vector<SRInfo> S;
+ S.push_back(SRInfo(index, NewVReg, true));
+ SpillIdxes.insert(std::make_pair(MBBId, S));
+ } else if (SII->second.back().vreg != NewVReg) {
+ SII->second.push_back(SRInfo(index, NewVReg, true));
+ } else if ((int)index > SII->second.back().index) {
+ // If there is an earlier def and this is a two-address
+ // instruction, then it's not possible to fold the store (which
+ // would also fold the load).
+ SRInfo &Info = SII->second.back();
+ Info.index = index;
+ Info.canFold = !HasUse;
+ }
+ SpillMBBs.set(MBBId);
+ } else if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ (int)index > SII->second.back().index) {
+ // There is an earlier def that's not killed (must be two-address).
+ // The spill is no longer needed.
+ SII->second.pop_back();
+ if (SII->second.empty()) {
+ SpillIdxes.erase(MBBId);
+ SpillMBBs.reset(MBBId);
+ }
+ }
+ }
+ }
+
+ if (HasUse) {
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ (int)index > SII->second.back().index)
+ // There are use(s) following the last def; it's not safe to fold the spill.
+ SII->second.back().canFold = false;
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+ RestoreIdxes.find(MBBId);
+ if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+ // If we are splitting live intervals, only fold if it's the first
+ // use and there isn't another use later in the MBB.
+ RII->second.back().canFold = false;
+ else if (IsNew) {
+ // Only need a reload if there isn't an earlier def / use.
+ if (RII == RestoreIdxes.end()) {
+ std::vector<SRInfo> Infos;
+ Infos.push_back(SRInfo(index, NewVReg, true));
+ RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+ } else {
+ RII->second.push_back(SRInfo(index, NewVReg, true));
+ }
+ RestoreMBBs.set(MBBId);
+ }
+ }
+
+ // Update spill weight.
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
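+ // (Heuristic sketch: in this revision getSpillWeight is understood to grow
+ // roughly as (HasDef + HasUse) * 10^loopDepth, so references inside deeper
+ // loops make the interval far more expensive to spill.)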
+ nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+ }
+
+ if (NewVReg && TrySplit && AllCanFold) {
+ // If all of its defs / uses can be folded, give it a low spill weight.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+}
+
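+/// alsoFoldARestore - Return true if a restore of vr is recorded at the given
+/// index in basic block Id and is still marked as foldable.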
+bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return false;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index &&
+ Restores[i].vreg == vr &&
+ Restores[i].canFold)
+ return true;
+ return false;
+}
+
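+/// eraseRestoreInfo - Invalidate any restore of vr recorded at the given
+/// index in basic block Id, so that no reload is issued for it.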
+void LiveIntervals::eraseRestoreInfo(int Id, int index, unsigned vr,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index && Restores[i].vreg == vr)
+ Restores[i].index = -1;
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ std::vector<LiveInterval*> &NewLIs) {
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineOperand &O = ri.getOperand();
+ MachineInstr *MI = &*ri;
+ ++ri;
+ if (O.isDef()) {
+ assert(MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF &&
+ "Register def was not rewritten?");
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ } else {
+ // This must be a use of an implicit_def, so it's not part of the live
+ // interval. Create a new empty live interval for it.
+ // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+ unsigned NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ vrm.setIsImplicitlyDefined(NewVReg);
+ NewLIs.push_back(&getOrCreateInterval(NewVReg));
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == li.reg)
+ MO.setReg(NewVReg);
+ }
+ }
+ }
+}
+
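+/// addIntervalsForSpillsFast - Spill the interval the simple way: assign one
+/// stack slot, try to fold each referencing instruction into memory, and
+/// otherwise rewrite it to use a fresh register that is reloaded before uses
+/// and stored after defs.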
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpillsFast(const LiveInterval &li,
+ const MachineLoopInfo *loopInfo,
+ VirtRegMap &vrm) {
+ unsigned slot = vrm.assignVirt2StackSlot(li.reg);
+
+ std::vector<LiveInterval*> added;
+
+ assert(li.weight != HUGE_VALF &&
+ "attempt to spill already spilled interval!");
+
+ DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+ DEBUG(li.dump());
+ DOUT << '\n';
+
+ const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+ MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg);
+ while (RI != mri_->reg_end()) {
+ MachineInstr* MI = &*RI;
+
+ SmallVector<unsigned, 2> Indices;
+ bool HasUse = false;
+ bool HasDef = false;
+
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg() || mop.getReg() != li.reg) continue;
+
+ HasUse |= MI->getOperand(i).isUse();
+ HasDef |= MI->getOperand(i).isDef();
+
+ Indices.push_back(i);
+ }
+
+ if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI),
+ Indices, true, slot, li.reg)) {
+ unsigned NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ vrm.assignVirt2StackSlot(NewVReg, slot);
+
+ // Create a new register for this spill.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+
+ // The spill weight is now infinity, as it cannot be spilled again.
+ nI.weight = HUGE_VALF;
+
+ // Rewrite register operands to use the new vreg.
+ for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
+ E = Indices.end(); I != E; ++I) {
+ MI->getOperand(*I).setReg(NewVReg);
+
+ if (MI->getOperand(*I).isUse())
+ MI->getOperand(*I).setIsKill(true);
+ }
+
+ // Fill in the new live interval.
+ unsigned index = getInstructionIndex(MI);
+ if (HasUse) {
+ LiveRange LR(getLoadIndex(index), getUseIndex(index),
+ nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ vrm.addRestorePoint(NewVReg, MI);
+ }
+ if (HasDef) {
+ LiveRange LR(getDefIndex(index), getStoreIndex(index),
+ nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ vrm.addSpillPoint(NewVReg, true, MI);
+ }
+
+ added.push_back(&nI);
+
+ DOUT << "\t\t\t\tadded new interval: ";
+ DEBUG(nI.dump());
+ DOUT << '\n';
+ }
+
+
+ RI = mri_->reg_begin(li.reg);
+ }
+
+ return added;
+}
+
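+/// addIntervalsForSpills - Spill li, possibly splitting it at basic block
+/// boundaries first, and return the new live intervals created for the
+/// rewritten defs and uses.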
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+
+ if (EnableFastSpilling)
+ return addIntervalsForSpillsFast(li, loopInfo, vrm);
+
+ assert(li.weight != HUGE_VALF &&
+ "attempt to spill already spilled interval!");
+
+ DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+ li.print(DOUT, tri_);
+ DOUT << '\n';
+
+ // Each bit specifies whether a spill is required in the MBB.
+ BitVector SpillMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+ BitVector RestoreMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+ DenseMap<unsigned,unsigned> MBBVRegsMap;
+ std::vector<LiveInterval*> NewLIs;
+ const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+ unsigned NumValNums = li.getNumValNums();
+ SmallVector<MachineInstr*, 4> ReMatDefs;
+ ReMatDefs.resize(NumValNums, NULL);
+ SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+ ReMatOrigDefs.resize(NumValNums, NULL);
+ SmallVector<int, 4> ReMatIds;
+ ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+ BitVector ReMatDelete(NumValNums);
+ unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
+
+ // Spilling a split live interval. It cannot be split any further. It is
+ // also guaranteed to be a single val# / range interval.
+ if (vrm.getPreSplitReg(li.reg)) {
+ vrm.setIsSplitFromReg(li.reg, 0);
+ // Unset the split kill marker on the last use.
+ unsigned KillIdx = vrm.getKillPoint(li.reg);
+ if (KillIdx) {
+ MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+ assert(KillMI && "Last use disappeared?");
+ int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+ assert(KillOp != -1 && "Last use disappeared?");
+ KillMI->getOperand(KillOp).setIsKill(false);
+ }
+ vrm.removeKillPoint(li.reg);
+ bool DefIsReMat = vrm.isReMaterialized(li.reg);
+ Slot = vrm.getStackSlot(li.reg);
+ assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+ MachineInstr *ReMatDefMI = DefIsReMat ?
+ vrm.getReMaterializedMI(li.reg) : NULL;
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+ bool IsFirstRange = true;
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ // If this is a split live interval with multiple ranges, it means there
+ // are two-address instructions that re-defined the value. Only the
+ // first def can be rematerialized!
+ if (IsFirstRange) {
+ // Note ReMatOrigDefMI has already been deleted.
+ rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ } else {
+ rewriteInstructionsForSpills(li, false, I, NULL, 0,
+ Slot, 0, false, false, false,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+ IsFirstRange = false;
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ return NewLIs;
+ }
+
+ bool TrySplit = SplitAtBB && !intervalIsInOneMBB(li);
+ if (SplitLimit != -1 && (int)numSplits >= SplitLimit)
+ TrySplit = false;
+ if (TrySplit)
+ ++numSplits;
+ bool NeedStackSlot = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ unsigned DefIdx = VNI->def;
+ if (DefIdx == ~1U)
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ MachineInstr *ReMatDefMI = (DefIdx == ~0u)
+ ? 0 : getInstructionFromIndex(DefIdx);
+ bool dummy;
+ if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+ // Remember how to remat the def of this val#.
+ ReMatOrigDefs[VN] = ReMatDefMI;
+ // Original def may be modified so we have to make a copy here.
+ MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+ ClonedMIs.push_back(Clone);
+ ReMatDefs[VN] = Clone;
+
+ bool CanDelete = true;
+ if (VNI->hasPHIKill) {
+ // A kill is a phi node; not all of its uses can be rematerialized.
+ // It must not be deleted.
+ CanDelete = false;
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ if (CanDelete)
+ ReMatDelete.set(VN);
+ } else {
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ }
+
+ // One stack slot per live interval.
+ if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+ if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+ Slot = vrm.assignVirt2StackSlot(li.reg);
+
+ // This case only occurs when the prealloc splitter has already assigned
+ // a stack slot to this vreg.
+ else
+ Slot = vrm.getStackSlot(li.reg);
+ }
+
+ // Create new intervals and rewrite defs and uses.
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
+ MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
+ bool DefIsReMat = ReMatDefMI != NULL;
+ bool CanDelete = ReMatDelete[I->valno->id];
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
+ rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+
+ // Insert spills / restores if we are splitting.
+ if (!TrySplit) {
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ return NewLIs;
+ }
+
+ SmallPtrSet<LiveInterval*, 4> AddedKill;
+ SmallVector<unsigned, 2> Ops;
+ if (NeedStackSlot) {
+ int Id = SpillMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &spills = SpillIdxes[Id];
+ for (unsigned i = 0, e = spills.size(); i != e; ++i) {
+ int index = spills[i].index;
+ unsigned VReg = spills[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ bool FoundUse = false;
+ Ops.clear();
+ if (spills[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ Ops.push_back(j);
+ if (MO.isDef())
+ continue;
+ if (isReMat ||
+ (!FoundUse && !alsoFoldARestore(Id, index, VReg,
+ RestoreMBBs, RestoreIdxes))) {
+ // MI has two-address uses of the same register. If the use
+ // isn't the first and only use in the BB, then we can't fold
+ // it. FIXME: Move this to rewriteInstructionsForSpills.
+ CanFold = false;
+ break;
+ }
+ FoundUse = true;
+ }
+ }
+ // Fold the store into the def if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
+ Folded = true;
+ if (FoundUse) {
+ // Also folded uses, do not issue a load.
+ eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
+ nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+ }
+ nI.removeRange(getDefIndex(index), getStoreIndex(index));
+ }
+ }
+
+ // Otherwise tell the spiller to issue a spill.
+ if (!Folded) {
+ LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
+ bool isKill = LR->end == getStoreIndex(index);
+ if (!MI->registerDefIsDead(nI.reg))
+ // No need to spill a dead def.
+ vrm.addSpillPoint(VReg, isKill, MI);
+ if (isKill)
+ AddedKill.insert(&nI);
+ }
+ }
+ Id = SpillMBBs.find_next(Id);
+ }
+ }
+
+ int Id = RestoreMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = restores.size(); i != e; ++i) {
+ int index = restores[i].index;
+ if (index == -1)
+ continue;
+ unsigned VReg = restores[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ Ops.clear();
+ if (restores[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ if (MO.isDef()) {
+ // If this restore were to be folded, it would have been folded
+ // already.
+ CanFold = false;
+ break;
+ }
+ Ops.push_back(j);
+ }
+ }
+
+ // Fold the load into the use if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (!isReMat)
+ Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
+ else {
+ MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
+ int LdSlot = 0;
+ bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ // If the rematerializable def is a load, also try to fold it.
+ if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
+ Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, isLoadSS, LdSlot, VReg);
+ if (!Folded) {
+ unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
+ if (ImpUse) {
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI and update the register
+ // interval's spill weight to HUGE_VALF to prevent it from being
+ // spilled.
+ LiveInterval &ImpLi = getInterval(ImpUse);
+ ImpLi.weight = HUGE_VALF;
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+ }
+ }
+ }
+ }
+ // If folding is not possible / failed, then tell the spiller to issue a
+ // load / rematerialization for us.
+ if (Folded)
+ nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+ else
+ vrm.addRestorePoint(VReg, MI);
+ }
+ Id = RestoreMBBs.find_next(Id);
+ }
+
+ // Finalize intervals: add kills, finalize spill weights, and filter out
+ // dead intervals.
+ std::vector<LiveInterval*> RetNewLIs;
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
+ LiveInterval *LI = NewLIs[i];
+ if (!LI->empty()) {
+ LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI);
+ if (!AddedKill.count(LI)) {
+ LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
+ unsigned LastUseIdx = getBaseIndex(LR->end);
+ MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
+ int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
+ assert(UseIdx != -1);
+ if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
+ LastUse->getOperand(UseIdx).setIsKill();
+ vrm.addKillPoint(LI->reg, LastUseIdx);
+ }
+ }
+ RetNewLIs.push_back(LI);
+ }
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+ return RetNewLIs;
+}
+
+/// hasAllocatableSuperReg - Return true if the specified physical register has
+/// any super register that's allocatable.
+bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
+ if (allocatableRegs_[*AS] && hasInterval(*AS))
+ return true;
+ return false;
+}
+
+/// getRepresentativeReg - Find the largest super register of the specified
+/// physical register.
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+ // Find the largest super-register that is allocatable.
+ unsigned BestReg = Reg;
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+ unsigned SuperReg = *AS;
+ if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+ BestReg = SuperReg;
+ break;
+ }
+ }
+ return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflict with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+ unsigned PhysReg) const {
+ unsigned NumConflicts = 0;
+ const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ unsigned Index = getInstructionIndex(MI);
+ if (pli.liveAt(Index))
+ ++NumConflicts;
+ }
+ return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut its interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+ unsigned PhysReg, VirtRegMap &vrm) {
+ unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+ for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+ // If there are registers which alias PhysReg but which are not
+ // sub-registers of the chosen representative super register, assert,
+ // since we can't handle that case yet.
+ assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+ tri_->isSuperRegister(*AS, SpillReg));
+
+ bool Cut = false;
+ LiveInterval &pli = getInterval(SpillReg);
+ SmallPtrSet<MachineInstr*, 8> SeenMIs;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ if (SeenMIs.count(MI))
+ continue;
+ SeenMIs.insert(MI);
+ unsigned Index = getInstructionIndex(MI);
+ if (pli.liveAt(Index)) {
+ vrm.addEmergencySpill(SpillReg, MI);
+ unsigned StartIdx = getLoadIndex(Index);
+ unsigned EndIdx = getStoreIndex(Index)+1;
+ if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
+ pli.removeRange(StartIdx, EndIdx);
+ Cut = true;
+ } else {
+ cerr << "Ran out of registers during register allocation!\n";
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+ cerr << "Please check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(cerr.stream(), tm_);
+ }
+ exit(1);
+ }
+ for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) {
+ if (!hasInterval(*AS))
+ continue;
+ LiveInterval &spli = getInterval(*AS);
+ if (spli.liveAt(Index))
+ spli.removeRange(getLoadIndex(Index), getStoreIndex(Index)+1);
+ }
+ }
+ }
+ return Cut;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ getInstructionIndex(startInst) + InstrSlots::DEF,
+ startInst, getVNInfoAllocator());
+ VN->hasPHIKill = true;
+ VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
+ LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
+ getMBBEndIdx(startInst->getParent()) + 1, VN);
+ Interval.addRange(LR);
+
+ return LR;
+}
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 0000000..86f7ea2
--- /dev/null
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,66 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
+
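+/// scaleNumbering - Rescale each stack slot interval after the instruction
+/// numbering of the function has been scaled by the given factor.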
+void LiveStacks::scaleNumbering(int factor) {
+ // Scale the intervals.
+ for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
+ LI->second.scaleNumbering(factor);
+ }
+}
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(std::ostream &O, const Module*) const {
+ O << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(O);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ O << " [" << RC->getName() << "]\n";
+ else
+ O << " [Unknown]\n";
+ }
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..c33d81e
--- /dev/null
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,695 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
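+// For example (an illustrative sketch in pseudo machine code, not any real
+// target's syntax):
+//
+//   t1 = add a, b     ; if a and b have no later uses, both are killed here
+//   t2 = mul t1, t1   ; last use of t1, so t1 is killed here
+//   ...               ; if t2 is never read, its def above is marked dead
+//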
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/alloca.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+}
+
+void LiveVariables::VarInfo::dump() const {
+ cerr << " Alive in blocks: ";
+ for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+ E = AliveBlocks.end(); I != E; ++I)
+ cerr << *I << ", ";
+ cerr << "\n Killed by:";
+ if (Kills.empty())
+ cerr << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ cerr << "\n #" << i << ": " << *Kills[i];
+ cerr << "\n";
+ }
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
+ if (RegIdx >= VirtRegInfo.size()) {
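+ // Grow geometrically (at least doubling) so that creating VarInfo
+ // objects for N virtual registers stays amortized O(N).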
+ if (RegIdx >= 2*VirtRegInfo.size())
+ VirtRegInfo.resize(RegIdx*2);
+ else
+ VirtRegInfo.resize(2*VirtRegInfo.size());
+ }
+ return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+ E = MBB->pred_rend(); PI != E; ++PI)
+ WorkList.push_back(*PI);
+}
+
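+/// MarkVirtRegAliveInBlock - Drive the marking above with an explicit
+/// worklist; this avoids deep recursion on long predecessor chains.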
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
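+/// HandleVirtRegUse - Update the VarInfo for a use of the given virtual
+/// register: either extend the live range to this new kill or mark the
+/// predecessor blocks as live back toward the def.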
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo& VRInfo = getVarInfo(reg);
+ VRInfo.NumUses++;
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, so it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
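+/// HandleVirtRegDef - Record a def of the given virtual register. If the
+/// register is not marked alive in any block, the def tentatively becomes
+/// its own kill (i.e., it is dead).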
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+ // If vr is not alive in any block, then it defaults to dead.
+ VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-register that's defined.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+ unsigned &PartDefReg) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+ PartDefReg = LastDefReg;
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ // If there was a previous use or a "full" def, all is well.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... <imp-def EAX>, <imp-kill AH>
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ unsigned PartDefReg = 0;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefReg);
+ // If LastPartialDef is NULL, it must be using a livein register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (Processed.count(SubReg))
+ continue;
+ if (SubReg == PartDefReg || TRI->isSubRegister(PartDefReg, SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Processed.insert(*SS);
+ }
+ }
+ }
+
+ // There was an earlier def of a super-register. Add implicit def to that MI.
+ //
+ // A: EAX = ...
+ // B: ... = AX
+ //
+ // Add implicit def to A if there isn't a use of AX (or EAX) before B.
+ if (!PhysRegUse[Reg]) {
+ MachineInstr *Def = PhysRegDef[Reg];
+ if (Def && !Def->modifiesRegister(Reg))
+ Def->addOperand(MachineOperand::CreateReg(Reg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+ }
+
+ // Remember this use.
+ PhysRegUse[Reg] = MI;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PhysRegUse[SubReg] = MI;
+}
+
+/// hasRegisterUseBelow - Return true if the specified register is used after
+/// the current instruction and before its next definition.
+bool LiveVariables::hasRegisterUseBelow(unsigned Reg,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock *MBB) {
+ if (I == MBB->end())
+ return false;
+
+ // First find out if there are any uses / defs below.
+ bool hasDistInfo = true;
+ unsigned CurDist = DistanceMap[I];
+ SmallVector<MachineInstr*, 4> Uses;
+ SmallVector<MachineInstr*, 4> Defs;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineOperand &UDO = RI.getOperand();
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ bool isBelow = false;
+ if (DI == DistanceMap.end()) {
+ // Must be below if it hasn't been assigned a distance yet.
+ isBelow = true;
+ hasDistInfo = false;
+ } else if (DI->second > CurDist)
+ isBelow = true;
+ if (isBelow) {
+ if (UDO.isUse())
+ Uses.push_back(UDMI);
+ if (UDO.isDef())
+ Defs.push_back(UDMI);
+ }
+ }
+
+ if (Uses.empty())
+ // No uses below.
+ return false;
+ if (Defs.empty())
+ // There are uses below but no defs below.
+ return true;
+ // There are both uses and defs below. We need to know which comes first.
+ if (!hasDistInfo) {
+ // Complete DistanceMap for this MBB. This information is computed only
+ // once per MBB.
+ ++I;
+ ++CurDist;
+ for (MachineBasicBlock::iterator E = MBB->end(); I != E; ++I, ++CurDist)
+ DistanceMap.insert(std::make_pair(I, CurDist));
+ }
+
+ unsigned EarliestUse = DistanceMap[Uses[0]];
+ for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+ unsigned Dist = DistanceMap[Uses[i]];
+ if (Dist < EarliestUse)
+ EarliestUse = Dist;
+ }
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Dist = DistanceMap[Defs[i]];
+ if (Dist < EarliestUse)
+ // The register is defined before its first use below.
+ return false;
+ }
+ return true;
+}
+
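+/// HandlePhysRegKill - Add kill / dead flags at the last reference of Reg or
+/// of any of its sub-registers. Returns false if no part of Reg is live.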
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+ if (!PhysRegUse[Reg] && !PhysRegDef[Reg])
+ return false;
+
+ MachineInstr *LastRefOrPartRef = PhysRegUse[Reg]
+ ? PhysRegUse[Reg] : PhysRegDef[Reg];
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, AX<imp-use, kill>
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // AX<dead> =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // AX<dead> = AL<imp-def>
+ // = AL<kill>
+ // AX =
+ SmallSet<unsigned, 8> PartUses;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ PartUses.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.insert(*SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI)
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+
+ /* Partial uses. Mark register def dead and add implicit def of
+ sub-registers which are used.
+ FIXME: LiveIntervalAnalysis can't handle this yet!
+ EAX<dead> = op AL<imp-def>
+ That is, the EAX def is dead but the AL def extends past it.
+ Enable this after live interval analysis is fixed to improve codegen!
+ else if (!PhysRegUse[Reg]) {
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (PartUses.count(SubReg)) {
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true, true));
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.erase(*SS);
+ }
+ }
+ } */
+ else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
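+/// HandlePhysRegDef - Kill whatever parts of Reg and its sub-registers were
+/// live, extend any re-defined super-register live ranges, and record MI as
+/// the new def. MI may be null when processing block live-ins / live-outs.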
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ Live.insert(Reg);
+ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ Live.insert(*SS);
+ } else {
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ // If a register isn't itself defined, but all of the parts that make
+ // it up are defined, then consider it also defined.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ Live.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Live.insert(*SS);
+ }
+ }
+ }
+
+ // Starting from the largest piece, find the last time any part of the
+ // register is referenced.
+ if (!HandlePhysRegKill(Reg, MI)) {
+ // Only some of the sub-registers are used.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ if (HandlePhysRegKill(SubReg, MI)) {
+ Live.erase(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Live.erase(*SS);
+ }
+ }
+ assert(Live.empty() && "Not all defined registers are killed / dead?");
+ }
+
+ if (MI) {
+ // Does this extend the live range of a super-register?
+ SmallSet<unsigned, 8> Processed;
+ for (const unsigned *SuperRegs = TRI->getSuperRegisters(Reg);
+ unsigned SuperReg = *SuperRegs; ++SuperRegs) {
+ if (Processed.count(SuperReg))
+ continue;
+ MachineInstr *LastRef = PhysRegUse[SuperReg]
+ ? PhysRegUse[SuperReg] : PhysRegDef[SuperReg];
+ if (LastRef && LastRef != MI) {
+ // The larger register is previously defined. Now a smaller part is
+ // being re-defined. Treat it as read/mod/write if there are uses
+ // below.
+ // EAX =
+ // AX = EAX<imp-use,kill>, EAX<imp-def>
+ // ...
+ // = EAX
+ if (hasRegisterUseBelow(SuperReg, MI, MI->getParent())) {
+ MI->addOperand(MachineOperand::CreateReg(SuperReg, false/*IsDef*/,
+ true/*IsImp*/,true/*IsKill*/));
+ MI->addOperand(MachineOperand::CreateReg(SuperReg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SuperReg] = MI;
+ PhysRegUse[SuperReg] = NULL;
+ Processed.insert(SuperReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) {
+ PhysRegDef[*SS] = MI;
+ PhysRegUse[*SS] = NULL;
+ Processed.insert(*SS);
+ }
+ } else {
+ // Otherwise, the super register is killed.
+ if (HandlePhysRegKill(SuperReg, MI)) {
+ PhysRegDef[SuperReg] = NULL;
+ PhysRegUse[SuperReg] = NULL;
+ for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) {
+ PhysRegDef[*SS] = NULL;
+ PhysRegUse[*SS] = NULL;
+ Processed.insert(*SS);
+ }
+ }
+ }
+ }
+ }
+
+ // Remember this def.
+ PhysRegDef[Reg] = MI;
+ PhysRegUse[Reg] = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ PhysRegDef[SubReg] = MI;
+ PhysRegUse[SubReg] = NULL;
+ }
+ }
+}
+
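+/// runOnMachineFunction - Compute kill and dead information for every virtual
+/// and allocatable physical register with a single depth-first walk over the
+/// CFG of the function.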
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+
+ ReservedRegisters = TRI->getReservedRegs(mf);
+
+ unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef = new MachineInstr*[NumRegs];
+ PhysRegUse = new MachineInstr*[NumRegs];
+ PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+
+ // Get some space for a respectable number of registers.
+ VirtRegInfo.resize(64);
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+
+ // Mark live-in registers as live-in.
+ for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, 0);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->getOpcode() == TargetInstrInfo::PHI)
+ NumOperandsToProcess = 1;
+
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse())
+ UseRegs.push_back(MOReg);
+ if (MO.isDef())
+ DefRegs.push_back(MOReg);
+ }
+
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegDef(MOReg, MI);
+ }
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // Finally, if the last instruction in the block is a return, make sure to
+ // mark it as using all of the live-out values in the function.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register!");
+ HandlePhysRegUse(*I, Ret);
+
+ // Add live-out registers as implicit uses.
+ if (!Ret->readsRegister(*I))
+ Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (PhysRegDef[i] || PhysRegUse[i])
+ HandlePhysRegDef(i, 0);
+
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto MI's.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+ for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[i].Kills[j] ==
+ MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
+ VirtRegInfo[i]
+ .Kills[j]->addRegisterDead(i +
+ TargetRegisterInfo::FirstVirtualRegister,
+ TRI);
+ else
+ VirtRegInfo[i]
+ .Kills[j]->addRegisterKilled(i +
+ TargetRegisterInfo::FirstVirtualRegister,
+ TRI);
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If there are, it is due to a bug in the instruction selector
+ // or some other part of the code generator.
+#ifndef NDEBUG
+ for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ delete[] PhysRegDef;
+ delete[] PhysRegUse;
+ delete[] PHIVarInfo;
+
+ return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
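+ // Reassign so 'removed' is still used when asserts compile away (NDEBUG).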
+ removed = true;
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the variable information of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
+}
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
new file mode 100644
index 0000000..14acb71
--- /dev/null
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -0,0 +1,292 @@
+//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineFunction pass that runs after register
+// allocation and turns subreg insert/extract instructions into register
+// copies, as needed. This ensures correct codegen even if the coalescer
+// isn't able to remove all subreg instructions.
+//
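+// For example (illustrative operands only):
+//   %reg1 = EXTRACT_SUBREG %reg2, 1
+// is rewritten as a plain copy from the addressed sub-register of %reg2 into
+// %reg1, or erased entirely when the two registers already coincide.
+//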
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowersubregs"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN LowerSubregsInstructionPass
+ : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {}
+
+ const char *getPassName() const {
+ return "Subregister lowering instruction pass";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+ bool LowerExtract(MachineInstr *MI);
+ bool LowerInsert(MachineInstr *MI);
+ bool LowerSubregToReg(MachineInstr *MI);
+
+ void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo &TRI);
+ void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
+ const TargetRegisterInfo &TRI);
+ };
+
+ char LowerSubregsInstructionPass::ID = 0;
+}
+
+FunctionPass *llvm::createLowerSubregsPass() {
+ return new LowerSubregsInstructionPass();
+}
+
+/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the dead flag.
+void
+LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
+ unsigned DstReg,
+ const TargetRegisterInfo &TRI) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterDead(DstReg, &TRI))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyRegToReg output doesn't reference destination register!");
+ }
+}
+
+/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the kill flag.
+void
+LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
+ unsigned SrcReg,
+ const TargetRegisterInfo &TRI) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterKilled(SrcReg, &TRI))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyRegToReg output doesn't reference source register!");
+ }
+}
+
+bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
+ MI->getOperand(2).isImm() && "Malformed extract_subreg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SuperReg = MI->getOperand(1).getReg();
+ unsigned SubIdx = MI->getOperand(2).getImm();
+ unsigned SrcReg = TRI.getSubReg(SuperReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) &&
+ "Extract supperg source must be a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Extract destination must be in a physical register");
+
+ DOUT << "subreg: CONVERTING: " << *MI;
+
+ if (SrcReg == DstReg) {
+ // No need to insert an identity copy instruction.
+ DOUT << "subreg: eliminated!";
+ // Find the kill of the destination register's live range, and insert
+ // a kill of the source register at that point.
+ if (MI->getOperand(1).isKill() && !MI->getOperand(0).isDead())
+ for (MachineBasicBlock::iterator MII =
+ next(MachineBasicBlock::iterator(MI));
+ MII != MBB->end(); ++MII)
+ if (MII->killsRegister(DstReg, &TRI)) {
+ MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true);
+ break;
+ }
+ } else {
+ // Insert copy
+ const TargetRegisterClass *TRC = TRI.getPhysicalRegisterRegClass(DstReg);
+ assert(TRC == TRI.getPhysicalRegisterRegClass(SrcReg) &&
+ "Extract subreg and Dst must be of same register class");
+ TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRC, TRC);
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstReg, TRI);
+ if (MI->getOperand(1).isKill())
+ TransferKillFlag(MI, SrcReg, TRI);
+
+#ifndef NDEBUG
+ MachineBasicBlock::iterator dMI = MI;
+ DOUT << "subreg: " << *(--dMI);
+#endif
+ }
+
+ DOUT << "\n";
+ MBB->erase(MI);
+ return true;
+}
+
+bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ unsigned InsSIdx = MI->getOperand(2).getSubReg();
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DOUT << "subreg: CONVERTING: " << *MI;
+
+ if (DstSubReg == InsReg && InsSIdx == 0) {
+ // No need to insert an identity copy instruction.
+ // Watch out for case like this:
+ // %RAX<def> = ...
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
+ // The first def is defining RAX, not EAX, so the top bits were not
+ // zero-extended.
+ DOUT << "subreg: eliminated!";
+ } else {
+ // Insert sub-register copy
+ const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
+ const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg);
+ TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ if (MI->getOperand(2).isKill())
+ TransferKillFlag(MI, InsReg, TRI);
+
+#ifndef NDEBUG
+ MachineBasicBlock::iterator dMI = MI;
+ DOUT << "subreg: " << *(--dMI);
+#endif
+ }
+
+ DOUT << "\n";
+ MBB->erase(MI);
+ return true;
+}
+
+bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid insert_subreg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+#ifndef NDEBUG
+ unsigned SrcReg = MI->getOperand(1).getReg();
+#endif
+ unsigned InsReg = MI->getOperand(2).getReg();
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Insert superreg source must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DOUT << "subreg: CONVERTING: " << *MI;
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+ DOUT << "subreg: eliminated!";
+ } else {
+ // Insert sub-register copy
+ const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
+ const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg);
+ TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ if (MI->getOperand(1).isKill())
+ TransferKillFlag(MI, InsReg, TRI);
+
+#ifndef NDEBUG
+ MachineBasicBlock::iterator dMI = MI;
+ DOUT << "subreg: " << *(--dMI);
+#endif
+ }
+
+ DOUT << "\n";
+ MBB->erase(MI);
+ return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "Machine Function\n";
+
+ bool MadeChange = false;
+
+ DOUT << "********** LOWERING SUBREG INSTRS **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineInstr *MI = mi++;
+
+ if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ MadeChange |= LowerExtract(MI);
+ } else if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ MadeChange |= LowerInsert(MI);
+ } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ MadeChange |= LowerSubregToReg(MI);
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp
new file mode 100644
index 0000000..4332627
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.cpp
@@ -0,0 +1,976 @@
+//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent Mach-O writer. This file writes
+// out the Mach-O file in the following order:
+//
+// #1 FatHeader (universal-only)
+// #2 FatArch (universal-only, 1 per universal arch)
+// Per arch:
+// #3 Header
+// #4 Load Commands
+// #5 Sections
+// #6 Relocations
+// #7 Symbols
+// #8 Strings
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstring>
+using namespace llvm;
+
+/// AddMachOWriter - Concrete function to add the Mach-O writer to the function
+/// pass manager.
+MachineCodeEmitter *llvm::AddMachOWriter(PassManagerBase &PM,
+ raw_ostream &O,
+ TargetMachine &TM) {
+ MachOWriter *MOW = new MachOWriter(O, TM);
+ PM.add(MOW);
+ return &MOW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+// MachOCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code
+ /// for functions to the Mach-O file.
+ class MachOCodeEmitter : public MachineCodeEmitter {
+ MachOWriter &MOW;
+
+ /// Target machine description.
+ TargetMachine &TM;
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating what header values and flags to set.
+ bool is64Bit, isLittleEndian;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+ /// CPLocations - This is a map of constant pool indices to offsets from the
+ /// start of the section for that constant pool index.
+ std::vector<uintptr_t> CPLocations;
+
+ /// CPSections - This is a map of constant pool indices to the MachOSection
+ /// containing the constant pool entry for that index.
+ std::vector<unsigned> CPSections;
+
+ /// JTLocations - This is a map of jump table indices to offsets from the
+ /// start of the section for that jump table index.
+ std::vector<uintptr_t> JTLocations;
+
+ /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ public:
+ MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) {
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+ }
+
+ virtual void startFunction(MachineFunction &MF);
+ virtual bool finishFunction(MachineFunction &MF);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+ assert(CPLocations.size() > Index && "CP not emitted!");
+ return CPLocations[Index];
+ }
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+ assert(JTLocations.size() > Index && "JT not emitted!");
+ return JTLocations[Index];
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
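+ // Grow the table geometrically to amortize the cost of repeated resizes.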
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+ }
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ virtual uintptr_t getLabelAddress(uint64_t Label) const {
+ assert(0 && "get Label not implemented");
+ abort();
+ return 0;
+ }
+
+ virtual void emitLabel(uint64_t LabelID) {
+ assert(0 && "emit Label not implemented");
+ abort();
+ }
+
+
+ virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+ /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+ virtual void startGVStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment = 1) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ virtual void startGVStub(const GlobalValue* F, void *Buffer,
+ unsigned StubSize) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ virtual void *finishGVStub(const GlobalValue* F) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ return 0;
+ }
+ };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void MachOCodeEmitter::startFunction(MachineFunction &MF) {
+ const TargetData *TD = TM.getTargetData();
+ const Function *F = MF.getFunction();
+
+ // Align the output buffer to the appropriate alignment, power of 2.
+ unsigned FnAlign = F->getAlignment();
+ unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+ unsigned Align = std::max(FnAlign, TDAlign);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ Align = Log2_32(Align);
+
+ // Get the Mach-O Section that this function belongs in.
+ MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+ // FIXME: better memory management
+ MOS->SectionData.reserve(4096);
+ BufferBegin = &MOS->SectionData[0];
+ BufferEnd = BufferBegin + MOS->SectionData.capacity();
+
+ // Upgrade the section alignment if required.
+ if (MOS->align < Align) MOS->align = Align;
+
+ // Round the size up to the correct alignment for starting the new function.
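+ // (Illustrative: with Align == 4, i.e. 16-byte alignment, a size of 10 is
+ // first bumped to 26 and then masked down to 16, the next multiple of 16.)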
+ if ((MOS->size & ((1 << Align) - 1)) != 0) {
+ MOS->size += (1 << Align);
+ MOS->size &= ~((1 << Align) - 1);
+ }
+
+ // FIXME: Using MOS->size directly here instead of calculating it from the
+ // output buffer size (impossible because the code emitter deals only in raw
+ // bytes) forces us to manually synchronize size and write padding zero bytes
+ // to the output buffer for all non-text sections. For text sections, we do
+ // not synchonize the output buffer, and we just blow up if anyone tries to
+ // write non-code to it. An assert should probably be added to
+ // AddSymbolToSection to prevent calling it on the text section.
+ CurBufferPtr = BufferBegin + MOS->size;
+
+ // Clear per-function data structures.
+ CPLocations.clear();
+ CPSections.clear();
+ JTLocations.clear();
+ MBBLocations.clear();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
+ // Get the Mach-O Section that this function belongs in.
+ MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+ // Get a symbol for the function to add to the symbol table
+ // FIXME: it seems like we should call something like AddSymbolToSection
+ // in startFunction rather than changing the section size and symbol n_value
+ // here.
+ const GlobalValue *FuncV = MF.getFunction();
+ MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TM);
+ FnSym.n_value = MOS->size;
+ MOS->size = CurBufferPtr - BufferBegin;
+
+ // Emit constant pool to appropriate section(s)
+ emitConstantPool(MF.getConstantPool());
+
+ // Emit jump tables to appropriate section
+ emitJumpTables(MF.getJumpTableInfo());
+
+ // If we have emitted any relocations to function-specific objects such as
+ // basic blocks, constant pools entries, or jump tables, record their
+ // addresses now so that we can rewrite them with the correct addresses
+ // later.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ intptr_t Addr;
+
+ if (MR.isBasicBlock()) {
+ Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setConstantVal(MOS->Index);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isJumpTableIndex()) {
+ Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+ MR.setConstantVal(MOW.getJumpTableSection()->Index);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isConstantPoolIndex()) {
+ Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isGlobalValue()) {
+ // FIXME: This should be a set or something that uniques
+ MOW.PendingGlobals.push_back(MR.getGlobalValue());
+ } else {
+ assert(0 && "Unhandled relocation type");
+ }
+ MOS->Relocations.push_back(MR);
+ }
+ Relocations.clear();
+
+ // Finally, add it to the symtab.
+ MOW.SymbolTable.push_back(FnSym);
+ return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in, allocate space for it, and emit it to the
+/// Section data buffer.
+void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // FIXME: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for mach-o jump tables!");
+
+ // Although there is no strict necessity that I am aware of, we will do what
+ // gcc for OS X does and put each constant pool entry in a section of constant
+ // objects of a certain size. That means that float constants go in the
+ // literal4 section, and double objects go in literal8, etc.
+ //
+ // FIXME: revisit this decision if we ever do the "stick everything into one
+ // giant object for PIC" optimization.
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const Type *Ty = CP[i].getType();
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+
+ MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ CPLocations.push_back(Sec->SectionData.size());
+ CPSections.push_back(Sec->Index);
+
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size += Size;
+
+ // Allocate space in the section for the global.
+ // FIXME: need alignment?
+ // FIXME: share between here and AddSymbolToSection?
+ for (unsigned j = 0; j < Size; ++j)
+ SecDataOut.outbyte(0);
+
+ MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
+ TM.getTargetData(), Sec->Relocations);
+ }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // FIXME: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for mach-o jump tables!");
+
+ MachOWriter::MachOSection *Sec = MOW.getJumpTableSection();
+ unsigned TextSecIndex = MOW.getTextSection()->Index;
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ // For each jump table, record its offset from the start of the section,
+ // reserve space for the relocations to the MBBs, and add the relocations.
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ JTLocations.push_back(Sec->SectionData.size());
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(),
+ MBBs[mi]));
+ MR.setResultPointer((void *)JTLocations[i]);
+ MR.setConstantVal(TextSecIndex);
+ Sec->Relocations.push_back(MR);
+ SecDataOut.outaddr(0);
+ }
+ }
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size = Sec->SectionData.size();
+}
+
+//===----------------------------------------------------------------------===//
+// MachOWriter Implementation
+//===----------------------------------------------------------------------===//
+
+char MachOWriter::ID = 0;
+MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm)
+ : MachineFunctionPass(&ID), O(o), TM(tm) {
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+ // Create the machine code emitter object for this target.
+ MCE = new MachOCodeEmitter(*this);
+}
+
+MachOWriter::~MachOWriter() {
+ delete MCE;
+}
+
+void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
+ const Type *Ty = GV->getType()->getElementType();
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align = TM.getTargetData()->getPreferredAlignment(GV);
+
+ // Reserve space in the .bss section for this symbol while maintaining the
+ // desired section alignment, which must be at least as much as required by
+ // this symbol.
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ if (Align) {
+ uint64_t OrigSize = Sec->size;
+ Align = Log2_32(Align);
+ Sec->align = std::max(unsigned(Sec->align), Align);
+ // Align now holds the log2 of the alignment; round the section size up to
+ // the next multiple of (1 << Align) bytes.
+ Sec->size = (Sec->size + (1 << Align) - 1) & ~(uint64_t)((1 << Align) - 1);
+
+ // Add alignment padding to buffer as well.
+ // FIXME: remove when we have unified size + output buffer
+ unsigned AlignedSize = Sec->size - OrigSize;
+ for (unsigned i = 0; i < AlignedSize; ++i)
+ SecDataOut.outbyte(0);
+ }
+ // Globals without external linkage apparently do not go in the symbol table.
+ if (!GV->hasLocalLinkage()) {
+ MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM);
+ Sym.n_value = Sec->size;
+ SymbolTable.push_back(Sym);
+ }
+
+ // Record the offset of the symbol, and then allocate space for it.
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size += Size;
+
+ // Now that we know what section the GlobalVariable is going to be emitted
+ // into, update our mappings.
+ // FIXME: We may also need to update this when outputting non-GlobalVariable
+ // GlobalValues such as functions.
+ GVSection[GV] = Sec;
+ GVOffset[GV] = Sec->SectionData.size();
+
+ // Allocate space in the section for the global.
+ for (unsigned i = 0; i < Size; ++i)
+ SecDataOut.outbyte(0);
+}
+
+void MachOWriter::EmitGlobal(GlobalVariable *GV) {
+ const Type *Ty = GV->getType()->getElementType();
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+ bool NoInit = !GV->hasInitializer();
+
+ // If this global has a zero initializer, it is part of the .bss or common
+ // section.
+ if (NoInit || GV->getInitializer()->isNullValue()) {
+ // If this global is part of the common block, add it now. Variables are
+ // part of the common block if they are zero initialized and allowed to be
+ // merged with other symbols.
+ if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
+ GV->hasCommonLinkage()) {
+ MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM);
+ // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
+ // bytes of the symbol.
+ ExtOrCommonSym.n_value = Size;
+ SymbolTable.push_back(ExtOrCommonSym);
+ // Remember that we've seen this symbol
+ GVOffset[GV] = Size;
+ return;
+ }
+ // Otherwise, this symbol is part of the .bss section.
+ MachOSection *BSS = getBSSSection();
+ AddSymbolToSection(BSS, GV);
+ return;
+ }
+
+ // Scalar read-only data goes in a literal section if the scalar is 4, 8, or
+ // 16 bytes, or a cstring. Other read only data goes into a regular const
+ // section. Read-write data goes in the data section.
+ MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) :
+ getDataSection();
+ AddSymbolToSection(Sec, GV);
+ InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV],
+ TM.getTargetData(), Sec->Relocations);
+}
+
+
+bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the MCE object.
+ return false;
+}
+
+bool MachOWriter::doInitialization(Module &M) {
+ // Set the magic value, now that we know the pointer size and endianness
+ Header.setMagic(isLittleEndian, is64Bit);
+
+ // Set the file type
+ // FIXME: this only works for object files, we do not support the creation
+ // of dynamic libraries or executables at this time.
+ Header.filetype = MachOHeader::MH_OBJECT;
+
+ Mang = new Mangler(M);
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the Mach-O file to 'O'.
+bool MachOWriter::doFinalization(Module &M) {
+ // FIXME: we don't handle debug info yet, we should probably do that.
+
+ // Okay, the .text section has been completed, build the .data, .bss, and
+ // "common" sections next.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit the header and load commands.
+ EmitHeaderAndLoadCommands();
+
+ // Emit the various sections and their relocation info.
+ EmitSections();
+
+ // Write the symbol table and the string table to the end of the file.
+ O.write((char*)&SymT[0], SymT.size());
+ O.write((char*)&StrT[0], StrT.size());
+
+ // We are done with the abstract symbols.
+ SectionList.clear();
+ SymbolTable.clear();
+ DynamicSymbolTable.clear();
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+ return false;
+}
+
+void MachOWriter::EmitHeaderAndLoadCommands() {
+ // Step #0: Fill in the segment load command size, since we need it to figure
+ // out the rest of the header fields
+ MachOSegment SEG("", is64Bit);
+ SEG.nsects = SectionList.size();
+ SEG.cmdsize = SEG.cmdSize(is64Bit) +
+ SEG.nsects * SectionList[0]->cmdSize(is64Bit);
+
+ // Step #1: calculate the number of load commands. We always have at least
+ // one, for the LC_SEGMENT load command, plus two for the normal
+ // and dynamic symbol tables, if there are any symbols.
+ Header.ncmds = SymbolTable.empty() ? 1 : 3;
+
+ // Step #2: calculate the size of the load commands
+ Header.sizeofcmds = SEG.cmdsize;
+ if (!SymbolTable.empty())
+ Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize;
+
+ // Step #3: write the header to the file
+ // Local alias to shorten the code that follows.
+ DataBuffer &FH = Header.HeaderData;
+ OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+ FHOut.outword(Header.magic);
+ FHOut.outword(TM.getMachOWriterInfo()->getCPUType());
+ FHOut.outword(TM.getMachOWriterInfo()->getCPUSubType());
+ FHOut.outword(Header.filetype);
+ FHOut.outword(Header.ncmds);
+ FHOut.outword(Header.sizeofcmds);
+ FHOut.outword(Header.flags);
+ if (is64Bit)
+ FHOut.outword(Header.reserved);
+
+ // Step #4: Finish filling in the segment load command and write it out
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ SEG.filesize += (*I)->size;
+
+ SEG.vmsize = SEG.filesize;
+ SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds;
+
+ FHOut.outword(SEG.cmd);
+ FHOut.outword(SEG.cmdsize);
+ FHOut.outstring(SEG.segname, 16);
+ FHOut.outaddr(SEG.vmaddr);
+ FHOut.outaddr(SEG.vmsize);
+ FHOut.outaddr(SEG.fileoff);
+ FHOut.outaddr(SEG.filesize);
+ FHOut.outword(SEG.maxprot);
+ FHOut.outword(SEG.initprot);
+ FHOut.outword(SEG.nsects);
+ FHOut.outword(SEG.flags);
+
+ // Step #5: Finish filling in the fields of the MachOSections
+ uint64_t currentAddr = 0;
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ MachOSection *MOS = *I;
+ MOS->addr = currentAddr;
+ MOS->offset = currentAddr + SEG.fileoff;
+
+ // FIXME: do we need to do something with alignment here?
+ currentAddr += MOS->size;
+ }
+
+ // Step #6: Emit the symbol table to temporary buffers, so that we know the
+ // size of the string table when we write the next load command. This also
+ // sorts and assigns indices to each of the symbols, which is necessary for
+ // emitting relocations to externally-defined objects.
+ BufferSymbolAndStringTable();
+
+ // Step #7: Calculate the number of relocations for each section, and write
+ // out the section command for each one.
+ currentAddr += SEG.fileoff;
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ MachOSection *MOS = *I;
+ // Convert the relocations to target-specific relocations, and fill in the
+ // relocation offset for this section.
+ CalculateRelocations(*MOS);
+ MOS->reloff = MOS->nreloc ? currentAddr : 0;
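+ // Each Mach-O relocation_info entry occupies 8 bytes on disk.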
+ currentAddr += MOS->nreloc * 8;
+
+ // write the finalized section command to the output buffer
+ FHOut.outstring(MOS->sectname, 16);
+ FHOut.outstring(MOS->segname, 16);
+ FHOut.outaddr(MOS->addr);
+ FHOut.outaddr(MOS->size);
+ FHOut.outword(MOS->offset);
+ FHOut.outword(MOS->align);
+ FHOut.outword(MOS->reloff);
+ FHOut.outword(MOS->nreloc);
+ FHOut.outword(MOS->flags);
+ FHOut.outword(MOS->reserved1);
+ FHOut.outword(MOS->reserved2);
+ if (is64Bit)
+ FHOut.outword(MOS->reserved3);
+ }
+
+ // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands
+ SymTab.symoff = currentAddr;
+ SymTab.nsyms = SymbolTable.size();
+ SymTab.stroff = SymTab.symoff + SymT.size();
+ SymTab.strsize = StrT.size();
+ FHOut.outword(SymTab.cmd);
+ FHOut.outword(SymTab.cmdsize);
+ FHOut.outword(SymTab.symoff);
+ FHOut.outword(SymTab.nsyms);
+ FHOut.outword(SymTab.stroff);
+ FHOut.outword(SymTab.strsize);
+
+ // FIXME: set DySymTab fields appropriately
+ // We should probably just update these in BufferSymbolAndStringTable since
+ // that's where we partition the different kinds of symbols.
+ FHOut.outword(DySymTab.cmd);
+ FHOut.outword(DySymTab.cmdsize);
+ FHOut.outword(DySymTab.ilocalsym);
+ FHOut.outword(DySymTab.nlocalsym);
+ FHOut.outword(DySymTab.iextdefsym);
+ FHOut.outword(DySymTab.nextdefsym);
+ FHOut.outword(DySymTab.iundefsym);
+ FHOut.outword(DySymTab.nundefsym);
+ FHOut.outword(DySymTab.tocoff);
+ FHOut.outword(DySymTab.ntoc);
+ FHOut.outword(DySymTab.modtaboff);
+ FHOut.outword(DySymTab.nmodtab);
+ FHOut.outword(DySymTab.extrefsymoff);
+ FHOut.outword(DySymTab.nextrefsyms);
+ FHOut.outword(DySymTab.indirectsymoff);
+ FHOut.outword(DySymTab.nindirectsyms);
+ FHOut.outword(DySymTab.extreloff);
+ FHOut.outword(DySymTab.nextrel);
+ FHOut.outword(DySymTab.locreloff);
+ FHOut.outword(DySymTab.nlocrel);
+
+ O.write((char*)&FH[0], FH.size());
+}
+
+/// EmitSections - Now that we have constructed the file header and load
+/// commands, emit the data for each section to the file.
+void MachOWriter::EmitSections() {
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ // Emit the contents of each section
+ O.write((char*)&(*I)->SectionData[0], (*I)->size);
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ // Emit the relocation entry data for each section.
+ O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
+}
+
+/// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+/// a local symbol rather than an external symbol.
+bool MachOWriter::PartitionByLocal(const MachOSym &Sym) {
+ return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
+}
+
+/// PartitionByDefined - Simple boolean predicate that returns true if Sym is
+/// defined in this module.
+bool MachOWriter::PartitionByDefined(const MachOSym &Sym) {
+ // FIXME: Do N_ABS or N_INDR count as defined?
+ return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
+}
+
+/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them
+/// each a string table index so that they appear in the correct order in the
+/// output file.
+void MachOWriter::BufferSymbolAndStringTable() {
+ // The order of the symbol table is:
+ // 1. local symbols
+ // 2. defined external symbols (sorted by name)
+ // 3. undefined external symbols (sorted by name)
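+ // (Illustrative layout after sorting and partitioning:
+ // { locals | defined externals, by name | undefined externals, by name }.)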
+
+ // Before sorting the symbols, check the PendingGlobals for any undefined
+ // globals that need to be put in the symbol table.
+ for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(),
+ E = PendingGlobals.end(); I != E; ++I) {
+ if (GVOffset[*I] == 0 && GVSection[*I] == 0) {
+ MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM);
+ SymbolTable.push_back(UndfSym);
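+ // Mark the global with a -1 sentinel offset; it is replaced by the
+ // symbol's table index when the nlist entries are emitted below.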
+ GVOffset[*I] = -1;
+ }
+ }
+
+ // Sort the symbols by name, so that when we partition the symbols by scope
+ // of definition, we won't have to sort by name within each partition.
+ std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp());
+
+ // Partition the symbol table entries so that all local symbols come before
+ // all symbols with external linkage. { 1 | 2 3 }
+ std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal);
+
+ // Advance iterator to beginning of external symbols and partition so that
+ // all external symbols defined in this module come before all external
+ // symbols defined elsewhere. { 1 | 2 | 3 }
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I) {
+ if (!PartitionByLocal(*I)) {
+ std::partition(I, E, PartitionByDefined);
+ break;
+ }
+ }
+
+ // Calculate the starting index for each of the local, extern defined, and
+ // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
+ // load command.
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I) {
+ if (PartitionByLocal(*I)) {
+ ++DySymTab.nlocalsym;
+ ++DySymTab.iextdefsym;
+ ++DySymTab.iundefsym;
+ } else if (PartitionByDefined(*I)) {
+ ++DySymTab.nextdefsym;
+ ++DySymTab.iundefsym;
+ } else {
+ ++DySymTab.nundefsym;
+ }
+ }
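+ // (Illustrative: with 2 locals, 3 defined externals, and 1 undefined symbol
+ // this yields ilocalsym=0, iextdefsym=2, iundefsym=5.)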
+
+ // Write out a leading zero byte when emitting the string table, so that
+ // n_strx == 0 denotes the empty string.
+ OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
+ StrTOut.outbyte(0);
+
+ // The order of the string table is:
+ // 1. strings for external symbols
+ // 2. strings for local symbols
+ // Since this is the opposite order from the symbol table, which we have just
+ // sorted, we can walk the symbol table backwards to output the string table.
+ for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
+ E = SymbolTable.rend(); I != E; ++I) {
+ if (I->GVName == "") {
+ I->n_strx = 0;
+ } else {
+ I->n_strx = StrT.size();
+ StrTOut.outstring(I->GVName, I->GVName.length()+1);
+ }
+ }
+
+ OutputBuffer SymTOut(SymT, is64Bit, isLittleEndian);
+
+ unsigned index = 0;
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I, ++index) {
+ // Add the section base address to the section offset in the n_value field
+ // to calculate the full address.
+ // FIXME: handle symbols where the n_value field is not the address
+ GlobalValue *GV = const_cast<GlobalValue*>(I->GV);
+ if (GV && GVSection[GV])
+ I->n_value += GVSection[GV]->addr;
+ if (GV && (GVOffset[GV] == -1))
+ GVOffset[GV] = index;
+
+ // Emit nlist to buffer
+ SymTOut.outword(I->n_strx);
+ SymTOut.outbyte(I->n_type);
+ SymTOut.outbyte(I->n_sect);
+ SymTOut.outhalf(I->n_desc);
+ SymTOut.outaddr(I->n_value);
+ }
+}
+
+/// CalculateRelocations - For each MachineRelocation in the current section,
+/// calculate the index of the section containing the object to be relocated,
+/// and the offset into that section. From this information, create the
+/// appropriate target-specific MachORelocation type and buffer it to be
+/// written out after we are finished writing out sections.
+void MachOWriter::CalculateRelocations(MachOSection &MOS) {
+ for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = MOS.Relocations[i];
+ unsigned TargetSection = MR.getConstantVal();
+ unsigned TargetAddr = 0;
+ unsigned TargetIndex = 0;
+
+ // This is a scattered relocation entry if it points to a global value with
+ // a non-zero offset.
+ bool Scattered = false;
+ bool Extern = false;
+
+ // Since we may not have seen the GlobalValue we were interested in yet at
+ // the time we emitted the relocation for it, fix it up now so that it
+ // points to the offset into the correct section.
+ if (MR.isGlobalValue()) {
+ GlobalValue *GV = MR.getGlobalValue();
+ MachOSection *MOSPtr = GVSection[GV];
+ intptr_t Offset = GVOffset[GV];
+
+ // If we have never seen the global before, it must refer to a symbol
+ // defined in another module (N_UNDF).
+ if (!MOSPtr) {
+ // FIXME: need to append stub suffix
+ Extern = true;
+ TargetAddr = 0;
+ TargetIndex = GVOffset[GV];
+ } else {
+ Scattered = TargetSection != 0;
+ TargetSection = MOSPtr->Index;
+ }
+ MR.setResultPointer((void*)Offset);
+ }
+
+ // If the symbol is locally defined, pass in the address of the section and
+ // the section index to the code which will generate the target relocation.
+ if (!Extern) {
+ MachOSection &To = *SectionList[TargetSection - 1];
+ TargetAddr = To.addr;
+ TargetIndex = To.Index;
+ }
+
+ OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian);
+ OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian);
+
+ MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex,
+ RelocOut, SecOut, Scattered, Extern);
+ }
+}
+
+// InitMem - Write the value of a Constant to the specified memory location,
+// converting it into bytes and relocations.
+void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
+ const TargetData *TD,
+ std::vector<MachineRelocation> &MRs) {
+ typedef std::pair<const Constant*, intptr_t> CPair;
+ std::vector<CPair> WorkList;
+
+ WorkList.push_back(CPair(C,(intptr_t)Addr + Offset));
+
+ intptr_t ScatteredOffset = 0;
+
+ while (!WorkList.empty()) {
+ const Constant *PC = WorkList.back().first;
+ intptr_t PA = WorkList.back().second;
+ WorkList.pop_back();
+
+ if (isa<UndefValue>(PC)) {
+ continue;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) {
+ unsigned ElementSize =
+ TD->getTypeAllocSize(CP->getType()->getElementType());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CP->getOperand(i), PA+i*ElementSize));
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(PC)) {
+ //
+ // FIXME: Handle ConstantExpression. See EE::getConstantValue()
+ //
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+ ScatteredOffset = TD->getIndexedOffset(CE->getOperand(0)->getType(),
+ &Indices[0], Indices.size());
+ WorkList.push_back(CPair(CE->getOperand(0), PA));
+ break;
+ }
+ case Instruction::Add:
+ default:
+ cerr << "ConstantExpr not handled as global var init: " << *CE << "\n";
+ abort();
+ break;
+ }
+ } else if (PC->getType()->isSingleValueType()) {
+ uint8_t *ptr = (uint8_t *)PA;
+ switch (PC->getType()->getTypeID()) {
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth();
+ uint64_t val = cast<ConstantInt>(PC)->getZExtValue();
+ if (NumBits <= 8)
+ ptr[0] = val;
+ else if (NumBits <= 16) {
+ if (TD->isBigEndian())
+ val = ByteSwap_16(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ } else if (NumBits <= 32) {
+ if (TD->isBigEndian())
+ val = ByteSwap_32(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ } else if (NumBits <= 64) {
+ if (TD->isBigEndian())
+ val = ByteSwap_64(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ ptr[4] = val >> 32;
+ ptr[5] = val >> 40;
+ ptr[6] = val >> 48;
+ ptr[7] = val >> 56;
+ } else {
+ assert(0 && "Not implemented: bit widths > 64");
+ }
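+ // The stores above always write the least-significant byte first, so the
+ // ByteSwap on big-endian targets lays the value out in big-endian byte
+ // order; e.g. 0x11223344 is emitted as the bytes 11 22 33 44.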
+ break;
+ }
+ case Type::FloatTyID: {
+ uint32_t val = cast<ConstantFP>(PC)->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ if (TD->isBigEndian())
+ val = ByteSwap_32(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ break;
+ }
+ case Type::DoubleTyID: {
+ uint64_t val = cast<ConstantFP>(PC)->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ if (TD->isBigEndian())
+ val = ByteSwap_64(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ ptr[4] = val >> 32;
+ ptr[5] = val >> 40;
+ ptr[6] = val >> 48;
+ ptr[7] = val >> 56;
+ break;
+ }
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(PC))
+ memset(ptr, 0, TD->getPointerSize());
+ else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) {
+ // FIXME: what about function stubs?
+ MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr,
+ MachineRelocation::VANILLA,
+ const_cast<GlobalValue*>(GV),
+ ScatteredOffset));
+ ScatteredOffset = 0;
+ } else
+ assert(0 && "Unknown constant pointer type!");
+ break;
+ default:
+ cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n";
+ abort();
+ }
+ } else if (isa<ConstantAggregateZero>(PC)) {
+ memset((void*)PA, 0, (size_t)TD->getTypeAllocSize(PC->getType()));
+ } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(PC)) {
+ unsigned ElementSize =
+ TD->getTypeAllocSize(CPA->getType()->getElementType());
+ for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CPA->getOperand(i), PA+i*ElementSize));
+ } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(PC)) {
+ const StructLayout *SL =
+ TD->getStructLayout(cast<StructType>(CPS->getType()));
+ for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CPS->getOperand(i),
+ PA+SL->getElementOffset(i)));
+ } else {
+ cerr << "Bad Type: " << *PC->getType() << "\n";
+ assert(0 && "Unknown constant type to initialize memory with!");
+ }
+ }
+}
+
+MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+ TargetMachine &TM) :
+ GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect),
+ n_desc(0), n_value(0) {
+
+ const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
+
+ switch (GV->getLinkage()) {
+ default:
+ assert(0 && "Unexpected linkage type!");
+ break;
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::CommonLinkage:
+ assert(!isa<Function>(gv) && "Unexpected linkage type for Function!");
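+ // FALL THROUGH: weak and common globals are named and flagged like
+ // external symbols.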
+ case GlobalValue::ExternalLinkage:
+ GVName = TAI->getGlobalPrefix() + name;
+ n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT;
+ break;
+ case GlobalValue::PrivateLinkage:
+ GVName = TAI->getPrivateGlobalPrefix() + name;
+ break;
+ case GlobalValue::InternalLinkage:
+ GVName = TAI->getGlobalPrefix() + name;
+ break;
+ }
+}
diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h
new file mode 100644
index 0000000..6ab66ee
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.h
@@ -0,0 +1,629 @@
+//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHOWRITER_H
+#define MACHOWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachOWriterInfo.h"
+#include <map>
+
+namespace llvm {
+ class GlobalVariable;
+ class Mangler;
+ class MachineCodeEmitter;
+ class MachOCodeEmitter;
+ class OutputBuffer;
+ class raw_ostream;
+
+ /// MachOSym - This struct contains information about each symbol that is
+ /// added to the logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct MachOSym {
+ const GlobalValue *GV; // The global value this corresponds to.
+ std::string GVName; // The mangled name of the global value.
+ uint32_t n_strx; // index into the string table
+ uint8_t n_type; // type flag
+ uint8_t n_sect; // section number or NO_SECT
+ int16_t n_desc; // see <mach-o/stab.h>
+ uint64_t n_value; // value for this symbol (or stab offset)
+
+ // Constants for the n_sect field
+ // see <mach-o/nlist.h>
+ enum { NO_SECT = 0 }; // symbol is not in any section
+
+ // Constants for the n_type field
+ // see <mach-o/nlist.h>
+ enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT
+ N_ABS = 0x2, // absolute, n_sect == NO_SECT
+ N_SECT = 0xe, // defined in section number n_sect
+ N_PBUD = 0xc, // prebound undefined (defined in a dylib)
+ N_INDR = 0xa // indirect
+ };
+ // The following bits are OR'd into the types above. For example, a type
+ // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
+ enum { N_EXT = 0x01, // external symbol bit
+ N_PEXT = 0x10 // private external symbol bit
+ };
+
+ // Constants for the n_desc field
+ // see <mach-o/loader.h>
+ enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
+ REFERENCE_FLAG_UNDEFINED_LAZY = 1,
+ REFERENCE_FLAG_DEFINED = 2,
+ REFERENCE_FLAG_PRIVATE_DEFINED = 3,
+ REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
+ REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
+ };
+ enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
+ N_WEAK_REF = 0x0040, // symbol is weak referenced
+ N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition
+ };
+
+ MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+ TargetMachine &TM);
+ };
+
+ /// MachOWriter - This class implements the common target-independent code for
+ /// writing Mach-O files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class MachOWriter : public MachineFunctionPass {
+ friend class MachOCodeEmitter;
+ public:
+ static char ID;
+ MachineCodeEmitter &getMachineCodeEmitter() const {
+ return *(MachineCodeEmitter*)MCE;
+ }
+
+ MachOWriter(raw_ostream &O, TargetMachine &TM);
+ virtual ~MachOWriter();
+
+ virtual const char *getPassName() const {
+ return "Mach-O Writer";
+ }
+
+ typedef std::vector<uint8_t> DataBuffer;
+ protected:
+ /// Output stream to send the resultant object file to.
+ ///
+ raw_ostream &O;
+
+ /// Target machine description.
+ ///
+ TargetMachine &TM;
+
+ /// Mang - The object used to perform name mangling for this module.
+ ///
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ MachOCodeEmitter *MCE;
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating what header values and flags to set.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the Mach-O file.
+ bool doInitialization(Module &M);
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the Mach-O file to 'O'.
+ bool doFinalization(Module &M);
+
+ /// MachOHeader - This struct contains the header information about a
+ /// specific architecture type/subtype pair that is emitted to the file.
+ struct MachOHeader {
+ uint32_t magic; // mach magic number identifier
+ uint32_t filetype; // type of file
+ uint32_t ncmds; // number of load commands
+ uint32_t sizeofcmds; // the size of all the load commands
+ uint32_t flags; // flags
+ uint32_t reserved; // 64-bit only
+
+ /// HeaderData - The actual data for the header which we are building
+ /// up for emission to the file.
+ DataBuffer HeaderData;
+
+ // Constants for the filetype field
+ // see <mach-o/loader.h> for additional info on the various types
+ enum { MH_OBJECT = 1, // relocatable object file
+ MH_EXECUTE = 2, // demand paged executable file
+ MH_FVMLIB = 3, // fixed VM shared library file
+ MH_CORE = 4, // core file
+ MH_PRELOAD = 5, // preloaded executable file
+ MH_DYLIB = 6, // dynamically bound shared library
+ MH_DYLINKER = 7, // dynamic link editor
+ MH_BUNDLE = 8, // dynamically bound bundle file
+ MH_DYLIB_STUB = 9, // shared library stub for static linking only
+ MH_DSYM = 10 // companion file with only debug sections
+ };
+
+ // Constants for the flags field
+ enum { MH_NOUNDEFS = 1 << 0,
+ // the object file has no undefined references
+ MH_INCRLINK = 1 << 1,
+ // the object file is the output of an incremental link against
+ // a base file and cannot be link edited again
+ MH_DYLDLINK = 1 << 2,
+ // the object file is input for the dynamic linker and cannot be
+ // statically link edited again.
+ MH_BINDATLOAD = 1 << 3,
+ // the object file's undefined references are bound by the
+ // dynamic linker when loaded.
+ MH_PREBOUND = 1 << 4,
+ // the file has its dynamic undefined references prebound
+ MH_SPLIT_SEGS = 1 << 5,
+ // the file has its read-only and read-write segments split
+ // see <mach/shared_memory_server.h>
+ MH_LAZY_INIT = 1 << 6,
+ // the shared library init routine is to be run lazily via
+ // catching memory faults to its writable segments (obsolete)
+ MH_TWOLEVEL = 1 << 7,
+ // the image is using two-level namespace bindings
+ MH_FORCE_FLAT = 1 << 8,
+ // the executable is forcing all images to use flat namespace
+ // bindings.
+ MH_NOMULTIDEFS = 1 << 9,
+ // this umbrella guarantees no multiple definitions of symbols
+ // in its sub-images so the two-level namespace hints can
+ // always be used.
+ MH_NOFIXPREBINDING = 1 << 10,
+ // do not have dyld notify the prebinding agent about this
+ // executable.
+ MH_PREBINDABLE = 1 << 11,
+ // the binary is not prebound but can have its prebinding
+ // redone. only used when MH_PREBOUND is not set.
+ MH_ALLMODSBOUND = 1 << 12,
+ // indicates that this binary binds to all two-level namespace
+ // modules of its dependent libraries. Only used when
+ // MH_PREBINDABLE and MH_TWOLEVEL are both set.
+ MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
+ // safe to divide up the sections into sub-sections via symbols
+ // for dead code stripping.
+ MH_CANONICAL = 1 << 14,
+ // the binary has been canonicalized via the unprebind operation
+ MH_WEAK_DEFINES = 1 << 15,
+ // the final linked image contains external weak symbols
+ MH_BINDS_TO_WEAK = 1 << 16,
+ // the final linked image uses weak symbols
+ MH_ALLOW_STACK_EXECUTION = 1 << 17
+ // When this bit is set, all stacks in the task will be given
+ // stack execution privilege. Only used in MH_EXECUTE filetype
+ };
+
+ MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
+ reserved(0) { }
+
+ /// cmdSize - This routine returns the size of the MachOSection as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 8 * sizeof(uint32_t);
+ else
+ return 7 * sizeof(uint32_t);
+ }
+
+ /// setMagic - This routine sets the appropriate value for the 'magic'
+ /// field based on pointer size and endianness.
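+ /// For example, a 32-bit big-endian target gets 0xfeedface, while a
+ /// 64-bit little-endian target gets the byte-swapped form 0xcffaedfe.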
+ void setMagic(bool isLittleEndian, bool is64Bit) {
+ if (isLittleEndian)
+ if (is64Bit) magic = 0xcffaedfe;
+ else magic = 0xcefaedfe;
+ else
+ if (is64Bit) magic = 0xfeedfacf;
+ else magic = 0xfeedface;
+ }
+ };
+
+ /// Header - An instance of MachOHeader that we will update while we build
+ /// the file, and then emit during finalization.
+ MachOHeader Header;
+
+ /// MachOSegment - This struct contains the necessary information to
+ /// emit the load commands for each section in the file.
+ struct MachOSegment {
+ uint32_t cmd; // LC_SEGMENT or LC_SEGMENT_64
+ uint32_t cmdsize; // Total size of this struct and section commands
+ std::string segname; // segment name
+ uint64_t vmaddr; // address of this segment
+ uint64_t vmsize; // size of this segment, may be larger than filesize
+ uint64_t fileoff; // offset in file
+ uint64_t filesize; // amount to read from file
+ uint32_t maxprot; // maximum VM protection
+ uint32_t initprot; // initial VM protection
+ uint32_t nsects; // number of sections in this segment
+ uint32_t flags; // flags
+
+ // The following constants are getting pulled in by one of the
+ // system headers, which creates a neat clash with the enum.
+#if !defined(VM_PROT_NONE)
+#define VM_PROT_NONE 0x00
+#endif
+#if !defined(VM_PROT_READ)
+#define VM_PROT_READ 0x01
+#endif
+#if !defined(VM_PROT_WRITE)
+#define VM_PROT_WRITE 0x02
+#endif
+#if !defined(VM_PROT_EXECUTE)
+#define VM_PROT_EXECUTE 0x04
+#endif
+#if !defined(VM_PROT_ALL)
+#define VM_PROT_ALL 0x07
+#endif
+
+ // Constants for the vm protection fields
+ // see <mach-o/vm_prot.h>
+ enum { SEG_VM_PROT_NONE = VM_PROT_NONE,
+ SEG_VM_PROT_READ = VM_PROT_READ, // read permission
+ SEG_VM_PROT_WRITE = VM_PROT_WRITE, // write permission
+ SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
+ SEG_VM_PROT_ALL = VM_PROT_ALL
+ };
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_SEGMENT = 0x01, // segment of this file to be mapped
+ LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped
+ };
+
+ /// cmdSize - This routine returns the size of the MachOSection as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
+ else
+ return 10 * sizeof(uint32_t) + 16; // addresses only 32 bits
+ }
+
+ MachOSegment(const std::string &seg, bool is64Bit)
+ : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
+ vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
+ initprot(VM_PROT_ALL), nsects(0), flags(0) { }
+ };
+
+ /// MachOSection - This struct contains information about each section in a
+ /// particular segment that is emitted to the file. This is eventually
+ /// turned into the SectionCommand in the load command for a particular
+ /// segment.
+ struct MachOSection {
+ std::string sectname; // name of this section,
+ std::string segname; // segment this section goes in
+ uint64_t addr; // memory address of this section
+ uint64_t size; // size in bytes of this section
+ uint32_t offset; // file offset of this section
+ uint32_t align; // section alignment (power of 2)
+ uint32_t reloff; // file offset of relocation entries
+ uint32_t nreloc; // number of relocation entries
+ uint32_t flags; // flags (section type and attributes)
+ uint32_t reserved1; // reserved (for offset or index)
+ uint32_t reserved2; // reserved (for count or sizeof)
+ uint32_t reserved3; // reserved (64 bit only)
+
+ /// A unique number for this section, which will be used to match symbols
+ /// to the correct section.
+ uint32_t Index;
+
+ /// SectionData - The actual data for this section which we are building
+ /// up for emission to the file.
+ DataBuffer SectionData;
+
+ /// RelocBuffer - A buffer to hold the mach-o relocations before we write
+ /// them out at the appropriate location in the file.
+ DataBuffer RelocBuffer;
+
+ /// Relocations - The relocations that we have encountered so far in this
+ /// section that we will need to convert to MachORelocation entries when
+ /// the file is written.
+ std::vector<MachineRelocation> Relocations;
+
+ // Constants for the section types (low 8 bits of flags field)
+ // see <mach-o/loader.h>
+ enum { S_REGULAR = 0,
+ // regular section
+ S_ZEROFILL = 1,
+ // zero fill on demand section
+ S_CSTRING_LITERALS = 2,
+ // section with only literal C strings
+ S_4BYTE_LITERALS = 3,
+ // section with only 4 byte literals
+ S_8BYTE_LITERALS = 4,
+ // section with only 8 byte literals
+ S_LITERAL_POINTERS = 5,
+ // section with only pointers to literals
+ S_NON_LAZY_SYMBOL_POINTERS = 6,
+ // section with only non-lazy symbol pointers
+ S_LAZY_SYMBOL_POINTERS = 7,
+ // section with only lazy symbol pointers
+ S_SYMBOL_STUBS = 8,
+ // section with only symbol stubs
+ // byte size of stub in the reserved2 field
+ S_MOD_INIT_FUNC_POINTERS = 9,
+ // section with only function pointers for initialization
+ S_MOD_TERM_FUNC_POINTERS = 10,
+ // section with only function pointers for termination
+ S_COALESCED = 11,
+ // section contains symbols that are coalesced
+ S_GB_ZEROFILL = 12,
+ // zero fill on demand section (that can be larger than 4GB)
+ S_INTERPOSING = 13,
+ // section with only pairs of function pointers for interposing
+ S_16BYTE_LITERALS = 14
+ // section with only 16 byte literals
+ };
+
+ // Constants for the section flags (high 24 bits of flags field)
+ // see <mach-o/loader.h>
+ enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31,
+ // section contains only true machine instructions
+ S_ATTR_NO_TOC = 1 << 30,
+ // section contains coalesced symbols that are not to be in a
+ // ranlib table of contents
+ S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
+ // ok to strip static symbols in this section in files with the
+ // MH_DYLDLINK flag
+ S_ATTR_NO_DEAD_STRIP = 1 << 28,
+ // no dead stripping
+ S_ATTR_LIVE_SUPPORT = 1 << 27,
+ // blocks are live if they reference live blocks
+ S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
+ // used with i386 code stubs written on by dyld
+ S_ATTR_DEBUG = 1 << 25,
+ // a debug section
+ S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
+ // section contains some machine instructions
+ S_ATTR_EXT_RELOC = 1 << 9,
+ // section has external relocation entries
+ S_ATTR_LOC_RELOC = 1 << 8
+ // section has local relocation entries
+ };
+
+ /// cmdSize - This routine returns the size of the MachOSection as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
+ else
+ return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits
+ }
+
+ MachOSection(const std::string &seg, const std::string &sect)
+ : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
+ reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+ reserved3(0) { }
+ };
+
+ private:
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the segment load command
+ /// SectionCommands are constructed from this info.
+ std::vector<MachOSection*> SectionList;
+
+ /// SectionLookup - This is a mapping from section name to SectionList entry
+ std::map<std::string, MachOSection*> SectionLookup;
+
+ /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
+ /// to aid in emitting relocations.
+ std::map<GlobalValue*, MachOSection*> GVSection;
+
+ /// GVOffset - This is a mapping from a GlobalValue to an offset from the
+ /// start of the section in which the GV resides, to aid in emitting
+ /// relocations.
+ std::map<GlobalValue*, intptr_t> GVOffset;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ MachOSection *getSection(const std::string &seg, const std::string &sect,
+ unsigned Flags = 0) {
+ MachOSection *MOS = SectionLookup[seg+sect];
+ if (MOS) return MOS;
+
+ MOS = new MachOSection(seg, sect);
+ SectionList.push_back(MOS);
+ MOS->Index = SectionList.size();
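+ // Section indices are 1-based (0 is reserved for NO_SECT), so consumers
+ // such as CalculateRelocations index SectionList with Index - 1.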
+ MOS->flags = MachOSection::S_REGULAR | Flags;
+ SectionLookup[seg+sect] = MOS;
+ return MOS;
+ }
+ MachOSection *getTextSection(bool isCode = true) {
+ if (isCode)
+ return getSection("__TEXT", "__text",
+ MachOSection::S_ATTR_PURE_INSTRUCTIONS |
+ MachOSection::S_ATTR_SOME_INSTRUCTIONS);
+ else
+ return getSection("__TEXT", "__text");
+ }
+ MachOSection *getBSSSection() {
+ return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
+ }
+ MachOSection *getDataSection() {
+ return getSection("__DATA", "__data");
+ }
+ MachOSection *getConstSection(Constant *C) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (CVA && CVA->isCString())
+ return getSection("__TEXT", "__cstring",
+ MachOSection::S_CSTRING_LITERALS);
+
+ const Type *Ty = C->getType();
+ if (Ty->isPrimitiveType() || Ty->isInteger()) {
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+ switch(Size) {
+ default: break; // Fall through to __TEXT,__const
+ case 4:
+ return getSection("__TEXT", "__literal4",
+ MachOSection::S_4BYTE_LITERALS);
+ case 8:
+ return getSection("__TEXT", "__literal8",
+ MachOSection::S_8BYTE_LITERALS);
+ case 16:
+ return getSection("__TEXT", "__literal16",
+ MachOSection::S_16BYTE_LITERALS);
+ }
+ }
+ return getSection("__TEXT", "__const");
+ }
+ MachOSection *getJumpTableSection() {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return getTextSection(false);
+ else
+ return getSection("__TEXT", "__const");
+ }
+
+ /// MachOSymTab - This struct contains information about the offsets and
+ /// size of the symbol table information in the LC_SYMTAB load command.
+ struct MachOSymTab {
+ uint32_t cmd; // LC_SYMTAB
+ uint32_t cmdsize; // sizeof( MachOSymTab )
+ uint32_t symoff; // symbol table offset
+ uint32_t nsyms; // number of symbol table entries
+ uint32_t stroff; // string table offset
+ uint32_t strsize; // string table size in bytes
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info
+ };
+
+ MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
+ nsyms(0), stroff(0), strsize(0) { }
+ };
+
+ /// MachODySymTab - This struct contains information about the offsets and
+ /// sizes of the dynamic symbol table information in the LC_DYSYMTAB load
+ /// command.
+ struct MachODySymTab {
+ uint32_t cmd; // LC_DYSYMTAB
+ uint32_t cmdsize; // sizeof( MachODySymTab )
+ uint32_t ilocalsym; // index to local symbols
+ uint32_t nlocalsym; // number of local symbols
+ uint32_t iextdefsym; // index to externally defined symbols
+ uint32_t nextdefsym; // number of externally defined symbols
+ uint32_t iundefsym; // index to undefined symbols
+ uint32_t nundefsym; // number of undefined symbols
+ uint32_t tocoff; // file offset to table of contents
+ uint32_t ntoc; // number of entries in table of contents
+ uint32_t modtaboff; // file offset to module table
+ uint32_t nmodtab; // number of module table entries
+ uint32_t extrefsymoff; // offset to referenced symbol table
+ uint32_t nextrefsyms; // number of referenced symbol table entries
+ uint32_t indirectsymoff; // file offset to the indirect symbol table
+ uint32_t nindirectsyms; // number of indirect symbol table entries
+ uint32_t extreloff; // offset to external relocation entries
+ uint32_t nextrel; // number of external relocation entries
+ uint32_t locreloff; // offset to local relocation entries
+ uint32_t nlocrel; // number of local relocation entries
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info
+ };
+
+ MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+ ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+ iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+ nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+ nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
+ };
+
+ /// SymTab - The "stab" style symbol table information
+ MachOSymTab SymTab;
+ /// DySymTab - symbol table info for the dynamic link editor
+ MachODySymTab DySymTab;
+
+ struct MachOSymCmp {
+ // FIXME: this does not appear to be sorting 'f' after 'F'
+ bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
+ return LHS.GVName < RHS.GVName;
+ }
+ };
+
+ /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+ /// a local symbol rather than an external symbol.
+ static bool PartitionByLocal(const MachOSym &Sym);
+
+ /// PartitionByDefined - Simple boolean predicate that returns true if Sym
+ /// is defined in this module.
+ static bool PartitionByDefined(const MachOSym &Sym);
+
+ protected:
+
+ /// SymbolTable - This is the list of symbols we have emitted to the file.
+ /// This actually gets rearranged before emission to the file (to put the
+ /// local symbols first in the list).
+ std::vector<MachOSym> SymbolTable;
+
+ /// SymT - A buffer to hold the symbol table before we write it out at the
+ /// appropriate location in the file.
+ DataBuffer SymT;
+
+ /// StrT - A buffer to hold the string table before we write it out at the
+ /// appropriate location in the file.
+ DataBuffer StrT;
+
+    /// PendingGlobals - This is a list of externally defined symbols that we
+    /// have been asked to emit, but have not yet seen a reference to. When a
+    /// reference is seen, the symbol will move from this list to SymbolTable.
+ std::vector<GlobalValue*> PendingGlobals;
+
+ /// DynamicSymbolTable - This is just a vector of indices into
+ /// SymbolTable to aid in emitting the DYSYMTAB load command.
+ std::vector<unsigned> DynamicSymbolTable;
+
+ static void InitMem(const Constant *C, void *Addr, intptr_t Offset,
+ const TargetData *TD,
+ std::vector<MachineRelocation> &MRs);
+
+ private:
+ void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV);
+ void EmitGlobal(GlobalVariable *GV);
+ void EmitHeaderAndLoadCommands();
+ void EmitSections();
+ void BufferSymbolAndStringTable();
+ void CalculateRelocations(MachOSection &MOS);
+
+ MachineRelocation GetJTRelocation(unsigned Offset,
+ MachineBasicBlock *MBB) const {
+ return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
+ }
+
+    /// GetTargetRelocation - Have the target write out this relocation;
+    /// returns the number of relocation entries emitted.
+ unsigned GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIndex,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered,
+ bool Extern) {
+ return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
+ ToIndex, RelocOut,
+ SecOut, Scattered,
+ Extern);
+ }
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 0000000..71e6b3e
--- /dev/null
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,372 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false) {
+ Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// parent pointer of the MBB, the MBB numbering, and any instructions in the
+/// MBB to be on the right operand list for registers.
+///
+/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ I->AddRegOperandsToUseLists(RegInfo);
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+ LeakDetector::addGarbageObject(N);
+}
+
+
+/// addNodeToList (MI) - When we add an instruction to a basic block
+/// list, we update its parent pointer and add its operands to the reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) {
+ assert(N->getParent() == 0 && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->AddRegOperandsToUseLists(MF->getRegInfo());
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+/// removeNodeFromList (MI) - When we remove an instruction from a basic block
+/// list, we update its parent pointer and remove its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) {
+ assert(N->getParent() != 0 && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ N->RemoveRegOperandsFromUseLists();
+
+ N->setParent(0);
+
+ LeakDetector::addGarbageObject(N);
+}
+
+/// transferNodesFromList (MI) - When moving a range of instructions from one
+/// MBB list to another, we need to update the parent pointers and the use/def
+/// lists.
+void ilist_traits<MachineInstr>::transferNodesFromList(
+ ilist_traits<MachineInstr>& fromList,
+ MachineBasicBlock::iterator first,
+ MachineBasicBlock::iterator last) {
+ assert(Parent->getParent() == fromList.Parent->getParent() &&
+ "MachineInstr parent mismatch!");
+
+ // Splice within the same MBB -> no change.
+ if (Parent == fromList.Parent) return;
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; first != last; ++first)
+ first->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ iterator I = end();
+ while (I != begin() && (--I)->getDesc().isTerminator())
+    ; /* noop */
+ if (I != end() && !I->getDesc().isTerminator()) ++I;
+ return I;
+}
+
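+/// isOnlyReachableByFallthrough - Return true if this basic block has exactly
+/// one predecessor and control can only reach it by falling through: the
+/// block is not a landing pad, its single predecessor is its layout
+/// predecessor, and that predecessor does not end in a barrier instruction.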
+bool
+MachineBasicBlock::isOnlyReachableByFallthrough() const {
+ return !isLandingPad() &&
+ !pred_empty() &&
+ next(pred_begin()) == pred_end() &&
+ (*pred_begin())->isLayoutSuccessor(this) &&
+ ((*pred_begin())->empty() ||
+ !(*pred_begin())->back().getDesc().isBarrier());
+}
+
+void MachineBasicBlock::dump() const {
+ print(*cerr.stream());
+}
+
+static inline void OutputReg(std::ostream &os, unsigned RegNo,
+ const TargetRegisterInfo *TRI = 0) {
+ if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) {
+ if (TRI)
+ os << " %" << TRI->get(RegNo).Name;
+ else
+ os << " %mreg(" << RegNo << ")";
+ } else
+ os << " %reg" << RegNo;
+}
+
+void MachineBasicBlock::print(std::ostream &OS) const {
+ const MachineFunction *MF = getParent();
+  if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ const BasicBlock *LBB = getBasicBlock();
+ OS << "\n";
+ if (LBB) OS << LBB->getName() << ": ";
+ OS << (const void*)this
+ << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber();
+ if (Alignment) OS << ", Alignment " << Alignment;
+ if (isLandingPad()) OS << ", EH LANDING PAD";
+ OS << ":\n";
+
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!livein_empty()) {
+ OS << "Live Ins:";
+ for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ OutputReg(OS, *I, TRI);
+ OS << "\n";
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " " << *PI << " (#" << (*PI)->getNumber() << ")";
+ OS << "\n";
+ }
+
+ for (const_iterator I = begin(); I != end(); ++I) {
+ OS << "\t";
+ I->print(OS, &getParent()->getTarget());
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ OS << " " << *SI << " (#" << (*SI)->getNumber() << ")";
+ OS << "\n";
+ }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ assert(I != livein_end() && "Not a live in!");
+ LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+ const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ MachineFunction::iterator BBI = NewBefore;
+ getParent()->splice(++BBI, this);
+}
+
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+ succ->removePredecessor(this);
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ assert(I != Successors.end() && "Not a current successor!");
+ Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+ assert(I != Successors.end() && "Not a current successor!");
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+ Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+ std::vector<MachineBasicBlock *>::iterator I =
+ std::find(Predecessors.begin(), Predecessors.end(), pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB)
+{
+ if (this == fromMBB)
+ return;
+
+  for (MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(),
+       end = fromMBB->succ_end(); iter != end; ++iter) {
+    addSuccessor(*iter);
+  }
+  while (!fromMBB->succ_empty())
+    fromMBB->removeSuccessor(fromMBB->succ_begin());
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ std::vector<MachineBasicBlock *>::const_iterator I =
+ std::find(Successors.begin(), Successors.end(), MBB);
+ return I != Successors.end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return next(I) == MachineFunction::const_iterator(MBB);
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::iterator I = end();
+ while (I != begin()) {
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ removeSuccessor(Old);
+ addSuccessor(New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can
+/// be null.
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool isCond) {
+ bool MadeChange = false;
+ bool AddedFallThrough = false;
+
+ MachineFunction::iterator FallThru = next(MachineFunction::iterator(this));
+
+ // If this block ends with a conditional branch that falls through to its
+ // successor, set DestB as the successor.
+ if (isCond) {
+ if (DestB == 0 && FallThru != getParent()->end()) {
+ DestB = FallThru;
+ AddedFallThrough = true;
+ }
+ } else {
+ // If this is an unconditional branch with no explicit dest, it must just be
+ // a fallthrough into DestB.
+ if (DestA == 0 && FallThru != getParent()->end()) {
+ DestA = FallThru;
+ AddedFallThrough = true;
+ }
+ }
+
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
+ while (SI != succ_end()) {
+ if (*SI == DestA && DestA == DestB) {
+ DestA = DestB = 0;
+ ++SI;
+ } else if (*SI == DestA) {
+ DestA = 0;
+ ++SI;
+ } else if (*SI == DestB) {
+ DestB = 0;
+ ++SI;
+ } else if ((*SI)->isLandingPad() &&
+ *SI!=OrigDestA && *SI!=OrigDestB) {
+ ++SI;
+ } else {
+ // Otherwise, this is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ MadeChange = true;
+ }
+ }
+ if (!AddedFallThrough) {
+ assert(DestA == 0 && DestB == 0 &&
+ "MachineCFG is missing edges!");
+ } else if (isCond) {
+ assert(DestA == 0 && "MachineCFG is missing edges!");
+ }
+ return MadeChange;
+}
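+// Worked example (illustrative): if this block ends in an unconditional
+// branch to DestA but a stale successor S survives from an earlier
+// transformation, CorrectExtraCFGEdges(DestA, 0, false) deletes the edge to
+// S while preserving the DestA edge and any landing-pad successors.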
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 0000000..37c8601
--- /dev/null
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,53 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+
+char MachineDominatorTree::ID = 0;
+
+static RegisterPass<MachineDominatorTree>
+E("machinedomtree", "MachineDominator Tree Construction", true);
+
+const PassInfo *const llvm::MachineDominatorsID = &E;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+
+ return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(&ID) {
+ DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+ DT->releaseMemory();
+ delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+ DT->releaseMemory();
+}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 0000000..cacfed1
--- /dev/null
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,598 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+#include <sstream>
+using namespace llvm;
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ return runOnMachineFunction(MachineFunction::get(&F));
+}
+
+namespace {
+ struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass {
+ static char ID;
+
+ std::ostream *OS;
+ const std::string Banner;
+
+ Printer (std::ostream *os, const std::string &banner)
+ : MachineFunctionPass(&ID), OS(os), Banner(banner) {}
+
+ const char *getPassName() const { return "MachineFunction Printer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ (*OS) << Banner;
+ MF.print (*OS);
+ return false;
+ }
+ };
+ char Printer::ID = 0;
+}
+
+/// Returns a newly-created MachineFunction Printer pass. The default output
+/// stream is std::cerr; the default banner is empty.
+///
+FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS,
+ const std::string &Banner){
+ return new Printer(OS, Banner);
+}
+
+namespace {
+ struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass {
+ static char ID;
+ Deleter() : MachineFunctionPass(&ID) {}
+
+ const char *getPassName() const { return "Machine Code Deleter"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ // Delete the annotation from the function now.
+ MachineFunction::destruct(MF.getFunction());
+ return true;
+ }
+ };
+ char Deleter::ID = 0;
+}
+
+/// MachineCodeDeletion Pass - This pass deletes all of the machine code for
+/// the current function, which should happen after the function has been
+/// emitted to a .s file or to memory.
+FunctionPass *llvm::createMachineCodeDeleter() {
+ return new Deleter();
+}
+
+
+
+//===---------------------------------------------------------------------===//
+// MachineFunction implementation
+//===---------------------------------------------------------------------===//
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F,
+ const TargetMachine &TM)
+ : Annotation(AnnotationManager::getID("CodeGen::MachineCodeForFunction")),
+ Fn(F), Target(TM) {
+ if (TM.getRegisterInfo())
+ RegInfo = new (Allocator.Allocate<MachineRegisterInfo>())
+ MachineRegisterInfo(*TM.getRegisterInfo());
+ else
+ RegInfo = 0;
+ MFInfo = 0;
+ FrameInfo = new (Allocator.Allocate<MachineFrameInfo>())
+ MachineFrameInfo(*TM.getFrameInfo());
+ ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
+ MachineConstantPool(TM.getTargetData());
+
+ // Set up jump table.
+ const TargetData &TD = *TM.getTargetData();
+ bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+ unsigned EntrySize = IsPic ? 4 : TD.getPointerSize();
+ unsigned Alignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
+ : TD.getPointerABIAlignment();
+ JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
+ MachineJumpTableInfo(EntrySize, Alignment);
+}
+
+MachineFunction::~MachineFunction() {
+ BasicBlocks.clear();
+ InstructionRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+  if (RegInfo) {
+    RegInfo->~MachineRegisterInfo();
+    Allocator.Deallocate(RegInfo);
+  }
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo(); Allocator.Deallocate(MFInfo);
+ }
+ FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
+ ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
+ JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo);
+}
+
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == 0)
+ MBBI = begin();
+ else
+ MBBI = MBB;
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = prior(MBBI)->getNumber()+1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = 0;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
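+// Example (illustrative): erasing block #2 from a function numbered 0..4 and
+// then calling RenumberBlocks() renumbers the remaining blocks 0..3 and
+// shrinks MBBNumbering to match.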
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+ DebugLoc DL, bool NoImp) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(TID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except that the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+  // Clear the instruction's memoperands. This must be done manually because
+ // the instruction's parent pointer is now null, so it can't properly
+ // deallocate them on its own.
+ MI->clearMemOperands(*this);
+
+ MI->~MachineInstr();
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+void MachineFunction::dump() const {
+ print(*cerr.stream());
+}
+
+void MachineFunction::print(std::ostream &OS) const {
+ OS << "# Machine code for " << Fn->getName () << "():\n";
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ {
+ raw_os_ostream OSS(OS);
+ ConstantPool->print(OSS);
+ }
+
+ const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Live Ins:";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ if (TRI)
+ OS << " " << TRI->getName(I->first);
+ else
+ OS << " Reg #" << I->first;
+
+ if (I->second)
+ OS << " in VR#" << I->second << " ";
+ }
+ OS << "\n";
+ }
+ if (RegInfo && !RegInfo->liveout_empty()) {
+ OS << "Live Outs:";
+ for (MachineRegisterInfo::liveout_iterator
+ I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
+ if (TRI)
+ OS << " " << TRI->getName(*I);
+ else
+ OS << " Reg #" << *I;
+ OS << "\n";
+ }
+
+ for (const_iterator BB = begin(); BB != end(); ++BB)
+ BB->print(OS);
+
+ OS << "\n# End machine code for " << Fn->getName () << "().\n\n";
+}
+
+/// CFGOnly flag - This is used to control whether or not the CFG graph printer
+/// prints out the contents of basic blocks or not. This is acceptable because
+/// this code is only really used for debugging purposes.
+///
+static bool CFGOnly = false;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+ static std::string getGraphName(const MachineFunction *F) {
+ return "CFG for '" + F->getFunction()->getName() + "' function";
+ }
+
+ static std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ if (CFGOnly && Node->getBasicBlock() &&
+ !Node->getBasicBlock()->getName().empty())
+ return Node->getBasicBlock()->getName() + ":";
+
+ std::ostringstream Out;
+ if (CFGOnly) {
+ Out << Node->getNumber() << ':';
+ return Out.str();
+ }
+
+ Node->print(Out);
+
+ std::string OutStr = Out.str();
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getName());
+#else
+ cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+ CFGOnly = true;
+ viewCFG();
+ CFGOnly = false;
+}
+
+// The next two methods are used to construct and to retrieve
+// the MachineCodeForFunction object for the given function.
+// construct() -- Allocates and initializes for a given function and target
+// get() -- Returns a handle to the object.
+// This should not be called before "construct()"
+// for a given Function.
+//
+MachineFunction&
+MachineFunction::construct(const Function *Fn, const TargetMachine &Tar)
+{
+ AnnotationID MF_AID =
+ AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+ assert(Fn->getAnnotation(MF_AID) == 0 &&
+ "Object already exists for this function!");
+ MachineFunction* mcInfo = new MachineFunction(Fn, Tar);
+ Fn->addAnnotation(mcInfo);
+ return *mcInfo;
+}
+
+void MachineFunction::destruct(const Function *Fn) {
+ AnnotationID MF_AID =
+ AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+ bool Deleted = Fn->deleteAnnotation(MF_AID);
+ assert(Deleted && "Machine code did not exist for function!");
+ Deleted = Deleted; // silence warning when no assertions.
+}
+
+MachineFunction& MachineFunction::get(const Function *F)
+{
+ AnnotationID MF_AID =
+ AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+ MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID);
+ assert(mc && "Call construct() method first to allocate the object");
+ return *mc;
+}
+
+/// addLiveIn - Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+ const TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = getRegInfo().createVirtualRegister(RC);
+ getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+/// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given
+/// source file, line, and column. If none currently exists, create a new
+/// DebugLocTuple, and insert it into the DebugIdMap.
+unsigned MachineFunction::getOrCreateDebugLocID(GlobalVariable *CompileUnit,
+ unsigned Line, unsigned Col) {
+ DebugLocTuple Tuple(CompileUnit, Line, Col);
+ DenseMap<DebugLocTuple, unsigned>::iterator II
+ = DebugLocInfo.DebugIdMap.find(Tuple);
+ if (II != DebugLocInfo.DebugIdMap.end())
+ return II->second;
+ // Add a new tuple.
+ unsigned Id = DebugLocInfo.DebugLocations.size();
+ DebugLocInfo.DebugLocations.push_back(Tuple);
+ DebugLocInfo.DebugIdMap[Tuple] = Id;
+ return Id;
+}
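+// Illustrative use (hypothetical values): repeated calls with the same
+// (CompileUnit, Line, Col) tuple return the same index, so a DebugLoc can be
+// stored as a single unsigned and compared without touching the tuple map.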
+
+/// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object.
+DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const {
+ unsigned Idx = DL.getIndex();
+ assert(Idx < DebugLocInfo.DebugLocations.size() &&
+ "Invalid index into debug locations!");
+ return DebugLocInfo.DebugLocations[Idx];
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency. By default, fixed objects are immutable. This returns an
+/// index with a negative value.
+///
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable));
+ return -++NumFixedObjects;
+}
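+// For example (following the code above): the first fixed object created
+// receives index -1, the second -2, and so on; non-fixed stack objects use
+// indices >= 0.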
+
+
+void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{
+ const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " <fi#" << (int)(i-NumFixedObjects) << ">: ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size is " << SO.Size << " byte" << (SO.Size != 1 ? "s," : ",");
+ OS << " alignment is " << SO.Alignment << " byte"
+ << (SO.Alignment != 1 ? "s," : ",");
+
+ if (i < NumFixedObjects)
+ OS << " fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << " at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+
+ if (HasVarSizedObjects)
+ OS << " Stack frame contains variable sized objects\n";
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, *cerr.stream());
+}
+
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getJumpTableIndex - Create a new jump table entry in the jump table info
+/// or return an existing one.
+///
+unsigned MachineJumpTableInfo::getJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
+ if (JumpTables[i].MBBs == DestBBs)
+ return i;
+
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
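+// Example (illustrative): two calls with an identical DestBBs vector return
+// the same jump table index; a vector with any different destination appends
+// a new MachineJumpTableEntry and returns its index.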
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool
+MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i) {
+ MachineJumpTableEntry &JTE = JumpTables[i];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(std::ostream &OS) const {
+ // FIXME: this is lame, maybe we could print out the MBB numbers or something
+ // like {1, 2, 4, 5, 3, 0}
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " <jt#" << i << "> has " << JumpTables[i].MBBs.size()
+ << " entries\n";
+ }
+}
+
+void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one.  User must specify the minimum required alignment for
+/// the object, in bytes, as a power of two.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].Val.ConstVal == C &&
+ (Constants[i].getAlignment() & (Alignment - 1)) == 0)
+ return i;
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1)
+ return (unsigned)Idx;
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " <cp#" << i << "> is";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ OS << *(Value*)Constants[i].Val.ConstVal;
+ OS << " , alignment=" << Constants[i].getAlignment();
+ OS << "\n";
+ }
+}
+
+void MachineConstantPool::dump() const { print(errs()); }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 0000000..b8c8563
--- /dev/null
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1105 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/FoldingSet.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// AddRegOperandToRegInfo - Add this register operand to the specified
+/// MachineRegisterInfo. If it is null, then the next/prev fields should be
+/// explicitly nulled out.
+void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
+ assert(isReg() && "Can only add reg operand to use lists");
+
+  // If the reginfo pointer is null, just explicitly null out our next/prev
+ // pointers, to ensure they are not garbage.
+ if (RegInfo == 0) {
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+ return;
+ }
+
+  // Otherwise, add this operand to the head of the register's use/def list.
+ MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
+
+ // For SSA values, we prefer to keep the definition at the start of the list.
+  // We do this by skipping over the definition if it is at the head of the
+ // list.
+ if (*Head && (*Head)->isDef())
+ Head = &(*Head)->Contents.Reg.Next;
+
+ Contents.Reg.Next = *Head;
+ if (Contents.Reg.Next) {
+ assert(getReg() == Contents.Reg.Next->getReg() &&
+ "Different regs on the same list!");
+ Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
+ }
+
+ Contents.Reg.Prev = Head;
+ *Head = this;
+}
+
+/// RemoveRegOperandFromRegInfo - Remove this register operand from the
+/// MachineRegisterInfo it is linked with.
+void MachineOperand::RemoveRegOperandFromRegInfo() {
+ assert(isOnRegUseList() && "Reg operand is not on a use list");
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *NextOp = Contents.Reg.Next;
+ *Contents.Reg.Prev = NextOp;
+ if (NextOp) {
+ assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
+ NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
+ }
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg) return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ RemoveRegOperandFromRegInfo();
+ Contents.Reg.RegNo = Reg;
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ Contents.Reg.RegNo = Reg;
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ // If this operand is currently a register operand, and if this is in a
+ // function, deregister the operand from the register's use/def list.
+ if (isReg() && getParent() && getParent()->getParent() &&
+ getParent()->getParent()->getParent())
+ RemoveRegOperandFromRegInfo();
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead) {
+ // If this operand is already a register operand, use setReg to update the
+ // register's use/def lists.
+ if (isReg()) {
+ assert(!isEarlyClobber());
+ setReg(Reg);
+ } else {
+ // Otherwise, change this to a register and set the reg#.
+ OpKind = MO_Register;
+ Contents.Reg.RegNo = Reg;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ }
+
+ IsDef = isDef;
+ IsImp = isImp;
+ IsKill = isKill;
+ IsDead = isDead;
+ IsEarlyClobber = false;
+ SubReg = 0;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType()) return false;
+
+ switch (getType()) {
+ default: assert(0 && "Unrecognized operand type");
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ }
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(std::ostream &OS, const TargetMachine *TM) const {
+ raw_os_ostream RawOS(OS);
+ print(RawOS, TM);
+}
+
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
+ OS << "%reg" << getReg();
+ } else {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (TM == 0)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+
+ if (TM)
+ OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
+ else
+ OS << "%mreg" << getReg();
+ }
+
+ if (getSubReg() != 0) {
+ OS << ":" << getSubReg();
+ }
+
+ if (isDef() || isKill() || isDead() || isImplicit() || isEarlyClobber()) {
+ OS << "<";
+ bool NeedComma = false;
+ if (isImplicit()) {
+ if (NeedComma) OS << ",";
+ OS << (isDef() ? "imp-def" : "imp-use");
+ NeedComma = true;
+ } else if (isDef()) {
+ if (NeedComma) OS << ",";
+ if (isEarlyClobber())
+ OS << "earlyclobber,";
+ OS << "def";
+ NeedComma = true;
+ }
+ if (isKill() || isDead()) {
+ if (NeedComma) OS << ",";
+ if (isKill()) OS << "kill";
+ if (isDead()) OS << "dead";
+ }
+ OS << ">";
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType() == Type::FloatTy) {
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ } else {
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ }
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "mbb<"
+ << ((Value*)getMBB()->getBasicBlock())->getName()
+ << "," << (void*)getMBB() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << getIndex() << ">";
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << ">";
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << getIndex() << ">";
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:" << ((Value*)getGlobal())->getName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << ">";
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << ">";
+ break;
+ default:
+ assert(0 && "Unrecognized operand type");
+ }
+}
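+// Sample renderings (illustrative): a virtual register def prints as
+// "%reg1024<def>", a frame index as "<fi#0>", and a global address with a
+// byte offset as "<ga:foo+4>".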
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
+ int64_t o, uint64_t s, unsigned int a)
+ : Offset(o), Size(s), V(v),
+ Flags((f & 7) | ((Log2_32(a) + 1) << 3)) {
+ assert(isPowerOf2_32(a) && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+}
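+// Worked example (illustrative, assuming the load flag occupies bit 0): for
+// a 4-byte-aligned load, f = 1 and a = 4, so Flags = (1 & 7) |
+// ((Log2_32(4) + 1) << 3) = 1 | (3 << 3) = 25; the alignment is recovered
+// later as 1 << ((Flags >> 3) - 1) = 4.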
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Offset);
+ ID.AddInteger(Size);
+ ID.AddPointer(V);
+ ID.AddInteger(Flags);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// TID NULL and no operands.
+MachineInstr::MachineInstr()
+ : TID(0), NumImplicitOps(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+ addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified
+/// by the TargetInstrDesc, plus any implicit def/use operands.
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
+ : TID(&tid), NumImplicitOps(0), Parent(0),
+ debugLoc(DebugLoc::getUnknownLoc()) {
+ if (!NoImp && TID->getImplicitDefs())
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (!NoImp && TID->getImplicitUses())
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+ bool NoImp)
+ : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) {
+ if (!NoImp && TID->getImplicitDefs())
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (!NoImp && TID->getImplicitUses())
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor two above, except
+/// that the MachineInstr is created and added to the end of the specified
+/// basic block.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
+ : TID(&tid), NumImplicitOps(0), Parent(0),
+ debugLoc(DebugLoc::getUnknownLoc()) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
+ const TargetInstrDesc &tid)
+ : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : TID(&MI.getDesc()), NumImplicitOps(0), Parent(0),
+ debugLoc(MI.getDebugLoc()) {
+ Operands.reserve(MI.getNumOperands());
+
+ // Add operands
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+ addOperand(MI.getOperand(i));
+ NumImplicitOps = MI.NumImplicitOps;
+
+ // Add memory operands.
+ for (std::list<MachineMemOperand>::const_iterator i = MI.memoperands_begin(),
+ j = MI.memoperands_end(); i != j; ++i)
+ addMemOperand(MF, *i);
+
+ // Set parent to null.
+ Parent = 0;
+
+ LeakDetector::addGarbageObject(this);
+}
+
+MachineInstr::~MachineInstr() {
+ LeakDetector::removeGarbageObject(this);
+ assert(MemOperands.empty() &&
+ "MachineInstr being deleted with live memoperands!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
+ assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) &&
+ "Reg operand def/use list corrupted");
+ }
+#endif
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists() {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(&RegInfo);
+ }
+}
+
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ assert((isImpReg || !OperandsComplete()) &&
+ "Trying to add an operand to a machine instr that is already done!");
+
+ MachineRegisterInfo *RegInfo = getRegInfo();
+
+ // If we are adding the operand to the end of the list, our job is simpler.
+ // This is true most of the time, so this is a reasonable optimization.
+ if (isImpReg || NumImplicitOps == 0) {
+ // We can only do this optimization if we know that the operand list won't
+ // reallocate.
+ if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) {
+ Operands.push_back(Op);
+
+ // Set the parent of the operand.
+ Operands.back().ParentMI = this;
+
+ // If the operand is a register, update the operand's use list.
+ if (Op.isReg())
+ Operands.back().AddRegOperandToRegInfo(RegInfo);
+ return;
+ }
+ }
+
+ // Otherwise, we have to insert a real operand before any implicit ones.
+ unsigned OpNo = Operands.size()-NumImplicitOps;
+
+ // If this instruction isn't embedded into a function, then we don't need to
+ // update any operand lists.
+ if (RegInfo == 0) {
+ // Simple insertion, no reginfo update needed for other register operands.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Do explicitly set the reginfo for this operand though, to ensure the
+ // next/prev fields are properly nulled out.
+ if (Operands[OpNo].isReg())
+ Operands[OpNo].AddRegOperandToRegInfo(0);
+
+ } else if (Operands.size()+1 <= Operands.capacity()) {
+ // Otherwise, we have to remove register operands from their register use
+ // list, add the operand, then add the register operands back to their use
+ // list. This also must handle the case when the operand list reallocates
+ // to somewhere else.
+
+ // If insertion of this operand won't cause reallocation of the operand
+ // list, just remove the implicit operands, add the operand, then re-add all
+ // the rest of the operands.
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+
+ // Add the operand. If it is a register, add it to the reg list.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ if (Operands[OpNo].isReg())
+ Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+
+ // Re-add all the implicit ops.
+ for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ } else {
+ // Otherwise, we will be reallocating the operand list. Remove all reg
+ // operands from their list, then readd them after the operand list is
+ // reallocated.
+ RemoveRegOperandsFromUseLists();
+
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Re-add all the operands.
+ AddRegOperandsToUseLists(*RegInfo);
+ }
+}
+
+/// RemoveOperand - Erase an operand from an instruction, leaving it with one
+/// fewer operand than it started with.
+///
+void MachineInstr::RemoveOperand(unsigned OpNo) {
+ assert(OpNo < Operands.size() && "Invalid operand number");
+
+ // Special case removing the last one.
+ if (OpNo == Operands.size()-1) {
+ // If needed, remove from the reg def/use list.
+ if (Operands.back().isReg() && Operands.back().isOnRegUseList())
+ Operands.back().RemoveRegOperandFromRegInfo();
+
+ Operands.pop_back();
+ return;
+ }
+
+ // Otherwise, we are removing an interior operand. If we have reginfo to
+ // update, remove all operands that will be shifted down from their reg lists,
+ // move everything down, then re-add them.
+ MachineRegisterInfo *RegInfo = getRegInfo();
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+ }
+
+ Operands.erase(Operands.begin()+OpNo);
+
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ }
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction,
+/// referencing arbitrary storage.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ const MachineMemOperand &MO) {
+ MemOperands.push_back(MO);
+}
+
+/// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands.
+void MachineInstr::clearMemOperands(MachineFunction &MF) {
+ MemOperands.clear();
+}
+
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing basic
+/// block, and deletes it.
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand.
+///
+bool MachineInstr::OperandsComplete() const {
+ unsigned short NumOperands = TID->getNumOperands();
+ if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+ return true; // We already have all of this instruction's operands!
+ return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = TID->getNumOperands();
+ if (!TID->isVariadic())
+ return NumOperands;
+
+ for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit())
+ NumOperands++;
+ }
+ return NumOperands;
+}
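+// For illustration (a hypothetical variadic instruction): with 2 fixed
+// operands in the descriptor, 3 additional explicit register arguments, and 2
+// trailing implicit-use operands, the loop above counts 2 + 3 = 5 explicit
+// operands.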
+
+
+/// isLabel - Returns true if the MachineInstr represents a label.
+///
+bool MachineInstr::isLabel() const {
+ return getOpcode() == TargetInstrInfo::DBG_LABEL ||
+ getOpcode() == TargetInstrInfo::EH_LABEL ||
+ getOpcode() == TargetInstrInfo::GC_LABEL;
+}
+
+/// isDebugLabel - Returns true if the MachineInstr represents a debug label.
+///
+bool MachineInstr::isDebugLabel() const {
+ return getOpcode() == TargetInstrInfo::DBG_LABEL;
+}
+
+/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
+/// the specified register or -1 if it is not found. It further tightens the
+/// search criteria to a use that kills the register if isKill is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isDead || MO.isDead())
+ return i;
+ }
+ return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ const TargetInstrDesc &TID = getDesc();
+ if (TID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (TID.OpInfo[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+/// isRegTiedToUseOperand - Given the index of a register def operand,
+/// check if the register def is tied to a source operand, due to either
+/// two-address elimination or inline assembly constraints. Returns the
+/// first tied use operand index by reference if UseOpIdx is not null.
+bool MachineInstr::
+isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
+ if (getOpcode() == TargetInstrInfo::INLINEASM) {
+ assert(DefOpIdx >= 2);
+ const MachineOperand &MO = getOperand(DefOpIdx);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ return false;
+ // Determine the actual operand number corresponding to this index.
+ unsigned DefNo = 0;
+ for (unsigned i = 1, e = getNumOperands(); i < e; ) {
+ const MachineOperand &FMO = getOperand(i);
+ assert(FMO.isImm());
+ // Skip over this def.
+ i += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+ if (i > DefOpIdx)
+ break;
+ ++DefNo;
+ }
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &FMO = getOperand(i);
+ if (!FMO.isImm())
+ continue;
+ if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+ continue;
+ unsigned Idx;
+ if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
+ Idx == DefNo) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i + 1;
+ return true;
+ }
+ }
+ }
+
+ assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
+ const TargetInstrDesc &TID = getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isRegTiedToDefOperand - Return true if the operand of the specified index
+/// is a register use and it is tied to a def operand. It also returns the def
+/// operand index by reference.
+bool MachineInstr::
+isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
+ if (getOpcode() == TargetInstrInfo::INLINEASM) {
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
+ return false;
+ assert(UseOpIdx > 0);
+ const MachineOperand &UFMO = getOperand(UseOpIdx-1);
+ if (!UFMO.isImm())
+ return false; // Must be physreg uses.
+ unsigned DefNo;
+ if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+ if (!DefOpIdx)
+ return true;
+
+ unsigned DefIdx = 1;
+ // Remember to adjust the index. First operand is asm string, then there
+ // is a flag for each.
+ while (DefNo) {
+ const MachineOperand &FMO = getOperand(DefIdx);
+ assert(FMO.isImm());
+ // Skip over this def.
+ DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+ --DefNo;
+ }
+ *DefOpIdx = DefIdx+1;
+ return true;
+ }
+ return false;
+ }
+
+ const TargetInstrDesc &TID = getDesc();
+ if (UseOpIdx >= TID.getNumOperands())
+ return false;
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+ if (DefIdx == -1)
+ return false;
+ if (DefOpIdx)
+ *DefOpIdx = (unsigned)DefIdx;
+ return true;
+}
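+// A worked INLINEASM operand layout (hypothetical, assuming one register per
+// operand group):
+//   op 0: asm string
+//   op 1: flag imm for a def group   op 2: def register
+//   op 3: flag imm for a use group   op 4: use register tied to def #0
+// For UseOpIdx == 4 the flag at op 3 reports the tie to def #0, and the walk
+// above skips one group (ops 1-2) to compute *DefOpIdx == 2; conversely,
+// isRegTiedToUseOperand(2) recovers UseOpIdx == 4 from the same flags.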
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+ continue;
+ for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+ MachineOperand &MOp = getOperand(j);
+ if (!MOp.isIdenticalTo(MO))
+ continue;
+ if (MO.isKill())
+ MOp.setIsKill();
+ else
+ MOp.setIsDead();
+ break;
+ }
+ }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (TID.OpInfo[i].isPredicate()) {
+ // Predicated operands must be last operands.
+ addOperand(MI->getOperand(i));
+ }
+ }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+ bool &SawStore) const {
+ // Ignore stuff that we obviously can't move.
+ if (TID->mayStore() || TID->isCall()) {
+ SawStore = true;
+ return false;
+ }
+ if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+ // loaded value doesn't change between the load and its intended destination.
+ // The check for isInvariantLoad gives the target the chance to classify the
+ // load as always returning a constant, e.g. a constant pool load.
+ if (TID->mayLoad() && !TII->isInvariantLoad(this))
+ // Otherwise, this is a real load. If there is a store between the load and
+ // the end of the block, or if the load is volatile, we can't move it.
+ return !SawStore && !hasVolatileMemoryRef();
+
+ return true;
+}
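+// A minimal usage sketch (hypothetical caller; MachineSink.cpp later in this
+// patch applies the same bottom-up pattern):
+//   bool SawStore = false;
+//   MachineBasicBlock::iterator I = MBB.end();
+//   while (I != MBB.begin())
+//     if ((--I)->isSafeToMove(TII, SawStore))
+//       ; // candidate for moving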
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+ unsigned DstReg) const {
+ bool SawStore = false;
+ if (!getDesc().isRematerializable() ||
+ !TII->isTriviallyReMaterializable(this) ||
+ !isSafeToMove(TII, SawStore))
+ return false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg())
+ continue;
+ // FIXME: For now, do not remat any instruction with register operands.
+ // Later on, we can loosen the restriction if the register operands have
+ // not been modified between the def and the use. Note that this is
+ // different from MachineSink because the code is no longer in two-address
+ // form (at least partially).
+ if (MO.isUse())
+ return false;
+ else if (!MO.isDead() && MO.getReg() != DstReg)
+ return false;
+ }
+ return true;
+}
+
+/// hasVolatileMemoryRef - Return true if this instruction may have a
+/// volatile memory reference, or if the information describing the
+/// memory reference is not available. Return false if it is known to
+/// have no volatile memory references.
+bool MachineInstr::hasVolatileMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!TID->mayStore() &&
+ !TID->mayLoad() &&
+ !TID->isCall() &&
+ !TID->hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check the memory reference information for volatile references.
+ for (std::list<MachineMemOperand>::const_iterator I = memoperands_begin(),
+ E = memoperands_end(); I != E; ++I)
+ if (I->isVolatile())
+ return true;
+
+ return false;
+}
+
+void MachineInstr::dump() const {
+ cerr << " " << *this;
+}
+
+void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const {
+ raw_os_ostream RawOS(OS);
+ print(RawOS, TM);
+}
+
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // Specialize printing if op#0 is definition
+ unsigned StartOp = 0;
+ if (getNumOperands() && getOperand(0).isReg() && getOperand(0).isDef()) {
+ getOperand(0).print(OS, TM);
+ OS << " = ";
+ ++StartOp; // Don't print this operand again!
+ }
+
+ OS << getDesc().getName();
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ if (i != StartOp)
+ OS << ",";
+ OS << " ";
+ getOperand(i).print(OS, TM);
+ }
+
+ if (!memoperands_empty()) {
+ OS << ", Mem:";
+ for (std::list<MachineMemOperand>::const_iterator i = memoperands_begin(),
+ e = memoperands_end(); i != e; ++i) {
+ const MachineMemOperand &MRO = *i;
+ const Value *V = MRO.getValue();
+
+ assert((MRO.isLoad() || MRO.isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (MRO.isVolatile())
+ OS << "Volatile ";
+
+ if (MRO.isLoad())
+ OS << "LD";
+ if (MRO.isStore())
+ OS << "ST";
+
+ OS << "(" << MRO.getSize() << "," << MRO.getAlignment() << ") [";
+
+ if (!V)
+ OS << "<unknown>";
+ else if (!V->getName().empty())
+ OS << V->getName();
+ else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ PSV->print(OS);
+ } else
+ OS << V;
+
+ OS << " + " << MRO.getOffset() << "]";
+ }
+ }
+
+ if (!debugLoc.isUnknown()) {
+ const MachineFunction *MF = getParent()->getParent();
+ DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
+ DICompileUnit CU(DLT.CompileUnit);
+ std::string Dir, Fn;
+ OS << " [dbg: "
+ << CU.getDirectory(Dir) << '/' << CU.getFilename(Fn) << ","
+ << DLT.Line << ","
+ << DLT.Col << "]";
+ }
+
+ OS << "\n";
+}
+
+bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
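+// Example (hypothetical situation): a liveness pass that determines
+// IncomingReg dies at this instruction can mark the kill, appending an
+// implicit-kill operand when no explicit use of the register exists here:
+//   MI->addRegisterKilled(Reg, TRI, /*AddIfNotFound=*/true);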
+
+bool MachineInstr::addRegisterDead(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isDead())
+ // The register is already marked dead.
+ return true;
+ MO.setIsDead();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->getSubRegisters(IncomingReg) &&
+ RegInfo->getSuperRegisters(Reg) &&
+ RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+ }
+ return Found;
+}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 0000000..aaa4de4
--- /dev/null
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,406 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass does not attempt to throttle itself to limit register pressure.
+// The register allocation phases are expected to perform rematerialization
+// to recover when register pressure is high.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
+STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
+
+namespace {
+ class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+
+ // Various analyses that we use...
+ MachineLoopInfo *LI; // Current MachineLoopInfo
+ MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+ MachineRegisterInfo *RegInfo; // Machine register information
+
+ // State that is updated as we process loops
+ bool Changed; // True if a loop is changed.
+ MachineLoop *CurLoop; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+ // For each BB and opcode pair, keep a list of hoisted instructions.
+ DenseMap<std::pair<unsigned, unsigned>,
+ std::vector<const MachineInstr*> > CSEMap;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineLICM() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Machine Instruction LICM"; }
+
+ // FIXME: Loop preheaders?
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ CSEMap.clear();
+ }
+
+ private:
+ /// IsLoopInvariantInst - Returns true if the instruction is loop
+ /// invariant. I.e., all virtual register operands are defined outside of
+ /// the loop, physical registers aren't accessed (explicitly or implicitly),
+ /// and the instruction is hoistable.
+ ///
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ /// IsProfitableToHoist - Return true if it is potentially profitable to
+ /// hoist the given loop invariant.
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(MachineDomTreeNode *N);
+
+ /// Hoist - When an instruction is found to use only loop invariant operands
+ /// that are safe to hoist, this function is called to do the dirty work.
+ ///
+ void Hoist(MachineInstr &MI);
+ };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+static RegisterPass<MachineLICM>
+X("machinelicm", "Machine Loop Invariant Code Motion");
+
+FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); }
+
+/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most
+/// loop that has a preheader.
+static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPreheader())
+ return false;
+ return true;
+}
+
+/// Hoist expressions out of the specified loop. Note that alias info for the
+/// inner loop is not preserved, so it is not a good idea to run LICM multiple
+/// times on one loop.
+///
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ DOUT << "******** Machine LICM ********\n";
+
+ Changed = false;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ RegInfo = &MF.getRegInfo();
+
+ // Get our Loop information...
+ LI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+
+ for (MachineLoopInfo::iterator
+ I = LI->begin(), E = LI->end(); I != E; ++I) {
+ CurLoop = *I;
+
+ // Only visit outer-most preheader-sporting loops.
+ if (!LoopIsOuterMostWithPreheader(CurLoop))
+ continue;
+
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop preheader, we can't do any hoisting.
+ //
+ // FIXME: We are only hoisting if the basic block coming into this loop
+ // has only one successor. This isn't the case in general because we haven't
+ // broken critical edges or added preheaders.
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader)
+ continue;
+
+ HoistRegion(DT->getNode(CurLoop->getHeader()));
+ }
+
+ return Changed;
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr &MI = *MII;
+
+ Hoist(MI);
+
+ MII = NextMII;
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
+
+ for (unsigned I = 0, E = Children.size(); I != E; ++I)
+ HoistRegion(Children[I]);
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ const TargetInstrDesc &TID = I.getDesc();
+
+ // Ignore stuff that we obviously can't hoist.
+ if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.hasUnmodeledSideEffects())
+ return false;
+
+ if (TID.mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, the
+ // load can be hoisted like any other invariant computation.
+ if (!TII->isInvariantLoad(&I))
+ // FIXME: we should be able to sink loads with no other side effects if
+ // there is nothing that can change memory from here until the end of
+ // block. This is a trivial form of alias analysis.
+ return false;
+ }
+
+ DEBUG({
+ DOUT << "--- Checking if we can hoist " << I;
+ if (I.getDesc().getImplicitUses()) {
+ DOUT << " * Instruction has implicit uses:\n";
+
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
+ *ImpUses; ++ImpUses)
+ DOUT << " -> " << TRI->getName(*ImpUses) << "\n";
+ }
+
+ if (I.getDesc().getImplicitDefs()) {
+ DOUT << " * Instruction has implicit defines:\n";
+
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
+ *ImpDefs; ++ImpDefs)
+ DOUT << " -> " << TRI->getName(*ImpDefs) << "\n";
+ }
+ });
+
+ if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
+ DOUT << "Cannot hoist with implicit defines or uses\n";
+ return false;
+ }
+
+ // The instruction is loop invariant if all of its operands are.
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I.getOperand(i);
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // Don't hoist an instruction that uses or defines a physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ if (!MO.isUse())
+ continue;
+
+ assert(RegInfo->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (CurLoop->contains(RegInfo->getVRegDef(Reg)->getParent()))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
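+// (The physical-register check above is deliberately conservative: even a
+// plain read of a physreg, e.g. a condition-flags register, may observe
+// different values on each iteration, so only pure vreg computations are
+// treated as invariant here.)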
+
+
+/// HasPHIUses - Return true if the specified register has any PHI use.
+static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
+ for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
+ UE = RegInfo->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->getOpcode() == TargetInstrInfo::PHI)
+ return true;
+ }
+ return false;
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ return false;
+
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // FIXME: For now, only hoist re-materializable instructions. LICM can
+ // increase register pressure, and we want to make sure it doesn't increase
+ // spilling.
+ if (!TID.mayLoad() && (!TID.isRematerializable() ||
+ !TII->isTriviallyReMaterializable(&MI)))
+ return false;
+
+ // If the result(s) of this instruction are used by PHIs, then don't hoist
+ // it. The presence of joins makes it difficult for the current register
+ // allocator implementation to perform remat.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ if (HasPHIUses(MO.getReg(), RegInfo))
+ return false;
+ }
+
+ return true;
+}
+
+static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs,
+ MachineRegisterInfo *RegInfo) {
+ unsigned NumOps = MI->getNumOperands();
+ for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+ const MachineInstr *PrevMI = PrevMIs[i];
+ unsigned NumOps2 = PrevMI->getNumOperands();
+ if (NumOps != NumOps2)
+ continue;
+ bool IsSame = true;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (MO.isReg() && MO.isDef()) {
+ if (RegInfo->getRegClass(MO.getReg()) !=
+ RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) {
+ IsSame = false;
+ break;
+ }
+ continue;
+ }
+ if (!MO.isIdenticalTo(PrevMI->getOperand(j))) {
+ IsSame = false;
+ break;
+ }
+ }
+ if (IsSame)
+ return PrevMI;
+ }
+ return 0;
+}
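+// Note that def operands are compared by register class rather than by
+// register, since separately hoisted instructions define distinct virtual
+// registers; every other operand must match exactly via isIdenticalTo().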
+
+/// Hoist - When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+///
+void MachineLICM::Hoist(MachineInstr &MI) {
+ if (!IsLoopInvariantInst(MI)) return;
+ if (!IsProfitableToHoist(MI)) return;
+
+ // Now move the instruction to the preheader, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ DOUT << "Hoisting " << MI;
+ if (CurPreheader->getBasicBlock())
+ DOUT << " to MachineBasicBlock "
+ << CurPreheader->getBasicBlock()->getName();
+ if (MI.getParent()->getBasicBlock())
+ DOUT << " from MachineBasicBlock "
+ << MI.getParent()->getBasicBlock()->getName();
+ DOUT << "\n";
+ });
+
+ // Look for opportunity to CSE the hoisted instruction.
+ std::pair<unsigned, unsigned> BBOpcPair =
+ std::make_pair(CurPreheader->getNumber(), MI.getOpcode());
+ DenseMap<std::pair<unsigned, unsigned>,
+ std::vector<const MachineInstr*> >::iterator CI = CSEMap.find(BBOpcPair);
+ bool DoneCSE = false;
+ if (CI != CSEMap.end()) {
+ const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo);
+ if (Dup) {
+ DOUT << "CSEing " << MI;
+ DOUT << " with " << *Dup;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ }
+ MI.eraseFromParent();
+ DoneCSE = true;
+ ++NumCSEed;
+ }
+ }
+
+ // Otherwise, splice the instruction to the preheader.
+ if (!DoneCSE) {
+ CurPreheader->splice(CurPreheader->getFirstTerminator(),
+ MI.getParent(), &MI);
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(&MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(&MI);
+ CSEMap.insert(std::make_pair(BBOpcPair, CSEMIs));
+ }
+ }
+
+ ++NumHoisted;
+ Changed = true;
+}
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 0000000..68ddb7b
--- /dev/null
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,40 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+using namespace llvm;
+
+TEMPLATE_INSTANTIATION(class LoopBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class LoopInfoBase<MachineBasicBlock>);
+
+char MachineLoopInfo::ID = 0;
+static RegisterPass<MachineLoopInfo>
+X("machine-loops", "Machine Natural Loop Construction", true);
+
+const PassInfo *const llvm::MachineLoopInfoID = &X;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ LI->Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update
+ return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 0000000..1d8109e
--- /dev/null
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,368 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the Pass registration stuff necessary to use MachineModuleInfo.
+static RegisterPass<MachineModuleInfo>
+X("machinemoduleinfo", "Module Information");
+char MachineModuleInfo::ID = 0;
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass(&ID)
+, LabelIDList()
+, FrameMoves()
+, LandingPads()
+, Personalities()
+, CallsEHReturn(0)
+, CallsUnwindInit(0)
+, DbgInfoAvailable(false)
+{
+ // Always emit "no personality" info
+ Personalities.push_back(NULL);
+}
+MachineModuleInfo::~MachineModuleInfo() {
+
+}
+
+/// doInitialization - Initialize the state for a new module.
+///
+bool MachineModuleInfo::doInitialization() {
+ return false;
+}
+
+/// doFinalization - Tear down the state after completion of a module.
+///
+bool MachineModuleInfo::doFinalization() {
+ return false;
+}
+
+/// BeginFunction - Begin gathering function meta information.
+///
+void MachineModuleInfo::BeginFunction(MachineFunction *MF) {
+ // Coming soon.
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up frame info.
+ FrameMoves.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(Module &M) {
+ // Insert functions in the llvm.used array into UsedFunctions.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InitList->getOperand(i)))
+ if (CE->getOpcode() == Instruction::BitCast)
+ if (Function *F = dyn_cast<Function>(CE->getOperand(0)))
+ UsedFunctions.insert(F);
+ }
+}
+
+//===-EH-------------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ unsigned BeginLabel, unsigned EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ unsigned LandingPadLabel = NextLabelID();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads() {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel);
+
+ // Special case: we *should* emit LPs with a null LP MBB; this indicates the
+ // "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) {
+ unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]);
+ unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]);
+
+ if (!BeginLabel || !EndLabel) {
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ continue;
+ }
+
+ LandingPad.BeginLabels[j] = BeginLabel;
+ LandingPad.EndLabels[j] = EndLabel;
+ ++j;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+
+ ++i;
+ }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+ FilterIds.push_back(TyIds[I]);
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
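+// Worked example (hypothetical type ids): adding the filter {2, 3} to empty
+// tables pushes FilterIds = [2, 3, 0] (0 terminates the filter), records
+// FilterEnds = [2], and returns -(1 + 0) = -1. A later filter {3} coincides
+// with the tail of the existing one starting at FilterIds[1], so the scan
+// above returns -(1 + 1) = -2 without growing the tables.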
+
+/// getPersonality - Return the personality function for the current function.
+Function *MachineModuleInfo::getPersonality() const {
+ // FIXME: Until PR1414 is fixed, we're using one personality function per
+ // function.
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return a unique index for the current personality
+/// function. A NULL personality function should always get index zero.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan landing pads. If there is at least one non-NULL personality, use it.
+ for (unsigned i = 0; i != LandingPads.size(); ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
+ }
+
+ for (unsigned i = 0; i < Personalities.size(); ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This should never happen
+ assert(0 && "Personality function should be set!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+/// DebugLabelFolding pass - This pass prunes out redundant labels. This allows
+/// an info consumer to determine if the range between two labels is empty, by
+/// seeing if the labels map to the same reduced label.
+
+namespace llvm {
+
+struct DebugLabelFolder : public MachineFunctionPass {
+ static char ID;
+ DebugLabelFolder() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Label Folder"; }
+};
+
+char DebugLabelFolder::ID = 0;
+
+bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
+ // Get machine module info.
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ if (!MMI) return false;
+
+ // Track if change is made.
+ bool MadeChange = false;
+ // No prior label to begin.
+ unsigned PriorLabel = 0;
+
+ // Iterate through basic blocks.
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ // Iterate through instructions.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Is it an unused debug label?
+ if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){
+ // The label ID # is always operand #0, an immediate.
+ unsigned NextLabel = I->getOperand(0).getImm();
+
+ // If there was an immediately preceding label.
+ if (PriorLabel) {
+ // Remap the current label to prior label.
+ MMI->RemapLabel(NextLabel, PriorLabel);
+ // Delete the current label.
+ I = BB->erase(I);
+ // Indicate a change has been made.
+ MadeChange = true;
+ continue;
+ } else {
+ // Start a new round.
+ PriorLabel = NextLabel;
+ }
+ } else {
+ // No consecutive labels.
+ PriorLabel = 0;
+ }
+
+ ++I;
+ }
+ }
+
+ return MadeChange;
+}
+
+FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
+
+}
+
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..9f4ef12
--- /dev/null
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- lib/CodeGen/MachinePassRegistry.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 0000000..4f5ab1f
--- /dev/null
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,125 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
+ VRegInfo.reserve(256);
+ RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
+ UsedPhysRegs.resize(TRI.getNumRegs());
+
+ // Create the physreg use/def lists.
+ PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
+ memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
+}
+
+MachineRegisterInfo::~MachineRegisterInfo() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i)
+ assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+ assert(!PhysRegUseDefLists[i] &&
+ "PhysRegUseDefLists has entries after all instructions are deleted");
+#endif
+ delete [] PhysRegUseDefLists;
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ unsigned VR = Reg;
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ assert(Reg < VRegInfo.size() && "Invalid vreg!");
+ const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
+ VRegInfo[Reg].first = RC;
+
+ // Remove from old register class's vregs list. This may be slow but
+ // fortunately this operation is rarely needed.
+ std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
+ std::vector<unsigned>::iterator I=std::find(VRegs.begin(), VRegs.end(), VR);
+ VRegs.erase(I);
+
+ // Add to new register class's vregs list.
+ RegClass2VRegMap[RC->getID()].push_back(VR);
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+ assert(RegClass && "Cannot create register without RegClass!");
+ // Add a reg, but keep track of whether the vector reallocated or not.
+ void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
+ VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
+
+ if (&VRegInfo[0] != ArrayBase && VRegInfo.size() != 1)
+ // The vector reallocated, handle this now.
+ HandleVRegListReallocation();
+ unsigned VR = getLastVirtReg();
+ RegClass2VRegMap[RegClass->getID()].push_back(VR);
+ return VR;
+}
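+// Typical use (sketch): passes create fresh vregs during lowering, e.g.
+//   unsigned NewVR = MF.getRegInfo().createVirtualRegister(RC);
+// where RC is the TargetRegisterClass the new value must live in.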
+
+/// HandleVRegListReallocation - We just added a virtual register to the
+/// VRegInfo info list and it reallocated. Update the use/def lists info
+/// pointers.
+void MachineRegisterInfo::HandleVRegListReallocation() {
+ // The back pointers for the vreg lists point into the previous vector.
+ // Update them to point to their correct slots.
+ for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) {
+ MachineOperand *List = VRegInfo[i].second;
+ if (!List) continue;
+ // Update the back-pointer to be accurate once more.
+ List->Contents.Reg.Prev = &VRegInfo[i].second;
+ }
+}
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register as well.
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ ++I;
+ O.setReg(ToReg);
+ }
+}
+
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+ assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
+ "Invalid vreg!");
+ for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) {
+ // Since we are in SSA form, we can stop at the first definition.
+ if (I.getOperand().isDef())
+ return &*I;
+ }
+ return 0;
+}
+
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+ for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+ I.getOperand().getParent()->dump();
+}
+#endif
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 0000000..0e18fa7
--- /dev/null
+++ b/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,257 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves (sinks) machine instructions into successor blocks, when
+// possible, so that they aren't executed on paths where their results aren't
+// needed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+
+namespace {
+ class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineFunction *CurMF; // Current MachineFunction
+ MachineRegisterInfo *RegInfo; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree for the current Loop
+
+ public:
+ static char ID; // Pass identification
+ MachineSinking() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
+ };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+static RegisterPass<MachineSinking>
+X("machine-sink", "Machine code sinking");
+
+FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block.
+bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Only makes sense for vregs");
+ for (MachineRegisterInfo::reg_iterator I = RegInfo->reg_begin(Reg),
+ E = RegInfo->reg_end(); I != E; ++I) {
+ if (I.getOperand().isDef()) continue; // ignore def.
+
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->getOpcode() == TargetInstrInfo::PHI) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ }
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+ return true;
+}
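+// PHI operand layout, for reference: in "%res = PHI %a, <bb#1>, %b, <bb#2>"
+// each incoming register is paired with its predecessor block, so a use at
+// operand index k has its block at operand k+1; hence the I.getOperandNo()+1
+// above.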
+
+
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "******** Machine Sinking ********\n";
+
+ CurMF = &MF;
+ TM = &CurMF->getTarget();
+ TII = TM->getInstrInfo();
+ RegInfo = &CurMF->getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ for (MachineFunction::iterator I = CurMF->begin(), E = CurMF->end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+ // Can't sink anything out of a block that has fewer than two successors.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr *MI = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (SinkInstruction(MI, SawStore))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, SawStore))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down when it kills y and z would increase the live ranges of y
+ // and z while only shrinking the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ MachineBasicBlock *ParentBlock = MI->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = 0;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // If this is a physical register use, we can't move it. If it is a def,
+ // we can move it, but only if the def is dead.
+ if (MO.isUse() || !MO.isDead())
+ return false;
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
+ return false;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ // x = computation
+ // if () {} else {}
+ // use x
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+ return false;
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to.
+ for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
+ E = ParentBlock->succ_end(); SI != E; ++SI) {
+ if (AllUsesDominatedByBlock(Reg, *SI)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+ }
+ }
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ // It's not safe to sink instructions to an EH landing pad. Control flow
+ // into a landing pad is implicitly defined.
+ if (SuccToSinkTo->isLandingPad())
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MI->getParent() == SuccToSinkTo)
+ return false;
+
+ DEBUG(cerr << "Sink instr " << *MI);
+ DEBUG(cerr << "to block " << *SuccToSinkTo);
+
+ // If the block has multiple predecessors, this would introduce computation on
+ // a path where it doesn't already exist. We could split the critical edge,
+ // but for now we just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->pred_size() > 1) {
+ DEBUG(cerr << " *** PUNTING: Critical edge found\n");
+ return false;
+ }
+
+ // Determine where to insert. Skip PHI nodes.
+ MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() &&
+ InsertPos->getOpcode() == TargetInstrInfo::PHI)
+ ++InsertPos;
+
+ // Move the instruction.
+ SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+ return true;
+}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 0000000..be1396c
--- /dev/null
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,690 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
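+//
+// For illustration only (the flag and variable names above come from this
+// file; the llc invocations themselves are assumptions, not part of this
+// pass):
+//
+//   llc -verify-machineinstrs foo.bc
+//   LLVM_VERIFY_MACHINEINSTRS=verify.log llc foo.bc
+//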
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <fstream>
+
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN MachineVerifier : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+
+ MachineVerifier(bool allowDoubleDefs = false) :
+ MachineFunctionPass(&ID),
+ allowVirtDoubleDefs(allowDoubleDefs),
+ allowPhysDoubleDefs(allowDoubleDefs),
+ OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const bool allowVirtDoubleDefs;
+ const bool allowPhysDoubleDefs;
+
+ const char *const OutFileName;
+ std::ostream *OS;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsImpDefined, regsDead, regsKilled;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ RV.push_back(*R);
+ }
+
+ // Does RS contain any super-registers of Reg?
+ bool anySuperRegisters(const RegSet &RS, unsigned Reg) {
+ for (const unsigned *R = TRI->getSuperRegisters(Reg); *R; R++)
+ if (RS.count(*R))
+ return true;
+ return false;
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Vregs that must be dead in because they are defined without being
+ // killed first. Map value is the defining instruction.
+ RegMap vregsDeadIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved[Reg];
+ }
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcMaxRegsPassed();
+ void calcMinRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+ };
+}
+
+char MachineVerifier::ID = 0;
+static RegisterPass<MachineVerifier>
+MachineVer("machineverifier", "Verify generated machine code");
+static const PassInfo *const MachineVerifyID = &MachineVer;
+
+FunctionPass *
+llvm::createMachineVerifierPass(bool allowPhysDoubleDefs)
+{
+ return new MachineVerifier(allowPhysDoubleDefs);
+}
+
+bool
+MachineVerifier::runOnMachineFunction(MachineFunction &MF)
+{
+ std::ofstream OutFile;
+ if (OutFileName) {
+ OutFile.open(OutFileName, std::ios::out | std::ios::app);
+ OS = &OutFile;
+ } else {
+ OS = cerr.stream();
+ }
+
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TRI = TM->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(MFI);
+ for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
+ MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ visitMachineInstrBefore(MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+ visitMachineOperand(&MBBI->getOperand(I), I);
+ visitMachineInstrAfter(MBBI);
+ }
+ visitMachineBasicBlockAfter(MFI);
+ }
+ visitMachineFunctionAfter();
+
+ if (OutFileName)
+ OutFile.close();
+
+ if (foundErrors) {
+ cerr << "\nStopping with " << foundErrors << " machine code errors.\n";
+ exit(1);
+ }
+
+ return false; // no changes
+}
+
+void
+MachineVerifier::report(const char *msg, const MachineFunction *MF)
+{
+ assert(MF);
+ *OS << "\n";
+ if (!foundErrors++)
+ MF->print(OS);
+ *OS << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getFunction()->getName() << "\n";
+}
+
+void
+MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB)
+{
+ assert(MBB);
+ report(msg, MBB->getParent());
+ *OS << "- basic block: " << MBB->getBasicBlock()->getName()
+ << " " << (void*)MBB
+ << " (#" << MBB->getNumber() << ")\n";
+}
+
+void
+MachineVerifier::report(const char *msg, const MachineInstr *MI)
+{
+ assert(MI);
+ report(msg, MI->getParent());
+ *OS << "- instruction: ";
+ MI->print(OS, TM);
+}
+
+void
+MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum)
+{
+ assert(MO);
+ report(msg, MO->getParent());
+ *OS << "- operand " << MONum << ": ";
+ MO->print(*OS, TM);
+ *OS << "\n";
+}
+
+void
+MachineVerifier::markReachable(const MachineBasicBlock *MBB)
+{
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void
+MachineVerifier::visitMachineFunctionBefore()
+{
+ regsReserved = TRI->getReservedRegs(*MF);
+ markReachable(&MF->front());
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
+{
+ regsLive.clear();
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ regsLive.insert(*I);
+ for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ regsLive.insert(*R);
+ }
+ regsKilled.clear();
+ regsDefined.clear();
+ regsImpDefined.clear();
+}
+
+void
+MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI)
+{
+ const TargetInstrDesc &TI = MI->getDesc();
+ if (MI->getNumExplicitOperands() < TI.getNumOperands()) {
+ report("Too few operands", MI);
+ *OS << TI.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+ if (!TI.isVariadic()) {
+ if (MI->getNumExplicitOperands() > TI.getNumOperands()) {
+ report("Too many operands", MI);
+ *OS << TI.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+ }
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
+{
+ const MachineInstr *MI = MO->getParent();
+ const TargetInstrDesc &TI = MI->getDesc();
+
+ // The first TI.NumDefs operands must be explicit register defines
+ if (MONum < TI.getNumDefs()) {
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+
+ // Check Live Variables.
+ if (MO->isUse()) {
+ if (MO->isKill()) {
+ addRegWithSubRegs(regsKilled, Reg);
+ } else {
+ // A two-address instruction modifying a reg is treated as kill+def.
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
+ MI->getOperand(defIdx).getReg() == Reg)
+ addRegWithSubRegs(regsKilled, Reg);
+ }
+ // Explicit use of a dead register.
+ if (!MO->isImplicit() && !regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (MI->getOpcode() != TargetInstrInfo::PHI)
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ } else {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isImplicit())
+ addRegWithSubRegs(regsImpDefined, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ }
+
+ // Check register classes.
+ if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
+ const TargetOperandInfo &TOI = TI.OpInfo[MONum];
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ unsigned sr = Reg;
+ if (SubIdx) {
+ unsigned s = TRI->getSubReg(Reg, SubIdx);
+ if (!s) {
+ report("Invalid subregister index for physical register",
+ MO, MONum);
+ return;
+ }
+ sr = s;
+ }
+ if (TOI.RegClass) {
+ const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass);
+ if (!DRC->contains(sr)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ *OS << TRI->getName(sr) << " is not a "
+ << DRC->getName() << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SubIdx) {
+ if (RC->subregclasses_begin()+SubIdx >= RC->subregclasses_end()) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ return;
+ }
+ RC = *(RC->subregclasses_begin()+SubIdx);
+ }
+ if (TOI.RegClass) {
+ const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass);
+ if (RC != DRC && !RC->hasSuperClass(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ *OS << "Expected a " << DRC->getName() << " register, but got a "
+ << RC->getName() << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+ // Can PHI instrs refer to MBBs not in the CFG? X86 and ARM do.
+ // case MachineOperand::MO_MachineBasicBlock:
+ // if (MI->getOpcode() == TargetInstrInfo::PHI) {
+ // if (!MO->getMBB()->isSuccessor(MI->getParent()))
+ // report("PHI operand is not in the CFG", MO, MONum);
+ // }
+ // break;
+ default:
+ break;
+ }
+}
+
+void
+MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI)
+{
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled);
+ regsKilled.clear();
+
+ for (RegVector::const_iterator I = regsDefined.begin(),
+ E = regsDefined.end(); I != E; ++I) {
+ if (regsLive.count(*I)) {
+ if (TargetRegisterInfo::isPhysicalRegister(*I)) {
+ // We allow double defines to physical registers with live
+ // super-registers.
+ if (!allowPhysDoubleDefs && !isReserved(*I) &&
+ !anySuperRegisters(regsLive, *I)) {
+ report("Redefining a live physical register", MI);
+ *OS << "Register " << TRI->getName(*I)
+ << " was defined but already live.\n";
+ }
+ } else {
+ if (!allowVirtDoubleDefs) {
+ report("Redefining a live virtual register", MI);
+ *OS << "Virtual register %reg" << *I
+ << " was defined but already live.\n";
+ }
+ }
+ } else if (TargetRegisterInfo::isVirtualRegister(*I) &&
+ !MInfo.regsKilled.count(*I)) {
+ // A virtual register defined without being killed first must be dead on
+ // entry.
+ MInfo.vregsDeadIn.insert(std::make_pair(*I, MI));
+ }
+ }
+
+ set_union(regsLive, regsDefined); regsDefined.clear();
+ set_union(regsLive, regsImpDefined); regsImpDefined.clear();
+ set_subtract(regsLive, regsDead); regsDead.clear();
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB)
+{
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but are not necessarily live on every path.
+// It is assumed that all vregsPassed sets are empty before the call.
+void
+MachineVerifier::calcMaxRegsPassed()
+{
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
+
+// Calculate the minimum vregsPassed set. These are the registers that always
+// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has
+// been called earlier.
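+//
+// Informally (a restatement of the two passes, not new behaviour):
+// calcMaxRegsPassed grows vregsPassed as a union over predecessors (registers
+// that *may* pass through), while this function shrinks it toward an
+// intersection (registers that *must* pass through) by deleting any register
+// that some reachable predecessor does not carry live-out.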
+void
+MachineVerifier::calcMinRegsPassed()
+{
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI)
+ todo.insert(MFI);
+
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+
+ // Remove entries from vregsPassed that are not live out from all
+ // reachable predecessors.
+ RegSet dead;
+ for (RegSet::iterator I = MInfo.vregsPassed.begin(),
+ E = MInfo.vregsPassed.end(); I != E; ++I) {
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PrInfo = MBBInfoMap[*PrI];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(*I)) {
+ dead.insert(*I);
+ break;
+ }
+ }
+ }
+ // If any regs removed, we need to recheck successors.
+ if (!dead.empty()) {
+ set_subtract(MInfo.vregsPassed, dead);
+ todo.insert(MBB->succ_begin(), MBB->succ_end());
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid.
+void
+MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB)
+{
+ for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
+ DenseSet<const MachineBasicBlock*> seen;
+
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI->getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", BBI);
+ *OS << "MBB #" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
+
+void
+MachineVerifier::visitMachineFunctionAfter()
+{
+ calcMaxRegsPassed();
+
+ // With the maximal set of vregsPassed we can verify dead-in registers.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
+ PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PrInfo = MBBInfoMap[*PrI];
+ if (!PrInfo.reachable)
+ continue;
+
+ // Verify physical live-ins. EH landing pads have magic live-ins so we
+ // ignore them.
+ if (!MFI->isLandingPad()) {
+ for (MachineBasicBlock::const_livein_iterator I = MFI->livein_begin(),
+ E = MFI->livein_end(); I != E; ++I) {
+ if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ !isReserved (*I) && !PrInfo.isLiveOut(*I)) {
+ report("Live-in physical register is not live-out from predecessor",
+ MFI);
+ *OS << "Register " << TRI->getName(*I)
+ << " is not live-out from MBB #" << (*PrI)->getNumber()
+ << ".\n";
+ }
+ }
+ }
+
+
+ // Verify dead-in virtual registers.
+ if (!allowVirtDoubleDefs) {
+ for (RegMap::iterator I = MInfo.vregsDeadIn.begin(),
+ E = MInfo.vregsDeadIn.end(); I != E; ++I) {
+ // A dead-in register must be in neither regsLiveOut nor vregsPassed of
+ // any predecessor.
+ if (PrInfo.isLiveOut(I->first)) {
+ report("Live-in virtual register redefined", I->second);
+ *OS << "Register %reg" << I->first
+ << " was live-out from predecessor MBB #"
+ << (*PrI)->getNumber() << ".\n";
+ }
+ }
+ }
+ }
+ }
+
+ calcMinRegsPassed();
+
+ // With the minimal set of vregsPassed we can verify live-in virtual
+ // registers, including PHI instructions.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(MFI);
+
+ for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
+ PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PrInfo = MBBInfoMap[*PrI];
+ if (!PrInfo.reachable)
+ continue;
+
+ for (RegMap::iterator I = MInfo.vregsLiveIn.begin(),
+ E = MInfo.vregsLiveIn.end(); I != E; ++I) {
+ if (!PrInfo.isLiveOut(I->first)) {
+ report("Used virtual register is not live-in", I->second);
+ *OS << "Register %reg" << I->first
+ << " is not live-out from predecessor MBB #"
+ << (*PrI)->getNumber()
+ << ".\n";
+ }
+ }
+ }
+ }
+}
diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile
new file mode 100644
index 0000000..4ab3e3c
--- /dev/null
+++ b/lib/CodeGen/Makefile
@@ -0,0 +1,22 @@
+##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMCodeGen
+PARALLEL_DIRS = SelectionDAG AsmPrinter
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
+# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL
+# in this directory. Disable -pedantic for this broken compiler.
+ifneq ($(HUGE_VAL_SANITY),yes)
+CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts))
+endif
+
diff --git a/lib/CodeGen/OcamlGC.cpp b/lib/CodeGen/OcamlGC.cpp
new file mode 100644
index 0000000..f7bc9f3
--- /dev/null
+++ b/lib/CodeGen/OcamlGC.cpp
@@ -0,0 +1,38 @@
+//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+// The frametable emitter is in OcamlGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN OcamlGC : public GCStrategy {
+ public:
+ OcamlGC();
+ };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+}
diff --git a/lib/CodeGen/PBQP.cpp b/lib/CodeGen/PBQP.cpp
new file mode 100644
index 0000000..562300f
--- /dev/null
+++ b/lib/CodeGen/PBQP.cpp
@@ -0,0 +1,1395 @@
+//===---------------- PBQP.cpp --------- PBQP Solver ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Developed by: Bernhard Scholz
+// The University of Sydney
+// http://www.it.usyd.edu.au/~scholz
+//===----------------------------------------------------------------------===//
+
+#include "PBQP.h"
+#include "llvm/Config/alloca.h"
+#include <limits>
+#include <cassert>
+#include <cstring>
+
+namespace llvm {
+
+/**************************************************************************
+ * Data Structures
+ **************************************************************************/
+
+/* edge of PBQP graph */
+typedef struct adjnode {
+ struct adjnode *prev, /* doubly chained list */
+ *succ,
+ *reverse; /* reverse edge */
+ int adj; /* adj. node */
+ PBQPMatrix *costs; /* cost matrix of edge */
+
+ bool tc_valid; /* flag whether following fields are valid */
+ int *tc_safe_regs; /* safe registers */
+ int tc_impact; /* impact */
+} adjnode;
+
+/* bucket node */
+typedef struct bucketnode {
+ struct bucketnode *prev; /* doubly chained list */
+ struct bucketnode *succ;
+ int u; /* node */
+} bucketnode;
+
+/* data structure of partitioned boolean quadratic problem */
+struct pbqp {
+ int num_nodes; /* number of nodes */
+ int max_deg; /* maximal degree of a node */
+ bool solved; /* flag that indicates whether PBQP has been solved yet */
+ bool optimal; /* flag that indicates whether PBQP is optimal */
+ PBQPNum min;
+ bool changed; /* flag whether graph has changed in simplification */
+
+ /* node fields */
+ PBQPVector **node_costs; /* cost vectors of nodes */
+ int *node_deg; /* node degree of nodes */
+ int *solution; /* solution for node */
+ adjnode **adj_list; /* adj. list */
+ bucketnode **bucket_ptr; /* bucket pointer of a node */
+
+ /* node stack */
+ int *stack; /* stack of nodes */
+ int stack_ptr; /* stack pointer */
+
+ /* bucket fields */
+ bucketnode **bucket_list; /* bucket list */
+
+ int num_r0; /* counters for number statistics */
+ int num_ri;
+ int num_rii;
+ int num_rn;
+ int num_rn_special;
+};
+
+bool isInf(PBQPNum n) { return n == std::numeric_limits<PBQPNum>::infinity(); }
+
+/*****************************************************************************
+ * allocation/de-allocation of pbqp problem
+ ****************************************************************************/
+
+/* allocate new partitioned boolean quadratic program problem */
+pbqp *alloc_pbqp(int num_nodes)
+{
+ pbqp *this_;
+ int u;
+
+ assert(num_nodes > 0);
+
+ /* allocate memory for pbqp data structure */
+ this_ = (pbqp *)malloc(sizeof(pbqp));
+ assert(this_ != NULL);
+
+ /* Initialize pbqp fields */
+ this_->num_nodes = num_nodes;
+ this_->solved = false;
+ this_->optimal = true;
+ this_->min = 0.0;
+ this_->max_deg = 0;
+ this_->changed = false;
+ this_->num_r0 = 0;
+ this_->num_ri = 0;
+ this_->num_rii = 0;
+ this_->num_rn = 0;
+ this_->num_rn_special = 0;
+
+ /* initialize/allocate stack fields of pbqp */
+ this_->stack = (int *) malloc(sizeof(int)*num_nodes);
+ this_->stack_ptr = 0;
+
+ /* initialize/allocate node fields of pbqp */
+ this_->adj_list = (adjnode **) malloc(sizeof(adjnode *)*num_nodes);
+ this_->node_deg = (int *) malloc(sizeof(int)*num_nodes);
+ this_->solution = (int *) malloc(sizeof(int)*num_nodes);
+ this_->bucket_ptr = (bucketnode **) malloc(sizeof(bucketnode *)*num_nodes);
+ this_->node_costs = (PBQPVector**) malloc(sizeof(PBQPVector*) * num_nodes);
+ for(u=0;u<num_nodes;u++) {
+ this_->solution[u]=-1;
+ this_->adj_list[u]=NULL;
+ this_->node_deg[u]=0;
+ this_->bucket_ptr[u]=NULL;
+ this_->node_costs[u]=NULL;
+ }
+
+ /* initialize bucket list */
+ this_->bucket_list = NULL;
+
+ return this_;
+}
+
+/* free pbqp problem */
+void free_pbqp(pbqp *this_)
+{
+ int u;
+ int deg;
+ adjnode *adj_ptr,*adj_next;
+ bucketnode *bucket,*bucket_next;
+
+ assert(this_ != NULL);
+
+ /* free node cost fields */
+ for(u=0;u < this_->num_nodes;u++) {
+ delete this_->node_costs[u];
+ }
+ free(this_->node_costs);
+
+ /* free bucket list */
+ for(deg=0;deg<=this_->max_deg;deg++) {
+ for(bucket=this_->bucket_list[deg];bucket!=NULL;bucket=bucket_next) {
+ this_->bucket_ptr[bucket->u] = NULL;
+ bucket_next = bucket-> succ;
+ free(bucket);
+ }
+ }
+ free(this_->bucket_list);
+
+ /* free adj. list */
+ assert(this_->adj_list != NULL);
+ for(u=0;u < this_->num_nodes; u++) {
+ for(adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = adj_next) {
+ adj_next = adj_ptr -> succ;
+ if (u < adj_ptr->adj) {
+ assert(adj_ptr != NULL);
+ delete adj_ptr->costs;
+ }
+ if (adj_ptr -> tc_safe_regs != NULL) {
+ free(adj_ptr -> tc_safe_regs);
+ }
+ free(adj_ptr);
+ }
+ }
+ free(this_->adj_list);
+
+ /* free other node fields */
+ free(this_->node_deg);
+ free(this_->solution);
+ free(this_->bucket_ptr);
+
+ /* free stack */
+ free(this_->stack);
+
+ /* free pbqp data structure itself */
+ free(this_);
+}
+
+
+/****************************************************************************
+ * adj. node routines
+ ****************************************************************************/
+
+/* find data structure of adj. node of a given node */
+static
+adjnode *find_adjnode(pbqp *this_,int u,int v)
+{
+ adjnode *adj_ptr;
+
+ assert (this_ != NULL);
+ assert (u >= 0 && u < this_->num_nodes);
+ assert (v >= 0 && v < this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ if (adj_ptr->adj == v) {
+ return adj_ptr;
+ }
+ }
+ return NULL;
+}
+
+/* allocate a new data structure for adj. node */
+static
+adjnode *alloc_adjnode(pbqp *this_,int u, PBQPMatrix *costs)
+{
+ adjnode *p;
+
+ assert(this_ != NULL);
+ assert(costs != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+
+ p = (adjnode *)malloc(sizeof(adjnode));
+ assert(p != NULL);
+
+ p->adj = u;
+ p->costs = costs;
+
+ p->tc_valid= false;
+ p->tc_safe_regs = NULL;
+ p->tc_impact = 0;
+
+ return p;
+}
+
+/* insert an adjacency node into the adj. list */
+static
+void insert_adjnode(pbqp *this_, int u, adjnode *adj_ptr)
+{
+
+ assert(this_ != NULL);
+ assert(adj_ptr != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+
+ /* if the adjacency list of the node is not empty, update the
+ first node of the list */
+ if (this_ -> adj_list[u] != NULL) {
+ assert(this_->adj_list[u]->prev == NULL);
+ this_->adj_list[u] -> prev = adj_ptr;
+ }
+
+ /* update doubly chained list pointers of pointers */
+ adj_ptr -> succ = this_->adj_list[u];
+ adj_ptr -> prev = NULL;
+
+ /* update adjacency list pointer of node u */
+ this_->adj_list[u] = adj_ptr;
+}
+
+/* remove entry in an adj. list */
+static
+void remove_adjnode(pbqp *this_, int u, adjnode *adj_ptr)
+{
+ assert(this_!= NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->adj_list != NULL);
+ assert(adj_ptr != NULL);
+
+ if (adj_ptr -> prev == NULL) {
+ this_->adj_list[u] = adj_ptr -> succ;
+ } else {
+ adj_ptr -> prev -> succ = adj_ptr -> succ;
+ }
+
+ if (adj_ptr -> succ != NULL) {
+ adj_ptr -> succ -> prev = adj_ptr -> prev;
+ }
+
+ if(adj_ptr->reverse != NULL) {
+ adjnode *rev = adj_ptr->reverse;
+ rev->reverse = NULL;
+ }
+
+ if (adj_ptr -> tc_safe_regs != NULL) {
+ free(adj_ptr -> tc_safe_regs);
+ }
+
+ free(adj_ptr);
+}
+
+/*****************************************************************************
+ * node functions
+ ****************************************************************************/
+
+/* get degree of a node */
+static
+int get_deg(pbqp *this_,int u)
+{
+ adjnode *adj_ptr;
+ int deg = 0;
+
+ assert(this_ != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ deg ++;
+ }
+ return deg;
+}
+
+/* reinsert node */
+static
+void reinsert_node(pbqp *this_,int u)
+{
+ adjnode *adj_u,
+ *adj_v;
+
+ assert(this_!= NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_u = this_ -> adj_list[u]; adj_u != NULL; adj_u = adj_u -> succ) {
+ int v = adj_u -> adj;
+ adj_v = alloc_adjnode(this_,u,adj_u->costs);
+ insert_adjnode(this_,v,adj_v);
+ }
+}
+
+/* remove node */
+static
+void remove_node(pbqp *this_,int u)
+{
+ adjnode *adj_ptr;
+
+ assert(this_!= NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ remove_adjnode(this_,adj_ptr->adj,adj_ptr -> reverse);
+ }
+}
+
+/*****************************************************************************
+ * edge functions
+ ****************************************************************************/
+
+/* insert edge to graph */
+/* (does not check whether the edge already exists in the graph) */
+static
+void insert_edge(pbqp *this_, int u, int v, PBQPMatrix *costs)
+{
+ adjnode *adj_u,
+ *adj_v;
+
+ /* create adjacency entry for u */
+ adj_u = alloc_adjnode(this_,v,costs);
+ insert_adjnode(this_,u,adj_u);
+
+
+ /* create adjacency entry for v */
+ adj_v = alloc_adjnode(this_,u,costs);
+ insert_adjnode(this_,v,adj_v);
+
+ /* create link for reverse edge */
+ adj_u -> reverse = adj_v;
+ adj_v -> reverse = adj_u;
+}
+
+/* delete edge */
+static
+void delete_edge(pbqp *this_,int u,int v)
+{
+ adjnode *adj_ptr;
+ adjnode *rev;
+
+ assert(this_ != NULL);
+ assert( u >= 0 && u < this_->num_nodes);
+ assert( v >= 0 && v < this_->num_nodes);
+
+ adj_ptr=find_adjnode(this_,u,v);
+ assert(adj_ptr != NULL);
+ assert(adj_ptr->reverse != NULL);
+
+ delete adj_ptr -> costs;
+
+ rev = adj_ptr->reverse;
+ remove_adjnode(this_,u,adj_ptr);
+ remove_adjnode(this_,v,rev);
+}
+
+/*****************************************************************************
+ * cost functions
+ ****************************************************************************/
+
+/* Note: Since cost(u,v) = transpose(cost(v,u)), it would be necessary to store
+ two matrices for both edges (u,v) and (v,u). However, we only store the
+ matrix for the case u < v. For the other case we transpose the stored matrix
+ if required.
+*/
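+
+/* Illustrative example (numbers invented for exposition): if the stored
+   matrix for the edge (u,v) with u < v is
+
+       cost(u,v) = | 1 2 3 |
+                   | 4 5 6 |
+
+   then a query for cost(v,u) is answered with its transpose
+
+                   | 1 4 |
+       cost(v,u) = | 2 5 |
+                   | 3 6 |
+
+   so only one matrix per edge has to be stored. */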
+
+/* add costs to cost vector of a node */
+void add_pbqp_nodecosts(pbqp *this_,int u, PBQPVector *costs)
+{
+ assert(this_ != NULL);
+ assert(costs != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+
+ if (!this_->node_costs[u]) {
+ this_->node_costs[u] = new PBQPVector(*costs);
+ } else {
+ *this_->node_costs[u] += *costs;
+ }
+}
+
+/* get cost matrix ptr */
+static
+PBQPMatrix *get_costmatrix_ptr(pbqp *this_, int u, int v)
+{
+ adjnode *adj_ptr;
+ PBQPMatrix *m = NULL;
+
+ assert (this_ != NULL);
+ assert (u >= 0 && u < this_->num_nodes);
+ assert (v >= 0 && v < this_->num_nodes);
+
+ adj_ptr = find_adjnode(this_,u,v);
+
+ if (adj_ptr != NULL) {
+ m = adj_ptr -> costs;
+ }
+
+ return m;
+}
+
+/* get cost matrix ptr */
+/* Note: only the pointer is returned for
+ cost(u,v), if u < v.
+*/
+static
+PBQPMatrix *pbqp_get_costmatrix(pbqp *this_, int u, int v)
+{
+ adjnode *adj_ptr = find_adjnode(this_,u,v);
+
+ if (adj_ptr != NULL) {
+ if ( u < v) {
+ return new PBQPMatrix(*adj_ptr->costs);
+ } else {
+ return new PBQPMatrix(adj_ptr->costs->transpose());
+ }
+ } else {
+ return NULL;
+ }
+}
+
+/* add costs to cost matrix of an edge */
+void add_pbqp_edgecosts(pbqp *this_,int u,int v, PBQPMatrix *costs)
+{
+ PBQPMatrix *adj_costs;
+
+ assert(this_!= NULL);
+ assert(costs != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(v >= 0 && v < this_->num_nodes);
+
+ /* does the edge u-v exist? */
+ if (u == v) {
+ PBQPVector *diag = new PBQPVector(costs->diagonalize());
+ add_pbqp_nodecosts(this_,v,diag);
+ delete diag;
+ } else if ((adj_costs = get_costmatrix_ptr(this_,u,v))!=NULL) {
+ if ( u < v) {
+ *adj_costs += *costs;
+ } else {
+ *adj_costs += costs->transpose();
+ }
+ } else {
+ adj_costs = new PBQPMatrix((u < v) ? *costs : costs->transpose());
+ insert_edge(this_,u,v,adj_costs);
+ }
+}
+
+/* remove bucket from bucket list */
+static
+void pbqp_remove_bucket(pbqp *this_, bucketnode *bucket)
+{
+ int u = bucket->u;
+
+ assert(this_ != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->bucket_list != NULL);
+ assert(this_->bucket_ptr[u] != NULL);
+
+ /* update predecessor node in bucket list
+ (if no preceding bucket exists, then
+ the bucket_list pointer needs to be
+ updated.)
+ */
+ if (bucket->prev != NULL) {
+ bucket->prev-> succ = bucket->succ;
+ } else {
+ this_->bucket_list[this_->node_deg[u]] = bucket -> succ;
+ }
+
+ /* update successor node in bucket list */
+ if (bucket->succ != NULL) {
+ bucket->succ-> prev = bucket->prev;
+ }
+}
+
+/**********************************************************************************
+ * pop functions
+ **********************************************************************************/
+
+/* pop node of given degree */
+static
+int pop_node(pbqp *this_,int deg)
+{
+ bucketnode *bucket;
+ int u;
+
+ assert(this_ != NULL);
+ assert(deg >= 0 && deg <= this_->max_deg);
+ assert(this_->bucket_list != NULL);
+
+ /* get first bucket of bucket list */
+ bucket = this_->bucket_list[deg];
+ assert(bucket != NULL);
+
+ /* remove bucket */
+ pbqp_remove_bucket(this_,bucket);
+ u = bucket->u;
+ free(bucket);
+ return u;
+}
+
+/**********************************************************************************
+ * reorder functions
+ **********************************************************************************/
+
+/* add bucket to bucketlist */
+static
+void add_to_bucketlist(pbqp *this_,bucketnode *bucket, int deg)
+{
+ bucketnode *old_head;
+
+ assert(bucket != NULL);
+ assert(this_ != NULL);
+ assert(deg >= 0 && deg <= this_->max_deg);
+ assert(this_->bucket_list != NULL);
+
+ /* store node degree (for re-ordering purposes)*/
+ this_->node_deg[bucket->u] = deg;
+
+ /* put bucket to front of doubly chained list */
+ old_head = this_->bucket_list[deg];
+ bucket -> prev = NULL;
+ bucket -> succ = old_head;
+ this_ -> bucket_list[deg] = bucket;
+ if (bucket -> succ != NULL ) {
+ assert ( old_head -> prev == NULL);
+ old_head -> prev = bucket;
+ }
+}
+
+
+/* reorder node in bucket list according to
+ current node degree */
+static
+void reorder_node(pbqp *this_, int u)
+{
+ int deg;
+
+ assert(this_ != NULL);
+ assert(u>= 0 && u < this_->num_nodes);
+ assert(this_->bucket_list != NULL);
+ assert(this_->bucket_ptr[u] != NULL);
+
+ /* get current node degree */
+ deg = get_deg(this_,u);
+
+ /* remove bucket from old bucket list only
+ if degree of node has changed. */
+ if (deg != this_->node_deg[u]) {
+ pbqp_remove_bucket(this_,this_->bucket_ptr[u]);
+ add_to_bucketlist(this_,this_->bucket_ptr[u],deg);
+ }
+}
+
+/* reorder adj. nodes of a node */
+static
+void reorder_adjnodes(pbqp *this_,int u)
+{
+ adjnode *adj_ptr;
+
+ assert(this_!= NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ reorder_node(this_,adj_ptr->adj);
+ }
+}
+
+/**********************************************************************************
+ * creation functions
+ **********************************************************************************/
+
+/* create new bucket entry */
+/* consistency of the bucket list is not checked! */
+static
+void create_bucket(pbqp *this_,int u,int deg)
+{
+ bucketnode *bucket;
+
+ assert(this_ != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->bucket_list != NULL);
+
+ bucket = (bucketnode *)malloc(sizeof(bucketnode));
+ assert(bucket != NULL);
+
+ bucket -> u = u;
+ this_->bucket_ptr[u] = bucket;
+
+ add_to_bucketlist(this_,bucket,deg);
+}
+
+/* create bucket list */
+static
+void create_bucketlist(pbqp *this_)
+{
+ int u;
+ int max_deg;
+ int deg;
+
+ assert(this_ != NULL);
+ assert(this_->bucket_list == NULL);
+
+ /* determine max. degree of the nodes */
+ max_deg = 2; /* at least of degree two! */
+ for(u=0;u<this_->num_nodes;u++) {
+ deg = this_->node_deg[u] = get_deg(this_,u);
+ if (deg > max_deg) {
+ max_deg = deg;
+ }
+ }
+ this_->max_deg = max_deg;
+
+ /* allocate bucket list */
+ this_ -> bucket_list = (bucketnode **)malloc(sizeof(bucketnode *)*(max_deg + 1));
+ assert(this_->bucket_list != NULL);
+ memset(this_->bucket_list,0,sizeof(bucketnode *)*(max_deg + 1));
+
+ /* insert nodes to the list */
+ for(u=0;u<this_->num_nodes;u++) {
+ create_bucket(this_,u,this_->node_deg[u]);
+ }
+}
+
+/*****************************************************************************
+ * PBQP simplification for trivial nodes
+ ****************************************************************************/
+
+/* remove trivial node with cost vector length of one */
+static
+void disconnect_trivialnode(pbqp *this_,int u)
+{
+ int v;
+ adjnode *adj_ptr,
+ *next;
+ PBQPMatrix *c_uv;
+ PBQPVector *c_v;
+
+ assert(this_ != NULL);
+ assert(this_->node_costs != NULL);
+ assert(u >= 0 && u < this_ -> num_nodes);
+ assert(this_->node_costs[u]->getLength() == 1);
+
+ /* add edge costs to node costs of adj. nodes */
+ for(adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = next){
+ next = adj_ptr -> succ;
+ v = adj_ptr -> adj;
+ assert(v >= 0 && v < this_ -> num_nodes);
+
+ /* convert matrix to cost vector offset for adj. node */
+ c_uv = pbqp_get_costmatrix(this_,u,v);
+ c_v = new PBQPVector(c_uv->getRowAsVector(0));
+ *this_->node_costs[v] += *c_v;
+
+ /* delete edge & free vec/mat */
+ delete c_v;
+ delete c_uv;
+ delete_edge(this_,u,v);
+ }
+}
+
+/* find all trivial nodes and disconnect them */
+static
+void eliminate_trivial_nodes(pbqp *this_)
+{
+ int u;
+
+ assert(this_ != NULL);
+ assert(this_ -> node_costs != NULL);
+
+ for(u=0;u < this_ -> num_nodes; u++) {
+ if (this_->node_costs[u]->getLength() == 1) {
+ disconnect_trivialnode(this_,u);
+ }
+ }
+}
+
+/*****************************************************************************
+ * Normal form for PBQP
+ ****************************************************************************/
+
+/* Normalize a cost matrix. If the matrix
+ is independent, then normalize_matrix
+ returns true - otherwise false. The offset
+ values of the decomposition are stored in
+ the vectors u and v.
+*/
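+
+/* Worked example (numbers invented for exposition): for
+
+       m = | 3 4 |   the row minima 3 and 5 move into u, leaving | 0 1 |
+           | 5 7 |                                               | 0 2 |
+
+   then the column minima 0 and 1 move into v, leaving | 0 0 |
+                                                       | 0 1 |
+
+   which is not the zero matrix, so this m is not independent and the
+   edge carrying it cannot be deleted. */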
+
+static
+bool normalize_matrix(PBQPMatrix *m, PBQPVector *u, PBQPVector *v)
+{
+ assert( m != NULL);
+ assert( u != NULL);
+ assert( v != NULL);
+ assert( u->getLength() > 0);
+ assert( v->getLength() > 0);
+
+ assert(m->getRows() == u->getLength());
+ assert(m->getCols() == v->getLength());
+
+ /* determine u vector */
+ for(unsigned r = 0; r < m->getRows(); ++r) {
+ PBQPNum min = m->getRowMin(r);
+ (*u)[r] += min;
+ if (!isInf(min)) {
+ m->subFromRow(r, min);
+ } else {
+ m->setRow(r, 0);
+ }
+ }
+
+ /* determine v vector */
+ for(unsigned c = 0; c < m->getCols(); ++c) {
+ PBQPNum min = m->getColMin(c);
+ (*v)[c] += min;
+ if (!isInf(min)) {
+ m->subFromCol(c, min);
+ } else {
+ m->setCol(c, 0);
+ }
+ }
+
+ /* determine whether matrix is
+ independent or not.
+ */
+ return m->isZero();
+}
+
+/* simplify single edge */
+static
+void simplify_edge(pbqp *this_,int u,int v)
+{
+ PBQPMatrix *costs;
+ bool is_zero;
+
+ assert (this_ != NULL);
+ assert (u >= 0 && u <this_->num_nodes);
+ assert (v >= 0 && v <this_->num_nodes);
+ assert (u != v);
+
+ /* swap u and v if u > v in order to avoid unnecessary
+ transpositions of the cost matrix */
+
+ if (u > v) {
+ int swap = u;
+ u = v;
+ v = swap;
+ }
+
+ /* get cost matrix and simplify it */
+ costs = get_costmatrix_ptr(this_,u,v);
+ is_zero=normalize_matrix(costs,this_->node_costs[u],this_->node_costs[v]);
+
+ /* delete edge */
+ if(is_zero){
+ delete_edge(this_,u,v);
+ this_->changed = true;
+ }
+}
+
+/* normalize cost matrices and remove
+ edges in PBQP if they are independent,
+ i.e. can be decomposed into two
+ cost vectors.
+*/
+static
+void eliminate_independent_edges(pbqp *this_)
+{
+ int u,v;
+ adjnode *adj_ptr,*next;
+
+ assert(this_ != NULL);
+ assert(this_ -> adj_list != NULL);
+
+ this_->changed = false;
+ for(u=0;u < this_->num_nodes;u++) {
+ for (adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = next) {
+ next = adj_ptr -> succ;
+ v = adj_ptr -> adj;
+ assert(v >= 0 && v < this_->num_nodes);
+ if (u < v) {
+ simplify_edge(this_,u,v);
+ }
+ }
+ }
+}
+
+
+/*****************************************************************************
+ * PBQP reduction rules
+ ****************************************************************************/
+
+/* RI reduction
+ This reduction rule is applied for nodes
+ of degree one. */
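+
+/* In symbols (a restatement of the loop below, not a change to it): with y
+   the sole neighbour of x,
+
+       delta[i] = min over j of ( c_yx[i][j] + c_x[j] )
+
+   is added to c_y, after which x can safely be removed from the graph. */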
+
+static
+void apply_RI(pbqp *this_,int x)
+{
+ int y;
+ unsigned xlen,
+ ylen;
+ PBQPMatrix *c_yx;
+ PBQPVector *c_x, *delta;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> adj_list[x] != NULL);
+ assert(this_ -> adj_list[x] -> succ == NULL);
+
+ /* get the adjacent node */
+ y = this_ -> adj_list[x] -> adj;
+ assert(y >= 0 && y < this_->num_nodes);
+
+ /* determine lengths of the cost vectors for nodes x and y */
+ xlen = this_ -> node_costs[x]->getLength();
+ ylen = this_ -> node_costs[y]->getLength();
+
+ /* get cost vector c_x and matrix c_yx */
+ c_x = this_ -> node_costs[x];
+ c_yx = pbqp_get_costmatrix(this_,y,x);
+ assert (c_yx != NULL);
+
+
+ /* allocate delta vector */
+ delta = new PBQPVector(ylen);
+
+ /* compute delta vector */
+ for(unsigned i = 0; i < ylen; ++i) {
+ PBQPNum min = (*c_yx)[i][0] + (*c_x)[0];
+ for(unsigned j = 1; j < xlen; ++j) {
+ PBQPNum c = (*c_yx)[i][j] + (*c_x)[j];
+ if ( c < min )
+ min = c;
+ }
+ (*delta)[i] = min;
+ }
+
+ /* add delta vector */
+ *this_ -> node_costs[y] += *delta;
+
+ /* delete node x */
+ remove_node(this_,x);
+
+ /* reorder adj. nodes of node x */
+ reorder_adjnodes(this_,x);
+
+ /* push node x on stack */
+ assert(this_ -> stack_ptr < this_ -> num_nodes);
+ this_->stack[this_ -> stack_ptr++] = x;
+
+ /* free vec/mat */
+ delete c_yx;
+ delete delta;
+
+ /* increment counter for number statistic */
+ this_->num_ri++;
+}
+
+/* RII reduction
+ This reduction rule is applied for nodes
+ of degree two. */
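+
+/* In symbols (a restatement of the loops below): with y and z the two
+   neighbours of x,
+
+       delta[i][j] = min over k of ( c_yx[i][k] + c_zx[j][k] + c_x[k] )
+
+   is added to the edge cost matrix of (y,z), after which x can be removed. */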
+
+static
+void apply_RII(pbqp *this_,int x)
+{
+ int y,z;
+ unsigned xlen,ylen,zlen;
+ adjnode *adj_yz;
+
+ PBQPMatrix *c_yx, *c_zx;
+ PBQPVector *cx;
+ PBQPMatrix *delta;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> adj_list[x] != NULL);
+ assert(this_ -> adj_list[x] -> succ != NULL);
+ assert(this_ -> adj_list[x] -> succ -> succ == NULL);
+
+ /* get the adjacent nodes */
+ y = this_ -> adj_list[x] -> adj;
+ z = this_ -> adj_list[x] -> succ -> adj;
+ assert(y >= 0 && y < this_->num_nodes);
+ assert(z >= 0 && z < this_->num_nodes);
+
+ /* determine lengths of the cost vectors for nodes x, y, and z */
+ xlen = this_ -> node_costs[x]->getLength();
+ ylen = this_ -> node_costs[y]->getLength();
+ zlen = this_ -> node_costs[z]->getLength();
+
+ /* get cost vector c_x and matrices c_yx, c_zx */
+ cx = this_ -> node_costs[x];
+ c_yx = pbqp_get_costmatrix(this_,y,x);
+ c_zx = pbqp_get_costmatrix(this_,z,x);
+ assert(c_yx != NULL);
+ assert(c_zx != NULL);
+
+ /* Colour Heuristic */
+ if ( (adj_yz = find_adjnode(this_,y,z)) != NULL) {
+ adj_yz->tc_valid = false;
+ adj_yz->reverse->tc_valid = false;
+ }
+
+ /* allocate delta matrix */
+ delta = new PBQPMatrix(ylen, zlen);
+
+ /* compute delta matrix */
+ for(unsigned i=0;i<ylen;i++) {
+ for(unsigned j=0;j<zlen;j++) {
+ PBQPNum min = (*c_yx)[i][0] + (*c_zx)[j][0] + (*cx)[0];
+ for(unsigned k=1;k<xlen;k++) {
+ PBQPNum c = (*c_yx)[i][k] + (*c_zx)[j][k] + (*cx)[k];
+ if ( c < min ) {
+ min = c;
+ }
+ }
+ (*delta)[i][j] = min;
+ }
+ }
+
+ /* add delta matrix */
+ add_pbqp_edgecosts(this_,y,z,delta);
+
+ /* delete node x */
+ remove_node(this_,x);
+
+ /* simplify cost matrix c_yz */
+ simplify_edge(this_,y,z);
+
+ /* reorder adj. nodes */
+ reorder_adjnodes(this_,x);
+
+ /* push node x on stack */
+ assert(this_ -> stack_ptr < this_ -> num_nodes);
+ this_->stack[this_ -> stack_ptr++] = x;
+
+ /* free vec/mat */
+ delete c_yx;
+ delete c_zx;
+ delete delta;
+
+ /* increment counter for number statistic */
+ this_->num_rii++;
+
+}
+
+/* RN reduction */
+static
+void apply_RN(pbqp *this_,int x)
+{
+ unsigned xlen;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> node_costs[x] != NULL);
+
+ xlen = this_ -> node_costs[x] -> getLength();
+
+ /* after application of RN rule no optimality
+ can be guaranteed! */
+ this_ -> optimal = false;
+
+ /* push node x on stack */
+ assert(this_ -> stack_ptr < this_ -> num_nodes);
+ this_->stack[this_ -> stack_ptr++] = x;
+
+ /* delete node x */
+ remove_node(this_,x);
+
+ /* reorder adj. nodes of node x */
+ reorder_adjnodes(this_,x);
+
+ /* increment counter for number statistic */
+ this_->num_rn++;
+}
+
+
+static
+void compute_tc_info(pbqp *this_, adjnode *p)
+{
+ adjnode *r;
+ PBQPMatrix *m;
+ int x,y;
+ PBQPVector *c_x, *c_y;
+ int *row_inf_counts;
+
+ assert(p->reverse != NULL);
+
+ /* set flags */
+ r = p->reverse;
+ p->tc_valid = true;
+ r->tc_valid = true;
+
+ /* get edge */
+ x = r->adj;
+ y = p->adj;
+
+ /* get cost vectors */
+ c_x = this_ -> node_costs[x];
+ c_y = this_ -> node_costs[y];
+
+ /* get cost matrix */
+ m = pbqp_get_costmatrix(this_, x, y);
+
+
+ /* allocate allowed set for edge (x,y) and (y,x) */
+ if (p->tc_safe_regs == NULL) {
+ p->tc_safe_regs = (int *) malloc(sizeof(int) * c_x->getLength());
+ }
+
+ if (r->tc_safe_regs == NULL ) {
+ r->tc_safe_regs = (int *) malloc(sizeof(int) * c_y->getLength());
+ }
+
+ p->tc_impact = r->tc_impact = 0;
+
+ row_inf_counts = (int *) alloca(sizeof(int) * c_x->getLength());
+
+ /* init arrays */
+ p->tc_safe_regs[0] = 0;
+ row_inf_counts[0] = 0;
+ for(unsigned i = 1; i < c_x->getLength(); ++i){
+ p->tc_safe_regs[i] = 1;
+ row_inf_counts[i] = 0;
+ }
+
+ r->tc_safe_regs[0] = 0;
+ for(unsigned j = 1; j < c_y->getLength(); ++j){
+ r->tc_safe_regs[j] = 1;
+ }
+
+ for(unsigned j = 0; j < c_y->getLength(); ++j) {
+ int col_inf_counts = 0;
+ for (unsigned i = 0; i < c_x->getLength(); ++i) {
+ if (isInf((*m)[i][j])) {
+ ++col_inf_counts;
+ ++row_inf_counts[i];
+
+ p->tc_safe_regs[i] = 0;
+ r->tc_safe_regs[j] = 0;
+ }
+ }
+ if (col_inf_counts > p->tc_impact) {
+ p->tc_impact = col_inf_counts;
+ }
+ }
+
+ for(unsigned i = 0; i < c_x->getLength(); ++i){
+ if (row_inf_counts[i] > r->tc_impact)
+ {
+ r->tc_impact = row_inf_counts[i];
+ }
+ }
+
+ delete m;
+}
+
+/*
+ * Checks whether node x can be locally coloured.
+ */
+static
+int is_colorable(pbqp *this_,int x)
+{
+ adjnode *adj_ptr;
+ PBQPVector *c_x;
+ int result = 1;
+ int *allowed;
+ int num_allowed = 0;
+ unsigned total_impact = 0;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> node_costs[x] != NULL);
+
+ c_x = this_ -> node_costs[x];
+
+ /* allocate allowed set */
+ allowed = (int *)malloc(sizeof(int) * c_x->getLength());
+ for(unsigned i = 0; i < c_x->getLength(); ++i){
+ if (!isInf((*c_x)[i]) && i > 0) {
+ allowed[i] = 1;
+ ++num_allowed;
+ } else {
+ allowed[i] = 0;
+ }
+ }
+
+ /* determine local minimum */
+ for(adj_ptr=this_->adj_list[x] ;adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ if (!adj_ptr -> tc_valid) {
+ compute_tc_info(this_, adj_ptr);
+ }
+
+ total_impact += adj_ptr->tc_impact;
+
+ if (num_allowed > 0) {
+ for (unsigned i = 1; i < c_x->getLength(); ++i){
+ if (allowed[i]){
+ if (!adj_ptr->tc_safe_regs[i]){
+ allowed[i] = 0;
+ --num_allowed;
+ if (num_allowed == 0)
+ break;
+ }
+ }
+ }
+ }
+
+ if ( total_impact >= c_x->getLength() - 1 && num_allowed == 0 ) {
+ result = 0;
+ break;
+ }
+ }
+ free(allowed);
+
+ return result;
+}
+
+/* use the Briggs heuristic
+ note: this is not a general heuristic; it is only useful for
+ interference graphs.
+ */
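+/* (Descriptive note restating the selection below: the first pass returns
+   any node of degree > 2 that is still guaranteed locally colourable;
+   failing that, the second pass picks the node minimising the ratio of its
+   cost at index 0, (*node_costs[u])[0], to its degree.) */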
+int pop_colorablenode(pbqp *this_)
+{
+ int deg;
+ bucketnode *min_bucket=NULL;
+ PBQPNum min = std::numeric_limits<PBQPNum>::infinity();
+
+ /* select node where the number of colors is less than the node degree */
+ for(deg=this_->max_deg;deg > 2;deg--) {
+ bucketnode *bucket;
+ for(bucket=this_->bucket_list[deg];bucket!= NULL;bucket = bucket -> succ) {
+ int u = bucket->u;
+ if (is_colorable(this_,u)) {
+ pbqp_remove_bucket(this_,bucket);
+ this_->num_rn_special++;
+ free(bucket);
+ return u;
+ }
+ }
+ }
+
+ /* select node with minimal ratio between average node costs and degree of node */
+ for(deg=this_->max_deg;deg >2; deg--) {
+ bucketnode *bucket;
+ for(bucket=this_->bucket_list[deg];bucket!= NULL;bucket = bucket -> succ) {
+ PBQPNum h;
+ int u;
+
+ u = bucket->u;
+ assert(u>=0 && u < this_->num_nodes);
+ h = (*this_->node_costs[u])[0] / (PBQPNum) deg;
+ if (h < min) {
+ min_bucket = bucket;
+ min = h;
+ }
+ }
+ }
+
+ /* return node and free bucket */
+ if (min_bucket != NULL) {
+ int u;
+
+ pbqp_remove_bucket(this_,min_bucket);
+ u = min_bucket->u;
+ free(min_bucket);
+ return u;
+ } else {
+ return -1;
+ }
+}
+
+
+/*****************************************************************************
+ * PBQP graph parsing
+ ****************************************************************************/
+
+/* reduce pbqp problem (first phase) */
+static
+void reduce_pbqp(pbqp *this_)
+{
+ int u;
+
+ assert(this_ != NULL);
+ assert(this_->bucket_list != NULL);
+
+ for(;;){
+
+ if (this_->bucket_list[1] != NULL) {
+ u = pop_node(this_,1);
+ apply_RI(this_,u);
+ } else if (this_->bucket_list[2] != NULL) {
+ u = pop_node(this_,2);
+ apply_RII(this_,u);
+ } else if ((u = pop_colorablenode(this_)) != -1) {
+ apply_RN(this_,u);
+ } else {
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ * PBQP back propagation
+ ****************************************************************************/
+
+/* determine solution of a reduced node. Either
+ RI or RII was applied to this node. */
+static
+void determine_solution(pbqp *this_,int x)
+{
+ PBQPVector *v = new PBQPVector(*this_ -> node_costs[x]);
+ adjnode *adj_ptr;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> adj_list != NULL);
+ assert(this_ -> solution != NULL);
+
+ for(adj_ptr=this_->adj_list[x] ;adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ int y = adj_ptr -> adj;
+ int y_sol = this_ -> solution[y];
+
+ PBQPMatrix *c_yx = pbqp_get_costmatrix(this_,y,x);
+ assert(y_sol >= 0 && y_sol < (int)this_->node_costs[y]->getLength());
+ (*v) += c_yx->getRowAsVector(y_sol);
+ delete c_yx;
+ }
+ this_ -> solution[x] = v->minIndex();
+
+ delete v;
+}
+
+/* back propagation phase of PBQP */
+static
+void back_propagate(pbqp *this_)
+{
+ int i;
+
+ assert(this_ != NULL);
+ assert(this_->stack != NULL);
+ assert(this_->stack_ptr < this_->num_nodes);
+
+ for(i=this_ -> stack_ptr-1;i>=0;i--) {
+ int x = this_ -> stack[i];
+ assert( x >= 0 && x < this_ -> num_nodes);
+ reinsert_node(this_,x);
+ determine_solution(this_,x);
+ }
+}
+
+/* solve trivial nodes of degree zero */
+static
+void determine_trivialsolution(pbqp *this_)
+{
+ int u;
+ PBQPNum delta;
+
+ assert( this_ != NULL);
+ assert( this_ -> bucket_list != NULL);
+
+ /* determine trivial solution */
+ while (this_->bucket_list[0] != NULL) {
+ u = pop_node(this_,0);
+
+ assert( u >= 0 && u < this_ -> num_nodes);
+
+ this_->solution[u] = this_->node_costs[u]->minIndex();
+ delta = (*this_->node_costs[u])[this_->solution[u]];
+ this_->min = this_->min + delta;
+
+ /* increment counter for number statistic */
+ this_->num_r0++;
+ }
+}
+
+/*****************************************************************************
+ * debug facilities
+ ****************************************************************************/
+static
+void check_pbqp(pbqp *this_)
+{
+ int u,v;
+ PBQPMatrix *costs;
+ adjnode *adj_ptr;
+
+ assert( this_ != NULL);
+
+ for(u=0;u< this_->num_nodes; u++) {
+ assert (this_ -> node_costs[u] != NULL);
+ for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ v = adj_ptr -> adj;
+ assert( v>= 0 && v < this_->num_nodes);
+ if (u < v ) {
+ costs = adj_ptr -> costs;
+ assert( costs->getRows() == this_->node_costs[u]->getLength() &&
+ costs->getCols() == this_->node_costs[v]->getLength());
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ * PBQP solve routines
+ ****************************************************************************/
+
+/* solve PBQP problem */
+void solve_pbqp(pbqp *this_)
+{
+ assert(this_ != NULL);
+ assert(!this_->solved);
+
+ /* check vector & matrix dimensions */
+ check_pbqp(this_);
+
+ /* simplify PBQP problem */
+
+ /* eliminate trivial nodes, i.e.
+ nodes with cost vectors of length one. */
+ eliminate_trivial_nodes(this_);
+
+ /* eliminate edges with independent
+ cost matrices and normalize matrices */
+ eliminate_independent_edges(this_);
+
+ /* create bucket list for graph parsing */
+ create_bucketlist(this_);
+
+ /* reduce phase */
+ reduce_pbqp(this_);
+
+ /* solve trivial nodes */
+ determine_trivialsolution(this_);
+
+ /* back propagation phase */
+ back_propagate(this_);
+
+ this_->solved = true;
+}
+
+/* get solution of a node */
+int get_pbqp_solution(pbqp *this_,int x)
+{
+ assert(this_ != NULL);
+ assert(this_->solution != NULL);
+ assert(this_ -> solved);
+
+ return this_->solution[x];
+}
+
+/* is solution optimal? */
+bool is_pbqp_optimal(pbqp *this_)
+{
+ assert(this_ -> solved);
+ return this_->optimal;
+}
+
+}
+
+/* end of PBQP.cpp */
diff --git a/lib/CodeGen/PBQP.h b/lib/CodeGen/PBQP.h
new file mode 100644
index 0000000..5fd2c06
--- /dev/null
+++ b/lib/CodeGen/PBQP.h
@@ -0,0 +1,284 @@
+//===----------------- PBQP.h ----------- PBQP Solver -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Developed by: Bernhard Scholz
+// The University of Sydney
+// http://www.it.usyd.edu.au/~scholz
+//===----------------------------------------------------------------------===//
+
+// TODO:
+//
+// * Default to null costs on vector initialisation?
+// * C++-ify the rest of the solver.
+
+#ifndef LLVM_CODEGEN_PBQPSOLVER_H
+#define LLVM_CODEGEN_PBQPSOLVER_H
+
+#include <cassert>
+#include <algorithm>
+#include <functional>
+
+namespace llvm {
+
+//! \brief Floating point type to use in PBQP solver.
+typedef double PBQPNum;
+
+//! \brief PBQP Vector class.
+class PBQPVector {
+public:
+
+ //! \brief Construct a PBQP vector of the given size.
+ explicit PBQPVector(unsigned length) :
+ length(length), data(new PBQPNum[length]) {
+ std::fill(data, data + length, 0);
+ }
+
+ //! \brief Copy construct a PBQP vector.
+ PBQPVector(const PBQPVector &v) :
+ length(v.length), data(new PBQPNum[length]) {
+ std::copy(v.data, v.data + length, data);
+ }
+
+ ~PBQPVector() { delete[] data; }
+
+ //! \brief Assignment operator.
+ PBQPVector& operator=(const PBQPVector &v) {
+ delete[] data;
+ length = v.length;
+ data = new PBQPNum[length];
+ std::copy(v.data, v.data + length, data);
+ return *this;
+ }
+
+ //! \brief Return the length of the vector
+ unsigned getLength() const throw () {
+ return length;
+ }
+
+ //! \brief Element access.
+ PBQPNum& operator[](unsigned index) {
+ assert(index < length && "PBQPVector element access out of bounds.");
+ return data[index];
+ }
+
+ //! \brief Const element access.
+ const PBQPNum& operator[](unsigned index) const {
+ assert(index < length && "PBQPVector element access out of bounds.");
+ return data[index];
+ }
+
+ //! \brief Add another vector to this one.
+ PBQPVector& operator+=(const PBQPVector &v) {
+ assert(length == v.length && "PBQPVector length mismatch.");
+ std::transform(data, data + length, v.data, data, std::plus<PBQPNum>());
+ return *this;
+ }
+
+ //! \brief Subtract another vector from this one.
+ PBQPVector& operator-=(const PBQPVector &v) {
+ assert(length == v.length && "PBQPVector length mismatch.");
+ std::transform(data, data + length, v.data, data, std::minus<PBQPNum>());
+ return *this;
+ }
+
+ //! \brief Returns the index of the minimum value in this vector
+ unsigned minIndex() const {
+ return std::min_element(data, data + length) - data;
+ }
+
+private:
+ unsigned length;
+ PBQPNum *data;
+};
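+
+//! Illustrative usage sketch (not part of the imported sources; the values
+//! are invented):
+//!
+//!   PBQPVector v(3);               // (0, 0, 0)
+//!   v[1] = 5; v[2] = 2;            // (0, 5, 2)
+//!   PBQPVector w(v);
+//!   w += v;                        // (0, 10, 4)
+//!   unsigned best = v.minIndex();  // 0, the index of the smallest entry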
+
+
+//! \brief PBQP Matrix class
+class PBQPMatrix {
+public:
+
+ //! \brief Construct a PBQP Matrix with the given dimensions.
+ PBQPMatrix(unsigned rows, unsigned cols) :
+ rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+ std::fill(data, data + (rows * cols), 0);
+ }
+
+ //! \brief Copy construct a PBQP matrix.
+ PBQPMatrix(const PBQPMatrix &m) :
+ rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
+ std::copy(m.data, m.data + (rows * cols), data);
+ }
+
+ ~PBQPMatrix() { delete[] data; }
+
+ //! \brief Assignment operator.
+ PBQPMatrix& operator=(const PBQPMatrix &m) {
+ delete[] data;
+ rows = m.rows; cols = m.cols;
+ data = new PBQPNum[rows * cols];
+ std::copy(m.data, m.data + (rows * cols), data);
+ return *this;
+ }
+
+ //! \brief Return the number of rows in this matrix.
+ unsigned getRows() const throw () { return rows; }
+
+ //! \brief Return the number of cols in this matrix.
+ unsigned getCols() const throw () { return cols; }
+
+ //! \brief Matrix element access.
+ PBQPNum* operator[](unsigned r) {
+ assert(r < rows && "Row out of bounds.");
+ return data + (r * cols);
+ }
+
+ //! \brief Matrix element access.
+ const PBQPNum* operator[](unsigned r) const {
+ assert(r < rows && "Row out of bounds.");
+ return data + (r * cols);
+ }
+
+ //! \brief Returns the given row as a vector.
+ PBQPVector getRowAsVector(unsigned r) const {
+ PBQPVector v(cols);
+ for (unsigned c = 0; c < cols; ++c)
+ v[c] = (*this)[r][c];
+ return v;
+ }
+
+ //! \brief Reset the matrix to the given value.
+ PBQPMatrix& reset(PBQPNum val = 0) {
+ std::fill(data, data + (rows * cols), val);
+ return *this;
+ }
+
+ //! \brief Set a single row of this matrix to the given value.
+ PBQPMatrix& setRow(unsigned r, PBQPNum val) {
+ assert(r < rows && "Row out of bounds.");
+ std::fill(data + (r * cols), data + ((r + 1) * cols), val);
+ return *this;
+ }
+
+ //! \brief Set a single column of this matrix to the given value.
+ PBQPMatrix& setCol(unsigned c, PBQPNum val) {
+ assert(c < cols && "Column out of bounds.");
+ for (unsigned r = 0; r < rows; ++r)
+ (*this)[r][c] = val;
+ return *this;
+ }
+
+ //! \brief Matrix transpose.
+ PBQPMatrix transpose() const {
+ PBQPMatrix m(cols, rows);
+ for (unsigned r = 0; r < rows; ++r)
+ for (unsigned c = 0; c < cols; ++c)
+ m[c][r] = (*this)[r][c];
+ return m;
+ }
+
+ //! \brief Returns the diagonal of the matrix as a vector.
+ //!
+ //! Matrix must be square.
+ PBQPVector diagonalize() const {
+ assert(rows == cols && "Attempt to diagonalize non-square matrix.");
+
+ PBQPVector v(rows);
+ for (unsigned r = 0; r < rows; ++r)
+ v[r] = (*this)[r][r];
+ return v;
+ }
+
+ //! \brief Add the given matrix to this one.
+ PBQPMatrix& operator+=(const PBQPMatrix &m) {
+ assert(rows == m.rows && cols == m.cols &&
+ "Matrix dimensions mismatch.");
+ std::transform(data, data + (rows * cols), m.data, data,
+ std::plus<PBQPNum>());
+ return *this;
+ }
+
+ //! \brief Returns the minimum of the given row
+ PBQPNum getRowMin(unsigned r) const {
+ assert(r < rows && "Row out of bounds");
+ return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
+ }
+
+ //! \brief Returns the minimum of the given column
+ PBQPNum getColMin(unsigned c) const {
+ PBQPNum minElem = (*this)[0][c];
+ for (unsigned r = 1; r < rows; ++r)
+ if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
+ return minElem;
+ }
+
+ //! \brief Subtracts the given scalar from the elements of the given row.
+ PBQPMatrix& subFromRow(unsigned r, PBQPNum val) {
+ assert(r < rows && "Row out of bounds");
+ std::transform(data + (r * cols), data + ((r + 1) * cols),
+ data + (r * cols),
+ std::bind2nd(std::minus<PBQPNum>(), val));
+ return *this;
+ }
+
+ //! \brief Subtracts the given scalar from the elements of the given column.
+ PBQPMatrix& subFromCol(unsigned c, PBQPNum val) {
+ for (unsigned r = 0; r < rows; ++r)
+ (*this)[r][c] -= val;
+ return *this;
+ }
+
+ //! \brief Returns true if this is a zero matrix.
+ bool isZero() const {
+    return std::find_if(data, data + (rows * cols),
+                        std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
+                          data + (rows * cols);
+ }
+
+private:
+ unsigned rows, cols;
+ PBQPNum *data;
+};
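+
+//! Illustrative usage sketch (not part of the imported sources; the values
+//! are invented):
+//!
+//!   PBQPMatrix m(2, 2);
+//!   m[0][0] = 1; m[0][1] = 4;           // row 0: (1, 4)
+//!   m.setRow(1, 2);                     // row 1: (2, 2)
+//!   PBQPVector r(m.getRowAsVector(0));  // (1, 4)
+//!   m.subFromRow(0, m.getRowMin(0));    // row 0 becomes (0, 3)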
+
+#define EPS (1E-8)
+
+#ifndef PBQP_TYPE
+#define PBQP_TYPE
+struct pbqp;
+typedef struct pbqp pbqp;
+#endif
+
+/*****************
+ * PBQP routines *
+ *****************/
+
+/* allocate pbqp problem */
+pbqp *alloc_pbqp(int num);
+
+/* add node costs */
+void add_pbqp_nodecosts(pbqp *this_,int u, PBQPVector *costs);
+
+/* add edge mat */
+void add_pbqp_edgecosts(pbqp *this_,int u,int v,PBQPMatrix *costs);
+
+/* solve PBQP problem */
+void solve_pbqp(pbqp *this_);
+
+/* get solution of a node */
+int get_pbqp_solution(pbqp *this_,int u);
+
+
+/* free PBQP */
+void free_pbqp(pbqp *this_);
+
+/* is optimal */
+bool is_pbqp_optimal(pbqp *this_);
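+
+/* Illustrative driver sketch (not part of the imported sources): it wires
+   the routines above together for a two-node problem with invented costs.
+   Whether the add_* calls take ownership of the heap-allocated vectors and
+   matrices is an assumption here; consult the implementation first.
+
+     pbqp *p = alloc_pbqp(2);
+     PBQPVector *c0 = new PBQPVector(2);
+     (*c0)[0] = 1; (*c0)[1] = 4;
+     add_pbqp_nodecosts(p, 0, c0);
+     PBQPVector *c1 = new PBQPVector(2);
+     (*c1)[0] = 3; (*c1)[1] = 0;
+     add_pbqp_nodecosts(p, 1, c1);
+     PBQPMatrix *e = new PBQPMatrix(2, 2);
+     (*e)[0][1] = 5; (*e)[1][0] = 5;
+     add_pbqp_edgecosts(p, 0, 1, e);
+     solve_pbqp(p);
+     int s0 = get_pbqp_solution(p, 0);  // chosen alternative for node 0
+     free_pbqp(p);
+*/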
+
+}
+#endif
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..c5c76fc
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,431 @@
+//===-- PHIElimination.cpp - Eliminate PHI nodes by inserting copies -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+
+namespace {
+ class VISIBILITY_HIDDEN PNE : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PNE() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in this
+    /// function. In particular, we want to count the number of uses of each
+    /// virtual register used in a PHI node, keyed by the BB the vreg is
+    /// coming from. This is used later to determine when the vreg is killed
+    /// in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+ // SrcReg. This needs to be after any def or uses of SrcReg, but before
+ // any subsequent point where control flow might jump out of the basic
+ // block.
+ MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+ unsigned SrcReg);
+
+ // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
+ // also after any exception handling labels: in landing pads execution
+ // starts at the label, so any copies placed before it won't be executed!
+ MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ // Rather than assuming that EH labels come before other kinds of labels,
+ // just skip all labels.
+ while (I != MBB.end() &&
+ (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel()))
+ ++I;
+ return I;
+ }
+
+ typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair;
+ typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+ };
+}
+
+char PNE::ID = 0;
+static RegisterPass<PNE>
+X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
+
+const PassInfo *const llvm::PHIEliminationID = &X;
+
+bool PNE::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+
+ analyzePHINodes(Fn);
+
+ bool Changed = false;
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= EliminatePHINodes(Fn, *I);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (SmallPtrSet<MachineInstr*,4>::iterator I = ImpDefs.begin(),
+ E = ImpDefs.end(); I != E; ++I) {
+ MachineInstr *DefMI = *I;
+ unsigned DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_empty(DefReg))
+ DefMI->eraseFromParent();
+ }
+
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+ return Changed;
+}
+
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) {
+ if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin());
+
+ while (MBB.front().getOpcode() == TargetInstrInfo::PHI)
+ LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
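+  // PHI operands are laid out as the def followed by (value, predecessor
+  // MBB) pairs, so the incoming values sit at odd operand indices 1, 3, 5...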
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ unsigned SrcReg = MPhi->getOperand(i).getReg();
+ const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (!DefMI || DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+ return false;
+ }
+ return true;
+}
+
+// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
+// This needs to be after any def or uses of SrcReg, but before any subsequent
+// point where control flow might jump out of the basic block.
+MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB.empty())
+ return MBB.begin();
+
+ // If this basic block does not contain an invoke, then control flow always
+ // reaches the end of it, so place the copy there. The logic below works in
+ // this case too, but is more expensive.
+ if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
+ return MBB.getFirstTerminator();
+
+ // Discover any definition/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineInstr *DefUseMI = &*RI;
+ if (DefUseMI->getParent() == &MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No def/uses. Insert the copy at the start of the basic block.
+ InsertPoint = MBB.begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the definition/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last definition/use.
+ InsertPoint = MBB.end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes however.
+ return SkipPHIsAndLabels(MBB, InsertPoint);
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// the atomic execution semantics of PHIs. This lowering method is always
+/// correct.
+///
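+/// Illustrative sketch (block and register names invented):
+///
+///   bb2: %dst = PHI [%a, bb0], [%b, bb1]
+///
+/// becomes
+///
+///   bb0: %tmp = copy %a      (inserted into each predecessor)
+///   bb1: %tmp = copy %b
+///   bb2: %dst = copy %tmp    (inserted after any remaining PHIs)
+///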
+void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ unsigned IncomingReg = 0;
+
+ // Insert a register to register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg);
+ else {
+ IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
+ }
+
+ // Update live variable information if there is any.
+ LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ if (IncomingReg) {
+ // Increment use count of the newly created virtual register.
+ LV->getVarInfo(IncomingReg).NumUses++;
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+ // each for each incoming block), the "def" block and instruction fields
+ // for the VarInfo is not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, MPhi);
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i + 1).getMBB(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // If source is defined by an implicit def, there is no need to insert a
+ // copy.
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ ImpDefs.insert(DefMI);
+ continue;
+ }
+
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source of this PHI operand.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock))
+ continue; // If the copy has already been emitted, we're done.
+
+    // Find a safe location to insert the copy; this may be the first
+    // terminator in the block (or end()).
+ MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg);
+
+ // Insert the copy.
+ TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
+
+    // Now update live variable information if we have it; otherwise we're done.
+ if (!LV) continue;
+
+ // We want to be able to insert a kill of the register if this PHI (aka, the
+ // copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value is
+ // live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+ //
+ // Check to see if the copy is the last use, and if so, update the live
+ // variables information so that it knows the copy source instruction kills
+ // the incoming value.
+ LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ // Also check to see if this register is in use by another PHI node which
+ // has not yet been eliminated. If so, it will be killed at an appropriate
+ // point later.
+
+ // Is it used by any PHI instructions in this block?
+ bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
+
+ std::vector<MachineBasicBlock*> OpSuccBlocks;
+
+ // Otherwise, scan successors, including the BB the PHI node lives in.
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+ E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (InRegVI.AliveBlocks.test(SuccIdx)) {
+ ValueIsLive = true;
+ break;
+ }
+
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ if (!ValueIsLive) {
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *MBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (InRegVI.Kills[i]->getParent() == MBB) {
+ ValueIsLive = true;
+ break;
+ }
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (InRegVI.Kills[i]->getParent() == MBB1 ||
+ InRegVI.Kills[i]->getParent() == MBB2) {
+ ValueIsLive = true;
+ break;
+ }
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ InRegVI.Kills[i]->getParent())) {
+ ValueIsLive = true;
+ break;
+ }
+ }
+ }
+
+ // Okay, if we now know that the value is not live out of the block, we can
+ // add a kill marker in this block saying that it kills the incoming value!
+ if (!ValueIsLive) {
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy, however, the first
+ // terminator instruction at the end of the block may also use the value.
+ // In this case, we should mark *it* as being the killing block, not the
+ // copy.
+ MachineBasicBlock::iterator KillInst = prior(InsertPos);
+ MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
+ if (Term != opBlock.end()) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+
+        // Check that no other terminators read the register.
+#ifndef NDEBUG
+ for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end();
+ ++TI) {
+ assert(!TI->readsRegister(SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless "
+                 "they are the first terminator in a block!");
+ }
+#endif
+ }
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ InRegVI.AliveBlocks.reset(opBlockNum);
+ }
+ }
+
+ // Really delete the PHI instruction now!
+ MF.DeleteMachineInstr(MPhi);
+ ++NumAtomic;
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function.
+/// In particular, we want to count the number of uses of each virtual register
+/// used in a PHI node, keyed by the BB the vreg is coming from. This is used
+/// later to determine when the vreg is killed in the BB.
+///
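+/// For instance (illustrative), if vreg %v feeds two different PHI nodes
+/// from the same predecessor bb, VRegPHIUseCount[(bb, %v)] is 2, so the
+/// copy inserted into bb for the first PHI is not marked as killing %v:
+/// the copy inserted for the second PHI still reads it.
+///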
+void PNE::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(),
+ BBI->getOperand(i).getReg())];
+}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
new file mode 100644
index 0000000..f67eb79
--- /dev/null
+++ b/lib/CodeGen/Passes.cpp
@@ -0,0 +1,54 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&createLinearScanRegisterAllocator),
+ cl::desc("Register allocator to use: (default = linearscan)"));
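+
+// Illustrative note (not part of the imported sources): any tool that runs
+// the code generator can now pick an allocator on its command line, e.g.
+//
+//   llc -regalloc=linearscan foo.bc
+//
+// The accepted names are whatever allocators have been registered through
+// RegisterRegAlloc.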
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+
+ return Ctor();
+}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 0000000..de774685
--- /dev/null
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,941 @@
+//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static cl::opt<bool>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnablePostRAHazardAvoidance("avoid-hazards",
+ cl::desc("Enable simple hazard-avoidance"),
+ cl::init(true), cl::Hidden);
+
+namespace {
+ class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
+ public:
+ static char ID;
+ PostRAScheduler() : MachineFunctionPass(&ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Post RA top-down list latency scheduler";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char PostRAScheduler::ID = 0;
+
+ class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+    /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// Topo - A topological ordering for SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// Classes - For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 casted to a
+ /// pointer.
+ const TargetRegisterClass *
+ Classes[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// RegRegs - Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+
+    /// The index of the most recent kill (proceeding bottom-up), or ~0u if
+ /// the register is not live.
+ unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+    /// The index of the most recent complete def (proceeding bottom up), or ~0u
+ /// if the register is live.
+ unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ public:
+ SchedulePostRATDList(MachineFunction &MF,
+ const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ ScheduleHazardRecognizer *HR)
+ : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
+ AllocatableSet(TRI->getAllocatableSet(MF)),
+ HazardRec(HR) {}
+
+ ~SchedulePostRATDList() {
+ delete HazardRec;
+ }
+
+ /// StartBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void Schedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count);
+
+ /// FinishBlock - Clean up register live-range state.
+ ///
+ void FinishBlock();
+
+ private:
+ void PrescanInstruction(MachineInstr *MI);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+ bool BreakAntiDependencies();
+ };
+
+ /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
+  /// a coarse classification and attempts to avoid grouping instructions
+  /// of a given class too densely together.
+ class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
+ /// Class - A simple classification for SUnits.
+ enum Class {
+ Other, Load, Store
+ };
+
+ /// Window - The Class values of the most recently issued
+ /// instructions.
+ Class Window[8];
+
+ /// getClass - Classify the given SUnit.
+ Class getClass(const SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayLoad())
+ return Load;
+ if (TID.mayStore())
+ return Store;
+ return Other;
+ }
+
+ /// Step - Rotate the existing entries in Window and insert the
+ /// given class value in position as the most recent.
+ void Step(Class C) {
+ std::copy(Window+1, array_endof(Window), Window);
+ Window[array_lengthof(Window)-1] = C;
+ }
+
+ public:
+ SimpleHazardRecognizer() : Window() {}
+
+ virtual HazardType getHazardType(SUnit *SU) {
+ Class C = getClass(SU);
+ if (C == Other)
+ return NoHazard;
+ unsigned Score = 0;
+ for (unsigned i = 0; i != array_lengthof(Window); ++i)
+ if (Window[i] == C)
+ Score += i + 1;
+ if (Score > array_lengthof(Window) * 2)
+ return Hazard;
+ return NoHazard;
+ }
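+
+    // Worked example (illustrative): with the 8-entry window and weights
+    // i+1, the hazard threshold is 2*8 = 16. If the two most recently
+    // issued instructions were loads, a load candidate scores 8+7 = 15
+    // (no hazard); once it issues, the next load candidate would score
+    // 8+7+6 = 21 and be reported as a hazard.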
+
+ virtual void EmitInstruction(SUnit *SU) {
+ Step(getClass(SU));
+ }
+
+ virtual void AdvanceCycle() {
+ Step(Other);
+ }
+ };
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+///
+static bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineFunction &MF) {
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that modifies
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ return true;
+
+ return false;
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "PostRAScheduler\n";
+
+ const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
+ new SimpleHazardRecognizer :
+ new ScheduleHazardRecognizer();
+
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.StartBlock(MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB->end();
+ unsigned Count = MBB->size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineInstr *MI = prior(I);
+ if (isSchedulingBoundary(MI, Fn)) {
+ Scheduler.Run(MBB, I, Current, CurrentCount);
+ Scheduler.EmitSchedule();
+ Current = MI;
+ CurrentCount = Count - 1;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ --Count;
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB->begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.EmitSchedule();
+
+ // Clean up register live-range state.
+ Scheduler.FinishBlock();
+ }
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::StartBlock(BB);
+
+ // Clear out the register class data.
+ std::fill(Classes, array_endof(Classes),
+ static_cast<const TargetRegisterClass *>(0));
+
+ // Initialize the indices to indicate that no registers are live.
+ std::fill(KillIndices, array_endof(KillIndices), ~0u);
+ std::fill(DefIndices, array_endof(DefIndices), BB->size());
+
+ // Determine the live-out physregs for this block.
+ if (!BB->empty() && BB->back().getDesc().isReturn())
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ else
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+
+ // Consider callee-saved registers as live-out, since we're running after
+ // prologue/epilogue insertion so there's no way to add additional
+ // saved registers.
+ //
+ // TODO: If the callee saves and restores these, then we can potentially
+ // use them between the save and the restore. To do that, we could scan
+ // the exit blocks to see which of these registers are defined.
+ // Alternatively, callee-saved registers that aren't saved and restored
+ // could be marked live-in in every block.
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ // Build the scheduling graph.
+ BuildSchedGraph();
+
+ if (EnableAntiDepBreaking) {
+ if (BreakAntiDependencies()) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ SUnits.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+ BuildSchedGraph();
+ }
+ }
+
+ AvailableQueue.initNodes(SUnits);
+
+ ListScheduleTopDown();
+
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
+ if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
+ // Mark this register to be non-renamable.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::FinishBlock() {
+ RegRefs.clear();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::FinishBlock();
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SDep *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
+
+void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC =
+ getInstrOperandRegClass(TRI, MI->getDesc(), i);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *Alias;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+ }
+}
+
+void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ // Update liveness.
+  // Proceeding upwards, registers that are def'd but not used in this
+ // instruction are now dead.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC =
+ getInstrOperandRegClass(TRI, MI->getDesc(), i);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+    // It wasn't previously live but now it is; this is a kill.
+ if (KillIndices[Reg] == ~0u) {
+ KillIndices[Reg] = Count;
+ DefIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ }
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies along the critical path
+/// of the ScheduleDAG and break them by renaming registers.
+///
+bool SchedulePostRATDList::BreakAntiDependencies() {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return false;
+
+ // Find the node at the bottom of the critical path.
+ SUnit *Max = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+ DOUT << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n";
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first register that
+ // isn't A which is free. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+  // register that each register was replaced with, and avoid
+  // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ bool Changed = false;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
+ // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
+ // is left behind appearing to clobber the super-register, while the
+ // subregister needs to remain live. So we just ignore them.
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ continue;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (MI == CriticalPathMI) {
+ if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ // Don't break anti-dependencies on non-allocatable registers.
+ if (!AllocatableSet.test(AntiDepReg))
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = 0;
+ CriticalPathMI = 0;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && AntiDepReg == Reg) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+ assert((AntiDepReg == 0 || RC != NULL) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+    // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+ RE = RC->allocation_order_end(MF); R != RE; ++R) {
+ unsigned NewReg = *R;
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg[AntiDepReg]) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] == ~0u &&
+ Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
+ KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
+ DOUT << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n";
+
+ // Update the references to the old register to refer to the new
+ // register.
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q)
+ Q->second->setReg(NewReg);
+
+ // We just went back in time and modified history; the
+          // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ Changed = true;
+ LastNewReg[AntiDepReg] = NewReg;
+ break;
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+ --SuccSU->NumPredsLeft;
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ // Compute how many cycles it will be before this actually becomes
+ // available. This is the max of the start time of all predecessors plus
+ // their latencies.
+ SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ ReleaseSucc(SU, &*I);
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue.push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
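+ // Note that ready instructions are removed from the middle of PendingQueue
+ // with a swap-and-pop; i and e are adjusted below so that the element
+ // swapped into slot i is revisited.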
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue.empty()) {
+ CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall but no other problem; just advance
+ // the current cycle and try again.
+ DOUT << "*** Advancing cycle, no work to do\n";
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DOUT << "*** Emitting noop\n";
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
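+// Note: a typical backend pipeline (a sketch; the actual call site lives in
+// target-independent code outside this file) adds this pass after register
+// allocation, e.g. PM.add(createPostRAScheduler()).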
+FunctionPass *llvm::createPostRAScheduler() {
+ return new PostRAScheduler();
+}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
new file mode 100644
index 0000000..97d4728
--- /dev/null
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -0,0 +1,1485 @@
+//===-- PreAllocSplitting.cpp - Pre-allocation Interval Splitting Pass ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level pre-register allocation
+// live interval splitting pass. It finds live interval barriers, i.e.
+// instructions which will kill all physical registers in certain register
+// classes, and split all live intervals which cross the barrier.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-alloc-split"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden);
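+// For each limit above, -1 (the default) means unlimited; these hidden
+// options appear intended as debugging knobs for bisecting splitting
+// decisions.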
+
+STATISTIC(NumSplits, "Number of intervals split");
+STATISTIC(NumRemats, "Number of intervals split by rematerialization");
+STATISTIC(NumFolds, "Number of intervals split with spill folding");
+STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
+STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
+STATISTIC(NumDeadSpills, "Number of dead spills removed");
+
+namespace {
+ class VISIBILITY_HIDDEN PreAllocSplitting : public MachineFunctionPass {
+ MachineFunction *CurrMF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo* TRI;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIs;
+ LiveStacks *LSs;
+ VirtRegMap *VRM;
+
+ // Barrier - Current barrier being processed.
+ MachineInstr *Barrier;
+
+ // BarrierMBB - Basic block where the barrier resides.
+ MachineBasicBlock *BarrierMBB;
+
+ // BarrierIdx - Current barrier index.
+ unsigned BarrierIdx;
+
+ // CurrLI - Current live interval being split.
+ LiveInterval *CurrLI;
+
+ // CurrSLI - Current stack slot live interval.
+ LiveInterval *CurrSLI;
+
+ // CurrSValNo - Current val# for the stack slot live interval.
+ VNInfo *CurrSValNo;
+
+ // IntervalSSMap - A map from live interval to spill slots.
+ DenseMap<unsigned, int> IntervalSSMap;
+
+ // Def2SpillMap - A map from a def instruction index to spill index.
+ DenseMap<unsigned, unsigned> Def2SpillMap;
+
+ public:
+ static char ID;
+ PreAllocSplitting() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addPreserved<RegisterCoalescer>();
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ IntervalSSMap.clear();
+ Def2SpillMap.clear();
+ }
+
+ virtual const char *getPassName() const {
+ return "Pre-Register Allocaton Live Interval Splitting";
+ }
+
+ /// print - Implement the dump method.
+ virtual void print(std::ostream &O, const Module* M = 0) const {
+ LIs->print(O, M);
+ }
+
+ void print(std::ostream *O, const Module* M = 0) const {
+ if (O) print(*O, M);
+ }
+
+ private:
+ MachineBasicBlock::iterator
+ findNextEmptySlot(MachineBasicBlock*, MachineInstr*,
+ unsigned&);
+
+ MachineBasicBlock::iterator
+ findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
+ SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+
+ MachineBasicBlock::iterator
+ findRestorePoint(MachineBasicBlock*, MachineInstr*, unsigned,
+ SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+
+ int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
+
+ bool IsAvailableInStack(MachineBasicBlock*, unsigned, unsigned, unsigned,
+ unsigned&, int&) const;
+
+ void UpdateSpillSlotInterval(VNInfo*, unsigned, unsigned);
+
+ bool SplitRegLiveInterval(LiveInterval*);
+
+ bool SplitRegLiveIntervals(const TargetRegisterClass **,
+ SmallPtrSet<LiveInterval*, 8>&);
+
+ bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB,
+ MachineBasicBlock* BarrierMBB);
+ bool Rematerialize(unsigned vreg, VNInfo* ValNo,
+ MachineInstr* DefMI,
+ MachineBasicBlock::iterator RestorePt,
+ unsigned RestoreIdx,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
+ MachineInstr* DefMI,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int& SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ MachineInstr* FoldRestore(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ void RenumberValno(VNInfo* VN);
+ void ReconstructLiveInterval(LiveInterval* LI);
+ bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split);
+ unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs,
+ unsigned Reg, int FrameIndex, bool& TwoAddr);
+ VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock);
+ VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock);
+};
+} // end anonymous namespace
+
+char PreAllocSplitting::ID = 0;
+
+static RegisterPass<PreAllocSplitting>
+X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
+
+const PassInfo *const llvm::PreAllocSplittingID = &X;
+
+
+/// findNextEmptySlot - Find a gap after the given machine instruction in the
+/// instruction index map. If there isn't one, return end().
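+/// (A "gap" is an unused number in LiveIntervals' instruction index map, e.g.
+/// one left behind by a deleted instruction, into which a new instruction can
+/// be inserted without renumbering.)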
+MachineBasicBlock::iterator
+PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned &SpotIndex) {
+ MachineBasicBlock::iterator MII = MI;
+ if (++MII != MBB->end()) {
+ unsigned Index = LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
+ if (Index) {
+ SpotIndex = Index;
+ return MII;
+ }
+ }
+ return MBB->end();
+}
+
+/// findSpillPoint - Find a gap, as far away from the given MI as possible,
+/// that is suitable for spilling the current live interval. The index must
+/// be before any
+/// defs and uses of the live interval register in the mbb. Return begin() if
+/// none is found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
+ MachineInstr *DefMI,
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
+ unsigned &SpillIndex) {
+ MachineBasicBlock::iterator Pt = MBB->begin();
+
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock::iterator EndPt = DefMI
+ ? MachineBasicBlock::iterator(DefMI) : MBB->begin();
+
+ while (MII != EndPt && !RefsInMBB.count(MII) &&
+ MII->getOpcode() != TRI->getCallFrameSetupOpcode())
+ --MII;
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+
+ while (MII != EndPt && !RefsInMBB.count(MII)) {
+ unsigned Index = LIs->getInstructionIndex(MII);
+
+ // We can't insert the spill between the barrier (a call) and its
+ // corresponding call frame setup.
+ if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
+ while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) {
+ --MII;
+ if (MII == EndPt) {
+ return Pt;
+ }
+ }
+ continue;
+ } else if (LIs->hasGapBeforeInstr(Index)) {
+ Pt = MII;
+ SpillIndex = LIs->findGapBeforeInstr(Index, true);
+ }
+
+ if (RefsInMBB.count(MII))
+ return Pt;
+
+ --MII;
+ }
+
+ return Pt;
+}
+
+/// findRestorePoint - Find a gap in the instruction index map that's suitable
+/// for restoring the current live interval value. The index must be before any
+/// uses of the live interval register in the mbb. Return end() if none is
+/// found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned LastIdx,
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
+ unsigned &RestoreIndex) {
+ // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
+ // begin index accordingly.
+ MachineBasicBlock::iterator Pt = MBB->end();
+ MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator();
+
+ // We start at the call, so walk forward until we find the call frame teardown
+ // since we can't insert restores before that. Bail if we encounter a use
+ // during this time.
+ MachineBasicBlock::iterator MII = MI;
+ if (MII == EndPt) return Pt;
+
+ while (MII != EndPt && !RefsInMBB.count(MII) &&
+ MII->getOpcode() != TRI->getCallFrameDestroyOpcode())
+ ++MII;
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+ ++MII;
+
+ // FIXME: Limit the number of instructions to examine to reduce
+ // compile time?
+ while (MII != EndPt) {
+ unsigned Index = LIs->getInstructionIndex(MII);
+ if (Index > LastIdx)
+ break;
+ unsigned Gap = LIs->findGapBeforeInstr(Index);
+
+ // We can't insert a restore between the barrier (a call) and its
+ // corresponding call frame teardown.
+ if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+ do {
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+ ++MII;
+ } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
+ } else if (Gap) {
+ Pt = MII;
+ RestoreIndex = Gap;
+ }
+
+ if (RefsInMBB.count(MII))
+ return Pt;
+
+ ++MII;
+ }
+
+ return Pt;
+}
+
+/// CreateSpillStackSlot - Create a stack slot for the live interval being
+/// split. If the live interval was previously split, just reuse the same
+/// slot.
+int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
+ const TargetRegisterClass *RC) {
+ int SS;
+ DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+ if (I != IntervalSSMap.end()) {
+ SS = I->second;
+ } else {
+ SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ IntervalSSMap[Reg] = SS;
+ }
+
+ // Create live interval for stack slot.
+ CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+ if (CurrSLI->hasAtLeastOneValue())
+ CurrSValNo = CurrSLI->getValNumInfo(0);
+ else
+ CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ return SS;
+}
+
+/// IsAvailableInStack - Return true if register is available in a split stack
+/// slot at the specified index.
+bool
+PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
+ unsigned Reg, unsigned DefIndex,
+ unsigned RestoreIndex, unsigned &SpillIndex,
+ int& SS) const {
+ if (!DefMBB)
+ return false;
+
+ DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+ if (I == IntervalSSMap.end())
+ return false;
+ DenseMap<unsigned, unsigned>::iterator II = Def2SpillMap.find(DefIndex);
+ if (II == Def2SpillMap.end())
+ return false;
+
+ // If last spill of def is in the same mbb as barrier mbb (where restore will
+ // be), make sure it's not below the intended restore index.
+ // FIXME: Undo the previous spill?
+ assert(LIs->getMBBFromIndex(II->second) == DefMBB);
+ if (DefMBB == BarrierMBB && II->second >= RestoreIndex)
+ return false;
+
+ SS = I->second;
+ SpillIndex = II->second;
+ return true;
+}
+
+/// UpdateSpillSlotInterval - Given the specified val# of the register live
+/// interval being split, and the spill and restore indices, update the live
+/// interval of the spill stack slot.
+void
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
+ unsigned RestoreIndex) {
+ assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
+ "Expect restore in the barrier mbb");
+
+ MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex);
+ if (MBB == BarrierMBB) {
+ // Intra-block spill + restore. We are done.
+ LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ return;
+ }
+
+ SmallPtrSet<MachineBasicBlock*, 4> Processed;
+ unsigned EndIdx = LIs->getMBBEndIdx(MBB);
+ LiveRange SLR(SpillIndex, EndIdx+1, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ Processed.insert(MBB);
+
+ // Start from the spill mbb, figure out the extent of the spill slot's
+ // live interval.
+ SmallVector<MachineBasicBlock*, 4> WorkList;
+ const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex);
+ if (LR->end > EndIdx)
+ // If live range extends beyond end of mbb, add successors to work list.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *MBB = WorkList.back();
+ WorkList.pop_back();
+ if (Processed.count(MBB))
+ continue;
+ unsigned Idx = LIs->getMBBStartIdx(MBB);
+ LR = CurrLI->getLiveRangeContaining(Idx);
+ if (LR && LR->valno == ValNo) {
+ EndIdx = LIs->getMBBEndIdx(MBB);
+ if (Idx <= RestoreIndex && RestoreIndex < EndIdx) {
+ // Spill slot live interval stops at the restore.
+ LiveRange SLR(Idx, RestoreIndex, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ } else if (LR->end > EndIdx) {
+ // Live range extends beyond end of mbb, process successors.
+ LiveRange SLR(Idx, EndIdx+1, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+ } else {
+ LiveRange SLR(Idx, LR->end, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ }
+ Processed.insert(MBB);
+ }
+ }
+}
+
+/// PerformPHIConstruction - From properly set up use and def lists, use a PHI
+/// construction algorithm to compute the ranges and valnos for an interval.
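+/// The recursion is memoized through the NewVNs, LiveOut, and Phis maps, so
+/// each instruction and block is processed at most once.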
+VNInfo*
+PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock) {
+ // Return memoized result if it's available.
+ if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI))
+ return NewVNs[UseI];
+ else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI))
+ return NewVNs[UseI];
+ else if (!IsIntraBlock && LiveOut.count(MBB))
+ return LiveOut[MBB];
+
+ // Check if our block contains any uses or defs.
+ bool ContainsDefs = Defs.count(MBB);
+ bool ContainsUses = Uses.count(MBB);
+
+ VNInfo* RetVNI = 0;
+
+ // Enumerate the cases of use/def containing blocks.
+ if (!ContainsDefs && !ContainsUses) {
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+ } else if (ContainsDefs && !ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+
+ // Search for the def in this block. If we don't find it before the
+ // instruction we care about, go to the fallback case. Note that this
+ // should never happen: this cannot be intrablock, so UseI should
+ // always be an end() iterator.
+ assert(UseI == MBB->end() && "No use marked in intrablock");
+
+ MachineBasicBlock::iterator Walker = UseI;
+ --Walker;
+ while (Walker != MBB->begin()) {
+ if (BlockDefs.count(Walker))
+ break;
+ --Walker;
+ }
+
+ // Once we've found it, extend its VNInfo to our instruction.
+ unsigned DefIndex = LIs->getInstructionIndex(Walker);
+ DefIndex = LiveIntervals::getDefIndex(DefIndex);
+ unsigned EndIndex = LIs->getMBBEndIdx(MBB);
+
+ RetVNI = NewVNs[Walker];
+ LI->addRange(LiveRange(DefIndex, EndIndex+1, RetVNI));
+ } else if (!ContainsDefs && ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+ // Search for the use in this block that precedes the instruction we care
+ // about, going to the fallback case if we don't find it.
+ if (UseI == MBB->begin())
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+
+ MachineBasicBlock::iterator Walker = UseI;
+ --Walker;
+ bool found = false;
+ while (Walker != MBB->begin()) {
+ if (BlockUses.count(Walker)) {
+ found = true;
+ break;
+ }
+ --Walker;
+ }
+
+ // Must check begin() too.
+ if (!found) {
+ if (BlockUses.count(Walker))
+ found = true;
+ else
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+ }
+
+ unsigned UseIndex = LIs->getInstructionIndex(Walker);
+ UseIndex = LiveIntervals::getUseIndex(UseIndex);
+ unsigned EndIndex = 0;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI);
+ EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+
+ // Now, recursively phi construct the VNInfo for the use we found,
+ // and then extend it to include the instruction we care about
+ RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis, false, true);
+
+ LI->addRange(LiveRange(UseIndex, EndIndex+1, RetVNI));
+
+ // FIXME: Need to set kills properly for inter-block stuff.
+ if (LI->isKill(RetVNI, UseIndex)) LI->removeKill(RetVNI, UseIndex);
+ if (IsIntraBlock)
+ LI->addKill(RetVNI, EndIndex);
+ } else if (ContainsDefs && ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+ SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+ // This case is basically a merging of the two preceding cases, with the
+ // special note that checking for defs must take precedence over checking
+ // for uses, because of two-address instructions.
+
+ if (UseI == MBB->begin())
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+
+ MachineBasicBlock::iterator Walker = UseI;
+ --Walker;
+ bool foundDef = false;
+ bool foundUse = false;
+ while (Walker != MBB->begin()) {
+ if (BlockDefs.count(Walker)) {
+ foundDef = true;
+ break;
+ } else if (BlockUses.count(Walker)) {
+ foundUse = true;
+ break;
+ }
+ --Walker;
+ }
+
+ // Must check begin() too.
+ if (!foundDef && !foundUse) {
+ if (BlockDefs.count(Walker))
+ foundDef = true;
+ else if (BlockUses.count(Walker))
+ foundUse = true;
+ else
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+ }
+
+ unsigned StartIndex = LIs->getInstructionIndex(Walker);
+ StartIndex = foundDef ? LiveIntervals::getDefIndex(StartIndex) :
+ LiveIntervals::getUseIndex(StartIndex);
+ unsigned EndIndex = 0;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI);
+ EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+
+ if (foundDef)
+ RetVNI = NewVNs[Walker];
+ else
+ RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis, false, true);
+
+ LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI));
+
+ if (foundUse && LI->isKill(RetVNI, StartIndex))
+ LI->removeKill(RetVNI, StartIndex);
+ if (IsIntraBlock) {
+ LI->addKill(RetVNI, EndIndex);
+ }
+ }
+
+ // Memoize results so we don't have to recompute them.
+ if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+ else {
+ if (!NewVNs.count(UseI))
+ NewVNs[UseI] = RetVNI;
+ Visited.insert(UseI);
+ }
+
+ return RetVNI;
+}
+
+/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path.
+///
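+/// It handles the case where the current block contains no relevant uses or
+/// defs before the point of interest: a new value number is created at the
+/// block start and PHI construction recurses into the predecessors, whose
+/// incoming values are then merged (single predecessor) or joined with a phi.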
+VNInfo*
+PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock) {
+ // NOTE: Because this is the fallback case from other cases, we do NOT
+ // assume that we are not intrablock here.
+ if (Phis.count(MBB)) return Phis[MBB];
+
+ unsigned StartIndex = LIs->getMBBStartIdx(MBB);
+ VNInfo *RetVNI = Phis[MBB] = LI->getNextValue(~0U, /*FIXME*/ 0,
+ LIs->getVNInfoAllocator());
+ if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+
+ // If there are no uses or defs between our starting point and the
+ // beginning of the block, then recursively perform PHI construction
+ // on our predecessors.
+ DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI,
+ Visited, Defs, Uses, NewVNs,
+ LiveOut, Phis, false, false);
+ if (Incoming != 0)
+ IncomingVNs[*PI] = Incoming;
+ }
+
+ if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill) {
+ VNInfo* OldVN = RetVNI;
+ VNInfo* NewVN = IncomingVNs.begin()->second;
+ VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
+ if (MergedVN == OldVN) std::swap(OldVN, NewVN);
+
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(),
+ LOE = LiveOut.end(); LOI != LOE; ++LOI)
+ if (LOI->second == OldVN)
+ LOI->second = MergedVN;
+ for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(),
+ NVE = NewVNs.end(); NVI != NVE; ++NVI)
+ if (NVI->second == OldVN)
+ NVI->second = MergedVN;
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(),
+ PE = Phis.end(); PI != PE; ++PI)
+ if (PI->second == OldVN)
+ PI->second = MergedVN;
+ RetVNI = MergedVN;
+ } else {
+ // Otherwise, merge the incoming VNInfos with a phi join. Create a new
+ // VNInfo to represent the joined value.
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
+ IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
+ I->second->hasPHIKill = true;
+ unsigned KillIndex = LIs->getMBBEndIdx(I->first);
+ if (!LiveInterval::isKill(I->second, KillIndex))
+ LI->addKill(I->second, KillIndex);
+ }
+ }
+
+ unsigned EndIndex = 0;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI);
+ EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+ LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI));
+ if (IsIntraBlock)
+ LI->addKill(RetVNI, EndIndex);
+
+ // Memoize results so we don't have to recompute them.
+ if (!IsIntraBlock)
+ LiveOut[MBB] = RetVNI;
+ else {
+ if (!NewVNs.count(UseI))
+ NewVNs[UseI] = RetVNI;
+ Visited.insert(UseI);
+ }
+
+ return RetVNI;
+}
+
+/// ReconstructLiveInterval - Recompute a live interval from scratch.
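+/// Defs and uses are first cached per block and a fresh VNInfo is created for
+/// each def; PHI construction is then run from every use back to its reaching
+/// definitions, and finally ranges are added for dead defs.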
+void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
+ BumpPtrAllocator& Alloc = LIs->getVNInfoAllocator();
+
+ // Clear the old ranges and valnos.
+ LI->clear();
+
+ // Cache the uses and defs of the register
+ typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap;
+ RegMap Defs, Uses;
+
+ // Keep track of the new VNs we're creating.
+ DenseMap<MachineInstr*, VNInfo*> NewVNs;
+ SmallPtrSet<VNInfo*, 2> PhiVNs;
+
+ // Cache defs, and create a new VNInfo for each def.
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ Defs[(*DI).getParent()].insert(&*DI);
+
+ unsigned DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = LiveIntervals::getDefIndex(DefIdx);
+
+ VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
+
+ // If the def is a move, set the copy field.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ if (DstReg == LI->reg)
+ NewVN->copy = &*DI;
+
+ NewVNs[&*DI] = NewVN;
+ }
+
+ // Cache uses as a separate pass from actually processing them.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
+ UE = MRI->use_end(); UI != UE; ++UI)
+ Uses[(*UI).getParent()].insert(&*UI);
+
+ // Now, actually process every use and use a phi construction algorithm
+ // to walk from it to its reaching definitions, building VNInfos along
+ // the way.
+ DenseMap<MachineBasicBlock*, VNInfo*> LiveOut;
+ DenseMap<MachineBasicBlock*, VNInfo*> Phis;
+ SmallPtrSet<MachineInstr*, 4> Visited;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis, true, true);
+ }
+
+ // Add ranges for dead defs
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ unsigned DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = LiveIntervals::getDefIndex(DefIdx);
+
+ if (LI->liveAt(DefIdx)) continue;
+
+ VNInfo* DeadVN = NewVNs[&*DI];
+ LI->addRange(LiveRange(DefIdx, DefIdx+1, DeadVN));
+ LI->addKill(DeadVN, DefIdx);
+ }
+}
+
+/// RenumberValno - Split the given valno out into a new vreg, allowing it to
+/// be allocated to a different register. This function creates a new vreg,
+/// copies the valno and its live ranges over to the new vreg's interval,
+/// removes them from the old interval, and rewrites all uses and defs of
+/// the original reg to the new vreg within those ranges.
+void PreAllocSplitting::RenumberValno(VNInfo* VN) {
+ SmallVector<VNInfo*, 4> Stack;
+ SmallVector<VNInfo*, 4> VNsToCopy;
+ Stack.push_back(VN);
+
+ // Walk through and copy the valno we care about, and any other valnos
+ // that are two-address redefinitions of the one we care about. These
+ // will need to be rewritten as well. We also check for safety of the
+ // renumbering here, by making sure that none of the valnos involved has
+ // PHI kills.
+ while (!Stack.empty()) {
+ VNInfo* OldVN = Stack.back();
+ Stack.pop_back();
+
+ // Bail out if we ever encounter a valno that has a PHI kill. We can't
+ // renumber these.
+ if (OldVN->hasPHIKill) return;
+
+ VNsToCopy.push_back(OldVN);
+
+ // Locate two-address redefinitions
+ for (SmallVector<unsigned, 4>::iterator KI = OldVN->kills.begin(),
+ KE = OldVN->kills.end(); KI != KE; ++KI) {
+ MachineInstr* MI = LIs->getInstructionFromIndex(*KI);
+ unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg);
+ if (DefIdx == ~0U) continue;
+ if (MI->isRegTiedToUseOperand(DefIdx)) {
+ VNInfo* NextVN =
+ CurrLI->findDefinedVNInfo(LiveIntervals::getDefIndex(*KI));
+ if (NextVN == OldVN) continue;
+ Stack.push_back(NextVN);
+ }
+ }
+ }
+
+ // Create the new vreg
+ unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg));
+
+ // Create the new live interval
+ LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg);
+
+ for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE =
+ VNsToCopy.end(); OI != OE; ++OI) {
+ VNInfo* OldVN = *OI;
+
+ // Copy the valno over
+ VNInfo* NewVN = NewLI.getNextValue(OldVN->def, OldVN->copy,
+ LIs->getVNInfoAllocator());
+ NewLI.copyValNumInfo(NewVN, OldVN);
+ NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
+
+ // Remove the valno from the old interval
+ CurrLI->removeValNo(OldVN);
+ }
+
+ // Rewrite defs and uses. This is done in two stages to avoid invalidating
+ // the reg_iterator.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange;
+
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand& MO = I.getOperand();
+ unsigned InstrIdx = LIs->getInstructionIndex(&*I);
+
+ if ((MO.isUse() && NewLI.liveAt(LiveIntervals::getUseIndex(InstrIdx))) ||
+ (MO.isDef() && NewLI.liveAt(LiveIntervals::getDefIndex(InstrIdx))))
+ OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
+ }
+
+ for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I =
+ OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) {
+ MachineInstr* Inst = I->first;
+ unsigned OpIdx = I->second;
+ MachineOperand& MO = Inst->getOperand(OpIdx);
+ MO.setReg(NewVReg);
+ }
+
+ // Grow the VirtRegMap, since we've created a new vreg.
+ VRM->grow();
+
+ // The renumbered vreg shares a stack slot with the old register.
+ if (IntervalSSMap.count(CurrLI->reg))
+ IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
+
+ NumRenumbers++;
+}
+
+bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
+ MachineInstr* DefMI,
+ MachineBasicBlock::iterator RestorePt,
+ unsigned RestoreIdx,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ MachineBasicBlock& MBB = *RestorePt->getParent();
+
+ MachineBasicBlock::iterator KillPt = BarrierMBB->end();
+ unsigned KillIdx = 0;
+ if (ValNo->def == ~0U || DefMI->getParent() == BarrierMBB)
+ KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
+ else
+ KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
+
+ if (KillPt == DefMI->getParent()->end())
+ return false;
+
+ TII->reMaterialize(MBB, RestorePt, vreg, DefMI);
+ LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
+
+ ReconstructLiveInterval(CurrLI);
+ unsigned RematIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RematIdx = LiveIntervals::getDefIndex(RematIdx);
+ RenumberValno(CurrLI->findDefinedVNInfo(RematIdx));
+
+ ++NumSplits;
+ ++NumRemats;
+ return true;
+}
+
+MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* DefMI,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int& SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ MachineBasicBlock::iterator Pt = MBB->begin();
+
+ // Bail if there are no references in this block to fold the spill into.
+ if (RefsInMBB.empty())
+ return 0;
+
+ MachineBasicBlock::iterator FoldPt = Barrier;
+ while (&*FoldPt != DefMI && FoldPt != MBB->begin() &&
+ !RefsInMBB.count(FoldPt))
+ --FoldPt;
+
+ int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg, false);
+ if (OpIdx == -1)
+ return 0;
+
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+
+ if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+ return 0;
+
+ DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg);
+ if (I != IntervalSSMap.end()) {
+ SS = I->second;
+ } else {
+ SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ }
+
+ MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
+ FoldPt, Ops, SS);
+
+ if (FMI) {
+ LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+ FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ ++NumFolds;
+
+ IntervalSSMap[vreg] = SS;
+ CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+ if (CurrSLI->hasAtLeastOneValue())
+ CurrSValNo = CurrSLI->getValNumInfo(0);
+ else
+ CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ }
+
+ return FMI;
+}
+
+MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds)
+ return 0;
+
+ // Bail if there are no references in this block to fold the restore into.
+ if (RefsInMBB.empty())
+ return 0;
+
+ // Can't fold a restore between a call stack setup and teardown.
+ MachineBasicBlock::iterator FoldPt = Barrier;
+
+ // Advance from barrier to call frame teardown.
+ while (FoldPt != MBB->getFirstTerminator() &&
+ FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+ if (RefsInMBB.count(FoldPt))
+ return 0;
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+ else
+ ++FoldPt;
+
+ // Now find the restore point.
+ while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) {
+ if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+ while (FoldPt != MBB->getFirstTerminator() &&
+ FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+ if (RefsInMBB.count(FoldPt))
+ return 0;
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+ }
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+
+ int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true);
+ if (OpIdx == -1)
+ return 0;
+
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+
+ if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+ return 0;
+
+ MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
+ FoldPt, Ops, SS);
+
+ if (FMI) {
+ LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+ FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ ++NumRestoreFolds;
+ }
+
+ return FMI;
+}
+
+/// SplitRegLiveInterval - Split (spill and restore) the given live interval
+/// so that it does not cross the barrier that's being processed. Shrink wrap
+/// (minimize) the live interval to the last uses.
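+/// The split first finds a restore point after the barrier and tries to
+/// rematerialize the value there; otherwise it inserts (or folds) a spill
+/// after the def or before the barrier and a restore after the barrier, then
+/// reconstructs and renumbers the live interval.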
+bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
+ CurrLI = LI;
+
+ // Find the live range where the current interval crosses the barrier.
+ LiveInterval::iterator LR =
+ CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
+ VNInfo *ValNo = LR->valno;
+
+ if (ValNo->def == ~1U) {
+ // Defined by a dead def? How can this be?
+ assert(0 && "Val# is defined by a dead def?");
+ abort();
+ }
+
+ MachineInstr *DefMI = (ValNo->def != ~0U)
+ ? LIs->getInstructionFromIndex(ValNo->def) : NULL;
+
+ // If this would create a new join point, do not split.
+ if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent()))
+ return false;
+
+ // Find all references in the barrier mbb.
+ SmallPtrSet<MachineInstr*, 4> RefsInMBB;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineInstr *RefMI = &*I;
+ if (RefMI->getParent() == BarrierMBB)
+ RefsInMBB.insert(RefMI);
+ }
+
+ // Find a point to restore the value after the barrier.
+ unsigned RestoreIndex = 0;
+ MachineBasicBlock::iterator RestorePt =
+ findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex);
+ if (RestorePt == BarrierMBB->end())
+ return false;
+
+ if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
+ if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt,
+ RestoreIndex, RefsInMBB))
+ return true;
+
+ // Add a spill either before the barrier or after the definition.
+ MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
+ const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
+ unsigned SpillIndex = 0;
+ MachineInstr *SpillMI = NULL;
+ int SS = -1;
+ if (ValNo->def == ~0U) {
+ // If it's defined by a phi, we must split just before the barrier.
+ if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
+ BarrierMBB, SS, RefsInMBB))) {
+ SpillIndex = LIs->getInstructionIndex(SpillMI);
+ } else {
+ MachineBasicBlock::iterator SpillPt =
+ findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex);
+ if (SpillPt == BarrierMBB->begin())
+ return false; // No gap to insert spill.
+
+ // Add spill.
+ SS = CreateSpillStackSlot(CurrLI->reg, RC);
+ TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC);
+ SpillMI = prior(SpillPt);
+ LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ }
+ } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
+ RestoreIndex, SpillIndex, SS)) {
+ // If it's already split, just restore the value. There is no need to spill
+ // the def again.
+ if (!DefMI)
+ return false; // Def is dead. Do nothing.
+
+ if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier,
+ BarrierMBB, SS, RefsInMBB))) {
+ SpillIndex = LIs->getInstructionIndex(SpillMI);
+ } else {
+ // Check if it's possible to insert a spill after the def MI.
+ MachineBasicBlock::iterator SpillPt;
+ if (DefMBB == BarrierMBB) {
+ // Add spill after the def and the last use before the barrier.
+ SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
+ RefsInMBB, SpillIndex);
+ if (SpillPt == DefMBB->begin())
+ return false; // No gap to insert spill.
+ } else {
+ SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex);
+ if (SpillPt == DefMBB->end())
+ return false; // No gap to insert spill.
+ }
+ // Add spill. The store instruction kills the register if def is before
+ // the barrier in the barrier block.
+ SS = CreateSpillStackSlot(CurrLI->reg, RC);
+ TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg,
+ DefMBB == BarrierMBB, SS, RC);
+ SpillMI = prior(SpillPt);
+ LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ }
+ }
+
+ // Remember def instruction index to spill index mapping.
+ if (DefMI && SpillMI)
+ Def2SpillMap[ValNo->def] = SpillIndex;
+
+ // Add restore.
+ bool FoldedRestore = false;
+ if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
+ BarrierMBB, SS, RefsInMBB)) {
+ RestorePt = LMI;
+ RestoreIndex = LIs->getInstructionIndex(RestorePt);
+ FoldedRestore = true;
+ } else {
+ TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC);
+ MachineInstr *LoadMI = prior(RestorePt);
+ LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex);
+ }
+
+ // Update spill stack slot live interval.
+ UpdateSpillSlotInterval(ValNo, LIs->getUseIndex(SpillIndex)+1,
+ LIs->getDefIndex(RestoreIndex));
+
+ ReconstructLiveInterval(CurrLI);
+
+ if (!FoldedRestore) {
+ unsigned RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RestoreIdx = LiveIntervals::getDefIndex(RestoreIdx);
+ RenumberValno(CurrLI->findDefinedVNInfo(RestoreIdx));
+ }
+
+ ++NumSplits;
+ return true;
+}
+
+/// SplitRegLiveIntervals - Split all register live intervals that cross the
+/// barrier that's being processed.
+bool
+PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
+ SmallPtrSet<LiveInterval*, 8>& Split) {
+ // First find all the virtual registers whose live intervals are intercepted
+ // by the current barrier.
+ SmallVector<LiveInterval*, 8> Intervals;
+ for (const TargetRegisterClass **RC = RCs; *RC; ++RC) {
+ // FIXME: If it's not safe to move any instruction that defines the barrier
+ // register class, then it means there are some special dependencies which
+ // codegen is not modelling. Ignore these barriers for now.
+ if (!TII->isSafeToMoveRegClassDefs(*RC))
+ continue;
+ std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
+ for (unsigned i = 0, e = VRs.size(); i != e; ++i) {
+ unsigned Reg = VRs[i];
+ if (!LIs->hasInterval(Reg))
+ continue;
+ LiveInterval *LI = &LIs->getInterval(Reg);
+ if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg))
+ // Virtual register live interval is intercepted by the barrier. We
+ // should split and shrink wrap its interval if possible.
+ Intervals.push_back(LI);
+ }
+ }
+
+ // Process the affected live intervals.
+ bool Change = false;
+ while (!Intervals.empty()) {
+ if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit)
+ break;
+ LiveInterval *LI = Intervals.back();
+ Intervals.pop_back();
+ bool result = SplitRegLiveInterval(LI);
+ if (result) Split.insert(LI);
+ Change |= result;
+ }
+
+ return Change;
+}
+
+unsigned PreAllocSplitting::getNumberOfNonSpills(
+ SmallPtrSet<MachineInstr*, 4>& MIs,
+ unsigned Reg, int FrameIndex,
+ bool& FeedsTwoAddr) {
+ unsigned NonSpills = 0;
+ for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end();
+ UI != UE; ++UI) {
+ int StoreFrameIndex;
+ unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
+ NonSpills++;
+
+ int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
+ FeedsTwoAddr = true;
+ }
+
+ return NonSpills;
+}
+
+/// removeDeadSpills - After doing splitting, filter through all intervals we've
+/// split, and see if any of the spills are unnecessary. If so, remove them.
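+/// Two patterns are handled: a splitter-inserted load whose value has no
+/// remaining uses (plain DCE), and a load with exactly one non-store use,
+/// which may be folded into that use as a memory operand (load-use-store).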
+bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
+ bool changed = false;
+
+ // Walk over all of the live intervals that were touched by the splitter,
+ // and see if we can do any DCE and/or folding.
+ for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(),
+ LE = split.end(); LI != LE; ++LI) {
+ DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount;
+
+ // First, collect all the uses of the vreg, and sort them by their
+ // reaching definition (VNInfo).
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ unsigned index = LIs->getInstructionIndex(&*UI);
+ index = LiveIntervals::getUseIndex(index);
+
+ const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
+ VNUseCount[LR->valno].insert(&*UI);
+ }
+
+ // Now, take the definitions (VNInfo's) one at a time and try to DCE
+ // and/or fold them away.
+ for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(),
+ VE = (*LI)->vni_end(); VI != VE; ++VI) {
+
+ if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit)
+ return changed;
+
+ VNInfo* CurrVN = *VI;
+
+ // We don't currently try to handle definitions with PHI kills, because
+ // it would involve processing more than one VNInfo at once.
+ if (CurrVN->hasPHIKill) continue;
+
+ // We also don't try to handle the results of PHI joins, since there's
+ // no defining instruction to analyze.
+ unsigned DefIdx = CurrVN->def;
+ if (DefIdx == ~0U || DefIdx == ~1U) continue;
+
+ // We're only interested in eliminating cruft introduced by the splitter,
+ // which is of the form load-use or load-use-store. First, check that the
+ // definition is a load, and remember what stack slot we loaded it from.
+ MachineInstr* DefMI = LIs->getInstructionFromIndex(DefIdx);
+ int FrameIndex;
+ if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
+
+ // If the definition has no uses at all, just DCE it.
+ if (VNUseCount[CurrVN].size() == 0) {
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ (*LI)->removeValNo(CurrVN);
+ DefMI->eraseFromParent();
+ VNUseCount.erase(CurrVN);
+ NumDeadSpills++;
+ changed = true;
+ continue;
+ }
+
+ // Second, get the number of non-store uses of the definition, as well as
+ // a flag indicating whether it feeds into a later two-address definition.
+ bool FeedsTwoAddr = false;
+ unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN],
+ (*LI)->reg, FrameIndex,
+ FeedsTwoAddr);
+
+ // If there's one non-store use and it doesn't feed a two-addr, then
+ // this is a load-use-store case that we can try to fold.
+ if (NonSpillCount == 1 && !FeedsTwoAddr) {
+ // Start by finding the non-store use MachineInstr.
+ SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin();
+ int StoreFrameIndex;
+ unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ while (UI != VNUseCount[CurrVN].end() &&
+ (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) {
+ ++UI;
+ if (UI != VNUseCount[CurrVN].end())
+ StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ }
+ if (UI == VNUseCount[CurrVN].end()) continue;
+
+ MachineInstr* use = *UI;
+
+ // Attempt to fold it away!
+ int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false);
+ if (OpIdx == -1) continue;
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+ if (!TII->canFoldMemoryOperand(use, Ops)) continue;
+
+ MachineInstr* NewMI =
+ TII->foldMemoryOperand(*use->getParent()->getParent(),
+ use, Ops, FrameIndex);
+
+ if (!NewMI) continue;
+
+ // Update relevant analyses.
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ LIs->ReplaceMachineInstrInMaps(use, NewMI);
+ (*LI)->removeValNo(CurrVN);
+
+ DefMI->eraseFromParent();
+ MachineBasicBlock* MBB = use->getParent();
+ NewMI = MBB->insert(MBB->erase(use), NewMI);
+ VNUseCount[CurrVN].erase(use);
+
+ // Remove deleted instructions. Note that we need to remove them from
+ // the VNInfo->use map as well, just to be safe.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator II =
+ VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
+ II != IE; ++II) {
+ for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+ VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE;
+ ++VNI)
+ if (VNI->first != CurrVN)
+ VNI->second.erase(*II);
+ LIs->RemoveMachineInstrFromMaps(*II);
+ (*II)->eraseFromParent();
+ }
+
+ VNUseCount.erase(CurrVN);
+
+ for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+ VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
+ if (VI->second.erase(use))
+ VI->second.insert(NewMI);
+
+ NumDeadSpills++;
+ changed = true;
+ continue;
+ }
+
+ // If there's more than one non-store instruction, we can't profitably
+ // fold it, so bail.
+ if (NonSpillCount) continue;
+
+ // Otherwise, this is a load-store case, so DCE them.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
+ VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
+ UI != UE; ++UI) {
+ LIs->RemoveMachineInstrFromMaps(*UI);
+ (*UI)->eraseFromParent();
+ }
+
+ VNUseCount.erase(CurrVN);
+
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ (*LI)->removeValNo(CurrVN);
+ DefMI->eraseFromParent();
+ NumDeadSpills++;
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
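+/// createsNewJoin - Conservatively determine whether splitting the live range
+/// at the barrier would introduce a new join point for the value, i.e. a CFG
+/// merge where distinct values of the interval would meet. If it would, the
+/// caller declines to split.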
+bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
+ MachineBasicBlock* DefMBB,
+ MachineBasicBlock* BarrierMBB) {
+ if (DefMBB == BarrierMBB)
+ return false;
+
+ if (LR->valno->hasPHIKill)
+ return false;
+
+ unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
+ if (LR->end < MBBEnd)
+ return false;
+
+ MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>();
+ if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB))
+ return true;
+
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+ SmallPtrSet<MachineBasicBlock*, 4> Visited;
+ typedef std::pair<MachineBasicBlock*,
+ MachineBasicBlock::succ_iterator> ItPair;
+ SmallVector<ItPair, 4> Stack;
+ Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin()));
+
+ while (!Stack.empty()) {
+ ItPair P = Stack.back();
+ Stack.pop_back();
+
+ MachineBasicBlock* PredMBB = P.first;
+ MachineBasicBlock::succ_iterator S = P.second;
+
+ if (S == PredMBB->succ_end())
+ continue;
+ else if (Visited.count(*S)) {
+ Stack.push_back(std::make_pair(PredMBB, ++S));
+ continue;
+ } else
+ Stack.push_back(std::make_pair(PredMBB, S+1));
+
+ MachineBasicBlock* MBB = *S;
+ Visited.insert(MBB);
+
+ if (MBB == BarrierMBB)
+ return true;
+
+ MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB);
+ MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB);
+ MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom();
+ while (MDTN) {
+ if (MDTN == DefMDTN)
+ return true;
+ else if (MDTN == BarrierMDTN)
+ break;
+ MDTN = MDTN->getIDom();
+ }
+
+ MBBEnd = LIs->getMBBEndIdx(MBB);
+ if (LR->end > MBBEnd)
+ Stack.push_back(std::make_pair(MBB, MBB->succ_begin()));
+ }
+
+ return false;
+}
+
+
+bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
+ CurrMF = &MF;
+ TM = &MF.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ LIs = &getAnalysis<LiveIntervals>();
+ LSs = &getAnalysis<LiveStacks>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order.
+ MF.RenumberBlocks();
+
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ SmallPtrSet<LiveInterval*, 8> Split;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ BarrierMBB = *DFI;
+ for (MachineBasicBlock::iterator I = BarrierMBB->begin(),
+ E = BarrierMBB->end(); I != E; ++I) {
+ Barrier = &*I;
+ const TargetRegisterClass **BarrierRCs =
+ Barrier->getDesc().getRegClassBarriers();
+ if (!BarrierRCs)
+ continue;
+ BarrierIdx = LIs->getInstructionIndex(Barrier);
+ MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split);
+ }
+ }
+
+ MadeChange |= removeDeadSpills(Split);
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..9e7ad67
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,679 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+
+static RegisterPass<PEI>
+X("prologepilog", "Prologue/Epilogue Insertion");
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+
+ // Get MachineModuleInfo so that we can track the construction of the
+ // frame.
+ if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
+ Fn.getFrameInfo()->setMachineModuleInfo(MMI);
+
+ // Allow the target machine to make some adjustments to the function
+ // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+ TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+ // Scan the function for modified callee saved registers and insert spill
+ // code for any callee saved registers that are modified. Also calculate
+ // the MaxCallFrameSize and HasCalls variables for the function's frame
+ // information, and eliminate call frame pseudo instructions.
+ calculateCalleeSavedRegisters(Fn);
+
+ // Determine placement of CSR spill/restore code:
+ // - with shrink wrapping, place spills and restores to tightly
+ // enclose regions in the Machine CFG of the function where
+ // they are used. Without shrink wrapping
+ // - default (no shrink wrapping), place all spills in the
+ // entry block, all restores in return blocks.
+ placeCSRSpillsAndRestores(Fn);
+
+ // Add the code to save and restore the callee saved registers
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TRI->processFunctionBeforeFrameFinalized(Fn);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters
+ // must be called before this function in order to set the HasCalls
+ // and MaxCallFrameSize variables.
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ delete RS;
+ clearAllSets();
+ return true;
+}
+
+#if 0
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+#endif
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for
+/// the function's frame information, and eliminate call frame pseudo
+/// instructions.
+///
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+
+ // Get the callee saved register list...
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+
+  // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers and no call
+ // frame setup/destroy pseudo instructions.
+ if ((CSRegs == 0 || CSRegs[0] == 0) &&
+ FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ return;
+
+ unsigned MaxCallFrameSize = 0;
+ bool HasCalls = false;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ HasCalls = true;
+ FrameSDOps.push_back(I);
+ }
+
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+ FFI->setHasCalls(HasCalls);
+ FFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
+ MachineBasicBlock::iterator I = FrameSDOps[i];
+ // If call frames are not being included as part of the stack frame,
+ // and there is no dynamic allocation (therefore referencing frame slots
+ // off sp), leave the pseudo ops alone. We'll eliminate them later.
+ if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+ RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+
+ // Now figure out which *callee saved* registers are modified by the current
+ // function, thus needing to be saved and restored in the prolog/epilog.
+ //
+ const TargetRegisterClass* const *CSRegClasses =
+ RegInfo->getCalleeSavedRegClasses(&Fn);
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ // If the reg is modified, save it!
+ CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ } else {
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+ *AliasSet; ++AliasSet) { // Check alias registers too.
+ if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
+ CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ break;
+ }
+ }
+ }
+ }
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const std::pair<unsigned,int> *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = CSI[i].getRegClass();
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+ FixedSlot->first != Reg)
+ ++FixedSlot;
+
+ int FrameIdx;
+ if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+      // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller.
+ // Use the min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
+ }
+ CSI[i].setFrameIdx(FrameIdx);
+ }
+
+ FFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ MachineBasicBlock::iterator I;
+
+ if (! ShrinkWrapThisFunction) {
+ // Spill using target interface.
+ I = EntryBlock->begin();
+ if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ EntryBlock->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
+ CSI[i].getFrameIdx(), CSI[i].getRegClass());
+ }
+ }
+
+ // Restore using target interface.
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+ MachineBasicBlock* MBB = ReturnBlocks[ri];
+ I = MBB->end(); --I;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+      // terminators that precede it.
+ if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+ CSI[i].getFrameIdx(),
+ CSI[i].getRegClass());
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ // Insert spills.
+ std::vector<CalleeSavedInfo> blockCSI;
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet save = BI->second;
+
+ if (save.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = save.begin(),
+ RE = save.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not collect callee saved register info");
+
+ I = MBB->begin();
+
+ // When shrink wrapping, use stack slot stores/loads.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(blockCSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
+ true,
+ blockCSI[i].getFrameIdx(),
+ blockCSI[i].getRegClass());
+ }
+ }
+
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restore = BI->second;
+
+ if (restore.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = restore.begin(),
+ RE = restore.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not find callee saved register info");
+
+ // If MBB is empty and needs restores, insert at the _beginning_.
+ if (MBB->empty()) {
+ I = MBB->begin();
+ } else {
+ I = MBB->end();
+ --I;
+
+ // Skip over all terminator instructions, which are part of the
+ // return sequence.
+ if (! I->getDesc().isTerminator()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+ }
+ }
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+    // terminators that precede it.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
+ blockCSI[i].getFrameIdx(),
+ blockCSI[i].getRegClass());
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+  // If the stack grows down, we need to add the size to find the lowest
+  // address of the object.
+ if (StackGrowsDown)
+ Offset += FFI->getObjectSize(FrameIdx);
+
+ unsigned Align = FFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
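+  // e.g. Offset 13, Align 8: (13 + 8 - 1) / 8 * 8 == 16.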
+
+ if (StackGrowsDown) {
+ FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ FFI->setObjectOffset(FrameIdx, Offset);
+ Offset += FFI->getObjectSize(FrameIdx);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+ unsigned MaxAlign = FFI->getMaxAlignment();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int64_t Offset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ Offset = -Offset;
+ assert(Offset >= 0
+ && "Local area offset should be in direction of stack growth");
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+      // The maximum distance from the stack pointer is at the lower address
+      // of the object -- which is given by the offset. For a down-growing
+      // stack the offset is negative, so we negate it to get the distance.
+ FixedOff = -FFI->getObjectOffset(i);
+ } else {
+      // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      // If the stack grows down, we need to add the size to find the
+      // lowest address of the object.
+ Offset += FFI->getObjectSize(i);
+
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack,
+ // then increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ FFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack,
+ // then increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ FFI->setObjectOffset(i, Offset);
+ Offset += FFI->getObjectSize(i);
+ }
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // frame pointer if a frame pointer is required.
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ if (RS && RegInfo->hasFP(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ if (FFI->getStackProtectorIndex() >= 0)
+ AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign);
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ if (FFI->getStackProtectorIndex() == (int)i)
+ continue;
+
+ AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && !RegInfo->hasFP(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Round up the size to a multiple of the alignment, but only if there are
+  // calls or allocas in the function. This ensures that any calls to
+  // subroutines have their stack frames suitably aligned.
+ // Also do this if we need runtime alignment of the stack. In this case
+ // offsets will be relative to SP not FP; round up the stack size so this
+ // works.
+ if (!RegInfo->targetHandlesStackFrameRounding() &&
+ (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) &&
+ FFI->getObjectIndexEnd() != 0))) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (RegInfo->hasReservedCallFrame(Fn))
+ Offset += FFI->getMaxCallFrameSize();
+
+ unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
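+    // e.g. Offset 52, alignment 16: AlignMask == 15, (52 + 15) & ~15 == 64.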
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
+
+ // Remember the required stack alignment in case targets need it to perform
+ // dynamic stack alignment.
+ FFI->setMaxAlignment(MaxAlign);
+}
+
+
+/// insertPrologEpilogCode - Add prolog and epilog code to the function. (The
+/// callee saved register spill/restore code has already been inserted by
+/// insertCSRSpillsAndRestores at this point.)
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ // Add prologue to the function...
+ TRI->emitPrologue(Fn);
+
+ // Add epilogue to restore the callee-save registers in each exiting block
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+    // If the last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().getDesc().isReturn())
+ TRI->emitEpilogue(Fn, *I);
+ }
+}
+
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+ const TargetMachine &TM = Fn.getTarget();
+ assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameInfo *TFI = TM.getFrameInfo();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
+
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ int SPAdj = 0; // SP offset due to call frame setup / destroy.
+ if (RS) RS->enterBasicBlock(BB);
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ if (I->getOpcode() == TargetInstrInfo::DECLARE) {
+ // Ignore it.
+ ++I;
+ continue;
+ }
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ // Remember how much SP has been adjusted to create the call
+ // frame.
+ int Size = I->getOperand(0).getImm();
+
+ if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+ Size = -Size;
+
+ SPAdj += Size;
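+        // SPAdj now records the net SP displacement due to the open call
+        // frame; it is passed to eliminateFrameIndex() below so SP-relative
+        // frame references can be corrected while the frame is open.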
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = prior(I);
+ TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isFI()) {
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+
+ TRI.eliminateFrameIndex(MI, SPAdj, RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = 0;
+ break;
+ }
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && MI) RS->forward(MI);
+ }
+
+ assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+ }
+}
+
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 0000000..c158dd8
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,167 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+ class RegScavenger;
+ class MachineBasicBlock;
+
+ class PEI : public MachineFunctionPass {
+ public:
+ static char ID;
+ PEI() : MachineFunctionPass(&ID) {}
+
+ const char *getPassName() const {
+ return "Prolog/Epilog Insertion & Frame Finalization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Analysis info for spill/restore placement.
+ // "CSR": "callee saved register".
+
+ // CSRegSet contains indices into the Callee Saved Register Info
+ // vector built by calculateCalleeSavedRegisters() and accessed
+ // via MF.getFrameInfo()->getCalleeSavedInfo().
+ typedef SparseBitVector<> CSRegSet;
+
+ // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+ // saved register indices.
+ typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+ // Set and maps for computing CSR spill/restore placement:
+ // used in function (UsedCSRegs)
+ // used in a basic block (CSRUsed)
+ // anticipatable in a basic block (Antic{In,Out})
+ // available in a basic block (Avail{In,Out})
+ // to be spilled at the entry to a basic block (CSRSave)
+ // to be restored at the end of a basic block (CSRRestore)
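+    // (Anticipatability and availability are the standard backward and
+    // forward dataflow properties used by code-placement algorithms.)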
+ CSRegSet UsedCSRegs;
+ CSRegBlockMap CSRUsed;
+ CSRegBlockMap AnticIn, AnticOut;
+ CSRegBlockMap AvailIn, AvailOut;
+ CSRegBlockMap CSRSave;
+ CSRegBlockMap CSRRestore;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock* EntryBlock;
+ SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+ // Map of MBBs to top level MachineLoops.
+ DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+    // Flag to control shrink wrapping per-function: the pass
+    // may choose to skip shrink wrapping for certain functions.
+ bool ShrinkWrapThisFunction;
+
+#ifndef NDEBUG
+ // Machine function handle.
+ MachineFunction* MF;
+
+ // Flag indicating that the current function
+ // has at least one "short" path in the machine
+ // CFG from the entry block to an exit block.
+ bool HasFastExitPath;
+#endif
+
+ bool calculateSets(MachineFunction &Fn);
+ bool calcAnticInOut(MachineBasicBlock* MBB);
+ bool calcAvailInOut(MachineBasicBlock* MBB);
+ void calculateAnticAvail(MachineFunction &Fn);
+ bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks);
+ bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+ bool calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills);
+ bool calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores);
+ void placeSpillsAndRestores(MachineFunction &Fn);
+ void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Initialize DFA sets, called before iterations.
+ void clearAnticAvailSets();
+ // Clear all sets constructed by shrink wrapping.
+ void clearAllSets();
+
+ // Initialize all shrink wrapping data.
+ void initShrinkWrappingInfo();
+
+    // Conveniences for dealing with machine loops.
+ MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+ MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+    // Propagate CSRs used in MBB to all MBBs of loop LP.
+ void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+ // Debugging methods.
+
+ // Mark this function as having fast exit paths.
+ void findFastExitPath();
+
+ // Verify placement of spills/restores.
+ void verifySpillRestorePlacement();
+
+ std::string getBasicBlockName(const MachineBasicBlock* MBB);
+ std::string stringifyCSRegSet(const CSRegSet& s);
+ void dumpSet(const CSRegSet& s);
+ void dumpUsed(MachineBasicBlock* MBB);
+ void dumpAllUsed();
+ void dumpSets(MachineBasicBlock* MBB);
+ void dumpSets1(MachineBasicBlock* MBB);
+ void dumpAllSets();
+ void dumpSRSets();
+#endif
+
+ };
+} // End llvm namespace
+#endif
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 0000000..b4c20e6
--- /dev/null
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,92 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+
+static ManagedStatic<PseudoSourceValue[4]> PSVs;
+
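+// Note: print() below identifies each of these four singletons by its
+// offset within the PSVs array, so this order must match PSVNames.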
+const PseudoSourceValue *PseudoSourceValue::getStack()
+{ return &(*PSVs)[0]; }
+const PseudoSourceValue *PseudoSourceValue::getGOT()
+{ return &(*PSVs)[1]; }
+const PseudoSourceValue *PseudoSourceValue::getJumpTable()
+{ return &(*PSVs)[2]; }
+const PseudoSourceValue *PseudoSourceValue::getConstantPool()
+{ return &(*PSVs)[3]; }
+
+static const char *const PSVNames[] = {
+ "Stack",
+ "GOT",
+ "JumpTable",
+ "ConstantPool"
+};
+
+PseudoSourceValue::PseudoSourceValue() :
+ Value(PointerType::getUnqual(Type::Int8Ty), PseudoSourceValueVal) {}
+
+void PseudoSourceValue::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+void PseudoSourceValue::print(raw_ostream &OS) const {
+ OS << PSVNames[this - *PSVs];
+}
+
+namespace {
+ /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
+ /// for holding FixedStack values, which must include a frame
+ /// index.
+ class VISIBILITY_HIDDEN FixedStackPseudoSourceValue
+ : public PseudoSourceValue {
+ const int FI;
+ public:
+ explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {}
+
+ virtual bool isConstant(const MachineFrameInfo *MFI) const;
+
+ virtual void print(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+ }
+ };
+}
+
+static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+ const PseudoSourceValue *&V = (*FSValues)[FI];
+ if (!V)
+ V = new FixedStackPseudoSourceValue(FI);
+ return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (this == getStack())
+ return false;
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return true;
+ assert(0 && "Unknown PseudoSourceValue!");
+ return false;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
new file mode 100644
index 0000000..64374ce
--- /dev/null
+++ b/lib/CodeGen/README.txt
@@ -0,0 +1,208 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+ mul lr, r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ ldr r4, [sp, #+52]
+ mla r4, r3, lr, r4
+
+can be:
+
+ mul lr, r4, lr
+ mov r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+ mul r4, r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ mla r4, r3, lr, r4
+
+It also increases the likelihood that the store may become dead.
+
+//===---------------------------------------------------------------------===//
+
+I think we should have a "hasSideEffects" flag (which is automatically set for
+stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able
+to remat any instruction that has no side effects, if it can handle it and if
+profitable.
+
+For now, I'd suggest having the remat stuff work like this:
+
+1. I need to spill/reload this thing.
+2. Check to see if it has side effects.
+3. Check to see if it is simple enough: e.g. it only has one register
+destination and no register input.
+4. If so, clone the instruction, do the xform, etc.
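+
+A tiny sketch of that decision, with hypothetical stand-in flags (the real
+checks would query the instruction's .td flags and operands):
+
+  // Hypothetical illustration only, not LLVM API.
+  static bool shouldRemat(bool HasSideEffects, bool OneRegDefNoRegInputs) {
+    if (HasSideEffects) return false;        // step 2: must have no side effects
+    if (!OneRegDefNoRegInputs) return false; // step 3: simple enough to clone
+    return true;                             // step 4: clone and do the xform
+  }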
+
+Advantages of this are:
+
+1. the .td file describes the behavior of the instructions, not the way the
+ algorithm should work.
+2. as remat gets smarter in the future, we shouldn't have to be changing the .td
+ files.
+3. it is easier to explain what the flag means in the .td file, because you
+ don't have to pull in the explanation of how the current remat algo works.
+
+Some potential added complexities:
+
+1. Some instructions have to be glued to their predecessor or successor: all
+   of the PC-relative instructions and the condition-code-setting
+   instructions. We could mark them as hasSideEffects, but that's not quite
+   right. PC-relative loads from constant pools can be remat'ed, for example,
+   but it requires more than just cloning the instruction. Some instructions
+   can be remat'ed but expand to more than one instruction, and the allocator
+   will have to make a decision.
+
+2. As stated in 1, remat is not as simple as cloning in some cases. The
+   target will have to decide how to remat it. For example, an ARM 2-piece
+   constant generation instruction is remat'ed as a load from the constant
+   pool.
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+ ...
+ %reg1037 = ADDri %reg1039, 1
+ %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+ Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+ Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+ %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0>
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. register scavenger) to assign it a free register to allow
+reuse:
+ ldr r3, [sp, #+4]
+ add r3, r3, #3
+ ldr r2, [sp, #+8]
+ add r2, r2, #2
+ ldr r1, [sp, #+4] <==
+ add r1, r1, #1
+ ldr r0, [sp, #+4]
+ add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts CSE out of loops. Sometimes this can have negative
+side effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure, R1, R2, R3, can be spilled. We need
+to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4 @ re-materialized
+load [i + R1]
+...
+R2 = X + 7 @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+//===---------------------------------------------------------------------===//
+
+It's not always a good idea to choose rematerialization over spilling. If all
+the load / store instructions would be folded then spilling is cheaper because
+it won't require new live intervals / registers. See 2003-05-31-LongShifts for
+an example.
+
+//===---------------------------------------------------------------------===//
+
+With a copying garbage collector, derived pointers must not be retained across
+collector safe points; the collector could move the objects and invalidate the
+derived pointer. This is bad enough in the first place, but safe points can
+crop up unpredictably. Consider:
+
+ %array = load { i32, [0 x %obj] }** %array_addr
+ %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+ %old = load %obj** %nth_el
+ %z = div i64 %x, %y
+ store %obj* %new, %obj** %nth_el
+
+If the i64 division is lowered to a libcall, then a safe point will (must)
+appear for the call site. If a collection occurs, %array and %nth_el no longer
+point into the correct object.
+
+The fix for this is to copy address calculations so that dependent pointers
+are never live across safe point boundaries. But the loads cannot be copied
+like this if there was an intervening store, so this may be hard to get right.
+
+Only a concurrent mutator can trigger a collection at the libcall safe point.
+So single-threaded programs do not have this requirement, even with a copying
+collector. Still, LLVM optimizations would probably undo a front-end's careful
+work.
+
+//===---------------------------------------------------------------------===//
+
+The ocaml frametable structure supports liveness information. It would be good
+to support it.
+
+//===---------------------------------------------------------------------===//
+
+The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be
+revisited. The check is there to work around a misuse of directives in inline
+assembly.
+
+//===---------------------------------------------------------------------===//
+
+It would be good to detect collector/target compatibility instead of silently
+doing the wrong thing.
+
+//===---------------------------------------------------------------------===//
+
+It would be really nice to be able to write patterns in .td files for copies,
+which would eliminate a bunch of explicit predicates on them (e.g. no side
+effects). Once this is in place, it would be even better to have tblgen
+synthesize the various copy insertion/inspection methods in TargetInstrInfo.
+
+//===---------------------------------------------------------------------===//
+
+Stack coloring improvements:
+
+1. Do proper LiveStackAnalysis on all stack objects including those which are
+ not spill slots.
+2. Reorder objects to fill in gaps between objects.
+ e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4
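+
+A toy illustration of item 2, packing by size only (a real implementation
+must also respect alignment and liveness):
+
+  #include <algorithm>
+  #include <vector>
+  // Grouping equal-sized objects removes the padding gaps that appear
+  // between mixed-size neighbors.
+  static std::vector<unsigned> packBySize(std::vector<unsigned> Sizes) {
+    std::stable_sort(Sizes.begin(), Sizes.end()); // 4,1,4,1,1,1,4 -> 1,1,1,1,4,4,4
+    return Sizes;
+  }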
diff --git a/lib/CodeGen/RegAllocBigBlock.cpp b/lib/CodeGen/RegAllocBigBlock.cpp
new file mode 100644
index 0000000..91e4099
--- /dev/null
+++ b/lib/CodeGen/RegAllocBigBlock.cpp
@@ -0,0 +1,892 @@
+//===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RABigBlock class
+//
+//===----------------------------------------------------------------------===//
+
+// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+// allocator works on one basic block at a time, oblivious to others.
+// However, the algorithm used here is suited for long blocks of
+// instructions - registers are spilled by greedily choosing those holding
+// values that will not be needed for the longest amount of time. This works
+// particularly well for blocks with 10 or more times as many instructions
+// as machine registers, but can be used for general code.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: - automagically invoke linearscan for (groups of) small BBs?
+// - break ties when picking regs? (probably not worth it in a
+// JIT context)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumFolded, "Number of loads/stores folded into instructions");
+
+static RegisterRegAlloc
+ bigBlockRegAlloc("bigblock", "Big-block register allocator",
+ createBigBlockRegisterAllocator);
+
+namespace {
+/// VRegKeyInfo - Defines magic values required to use VirtRegs as DenseMap
+/// keys.
+ struct VRegKeyInfo {
+ static inline unsigned getEmptyKey() { return -1U; }
+ static inline unsigned getTombstoneKey() { return -2U; }
+ static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+ static unsigned getHashValue(const unsigned &Key) { return Key; }
+ };
+
+
+/// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+/// allocator works on one basic block at a time, oblivious to others.
+/// However, the algorithm used here is suited for long blocks of
+/// instructions - registers are spilled by greedily choosing those holding
+/// values that will not be needed for the longest amount of time. This works
+/// particularly well for blocks with 10 or more times as many instructions
+/// as machine registers, but can be used for general code.
+///
+/// TODO: - automagically invoke linearscan for (groups of) small BBs?
+/// - break ties when picking regs? (probably not worth it in a
+/// JIT context)
+///
+ class VISIBILITY_HIDDEN RABigBlock : public MachineFunctionPass {
+ public:
+ static char ID;
+ RABigBlock() : MachineFunctionPass(&ID) {}
+ private:
+ /// TM - For getting at TargetMachine info
+ ///
+ const TargetMachine *TM;
+
+ /// MF - Our generic MachineFunction pointer
+ ///
+ MachineFunction *MF;
+
+ /// RegInfo - For dealing with machine register info (aliases, folds
+ /// etc)
+ const TargetRegisterInfo *RegInfo;
+
+ typedef SmallVector<unsigned, 2> VRegTimes;
+
+ /// VRegReadTable - maps VRegs in a BB to the set of times they are read
+ ///
+ DenseMap<unsigned, VRegTimes*, VRegKeyInfo> VRegReadTable;
+
+ /// VRegReadIdx - keeps track of the "current time" in terms of
+ /// positions in VRegReadTable
+ DenseMap<unsigned, unsigned , VRegKeyInfo> VRegReadIdx;
+
+ /// StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ /// values are spilled.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ /// Virt2PhysRegMap - This map contains entries for each virtual register
+ /// that is currently available in a physical register.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+ /// PhysRegsUsed - This array is effectively a map, containing entries for
+    /// each physical register that currently has a value (i.e., it is in
+ /// Virt2PhysRegMap). The value mapped to is the virtual register
+ /// corresponding to the physical register (the inverse of the
+ /// Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
+ /// because it is used by a future instruction, and to -2 if it is not
+ /// allocatable. If the entry for a physical register is -1, then the
+ /// physical register is "not in the map".
+ ///
+ std::vector<int> PhysRegsUsed;
+
+ /// VirtRegModified - This bitset contains information about which virtual
+ /// registers need to be spilled back to memory when their registers are
+ /// scavenged. If a virtual register has simply been rematerialized, there
+ /// is no reason to spill it to memory when we need the register back.
+ ///
+ std::vector<int> VirtRegModified;
+
+    /// MBBLastInsnTime - the number of the last instruction in MBB
+ ///
+ int MBBLastInsnTime;
+
+    /// MBBCurTime - the number of the instruction currently being processed
+ ///
+ int MBBCurTime;
+
+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+ return Virt2PhysRegMap[VirtReg];
+ }
+
+ unsigned &getVirt2StackSlot(unsigned VirtReg) {
+ return StackSlotForVirtReg[VirtReg];
+ }
+
+ /// markVirtRegModified - Lets us flip bits in the VirtRegModified bitset
+ ///
+ void markVirtRegModified(unsigned Reg, bool Val = true) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ if (VirtRegModified.size() <= Reg)
+ VirtRegModified.resize(Reg+1);
+ VirtRegModified[Reg] = Val;
+ }
+
+ /// isVirtRegModified - Lets us query the VirtRegModified bitset
+ ///
+ bool isVirtRegModified(unsigned Reg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+ && "Illegal virtual register!");
+ return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+ }
+
+ public:
+ /// getPassName - returns the BigBlock allocator's name
+ ///
+ virtual const char *getPassName() const {
+ return "BigBlock Register Allocator";
+ }
+
+    /// getAnalysisUsage - declares the required analyses
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// runOnMachineFunction - Register allocate the whole function
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ ///
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+ /// FillVRegReadTable - Fill out the table of vreg read times given a BB
+ ///
+ void FillVRegReadTable(MachineBasicBlock &MBB);
+
+ /// areRegsEqual - This method returns true if the specified registers are
+ /// related to each other. To do this, it checks to see if they are equal
+ /// or if the first register is in the alias set of the second register.
+ ///
+ bool areRegsEqual(unsigned R1, unsigned R2) const {
+ if (R1 == R2) return true;
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+ *AliasSet; ++AliasSet) {
+ if (*AliasSet == R1) return true;
+ }
+ return false;
+ }
+
+ /// getStackSpaceFor - This returns the frame index of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// removePhysReg - This method marks the specified physical register as no
+ /// longer being in use.
+ ///
+ void removePhysReg(unsigned PhysReg);
+
+ /// spillVirtReg - This method spills the value specified by PhysReg into
+ /// the virtual register slot specified by VirtReg. It then updates the RA
+ /// data structures to indicate the fact that PhysReg is now available.
+ ///
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// spillPhysReg - This method spills the specified physical register into
+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+ /// true, then the request is ignored if the physical register does not
+ /// contain a virtual register.
+ ///
+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs = false);
+
+ /// assignVirtToPhysReg - This method updates local state so that we know
+ /// that PhysReg is the proper container for VirtReg now. The physical
+ /// register must not be used for anything else when this is called.
+ ///
+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+ /// isPhysRegAvailable - Return true if the specified physical register is
+ /// free and available for use. This also includes checking to see if
+ /// aliased registers are all free...
+ ///
+ bool isPhysRegAvailable(unsigned PhysReg) const;
+
+ /// getFreeReg - Look to see if there is a free register available in the
+ /// specified register class. If not, return 0.
+ ///
+ unsigned getFreeReg(const TargetRegisterClass *RC);
+
+ /// chooseReg - Pick a physical register to hold the specified
+ /// virtual register by choosing the one which will be read furthest
+ /// in the future.
+ ///
+ unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned VirtReg);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+ /// register use to refer to a physical register. This method may do this
+ /// in one of several ways: if the register is available in a physical
+ /// register already, it uses that physical register. If the value is not
+ /// in a physical register, and if there are physical registers available,
+ /// it loads it into a register. If register pressure is high, and it is
+ /// possible, it tries to fold the load of the virtual register into the
+ /// instruction itself. It avoids doing this if register pressure is low to
+ /// improve the chance that subsequent instructions can use the reloaded
+ /// value. This method returns the modified instruction.
+ ///
+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum);
+
+ };
+ char RABigBlock::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RABigBlock::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location Reg would belong...
+ int FrameIdx = getVirt2StackSlot(VirtReg);
+
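+  // Slot numbers are stored biased by one so that the map's default value
+  // of zero can mean "no stack slot assigned yet".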
+ if (FrameIdx)
+ return FrameIdx - 1; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ getVirt2StackSlot(VirtReg) = FrameIdx + 1;
+ return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RABigBlock::removePhysReg(unsigned PhysReg) {
+ PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RABigBlock::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+ assert(VirtReg && "Spilling a physical register is illegal!"
+ " Must not have appropriate kill for the register or use exists beyond"
+ " the intended one.");
+ DOUT << " Spilling register " << RegInfo->getName(PhysReg)
+ << " containing %reg" << VirtReg;
+
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+
+ if (!isVirtRegModified(VirtReg))
+ DOUT << " which has not been modified, so no store necessary!";
+
+ // Otherwise, there is a virtual register corresponding to this physical
+ // register. We only need to spill it into its stack slot if it has been
+ // modified.
+ if (isVirtRegModified(VirtReg)) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DOUT << " to stack slot #" << FrameIndex;
+ TII->storeRegToStackSlot(MBB, I, PhysReg, true, FrameIndex, RC);
+ ++NumStores; // Update statistics
+ }
+
+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+
+ DOUT << "\n";
+ removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs) {
+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
+ assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+ } else {
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register.
+ PhysRegsUsed[*AliasSet] != -2) // If allocatable.
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+ // Update information to note the fact that this register was just used, and
+ // it holds VirtReg.
+ PhysRegsUsed[PhysReg] = VirtReg;
+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const {
+ if (PhysRegsUsed[PhysReg] != -1) return false;
+
+ // If the selected register aliases any other allocated registers, it is
+ // not free!
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
+ return false; // Can't use this reg then.
+ return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) {
+ // Get iterators defining the range of registers that are valid to allocate in
+ // this class, which also specifies the preferred allocation order.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegAvailable(*RI)) { // Is reg unused?
+ assert(*RI != 0 && "Cannot use register!");
+ return *RI; // Found an unused register!
+ }
+ return 0;
+}
+
+
+/// chooseReg - Pick a physical register to hold the specified
+/// virtual register by choosing the one whose value will be read
+/// furthest in the future.
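+/// (This is the classic "furthest next use" eviction heuristic, in the
+/// spirit of Belady's algorithm.)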
+///
+unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned VirtReg) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ // First check to see if we have a free register of the requested type...
+ unsigned PhysReg = getFreeReg(RC);
+
+ // If we didn't find an unused register, find the one which will be
+ // read at the most distant point in time.
+ if (PhysReg == 0) {
+ unsigned delay=0, longest_delay=0;
+ VRegTimes* ReadTimes;
+
+ unsigned curTime = MBBCurTime;
+
+ // for all physical regs in the RC,
+ for(TargetRegisterClass::iterator pReg = RC->begin();
+ pReg != RC->end(); ++pReg) {
+ // how long until they're read?
+      if(PhysRegsUsed[*pReg]>0) { // only regs currently holding a vreg
+ ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]];
+ if(ReadTimes && !ReadTimes->empty()) {
+ unsigned& pt = VRegReadIdx[PhysRegsUsed[*pReg]];
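+            // ReadTimes is sorted ascending (FillVRegReadTable walks the
+            // block in order), so skip reads that are already behind us.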
+ while(pt < ReadTimes->size() && (*ReadTimes)[pt] < curTime) {
+ ++pt;
+ }
+
+ if(pt < ReadTimes->size())
+ delay = (*ReadTimes)[pt] - curTime;
+ else
+ delay = MBBLastInsnTime + 1 - curTime;
+ } else {
+          // This register is only defined, never read, in this MBB;
+          // therefore the next read happens after the end of this MBB.
+ delay = MBBLastInsnTime + 1 - curTime;
+ }
+
+ if(delay > longest_delay) {
+ longest_delay = delay;
+ PhysReg = *pReg;
+ }
+ }
+ }
+
+ if(PhysReg == 0) { // ok, now we're desperate. We couldn't choose
+ // a register to spill by looking through the
+ // read timetable, so now we just spill the
+ // first allocatable register we find.
+
+ // for all physical regs in the RC,
+ for(TargetRegisterClass::iterator pReg = RC->begin();
+ pReg != RC->end(); ++pReg) {
+ // if we find a register we can spill
+ if(PhysRegsUsed[*pReg]>=-1)
+ PhysReg = *pReg; // choose it to be spilled
+ }
+ }
+
+ assert(PhysReg && "couldn't choose a register to spill :( ");
+ // TODO: assert that RC->contains(PhysReg) / handle aliased registers?
+
+ // since we needed to look in the table we need to spill this register.
+ spillPhysReg(MBB, I, PhysReg);
+ }
+
+ // assign the vreg to our chosen physical register
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg; // and return it
+}
+
+
+/// reloadVirtReg - This method transforms an instruction with a virtual
+/// register use to one that references a physical register. It does this as
+/// follows:
+///
+/// 1) If the register is already in a physical register, it uses it.
+/// 2) Otherwise, if there is a free physical register, it uses that.
+/// 3) Otherwise, it calls chooseReg() to get the physical register
+/// holding the most distantly needed value, generating a spill in
+/// the process.
+///
+/// This method returns the modified instruction.
+MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum) {
+ unsigned VirtReg = MI->getOperand(OpNum).getReg();
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+
+ // If the virtual register is already available in a physical register,
+ // just update the instruction and return.
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+ MI->getOperand(OpNum).setReg(PR);
+ return MI;
+ }
+
+ // Otherwise, if we have free physical registers available to hold the
+ // value, use them.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ unsigned PhysReg = getFreeReg(RC);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+ if (PhysReg) { // we have a free register, so use it.
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ } else { // no free registers available.
+ // try to fold the spill into the instruction
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpNum);
+ if(MachineInstr* FMI = TII->foldMemoryOperand(*MF, MI, Ops, FrameIndex)) {
+ ++NumFolded;
+ FMI->copyKillDeadInfo(MI);
+ return MBB.insert(MBB.erase(MI), FMI);
+ }
+
+ // determine which of the physical registers we'll kill off, since we
+ // couldn't fold.
+ PhysReg = chooseReg(MBB, MI, VirtReg);
+ }
+
+ // this virtual register is now unmodified (since we just reloaded it)
+ markVirtRegModified(VirtReg, false);
+
+ DOUT << " Reloading %reg" << VirtReg << " into "
+ << RegInfo->getName(PhysReg) << "\n";
+
+ // Add move instruction(s)
+ TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ ++NumLoads; // Update statistics
+
+ MF->getRegInfo().setPhysRegUsed(PhysReg);
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ return MI;
+}
+
+/// Fill out the vreg read timetable. Since ReadTime increases
+/// monotonically, the individual readtime sets will be sorted
+/// in ascending order.
+void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII;
+ unsigned ReadTime;
+
+ for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) {
+ MachineInstr *MI = MII;
+
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // look for vreg reads..
+ if (MO.isReg() && !MO.isDef() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ // ..and add them to the read table.
+ VRegTimes* &Times = VRegReadTable[MO.getReg()];
+ if(!VRegReadTable[MO.getReg()]) {
+ Times = new VRegTimes;
+ VRegReadIdx[MO.getReg()] = 0;
+ }
+ Times->push_back(ReadTime);
+ }
+ }
+
+ }
+
+ MBBLastInsnTime = ReadTime;
+
+ for(DenseMap<unsigned, VRegTimes*, VRegKeyInfo>::iterator Reads = VRegReadTable.begin();
+ Reads != VRegReadTable.end(); ++Reads) {
+ if(Reads->second) {
+ DOUT << "Reads[" << Reads->first << "]=" << Reads->second->size() << "\n";
+ }
+ }
+}
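+
+// Because each VRegTimes vector is filled in increasing ReadTime order, a
+// "next read at or after the current time" query can be answered by walking
+// forward from the saved per-vreg index -- a sketch, using the containers
+// declared for this pass:
+//
+//   VRegTimes &T = *VRegReadTable[VReg];
+//   unsigned  &i = VRegReadIdx[VReg];
+//   while (i < T.size() && T[i] < (unsigned)MBBCurTime) ++i;
+//   unsigned NextRead = (i < T.size()) ? T[i] : MBBLastInsnTime;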
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. a partial-register update.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ MO.isDef() && !MO.isDead())
+ return true;
+ }
+ return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. a partial-register update.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ !MO.isDef() && MO.isKill())
+ return true;
+ }
+ return false;
+}
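+
+// Illustration of the pattern both helpers above look for (x86 used as an
+// assumed example): a sub-register def such as "AX = ..." is modeled as a
+// read/mod/write of the enclosing super-register, so the instruction carries
+// an implicit use (marked kill) and an implicit def of EAX alongside the
+// explicit def of AX.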
+
+
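+// Summary of the PhysRegsUsed encoding the code below relies on: -2 marks an
+// unallocatable register (e.g. ESP), -1 a free register, 0 a register that
+// is reserved but holds no virtual register (live-ins, fresh defs), and a
+// positive value names the virtual register currently held.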
+void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII = MBB.begin();
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+
+ DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+ // If this is the first basic block in the machine function, add live-in
+ // registers as active.
+ if (&MBB == &*MF->begin()) {
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+  // Now sequentially allocate each instruction in the MBB.
+ MBBCurTime = -1;
+ while (MII != MBB.end()) {
+ MachineInstr *MI = MII++;
+ MBBCurTime++;
+ const TargetInstrDesc &TID = MI->getDesc();
+ DEBUG(DOUT << "\nTime=" << MBBCurTime << " Starting RegAlloc of: " << *MI;
+ DOUT << " Regs have values: ";
+ for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ DOUT << "[" << RegInfo->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ DOUT << "\n");
+
+ SmallVector<unsigned, 8> Kills;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // These are extra physical register kills when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ Kills.push_back(MO.getReg());
+ }
+ }
+
+ // Get the used operands into registers. This has the potential to spill
+ // incoming values if we are out of registers. Note that we completely
+ // ignore physical register uses here. We assume that if an explicit
+  // physical register is referenced by the instruction, it is guaranteed
+  // to be live-in, or the input is badly hosed.
+ //
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // here we are looking for only used operands (never def&use)
+ if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = reloadVirtReg(MBB, MI, i);
+ }
+
+ // If this instruction is the last user of this register, kill the
+ // value, freeing the register being used, so it doesn't need to be
+ // spilled to memory.
+ //
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned VirtReg = Kills[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // If the virtual register was never materialized into a register, it
+ // might not be in the map, but it won't hurt to zero it out anyway.
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else {
+ assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+ "Silently clearing a virtual register?");
+ }
+
+ if (PhysReg) {
+ DOUT << " Last use of " << RegInfo->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Last use of "
+ << RegInfo->getName(*AliasSet)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Loop over all of the operands of the instruction, spilling registers that
+ // are defined, and marking explicit destinations in the PhysRegsUsed map.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Loop over the implicit defs, spilling them as well.
+ if (TID.getImplicitDefs()) {
+ for (const unsigned *ImplicitDefs = TID.getImplicitDefs();
+ *ImplicitDefs; ++ImplicitDefs) {
+ unsigned Reg = *ImplicitDefs;
+ if (PhysRegsUsed[Reg] != -2) {
+ spillPhysReg(MBB, MI, Reg, true);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ }
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ SmallVector<unsigned, 8> DeadDefs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadDefs.push_back(MO.getReg());
+ }
+
+ // Okay, we have allocated all of the source operands and spilled any values
+ // that would be destroyed by defs of this instruction. Loop over the
+ // explicit defs and assign them to a register, spilling incoming values if
+ // we need to scavenge a register.
+ //
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = chooseReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ MI->getOperand(i).setReg(DestPhysReg); // Assign the output register
+ }
+ }
+
+ // If this instruction defines any registers that are immediately dead,
+ // kill them now.
+ //
+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+ unsigned VirtReg = DeadDefs[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ assert(PhysReg != 0);
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ }
+
+ if (PhysReg) {
+ DOUT << " Register " << RegInfo->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Register " << RegInfo->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it from live set\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Finally, if this is a noop copy instruction, zap it.
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (TII.isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ SrcReg == DstReg)
+ MBB.erase(MI);
+ }
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+ // Spill all physical registers holding virtual registers now.
+ for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+ if (unsigned VirtReg = PhysRegsUsed[i])
+ spillVirtReg(MBB, MI, VirtReg, i);
+ else
+ removePhysReg(i);
+ }
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function " << "\n";
+ MF = &Fn;
+ TM = &Fn.getTarget();
+ RegInfo = TM->getRegisterInfo();
+
+ PhysRegsUsed.assign(RegInfo->getNumRegs(), -1);
+
+ // At various places we want to efficiently check to see whether a register
+ // is allocatable. To handle this, we mark all unallocatable registers as
+ // being pinned down, permanently.
+ {
+ BitVector Allocable = RegInfo->getAllocatableSet(Fn);
+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+ if (!Allocable[i])
+ PhysRegsUsed[i] = -2; // Mark the reg unallocable.
+ }
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ Virt2PhysRegMap.grow(MF->getRegInfo().getLastVirtReg());
+ StackSlotForVirtReg.grow(MF->getRegInfo().getLastVirtReg());
+ VirtRegModified.resize(MF->getRegInfo().getLastVirtReg() -
+ TargetRegisterInfo::FirstVirtualRegister + 1, 0);
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // fill out the read timetable
+ FillVRegReadTable(*MBB);
+ // use it to allocate the BB
+ AllocateBasicBlock(*MBB);
+ // clear it
+ VRegReadTable.clear();
+ }
+
+ StackSlotForVirtReg.clear();
+ PhysRegsUsed.clear();
+ VirtRegModified.clear();
+ Virt2PhysRegMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createBigBlockRegisterAllocator() {
+ return new RABigBlock();
+}
+
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
new file mode 100644
index 0000000..ee118de
--- /dev/null
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -0,0 +1,1535 @@
+//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a linear scan register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "Spiller.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <set>
+#include <queue>
+#include <memory>
+#include <cmath>
+#include <iostream>
+
+using namespace llvm;
+
+STATISTIC(NumIters , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+ cl::desc("Use new spilling heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PreSplitIntervals("pre-alloc-split",
+ cl::desc("Pre-register allocation live interval splitting"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+NewSpillFramework("new-spill-framework",
+ cl::desc("New spilling framework"),
+ cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+ createLinearScanRegisterAllocator);
+
+namespace {
+ struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass {
+ static char ID;
+ RALinScan() : MachineFunctionPass(&ID) {}
+
+ typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+ typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
+ private:
+ /// RelatedRegClasses - This structure is built the first time a function is
+ /// compiled, and keeps track of which register classes have registers that
+ /// belong to multiple classes or have aliases that are in other classes.
+ EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+ DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+    // NextReloadMap - Maps a register to another register that is defined
+    // by a reload from the same stack slot, where both reloads occur in the
+    // same basic block.
+ DenseMap<unsigned, unsigned> NextReloadMap;
+
+ // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+ // un-favored for allocation.
+ SmallSet<unsigned, 8> DowngradedRegs;
+
+ // DowngradeMap - A map from virtual registers to physical registers being
+ // downgraded for the virtual registers.
+ DenseMap<unsigned, unsigned> DowngradeMap;
+
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ BitVector allocatableRegs_;
+ LiveIntervals* li_;
+ LiveStacks* ls_;
+ const MachineLoopInfo *loopInfo;
+
+ /// handled_ - Intervals are added to the handled_ set in the order of their
+    /// start value. This is used for backtracking.
+ std::vector<LiveInterval*> handled_;
+
+ /// fixed_ - Intervals that correspond to machine registers.
+ ///
+ IntervalPtrs fixed_;
+
+ /// active_ - Intervals that are currently being processed, and which have a
+ /// live range active for the current point.
+ IntervalPtrs active_;
+
+ /// inactive_ - Intervals that are currently being processed, but which have
+    /// a hole in their live range at the current point.
+ IntervalPtrs inactive_;
+
+ typedef std::priority_queue<LiveInterval*,
+ SmallVector<LiveInterval*, 64>,
+ greater_ptr<LiveInterval> > IntervalHeap;
+ IntervalHeap unhandled_;
+
+ /// regUse_ - Tracks register usage.
+ SmallVector<unsigned, 32> regUse_;
+ SmallVector<unsigned, 32> regUseBackUp_;
+
+ /// vrm_ - Tracks register assignments.
+ VirtRegMap* vrm_;
+
+ std::auto_ptr<VirtRegRewriter> rewriter_;
+
+ std::auto_ptr<Spiller> spiller_;
+
+ public:
+ virtual const char* getPassName() const {
+ return "Linear Scan Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ // Make sure PassManager knows which analyses to make available
+ // to coalescing and which analyses coalescing invalidates.
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ if (PreSplitIntervals)
+ AU.addRequiredID(PreAllocSplittingID);
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - register allocate the whole function
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ /// linearScan - the linear scan algorithm
+ void linearScan();
+
+ /// initIntervalSets - initialize the interval sets.
+ ///
+ void initIntervalSets();
+
+ /// processActiveIntervals - expire old intervals and move non-overlapping
+ /// ones to the inactive list.
+ void processActiveIntervals(unsigned CurPoint);
+
+ /// processInactiveIntervals - expire old intervals and move overlapping
+ /// ones to the active list.
+ void processInactiveIntervals(unsigned CurPoint);
+
+    /// hasNextReloadInterval - Return the next live interval that's being
+ /// defined by a reload from the same SS as the specified one.
+ LiveInterval *hasNextReloadInterval(LiveInterval *cur);
+
+ /// DowngradeRegister - Downgrade a register for allocation.
+ void DowngradeRegister(LiveInterval *li, unsigned Reg);
+
+ /// UpgradeRegister - Upgrade a register for allocation.
+ void UpgradeRegister(unsigned Reg);
+
+ /// assignRegOrStackSlotAtInterval - assign a register if one
+ /// is available, or spill.
+ void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+ void updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC);
+
+ /// findIntervalsToSpill - Determine the intervals to spill for the
+ /// specified interval. It's passed the physical registers whose spill
+ /// weight is the lowest among all the registers whose live intervals
+ /// conflict with the interval.
+ void findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals);
+
+ /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
+    /// try to allocate the definition the same register as the source
+    /// register, if that register is not redefined during the live time of
+    /// the interval. This eliminates a copy. This is used to coalesce copies which were not
+ /// coalesced away before allocation either due to dest and src being in
+ /// different register classes or because the coalescer was overly
+ /// conservative.
+ unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
+
+ ///
+ /// Register usage / availability tracking helpers.
+ ///
+
+ void initRegUses() {
+ regUse_.resize(tri_->getNumRegs(), 0);
+ regUseBackUp_.resize(tri_->getNumRegs(), 0);
+ }
+
+ void finalizeRegUses() {
+#ifndef NDEBUG
+ // Verify all the registers are "freed".
+ bool Error = false;
+ for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
+ if (regUse_[i] != 0) {
+ cerr << tri_->getName(i) << " is still in use!\n";
+ Error = true;
+ }
+ }
+ if (Error)
+ abort();
+#endif
+ regUse_.clear();
+ regUseBackUp_.clear();
+ }
+
+ void addRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ ++regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
+ ++regUse_[*as];
+ }
+
+ void delRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ assert(regUse_[physReg] != 0);
+ --regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
+ assert(regUse_[*as] != 0);
+ --regUse_[*as];
+ }
+ }
+
+ bool isRegAvail(unsigned physReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ return regUse_[physReg] == 0;
+ }
+
+ void backUpRegUses() {
+ regUseBackUp_ = regUse_;
+ }
+
+ void restoreRegUses() {
+ regUse_ = regUseBackUp_;
+ }
+
+ ///
+ /// Register handling helpers.
+ ///
+
+ /// getFreePhysReg - return a free physical register for this virtual
+ /// register interval if we have one, otherwise return 0.
+ unsigned getFreePhysReg(LiveInterval* cur);
+ unsigned getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs);
+
+ /// assignVirt2StackSlot - assigns this virtual register to a
+ /// stack slot. returns the stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+
+ void ComputeRelatedRegClasses();
+
+ template <typename ItTy>
+ void printIntervals(const char* const str, ItTy i, ItTy e) const {
+ if (str) DOUT << str << " intervals:\n";
+ for (; i != e; ++i) {
+ DOUT << "\t" << *i->first << " -> ";
+ unsigned reg = i->first->reg;
+ if (TargetRegisterInfo::isVirtualRegister(reg)) {
+ reg = vrm_->getPhys(reg);
+ }
+ DOUT << tri_->getName(reg) << '\n';
+ }
+ }
+ };
+ char RALinScan::ID = 0;
+}
+
+static RegisterPass<RALinScan>
+X("linearscan-regalloc", "Linear Scan Register Allocator");
+
+bool validateRegAlloc(MachineFunction *mf, LiveIntervals *lis,
+ VirtRegMap *vrm) {
+
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+ bool allocationValid = true;
+
+
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ LiveInterval *li = itr->second;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ continue;
+ }
+
+ if (vrm->hasPhys(li->reg)) {
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+
+ if (lis->hasInterval(vrm->getPhys(li->reg))) {
+ if (li->overlaps(lis->getInterval(vrm->getPhys(li->reg)))) {
+ std::cerr << "vreg " << li->reg << " overlaps its assigned preg "
+ << vrm->getPhys(li->reg) << "(" << tri->getName(vrm->getPhys(li->reg)) << ")\n";
+ }
+ }
+
+ TargetRegisterClass::iterator fReg =
+ std::find(trc->allocation_order_begin(*mf), trc->allocation_order_end(*mf),
+ vrm->getPhys(li->reg));
+
+ if (fReg == trc->allocation_order_end(*mf)) {
+ std::cerr << "preg " << vrm->getPhys(li->reg)
+ << "(" << tri->getName(vrm->getPhys(li->reg)) << ") is not in the allocation set for vreg "
+ << li->reg << "\n";
+ allocationValid &= false;
+ }
+ }
+ else {
+ std::cerr << "No preg for vreg " << li->reg << "\n";
+ // What about conflicting loads/stores?
+ continue;
+ }
+
+ for (LiveIntervals::iterator itr2 = next(itr); itr2 != end; ++itr2) {
+
+ LiveInterval *li2 = itr2->second;
+
+ if (li2->empty())
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li2->reg)) {
+ if (li->overlaps(*li2)) {
+ if (vrm->getPhys(li->reg) == li2->reg ||
+ tri->areAliases(vrm->getPhys(li->reg), li2->reg)) {
+ std::cerr << "vreg " << li->reg << " overlaps preg "
+ << li2->reg << "(" << tri->getName(li2->reg) << ") which aliases "
+ << vrm->getPhys(li->reg) << "(" << tri->getName(vrm->getPhys(li->reg)) << ")\n";
+ allocationValid &= false;
+ }
+ }
+ }
+ else {
+
+ if (!vrm->hasPhys(li2->reg)) {
+ continue;
+ }
+
+ if (li->overlaps(*li2)) {
+ if (vrm->getPhys(li->reg) == vrm->getPhys(li2->reg) ||
+ tri->areAliases(vrm->getPhys(li->reg), vrm->getPhys(li2->reg))) {
+ std::cerr << "vreg " << li->reg << " (preg " << vrm->getPhys(li->reg)
+ << ") overlaps vreg " << li2->reg << " (preg " << vrm->getPhys(li2->reg)
+ << ") and " << vrm->getPhys(li->reg) << " aliases " << vrm->getPhys(li2->reg) << "\n";
+ allocationValid &= false;
+ }
+ }
+ }
+ }
+
+ }
+
+ return allocationValid;
+
+}
+
+
+void RALinScan::ComputeRelatedRegClasses() {
+ // First pass, add all reg classes to the union, and determine at least one
+ // reg class that each register is in.
+ bool HasAliases = false;
+ for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+ E = tri_->regclass_end(); RCI != E; ++RCI) {
+ RelatedRegClasses.insert(*RCI);
+ for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+ I != E; ++I) {
+ HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
+
+ const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+ if (PRC) {
+ // Already processed this register. Just make sure we know that
+ // multiple register classes share a register.
+ RelatedRegClasses.unionSets(PRC, *RCI);
+ } else {
+ PRC = *RCI;
+ }
+ }
+ }
+
+ // Second pass, now that we know conservatively what register classes each reg
+ // belongs to, add info about aliases. We don't need to do this for targets
+ // without register aliases.
+ if (HasAliases)
+ for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
+ I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+ I != E; ++I)
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
+ RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+}
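+
+// Concrete effect, taking x86 as an example: AX belongs to GR16 and aliases
+// EAX in GR32, so the alias pass above unions GR16 and GR32 into one related
+// set; later leader comparisons then treat intervals in either class as able
+// to interfere with each other.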
+
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy,
+/// try to allocate the definition the same register as the source register,
+/// if that register is not redefined during the live time of the interval.
+/// This eliminates a copy. This is used to coalesce copies which were not
+/// coalesced away before allocation either due to dest and src being in
+/// different register classes or because the coalescer was overly
+/// conservative.
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
+ if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue())
+ return Reg;
+
+ VNInfo *vni = cur.begin()->valno;
+ if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ return Reg;
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg;
+ if (!CopyMI ||
+ !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return Reg;
+ PhysReg = SrcReg;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!vrm_->isAssignedReg(SrcReg))
+ return Reg;
+ PhysReg = vrm_->getPhys(SrcReg);
+ }
+ if (Reg == PhysReg)
+ return Reg;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
+ if (!RC->contains(PhysReg))
+ return Reg;
+
+ // Try to coalesce.
+ if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) {
+ DOUT << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg)
+ << '\n';
+ vrm_->clearVirt(cur.reg);
+ vrm_->assignVirt2Phys(cur.reg, PhysReg);
+
+ // Remove unnecessary kills since a copy does not clobber the register.
+ if (li_->hasInterval(SrcReg)) {
+ LiveInterval &SrcLI = li_->getInterval(SrcReg);
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(cur.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ if (!O.isUse() || !O.isKill())
+ continue;
+ MachineInstr *MI = &*I;
+ if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI))))
+ O.setIsKill(false);
+ }
+ }
+
+ ++NumCoalesce;
+ return SrcReg;
+ }
+
+ return Reg;
+}
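+
+// The shape this targets, sketched in pseudo machine code (register numbers
+// hypothetical):
+//
+//   %reg1024 = MOV32rr %EAX      ; cur is defined by a single copy
+//   ...                          ; EAX has no conflicting def while
+//   ... = use %reg1024           ; %reg1024 is live
+//
+// Assigning %reg1024 to EAX turns the MOV into an identity copy that can
+// later be deleted.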
+
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ li_ = &getAnalysis<LiveIntervals>();
+ ls_ = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ // We don't run the coalescer here because we have no reason to
+ // interact with it. If the coalescer requires interaction, it
+ // won't do anything. If it doesn't require interaction, we assume
+ // it was run as a separate pass.
+
+ // If this is the first function compiled, compute the related reg classes.
+ if (RelatedRegClasses.empty())
+ ComputeRelatedRegClasses();
+
+ // Also resize register usage trackers.
+ initRegUses();
+
+ vrm_ = &getAnalysis<VirtRegMap>();
+ if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
+
+ if (NewSpillFramework) {
+ spiller_.reset(createSpiller(mf_, li_, ls_, vrm_));
+ }
+
+ initIntervalSets();
+
+ linearScan();
+
+ if (NewSpillFramework) {
+ bool allocValid = validateRegAlloc(mf_, li_, vrm_);
+ }
+
+ // Rewrite spill code and update the PhysRegsUsed set.
+ rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+
+ assert(unhandled_.empty() && "Unhandled live intervals remain!");
+
+ finalizeRegUses();
+
+ fixed_.clear();
+ active_.clear();
+ inactive_.clear();
+ handled_.clear();
+ NextReloadMap.clear();
+ DowngradedRegs.clear();
+ DowngradeMap.clear();
+ spiller_.reset(0);
+
+ return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+ assert(unhandled_.empty() && fixed_.empty() &&
+ active_.empty() && inactive_.empty() &&
+ "interval sets should be empty on initialization");
+
+ handled_.reserve(li_->getNumIntervals());
+
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
+ mri_->setPhysRegUsed(i->second->reg);
+ fixed_.push_back(std::make_pair(i->second, i->second->begin()));
+ } else
+ unhandled_.push(i->second);
+ }
+}
+
+void RALinScan::linearScan()
+{
+ // linear scan algorithm
+ DOUT << "********** LINEAR SCAN **********\n";
+ DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n';
+
+ DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end()));
+
+ while (!unhandled_.empty()) {
+ // pick the interval with the earliest start point
+ LiveInterval* cur = unhandled_.top();
+ unhandled_.pop();
+ ++NumIters;
+ DOUT << "\n*** CURRENT ***: " << *cur << '\n';
+
+ if (!cur->empty()) {
+ processActiveIntervals(cur->beginNumber());
+ processInactiveIntervals(cur->beginNumber());
+
+ assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
+ "Can only allocate virtual registers!");
+ }
+
+    // Allocating a virtual register: try to find a free
+    // physical register, or spill an interval (possibly this one) in order to
+ // assign it one.
+ assignRegOrStackSlotAtInterval(cur);
+
+ DEBUG(printIntervals("active", active_.begin(), active_.end()));
+ DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end()));
+ }
+
+ // Expire any remaining active intervals
+ while (!active_.empty()) {
+ IntervalPtr &IP = active_.back();
+ unsigned reg = IP.first->reg;
+ DOUT << "\tinterval " << *IP.first << " expired\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ active_.pop_back();
+ }
+
+ // Expire any remaining inactive intervals
+ DEBUG(for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+ DOUT << "\tinterval " << *i->first << " expired\n");
+ inactive_.clear();
+
+ // Add live-ins to every BB except for entry. Also perform trivial coalescing.
+ MachineFunction::iterator EntryMBB = mf_->begin();
+ SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ LiveInterval &cur = *i->second;
+ unsigned Reg = 0;
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
+ if (isPhys)
+ Reg = cur.reg;
+ else if (vrm_->isAssignedReg(cur.reg))
+ Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
+ if (!Reg)
+ continue;
+    // Ignore split live intervals.
+ if (!isPhys && vrm_->getPreSplitReg(cur.reg))
+ continue;
+ for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
+ I != E; ++I) {
+ const LiveRange &LR = *I;
+ if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
+ for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
+ if (LiveInMBBs[i] != EntryMBB)
+ LiveInMBBs[i]->addLiveIn(Reg);
+ LiveInMBBs.clear();
+ }
+ }
+ }
+
+ DOUT << *vrm_;
+
+ // Look for physical registers that end up not being allocated even though
+  // the register allocator had to spill other registers in their class.
+ if (ls_->getNumIntervals() == 0)
+ return;
+ if (!vrm_->FindUnusedRegisters(tri_, li_))
+ return;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(unsigned CurPoint)
+{
+ DOUT << "\tprocessing active intervals:\n";
+
+ for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+ LiveInterval *Interval = active_[i].first;
+ LiveInterval::iterator IntervalPos = active_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // Remove expired intervals.
+ DOUT << "\t\tinterval " << *Interval << " expired\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+
+ } else if (IntervalPos->start > CurPoint) {
+ // Move inactive intervals to inactive list.
+ DOUT << "\t\tinterval " << *Interval << " inactive\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ // add to inactive.
+ inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ active_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(unsigned CurPoint)
+{
+ DOUT << "\tprocessing inactive intervals:\n";
+
+ for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+ LiveInterval *Interval = inactive_[i].first;
+ LiveInterval::iterator IntervalPos = inactive_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // remove expired intervals.
+ DOUT << "\t\tinterval " << *Interval << " expired\n";
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else if (IntervalPos->start <= CurPoint) {
+ // move re-activated intervals in active list
+ DOUT << "\t\tinterval " << *Interval << " active\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ addRegUse(reg);
+ // add to active
+ active_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ inactive_[i].second = IntervalPos;
+ }
+ }
+}
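+
+// Taken together, the two routines above implement these transitions at
+// scan point CurPoint:
+//
+//   active   -> expired   : interval ended before CurPoint    (delRegUse)
+//   active   -> inactive  : next range starts after CurPoint  (delRegUse)
+//   inactive -> expired   : interval ended before CurPoint
+//   inactive -> active    : current range covers CurPoint     (addRegUse)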
+
+/// updateSpillWeights - adds the given weight to the spill weights of the
+/// specified physical register and its aliases.
+void RALinScan::updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC) {
+ SmallSet<unsigned, 4> Processed;
+ SmallSet<unsigned, 4> SuperAdded;
+ SmallVector<unsigned, 4> Supers;
+ Weights[reg] += weight;
+ Processed.insert(reg);
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
+ Weights[*as] += weight;
+ Processed.insert(*as);
+ if (tri_->isSubRegister(*as, reg) &&
+ SuperAdded.insert(*as) &&
+ RC->contains(*as)) {
+ Supers.push_back(*as);
+ }
+ }
+
+ // If the alias is a super-register, and the super-register is in the
+  // register class we are trying to allocate, then add the weight to all
+  // sub-registers of the super-register, even if they are not aliases.
+  // E.g. when allocating for GR32 with bh not used, update bl's spill weight
+  // too: bl should get the same spill weight, otherwise it will be chosen
+  // as a spill candidate, since spilling bh doesn't make ebx available.
+ for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+ for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+ if (!Processed.count(*sr))
+ Weights[*sr] += weight;
+ }
+}
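+
+// Worked example on x86 (a sketch of the case the comment above describes):
+// updateSpillWeights(W, BH, w, GR32) bumps BH plus its aliases BX and EBX;
+// EBX is a super-register contained in GR32, so its remaining sub-register
+// BL also receives w, keeping BL and BH equally attractive spill candidates.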
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+ for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+ I != E; ++I)
+ if (I->first == LI) return I;
+ return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ RALinScan::IntervalPtr &IP = V[i];
+ LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+ IP.second, Point);
+ if (I != IP.first->begin()) --I;
+ IP.second = I;
+ }
+}
+
+/// addStackInterval - Create a LiveInterval for stack if the specified live
+/// interval has been spilled.
+static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
+ LiveIntervals *li_,
+ MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
+ int SS = vrm_.getStackSlot(cur->reg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
+
+ VNInfo *VNI;
+ if (SI.hasAtLeastOneValue())
+ VNI = SI.getValNumInfo(0);
+ else
+ VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator());
+
+ LiveInterval &RI = li_->getInterval(cur->reg);
+ // FIXME: This may be overly conservative.
+ SI.MergeRangesInAsValue(RI, VNI);
+}
+
+/// getConflictWeight - Return the number of conflicts between cur
+/// live interval and the defs and uses of Reg, weighted by loop depth.
+static
+float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
+ MachineRegisterInfo *mri_,
+ const MachineLoopInfo *loopInfo) {
+ float Conflicts = 0;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (cur->liveAt(li_->getInstructionIndex(MI))) {
+ unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
+ Conflicts += powf(10.0f, (float)loopDepth);
+ }
+ }
+ return Conflicts;
+}
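+
+// Worked example: a def or use of Reg inside a loop nest two deep that
+// overlaps cur contributes powf(10, 2) = 100.0, so a single conflict in a
+// hot inner loop outweighs many conflicts in straight-line code (depth 0,
+// 1.0 each).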
+
+/// findIntervalsToSpill - Determine the intervals to spill for the
+/// specified interval. It's passed the physical registers whose spill
+/// weight is the lowest among all the registers whose live intervals
+/// conflict with the interval.
+void RALinScan::findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals) {
+ // We have figured out the *best* register to spill. But there are other
+  // registers that are pretty good as well (see weightsAreClose). Spill
+ // the one that has fewest defs and uses that conflict with cur.
+ float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
+ SmallVector<LiveInterval*, 8> SLIs[3];
+
+ DOUT << "\tConsidering " << NumCands << " candidates: ";
+ DEBUG(for (unsigned i = 0; i != NumCands; ++i)
+ DOUT << tri_->getName(Candidates[i].first) << " ";
+ DOUT << "\n";);
+
+ // Calculate the number of conflicts of each candidate.
+ for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second-1))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ // Which is the best candidate?
+ unsigned BestCandidate = 0;
+ float MinConflicts = Conflicts[0];
+ for (unsigned i = 1; i != NumCands; ++i) {
+ if (Conflicts[i] < MinConflicts) {
+ BestCandidate = i;
+ MinConflicts = Conflicts[i];
+ }
+ }
+
+ std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
+ std::back_inserter(SpillIntervals));
+}
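+
+// Hypothetical run: candidates EBX (2.02), ESI (2.03), EDI (2.05) are all
+// close in weight, so conflict counts break the tie -- if EBX's overlapping
+// intervals conflict with cur's defs/uses for a weight of 100.0 but ESI's
+// only 10.0, the intervals recorded against ESI are returned for spilling.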
+
+namespace {
+ struct WeightCompare {
+ typedef std::pair<unsigned, float> RegWeightPair;
+ bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
+ return LHS.second < RHS.second;
+ }
+ };
+}
+
+static bool weightsAreClose(float w1, float w2) {
+ if (!NewHeuristic)
+ return false;
+
+ float diff = w1 - w2;
+ if (diff <= 0.02f) // Within 0.02f
+ return true;
+ return (diff / w2) <= 0.05f; // Within 5%.
+}
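+
+// Worked example (only reachable with -new-spilling-heuristic): w1 = 1.04
+// and w2 = 1.00 give diff = 0.04, which fails the 0.02 absolute test but
+// passes the relative one (0.04 / 1.00 = 4% <= 5%), so the weights count
+// as close.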
+
+LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
+ DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
+ if (I == NextReloadMap.end())
+ return 0;
+ return &li_->getInterval(I->second);
+}
+
+void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
+ bool isNew = DowngradedRegs.insert(Reg);
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, Reg));
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
+ isNew = DowngradedRegs.insert(*AS);
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, *AS));
+ }
+ ++NumDowngrade;
+}
+
+void RALinScan::UpgradeRegister(unsigned Reg) {
+ if (Reg) {
+ DowngradedRegs.erase(Reg);
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
+ DowngradedRegs.erase(*AS);
+ }
+}
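+
+// Scenario the downgrade machinery serves, sketched with hypothetical vregs:
+//
+//   %reg2000 = reload from SS   ; allocated EBX; EBX is then downgraded
+//   ...
+//   %reg2001 = reload from SS   ; NextReloadMap maps 2000 -> 2001 and
+//                               ; %reg2001's preference is set to EBX
+//
+// Keeping EBX un-favored in between raises the odds that both reloads land
+// in EBX with EBX unclobbered, so the reloaded value can be reused.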
+
+namespace {
+ struct LISorter {
+ bool operator()(LiveInterval* A, LiveInterval* B) {
+ return A->beginNumber() < B->beginNumber();
+ }
+ };
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
+{
+ DOUT << "\tallocating current interval: ";
+
+ // This is an implicitly defined live interval, just assign any register.
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ if (cur->empty()) {
+ unsigned physReg = cur->preference;
+ if (!physReg)
+ physReg = *RC->allocation_order_begin(*mf_);
+ DOUT << tri_->getName(physReg) << '\n';
+ // Note the register is not really in use.
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ return;
+ }
+
+ backUpRegUses();
+
+ std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+ unsigned StartPosition = cur->beginNumber();
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ // If start of this live interval is defined by a move instruction and its
+ // source is assigned a physical register that is compatible with the target
+ // register class, then we should try to assign it the same register.
+ // This can happen when the move is from a larger register class to a smaller
+ // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
+ if (!cur->preference && cur->hasAtLeastOneValue()) {
+ VNInfo *vni = cur->begin()->valno;
+ if (vni->def && vni->def != ~1U && vni->def != ~0U) {
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (CopyMI &&
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
+ unsigned Reg = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ Reg = SrcReg;
+ else if (vrm_->isAssignedReg(SrcReg))
+ Reg = vrm_->getPhys(SrcReg);
+ if (Reg) {
+ if (SrcSubReg)
+ Reg = tri_->getSubReg(Reg, SrcSubReg);
+ if (DstSubReg)
+ Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
+ if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
+ cur->preference = Reg;
+ }
+ }
+ }
+ }
+
+ // For every interval in inactive we overlap with, mark the
+ // register as not free and update spill weights.
+ for (IntervalPtrs::const_iterator i = inactive_.begin(),
+ e = inactive_.end(); i != e; ++i) {
+ unsigned Reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only allocate virtual registers!");
+ const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ cur->overlapsFrom(*i->first, i->second-1)) {
+ Reg = vrm_->getPhys(Reg);
+ addRegUse(Reg);
+ SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+ }
+ }
+
+ // Speculatively check to see if we can get a register right now. If not,
+ // we know we won't be able to by adding more constraints. If so, we can
+ // check to see if it is valid. Doing an exhaustive search of the fixed_ list
+ // is very bad (it contains all callee clobbered registers for any functions
+ // with a call), so we want to avoid doing that if possible.
+ unsigned physReg = getFreePhysReg(cur);
+ unsigned BestPhysReg = physReg;
+ if (physReg) {
+ // We got a register. However, if it's in the fixed_ list, we might
+ // conflict with it. Check to see if we conflict with it or any of its
+ // aliases.
+ SmallSet<unsigned, 8> RegAliases;
+ for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
+ RegAliases.insert(*AS);
+
+ bool ConflictsWithFixed = false;
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+ // Okay, this reg is on the fixed list. Check to see if we actually
+ // conflict.
+ LiveInterval *I = IP.first;
+ if (I->endNumber() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ ConflictsWithFixed = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Okay, the register picked by our speculative getFreePhysReg call turned
+ // out to be in use. Actually add all of the conflicting fixed registers to
+ // regUse_ so we can do an accurate query.
+ if (ConflictsWithFixed) {
+ // For every interval in fixed we overlap with, mark the register as not
+ // free and update spill weights.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+
+ const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ I->endNumber() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ unsigned reg = I->reg;
+ addRegUse(reg);
+ SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+ }
+ }
+ }
+
+ // Using the newly updated regUse_ object, which includes conflicts in the
+ // future, see if there are any registers available.
+ physReg = getFreePhysReg(cur);
+ }
+ }
+
+ // Restore the physical register tracker, removing information about the
+ // future.
+ restoreRegUses();
+
+ // If we find a free register, we are done: assign this virtual to
+ // the free physical register and add this interval to the active
+ // list.
+ if (physReg) {
+ DOUT << tri_->getName(physReg) << '\n';
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ addRegUse(physReg);
+ active_.push_back(std::make_pair(cur, cur->begin()));
+ handled_.push_back(cur);
+
+ // "Upgrade" the physical register since it has been allocated.
+ UpgradeRegister(physReg);
+ if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+ // "Downgrade" physReg to try to keep physReg from being allocated until
+ // the next reload from the same SS is allocated.
+ NextReloadLI->preference = physReg;
+ DowngradeRegister(cur, physReg);
+ }
+ return;
+ }
+ DOUT << "no free registers\n";
+
+ // Compile the spill weights into an array that is better for scanning.
+ std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
+ for (std::vector<std::pair<unsigned, float> >::iterator
+ I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+ updateSpillWeights(SpillWeights, I->first, I->second, RC);
+
+ // for each interval in active, update spill weights.
+ for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
+ }
+
+ DOUT << "\tassigning stack slot at interval "<< *cur << ":\n";
+
+ // Find a register to spill.
+ float minWeight = HUGE_VALF;
+  unsigned minReg = 0; /*cur->preference*/ // Try the pref register first.
+
+ bool Found = false;
+ std::vector<std::pair<unsigned,float> > RegsWeights;
+ if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+ for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+ e = RC->allocation_order_end(*mf_); i != e; ++i) {
+ unsigned reg = *i;
+ float regWeight = SpillWeights[reg];
+ if (minWeight > regWeight)
+ Found = true;
+ RegsWeights.push_back(std::make_pair(reg, regWeight));
+ }
+
+ // If we didn't find a register that is spillable, try aliases?
+ if (!Found) {
+ for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+ e = RC->allocation_order_end(*mf_); i != e; ++i) {
+ unsigned reg = *i;
+      // No need to worry about whether the alias register size < regsize of RC.
+ // We are going to spill all registers that alias it anyway.
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
+ RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
+ }
+ }
+
+ // Sort all potential spill candidates by weight.
+ std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare());
+ minReg = RegsWeights[0].first;
+ minWeight = RegsWeights[0].second;
+ if (minWeight == HUGE_VALF) {
+ // All registers must have inf weight. Just grab one!
+ minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_);
+ if (cur->weight == HUGE_VALF ||
+ li_->getApproximateInstructionCount(*cur) == 0) {
+ // Spill a physical register around defs and uses.
+ if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+ // spillPhysRegAroundRegDefsUses may have invalidated iterator stored
+ // in fixed_. Reset them.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+ if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
+ IP.second = I->advanceTo(I->begin(), StartPosition);
+ }
+
+ DowngradedRegs.clear();
+ assignRegOrStackSlotAtInterval(cur);
+ } else {
+ cerr << "Ran out of registers during register allocation!\n";
+ exit(1);
+ }
+ return;
+ }
+ }
+
+ // Find up to 3 registers to consider as spill candidates.
+ unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
+ while (LastCandidate > 1) {
+ if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
+ break;
+ --LastCandidate;
+ }
+
+ DOUT << "\t\tregister(s) with min weight(s): ";
+ DEBUG(for (unsigned i = 0; i != LastCandidate; ++i)
+ DOUT << tri_->getName(RegsWeights[i].first)
+ << " (" << RegsWeights[i].second << ")\n");
+
+  // If the current interval has the minimum weight, we need to spill it,
+  // add any intervals created by the spill back to unhandled, and restart
+  // linearscan.
+ if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+ DOUT << "\t\t\tspilling(c): " << *cur << '\n';
+ SmallVector<LiveInterval*, 8> spillIs;
+ std::vector<LiveInterval*> added;
+
+ if (!NewSpillFramework) {
+ added = li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_);
+ } else {
+ added = spiller_->spill(cur);
+ }
+
+ std::sort(added.begin(), added.end(), LISorter());
+ addStackInterval(cur, ls_, li_, mri_, *vrm_);
+ if (added.empty())
+ return; // Early exit if all spills were folded.
+
+ // Merge added with unhandled. Note that we have already sorted
+ // intervals returned by addIntervalsForSpills by their starting
+ // point.
+    // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ unsigned ReloadIdx = ReloadLi->beginNumber();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginNumber() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+ return;
+ }
+
+ ++NumBacktracks;
+
+ // Push the current interval back to unhandled since we are going
+  // to re-run at least this iteration. Since we didn't modify it, it
+  // should go back right at the front of the list.
+ unhandled_.push(cur);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
+ "did not choose a register to spill?");
+
+  // We spill all intervals aliasing the register with
+  // minimum weight, roll back to the interval with the earliest
+  // start point, and let the linear scan algorithm run again.
+ SmallVector<LiveInterval*, 8> spillIs;
+
+ // Determine which intervals have to be spilled.
+ findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
+
+ // Set of spilled vregs (used later to rollback properly)
+ SmallSet<unsigned, 8> spilled;
+
+  // The earliest start of a spilled interval indicates up to where in
+  // handled_ we need to roll back.
+
+ unsigned earliestStart = cur->beginNumber();
+ LiveInterval *earliestStartInterval = cur;
+
+ // Spill live intervals of virtual regs mapped to the physical register we
+ // want to clear (and its aliases). We only spill those that overlap with the
+  // current interval, as the rest do not affect its allocation. We also keep
+ // track of the earliest start of all spilled live intervals since this will
+ // mark our rollback point.
+ std::vector<LiveInterval*> added;
+ while (!spillIs.empty()) {
+ bool epicFail = false;
+ LiveInterval *sli = spillIs.back();
+ spillIs.pop_back();
+ DOUT << "\t\t\tspilling(a): " << *sli << '\n';
+ earliestStart = std::min(earliestStart, sli->beginNumber());
+ earliestStartInterval =
+ (earliestStartInterval->beginNumber() < sli->beginNumber()) ?
+ earliestStartInterval : sli;
+
+ if (earliestStartInterval->beginNumber()!=earliestStart) {
+ epicFail |= true;
+ std::cerr << "What the 1 - "
+ << "earliestStart = " << earliestStart
+ << "earliestStartInterval = " << earliestStartInterval->beginNumber()
+ << "\n";
+ }
+
+ std::vector<LiveInterval*> newIs;
+ if (!NewSpillFramework) {
+ newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_);
+ } else {
+ newIs = spiller_->spill(sli);
+ }
+ addStackInterval(sli, ls_, li_, mri_, *vrm_);
+ std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+ spilled.insert(sli->reg);
+
+ if (earliestStartInterval->beginNumber()!=earliestStart) {
+ epicFail |= true;
+ std::cerr << "What the 2 - "
+ << "earliestStart = " << earliestStart
+ << "earliestStartInterval = " << earliestStartInterval->beginNumber()
+ << "\n";
+ }
+
+ if (epicFail) {
+ //abort();
+ }
+ }
+
+ earliestStart = earliestStartInterval->beginNumber();
+
+ DOUT << "\t\trolling back to: " << earliestStart << '\n';
+
+ // Scan handled in reverse order up to the earliest start of a
+ // spilled live interval and undo each one, restoring the state of
+ // unhandled.
+ while (!handled_.empty()) {
+ LiveInterval* i = handled_.back();
+    // If this interval starts before the rollback point, we are done.
+ if (i->beginNumber() < earliestStart)
+ break;
+ DOUT << "\t\t\tundo changes for: " << *i << '\n';
+ handled_.pop_back();
+
+ // When undoing a live interval allocation we must know if it is active or
+ // inactive to properly update regUse_ and the VirtRegMap.
+ IntervalPtrs::iterator it;
+ if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+ active_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ delRegUse(vrm_->getPhys(i->reg));
+ vrm_->clearVirt(i->reg);
+ } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+ inactive_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ vrm_->clearVirt(i->reg);
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+ "Can only allocate virtual registers!");
+ vrm_->clearVirt(i->reg);
+ unhandled_.push(i);
+ }
+
+ DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+ if (ii == DowngradeMap.end())
+      // If the interval has a preference, it must be defined by a copy. Clear
+      // the preference now since the source interval allocation may have been
+ // undone as well.
+ i->preference = 0;
+ else {
+ UpgradeRegister(ii->second);
+ }
+ }
+
+ // Rewind the iterators in the active, inactive, and fixed lists back to the
+ // point we reverted to.
+ RevertVectorIteratorsTo(active_, earliestStart);
+ RevertVectorIteratorsTo(inactive_, earliestStart);
+ RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // Scan the rest and undo each interval that expired after the rollback
+  // point, inserting it in active (the next iteration of the algorithm will
+  // move it to inactive if required).
+ for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+ LiveInterval *HI = handled_[i];
+ if (!HI->expiredAt(earliestStart) &&
+ HI->expiredAt(cur->beginNumber())) {
+ DOUT << "\t\t\tundo changes for: " << *HI << '\n';
+ active_.push_back(std::make_pair(HI, HI->begin()));
+ assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+ addRegUse(vrm_->getPhys(HI->reg));
+ }
+ }
+
+ // Merge added with unhandled.
+  // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ std::sort(added.begin(), added.end(), LISorter());
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ unsigned ReloadIdx = ReloadLi->beginNumber();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+        // The last reload of the same SS is in the same MBB. We want to try
+        // to allocate both reloads to the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginNumber() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+}
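+
+// A sketch of the rollback above, using illustrative instruction indices:
+// suppose cur starts at index 20 and the cheapest conflicting interval
+// chosen by findIntervalsToSpill starts at index 12. After spilling,
+// earliestStart is 12, so every interval in handled_ beginning at or after
+// 12 is popped, its assignment in vrm_ is cleared, and (unless it was
+// itself spilled) it is pushed back onto unhandled_; linear scan then
+// resumes as if those allocations had never happened.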
+
+unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs) {
+ unsigned FreeReg = 0;
+ unsigned FreeRegInactiveCount = 0;
+
+ TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
+ TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+ assert(I != E && "No allocatable register in this register class!");
+
+ // Scan for the first available register.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ if (isRegAvail(Reg)) {
+ FreeReg = Reg;
+ if (FreeReg < inactiveCounts.size())
+ FreeRegInactiveCount = inactiveCounts[FreeReg];
+ else
+ FreeRegInactiveCount = 0;
+ break;
+ }
+ }
+
+ // If there are no free regs, or if this reg has the max inactive count,
+ // return this register.
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
+ return FreeReg;
+
+ // Continue scanning the registers, looking for the one with the highest
+ // inactive count. Alkis found that this reduced register pressure very
+ // slightly on X86 (in rev 1.94 of this file), though this should probably be
+ // reevaluated now.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+ FreeRegInactiveCount < inactiveCounts[Reg]) {
+ FreeReg = Reg;
+ FreeRegInactiveCount = inactiveCounts[Reg];
+ if (FreeRegInactiveCount == MaxInactiveCount)
+ break; // We found the one with the max inactive count.
+ }
+ }
+
+ return FreeReg;
+}
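+
+// A worked example of the two-pass scan above, with an illustrative
+// allocation order {R0, R1, R2}: if R0 is in use, R1 is free with one
+// inactive interval mapped to it, and R2 is free with three, the first
+// loop settles on R1 and the second loop upgrades the choice to R2, the
+// free register with the higher inactive count (stopping early once that
+// count reaches MaxInactiveCount).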
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+ SmallVector<unsigned, 256> inactiveCounts;
+ unsigned MaxInactiveCount = 0;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+ reg = vrm_->getPhys(reg);
+ if (inactiveCounts.size() <= reg)
+ inactiveCounts.resize(reg+1);
+ ++inactiveCounts[reg];
+ MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+ }
+ }
+
+  // If the copy coalescer has assigned a "preferred" register, check if it's
+ // available first.
+ if (cur->preference) {
+ DOUT << "(preferred: " << tri_->getName(cur->preference) << ") ";
+ if (isRegAvail(cur->preference) &&
+ RC->contains(cur->preference))
+ return cur->preference;
+ }
+
+ if (!DowngradedRegs.empty()) {
+ unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts,
+ true);
+ if (FreeReg)
+ return FreeReg;
+ }
+ return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false);
+}
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+ return new RALinScan();
+}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
new file mode 100644
index 0000000..e1cc20c
--- /dev/null
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -0,0 +1,1068 @@
+//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+static RegisterRegAlloc
+ localRegAlloc("local", "local register allocator",
+ createLocalRegisterAllocator);
+
+namespace {
+ class VISIBILITY_HIDDEN RALocal : public MachineFunctionPass {
+ public:
+ static char ID;
+ RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Virt2PhysRegMap - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+ return Virt2PhysRegMap[VirtReg];
+ }
+
+ // PhysRegsUsed - This array is effectively a map, containing entries for
+    // each physical register that currently has a value (i.e., it is in
+ // Virt2PhysRegMap). The value mapped to is the virtual register
+ // corresponding to the physical register (the inverse of the
+ // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
+ // because it is used by a future instruction, and to -2 if it is not
+ // allocatable. If the entry for a physical register is -1, then the
+ // physical register is "not in the map".
+ //
+ std::vector<int> PhysRegsUsed;
+
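+    // An illustrative snapshot of this encoding (register names and vreg
+    // numbers are examples only): PhysRegsUsed[EAX] == 1025 means EAX holds
+    // %reg1025; PhysRegsUsed[ECX] == 0 means ECX is pinned for a future
+    // use; PhysRegsUsed[ESP] == -2 means ESP is never allocatable; and
+    // PhysRegsUsed[EDX] == -1 means EDX is currently free.
+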
+ // PhysRegsUseOrder - This contains a list of the physical registers that
+ // currently have a virtual register value in them. This list provides an
+ // ordering of registers, imposing a reallocation order. This list is only
+ // used if all registers are allocated and we have to spill one, in which
+ // case we spill the least recently used register. Entries at the front of
+ // the list are the least recently used registers, entries at the back are
+ // the most recently used.
+ //
+ std::vector<unsigned> PhysRegsUseOrder;
+
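+    // For instance (illustrative registers only): after uses of EAX, ECX,
+    // then EAX again, PhysRegsUseOrder holds [ECX, EAX]; if all registers
+    // are taken and one must be spilled, ECX at the front is the least
+    // recently used and is evicted first.
+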
+ // Virt2LastUseMap - This maps each virtual register to its last use
+ // (MachineInstr*, operand index pair).
+ IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
+ Virt2LastUseMap;
+
+ std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ return Virt2LastUseMap[Reg];
+ }
+
+ // VirtRegModified - This bitset contains information about which virtual
+ // registers need to be spilled back to memory when their registers are
+ // scavenged. If a virtual register has simply been rematerialized, there
+ // is no reason to spill it to memory when we need the register back.
+ //
+ BitVector VirtRegModified;
+
+ // UsedInMultipleBlocks - Tracks whether a particular register is used in
+ // more than one block.
+ BitVector UsedInMultipleBlocks;
+
+ void markVirtRegModified(unsigned Reg, bool Val = true) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ if (Val)
+ VirtRegModified.set(Reg);
+ else
+ VirtRegModified.reset(Reg);
+ }
+
+ bool isVirtRegModified(unsigned Reg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+ && "Illegal virtual register!");
+ return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+ }
+
+ void AddToPhysRegsUseOrder(unsigned Reg) {
+ std::vector<unsigned>::iterator It =
+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg);
+ if (It != PhysRegsUseOrder.end())
+ PhysRegsUseOrder.erase(It);
+ PhysRegsUseOrder.push_back(Reg);
+ }
+
+ void MarkPhysRegRecentlyUsed(unsigned Reg) {
+ if (PhysRegsUseOrder.empty() ||
+ PhysRegsUseOrder.back() == Reg) return; // Already most recently used
+
+ for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i)
+ if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) {
+ unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle
+ PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
+ // Add it to the end of the list
+ PhysRegsUseOrder.push_back(RegMatch);
+ if (RegMatch == Reg)
+ return; // Found an exact match, exit early
+ }
+ }
+
+ public:
+ virtual const char *getPassName() const {
+ return "Local Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// runOnMachineFunction - Register allocate the whole function
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+
+ /// areRegsEqual - This method returns true if the specified registers are
+ /// related to each other. To do this, it checks to see if they are equal
+ /// or if the first register is in the alias set of the second register.
+ ///
+ bool areRegsEqual(unsigned R1, unsigned R2) const {
+ if (R1 == R2) return true;
+ for (const unsigned *AliasSet = TRI->getAliasSet(R2);
+ *AliasSet; ++AliasSet) {
+ if (*AliasSet == R1) return true;
+ }
+ return false;
+ }
+
+ /// getStackSpaceFor - This returns the frame index of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// removePhysReg - This method marks the specified physical register as no
+ /// longer being in use.
+ ///
+ void removePhysReg(unsigned PhysReg);
+
+ /// spillVirtReg - This method spills the value specified by PhysReg into
+ /// the virtual register slot specified by VirtReg. It then updates the RA
+ /// data structures to indicate the fact that PhysReg is now available.
+ ///
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// spillPhysReg - This method spills the specified physical register into
+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+ /// true, then the request is ignored if the physical register does not
+ /// contain a virtual register.
+ ///
+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs = false);
+
+ /// assignVirtToPhysReg - This method updates local state so that we know
+ /// that PhysReg is the proper container for VirtReg now. The physical
+ /// register must not be used for anything else when this is called.
+ ///
+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+ /// isPhysRegAvailable - Return true if the specified physical register is
+ /// free and available for use. This also includes checking to see if
+ /// aliased registers are all free...
+ ///
+ bool isPhysRegAvailable(unsigned PhysReg) const;
+
+ /// getFreeReg - Look to see if there is a free register available in the
+ /// specified register class. If not, return 0.
+ ///
+ unsigned getFreeReg(const TargetRegisterClass *RC);
+
+ /// getReg - Find a physical register to hold the specified virtual
+ /// register. If all compatible physical registers are used, this method
+ /// spills the last used virtual register to the stack, and uses that
+    /// register. If NoFree is true, that means the caller knows there isn't
+    /// a free register, so getFreeReg() should not be called.
+ unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned VirtReg, bool NoFree = false);
+
+ /// reloadVirtReg - This method transforms the specified virtual
+ /// register use to refer to a physical register. This method may do this
+ /// in one of several ways: if the register is available in a physical
+ /// register already, it uses that physical register. If the value is not
+ /// in a physical register, and if there are physical registers available,
+ /// it loads it into a register. If register pressure is high, and it is
+ /// possible, it tries to fold the load of the virtual register into the
+ /// instruction itself. It avoids doing this if register pressure is low to
+ /// improve the chance that subsequent instructions can use the reloaded
+ /// value. This method returns the modified instruction.
+ ///
+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum, SmallSet<unsigned, 4> &RRegs);
+
+ /// ComputeLocalLiveness - Computes liveness of registers within a basic
+ /// block, setting the killed/dead flags as appropriate.
+ void ComputeLocalLiveness(MachineBasicBlock& MBB);
+
+ void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+ unsigned PhysReg);
+ };
+ char RALocal::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the location where VirtReg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
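+// A usage sketch (vreg number and frame index are illustrative): the first
+// getStackSpaceFor(1024, RC) call creates a fresh frame index, say FI#3,
+// sized and aligned for RC; every later call for %reg1024 returns the cached
+// FI#3, so each virtual register spills to a single, stable slot.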
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RALocal::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;  // PhysReg no longer used
+
+ std::vector<unsigned>::iterator It =
+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
+ if (It != PhysRegsUseOrder.end())
+ PhysRegsUseOrder.erase(It);
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RALocal::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " The register must be missing a kill, or a use must exist beyond"
+         " the intended one.");
+ DOUT << " Spilling register " << TRI->getName(PhysReg)
+ << " containing %reg" << VirtReg;
+
+ if (!isVirtRegModified(VirtReg)) {
+ DOUT << " which has not been modified, so no store necessary!";
+ std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+ if (LastUse.first)
+ LastUse.first->getOperand(LastUse.second).setIsKill();
+ } else {
+ // Otherwise, there is a virtual register corresponding to this physical
+ // register. We only need to spill it into its stack slot if it has been
+ // modified.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DOUT << " to stack slot #" << FrameIndex;
+    // If the instruction reads the register that's spilled (e.g. this can
+ // happen if it is a move to a physical register), then the spill
+ // instruction is not a kill.
+ bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
+ TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+ ++NumStores; // Update statistics
+ }
+
+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+
+ DOUT << "\n";
+ removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs) {
+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non-allocatable reg used!");
+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+ } else {
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register.
+ PhysRegsUsed[*AliasSet] != -2) // If allocatable.
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+ // Update information to note the fact that this register was just used, and
+ // it holds VirtReg.
+ PhysRegsUsed[PhysReg] = VirtReg;
+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+ AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
+ if (PhysRegsUsed[PhysReg] != -1) return false;
+
+ // If the selected register aliases any other allocated registers, it is
+ // not free!
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
+ return false; // Can't use this reg then.
+ return true;
+}
+
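+// An aliasing example (x86 names for illustration): EAX and its sub-register
+// AX alias, so even when PhysRegsUsed[AX] is -1, AX is not available while
+// EAX holds a value; the alias walk above catches exactly this case.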
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
+ // Get iterators defining the range of registers that are valid to allocate in
+ // this class, which also specifies the preferred allocation order.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegAvailable(*RI)) { // Is reg unused?
+ assert(*RI != 0 && "Cannot use register!");
+ return *RI; // Found an unused register!
+ }
+ return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register. If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned VirtReg, bool NoFree) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+
+ // First check to see if we have a free register of the requested type...
+ unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
+
+ // If we didn't find an unused register, scavenge one now!
+ if (PhysReg == 0) {
+ assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+
+ // Loop over all of the preallocated registers from the least recently used
+ // to the most recently used. When we find one that is capable of holding
+ // our register, use it.
+ for (unsigned i = 0; PhysReg == 0; ++i) {
+ assert(i != PhysRegsUseOrder.size() &&
+ "Couldn't find a register of the appropriate class!");
+
+ unsigned R = PhysRegsUseOrder[i];
+
+      // We can only use this register if it holds a virtual register (i.e., it
+ // can be spilled). Do not use it if it is an explicitly allocated
+ // physical register!
+ assert(PhysRegsUsed[R] != -1 &&
+ "PhysReg in PhysRegsUseOrder, but is not allocated?");
+ if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+ // If the current register is compatible, use it.
+ if (RC->contains(R)) {
+ PhysReg = R;
+ break;
+ } else {
+ // If one of the registers aliased to the current register is
+ // compatible, use it.
+ for (const unsigned *AliasIt = TRI->getAliasSet(R);
+ *AliasIt; ++AliasIt) {
+ if (RC->contains(*AliasIt) &&
+              // If this is pinned down for some reason, don't use it. For
+              // example, if CL is pinned and we run across CH, don't use
+              // CH as justification for scavenging ECX (which would
+              // fail).
+ PhysRegsUsed[*AliasIt] != 0 &&
+
+ // Make sure the register is allocatable. Don't allocate SIL on
+ // x86-32.
+ PhysRegsUsed[*AliasIt] != -2) {
+ PhysReg = *AliasIt; // Take an aliased register
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ assert(PhysReg && "Physical register not assigned!?!?");
+
+ // At this point PhysRegsUseOrder[i] is the least recently used register of
+    // a compatible register class. Spill it to memory and reap its remains.
+ spillPhysReg(MBB, I, PhysReg);
+ }
+
+ // Now that we know which register we need to assign this to, do it now!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg;
+}
+
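+// A spill-selection sketch with illustrative contents: if PhysRegsUseOrder
+// is [EBX, ECX, EAX], all three hold virtual registers, and the requested
+// class is assumed to contain only ECX and EAX, the scan above skips EBX
+// (wrong class, no compatible alias) and evicts ECX, the least recently
+// used compatible register, freeing it for the new virtual register.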
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register. This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register. If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register. If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself. It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value. This method returns the
+/// modified instruction.
+///
+MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum,
+ SmallSet<unsigned, 4> &ReloadedRegs) {
+ unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+ // If the virtual register is already available, just update the instruction
+ // and return.
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+ MarkPhysRegRecentlyUsed(PR); // Already have this value available!
+ MI->getOperand(OpNum).setReg(PR); // Assign the input register
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+ return MI;
+ }
+
+ // Otherwise, we need to fold it into the current instruction, or reload it.
+ // If we have registers available to hold the value, use them.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ unsigned PhysReg = getFreeReg(RC);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+ if (PhysReg) { // Register is available, allocate it!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ } else { // No registers available.
+ // Force some poor hapless value out of the register file to
+ // make room for the new register, and reload it.
+ PhysReg = getReg(MBB, MI, VirtReg, true);
+ }
+
+ markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
+
+ DOUT << " Reloading %reg" << VirtReg << " into "
+ << TRI->getName(PhysReg) << "\n";
+
+ // Add move instruction(s)
+ TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ ++NumLoads; // Update statistics
+
+ MF->getRegInfo().setPhysRegUsed(PhysReg);
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+
+ if (!ReloadedRegs.insert(PhysReg)) {
+ cerr << "Ran out of registers during register allocation!\n";
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+ cerr << "Please check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(cerr.stream(), TM);
+ }
+ exit(1);
+ }
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (!ReloadedRegs.insert(*SubRegs)) {
+ cerr << "Ran out of registers during register allocation!\n";
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+ cerr << "Please check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(cerr.stream(), TM);
+ }
+ exit(1);
+ }
+ }
+
+ return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ MO.isDef() && !MO.isDead())
+ return true;
+ }
+ return false;
+}
+
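+// An example of the pattern these two helpers look for (x86 flavored,
+// illustrative): an instruction defining AX implicitly reads and writes the
+// wider EAX, so it carries an implicit non-dead def of EAX and an implicit
+// kill-marked use of it. Such operands describe a partial-register update
+// rather than a genuine last use or a genuine new definition.
+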
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ !MO.isDef() && MO.isKill())
+ return true;
+ }
+ return false;
+}
+
+// precedes - Helper function to determine whether MachineInstr A
+// precedes MachineInstr B within the same MBB.
+static bool precedes(MachineBasicBlock::iterator A,
+ MachineBasicBlock::iterator B) {
+ if (A == B)
+ return false;
+
+ MachineBasicBlock::iterator I = A->getParent()->begin();
+ while (I != A->getParent()->end()) {
+ if (I == A)
+ return true;
+ else if (I == B)
+ return false;
+
+ ++I;
+ }
+
+ return false;
+}
+
+/// ComputeLocalLiveness - Computes liveness of registers within a basic
+/// block, setting the killed/dead flags as appropriate.
+void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
+ MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ // Keep track of the most recently seen previous use or def of each reg,
+ // so that we can update them with dead/kill markers.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = I->getOperand(i);
+ // Uses don't trigger any flags, but we need to save
+ // them for later. Also, we have to process these
+ // _before_ processing the defs, since an instr
+ // uses regs before it defs them.
+ if (MO.isReg() && MO.getReg() && MO.isUse()) {
+ LastUseDef[MO.getReg()] = std::make_pair(I, i);
+
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
+
+ const unsigned* Aliases = TRI->getAliasSet(MO.getReg());
+ if (Aliases) {
+ while (*Aliases) {
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ alias = LastUseDef.find(*Aliases);
+
+ if (alias != LastUseDef.end() && alias->second.first != I)
+ LastUseDef[*Aliases] = std::make_pair(I, i);
+
+ ++Aliases;
+ }
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = I->getOperand(i);
+      // Defs other than 2-addr redefs _do_ trigger flag changes:
+ // - A def followed by a def is dead
+ // - A use followed by a def is a kill
+ if (MO.isReg() && MO.getReg() && MO.isDef()) {
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ last = LastUseDef.find(MO.getReg());
+ if (last != LastUseDef.end()) {
+ // Check if this is a two address instruction. If so, then
+ // the def does not kill the use.
+ if (last->second.first == I &&
+ I->isRegTiedToUseOperand(i))
+ continue;
+
+ MachineOperand& lastUD =
+ last->second.first->getOperand(last->second.second);
+ if (lastUD.isDef())
+ lastUD.setIsDead(true);
+ else
+ lastUD.setIsKill(true);
+ }
+
+ LastUseDef[MO.getReg()] = std::make_pair(I, i);
+ }
+ }
+ }
+
+  // Registers that are live out of the function contain its return values,
+  // so we need to make sure they are alive at return time.
+ if (!MBB.empty() && MBB.back().getDesc().isReturn()) {
+ MachineInstr* Ret = &MBB.back();
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I)
+ if (!Ret->readsRegister(*I)) {
+ Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+ LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
+ }
+ }
+
+ // Finally, loop over the final use/def of each reg
+ // in the block and determine if it is dead.
+ for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
+ MachineInstr* MI = I->second.first;
+ unsigned idx = I->second.second;
+ MachineOperand& MO = MI->getOperand(idx);
+
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
+
+ // A crude approximation of "live-out" calculation
+ bool usedOutsideBlock = isPhysReg ? false :
+ UsedInMultipleBlocks.test(MO.getReg() -
+ TargetRegisterInfo::FirstVirtualRegister);
+ if (!isPhysReg && !usedOutsideBlock)
+ for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
+ UE = MRI.reg_end(); UI != UE; ++UI)
+ // Two cases:
+ // - used in another block
+ // - used in the same block before it is defined (loop)
+ if (UI->getParent() != &MBB ||
+ (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) {
+ UsedInMultipleBlocks.set(MO.getReg() -
+ TargetRegisterInfo::FirstVirtualRegister);
+ usedOutsideBlock = true;
+ break;
+ }
+
+ // Physical registers and those that are not live-out of the block
+ // are killed/dead at their last use/def within this block.
+ if (isPhysReg || !usedOutsideBlock) {
+ if (MO.isUse()) {
+ // Don't mark uses that are tied to defs as kills.
+ if (!MI->isRegTiedToDefOperand(idx))
+ MO.setIsKill(true);
+ } else
+ MO.setIsDead(true);
+ }
+ }
+}
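+
+// A worked liveness example for the rules above, on an illustrative block
+// where %reg1024 is not used in any other block:
+//   (1) %reg1024 = ...        ; def
+//   (2) ...      = %reg1024   ; use
+//   (3) %reg1024 = ...        ; def, never read afterwards
+// The use at (2) followed by the def at (3) marks (2) as a kill, and since
+// nothing reads the value defined at (3), that operand is marked dead.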
+
+void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
+  // Loop over each instruction.
+ MachineBasicBlock::iterator MII = MBB.begin();
+
+ DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+ // Add live-in registers as active.
+ for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
+ E = MBB.livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ AddToPhysRegsUseOrder(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ }
+ }
+ }
+
+ ComputeLocalLiveness(MBB);
+
+  // Now, sequentially allocate each instruction in the MBB.
+ while (MII != MBB.end()) {
+ MachineInstr *MI = MII++;
+ const TargetInstrDesc &TID = MI->getDesc();
+ DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI;
+ DOUT << " Regs have values: ";
+ for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ DOUT << "[" << TRI->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ DOUT << "\n");
+
+    // Loop over the implicit uses, making sure they are at the end of the
+    // use order list (i.e., most recently used), so they don't get reallocated.
+ if (TID.ImplicitUses) {
+ for (const unsigned *ImplicitUses = TID.ImplicitUses;
+ *ImplicitUses; ++ImplicitUses)
+ MarkPhysRegRecentlyUsed(*ImplicitUses);
+ }
+
+ SmallVector<unsigned, 8> Kills;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // These are extra physical register kills when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ Kills.push_back(MO.getReg());
+ }
+ }
+
+ // If any physical regs are earlyclobber, spill any value they might
+ // have in them, then mark them unallocatable.
+ // If any virtual regs are earlyclobber, allocate them now (before
+ // freeing inputs that are killed).
+ if (MI->getOpcode()==TargetInstrInfo::INLINEASM) {
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() &&
+ MO.getReg()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) =
+ std::make_pair((MachineInstr*)0, 0);
+ DOUT << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n";
+ MO.setReg(DestPhysReg); // Assign the earlyclobber register
+ } else {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*SubRegs);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Get the used operands into registers. This has the potential to spill
+ // incoming values if we are out of registers. Note that we completely
+ // ignore physical register uses here. We assume that if an explicit
+    // physical register is referenced by the instruction, it is guaranteed
+ // to be live-in, or the input is badly hosed.
+ //
+ SmallSet<unsigned, 4> ReloadedRegs;
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+      // Here we are looking only for use operands (never def&use).
+ if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = reloadVirtReg(MBB, MI, i, ReloadedRegs);
+ }
+
+ // If this instruction is the last user of this register, kill the
+ // value, freeing the register being used, so it doesn't need to be
+ // spilled to memory.
+ //
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned VirtReg = Kills[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // If the virtual register was never materialized into a register, it
+ // might not be in the map, but it won't hurt to zero it out anyway.
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else {
+ assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+ "Silently clearing a virtual register?");
+ }
+
+ if (PhysReg) {
+ DOUT << " Last use of " << TRI->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ DOUT << " Last use of "
+ << TRI->getName(*SubRegs)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Loop over all of the operands of the instruction, spilling registers that
+ // are defined, and marking explicit destinations in the PhysRegsUsed map.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+ !MO.isEarlyClobber() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Loop over the implicit defs, spilling them as well.
+ if (TID.ImplicitDefs) {
+ for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+ *ImplicitDefs; ++ImplicitDefs) {
+ unsigned Reg = *ImplicitDefs;
+ if (PhysRegsUsed[Reg] != -2) {
+ spillPhysReg(MBB, MI, Reg, true);
+ AddToPhysRegsUseOrder(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ }
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ AddToPhysRegsUseOrder(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ }
+ }
+ }
+ }
+
+ SmallVector<unsigned, 8> DeadDefs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadDefs.push_back(MO.getReg());
+ }
+
+ // Okay, we have allocated all of the source operands and spilled any values
+ // that would be destroyed by defs of this instruction. Loop over the
+ // explicit defs and assign them to a register, spilling incoming values if
+ // we need to scavenge a register.
+ //
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ !MO.isEarlyClobber() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
+ DOUT << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n";
+ MO.setReg(DestPhysReg); // Assign the output register
+ }
+ }
+
+ // If this instruction defines any registers that are immediately dead,
+ // kill them now.
+ //
+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+ unsigned VirtReg = DeadDefs[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ assert(PhysReg != 0);
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ }
+
+ if (PhysReg) {
+ DOUT << " Register " << TRI->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Register " << TRI->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it from live set\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Finally, if this is a noop copy instruction, zap it. (Except that if
+ // the copy is dead, it must be kept to avoid messing up liveness info for
+ // the register scavenger. See pr4100.)
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ SrcReg == DstReg && DeadDefs.empty())
+ MBB.erase(MI);
+ }
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+ // Spill all physical registers holding virtual registers now.
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+ if (unsigned VirtReg = PhysRegsUsed[i])
+ spillVirtReg(MBB, MI, VirtReg, i);
+ else
+ removePhysReg(i);
+ }
+
+#if 0
+ // This checking code is very expensive.
+ bool AllOk = true;
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (unsigned PR = Virt2PhysRegMap[i]) {
+ cerr << "Register still mapped: " << i << " -> " << PR << "\n";
+ AllOk = false;
+ }
+ assert(AllOk && "Virtual registers still in phys regs?");
+#endif
+
+  // Clear any physical registers which appear live at the end of the basic
+  // block but which do not hold any virtual registers, e.g., the stack
+  // pointer.
+ PhysRegsUseOrder.clear();
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function " << "\n";
+ MF = &Fn;
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+
+ PhysRegsUsed.assign(TRI->getNumRegs(), -1);
+
+ // At various places we want to efficiently check to see whether a register
+ // is allocatable. To handle this, we mark all unallocatable registers as
+ // being pinned down, permanently.
+ {
+ BitVector Allocable = TRI->getAllocatableSet(Fn);
+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+ if (!Allocable[i])
+ PhysRegsUsed[i] = -2; // Mark the reg unallocable.
+ }
+
+  // Initialize the virtual->physical register map to have a 'null'
+  // mapping for all virtual registers.
+ unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ StackSlotForVirtReg.grow(LastVirtReg);
+ Virt2PhysRegMap.grow(LastVirtReg);
+ Virt2LastUseMap.grow(LastVirtReg);
+ VirtRegModified.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+ UsedInMultipleBlocks.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ AllocateBasicBlock(*MBB);
+
+ StackSlotForVirtReg.clear();
+ PhysRegsUsed.clear();
+ VirtRegModified.clear();
+ UsedInMultipleBlocks.clear();
+ Virt2PhysRegMap.clear();
+ Virt2LastUseMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createLocalRegisterAllocator() {
+ return new RALocal();
+}
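+
+// Usage note: the RegisterRegAlloc entry near the top of this file registers
+// this allocator under the name "local", so it can typically be selected on
+// the llc command line with -regalloc=local.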
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 0000000..61450a7
--- /dev/null
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,871 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "PBQP.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createPBQPRegisterAllocator);
+
+namespace {
+
+ //!
+ //! PBQP based allocators solve the register allocation problem by mapping
+ //! register allocation problems to Partitioned Boolean Quadratic
+ //! Programming problems.
+ class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass {
+ public:
+
+ static char ID;
+
+ //! Construct a PBQP register allocator.
+ PBQPRegAlloc() : MachineFunctionPass((intptr_t)&ID) {}
+
+ //! Return the pass name.
+ virtual const char* getPassName() const throw() {
+ return "PBQP Register Allocator";
+ }
+
+ //! PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<LiveIntervals>();
+ au.addRequiredTransitive<RegisterCoalescer>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addRequired<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ //! Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::set<unsigned> RegSet;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQPNum> CoalesceMap;
+
+ typedef std::set<LiveInterval*> LiveIntervalSet;
+
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
+
+ LI2NodeMap li2Node;
+ Node2LIMap node2LI;
+ AllowedSetMap allowedSets;
+ LiveIntervalSet vregIntervalsToAlloc,
+ emptyVRegIntervals;
+
+
+ //! Builds a PBQP cost vector.
+ template <typename RegContainer>
+ PBQPVector* buildCostVector(unsigned vReg,
+ const RegContainer &allowed,
+                                const CoalesceMap &coalesces,
+ PBQPNum spillCost) const;
+
+ //! \brief Builds a PBQP interference matrix.
+ //!
+ //! @return Either a pointer to a non-zero PBQP matrix representing the
+ //! allocation option costs, or a null pointer for a zero matrix.
+ //!
+ //! Expects allowed sets for two interfering LiveIntervals. These allowed
+ //! sets should contain only allocable registers from the LiveInterval's
+ //! register class, with any interfering pre-colored registers removed.
+ template <typename RegContainer>
+ PBQPMatrix* buildInterferenceMatrix(const RegContainer &allowed1,
+ const RegContainer &allowed2) const;
+
+    //! \brief Builds a PBQP coalescing matrix.
+    //!
+ //! Expects allowed sets for two potentially coalescable LiveIntervals,
+ //! and an estimated benefit due to coalescing. The allowed sets should
+ //! contain only allocable registers from the LiveInterval's register
+ //! classes, with any interfering pre-colored registers removed.
+ template <typename RegContainer>
+ PBQPMatrix* buildCoalescingMatrix(const RegContainer &allowed1,
+ const RegContainer &allowed2,
+ PBQPNum cBenefit) const;
+
+ //! \brief Finds coalescing opportunities and returns them as a map.
+ //!
+ //! Any entries in the map are guaranteed coalescable, even if their
+ //! corresponding live intervals overlap.
+ CoalesceMap findCoalesces();
+
+ //! \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
+
+ //! \brief Constructs a PBQP problem representation of the register
+ //! allocation problem for this function.
+ //!
+ //! @return a PBQP solver object for the register allocation problem.
+ pbqp* constructPBQPProblem();
+
+ //! \brief Adds a stack interval if the given live interval has been
+ //! spilled. Used to support stack slot coloring.
+    void addStackInterval(const LiveInterval *spilled,
+                          MachineRegisterInfo *mri);
+
+ //! \brief Given a solved PBQP problem maps this solution back to a register
+ //! assignment.
+ bool mapPBQPToRegAlloc(pbqp *problem);
+
+ //! \brief Postprocessing before final spilling. Sets basic block "live in"
+ //! variables.
+ void finalizeAlloc() const;
+
+ };
+
+ char PBQPRegAlloc::ID = 0;
+}
+
+
+template <typename RegContainer>
+PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg,
+ const RegContainer &allowed,
+ const CoalesceMap &coalesces,
+ PBQPNum spillCost) const {
+
+ typedef typename RegContainer::const_iterator AllowedItr;
+
+ // Allocate vector. Additional element (0th) used for spill option
+ PBQPVector *v = new PBQPVector(allowed.size() + 1);
+
+ (*v)[0] = spillCost;
+
+ // Iterate over the allowed registers inserting coalesce benefits if there
+ // are any.
+ unsigned ai = 0;
+ for (AllowedItr itr = allowed.begin(), end = allowed.end();
+ itr != end; ++itr, ++ai) {
+
+ unsigned pReg = *itr;
+
+ CoalesceMap::const_iterator cmItr =
+ coalesces.find(RegPair(vReg, pReg));
+
+ // No coalesce - on to the next preg.
+ if (cmItr == coalesces.end())
+ continue;
+
+ // We have a coalesce - insert the benefit.
+ (*v)[ai + 1] = -cmItr->second;
+ }
+
+ return v;
+}
+
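+// A worked cost-vector example (values illustrative): for a vreg with
+// allowed = {R0, R1}, spillCost 4.0, and a coalesce benefit of 1.5 with R1,
+// the vector built above is [4.0, 0.0, -1.5]: element 0 is always the spill
+// option, element i+1 corresponds to allowed[i], and entries with no
+// coalesce benefit are left at zero.
+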
+template <typename RegContainer>
+PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix(
+ const RegContainer &allowed1, const RegContainer &allowed2) const {
+
+ typedef typename RegContainer::const_iterator RegContainerIterator;
+
+ // Construct a PBQP matrix representing the cost of allocation options. The
+ // rows and columns correspond to the allocation options for the two live
+ // intervals. Elements will be infinite where corresponding registers alias,
+ // since we cannot allocate aliasing registers to interfering live intervals.
+ // All other elements (non-aliasing combinations) will have zero cost. Note
+ // that the spill option (element 0,0) has zero cost, since we can allocate
+ // both intervals to memory safely (the cost for each individual allocation
+ // to memory is accounted for by the cost vectors for each live interval).
+ PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1);
+
+ // Assume this is a zero matrix until proven otherwise. Zero matrices occur
+ // between interfering live ranges with non-overlapping register sets (e.g.
+ // non-overlapping reg classes, or disjoint sets of allowed regs within the
+ // same class). The term "overlapping" is used advisedly: sets which do not
+ // intersect, but contain registers which alias, will have non-zero matrices.
+ // We optimize zero matrices away to improve solver speed.
+ bool isZeroMatrix = true;
+
+
+ // Row index. Starts at 1, since the 0th row is for the spill option, which
+ // is always zero.
+ unsigned ri = 1;
+
+ // Iterate over allowed sets, insert infinities where required.
+ for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+ a1Itr != a1End; ++a1Itr) {
+
+ // Column index, starts at 1 as for row index.
+ unsigned ci = 1;
+ unsigned reg1 = *a1Itr;
+
+ for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+ a2Itr != a2End; ++a2Itr) {
+
+ unsigned reg2 = *a2Itr;
+
+ // If the row/column regs are identical or alias insert an infinity.
+ if ((reg1 == reg2) || tri->areAliases(reg1, reg2)) {
+ (*m)[ri][ci] = std::numeric_limits<PBQPNum>::infinity();
+ isZeroMatrix = false;
+ }
+
+ ++ci;
+ }
+
+ ++ri;
+ }
+
+ // If this turns out to be a zero matrix...
+ if (isZeroMatrix) {
+    // ...free it and return null.
+ delete m;
+ return 0;
+ }
+
+ // ...otherwise return the cost matrix.
+ return m;
+}
+
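+// A worked interference-matrix example (illustrative registers), with
+// allowed1 = {EAX, EBX} and allowed2 = {EBX, ECX}, unset entries at zero:
+//
+//            spill   EBX    ECX
+//   spill      0      0      0
+//   EAX        0      0      0
+//   EBX        0     inf     0
+//
+// Only the (EBX, EBX) option pairs identical (or aliasing) registers; a
+// matrix with no infinities at all is deleted and reported as zero above.
+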
+template <typename RegContainer>
+PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix(
+ const RegContainer &allowed1, const RegContainer &allowed2,
+ PBQPNum cBenefit) const {
+
+ typedef typename RegContainer::const_iterator RegContainerIterator;
+
+ // Construct a PBQP Matrix representing the benefits of coalescing. As with
+ // interference matrices the rows and columns represent allowed registers
+ // for the LiveIntervals which are (potentially) to be coalesced. The amount
+ // -cBenefit will be placed in any element representing the same register
+ // for both intervals.
+ PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1);
+
+ // Reset costs to zero.
+ m->reset(0);
+
+ // Assume the matrix is zero till proven otherwise. Zero matrices will be
+ // optimized away as in the interference case.
+ bool isZeroMatrix = true;
+
+ // Row index. Starts at 1, since the 0th row is for the spill option, which
+ // is always zero.
+ unsigned ri = 1;
+
+ // Iterate over the allowed sets, insert coalescing benefits where
+ // appropriate.
+ for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+ a1Itr != a1End; ++a1Itr) {
+
+ // Column index, starts at 1 as for row index.
+ unsigned ci = 1;
+ unsigned reg1 = *a1Itr;
+
+ for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+ a2Itr != a2End; ++a2Itr) {
+
+      // If the row and column represent the same register, insert a
+      // beneficial cost to prefer this allocation - it would allow us to
+      // eliminate a move instruction.
+ if (reg1 == *a2Itr) {
+ (*m)[ri][ci] = -cBenefit;
+ isZeroMatrix = false;
+ }
+
+ ++ci;
+ }
+
+ ++ri;
+ }
+
+ // If this turns out to be a zero matrix...
+ if (isZeroMatrix) {
+ // ...free it and return null.
+ delete m;
+ return 0;
+ }
+
+ return m;
+}
+
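+// Illustrative sketch only: for the same allowed sets as in the example
+// above and a coalesce benefit of 5.0, the coalescing matrix would instead
+// hold -5.0 at the (R1, R1) element and zero elsewhere, biasing the solver
+// towards assigning both intervals to R1 so the copy between them can be
+// eliminated.
+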
+PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
+
+ typedef MachineFunction::const_iterator MFIterator;
+ typedef MachineBasicBlock::const_iterator MBBIterator;
+ typedef LiveInterval::const_vni_iterator VNIIterator;
+
+ CoalesceMap coalescesFound;
+
+ // To find coalesces we need to iterate over the function looking for
+ // copy instructions.
+ for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
+ bbItr != bbEnd; ++bbItr) {
+
+ const MachineBasicBlock *mbb = &*bbItr;
+
+ for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
+ iItr != iEnd; ++iItr) {
+
+ const MachineInstr *instr = &*iItr;
+ unsigned srcReg, dstReg, srcSubReg, dstSubReg;
+
+ // If this isn't a copy then continue to the next instruction.
+ if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg))
+ continue;
+
+ // If the registers are already the same our job is nice and easy.
+ if (dstReg == srcReg)
+ continue;
+
+ bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
+ dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
+
+ // If both registers are physical then we can't coalesce.
+ if (srcRegIsPhysical && dstRegIsPhysical)
+ continue;
+
+ // If it's a copy that includes a virtual register but the source and
+ // destination classes differ then we can't coalesce, so continue with
+ // the next instruction.
+ const TargetRegisterClass *srcRegClass = srcRegIsPhysical ?
+ tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg);
+
+ const TargetRegisterClass *dstRegClass = dstRegIsPhysical ?
+ tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg);
+
+ if (srcRegClass != dstRegClass)
+ continue;
+
+      // We also need any physical regs to be allocable; coalescing with
+      // a non-allocable register is invalid.
+ if (srcRegIsPhysical) {
+ if (std::find(srcRegClass->allocation_order_begin(*mf),
+ srcRegClass->allocation_order_end(*mf), srcReg) ==
+ srcRegClass->allocation_order_end(*mf))
+ continue;
+ }
+
+ if (dstRegIsPhysical) {
+ if (std::find(dstRegClass->allocation_order_begin(*mf),
+ dstRegClass->allocation_order_end(*mf), dstReg) ==
+ dstRegClass->allocation_order_end(*mf))
+ continue;
+ }
+
+ // If we've made it here we have a copy with compatible register classes.
+ // We can probably coalesce, but we need to consider overlap.
+ const LiveInterval *srcLI = &lis->getInterval(srcReg),
+ *dstLI = &lis->getInterval(dstReg);
+
+ if (srcLI->overlaps(*dstLI)) {
+ // Even in the case of an overlap we might still be able to coalesce,
+ // but we need to make sure that no definition of either range occurs
+ // while the other range is live.
+
+        // Start by assuming we're ok.
+ bool badDef = false;
+
+ // Test all defs of the source range.
+ for (VNIIterator
+ vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
+ vniItr != vniEnd; ++vniItr) {
+
+ // If we find a def that kills the coalescing opportunity then
+ // record it and break from the loop.
+ if (dstLI->liveAt((*vniItr)->def)) {
+ badDef = true;
+ break;
+ }
+ }
+
+        // If we found a bad def, give up and continue to the next instruction.
+ if (badDef)
+ continue;
+
+ // Otherwise test definitions of the destination range.
+ for (VNIIterator
+ vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
+ vniItr != vniEnd; ++vniItr) {
+
+ // We want to make sure we skip the copy instruction itself.
+ if ((*vniItr)->copy == instr)
+ continue;
+
+ if (srcLI->liveAt((*vniItr)->def)) {
+ badDef = true;
+ break;
+ }
+ }
+
+        // As before, if we found a bad def we give up and continue to the
+        // next instruction.
+ if (badDef)
+ continue;
+ }
+
+ // If we make it to here then either the ranges didn't overlap, or they
+ // did, but none of their definitions would prevent us from coalescing.
+ // We're good to go with the coalesce.
+
+ float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0;
+
+ coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
+ coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
+ }
+
+ }
+
+ return coalescesFound;
+}
+
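+// A quick sanity check on the benefit formula above (illustrative numbers):
+// cBenefit = powf(10.0f, loopDepth) / 5.0, so a copy at loop depth 0 earns
+// a benefit of 0.2, depth 1 earns 2.0, and depth 2 earns 20.0 - coalescing
+// copies in deeply nested loops is rewarded exponentially.
+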
+void PBQPRegAlloc::findVRegIntervalsToAlloc() {
+
+ // Iterate over all live ranges.
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ // Ignore physical ones.
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ continue;
+
+ LiveInterval *li = itr->second;
+
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregIntervalsToAlloc.insert(li);
+ }
+ else {
+ emptyVRegIntervals.insert(li);
+ }
+ }
+}
+
+pbqp* PBQPRegAlloc::constructPBQPProblem() {
+
+ typedef std::vector<const LiveInterval*> LIVector;
+ typedef std::vector<unsigned> RegVector;
+
+ // This will store the physical intervals for easy reference.
+ LIVector physIntervals;
+
+ // Start by clearing the old node <-> live interval mappings & allowed sets
+ li2Node.clear();
+ node2LI.clear();
+ allowedSets.clear();
+
+ // Populate physIntervals, update preg use:
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+ physIntervals.push_back(itr->second);
+ mri->setPhysRegUsed(itr->second->reg);
+ }
+ }
+
+ // Iterate over vreg intervals, construct live interval <-> node number
+ // mappings.
+ for (LiveIntervalSet::const_iterator
+ itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
+ itr != end; ++itr) {
+ const LiveInterval *li = *itr;
+
+ li2Node[li] = node2LI.size();
+ node2LI.push_back(li);
+ }
+
+ // Get the set of potential coalesces.
+ CoalesceMap coalesces(findCoalesces());
+
+ // Construct a PBQP solver for this problem
+ pbqp *solver = alloc_pbqp(vregIntervalsToAlloc.size());
+
+ // Resize allowedSets container appropriately.
+ allowedSets.resize(vregIntervalsToAlloc.size());
+
+ // Iterate over virtual register intervals to compute allowed sets...
+ for (unsigned node = 0; node < node2LI.size(); ++node) {
+
+ // Grab pointers to the interval and its register class.
+ const LiveInterval *li = node2LI[node];
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+
+ // Start by assuming all allocable registers in the class are allowed...
+ RegVector liAllowed(liRC->allocation_order_begin(*mf),
+ liRC->allocation_order_end(*mf));
+
+ // Eliminate the physical registers which overlap with this range, along
+ // with all their aliases.
+ for (LIVector::iterator pItr = physIntervals.begin(),
+ pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
+
+ if (!li->overlaps(**pItr))
+ continue;
+
+ unsigned pReg = (*pItr)->reg;
+
+ // If we get here then the live intervals overlap, but we're still ok
+ // if they're coalescable.
+ if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
+ continue;
+
+ // If we get here then we have a genuine exclusion.
+
+ // Remove the overlapping reg...
+ RegVector::iterator eraseItr =
+ std::find(liAllowed.begin(), liAllowed.end(), pReg);
+
+ if (eraseItr != liAllowed.end())
+ liAllowed.erase(eraseItr);
+
+ const unsigned *aliasItr = tri->getAliasSet(pReg);
+
+ if (aliasItr != 0) {
+ // ...and its aliases.
+ for (; *aliasItr != 0; ++aliasItr) {
+ RegVector::iterator eraseItr =
+ std::find(liAllowed.begin(), liAllowed.end(), *aliasItr);
+
+ if (eraseItr != liAllowed.end()) {
+ liAllowed.erase(eraseItr);
+ }
+ }
+ }
+ }
+
+ // Copy the allowed set into a member vector for use when constructing cost
+ // vectors & matrices, and mapping PBQP solutions back to assignments.
+ allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end());
+
+ // Set the spill cost to the interval weight, or epsilon if the
+ // interval weight is zero
+ PBQPNum spillCost = (li->weight != 0.0) ?
+ li->weight : std::numeric_limits<PBQPNum>::min();
+
+ // Build a cost vector for this interval.
+ add_pbqp_nodecosts(solver, node,
+ buildCostVector(li->reg, allowedSets[node], coalesces,
+ spillCost));
+
+ }
+
+
+ // Now add the cost matrices...
+ for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) {
+ const LiveInterval *li = node2LI[node1];
+
+ // Test for live range overlaps and insert interference matrices.
+ for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) {
+ const LiveInterval *li2 = node2LI[node2];
+
+ CoalesceMap::const_iterator cmItr =
+ coalesces.find(RegPair(li->reg, li2->reg));
+
+ PBQPMatrix *m = 0;
+
+ if (cmItr != coalesces.end()) {
+ m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
+ cmItr->second);
+ }
+ else if (li->overlaps(*li2)) {
+ m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]);
+ }
+
+ if (m != 0) {
+ add_pbqp_edgecosts(solver, node1, node2, m);
+ delete m;
+ }
+ }
+ }
+
+ // We're done, PBQP problem constructed - return it.
+ return solver;
+}
+
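+// Illustrative sketch only: the PBQP problem built above is a graph in which
+// each node is an unallocated vreg carrying a cost vector over its options
+// (spill + allowed registers) and each edge carries a matrix coupling the
+// choices of two vregs. For two hypothetical interfering vregs A and B the
+// construction reduces to:
+//
+//   add_pbqp_nodecosts(solver, nodeA, costVectorA);
+//   add_pbqp_nodecosts(solver, nodeB, costVectorB);
+//   add_pbqp_edgecosts(solver, nodeA, nodeB, interferenceMatrixAB);
+//
+// Zero matrices are never added, which keeps the graph sparse.
+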
+void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
+ MachineRegisterInfo* mri) {
+ int stackSlot = vrm->getStackSlot(spilled->reg);
+
+ if (stackSlot == VirtRegMap::NO_STACK_SLOT)
+ return;
+
+ const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
+ LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
+
+ VNInfo *vni;
+ if (stackInterval.getNumValNums() != 0)
+ vni = stackInterval.getValNumInfo(0);
+ else
+ vni = stackInterval.getNextValue(-0U, 0, lss->getVNInfoAllocator());
+
+ LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
+ stackInterval.MergeRangesInAsValue(rhsInterval, vni);
+}
+
+bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) {
+
+ // Set to true if we have any spills
+ bool anotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ vrm->clearAllVirt();
+
+ // Iterate over the nodes mapping the PBQP solution to a register assignment.
+ for (unsigned node = 0; node < node2LI.size(); ++node) {
+ unsigned virtReg = node2LI[node]->reg,
+ allocSelection = get_pbqp_solution(problem, node);
+
+ // If the PBQP solution is non-zero it's a physical register...
+ if (allocSelection != 0) {
+ // Get the physical reg, subtracting 1 to account for the spill option.
+ unsigned physReg = allowedSets[node][allocSelection - 1];
+
+ DOUT << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n";
+
+ assert(physReg != 0);
+
+ // Add to the virt reg map and update the used phys regs.
+ vrm->assignVirt2Phys(virtReg, physReg);
+ }
+ // ...Otherwise it's a spill.
+ else {
+
+ // Make sure we ignore this virtual reg on the next round
+ // of allocation
+ vregIntervalsToAlloc.erase(&lis->getInterval(virtReg));
+
+ // Insert spill ranges for this live range
+ const LiveInterval *spillInterval = node2LI[node];
+ double oldSpillWeight = spillInterval->weight;
+ SmallVector<LiveInterval*, 8> spillIs;
+ std::vector<LiveInterval*> newSpills =
+ lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
+ addStackInterval(spillInterval, mri);
+
+ DOUT << "VREG " << virtReg << " -> SPILLED (Cost: "
+ << oldSpillWeight << ", New vregs: ";
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (std::vector<LiveInterval*>::const_iterator
+ itr = newSpills.begin(), end = newSpills.end();
+ itr != end; ++itr) {
+
+ assert(!(*itr)->empty() && "Empty spill range.");
+
+ DOUT << (*itr)->reg << " ";
+
+ vregIntervalsToAlloc.insert(*itr);
+ }
+
+ DOUT << ")\n";
+
+ // We need another round if spill intervals were added.
+ anotherRoundNeeded |= !newSpills.empty();
+ }
+ }
+
+ return !anotherRoundNeeded;
+}
+
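+// Decoding reminder (illustrative): a PBQP solution of 0 for a node means
+// "spill", while a solution of k > 0 selects allowedSets[node][k - 1]. So
+// with allowedSets[node] = {R0, R1}, a solution of 2 assigns R1 to the vreg.
+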
+void PBQPRegAlloc::finalizeAlloc() const {
+ typedef LiveIntervals::iterator LIIterator;
+ typedef LiveInterval::Ranges::const_iterator LRIterator;
+
+ // First allocate registers for the empty intervals.
+ for (LiveIntervalSet::const_iterator
+ itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end();
+ itr != end; ++itr) {
+ LiveInterval *li = *itr;
+
+ unsigned physReg = li->preference;
+
+ if (physReg == 0) {
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+ physReg = *liRC->allocation_order_begin(*mf);
+ }
+
+ vrm->assignVirt2Phys(li->reg, physReg);
+ }
+
+ // Finally iterate over the basic blocks to compute and set the live-in sets.
+ SmallVector<MachineBasicBlock*, 8> liveInMBBs;
+ MachineBasicBlock *entryMBB = &*mf->begin();
+
+ for (LIIterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = 0;
+
+ // Get the physical register for this interval
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ reg = li->reg;
+ }
+ else if (vrm->isAssignedReg(li->reg)) {
+ reg = vrm->getPhys(li->reg);
+ }
+ else {
+ // Ranges which are assigned a stack slot only are ignored.
+ continue;
+ }
+
+ // Ignore unallocated vregs:
+ if (reg == 0) {
+ continue;
+ }
+
+ // Iterate over the ranges of the current interval...
+ for (LRIterator lrItr = li->begin(), lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+
+ // Find the set of basic blocks which this range is live into...
+ if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
+ // And add the physreg for this interval to their live-in sets.
+ for (unsigned i = 0; i < liveInMBBs.size(); ++i) {
+ if (liveInMBBs[i] != entryMBB) {
+ if (!liveInMBBs[i]->isLiveIn(reg)) {
+ liveInMBBs[i]->addLiveIn(reg);
+ }
+ }
+ }
+ liveInMBBs.clear();
+ }
+ }
+ }
+
+}
+
+bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
+
+ mf = &MF;
+ tm = &mf->getTarget();
+ tri = tm->getRegisterInfo();
+ tii = tm->getInstrInfo();
+ mri = &mf->getRegInfo();
+
+ lis = &getAnalysis<LiveIntervals>();
+ lss = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ vrm = &getAnalysis<VirtRegMap>();
+
+ DOUT << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n";
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+  // This process is repeated until no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc();
+
+ // If there aren't any then we're done here.
+ if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty())
+ return true;
+
+ // If there are non-empty intervals allocate them using pbqp.
+ if (!vregIntervalsToAlloc.empty()) {
+
+ bool pbqpAllocComplete = false;
+ unsigned round = 0;
+
+ while (!pbqpAllocComplete) {
+ DOUT << " PBQP Regalloc round " << round << ":\n";
+
+ pbqp *problem = constructPBQPProblem();
+
+ solve_pbqp(problem);
+
+ pbqpAllocComplete = mapPBQPToRegAlloc(problem);
+
+ free_pbqp(problem);
+
+ ++round;
+ }
+ }
+
+ // Finalise allocation, allocate empty ranges.
+ finalizeAlloc();
+
+ vregIntervalsToAlloc.clear();
+ emptyVRegIntervals.clear();
+ li2Node.clear();
+ node2LI.clear();
+ allowedSets.clear();
+
+ DOUT << "Post alloc VirtRegMap:\n" << *vrm << "\n";
+
+ // Run rewriter
+ std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+
+ rewriter->runOnMachineFunction(*mf, *vrm, lis);
+
+ return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator() {
+ return new PBQPRegAlloc();
+}
+
+
+#undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegAllocSimple.cpp b/lib/CodeGen/RegAllocSimple.cpp
new file mode 100644
index 0000000..447e54c
--- /dev/null
+++ b/lib/CodeGen/RegAllocSimple.cpp
@@ -0,0 +1,257 @@
+//===-- RegAllocSimple.cpp - A simple generic register allocator ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register allocator. *Very* simple: it
+// immediately spills every value right after it is computed, and it reloads
+// all used operands from the spill area to temporary registers before each
+// instruction. It does not keep values in registers across instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+namespace {
+ static RegisterRegAlloc
+ simpleRegAlloc("simple", "simple register allocator",
+ createSimpleRegisterAllocator);
+
+ class VISIBILITY_HIDDEN RegAllocSimple : public MachineFunctionPass {
+ public:
+ static char ID;
+ RegAllocSimple() : MachineFunctionPass(&ID) {}
+ private:
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // StackSlotForVirtReg - Maps SSA Regs => frame index on the stack where
+ // these values are spilled
+ std::map<unsigned, int> StackSlotForVirtReg;
+
+ // RegsUsed - Keep track of what registers are currently in use. This is a
+ // bitset.
+ std::vector<bool> RegsUsed;
+
+ // RegClassIdx - Maps RegClass => which index we can take a register
+ // from. Since this is a simple register allocator, when we need a register
+ // of a certain class, we just take the next available one.
+ std::map<const TargetRegisterClass*, unsigned> RegClassIdx;
+
+ public:
+ virtual const char *getPassName() const {
+ return "Simple Register Allocator";
+ }
+
+ /// runOnMachineFunction - Register allocate the whole function
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(PHIEliminationID); // Eliminate PHI nodes
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ private:
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+ /// getStackSpaceFor - This returns the offset of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// Given a virtual register, return a compatible physical register that is
+ /// currently unused.
+ ///
+ /// Side effect: marks that register as being used until manually cleared
+ ///
+ unsigned getFreeReg(unsigned virtualReg);
+
+ /// Moves value from memory into that register
+ unsigned reloadVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned VirtReg);
+
+ /// Saves reg value on the stack (maps virtual register to stack value)
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg);
+ };
+ char RegAllocSimple::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual
+/// register to be held on the stack.
+int RegAllocSimple::getStackSpaceFor(unsigned VirtReg,
+ const TargetRegisterClass *RC) {
+  // Find the location where VirtReg would belong...
+ std::map<unsigned, int>::iterator I = StackSlotForVirtReg.find(VirtReg);
+
+ if (I != StackSlotForVirtReg.end())
+ return I->second; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx));
+
+ return FrameIdx;
+}
+
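+// Usage sketch (hypothetical values): the first call for a vreg creates a
+// fresh frame index, e.g. getStackSpaceFor(vreg, RC) -> FI#2, and every
+// later call for the same vreg returns FI#2 from StackSlotForVirtReg, so
+// all spills and reloads of a vreg share one slot.
+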
+unsigned RegAllocSimple::getFreeReg(unsigned virtualReg) {
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtualReg);
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+#ifndef NDEBUG
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+#endif
+
+ while (1) {
+ unsigned regIdx = RegClassIdx[RC]++;
+ assert(RI+regIdx != RE && "Not enough registers!");
+ unsigned PhysReg = *(RI+regIdx);
+
+ if (!RegsUsed[PhysReg]) {
+ MF->getRegInfo().setPhysRegUsed(PhysReg);
+ return PhysReg;
+ }
+ }
+}
+
+unsigned RegAllocSimple::reloadVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg) {
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIdx = getStackSpaceFor(VirtReg, RC);
+ unsigned PhysReg = getFreeReg(VirtReg);
+
+ // Add move instruction(s)
+ ++NumLoads;
+ TII->loadRegFromStackSlot(MBB, I, PhysReg, FrameIdx, RC);
+ return PhysReg;
+}
+
+void RegAllocSimple::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(VirtReg);
+
+ int FrameIdx = getStackSpaceFor(VirtReg, RC);
+
+ // Add move instruction(s)
+ ++NumStores;
+ TII->storeRegToStackSlot(MBB, I, PhysReg, true, FrameIdx, RC);
+}
+
+
+void RegAllocSimple::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+ // Made to combat the incorrect allocation of r2 = add r1, r1
+ std::map<unsigned, unsigned> Virt2PhysRegMap;
+
+ RegsUsed.resize(TRI->getNumRegs());
+
+ // This is a preliminary pass that will invalidate any registers that are
+ // used by the instruction (including implicit uses).
+ const TargetInstrDesc &Desc = MI->getDesc();
+ const unsigned *Regs;
+ if (Desc.ImplicitUses) {
+ for (Regs = Desc.ImplicitUses; *Regs; ++Regs)
+ RegsUsed[*Regs] = true;
+ }
+
+ if (Desc.ImplicitDefs) {
+ for (Regs = Desc.ImplicitDefs; *Regs; ++Regs) {
+ RegsUsed[*Regs] = true;
+ MF->getRegInfo().setPhysRegUsed(*Regs);
+ }
+ }
+
+ // Loop over uses, move from memory into registers.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (MO.isReg() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned virtualReg = (unsigned) MO.getReg();
+ DOUT << "op: " << MO << "\n";
+ DOUT << "\t inst[" << i << "]: ";
+ DEBUG(MI->print(*cerr.stream(), TM));
+
+ // make sure the same virtual register maps to the same physical
+ // register in any given instruction
+ unsigned physReg = Virt2PhysRegMap[virtualReg];
+ if (physReg == 0) {
+ if (MO.isDef()) {
+ unsigned TiedOp;
+ if (!MI->isRegTiedToUseOperand(i, &TiedOp)) {
+ physReg = getFreeReg(virtualReg);
+ } else {
+            // Must be the same register number as the source operand it is
+            // tied to. This maps a = b + c into b = b + c, and saves b into
+            // a's spot.
+ assert(MI->getOperand(TiedOp).isReg() &&
+ MI->getOperand(TiedOp).getReg() &&
+ MI->getOperand(TiedOp).isUse() &&
+ "Two address instruction invalid!");
+
+ physReg = MI->getOperand(TiedOp).getReg();
+ }
+ spillVirtReg(MBB, next(MI), virtualReg, physReg);
+ } else {
+ physReg = reloadVirtReg(MBB, MI, virtualReg);
+ Virt2PhysRegMap[virtualReg] = physReg;
+ }
+ }
+ MO.setReg(physReg);
+ DOUT << "virt: " << virtualReg << ", phys: " << MO.getReg() << "\n";
+ }
+ }
+ RegClassIdx.clear();
+ RegsUsed.clear();
+ }
+}
+
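+// Illustrative example of the rewrite above (hypothetical vregs and
+// x86-style register names): for the input
+//
+//   %v2 = add %v0, %v1
+//
+// the allocator emits roughly
+//
+//   eax = load [slot(%v0)]    ; reload first use
+//   ecx = load [slot(%v1)]    ; reload second use
+//   edx = add eax, ecx        ; the def gets a fresh free register
+//   store edx -> [slot(%v2)]  ; spill the def immediately afterwards
+//
+// so no value stays in a register across instructions.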
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RegAllocSimple::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function\n";
+ MF = &Fn;
+ TM = &MF->getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ AllocateBasicBlock(*MBB);
+
+ StackSlotForVirtReg.clear();
+ return true;
+}
+
+FunctionPass *llvm::createSimpleRegisterAllocator() {
+ return new RegAllocSimple();
+}
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 0000000..1131e3d
--- /dev/null
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,41 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface which
+// is used as the common interface used by all clients and
+// implementations of register coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+// Register the RegisterCoalescer interface, providing a nice name to refer to.
+static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer");
+char RegisterCoalescer::ID = 0;
+
+// RegisterCoalescer destructor: DO NOT move this to the header file
+// for RegisterCoalescer or else clients of the RegisterCoalescer
+// class may not depend on the RegisterCoalescer.o file in the current
+// .a file, causing register coalescer support to not be included in the
+// tool correctly!
+//
+RegisterCoalescer::~RegisterCoalescer() {}
+
+// Because of the way .a files work, we must force the SimpleRC
+// implementation to be pulled in if the RegisterCoalescer classes are
+// pulled in. Otherwise we run the risk of RegisterCoalescer being
+// used, but the default implementation not being linked into the tool
+// that uses it.
+DEFINING_FILE_FOR(RegisterCoalescer)
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 0000000..944468e
--- /dev/null
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,480 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information, such as unused registers, at any point in a machine basic block.
+// It also provides a mechanism to make registers available by evicting them to
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// RedefinesSuperRegPart - Return true if the specified register is redefining
+/// part of a super-register.
+static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg,
+ const TargetRegisterInfo *TRI) {
+ bool SeenSuperUse = false;
+ bool SeenSuperDef = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (TRI->isSuperRegister(SubReg, MO.getReg())) {
+ if (MO.isUse())
+ SeenSuperUse = true;
+ else if (MO.isImplicit())
+ SeenSuperDef = true;
+ }
+ }
+
+ return SeenSuperDef && SeenSuperUse;
+}
+
+static bool RedefinesSuperRegPart(const MachineInstr *MI,
+ const MachineOperand &MO,
+ const TargetRegisterInfo *TRI) {
+ assert(MO.isReg() && MO.isDef() && "Not a register def!");
+ return RedefinesSuperRegPart(MI, MO.getReg(), TRI);
+}
+
+/// setUsed - Set the register and its sub-registers as being used.
+void RegScavenger::setUsed(unsigned Reg, bool ImpDef) {
+ RegsAvailable.reset(Reg);
+ ImplicitDefed[Reg] = ImpDef;
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ RegsAvailable.reset(SubReg);
+ ImplicitDefed[SubReg] = ImpDef;
+ }
+}
+
+/// setUnused - Set the register and its sub-registers as being unused.
+void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) {
+ RegsAvailable.set(Reg);
+ ImplicitDefed.reset(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ if (!RedefinesSuperRegPart(MI, Reg, TRI)) {
+ RegsAvailable.set(SubReg);
+ ImplicitDefed.reset(SubReg);
+ }
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+ MachineFunction &MF = *mbb->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ "Target changed?");
+
+ if (!MBB) {
+ NumPhysRegs = TRI->getNumRegs();
+ RegsAvailable.resize(NumPhysRegs);
+ ImplicitDefed.resize(NumPhysRegs);
+
+ // Create reserved registers bitvector.
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ // Create callee-saved registers bitvector.
+ CalleeSavedRegs.resize(NumPhysRegs);
+ const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ if (CSRegs != NULL)
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CalleeSavedRegs.set(CSRegs[i]);
+ }
+
+ MBB = mbb;
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+ CurrDist = 0;
+ DistanceMap.clear();
+ ImplicitDefed.reset();
+
+ // All registers started out unused.
+ RegsAvailable.set();
+
+ // Reserved registers are always used.
+ RegsAvailable ^= ReservedRegs;
+
+ // Live-in registers are in use.
+ if (!MBB->livein_empty())
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ Tracking = false;
+}
+
+void RegScavenger::restoreScavengedReg() {
+ TII->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg,
+ ScavengingFrameIndex, ScavengedRC);
+ MachineBasicBlock::iterator II = prior(MBBI);
+ TRI->eliminateFrameIndex(II, 0, this);
+ setUsed(ScavengedReg);
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+}
+
+#ifndef NDEBUG
+/// isLiveInButUnusedBefore - Return true if the register is live-in to the
+/// MBB and is not used before it reaches the MI that defines the register.
+static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI,
+ MachineRegisterInfo* MRI) {
+ // First check if register is livein.
+ bool isLiveIn = false;
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ if (Reg == *I || TRI->isSuperRegister(Reg, *I)) {
+ isLiveIn = true;
+ break;
+ }
+ if (!isLiveIn)
+ return false;
+
+ // Is there any use of it before the specified MI?
+ SmallPtrSet<MachineInstr*, 4> UsesInMBB;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->getParent() == MBB)
+ UsesInMBB.insert(UseMI);
+ }
+ if (UsesInMBB.empty())
+ return true;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I)
+ if (UsesInMBB.count(&*I))
+ return false;
+ return true;
+}
+#endif
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+ MBBI = next(MBBI);
+ }
+
+ MachineInstr *MI = MBBI;
+ DistanceMap.insert(std::make_pair(MI, CurrDist++));
+
+ if (MI == ScavengeRestore) {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+ }
+
+ bool IsImpDef = MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF;
+
+ // Separate register operands into 3 classes: uses, defs, earlyclobbers.
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs;
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs;
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ if (MO.isUse())
+ UseMOs.push_back(std::make_pair(&MO,i));
+ else if (MO.isEarlyClobber())
+ EarlyClobberMOs.push_back(std::make_pair(&MO,i));
+ else
+ DefMOs.push_back(std::make_pair(&MO,i));
+ }
+
+ // Process uses first.
+ BitVector UseRegs(NumPhysRegs);
+ for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
+ const MachineOperand MO = *UseMOs[i].first;
+ unsigned Reg = MO.getReg();
+
+ assert(isUsed(Reg) && "Using an undefined register!");
+
+ if (MO.isKill() && !isReserved(Reg)) {
+ UseRegs.set(Reg);
+
+ // Mark sub-registers as used.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ UseRegs.set(SubReg);
+ }
+ }
+
+ // Change states of all registers after all the uses are processed to guard
+ // against multiple uses.
+ setUnused(UseRegs);
+
+  // Process early clobber defs, then process defs. We can have an early
+  // clobber that is dead; it should not conflict with a def that happens one
+  // "slot" (see InstrSlots in LiveIntervalAnalysis.h) later.
+ unsigned NumECs = EarlyClobberMOs.size();
+ unsigned NumDefs = DefMOs.size();
+
+ for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
+ const MachineOperand &MO = (i < NumECs)
+ ? *EarlyClobberMOs[i].first : *DefMOs[i-NumECs].first;
+ unsigned Idx = (i < NumECs)
+ ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second;
+ unsigned Reg = MO.getReg();
+
+ // If it's dead upon def, then it is now free.
+ if (MO.isDead()) {
+ setUnused(Reg, MI);
+ continue;
+ }
+
+ // Skip two-address destination operand.
+ if (MI->isRegTiedToUseOperand(Idx)) {
+ assert(isUsed(Reg) && "Using an undefined register!");
+ continue;
+ }
+
+ // Skip if this is merely redefining part of a super-register.
+ if (RedefinesSuperRegPart(MI, MO, TRI))
+ continue;
+
+ // Implicit def is allowed to "re-define" any register. Similarly,
+ // implicitly defined registers can be clobbered.
+ assert((isReserved(Reg) || isUnused(Reg) ||
+ IsImpDef || isImplicitlyDefined(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+ setUsed(Reg, IsImpDef);
+ }
+}
+
+void RegScavenger::backward() {
+ assert(Tracking && "Not tracking states!");
+ assert(MBBI != MBB->begin() && "Already at start of basic block!");
+ // Move ptr backward.
+ MBBI = prior(MBBI);
+
+ MachineInstr *MI = MBBI;
+ DistanceMap.erase(MI);
+ --CurrDist;
+
+ // Separate register operands into 3 classes: uses, defs, earlyclobbers.
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs;
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs;
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ if (MO.isUse())
+ UseMOs.push_back(std::make_pair(&MO,i));
+ else if (MO.isEarlyClobber())
+ EarlyClobberMOs.push_back(std::make_pair(&MO,i));
+ else
+ DefMOs.push_back(std::make_pair(&MO,i));
+ }
+
+
+ // Process defs first.
+ unsigned NumECs = EarlyClobberMOs.size();
+ unsigned NumDefs = DefMOs.size();
+ for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
+ const MachineOperand &MO = (i < NumDefs)
+ ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first;
+    unsigned Idx = (i < NumDefs)
+ ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second;
+
+ // Skip two-address destination operand.
+ if (MI->isRegTiedToUseOperand(Idx))
+ continue;
+
+ unsigned Reg = MO.getReg();
+ assert(isUsed(Reg));
+ if (!isReserved(Reg))
+ setUnused(Reg, MI);
+ }
+
+ // Process uses.
+ BitVector UseRegs(NumPhysRegs);
+ for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
+ const MachineOperand MO = *UseMOs[i].first;
+ unsigned Reg = MO.getReg();
+ assert(isUnused(Reg) || isReserved(Reg));
+ UseRegs.set(Reg);
+
+ // Set the sub-registers as "used".
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ UseRegs.set(SubReg);
+ }
+ setUsed(UseRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ if (includeReserved)
+ used = ~RegsAvailable;
+ else
+ used = ~RegsAvailable & ~ReservedRegs;
+}
+
+/// CreateRegClassMask - Set the bits that represent the registers in the
+/// TargetRegisterClass.
+static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
+ ++I)
+ Mask.set(*I);
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+ const BitVector &Candidates) const {
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector RegsAvailableCopy(NumPhysRegs, false);
+ CreateRegClassMask(RegClass, RegsAvailableCopy);
+ RegsAvailableCopy &= RegsAvailable;
+
+ // Restrict the search to candidates.
+ RegsAvailableCopy &= Candidates;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+ int Reg = RegsAvailableCopy.find_first();
+ return (Reg == -1) ? 0 : Reg;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+ bool ExCalleeSaved) const {
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector RegsAvailableCopy(NumPhysRegs, false);
+ CreateRegClassMask(RegClass, RegsAvailableCopy);
+ RegsAvailableCopy &= RegsAvailable;
+
+ // If looking for a non-callee-saved register, mask off all the callee-saved
+ // registers.
+ if (ExCalleeSaved)
+ RegsAvailableCopy &= ~CalleeSavedRegs;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+ int Reg = RegsAvailableCopy.find_first();
+ return (Reg == -1) ? 0 : Reg;
+}
+
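+// Bit-twiddling sketch (illustrative): with NumPhysRegs = 8, a class mask of
+// 0b00111100 and RegsAvailable = 0b00101010, the candidate set is their AND,
+// 0b00101000, and find_first() returns the lowest set bit - register 3.
+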
+/// findFirstUse - Find the first use of the specified register below the
+/// current position in the basic block, returning the use instruction and
+/// its distance in Dist.
+MachineInstr*
+RegScavenger::findFirstUse(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I, unsigned Reg,
+ unsigned &Dist) {
+ MachineInstr *UseMI = 0;
+ Dist = ~0U;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ if (DI == DistanceMap.end()) {
+      // If it's not in the map, it's below the current MI; extend the map
+      // to cover the rest of the block.
+ I = next(I);
+      unsigned MapDist = CurrDist + 1;
+      while (I != MBB->end()) {
+        DistanceMap.insert(std::make_pair(I, MapDist++));
+ I = next(I);
+ }
+ }
+ DI = DistanceMap.find(UDMI);
+ if (DI->second > CurrDist && DI->second < Dist) {
+ Dist = DI->second;
+ UseMI = UDMI;
+ }
+ }
+ return UseMI;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ assert(ScavengingFrameIndex >= 0 &&
+ "Cannot scavenge a register without an emergency spill slot!");
+
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector Candidates(NumPhysRegs, false);
+ CreateRegClassMask(RC, Candidates);
+ Candidates ^= ReservedRegs; // Do not include reserved registers.
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg())
+ Candidates.reset(MO.getReg());
+ }
+
+ // Find the register whose use is furthest away.
+ unsigned SReg = 0;
+ unsigned MaxDist = 0;
+ MachineInstr *MaxUseMI = 0;
+ int Reg = Candidates.find_first();
+ while (Reg != -1) {
+ unsigned Dist;
+ MachineInstr *UseMI = findFirstUse(MBB, I, Reg, Dist);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ unsigned AsDist;
+ MachineInstr *AsUseMI = findFirstUse(MBB, I, *AS, AsDist);
+ if (AsDist < Dist) {
+ Dist = AsDist;
+ UseMI = AsUseMI;
+ }
+ }
+ if (Dist >= MaxDist) {
+ MaxDist = Dist;
+ MaxUseMI = UseMI;
+ SReg = Reg;
+ }
+ Reg = Candidates.find_next(Reg);
+ }
+
+ if (ScavengedReg != 0) {
+ assert(0 && "Scavenger slot is live, unable to scavenge another register!");
+ abort();
+ }
+
+ // Spill the scavenged register before I.
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC);
+ MachineBasicBlock::iterator II = prior(I);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ II = MaxUseMI
+ ? MachineBasicBlock::iterator(MaxUseMI) : MBB->getFirstTerminator();
+ TII->loadRegFromStackSlot(*MBB, II, SReg, ScavengingFrameIndex, RC);
+ ScavengeRestore = prior(II);
+ ScavengedReg = SReg;
+ ScavengedRC = RC;
+
+ return SReg;
+}
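+
+// Illustrative timeline of a scavenge (hypothetical register R and the
+// emergency slot FI):
+//
+//   store R -> [FI]   ; emitted before I: evict the register whose next
+//                     ; use is furthest away
+//   ...               ; I and the following instructions may use R freely
+//   R = load [FI]     ; ScavengeRestore: reinserted just before R's next
+//                     ; use (or the first terminator)
+//
+// Only one register may be scavenged at a time; the assert above fires if a
+// second scavenge is attempted while the slot is still live.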
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 0000000..a8452df
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,572 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include <climits>
+using namespace llvm;
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()),
+ TII(TM.getInstrInfo()),
+ TRI(TM.getRegisterInfo()),
+ TLI(TM.getTargetLowering()),
+ MF(mf), MRI(mf.getRegInfo()),
+ ConstPool(MF.getConstantPool()),
+ EntrySU(), ExitSU() {
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ cerr << "**** NOOP ****\n";
+ }
+}
+
+
+/// Run - perform scheduling.
+///
+void ScheduleDAG::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ BB = bb;
+ InsertPos = insertPos;
+
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+
+ Schedule();
+
+ DOUT << "*** Final schedule ***\n";
+ DEBUG(dumpSchedule());
+ DOUT << "\n";
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// it is not already present. It also adds the current node as a successor
+/// of the specified node.
+void SUnit::addPred(const SDep &D) {
+  // If this node already has this dependence, don't add a redundant one.
+ for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D)
+ return;
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled)
+ ++NumPredsLeft;
+ if (!isScheduled)
+ ++N->NumSuccsLeft;
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+}
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ bool FoundSucc = false;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
+ EE = N->Succs.end(); II != EE; ++II)
+ if (*II == P) {
+ FoundSucc = true;
+ N->Succs.erase(II);
+ break;
+ }
+ assert(FoundSucc && "Mismatching preds / succs lists!");
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled)
+ --NumPredsLeft;
+ if (!isScheduled)
+ --N->NumSuccsLeft;
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - If NewDepth is greater than this node's current depth,
+/// set the depth to NewDepth and invalidate the cached depths of this node's
+/// successors.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - If NewHeight is greater than this node's current
+/// height, set the height to NewHeight and invalidate the cached heights of
+/// this node's predecessors.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the entry to the node.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
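+// Worked example (illustrative): for the chain A -> B -> C with unit
+// latencies, depth is the longest path from the entry and height the
+// longest path to the exit, so:
+//
+//   node:    A   B   C
+//   depth:   0   1   2
+//   height:  2   1   0
+//
+// Both routines above compute these lazily with an explicit worklist to
+// avoid deep recursion on large DAGs.
+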
+/// dump - Print this scheduling unit. An SUnit is a wrapper around either a
+/// single SDNode or a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ cerr << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ cerr << " # preds left : " << NumPredsLeft << "\n";
+ cerr << " # succs left : " << NumSuccsLeft << "\n";
+ cerr << " Latency : " << Latency << "\n";
+ cerr << " Depth : " << Depth << "\n";
+ cerr << " Height : " << Height << "\n";
+
+ if (Preds.size() != 0) {
+ cerr << " Predecessors:\n";
+    for (SUnit::const_pred_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ cerr << " ";
+ switch (I->getKind()) {
+ case SDep::Data: cerr << "val "; break;
+ case SDep::Anti: cerr << "anti"; break;
+ case SDep::Output: cerr << "out "; break;
+ case SDep::Order: cerr << "ch "; break;
+ }
+ cerr << "#";
+ cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ cerr << " *";
+ cerr << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ cerr << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ cerr << " ";
+ switch (I->getKind()) {
+ case SDep::Data: cerr << "val "; break;
+ case SDep::Anti: cerr << "anti"; break;
+ case SDep::Output: cerr << "out "; break;
+ case SDep::Order: cerr << "ch "; break;
+ }
+ cerr << "#";
+ cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ cerr << " *";
+ cerr << "\n";
+ }
+ }
+ cerr << "\n";
+}
+
+#ifndef NDEBUG
+/// VerifySchedule - Verify that all SUnits were scheduled and that
+/// their state is consistent.
+///
+void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ cerr << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].isScheduled &&
+        (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+ unsigned(INT_MAX)) {
+ if (!AnyNotSched)
+ cerr << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ cerr << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnits[i].NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ cerr << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(!AnyNotSched);
+ assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
+/// the nodes reachable from Y, and then shifts them using Shift to lie
+/// immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ Allocate(SU->NodeNum, --Id);
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit *SU = I->getSUnit();
+ if (!--Node2Index[SU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(SU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
+
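+// Illustrative example: with edges A -> B and A -> C (so B and C have no
+// successors), the worklist sweep above hands out the largest indexes to
+// the sinks first, e.g. Node2Index = { A:0, B:1, C:2 }, and every edge
+// X -> Y then satisfies ord(X) < ord(Y) as required.
+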
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate an
+/// edge to be removed from the specified node N from the predecessors
+/// of the current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool& HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ int s = SU->Succs[I].getSUnit()->NodeNum;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+      // Visit successors if not already visited and in the affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
+
+
+/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+/// create a cycle.
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ if (IsReachable(TargetSU, SU))
+ return true;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isAssignedRegDep() &&
+ IsReachable(TargetSU, I->getSUnit()))
+ return true;
+ return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort(
+ std::vector<SUnit> &sunits)
+ : SUnits(sunits) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
new file mode 100644
index 0000000..770f5bb
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -0,0 +1,71 @@
+//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAG class, which creates
+// MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) {
+ MI->addMemOperand(MF, MO);
+}
+
+void ScheduleDAG::EmitNoop() {
+ TII->insertNoop(*BB, InsertPos);
+}
+
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
+ DenseMap<SUnit*, unsigned> &VRBaseMap) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
+ SU->CopyDstRC, SU->CopySrcRC);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
+ SU->CopyDstRC, SU->CopySrcRC);
+ }
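+ // Only the first non-chain predecessor is relevant for a copy node, so
+ // stop after handling it.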
+ break;
+ }
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 0000000..8e18b3d
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,468 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched-instrs"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+using namespace llvm;
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt)
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ Begin = begin;
+ InsertPosIndex = endcount;
+
+ ScheduleDAG::Run(bb, end);
+}
+
+/// getOpcode - If this is an Instruction or a ConstantExpr, return the
+/// opcode value. Otherwise return UserOp1.
+static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getOpcode();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ return CE->getOpcode();
+ // Use UserOp1 to mean there's no opcode.
+ return Instruction::UserOp1;
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
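+/// For example, given the integer value (add (ptrtoint %obj), %c), this
+/// walks through the add and hands %obj back to the caller for further
+/// pointer stripping.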
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const User *U = dyn_cast<User>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObject.
+ if (getOpcode(U) == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant or a multiplied value, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (getOpcode(U) != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ getOpcode(U->getOperand(1)) != Instruction::Mul))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
+ } while (1);
+}
+
+/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObject(const Value *V) {
+ // First just call Value::getUnderlyingObject to let it do what it does.
+ do {
+ V = V->getUnderlyingObject();
+ // If it found an inttoptr, use special code to continue climbing.
+ if (getOpcode(V) != Instruction::IntToPtr)
+ break;
+ const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ // If that succeeded in finding a pointer, continue the search.
+ if (!isa<PointerType>(O->getType()))
+ break;
+ V = O;
+ } while (1);
+ return V;
+}
+
+/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object. Otherwise return null.
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) {
+ if (!MI->hasOneMemOperand() ||
+ !MI->memoperands_begin()->getValue() ||
+ MI->memoperands_begin()->isVolatile())
+ return 0;
+
+ const Value *V = MI->memoperands_begin()->getValue();
+ if (!V)
+ return 0;
+
+ V = getUnderlyingObject(V);
+ if (!isa<PseudoSourceValue>(V) && !isIdentifiedObject(V))
+ return 0;
+
+ return V;
+}
+
+void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+ if (MachineLoop *ML = MLI.getLoopFor(BB))
+ if (BB == ML->getLoopLatch()) {
+ MachineBasicBlock *Header = ML->getHeader();
+ for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+ E = Header->livein_end(); I != E; ++I)
+ LoopLiveInRegs.insert(*I);
+ LoopRegs.VisitLoop(ML);
+ }
+}
+
+void ScheduleDAGInstrs::BuildSchedGraph() {
+ // We'll be allocating one SUnit for each instruction, plus one for
+ // the region exit node.
+ SUnits.reserve(BB->size());
+
+ // We build scheduling units by walking a block's instruction list from bottom
+ // to top.
+
+ // Remember where a generic side-effecting instruction is as we proceed.
+ // If ChainMMO is null, Chain is assumed to have arbitrary side-effects;
+ // if ChainMMO is non-null, then Chain makes only a single memory reference.
+ SUnit *Chain = 0;
+ MachineMemOperand *ChainMMO = 0;
+
+ // Memory references to specific known memory locations are tracked so that
+ // they can be given more precise dependencies.
+ std::map<const Value *, SUnit *> MemDefs;
+ std::map<const Value *, std::vector<SUnit *> > MemUses;
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ unsigned SpecialAddressLatency =
+ TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency();
+
+ // Walk the list of instructions, from bottom moving up.
+ for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ MII != MIE; --MII) {
+ MachineInstr *MI = prior(MII);
+ const TargetInstrDesc &TID = MI->getDesc();
+ assert(!TID.isTerminator() && !MI->isLabel() &&
+ "Cannot schedule terminators or labels!");
+ // Create the SUnit for this MI.
+ SUnit *SU = NewSUnit(MI);
+
+ // Assign the Latency field of SU using target-provided information.
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ ComputeLatency(SU);
+
+ // Add register-based dependencies (data, anti, and output).
+ for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+ std::vector<SUnit *> &UseList = Uses[Reg];
+ std::vector<SUnit *> &DefList = Defs[Reg];
+ // Optionally add output and anti dependencies.
+ // TODO: Using a latency of 1 here assumes there's no cost for
+ // reusing registers.
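+ // Since the block is walked bottom-up, DefList holds later instructions
+ // that define Reg: a use here yields an anti-dependence, a def an output
+ // dependence.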
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &DefList = Defs[*Alias];
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+ }
+ }
+
+ if (MO.isDef()) {
+ // Add any data dependencies.
+ unsigned DataLatency = SU->Latency;
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU != SU) {
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Do this for register aliases too.
+ if (SpecialAddressLatency != 0 && !UnitLatencies) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+ assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+ (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+ UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg));
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &UseList = Uses[*Alias];
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU != SU)
+ UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
+ }
+ }
+
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ if (!UnitLatencies && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseTID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks, so an instruction is in the same region
+ // iff it has the same parent block.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
+ UseList.clear();
+ if (!MO.isDead())
+ DefList.clear();
+ DefList.push_back(SU);
+ } else {
+ UseList.push_back(SU);
+ }
+ }
+
+ // Add chain dependencies.
+ // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+ // after stack slots are lowered to actual addresses.
+ // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+ // produce more precise dependence information.
+ if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
+ new_chain:
+ // This is the conservative case. Add dependencies on all memory
+ // references.
+ if (Chain)
+ Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ Chain = SU;
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+ PendingLoads.clear();
+ for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
+ E = MemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+ I->second = SU;
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ MemUses.begin(), E = MemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency));
+ I->second.clear();
+ }
+ // See if it is known to just have a single memory reference.
+ MachineInstr *ChainMI = Chain->getInstr();
+ const TargetInstrDesc &ChainTID = ChainMI->getDesc();
+ if (!ChainTID.isCall() &&
+ !ChainTID.hasUnmodeledSideEffects() &&
+ ChainMI->hasOneMemOperand() &&
+ !ChainMI->memoperands_begin()->isVolatile() &&
+ ChainMI->memoperands_begin()->getValue())
+ // We know that the Chain accesses one specific memory location.
+ ChainMMO = &*ChainMI->memoperands_begin();
+ else
+ // Unknown memory accesses. Assume the worst.
+ ChainMMO = 0;
+ } else if (TID.mayStore()) {
+ if (const Value *V = getUnderlyingObjectForInstr(MI)) {
+ // A store to a specific PseudoSourceValue. Add precise dependencies.
+ // Handle the def in MemDefs, if there is one.
+ std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
+ if (I != MemDefs.end()) {
+ I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ I->second = SU;
+ } else {
+ MemDefs[V] = SU;
+ }
+ // Handle the uses in MemUses, if there are any.
+ std::map<const Value *, std::vector<SUnit *> >::iterator J =
+ MemUses.find(V);
+ if (J != MemUses.end()) {
+ for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+ J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ J->second.clear();
+ }
+ // Add dependencies from all the PendingLoads, since without
+ // memoperands we must assume they alias anything.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+ // Add a general dependence too, if needed.
+ if (Chain)
+ Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ } else
+ // Treat all other stores conservatively.
+ goto new_chain;
+ } else if (TID.mayLoad()) {
+ if (TII->isInvariantLoad(MI)) {
+ // Invariant load, no chain dependencies needed!
+ } else if (const Value *V = getUnderlyingObjectForInstr(MI)) {
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
+ if (I != MemDefs.end())
+ I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ MemUses[V].push_back(SU);
+
+ // Add a general dependence too, if needed.
+ if (Chain && (!ChainMMO ||
+ (ChainMMO->isStore() || ChainMMO->isVolatile())))
+ Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ } else if (MI->hasVolatileMemoryRef()) {
+ // Treat volatile loads conservatively. Note that this includes
+ // cases where memoperand information is unavailable.
+ goto new_chain;
+ } else {
+ // A normal load. Depend on the general chain, as well as on
+ // all stores. In the absence of MachineMemOperand information,
+ // we can't even assume that the load doesn't alias well-behaved
+ // memory locations.
+ if (Chain)
+ Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
+ E = MemDefs.end(); I != E; ++I)
+ I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+ PendingLoads.push_back(SU);
+ }
+ }
+ }
+
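+ // All instructions have been visited; reset the per-register state for
+ // the next scheduling region.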
+ for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ Defs[i].clear();
+ Uses[i].clear();
+ }
+ PendingLoads.clear();
+}
+
+void ScheduleDAGInstrs::FinishBlock() {
+ // Nothing to do.
+}
+
+void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes flagged together into this SUnit.
+ SU->Latency =
+ InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());
+
+ // Simplistic target-independent heuristic: assume that loads take
+ // extra time.
+ if (InstrItins.isEmpty())
+ if (SU->getInstr()->getDesc().mayLoad())
+ SU->Latency += 2;
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+ SU->getInstr()->dump();
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss);
+ return oss.str();
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
+ // For MachineInstr-based scheduling, we're rescheduling the instructions in
+ // the block, so start by removing them from the block.
+ while (Begin != InsertPos) {
+ MachineBasicBlock::iterator I = Begin;
+ ++Begin;
+ BB->remove(I);
+ }
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ BB->insert(InsertPos, SU->getInstr());
+ }
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (!Sequence.empty())
+ Begin = Sequence[0]->getInstr();
+
+ return BB;
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
new file mode 100644
index 0000000..00d6268
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -0,0 +1,184 @@
+//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGInstrs class, which implements
+// scheduling for a MachineInstr-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGINSTRS_H
+#define SCHEDULEDAGINSTRS_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <map>
+
+namespace llvm {
+ class MachineLoopInfo;
+ class MachineDominatorTree;
+
+ /// LoopDependencies - This class analyzes loop-oriented register
+ /// dependencies, which are used to guide scheduling decisions.
+ /// For example, loop induction variable increments should be
+ /// scheduled as soon as possible after the variable's last use.
+ ///
+ class VISIBILITY_HIDDEN LoopDependencies {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ public:
+ typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+ LoopDeps;
+ LoopDeps Deps;
+
+ LoopDependencies(const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt) :
+ MLI(mli), MDT(mdt) {}
+
+ /// VisitLoop - Clear out any previous state and analyze the given loop.
+ ///
+ void VisitLoop(const MachineLoop *Loop) {
+ Deps.clear();
+ MachineBasicBlock *Header = Loop->getHeader();
+ SmallSet<unsigned, 8> LoopLiveIns;
+ for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+ LE = Header->livein_end(); LI != LE; ++LI)
+ LoopLiveIns.insert(*LI);
+
+ const MachineDomTreeNode *Node = MDT.getNode(Header);
+ const MachineBasicBlock *MBB = Node->getBlock();
+ assert(Loop->contains(MBB) &&
+ "Loop does not contain header!");
+ VisitRegion(Node, MBB, Loop, LoopLiveIns);
+ }
+
+ private:
+ void VisitRegion(const MachineDomTreeNode *Node,
+ const MachineBasicBlock *MBB,
+ const MachineLoop *Loop,
+ const SmallSet<unsigned, 8> &LoopLiveIns) {
+ unsigned Count = 0;
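+ // Record, for each loop live-in register, the first use encountered
+ // (std::map::insert keeps the earliest entry) along with the
+ // instruction's index within the block.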
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I, ++Count) {
+ const MachineInstr *MI = I;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (LoopLiveIns.count(MOReg))
+ Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+ }
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (std::vector<MachineDomTreeNode*>::const_iterator I =
+ Children.begin(), E = Children.end(); I != E; ++I) {
+ const MachineDomTreeNode *ChildNode = *I;
+ MachineBasicBlock *ChildBlock = ChildNode->getBlock();
+ if (Loop->contains(ChildBlock))
+ VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
+ }
+ }
+ };
+
+ /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
+ /// MachineInstrs.
+ class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ /// Defs, Uses - Remember where defs and uses of each physical register
+ /// are as we iterate upward through the instructions. This is allocated
+ /// here instead of inside BuildSchedGraph to avoid the need for it to be
+ /// initialized and destructed for each block.
+ std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// PendingLoads - Remember where unknown loads are after the most recent
+ /// unknown store, as we iterate. As with Defs and Uses, this is here
+ /// to minimize construction/destruction.
+ std::vector<SUnit *> PendingLoads;
+
+ /// LoopRegs - Track which registers are used for loop-carried dependencies.
+ ///
+ LoopDependencies LoopRegs;
+
+ /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
+ /// back-edge-aware scheduling.
+ ///
+ SmallSet<unsigned, 8> LoopLiveInRegs;
+
+ public:
+ MachineBasicBlock *BB; // Current basic block
+ MachineBasicBlock::iterator Begin; // The beginning of the range to
+ // be scheduled. The range extends
+ // to InsertPos.
+ unsigned InsertPosIndex; // The index in BB of InsertPos.
+
+ explicit ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt);
+
+ virtual ~ScheduleDAGInstrs() {}
+
+ /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(MachineInstr *MI) {
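+ // Remember the address of the first element so the assert below can
+ // detect a reallocation, which would invalidate outstanding SUnit
+ // pointers.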
+#ifndef NDEBUG
+ const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
+
+ /// Run - perform scheduling.
+ ///
+ void Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endindex);
+
+ /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
+ /// given as input.
+ virtual void BuildSchedGraph();
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ virtual MachineBasicBlock *EmitSchedule();
+
+ /// StartBlock - Prepare to perform scheduling in the given block.
+ ///
+ virtual void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// FinishBlock - Clean up after scheduling in the given block.
+ ///
+ virtual void FinishBlock();
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 0000000..594c24d
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,97 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getNodeLabel(const SUnit *Node,
+ const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+ if (BB->getBasicBlock())
+ ViewGraph(this, "dag." + MF.getFunction()->getName(),
+ "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' +
+ BB->getBasicBlock()->getName());
+ else
+ ViewGraph(this, "dag." + MF.getFunction()->getName(),
+ "Scheduling-Units Graph for " + MF.getFunction()->getName());
+#else
+ cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
new file mode 100644
index 0000000..9ea59ea
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -0,0 +1,22 @@
+add_llvm_library(LLVMSelectionDAG
+ CallingConvLower.cpp
+ DAGCombiner.cpp
+ FastISel.cpp
+ LegalizeDAG.cpp
+ LegalizeFloatTypes.cpp
+ LegalizeIntegerTypes.cpp
+ LegalizeTypes.cpp
+ LegalizeTypesGeneric.cpp
+ LegalizeVectorOps.cpp
+ LegalizeVectorTypes.cpp
+ ScheduleDAGSDNodes.cpp
+ ScheduleDAGSDNodesEmit.cpp
+ ScheduleDAGFast.cpp
+ ScheduleDAGList.cpp
+ ScheduleDAGRRList.cpp
+ SelectionDAGBuild.cpp
+ SelectionDAG.cpp
+ SelectionDAGISel.cpp
+ SelectionDAGPrinter.cpp
+ TargetLowering.cpp
+ )
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
new file mode 100644
index 0000000..7cd2b73
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -0,0 +1,148 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ unsigned Offset = AllocateStack(Size, Align);
+
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
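+ // UsedRegs packs one bit per register, 32 registers to a word.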
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+ if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
+ for (; (Reg = *RegAliases); ++RegAliases)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) {
+ unsigned NumArgs = TheArgs->getNumValues()-1;
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = TheArgs->getValueType(i);
+ ISD::ArgFlagsTy ArgFlags =
+ cast<ARG_FLAGSSDNode>(TheArgs->getOperand(3+i))->getArgFlags();
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+ cerr << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) {
+ MVT VT = TheRet->getOperand(i*2+1).getValueType();
+ ISD::ArgFlagsTy ArgFlags =
+ cast<ARG_FLAGSSDNode>(TheRet->getOperand(i*2+2))->getArgFlags();
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){
+ cerr << "Return operand #" << i << " has unhandled type "
+ << VT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) {
+ unsigned NumOps = TheCall->getNumArgs();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = TheCall->getArg(i).getValueType();
+ ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i);
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+ cerr << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+ cerr << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) {
+ for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) {
+ MVT VT = TheCall->getRetValType(i);
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (TheCall->isInreg())
+ Flags.setInReg();
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+ cerr << "Call result #" << i << " has unhandled type "
+ << VT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+ cerr << "Call result has unhandled type "
+ << VT.getMVTString() << "\n";
+ abort();
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000..4c1710d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,6203 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+
+namespace {
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Turn on alias analysis during testing"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class VISIBILITY_HIDDEN DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations;
+ bool LegalTypes;
+
+ // Worklist of all of the nodes that need to be simplified.
+ std::vector<SDNode*> WorkList;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(SDNode *N) {
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ AddToWorkList(*UI);
+ }
+
+ /// visit - call the node-specific routine that knows how to fold each
+ /// particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+ /// AddToWorkList - Add to the work list, making sure its instance is at
+ /// the back (next to be processed).
+ void AddToWorkList(SDNode *N) {
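+ // Remove any existing instance first so the node ends up exactly once,
+ // at the back of the list (next to be processed).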
+ removeFromWorkList(N);
+ WorkList.push_back(N);
+ }
+
+ /// removeFromWorkList - remove all instances of N from the worklist.
+ ///
+ void removeFromWorkList(SDNode *N) {
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+ WorkList.end());
+ }
+
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+
+ /// SimplifyDemandedBits - Check the specified integer node value to see if
+ /// it can be simplified or if things it uses can be simplified by bit
+ /// propagation. If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits());
+ return SimplifyDemandedBits(Op, Demanded);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+
+
+ /// combine - call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitSREM(SDNode *N);
+ SDValue visitUREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSDIVREM(SDNode *N);
+ SDValue visitUDIVREM(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBIT_CONVERT(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_ROUND_INREG(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
+
+ SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans = true);
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, MVT VT);
+ SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+
+ SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+ /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases);
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2) const;
+
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset) const;
+
+ /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for a better chain (aliasing node.)
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ /// getShiftAmountTy - Returns a type large enough to hold any valid
+ /// shift amount - before type legalization these can be huge.
+ MVT getShiftAmountTy() {
+ return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+ }
+
+public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ : DAG(D),
+ TLI(D.getTargetLoweringInfo()),
+ Level(Unrestricted),
+ OptLevel(OL),
+ LegalOperations(false),
+ LegalTypes(false),
+ AA(A) {}
+
+ /// Run - runs the dag combiner on all nodes in the work list
+ void Run(CombineLevel AtLevel);
+ };
+}
+
+
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class VISIBILITY_HIDDEN WorkListRemover :
+ public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+public:
+ explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ DC.removeFromWorkList(N);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Ignore updates.
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
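+/// For example, negating an FNEG just drops the node (cheaper, so 2), while
+/// a ConstantFP can be negated in place before legalization (same cost, 1).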
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ unsigned Depth = 0) {
+ // No compile time optimizations on this type.
+ if (Op.getValueType() == MVT::ppcf128)
+ return 0;
+
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+ default: return 0;
+ case ISD::ConstantFP:
+ // Don't invert constant FP values after legalize. The negated constant
+ // isn't necessarily legal.
+ return LegalOperations ? 0 : 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!UnsafeFPMath) return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!UnsafeFPMath) return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (HonorSignDependentRoundingFPMath()) return 0;
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returns true, this function
+/// returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Unknown code");
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, Op.getValueType());
+ }
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(0));
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->getValueAPF().isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0));
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!HonorSignDependentRoundingFPMath());
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1));
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+ if (N.getOpcode() == ISD::SELECT_CC &&
+ N.getOperand(2).getOpcode() == ISD::Constant &&
+ N.getOperand(3).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+ cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+ }
+ return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDValue N) {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+ SDValue N0, SDValue N1) {
+ MVT VT = N0.getValueType();
+ if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+ if (isa<ConstantSDNode>(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N0.getOperand(1)),
+ cast<ConstantSDNode>(N1));
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ } else if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+
+ if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+ if (isa<ConstantSDNode>(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N1.getOperand(1)),
+ cast<ConstantSDNode>(N0));
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ } else if (N1.hasOneUse()) {
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N1.getOperand(0), N0);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG));
+ DOUT << " and " << NumTo-1 << " other values\n";
+ DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert(N->getValueType(i) == To[i].getValueType() &&
+ "Cannot combine value to value of different type!"));
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
+
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorkList(To[i].getNode());
+ AddUsersToWorkList(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ return SDValue(N, 0);
+}
+
+void
+DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
+ TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorkList(TLO.New.getNode());
+ AddUsersToWorkList(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty()) {
+ removeFromWorkList(TLO.Old.getNode());
+
+ // If the operands of this node are only used by the node, they will now
+ // be dead. Make sure to visit them first to delete dead nodes early.
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
+ DAG.DeleteNode(TLO.Old.getNode());
+ }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorkList(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG));
+ DOUT << '\n';
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
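+// Run - Process the whole DAG with a worklist algorithm: every node starts on
+// the worklist, and each successful combine pushes the replacement nodes (and
+// anything whose use count may have changed) back on, so the pass iterates to
+// a fixed point before restoring the root.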
+void DAGCombiner::Run(CombineLevel AtLevel) {
+ // Set the instance variables, so that the various visit routines may use them.
+ Level = AtLevel;
+ LegalOperations = Level >= NoIllegalOperations;
+ LegalTypes = Level >= NoIllegalTypes;
+
+ // Add all the dag nodes to the worklist.
+ WorkList.reserve(DAG.allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ WorkList.push_back(I);
+
+ // Create a dummy node (which is not added to allnodes) that adds a
+ // reference to the root node, preventing it from being deleted and
+ // tracking any changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // The root of the dag may become a dangling reference to deleted nodes
+ // while the dag combiner runs. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // While the worklist isn't empty, inspect the node at the end of it and
+ // try to combine it.
+ while (!WorkList.empty()) {
+ SDNode *N = WorkList.back();
+ WorkList.pop_back();
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (N->use_empty() && N != &Dummy) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ DAG.DeleteNode(N);
+ continue;
+ }
+
+ SDValue RV = combine(N);
+
+ if (RV.getNode() == 0)
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG));
+ DOUT << '\n';
+ WorkListRemover DeadNodes(*this);
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ SDValue OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorkList(RV.getNode());
+ AddUsersToWorkList(RV.getNode());
+
+ // Add any uses of the old node to the worklist in case this node is the
+ // last one that uses them. They may become dead after this node is
+ // deleted.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ }
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM: return visitSREM(N);
+ case ISD::UREM: return visitUREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SDIVREM: return visitSDIVREM(N);
+ case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ }
+ return SDValue();
+}
+
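+// combine - Run the per-opcode visit routine on N; if it makes no change,
+// offer N to the target's PerformDAGCombine hook, and as a last resort try
+// the commuted operand order to catch an existing CSE'd node. For example,
+// if both (add x, y) and (add y, x) exist in the DAG, the latter is replaced
+// by the former rather than kept as a duplicate.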
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (RV.getNode() == 0) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level == Unrestricted, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+ // If N is a commutative binary node, try commuting it to enable more
+ // sdisel CSE.
+ if (RV.getNode() == 0 &&
+ SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ SDValue Ops[] = { N1, N0 };
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+ Ops, 2);
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null SDValue.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
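+// visitTokenFactor - Flatten nested TokenFactors into one and drop redundant
+// operands (entry tokens and duplicates), e.g.
+// (TokenFactor (TokenFactor a, b), b, EntryToken) -> (TokenFactor a, b).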
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+ // Iterate through token factors. The TFs list grows as new token factors
+ // are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+ SDValue Op = TF->getOperand(i);
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if ((CombinerAA || Op.hasOneUse()) &&
+ std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ // Clean up in case the token factor is removed.
+ AddToWorkList(Op.getNode());
+ Changed = true;
+ break;
+ }
+ // Fall through.
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()))
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDValue Result;
+
+ // If we've changed things around, replace the token factor.
+ if (Changed) {
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Don't add users to work list.
+ return CombineTo(N, Result, false);
+ }
+
+ return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorkListRemover DeadNodes(*this);
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+ &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
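+// combineShlAddConstant - Distribute a shift over an inner add-of-constant:
+// shifting left by c2 multiplies by 2^c2, so (shl (add x, c1), c2) equals
+// (add (shl x, c2), c1<<c2); the caller's remaining addend is then added on.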
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ MVT VT = N0.getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+ if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N00.getOperand(1))) {
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+ N00.getOperand(0), N01),
+ DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+ N00.getOperand(1), N01));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (add c1, c2) -> c1+c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (add x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+ GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ GA->getOffset() +
+ (uint64_t)N1C->getSExtValue());
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N1C && N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), VT),
+ N0.getOperand(1));
+ // reassociate add
+ SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+ if (RADD.getNode() != 0)
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+ cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+ cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+ DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+ }
+
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
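+ // For example, (add (and x, 0xF0), (and y, 0x0F)) can produce no carry out
+ // of any bit position, so it is exactly (or (and x, 0xF0), (and y, 0x0F)).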
+ if (VT.isInteger() && !VT.isVector()) {
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
+
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (N->hasNUsesOfValue(0, 1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+
+ // canonicalize constant to RHS.
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ MVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sub x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, N->getValueType(0));
+ // fold (sub c1, c2) -> c1-c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), VT));
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (mul x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mul x, -1) -> 0-x
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // fold (mul x, (1 << c)) -> x << c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+ unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(Log2Val, getShiftAmountTy())));
+ }
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1, N0.getOperand(1));
+ AddToWorkList(C3.getNode());
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(0,0), Y(0,0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+ if (RMUL.getNode() != 0)
+ return RMUL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getSExtValue() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+ N0, N1);
+ }
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+ (isPowerOf2_64(N1C->getSExtValue()) ||
+ isPowerOf2_64(-N1C->getSExtValue()))) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDValue();
+
+ int64_t pow2 = N1C->getSExtValue();
+ int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+ unsigned lg2 = Log2_64(abs2);
+
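+ // Worked example for i32 (sdiv x, 4), i.e. lg2 == 2:
+ // SGN = (sra x, 31) -- 0 if x >= 0, -1 if x < 0
+ // SRL = (srl SGN, 30) -- 0 or 3 (abs2 - 1)
+ // ADD = (add x, SRL) -- bias negative x so the shift rounds toward zero
+ // SRA = (sra ADD, 2) -- the quotient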
+ // Splat the sign bit into the register
+ SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy()));
+ AddToWorkList(SGN.getNode());
+
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+ DAG.getConstant(VT.getSizeInBits() - lg2,
+ getShiftAmountTy()));
+ SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+ AddToWorkList(SRL.getNode());
+ AddToWorkList(ADD.getNode()); // Divide by pow2
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+ DAG.getConstant(lg2, getShiftAmountTy()));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (pow2 > 0)
+ return SRA;
+
+ AddToWorkList(SRA.getNode());
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), SRA);
+ }
+
+ // if integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence.
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
+ !TLI.isIntDivCheap()) {
+ SDValue Op = BuildSDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ MVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+ N1.getOperand(1),
+ DAG.getConstant(SHC->getAPIntValue()
+ .logBase2(),
+ ADDVT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+ // fold (udiv x, c) -> alternate
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildUDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (srem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (urem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ VT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (mulhs x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+ DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ getShiftAmountTy()));
+ // fold (mulhs x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (mulhu x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getConstant(0, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ return SDValue();
+}
+
+/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+/// compute two values. LoOp and HiOp give the opcodes for the two computations
+/// that are being performed. Return the simplified value if a simplification
+/// was made, or a null SDValue otherwise.
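+/// For example, if only the low half of an SMUL_LOHI is used, the node is
+/// replaced by a plain MUL of the same operands.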
+///
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, there is nothing to simplify here.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ MVT VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
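+ // For example, (and (zext i8 x to i32), (zext i8 y to i32)) becomes
+ // (zext (and x, y) to i32), performing the bitwise op in the narrow type.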
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ORNode, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N1.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and c1, c2) -> c1&c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (N1C && N1C->isAllOnesValue())
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, VT);
+ // reassociate and
+ SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+ if (RAND.getNode() != 0)
+ return RAND;
+ // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask.trunc(N0Op0.getValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
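+ // For example, (and (load i32 p), 255) only needs the low byte, so it can
+ // become (zextload i8). Little-endian targets read that byte at p itself;
+ // big-endian targets must advance the pointer by LVTStoreBytes -
+ // EVTStoreBytes (3 here), which is the PtrOff adjustment made below.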
+ if (N1C && N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->isUnindexed() && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !LN0->isVolatile()) {
+ MVT EVT = MVT::Other;
+ uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
+ EVT = MVT::getIntegerVT(ActiveBits);
+
+ MVT LoadedVT = LN0->getMemoryVT();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ MVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to add an offset to the pointer to
+ // load the correct bytes. For little endian systems, we merely need to
+ // read fewer bytes from the same pointer.
+ unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8;
+ unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8;
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ if (TLI.isBigEndian()) {
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorkList(NewPtr.getNode());
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
+ NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ EVT, LN0->isVolatile(), Alignment);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (or x, undef) -> -1
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(~0ULL, VT);
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+ // reassociate or
+ SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+ if (ROR.getNode() != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+ return SDValue(Rot, 0);
+
+ return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+
+ return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+ // Must be a legal type. Expanded and promoted types won't work with rotates.
+ MVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return 0;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return 0;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return 0; // Not part of a rotate.
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return 0; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return 0; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return 0; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
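+ // For i32, (or (shl x, 8), (srl x, 24)) meets LShVal + RShVal == 32 and
+ // becomes (rotl x, 8) or, equivalently, (rotr x, 24), depending on which
+ // rotate the target supports.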
+ if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+ RHSShiftAmt.getOpcode() == ISD::Constant) {
+ uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != OpSizeInBits)
+ return 0;
+
+ SDValue Rot;
+ if (HasROTL)
+ Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
+ else
+ Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+
+ if (LHSMask.getNode()) {
+ APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+ Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ }
+ if (RHSMask.getNode()) {
+ APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+ Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ }
+
+ return Rot.getNode();
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right bits.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
+ // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTR)
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ }
+ }
+ }
+
+  // Look for shift amounts that are sign/zero/any-extended or truncated:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+ LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (xor x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // reassociate xor
+ SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+ if (RXOR.getNode() != 0)
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
+ if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = LHS.getValueType().isInteger();
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+
+ if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+ switch (N0.getOpcode()) {
+ default:
+ assert(0 && "Unhandled SetCC Equivalent!");
+ abort();
+ case ISD::SETCC:
+ return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getNode()->hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+ DAG.getConstant(1, V.getValueType()));
+ AddToWorkList(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+ }
+
+  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y is a setcc;
+  // likewise (not (and x, y)) -> (or (not x), (not y)).
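+  // e.g. for i1: (xor (or a, b), 1) -> (and (xor a, 1), (xor b, 1)) when a
+  // or b is a setcc; this is De Morgan's law with NOT written as (xor _, 1).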
+ if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y is a constant;
+  // likewise (not (and x, y)) -> (or (not x), (not y)).
+ if (N1C && N1C->isAllOnesValue() &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
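+  // e.g. (xor (xor x, 5), 3) -> (xor x, 6), since 5 ^ 3 == 6.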
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N00C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N00C->getAPIntValue(), VT));
+ if (N01C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N01C->getAPIntValue(), VT));
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+ }
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // Simplify the expression using non-local knowledge.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+ SDNode *LHS = N->getOperand(0).getNode();
+ if (!LHS->hasOneUse()) return SDValue();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
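+  // e.g. (shl (add (shl x, 4), 16), 2) -> (add (shl (shl x, 4), 2), 64); the
+  // constant 16 is shifted along, and the inner shifts fold together later.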
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant as well.
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant.
+ // If it is not a shift, it pessimizes some common cases like:
+ //
+ // void foo(int *X, int i) { X[i & 1235] = 1; }
+ // int bar(int *X, int i) { return X[i & 255]; }
+ SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+ if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+ BinOpLHSVal->getOpcode() != ISD::SRA &&
+ BinOpLHSVal->getOpcode() != ISD::SRL) ||
+ !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ return SDValue();
+
+ MVT VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified by the
+ // logical operation, do not perform the transformation. The highBitSet
+ // boolean indicates the value of the high bit of the constant which would
+ // cause it to be modified for this operation.
+ if (N->getOpcode() == ISD::SRA) {
+ bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+ if (BinOpRHSSignSet != HighBitSet)
+ return SDValue();
+ }
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+ N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+
+ // Create the new shift.
+ SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+ VT, LHS->getOperand(0), N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getSizeInBits();
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ // fold (shl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(VT.getSizeInBits())))
+ return DAG.getConstant(0, VT);
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ MVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
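+  // e.g. (shl (shl x, 2), 3) -> (shl x, 5).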
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 > OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
+ // (srl (and x, (shl -1, c1)), (sub c1, c2))
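+  // e.g. for i32: (shl (srl x, 4), 6) -> (shl (and x, 0xFFFFFFF0), 2).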
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL << c1, VT));
+ if (c2 > c1)
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ else
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL << N1C->getZExtValue(), VT));
+
+ return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+  // fold (sra c1, c2) -> c1 >>s c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+  // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= VT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+  // fold (sra (shl x, c1), c1) -> sext_inreg if the target supports
+  // sext_inreg.
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
+ MVT EVT = MVT::getIntegerVT(LowBits);
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getValueType(EVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
+
+  // fold (sra (shl X, m), (sub result_size, n))
+  //   -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
+  //   result_size - n != m.
+  // If truncate is free for the target, sext(trunc) is likely to result in
+  // better code.
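+  // e.g. for i32: (sra (shl X, 16), 24) -> (sext (trunc i8 (srl X, 8))),
+  // i.e. bits [15:8] of X sign-extended to 32 bits.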
+ if (N0.getOpcode() == ISD::SHL) {
+    // Get the two constants of the shifts, CN0 = m, CN = n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine what the truncate's result bitsize and type would be.
+ unsigned VTValSize = VT.getSizeInBits();
+ MVT TruncVT =
+ MVT::getIntegerVT(VTValSize - N1C->getZExtValue());
+ // Determine the residual right-shift amount.
+ signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+      // If the shift is not a no-op (in which case this should be just a sign
+      // extend already), the truncated-to type is legal, sign_extend is legal
+      // on that type, and the truncate to that type is both legal and free,
+      // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ MVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 > OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ MVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0), N1);
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+  // fold (srl (ctlz x), "5") -> (x == 0), where "5" is log2 of the bit
+  // width; with at most one possibly-set input bit this simplifies further.
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+    // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~KnownZero & Mask;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+      // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
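+      // e.g. if only bit 3 of the ctlz input may be set, this i32 CTLZ/SRL
+      // pair becomes (xor (srl x, 3), 1).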
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy()));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ MVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ MVT VT = N->getValueType(0);
+ MVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ // fold (select true, X, Y) -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold (select false, X, Y) -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
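+  // e.g. with an i1 condition and an i32 result, (select C, 0, 1) becomes
+  // (zext (xor C, 1)) via the XORNode path below.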
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 ||
+ (VT0.isInteger() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+ N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ SDValue XORNode;
+ if (VT == VT0)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ AddToWorkList(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+    // about, since there is no way to mark an opcode illegal at all value
+    // types.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extension is possible and the above-
+// mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+      // Both unextended and extended values are live out. There had better be
+      // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getSizeInBits();
+ unsigned DestBits = VT.getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+      // bits, it is already sign extended.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (Op.getValueType().bitsLT(VT))
+ Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+ else if (Op.getValueType().bitsGT(VT))
+ Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
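+  // e.g. (zext i32 (trunc i16 x)) with x of type i32 -> (and x, 0xFFFF).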
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ if (Op.getValueType().bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ } else if (Op.getValueType().bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+ return TruncOp; // x iff x size == zext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode())
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ case ISD::XOR:
+ // If the LHS or RHS don't contribute bits to the or, drop them.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+ if (SimplifyLHS.getNode())
+ return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, where N is a multiple of
+/// the number of bits of the narrower type, transform it to a narrower load
+/// from the original address plus N/8 bytes. If the result is to be
+/// extended, also fold the extension to form an extending load.
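+/// e.g. on a little-endian target, (trunc i16 (srl (load i32 p), 16)) can
+/// become (load i16 from p+2).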
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+ MVT EVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // extending to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))
+ return SDValue();
+ }
+
+ unsigned EVTBits = EVT.getSizeInBits();
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of EVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+ if (N0.getValueType().getSizeInBits() <= EVTBits)
+ return SDValue();
+ }
+ }
+ }
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() &&
+ cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = EVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load = (ExtType == ISD::NON_EXTLOAD)
+ ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+ LN0->isVolatile(), NewAlign)
+ : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+ EVT, LN0->isVolatile(), NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
+
+ // Return the new loaded value.
+ return Load;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ MVT VT = N->getValueType(0);
+ MVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getSizeInBits() < EVTBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0));
+ else if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ else
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate
+ return N0.getOperand(0);
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used. For example "trunc (or (shl x, 8), y)"
+ // -> trunc y
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ return ReduceLoadWidth(N);
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ SDNode *LD1 = getBuildPairElt(N, 0);
+ if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ return SDValue();
+ MVT LD1VT = LD1->getValueType(0);
+ SDNode *LD2 = getBuildPairElt(N, 1);
+ const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+ if (ISD::isNON_EXTLoad(LD2) &&
+ LD2->hasOneUse() &&
+ // If both are volatile this would reduce the number of volatile loads.
+      // If one is volatile it might be ok, but be conservative and bail out.
+ !cast<LoadSDNode>(LD1)->isVolatile() &&
+ !cast<LoadSDNode>(LD2)->isVolatile() &&
+ TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
+ LoadSDNode *LD = cast<LoadSDNode>(LD1);
+ unsigned Align = LD->getAlignment();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForMVT());
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ false, Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ MVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+ if (Res.getNode() != N) return Res;
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BIT_CONVERT)
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+  // if the resulting load doesn't need a higher alignment than the original.
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForMVT());
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+ LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->isVolatile(), OrigAlign);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ // This often reduces constant pool loads.
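+ // e.g. for f32, fneg flips only bit 31, so the fold produces
+ // (xor (bitconvert x), 0x80000000); fabs clears it via 0x7fffffff.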
+ if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+ SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+ N0.getOperand(0));
+ AddToWorkList(NewConv.getNode());
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(SignBit, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(~SignBit, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
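+ // e.g. for f32 with cst = 1.0 (bits 0x3f800000) this produces
+ // (or (and (bitconvert x), 0x80000000), 0x3f800000).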
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ MVT IntXVT = MVT::getIntegerVT(OrigXWidth);
+ if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+ SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ IntXVT, N0.getOperand(1));
+ AddToWorkList(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ AddToWorkList(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ }
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+ X, DAG.getConstant(SignBit, VT));
+ AddToWorkList(X.getNode());
+
+ SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+ Cst, DAG.getConstant(~SignBit, VT));
+ AddToWorkList(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+ if (CombineLD.getNode())
+ return CombineLD;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
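+ /// e.g. a constant v2i32 build_vector can be folded to a v8i8 build_vector
+ /// by splitting each i32 into four i8 pieces (piece order is
+ /// endian-dependent).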
+SDValue DAGCombiner::
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
+ MVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Op = BV->getOperand(i);
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ DstEltVT, Op));
+ AddToWorkList(Ops.back().getNode());
+ }
+ MVT VT = MVT::getVectorVT(DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+ // Convert the input float vector to an int vector whose elements have the
+ // same size.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+ // Now we know the input is an integer vector. If the output is an FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+ MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+ // Okay, we know the src/dst types are both integers of differing widths.
+ // Handle growing first.
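+ // e.g. v4i8 -> v2i16: each pair of i8 inputs packs into one i16 output;
+ // on little-endian the higher-indexed input supplies the high bits.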
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = TLI.isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+ EltIsUndef = false;
+
+ NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+ zextOrTrunc(SrcBitSize).zext(DstBitSize));
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ }
+
+ MVT VT = MVT::getVectorVT(DstEltVT, Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
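+ // e.g. v2i32 -> v4i16: each i32 yields two i16 pieces, low piece first;
+ // big-endian targets then swap the pieces of each element.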
+ bool isS2V = ISD::isScalarToVector(BV);
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue()).zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+ // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ Ops[0]);
+ OpVal = OpVal.lshr(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (TLI.isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fadd c1, c2) -> c1+c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (fadd A, 0) -> A
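+ // (UnsafeFPMath only: if A is -0.0, (fadd A, 0.0) is +0.0, not A)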
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if (isNegatibleForFree(N1, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if (isNegatibleForFree(N0, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+ GetNegatedExpression(N0, DAG, LegalOperations));
+
+ // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+ // fold (fsub A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fsub 0, B) -> -B
+ if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ }
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (fmul A, 0) -> 0
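+ // (UnsafeFPMath only: wrong when A is NaN, infinity, or -0.0)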
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+ // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1CFP) {
+ const APFloat& V = N1CFP->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ MVT VT = N->getValueType(0);
+ MVT OpVT = N0.getValueType();
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128)
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ MVT VT = N->getValueType(0);
+ MVT OpVT = N0.getValueType();
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128)
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP && N0.getValueType() != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+ // This is a value-preserving truncation if both rounds are.
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+ N0.getNode()->getConstantOperandVal(1) == 1;
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(IsTrunc));
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+ MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // If this is fp_round(fp_extend), don't fold it; allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getNode()->getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ if (isNegatibleForFree(N0, LegalOperations))
+ return GetNegatedExpression(N0, DAG, LegalOperations);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ MVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ MVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // never taken branch, fold to chain
+ if (N1C && N1C->isNullValue())
+ return Chain;
+ // unconditional branch
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N1.getOperand(0);
+ SDValue Op1 = N1.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND &&
+ Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp0 = Op0.getOperand(0);
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDValue SetCC =
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETNE);
+
+ // Replace the uses of SRL with SETCC
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+ false);
+ if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode());
+
+ // fold br_cc true, dest -> br dest (unconditional branch)
+ if (SCCC && !SCCC->isNullValue())
+ return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ // fold br_cc false, dest -> unconditional fall through
+ if (SCCC && SCCC->isNullValue())
+ return N->getOperand(0);
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
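+ /// For example, on a target with pre-increment addressing, a load from
+ /// (add x, 4) whose pointer has other uses can become a pre_inc load that
+ /// also produces x+4 for those uses.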
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ MVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+ // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == N)
+ continue;
+ if (Use->isPredecessorOf(N))
+ return false;
+
+ if (!((Use->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+ (Use->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
+ DOUT << '\n';
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Ptr.getNode());
+ DAG.DeleteNode(Ptr.getNode());
+
+ return true;
+}
+
+ /// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+ /// add / sub of the base pointer node into a post-indexed load / store.
+ /// The transformation effectively folds the add / subtract into the new
+ /// indexed load / store, and all uses of the add / subtract are redirected
+ /// to the new load / store.
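+ /// For example, (load x) followed by (add x, 4) feeding later address
+ /// arithmetic can become a post_inc load that yields both the value and x+4.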
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ MVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Op = *I;
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+ if (Ptr == Offset)
+ std::swap(BasePtr, Offset);
+ if (Ptr != BasePtr)
+ continue;
+ // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr.
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+ EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+ SDNode *Use = *II;
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode::use_iterator III = Use->use_begin(),
+ EEE = Use->use_end(); III != EEE; ++III) {
+ SDNode *UseUse = *III;
+ if (!((UseUse->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
+ (UseUse->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
+ DOUT << '\n';
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Op);
+ DAG.DeleteNode(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// InferAlignment - If we can infer some alignment information from this
+/// pointer, return it.
+static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) {
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ // FIXME: Handle FI+CST.
+ const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
+ if (MFI.isFixedObjectIndex(FrameIdx)) {
+ int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
+
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = MinAlign(ObjectOffset, StackAlign);
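+ // e.g. ObjectOffset 36 with StackAlign 16 gives MinAlign(36, 16) == 4.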
+
+ // Finally, the frame object itself may have a known alignment. Factor
+ // the alignment + offset into a new alignment. For example, if we know
+ // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
+ // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
+ // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ return std::max(Align, FIInfoAlign);
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (Align > LD->getAlignment())
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->isVolatile(), Align);
+ }
+ }
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (N->hasNUsesOfValue(0, 0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG));
+ DOUT << "\n";
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+
+ if (N->use_empty()) {
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+ if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG));
+ DOUT << " and 2 other values\n";
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ !LD->isVolatile()) {
+ if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+ // Replace the chain to avoid dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+ BetterChain, Ptr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->getMemoryVT(),
+ LD->isVolatile(),
+ LD->getAlignment());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+
+ /// ReduceLoadOpStoreWidth - Look for a sequence of load / op / store where op
+ /// is one of 'or', 'xor', or 'and' with an immediate. If 'op' only touches some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
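+ /// e.g. on little-endian, (store (or (i32 (load p)), 0x00ff0000), p) can be
+ /// narrowed to an i8 load / or / store at p+2, touching only the changed byte.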
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (ST->isVolatile())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ MVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr)
+ return SDValue();
+
+ // Find the type to narrow the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ MVT NewVT = MVT::getIntegerVT(NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = MVT::getIntegerVT(NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+ // If the lowest changed bit does not fall on a NewBW boundary, round
+ // ShAmt down to the previous boundary.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ if (NewAlign <
+ TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT()))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ false, NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (Align > ST->getAlignment())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), Align);
+ }
+ }
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
+ if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ unsigned OrigAlign = ST->getAlignment();
+ MVT SVT = Value.getOperand(0).getValueType();
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(SVT.getTypeForMVT());
+ if (Align <= OrigAlign &&
+ ((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+ return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
+ }
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x3f800000, Ptr'
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+ if (Value.getOpcode() != ISD::TargetConstantFP) {
+ SDValue Tmp;
+ switch (CFP->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unknown FP type");
+ case MVT::f80: // We don't do this for these yet.
+ case MVT::f128:
+ case MVT::ppcf128:
+ break;
+ case MVT::f32:
+ if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), MVT::i32);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->getAlignment());
+ }
+ break;
+ case MVT::f64:
+ if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), MVT::i64);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->getAlignment());
+ } else if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+
+ SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(),
+ isVolatile, ST->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ SVOffset += 4;
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+ Ptr, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ St0, St1);
+ }
+
+ break;
+ }
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ // Replace the chain to avoid dependency.
+ SDValue ReplStore;
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getSrcValue(),ST->getSrcValueOffset(),
+ ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), ST->getAlignment());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplStore);
+
+ // Don't add users to work list.
+ return CombineTo(N, Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger()) {
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ SDValue Shorter =
+ GetDemandedBits(Value,
+ APInt::getLowBitsSet(Value.getValueSizeInBits(),
+ ST->getMemoryVT().getSizeInBits()));
+ AddToWorkList(Value.getNode());
+ if (Shorter.getNode())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueSizeInBits(),
+ ST->getMemoryVT().getSizeInBits())))
+ return SDValue(N, 0);
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && !ST->isVolatile() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+ }
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+
+ // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+ // vector with the inserted element.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ InVec.getValueType(), &Ops[0], Ops.size());
+ }
+ // If the invec is an UNDEF and if EltNo is a constant, create a new
+ // BUILD_VECTOR with undef elements and the inserted element.
+ if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
+ isa<ConstantSDNode>(EltNo)) {
+ MVT VT = InVec.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
+
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ InVec.getValueType(), &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ SDValue InVec = N->getOperand(0);
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ MVT EltVT = InVec.getValueType().getVectorElementType();
+ SDValue InOp = InVec.getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp);
+ return InOp;
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations) return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
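+ // e.g. (vextract (v4f32 load $addr), 2) -> (f32 load $addr+8)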
+ SDValue EltNo = N->getOperand(1);
+
+ if (isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ bool NewLoad = false;
+ bool BCNumEltsChanged = false;
+ MVT VT = InVec.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ MVT LVT = EVT;
+
+ if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+ MVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ EVT = BCVT.getVectorElementType();
+ NewLoad = true;
+ }
+
+ LoadSDNode *LN0 = NULL;
+ const ShuffleVectorSDNode *SVN = NULL;
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ InVec.getOperand(0).getValueType() == EVT &&
+ ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+ } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+ // Select the input vector, guarding against an out-of-range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Idx = (Elt >= NumElems) ? -1 : SVN->getMaskElt(Elt);
+ InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+ if (InVec.getOpcode() == ISD::BIT_CONVERT)
+ InVec = InVec.getOperand(0);
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems;
+ }
+ }
+
+ if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+ return SDValue();
+
+ unsigned Align = LN0->getAlignment();
+ if (NewLoad) {
+ // Check the resultant load doesn't need a higher alignment than the
+ // original load.
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT());
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+ return SDValue();
+
+ Align = NewAlign;
+ }
+
+ SDValue NewPtr = LN0->getBasePtr();
+ if (Elt) {
+ unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+ MVT PtrType = NewPtr.getValueType();
+ if (TLI.isBigEndian())
+ PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+ NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ }
+
+ return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->isVolatile(), Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ MVT VT = N->getValueType(0);
+ MVT EltType = VT.getVectorElementType();
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
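+  // For example, a four-element build_vector of
+  //   (extract_elt A, 0), (extract_elt B, 1), (extract_elt A, 2),
+  //   (extract_elt B, 3)
+  // can become (vector_shuffle A, B, <0, 5, 2, 7>); indices into the second
+  // input vector are biased by the number of elements in the result.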
+ SDValue VecIn1, VecIn2;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ // Ignore undef inputs.
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ // If this input is something other than a EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // If the input vector type disagrees with the result of the build_vector,
+ // we can't make a shuffle.
+ SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ if (ExtractedFromVec.getValueType() != VT) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // Otherwise, remember this. We allow up to two distinct input vectors.
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (VecIn1.getNode() == 0) {
+ VecIn1 = ExtractedFromVec;
+ } else if (VecIn2.getNode() == 0) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.getNode()) {
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // If extracting from the first vector, just use the index directly.
+ SDValue Extract = N->getOperand(i);
+ SDValue ExtVal = Extract.getOperand(1);
+ if (Extract.getOperand(0) == VecIn1) {
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ if (ExtIndex > VT.getVectorNumElements())
+ return SDValue();
+
+ Mask.push_back(ExtIndex);
+ continue;
+ }
+
+      // Otherwise, it's from the second vector; use Idx + NumInScalars.
+ unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ Mask.push_back(Idx+NumInScalars);
+ }
+
+    // If types have been legalized and the result vector type isn't legal,
+    // we can't emit a shuffle of this type.
+    if (!TLI.isTypeLegal(VT) && LegalTypes)
+      return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDValue Ops[2];
+ Ops[0] = VecIn1;
+ Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+ // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+ // inputs come from at most two distinct vectors, turn this into a shuffle
+ // node.
+
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  // FIXME: This combine is disabled for now; the early return below makes
+  // the splat-handling code that follows unreachable until the
+  // canonicalizations mentioned below are implemented.
+  return SDValue();
+
+ MVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType().getVectorNumElements() == NumElts &&
+ "Vector shuffle must be normalized in DAG");
+
+ // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+ // If it is a splat, check if the argument vector is a build_vector with
+ // all scalar elements the same.
+ if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+ SDNode *V = N0.getNode();
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+ // look though conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BIT_CONVERT) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElems = V->getNumOperands();
+ unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
+ if (NumElems > BaseIdx) {
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
+/// transform an AND to a vector_shuffle with the destination vector and a
+/// zero vector. e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (N->getOpcode() == ISD::AND) {
+ if (RHS.getOpcode() == ISD::BIT_CONVERT)
+ RHS = RHS.getOperand(0);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
+ return SDValue();
+ else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ MVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ MVT EVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ RVT, &ZeroOps[0], ZeroOps.size());
+ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ // After legalize, the target may be depending on adds and other
+ // binary ops to provide legal ways to construct constants or other
+ // things. Simplifying them may result in a loss of legality.
+ if (LegalOperations) return SDValue();
+
+ MVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+
+ MVT EltType = VT.getVectorElementType();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Shuffle = XformToShuffleWithZero(N);
+ if (Shuffle.getNode()) return Shuffle;
+
+ // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+ // this operation.
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+ RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ // If these two elements can't be folded, bail out.
+ if ((LHSOp.getOpcode() != ISD::UNDEF &&
+ LHSOp.getOpcode() != ISD::Constant &&
+ LHSOp.getOpcode() != ISD::ConstantFP) ||
+ (RHSOp.getOpcode() != ISD::UNDEF &&
+ RHSOp.getOpcode() != ISD::Constant &&
+ RHSOp.getOpcode() != ISD::ConstantFP))
+ break;
+
+ // Can't fold divide by zero.
+ if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+ N->getOpcode() == ISD::FDIV) {
+ if ((RHSOp.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+ (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ break;
+ }
+
+ Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
+ EltType, LHSOp, RHSOp));
+ AddToWorkList(Ops.back().getNode());
+ assert((Ops.back().getOpcode() == ISD::UNDEF ||
+ Ops.back().getOpcode() == ISD::Constant ||
+ Ops.back().getOpcode() == ISD::ConstantFP) &&
+ "Scalar binop didn't fold!");
+ }
+
+ if (Ops.size() == LHS.getNumOperands()) {
+ MVT VT = LHS.getValueType();
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+ SDValue N1, SDValue N2){
+  assert(N0.getOpcode() == ISD::SETCC &&
+         "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorkList(SETCC.getNode());
+ return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+ SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select. Callers of this should assume that TheSelect is deleted if this
+/// returns true. As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
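+///
+/// For example, (select C, (load P1), (load P2)) with identical token chains
+/// can be simplified to (load (select C, P1, P2)), replacing two loads with
+/// one.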
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD &&
+ // Do not let this transformation reduce the number of volatile loads.
+ !cast<LoadSDNode>(LHS)->isVolatile() &&
+ !cast<LoadSDNode>(RHS)->isVolatile() &&
+ // Token chains must be identical.
+ LHS.getOperand(0) == RHS.getOperand(0)) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+      // If this is an EXTLOAD, the VTs must match.
+ if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+ // FIXME: this conflates two src values, discarding one. This is not
+ // the right thing to do, but nothing uses srcvalues now. When they do,
+ // turn SrcValue into a list of locations.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) {
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ }
+ } else {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) {
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+ }
+
+ if (Addr.getNode()) {
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ LLD->getChain(),
+ Addr,LLD->getSrcValue(),
+ LLD->getSrcValueOffset(),
+ LLD->isVolatile(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ LLD->getChain(), Addr, LLD->getSrcValue(),
+ LLD->getSrcValueOffset(),
+ LLD->getMemoryVT(),
+ LLD->isVolatile(),
+ LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3,
+ ISD::CondCode CC, bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ MVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && !SCCC->isNullValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->isNullValue())
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->getValueAPF().isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+ // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+ // types an we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float) and if the ConstantFP is not legal, because if
+ // it is legal, we may not need to store the FP constant in a constant pool.
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal) &&
+          // If both constants have multiple uses, then we won't need to do an
+          // extra load; they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ const Type *FPTy = Elts[0]->getType();
+ const TargetData &TD = *TLI.getTargetData();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false,
+ Alignment);
+
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+ // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
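+  //
+  // For example, with i32 X, (select_cc setlt X, 0, 4, 0) becomes
+  // (and (srl X, 29), 4), since bit 2 of the shifted value is X's sign bit.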
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ N0.getValueType().isInteger() &&
+ N2.getValueType().isInteger() &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ MVT XType = N0.getValueType();
+ MVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+ // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
+ // single-bit constant.
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits()-ShCtV-1;
+ SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+ XType, N0, ShCt);
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getAPIntValue() == 1)
+ return SDValue();
+
+ // Get a SetCC of the condition
+ // FIXME: Should probably make sure that setcc is legal if we ever have a
+ // target where it isn't.
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
+
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ }
+
+  // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+  // otherwise, go ahead with the folds.
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ MVT XType = N0.getValueType();
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+ SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+ return Res;
+ }
+
+ // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::CTLZ, XType))) {
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+ return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+ DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ getShiftAmountTy()));
+ }
+ // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+ SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+ XType, DAG.getConstant(0, XType), N0);
+ SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+ return DAG.getNode(ISD::SRL, DL, XType,
+ DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ }
+ // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+ SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ }
+ }
+
+ // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+ N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
+ MVT XType = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
+ N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+ if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+ MVT XType = N0.getValueType();
+ if (SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level == Unrestricted, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
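+///
+/// For example, on i32 a divide by 3 becomes, roughly,
+///   Q = mulhs X, 0x55555556;  Q = Q + (Q srl 31)
+/// though the exact sequence is chosen by TLI.BuildSDIV.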
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
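+///
+/// For example, on i32 an unsigned divide by 3 becomes, roughly,
+///   Q = mulhu X, 0xAAAAAAAB;  Q = Q srl 1
+/// though the exact sequence is chosen by TLI.BuildUDIV.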
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if base is known not to alias with anything
+/// but itself. Provides base object and offset as results.
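+///
+/// For example, (add (FrameIndex 1), 16) yields Base = FrameIndex 1 and
+/// Offset = 16, and returns true since a frame index aliases only itself.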
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0;
+
+  // If it's adding a simple constant, then integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // If it's any of the following then it can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base) ||
+ isa<ConstantPoolSDNode>(Base) ||
+ isa<GlobalAddressSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
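+///
+/// For example, two 4-byte accesses at offsets 0 and 4 of the same base do
+/// not overlap, while 4-byte accesses at offsets 0 and 2 do.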
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2) const {
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+ bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+
+  // If they have the same base address, then...
+ if (Base1 == Base2)
+ // Check to see if the addresses overlap.
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+  // If both bases are known distinct non-aliasing objects, they can't alias.
+ if (KnownBase1 && KnownBase2) return false;
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+ int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+ int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset) const {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ Size = LD->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LD->getSrcValue();
+ SrcValueOffset = LD->getSrcValueOffset();
+ return true;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ Size = ST->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = ST->getSrcValue();
+ SrcValueOffset = ST->getSrcValueOffset();
+ } else {
+ assert(0 && "FindAliasInfo expected a memory operand");
+ }
+
+ return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
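+///
+/// For example, when starting from a load, other loads on the chain are
+/// walked past (two loads never conflict), while any store that may overlap
+/// the loaded address is recorded in Aliases.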
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ std::set<SDNode *> Visited; // Visited node set.
+
+ // Get alias information for node.
+ SDValue Ptr;
+ int64_t Size = 0;
+ const Value *SrcValue = 0;
+ int SrcValueOffset = 0;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+
+    // Don't bother if we've been here before.
+ if (Visited.find(Chain.getNode()) != Visited.end()) continue;
+ Visited.insert(Chain.getNode());
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry token is ideal chain operand, but handled in FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDValue OpPtr;
+ int64_t OpSize = 0;
+ const Value *OpSrcValue = 0;
+ int OpSrcValueOffset = 0;
+ bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset);
+
+      // If this chain node aliases, record it and don't look past it.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ // Clean up old chain.
+ AddToWorkList(Chain.getNode());
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+      // We have to check each of the operands of the token factor, so we queue
+      // them up. Adding the operands to the queue (stack) in reverse order
+      // maintains the original order and increases the likelihood that getNode
+      // will find a matching token factor (CSE).
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ // Eliminate the token factor if we can.
+ AddToWorkList(Chain.getNode());
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node).
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ if (Aliases.size() == 0) {
+ // If no operands then chain to entry token.
+ return DAG.getEntryNode();
+ } else if (Aliases.size() == 1) {
+ // If a single operand then chain to it. We don't need to revisit it.
+ return Aliases[0];
+ }
+
+ // Construct a custom tailored token factor.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
+
+ // Make sure the old chain gets cleaned up.
+ if (NewChain != OldChain) AddToWorkList(OldChain.getNode());
+
+ return NewChain;
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+  // Construct a DAGCombiner and run it over this DAG.
+  DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 0000000..6becff3
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1033 @@
+//===-- FastISel.cpp - Implementation of the FastISel class --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/DebugLoc.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SelectionDAGBuild.h"
+using namespace llvm;
+
+unsigned FastISel::getRegForValue(Value *V) {
+ MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT::SimpleValueType VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1 to a legal type though, because it's common and easy.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+ if (ValueMap.count(V))
+ return ValueMap[V];
+ unsigned Reg = LocalValueMap[V];
+ if (Reg != 0)
+ return Reg;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType()));
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+ if (!Reg) {
+ const APFloat &Flt = CF->getValueAPF();
+ MVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+ }
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (!SelectOperator(CE, CE->getOpcode())) return 0;
+ Reg = LocalValueMap[CE];
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg != 0)
+ LocalValueMap[V] = Reg;
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+ if (ValueMap.count(V))
+ return ValueMap[V];
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previous
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return Reg;
+ }
+
+ unsigned &AssignedReg = ValueMap[I];
+ if (AssignedReg == 0)
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
+ TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
+ Reg, RegClass, RegClass);
+ }
+ return AssignedReg;
+}
+
+unsigned FastISel::getRegForGEPIndex(Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return 0;
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT))
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
+ ISD::SIGN_EXTEND, IdxN);
+ else if (IdxVT.bitsGT(PtrVT))
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
+ ISD::TRUNCATE, IdxN);
+ return IdxN;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
+ MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(VT);
+ else
+ return false;
+ }
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, CI->getZExtValue());
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, CF);
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op1);
+ if (ResultReg == 0)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectGetElementPtr(User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ const Type *Ty = I->getOperand(0)->getType();
+ MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT();
+ for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end();
+ OI != E; ++OI) {
+ Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
+ // FIXME: This can be optimized by combining the add with a
+ // subsequent one.
+ N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->getZExtValue() == 0) continue;
+ uint64_t Offs =
+          TD.getTypeAllocSize(Ty)*CI->getSExtValue();
+ N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ unsigned IdxN = getRegForGEPIndex(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::SelectCall(User *I) {
+ Function *F = cast<CallInst>(I)->getCalledFunction();
+ if (!F) return false;
+
+ unsigned IID = F->getIntrinsicID();
+ switch (IID) {
+ default: break;
+ case Intrinsic::dbg_stoppoint: {
+ DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
+ if (DIDescriptor::ValidDebugInfo(SPI->getContext(), CodeGenOpt::None)) {
+ DICompileUnit CU(cast<GlobalVariable>(SPI->getContext()));
+ unsigned Line = SPI->getLine();
+ unsigned Col = SPI->getColumn();
+ unsigned Idx = MF.getOrCreateDebugLocID(CU.getGV(), Line, Col);
+ setCurDebugLoc(DebugLoc::get(Idx));
+ }
+ return true;
+ }
+ case Intrinsic::dbg_region_start: {
+ DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I);
+ if (DIDescriptor::ValidDebugInfo(RSI->getContext(), CodeGenOpt::None) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ unsigned ID =
+ DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext()));
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ BuildMI(MBB, DL, II).addImm(ID);
+ }
+ return true;
+ }
+ case Intrinsic::dbg_region_end: {
+ DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I);
+ if (DIDescriptor::ValidDebugInfo(REI->getContext(), CodeGenOpt::None) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ unsigned ID = 0;
+ DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext()));
+ if (!Subprogram.isNull() && !Subprogram.describes(MF.getFunction())) {
+ // This is end of an inlined function.
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ ID = DW->RecordInlinedFnEnd(Subprogram);
+        // Returned ID is 0 if this is an unbalanced "end of inlined scope".
+        // This could happen if the optimizer eats dbg intrinsics or if the
+        // "beginning of inlined scope" was not recognized due to missing
+        // location info. In such cases, ignore this region.end.
+        if (ID)
+          BuildMI(MBB, DL, II).addImm(ID);
+ } else {
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ ID = DW->RecordRegionEnd(cast<GlobalVariable>(REI->getContext()));
+ BuildMI(MBB, DL, II).addImm(ID);
+ }
+ }
+ return true;
+ }
+ case Intrinsic::dbg_func_start: {
+ DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
+ Value *SP = FSI->getSubprogram();
+ if (!DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::None))
+ return true;
+
+ // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what
+ // (most?) gdb expects.
+ DebugLoc PrevLoc = DL;
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+ if (!Subprogram.describes(MF.getFunction())) {
+ // This is a beginning of an inlined function.
+
+ // If llvm.dbg.func.start is seen in a new block before any
+ // llvm.dbg.stoppoint intrinsic then the location info is unknown.
+      // FIXME: Why is DebugLoc reset at the beginning of each block?
+ if (PrevLoc.isUnknown())
+ return true;
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ setCurDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID(
+ CompileUnit.getGV(), Line, 0)));
+
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
+ unsigned LabelID = DW->RecordInlinedFnStart(Subprogram,
+ DICompileUnit(PrevLocTpl.CompileUnit),
+ PrevLocTpl.Line,
+ PrevLocTpl.Col);
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ BuildMI(MBB, DL, II).addImm(LabelID);
+ }
+ } else {
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ MF.setDefaultDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID(
+ CompileUnit.getGV(), Line, 0)));
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ // llvm.dbg.func_start also defines beginning of function scope.
+ DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram()));
+ }
+ }
+
+ return true;
+ }
+ case Intrinsic::dbg_declare: {
+ DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+ Value *Variable = DI->getVariable();
+ if (DIDescriptor::ValidDebugInfo(Variable, CodeGenOpt::None) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ // Determine the address of the declared object.
+ Value *Address = DI->getAddress();
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI) break;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI == StaticAllocaMap.end()) break; // VLAs.
+ int FI = SI->second;
+
+      // Determine the debug global variable.
+ GlobalValue *GV = cast<GlobalVariable>(Variable);
+
+ // Build the DECLARE instruction.
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE);
+ MachineInstr *DeclareMI
+ = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV);
+ DIVariable DV(cast<GlobalVariable>(GV));
+ if (!DV.isNull()) {
+ // This is a local variable
+ DW->RecordVariableScope(DV, DeclareMI);
+ }
+ }
+ return true;
+ }
+ case Intrinsic::eh_exception: {
+ MVT VT = TLI.getValueType(I->getType());
+ switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
+ default: break;
+ case TargetLowering::Expand: {
+ assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!");
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Reg, RC, RC);
+ assert(InsertedCopy && "Can't copy address registers!");
+      (void)InsertedCopy;  // Silence unused-variable warnings.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ break;
+ }
+ case Intrinsic::eh_selector_i32:
+ case Intrinsic::eh_selector_i64: {
+ MVT VT = TLI.getValueType(I->getType());
+ switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
+ default: break;
+ case TargetLowering::Expand: {
+ MVT VT = (IID == Intrinsic::eh_selector_i32 ?
+ MVT::i32 : MVT::i64);
+
+ if (MMI) {
+ if (MBB->isLandingPad())
+ AddCatchInfo(*cast<CallInst>(I), MMI, MBB);
+ else {
+#ifndef NDEBUG
+ CatchInfoLost.insert(cast<CallInst>(I));
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+ }
+
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Reg, RC, RC);
+ assert(InsertedCopy && "Can't copy address registers!");
+        (void)InsertedCopy;  // Silence unused-variable warnings.
+ UpdateValueMap(I, ResultReg);
+ } else {
+ unsigned ResultReg =
+ getRegForValue(Constant::getNullValue(I->getType()));
+ UpdateValueMap(I, ResultReg);
+ }
+ return true;
+ }
+ }
+ break;
+ }
+ }
+ return false;
+}
+
+bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) {
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal. Or as a special case,
+ // it may be i1 if we're doing a truncate because that's
+ // easy and somewhat common.
+ if (!TLI.isTypeLegal(DstVT))
+ if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the source operand is legal. Or as a special case,
+ // it may be i1 if we're doing zero-extension because that's
+ // easy and somewhat common.
+ if (!TLI.isTypeLegal(SrcVT))
+ if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // If the operand is i1, arrange for the high bits in the register to be zero.
+ if (SrcVT == MVT::i1) {
+ SrcVT = TLI.getTypeToTransformTo(SrcVT);
+ InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg);
+ if (!InputReg)
+ return false;
+ }
+ // If the result is i1, truncate to the target's type for i1 first.
+ if (DstVT == MVT::i1)
+ DstVT = TLI.getTypeToTransformTo(DstVT);
+
+ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+ DstVT.getSimpleVT(),
+ Opcode,
+ InputReg);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectBitCast(User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple() ||
+ !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ ResultReg = createResultReg(DstClass);
+
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Op0, DstClass, SrcClass);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+
+ // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ ISD::BIT_CONVERT, Op0);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(Instruction *I) {
+ return SelectOperator(I, I->getOpcode());
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
+ MachineFunction::iterator NextMBB =
+ next(MachineFunction::iterator(MBB));
+
+ if (MBB->isLayoutSuccessor(MSucc)) {
+ // The unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+ }
+ MBB->addSuccessor(MSucc);
+}
+
+bool
+FastISel::SelectOperator(User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::Sub: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::Mul: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc);
+ return true;
+ }
+
+    // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::PHI:
+ // PHI nodes are already emitted.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
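+
+// A minimal illustrative sketch of the IntToPtr/PtrToInt rule above: the
+// three cases reduce to zext, trunc, or a plain copy of the bits. Shown on
+// the host assuming 64-bit pointers and 32-bit ints; intToPtr64 and
+// ptrToInt32 are hypothetical names, not part of this file.
+static unsigned long long intToPtr64(unsigned V) {
+ return V; // DstVT.bitsGT(SrcVT): ZERO_EXTEND
+}
+static unsigned ptrToInt32(unsigned long long P) {
+ return (unsigned)P; // DstVT.bitsLT(SrcVT): TRUNCATE
+}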
+
+FastISel::FastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ )
+ : MBB(0),
+ ValueMap(vm),
+ MBBMap(bm),
+ StaticAllocaMap(am),
+#ifndef NDEBUG
+ CatchInfoLost(cil),
+#endif
+ MF(mf),
+ MMI(mmi),
+ DW(dw),
+ MRI(MF.getRegInfo()),
+ MFI(*MF.getFrameInfo()),
+ MCP(*MF.getConstantPool()),
+ TM(MF.getTarget()),
+ TD(*TM.getTargetData()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ unsigned /*Op1*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType,
+ unsigned /*Op0*/, unsigned /*Op1*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+ unsigned Op0, uint64_t Imm,
+ MVT::SimpleValueType ImmType) {
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0)
+ return 0;
+ return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
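+
+// A minimal illustrative sketch of the try-immediate-then-materialize
+// fallback above, with hypothetical emitRI/emitI/emitRR stubs standing in
+// for the target's FastEmit hooks; none of these names are part of this
+// file.
+static unsigned emitRI(unsigned, unsigned long long) { return 0; } // stub
+static unsigned emitI(unsigned long long) { return 0; } // stub
+static unsigned emitRR(unsigned, unsigned) { return 0; } // stub
+static unsigned emitRIWithFallback(unsigned Op0, unsigned long long Imm) {
+ if (unsigned R = emitRI(Op0, Imm))
+ return R; // the ri form was legal
+ unsigned M = emitI(Imm); // materialize Imm into a register
+ return M ? emitRR(Op0, M) : 0; // then fall back to the rr form
+}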
+
+/// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries
+/// to emit an instruction with a floating-point immediate operand using
+/// FastEmit_rf. If that fails, it materializes the immediate into a register
+/// and tries FastEmit_rr instead.
+unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+ unsigned Op0, ConstantFP *FPImm,
+ MVT::SimpleValueType ImmType) {
+ // First check if immediate type is legal. If not, we can't use the rf form.
+ unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+ if (ResultReg != 0)
+ return ResultReg;
+
+ // Materialize the constant in a register.
+ unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
+ if (MaterialReg == 0) {
+ // If the target doesn't have a way to directly enter a floating-point
+ // value into a register, use an alternate approach.
+ // TODO: The current approach only supports floating-point constants
+ // that can be constructed by conversion from integer values. This should
+ // be replaced by code that creates a load from a constant-pool entry,
+ // which will require some target-specific work.
+ const APFloat &Flt = FPImm->getValueAPF();
+ MVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (!isExact)
+ return 0;
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::Constant, IntVal.getZExtValue());
+ if (IntegerReg == 0)
+ return 0;
+ MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
+ ISD::SINT_TO_FP, IntegerReg);
+ if (MaterialReg == 0)
+ return 0;
+ }
+ return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
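+
+// A minimal illustrative sketch of the exactness test above, on host
+// doubles; rebuildableViaSIntToFP is a hypothetical helper, and the value
+// is assumed to be in range of long long. A constant is rebuilt via
+// SINT_TO_FP only when truncating toward zero loses nothing.
+static bool rebuildableViaSIntToFP(double FPImm, long long &IntVal) {
+ IntVal = (long long)FPImm; // truncates toward zero, like rmTowardZero
+ return (double)IntVal == FPImm; // exact iff no bits were lost
+}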
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(MBB, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, unsigned Op1) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, unsigned Op1, uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT,
+ unsigned Op0, uint32_t Idx) {
+ const TargetRegisterClass* RC = MRI.getRegClass(Op0);
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::EXTRACT_SUBREG);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
+unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
+}
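+
+// A minimal illustrative sketch of what the emitted AND computes, on a host
+// integer; zextFromI1 is a hypothetical name, not part of this file.
+static unsigned zextFromI1(unsigned Op) {
+ return Op & 1; // clear all but the least significant bit
+}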
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000..2cd67e6
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3091 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and branch instructions into brcc's.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ CodeGenOpt::Level OptLevel;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// legalized. We use this to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SDValue LastCALLSEQ_END;
+
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
+
+ enum LegalizeAction {
+ Legal, // The target natively supports this operation.
+ Promote, // This operation should be executed in a larger type.
+ Expand // Try to expand this to other ops, otherwise use a libcall.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// value type, where the two bits correspond to the LegalizeAction enum.
+ /// This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what legalized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return it unchanged.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+ /// getTypeAction - Return how we should legalize values of this type: either
+ /// it is already legal, or we need to expand it into multiple registers of
+ /// smaller integer type, or we need to promote it to a larger type.
+ LegalizeAction getTypeAction(MVT VT) const {
+ return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ ///
+ bool isTypeLegal(MVT VT) const {
+ return getTypeAction(VT) == Legal;
+ }
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - We know that the specified value has a legal type.
+ /// Recursively ensure that the operands have legal types, then return the
+ /// result.
+ SDValue LegalizeOp(SDValue O);
+
+ /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+
+ SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ SDValue ExpandDBG_STOPPOINT(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+
+ void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const {
+ MVT EltVT = NVT.getVectorElementType();
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
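+
+// A minimal illustrative sketch of the mask widening above, on plain
+// arrays; growShuffleMask is a hypothetical helper, not part of this file.
+// With Growth == 2, the mask <0, 1, -1, 3> becomes <0, 1, 2, 3, -1, -1, 6, 7>.
+static void growShuffleMask(const int *Mask, unsigned NumMaskElts,
+ unsigned Growth, int *NewMask) {
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ for (unsigned j = 0; j != Growth; ++j)
+ NewMask[i * Growth + j] =
+ Mask[i] < 0 ? -1 : Mask[i] * (int)Growth + (int)j;
+}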
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
+ CodeGenOpt::Level ol)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice, however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+ if (Node->getOpcode() == ISD::CALLSEQ_END)
+ return Node;
+ if (Node->use_empty())
+ return 0; // No CallSeqEnd
+
+ // The chain is usually at the end.
+ SDValue TheChain(Node, Node->getNumValues()-1);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Sometimes it's at the beginning.
+ TheChain = SDValue(Node, 0);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Otherwise, hunt for it.
+ for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+ if (Node->getValueType(i) == MVT::Other) {
+ TheChain = SDValue(Node, i);
+ break;
+ }
+
+ // Otherwise, we walked into a node without a chain.
+ if (TheChain.getValueType() != MVT::Other)
+ return 0;
+ }
+ }
+
+ for (SDNode::use_iterator UI = Node->use_begin(),
+ E = Node->use_end(); UI != E; ++UI) {
+
+ // Make sure to only follow users of our token chain.
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+ if (User->getOperand(i) == TheChain)
+ if (SDNode *Result = FindCallEndFromCallStart(User))
+ return Result;
+ }
+ return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ assert(Node && "Didn't find callseq_start for a call??");
+ if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the operands of N, looking
+/// to see if any of them lead to Dest. If N cannot reach Dest, legalize it
+/// and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo. This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+ if (N == Dest) return true; // N certainly leads to Dest :)
+
+ // If we've already processed this node and it does lead to Dest, there is no
+ // need to reprocess it.
+ if (NodesLeadingTo.count(N)) return true;
+
+ // If the first result of this node has already been legalized, then it
+ // cannot lead to Dest.
+ if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+ // Okay, this node has not already been legalized. Check and legalize all
+ // operands. If none lead to Dest, then we can legalize this node.
+ bool OperandsLeadToDest = false;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OperandsLeadToDest |= // If an operand leads to Dest, so do we.
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo);
+
+ if (OperandsLeadToDest) {
+ NodesLeadingTo.insert(N);
+ return true;
+ }
+
+ // Okay, this node looks safe, legalize it and return false.
+ LegalizeOp(SDValue(N, 0));
+ return false;
+}
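+
+// A minimal illustrative sketch: the walk above is, in essence, a
+// depth-first reachability search that memoizes positive answers so shared
+// subgraphs are not retraversed. ToyNode and leadsTo are hypothetical
+// stand-ins and omit the legalization side effects.
+struct ToyNode {
+ ToyNode **Ops; // operand edges
+ unsigned NumOps;
+ bool KnownLeads; // memo: already proven to lead to Dest
+};
+static bool leadsTo(ToyNode *N, ToyNode *Dest) {
+ if (N == Dest || N->KnownLeads) return true;
+ bool Leads = false;
+ for (unsigned i = 0; i != N->NumOps; ++i)
+ Leads |= leadsTo(N->Ops[i], Dest);
+ if (Leads) N->KnownLeads = true; // remember positive answers
+ return Leads;
+}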
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool Extend = false;
+ DebugLoc dl = CFP->getDebugLoc();
+
+ // If a FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ MVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ MVT OrigVT = VT;
+ MVT SVT = VT;
+ while (SVT != MVT::f32) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1);
+ if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ const Type *SType = SVT.getTypeForMVT();
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend)
+ return DAG.getExtLoad(ISD::EXTLOAD, dl,
+ OrigVT, DAG.getEntryNode(),
+ CPIdx, PseudoSourceValue::getConstantPool(),
+ 0, VT, false, Alignment);
+ return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false, Alignment);
+}
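+
+// A minimal illustrative sketch: "precise when represented as a float"
+// reduces to a round-trip test (NaN aside); shrinksToFloat is a
+// hypothetical host-side helper, not part of this file.
+static bool shrinksToFloat(double D) {
+ float F = (float)D; // the FPTrunc performed above
+ return (double)F == D; // true iff the value survives unchanged
+}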
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static
+SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ MVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ int SVOffset = ST->getSrcValueOffset();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ MVT intVT = MVT::getIntegerVT(VT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
+ SVOffset, ST->isVolatile(), Alignment);
+ } else {
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ MVT StoredVT = ST->getMemoryVT();
+ MVT RegVT =
+ TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, NULL, 0, StoredVT);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but the last copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getSrcValue(), SVOffset + Offset,
+ ST->isVolatile(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ NULL, 0, MemVT);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getSrcValue(), SVOffset + Offset,
+ MemVT, ST->isVolatile(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ }
+ }
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ MVT NewStoredVT =
+ (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1);
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+ ST->getSrcValue(), SVOffset, NewStoredVT,
+ ST->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+ ST->getSrcValue(), SVOffset + IncrementSize,
+ NewStoredVT, ST->isVolatile(), Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+}
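+
+// A minimal illustrative sketch of the final split above, on host bytes and
+// assuming little-endian layout; store16 and storeUnaligned32 are
+// hypothetical helpers playing the role of the two truncating stores.
+static void store16(unsigned char *P, unsigned short V) {
+ P[0] = (unsigned char)V;
+ P[1] = (unsigned char)(V >> 8);
+}
+static void storeUnaligned32(unsigned char *P, unsigned V) {
+ store16(P, (unsigned short)V); // Lo part at Ptr
+ store16(P + 2, (unsigned short)(V >> 16)); // Hi part at Ptr + IncrementSize
+}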
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static
+SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ int SVOffset = LD->getSrcValueOffset();
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ MVT VT = LD->getValueType(0);
+ MVT LoadedVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, LD->isVolatile(),
+ LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+ if (VT.isFloatingPoint() && LoadedVT != VT)
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+ } else {
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do a (aligned) load from the stack slot.
+ MVT RegVT = TLI.getRegisterType(intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but the last copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + Offset, LD->isVolatile(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ NULL, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getSrcValue(), SVOffset + Offset,
+ MemVT, LD->isVolatile(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ NULL, 0, MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ NULL, 0, LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ MVT NewLoadedVT;
+ NewLoadedVT = MVT::getIntegerVT(NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (TLI.isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ SDValue Ops[] = { Result, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
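+
+// A minimal illustrative sketch of the aggregation above, on host bytes and
+// assuming little-endian layout; load16 and loadUnaligned32 are
+// hypothetical helpers. The shift-and-or mirrors the SHL/OR nodes built
+// above.
+static unsigned load16(const unsigned char *P) {
+ return (unsigned)P[0] | ((unsigned)P[1] << 8);
+}
+static unsigned loadUnaligned32(const unsigned char *P) {
+ unsigned Lo = load16(P); // zero-extending load of the low half
+ unsigned Hi = load16(P + 2); // load of the high half
+ return Lo | (Hi << 16); // SHL the high part, then OR
+}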
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ DebugLoc dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or an "extload into register" instruction),
+ // then permute it into place, if the idx is a constant and is supported
+ // by the target.
+ MVT VT = Tmp1.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+ MVT IdxVT = Tmp3.getValueType();
+ MVT PtrVT = TLI.getPointerTy();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+
+ // Truncate or zero extend offset to target pointer type.
+ unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ // Add the offset to the index.
+ unsigned EltSize = EltVT.getSizeInBits()/8;
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, DAG.getConstant(EltSize, IdxVT));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
+ PseudoSourceValue::getFixedStack(SPFI), 0, EltVT);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+}
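+
+// A minimal illustrative sketch of the spill-update-reload dance above,
+// shrunk to a host-side 4-lane vector; V4 and insertViaStack are
+// hypothetical, and the index is masked only to keep the sketch in bounds.
+struct V4 { float Lane[4]; };
+static V4 insertViaStack(V4 Vec, unsigned Idx, float Val) {
+ float Slot[4]; // the stack temporary
+ for (unsigned i = 0; i != 4; ++i)
+ Slot[i] = Vec.Lane[i]; // store the whole vector
+ Slot[Idx & 3] = Val; // scalar store at Base + Idx * EltSize
+ for (unsigned i = 0; i != 4; ++i)
+ Vec.Lane[i] = Slot[i]; // load the updated vector back
+ return Vec;
+}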
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers, in which case the inserted value can be wider than the element.
+ MVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+ &ShufOps[0]);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
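+
+// A minimal illustrative sketch: the constant-index path above builds a
+// "replace one lane" mask; insertEltMask is a hypothetical helper.
+// Inserting into element 2 of a 4-wide vector yields <0, 1, 4, 3>: lanes
+// 0, 1 and 3 come from Vec, lane 2 comes from element 0 of ScVec.
+static void insertEltMask(unsigned NumElts, unsigned Pos, int *Mask) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask[i] = (i == Pos) ? (int)NumElts : (int)i;
+}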
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal. Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
+ if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return Op;
+
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(getTypeAction(Node->getValueType(i)) == Legal &&
+ "Unexpected illegal type!");
+
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+ Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ SDValue Result = Op;
+ bool isCustom = false;
+
+ // Figure out the correct action; the way to query this varies by opcode
+ TargetLowering::LegalizeAction Action;
+ bool SimpleFinishLegalizing = true;
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::VAARG:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ MVT OpVT = Node->getOperand(CompareOperand).getValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALL:
+ // FIXME: Legalization for calls requires custom-lowering the call before
+ // legalizing the operands! (I haven't looked into precisely why.)
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FPOWI:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FORMAL_ARGUMENTS:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::BUILD_VECTOR:
+ // A weird case: legalization for BUILD_VECTOR never legalizes the
+ // operands!
+ // FIXME: This really sucks... changing it isn't semantically incorrect,
+ // but it massively pessimizes the code for floating-point BUILD_VECTORs
+ // because ConstantFP operands get legalized into constant pool loads
+ // before the BUILD_VECTOR code can see them. It doesn't usually bite,
+ // though, because BUILD_VECTORS usually get lowered into other nodes
+ // which get legalized properly.
+ SimpleFinishLegalizing = false;
+ break;
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SmallVector<SDValue, 8> Ops, ResultVals;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::BR:
+ case ISD::BRIND:
+ case ISD::BR_JT:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ case ISD::RET:
+ // Branches tweak the chain to include LastCALLSEQ_END
+ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
+ LastCALLSEQ_END);
+ Ops[0] = LegalizeOp(Ops[0]);
+ LastCALLSEQ_END = DAG.getEntryNode();
+ break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Ops[1].getValueType().isVector())
+ Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+ break;
+ }
+
+ Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(),
+ Ops.size());
+ switch (Action) {
+ case TargetLowering::Legal:
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Result.getValue(i));
+ break;
+ case TargetLowering::Custom:
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) {
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ if (e == 1)
+ ResultVals.push_back(Tmp1);
+ else
+ ResultVals.push_back(Tmp1.getValue(i));
+ }
+ break;
+ }
+
+ // FALL THROUGH
+ case TargetLowering::Expand:
+ ExpandNode(Result.getNode(), ResultVals);
+ break;
+ case TargetLowering::Promote:
+ PromoteNode(Result.getNode(), ResultVals);
+ break;
+ }
+ if (!ResultVals.empty()) {
+ for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
+ if (ResultVals[i] != SDValue(Node, i))
+ ResultVals[i] = LegalizeOp(ResultVals[i]);
+ AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
+ }
+ return ResultVals[Op.getResNo()];
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to legalize this operator!");
+ abort();
+ case ISD::CALL:
+ // The only option for this is to custom lower it.
+ Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG);
+ assert(Tmp3.getNode() && "Target didn't custom lower this node!");
+ // A call within a calling sequence must be legalized to something
+ // other than the normal CALLSEQ_END. Violating this gets Legalize
+ // into an infinite loop.
+ assert((!IsLegalizingCall ||
+ Node->getOpcode() != ISD::CALL ||
+ Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) &&
+ "Nested CALLSEQ_START..CALLSEQ_END not supported.");
+
+ // The number of incoming and outgoing values should match, unless the final
+ // outgoing value is a flag.
+ assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() ||
+ (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 &&
+ Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) ==
+ MVT::Flag)) &&
+ "Lowering call/formal_arguments produced unexpected # results!");
+
+ // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to
+ // remember that we legalized all of them, so it doesn't get relegalized.
+ for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) {
+ if (Tmp3.getNode()->getValueType(i) == MVT::Flag)
+ continue;
+ Tmp1 = LegalizeOp(Tmp3.getValue(i));
+ if (Op.getResNo() == i)
+ Tmp2 = Tmp1;
+ AddLegalizedOperand(SDValue(Node, i), Tmp1);
+ }
+ return Tmp2;
+ case ISD::BUILD_VECTOR:
+ switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = ExpandBUILD_VECTOR(Result.getNode());
+ break;
+ }
+ break;
+ case ISD::CALLSEQ_START: {
+ SDNode *CallEnd = FindCallEndFromCallStart(Node);
+
+ // Recursively legalize all of the inputs of the call end that do not lead
+ // to this call start. This ensures that any libcalls that need to be inserted
+ // are inserted *before* the CALLSEQ_START.
+ {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+ for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+ LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+ NodesLeadingTo);
+ }
+
+ // Now that we legalized all of the inputs (which may have inserted
+ // libcalls) create the new CALLSEQ_START node.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ // Merge in the last call to ensure that this call starts after the last
+ // call ended.
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ }
+
+ // Do not try to legalize the target-specific arguments (#1+).
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+
+ // Remember that the CALLSEQ_START is legalized.
+ AddLegalizedOperand(Op.getValue(0), Result);
+ if (Node->getNumValues() == 2) // If this has a flag result, remember it.
+ AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+ // Now that the callseq_start and all of the non-call nodes above this call
+ // sequence have been legalized, legalize the call itself. During this
+ // process, no libcalls can/will be inserted, guaranteeing that no calls
+ // can overlap.
+ assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+ // Note that we are selecting this call!
+ LastCALLSEQ_END = SDValue(CallEnd, 0);
+ IsLegalizingCall = true;
+
+ // Legalize the call, starting from the CALLSEQ_END.
+ LegalizeOp(LastCALLSEQ_END);
+ assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ return Result;
+ }
+ case ISD::CALLSEQ_END:
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
+ // will cause this node to be legalized as well, and handles libcalls right.
+ if (LastCALLSEQ_END.getNode() != Node) {
+ LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
+ }
+
+ // Otherwise, the call start has been legalized and everything is going
+ // according to plan. Just legalize ourselves normally here.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Do not try to legalize the target-specific arguments (#1+), except for
+ // an optional flag input.
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ } else {
+ Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+ if (Tmp1 != Node->getOperand(0) ||
+ Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Ops.back() = Tmp2;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ }
+ assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+ // This finishes up call legalization.
+ IsLegalizingCall = false;
+
+ // If the CALLSEQ_END node has a flag, remember that we legalized it.
+ AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+ if (Node->getNumValues() == 2)
+ AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+ return Result.getValue(Op.getResNo());
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ MVT VT = Node->getValueType(0);
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp3 = Result.getValue(0);
+ Tmp4 = Result.getValue(1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForMVT());
+ if (LD->getAlignment() < ABIAlignment) {
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG,
+ TLI);
+ Tmp3 = Result.getOperand(0);
+ Tmp4 = Result.getOperand(1);
+ Tmp3 = LegalizeOp(Tmp3);
+ Tmp4 = LegalizeOp(Tmp4);
+ }
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+ if (Tmp1.getNode()) {
+ Tmp3 = LegalizeOp(Tmp1);
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ }
+ break;
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another vector type.
+ assert(VT.isVector() && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ break;
+ }
+ }
+ // Since loads produce two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp3);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp4);
+ return Op.getResNo() ? Tmp4 : Tmp3;
+ } else {
+ MVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ MVT NVT = MVT::getIntegerVT(NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
+ NVT, isVolatile, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(SrcVT.isExtended() && !SrcVT.isVector() &&
+ "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ MVT RoundVT = MVT::getIntegerVT(RoundWidth);
+ MVT ExtraVT = MVT::getIntegerVT(ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+ Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+ Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset + IncrementSize,
+ ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+ Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+ Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset + IncrementSize,
+ ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForMVT());
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG,
+ TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
+ }
+ }
+ }
+ break;
+ case TargetLowering::Expand:
+ // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+ if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Result = DAG.getNode(ISD::FP_EXTEND, dl,
+ Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
+ break;
+ }
+ assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(), SrcVT,
+ LD->isVolatile(), LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
+ }
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+
+ if (!ST->isTruncatingStore()) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ getTypeAction(MVT::i32) == Legal) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ MVT::i32);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ } else if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (getTypeAction(MVT::i64) == Legal) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), MVT::i64);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
+ const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
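+ // For example, storing the f64 1.0 becomes two i32 stores of the words of
+ // its bit pattern 0x3FF0000000000000: Lo = 0x00000000 at offset 0 and
+ // Hi = 0x3FF00000 at offset 4 (word order fixed up above for big endian).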
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(4));
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+ isVolatile, MinAlign(Alignment, 4U));
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ break;
+ }
+ }
+ }
+
+ {
+ Tmp3 = LegalizeOp(ST->getValue());
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+
+ MVT VT = Tmp3.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG,
+ TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) Result = Tmp1;
+ break;
+ case TargetLowering::Promote:
+ assert(VT.isVector() && "Unknown legal promote case!");
+ Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getSrcValue(), SVOffset, isVolatile,
+ Alignment);
+ break;
+ }
+ break;
+ }
+ } else {
+ Tmp3 = LegalizeOp(ST->getValue());
+
+ MVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits());
+ Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, NVT, isVolatile, Alignment);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(StVT.isExtended() && !StVT.isVector() &&
+ "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ MVT RoundVT = MVT::getIntegerVT(RoundWidth);
+ MVT ExtraVT = MVT::getIntegerVT(ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, RoundVT,
+ isVolatile, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+ SVOffset + IncrementSize, ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+ SVOffset, RoundVT, isVolatile, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset + IncrementSize, ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+ Tmp2 != ST->getBasePtr())
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG,
+ TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Result, DAG);
+ break;
+ case TargetLowering::Expand:
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+ Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ }
+ }
+ }
+ break;
+ }
+ }
+ assert(Result.getValueType() == Op.getValueType() &&
+ "Bad legalization!");
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op)
+ Result = LegalizeOp(Result);
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
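+ // For example, extracting element 2 of a <4 x i32> vector gives
+ // EltSize = 4 and a byte offset of 8 from the stack temporary.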
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ assert((Tmp2.getValueType() == MVT::f32 ||
+ Tmp2.getValueType() == MVT::f64) &&
+ "Ugly special-cased code!");
+ // Get the sign bit of the RHS.
+ SDValue SignBit;
+ MVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+ if (isTypeLegal(IVT)) {
+ SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+ } else {
+ assert(isTypeLegal(TLI.getPointerTy()) &&
+ (TLI.getPointerTy() == MVT::i32 ||
+ TLI.getPointerTy() == MVT::i64) &&
+ "Legal type for load?!");
+ SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
+ SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0);
+ if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
+ LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
+ LoadPtr, DAG.getIntPtrConstant(4));
+ SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
+ Ch, LoadPtr, NULL, 0, MVT::i32);
+ }
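+ // Either way, SignBit now holds an integer that is negative exactly when
+ // Tmp2's sign bit is set, so a signed compare against zero below recovers
+ // the sign of the RHS.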
+ SignBit =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+ SignBit, DAG.getConstant(0, SignBit.getValueType()),
+ ISD::SETLT);
+ // Get the absolute value of the result.
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+ AbsVal);
+}
+
+SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC,
+ MVT::Other);
+ bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other);
+
+ const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node);
+ GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit());
+ if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) {
+ DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit()));
+
+ unsigned Line = DSP->getLine();
+ unsigned Col = DSP->getColumn();
+
+ if (OptLevel == CodeGenOpt::None) {
+ // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it
+ // won't hurt anything.
+ if (useDEBUG_LOC) {
+ return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0),
+ DAG.getConstant(Line, MVT::i32),
+ DAG.getConstant(Col, MVT::i32),
+ DAG.getSrcValue(CU.getGV()));
+ } else {
+ unsigned ID = DW->RecordSourceLine(Line, Col, CU);
+ return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID);
+ }
+ }
+ }
+ return Node->getOperand(0);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ DebugLoc dl = Node->getDebugLoc();
+ MVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign =
+ TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (Align > StackAlign)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP,
+ DAG.getConstant(-(uint64_t)Align, VT));
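+ // The AND with -(uint64_t)Align clears the low log2(Align) bits, rounding
+ // SP down to the requested alignment (the stack grows toward lower
+ // addresses).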
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+ /// condition code CC on the current target. This routine assumes LHS and RHS
+/// have already been legalized by LegalizeSetCCOperands. It expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC,
+ DebugLoc dl) {
+ MVT OpVT = LHS.getValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: assert(0 && "Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default: assert(0 && "Don't know how to expand this condition!"); abort();
+ case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ // FIXME: Implement more expansions.
+ }
+
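+ // Each expansion pairs the plain comparison with an explicit
+ // ordered/unordered check: e.g. SETOGT becomes
+ // (SETGT LHS, RHS) AND (SETO LHS, RHS), while SETUGT becomes
+ // (SETGT LHS, RHS) OR (SETUO LHS, RHS).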
+ SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ break;
+ }
+ }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+ MVT SlotVT,
+ MVT DestVT,
+ DebugLoc dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+ getTypeForMVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ unsigned DestAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT());
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // larger than SlotVT.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, SlotVT, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+ false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0,
+ Node->getValueType(0).getVectorElementType());
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue SplatValue = Node->getOperand(0);
+ DebugLoc dl = Node->getDebugLoc();
+ MVT VT = Node->getValueType(0);
+ MVT OpVT = SplatValue.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+
+ // FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
+ // and use a bitmask instead of a list of elements.
+ // FIXME: this doesn't treat <0, u, 0, u>, for example, as a splat.
+ std::map<SDValue, std::vector<unsigned> > Values;
+ Values[SplatValue].push_back(0);
+ bool isConstant = true;
+ if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+ SplatValue.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ Values[V].push_back(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ isOnlyLowElement = false;
+ if (SplatValue != V)
+ SplatValue = SDValue(0, 0);
+
+ // If this isn't a constant element or an undef, we can't use a constant
+ // pool load.
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+ V.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+ }
+
+ if (isOnlyLowElement) {
+ // If the low element is an undef too, then this whole thing is undef.
+ if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+ // Otherwise, turn this into a scalar_to_vector node.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+ }
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ std::vector<Constant*> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ const Type *OpNTy = OpVT.getTypeForMVT();
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment);
+ }
+
+ if (SplatValue.getNode()) { // Splat of one value?
+ // Build the shuffle constant vector: <0, 0, 0, 0>
+ SmallVector<int, 8> ZeroVec(NumElems, 0);
+
+ // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+ if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue LowValVec =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT),
+ &ZeroVec[0]);
+ }
+ }
+
+ // If there are only two unique elements, we may be able to turn this into a
+ // vector shuffle.
+ if (Values.size() == 2) {
+ // Get the two values in deterministic order.
+ SDValue Val1 = Node->getOperand(1);
+ SDValue Val2;
+ std::map<SDValue, std::vector<unsigned> >::iterator MI = Values.begin();
+ if (MI->first != Val1)
+ Val2 = MI->first;
+ else
+ Val2 = (++MI)->first;
+
+ // If Val1 is an undef, make sure it ends up as Val2, to ensure that our
+ // vector shuffle has the undef vector on the RHS.
+ if (Val1.getOpcode() == ISD::UNDEF)
+ std::swap(Val1, Val2);
+
+ // Build the shuffle constant vector: e.g. <0, 4, 0, 4>
+ SmallVector<int, 8> ShuffleMask(NumElems, -1);
+
+ // Set elements of the shuffle mask for Val1.
+ std::vector<unsigned> &Val1Elts = Values[Val1];
+ for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i)
+ ShuffleMask[Val1Elts[i]] = 0;
+
+ // Set elements of the shuffle mask for Val2.
+ std::vector<unsigned> &Val2Elts = Values[Val2];
+ for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i)
+ if (Val2.getOpcode() != ISD::UNDEF)
+ ShuffleMask[Val2Elts[i]] = NumElems;
+
+ // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) &&
+ TLI.isShuffleMaskLegal(ShuffleMask, VT)) {
+ Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1);
+ Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2);
+ return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]);
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+ Idx, SV, Offset));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+}
+
+ // ExpandLibCall - Expand a node into a call to a libcall and return the
+ // call's result value. Note that this interface returns the result in a
+ // single SDValue; there is no separate hi part.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ MVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForMVT();
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForMVT();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ CallingConv::C, false, Callee, Args, DAG,
+ Node->getDebugLoc());
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo.first;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+ SDValue Op0,
+ MVT DestVT,
+ DebugLoc dl) {
+ if (Op0.getValueType() == MVT::i32) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+ // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // word offset constant for Hi/Lo address computation
+ SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ // set up Hi and Lo (into buffer) address based on endian
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(), StackSlot, WordOff);
+ if (TLI.isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed map to unsigned space
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+ Op0Mapped, Lo, NULL, 0);
+ // initial hi portion of constructed double
+ SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0);
+ // load the constructed double
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0);
+ // FP constant to bias correct the final result
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
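+ // This is the classic exponent-bias trick: the buffer now holds the bit
+ // pattern 0x43300000:<input word>, which as an f64 has the value
+ // 2^52 + (input treated as unsigned). Subtracting the matching bias
+ // (2^52, or 2^52 + 2^31 for the sign-flipped signed case) recovers the
+ // original integer value exactly, since f64 has a 52-bit mantissa.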
+ // subtract the bias
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // final result
+ SDValue Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else if (DestVT.bitsLT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (TLI.isLittleEndian()) FF <<= 32;
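+ // FudgeFactor packs two f32 bit patterns into one i64 constant-pool
+ // entry: 0.0f in one half and 2^N in the other. The shift above places
+ // 2^N at byte offset 4 on little-endian targets (it already sits there on
+ // big-endian ones), so the CstOffset of 0 or 4 selects 0.0f for
+ // non-negative inputs and 2^N when the sign bit was set.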
+ Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+ SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment);
+ else {
+ FudgeInReg =
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ MVT::f32, false, Alignment));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+ MVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ MVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Sign- or zero-extend our
+ // input to the desired type, then run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+ MVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ MVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+ MVT VT = Op.getValueType();
+ MVT SHVT = TLI.getShiftAmountTy();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT()) {
+ default: assert(0 && "Unhandled Expand type in BSWAP!"); abort();
+ case MVT::i16:
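+ // Swap the two bytes: (x << 8) | (x >> 8), e.g. 0xAABB -> 0xBBAA.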
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+ DebugLoc dl) {
+ switch (Opc) {
+ default: assert(0 && "Cannot expand this yet!");
+ case ISD::CTPOP: {
+ static const uint64_t mask[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
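+ // Classic parallel popcount: pass i sums adjacent (1 << i)-bit fields,
+ // so after log2(len) passes the whole word holds the population count.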
+ MVT VT = Op.getValueType();
+ MVT ShVT = TLI.getShiftAmountTy();
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
+ unsigned EltSize = VT.isVector() ?
+ VT.getVectorElementType().getSizeInBits() : len;
+ SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
+ Tmp2));
+ }
+ return Op;
+ }
+ case ISD::CTLZ: {
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ MVT VT = Op.getValueType();
+ MVT ShVT = TLI.getShiftAmountTy();
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ }
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // see also http://www.hackersdelight.org/HDcode/ntz.cc
+ MVT VT = Op.getValueType();
+ SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getConstant(1, VT)));
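+ // ~x & (x - 1) yields a mask of exactly the trailing zero bits:
+ // e.g. x = 0b10100 -> x-1 = 0b10011, ~x = ...01011, AND = 0b00011,
+ // whose popcount (2) is the number of trailing zeros.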
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(VT.getSizeInBits(), VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+ }
+ }
+}
+
+void SelectionDAGLegalize::ExpandNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+ break;
+ case ISD::FLT_ROUNDS_:
+ Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::DECLARE:
+ case ISD::DBG_LABEL:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::MEMBARRIER:
+ case ISD::VAEND:
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::DBG_STOPPOINT:
+ Results.push_back(ExpandDBG_STOPPOINT(Node));
+ break;
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ MVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, VT));
+ else if (VT.isFloatingPoint())
+ Results.push_back(DAG.getConstantFP(0, VT));
+ else
+ assert(0 && "Unknown value type!");
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to an 'abort()' call.
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy,
+ false, false, false, false, CallingConv::C, false,
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ Args, DAG, dl);
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FP_ROUND:
+ case ISD::BIT_CONVERT:
+ Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
+ MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ unsigned BitsDiff = Node->getValueType(0).getSizeInBits() -
+ ExtraVT.getSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_ROUND_INREG: {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_UINT: {
+ SDValue True, False;
+ MVT VT = Node->getOperand(0).getValueType();
+ MVT NVT = Node->getValueType(0);
+ const uint64_t zero[] = {0, 0};
+ APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+ Tmp1 = DAG.getConstantFP(apf, VT);
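+ // Tmp1 is 2^(N-1) as a float, N being the result width. Inputs below it
+ // convert exactly via FP_TO_SINT; inputs at or above it are reduced by
+ // 2^(N-1) before converting, and the XOR below adds that bit back into
+ // the integer result.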
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ Node->getOperand(0),
+ Tmp1, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+ DAG.getNode(ISD::FSUB, dl, VT,
+ Node->getOperand(0), Tmp1));
+ False = DAG.getNode(ISD::XOR, dl, NVT, False,
+ DAG.getConstant(x, NVT));
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VAARG: {
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ MVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(TLI.getTargetData()->
+ getTypeAllocSize(VT.getTypeForMVT()),
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
+ // Load the actual argument out of the pointer VAList
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::VACOPY: {
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(2), VS, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS: {
+ // Use extract/insert/build vector for now. We might try to be
+ // more clever later.
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumOperands = Node->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue SubOp = Node->getOperand(i);
+ MVT VVT = SubOp.getNode()->getValueType(0);
+ MVT EltVT = VVT.getVectorElementType();
+ unsigned NumSubElem = VVT.getVectorNumElements();
+ for (unsigned j=0; j < NumSubElem; ++j) {
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
+ DAG.getIntPtrConstant(j)));
+ }
+ }
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Ops[0], Ops.size());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ MVT VT = Node->getValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(0),
+ DAG.getIntPtrConstant(Idx)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(1),
+ DAG.getIntPtrConstant(Idx - NumElems)));
+ }
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ MVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy()));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS: {
+ // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+ MVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, ISD::SETUGT);
+ Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128));
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_PPCF128));
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal.
+ bool isLegal = false;
+ for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+ E = TLI.legal_fpimm_end(); I != E; ++I) {
+ if (CFP->isExactlyValue(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (isLegal)
+ Results.push_back(SDValue(Node, 0));
+ else
+ Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ break;
+ }
+ case ISD::EHSELECTION: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::EXCEPTIONADDR: {
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::SUB: {
+ MVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ MVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X-X/Y*Y
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ } else if (isSigned) {
+ Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128);
+ } else {
+ Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128);
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ MVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+ ISD::SMUL_LOHI;
+ MVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+ "If this wasn't legal, it shouldn't have been created!");
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::MUL: {
+ MVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ // See if multiply or divide can be lowered using two-result operations.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, form a preference by checking which forms of plain
+ // MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+ Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
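+  // The preference order above, restated as a standalone predicate
+  // (editorial sketch; the Has* flags stand in for the target queries):
+  //
+  //   unsigned PickMulOp(bool HasSLOHI, bool HasULOHI,
+  //                      bool HasMULHS, bool HasMULHU) {
+  //     if (HasSLOHI && !HasMULHS) return ISD::SMUL_LOHI;
+  //     if (HasULOHI && !HasMULHU) return ISD::UMUL_LOHI;
+  //     if (HasSLOHI)              return ISD::SMUL_LOHI;
+  //     if (HasULOHI)              return ISD::UMUL_LOHI;
+  //     return 0;  // no two-result form: fall back to the MUL libcall
+  //   }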
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ MVT OType = Node->getValueType(1);
+
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Results.push_back(Cmp);
+ break;
+ }
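+  // The sign rule above on plain two's-complement values (editorial sketch;
+  // SAddOverflows is a hypothetical helper, showing the add case):
+  //
+  //   #include <cstdint>
+  //   bool SAddOverflows(int32_t LHS, int32_t RHS) {
+  //     int32_t Sum = (int32_t)((uint32_t)LHS + (uint32_t)RHS);
+  //     bool LHSSign = LHS >= 0, RHSSign = RHS >= 0, SumSign = Sum >= 0;
+  //     return (LHSSign == RHSSign) && (LHSSign != SumSign);
+  //   }
+  //
+  // For SSUBO the first test flips to (LHSSign != RHSSign), matching the
+  // SETNE condition chosen above.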
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+ Node->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT));
+ break;
+ }
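+  // Why comparing Sum against LHS suffices (editorial sketch): with
+  // wraparound arithmetic an unsigned add overflowed exactly when the
+  // result is smaller than an operand, and a subtract when it is larger:
+  //
+  //   #include <cstdint>
+  //   bool UAddOverflows(uint32_t L, uint32_t R) { return L + R < L; }
+  //   bool USubOverflows(uint32_t L, uint32_t R) { return L - R > L; }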
+ case ISD::BUILD_PAIR: {
+ MVT PairTy = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy()));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
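+  // The zext/shl/or combination above is the usual packing of two halves
+  // into a double-wide integer (editorial sketch for a 32+32 -> 64 pair):
+  //
+  //   #include <cstdint>
+  //   uint64_t BuildPair(uint32_t Lo, uint32_t Hi) {
+  //     return (uint64_t)Lo | ((uint64_t)Hi << 32);
+  //   }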
+ case ISD::SELECT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
+ MVT PTy = TLI.getPointerTy();
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize();
+    Index = DAG.getNode(ISD::MUL, dl, PTy,
+                        Index, DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+ MVT MemVT = MVT::getIntegerVT(EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ PseudoSourceValue::getJumpTable(), 0, MemVT);
+ Addr = LD;
+ if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+ Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+ Results.push_back(Tmp1);
+ break;
+ }
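+  // In pseudo-C the sequence built above is roughly (editorial sketch):
+  //
+  //   Entry = *(intptr_t *)(Table + Index * EntrySize);  // sext load
+  //   if (PIC) Entry += RelocBase;    // entries are offsets under PIC
+  //   goto *Entry;                    // BRIND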
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp2,
+ DAG.getConstant(0, Tmp2.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+ // If we expanded the SETCC into an AND/OR, return the new node
+ if (Tmp2.getNode() == 0) {
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ MVT VT = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ SDValue CC = Node->getOperand(4);
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, dl);
+
+ assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, dl);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ Results.push_back(SDValue(Node, i));
+ break;
+ }
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT OVT = Node->getValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP) {
+ OVT = Node->getOperand(0).getValueType();
+ }
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+    // Perform the larger operation in the promoted type.
+    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTTZ) {
+      // If Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT).
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+ ISD::SETEQ);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+ DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+ } else if (Node->getOpcode() == ISD::CTLZ) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+ }
+    // Truncate the result back to the original type.
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
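+  // Worked example of the fix-ups above (editorial, i8 promoted to i32):
+  //   cttz(i8 0) -> cttz(i32 0) = 32, remapped to 8 by the SELECT;
+  //   ctlz(i8 1) -> ctlz(i32 1) = 31, minus (32 - 8) gives the correct 7.
+  // CTPOP needs no adjustment since zero extension adds no set bits.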
+ case ISD::BSWAP: {
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ }
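+  // Worked example (editorial, i16 -> i32): bswap32(zext(0xAABB)) =
+  // bswap32(0x0000AABB) = 0xBBAA0000; shifting right by DiffBits = 16
+  // yields 0x0000BBAA, i.e. bswap16(0xAABB) in the low half.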
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ assert(OVT.isVector() && "Don't know how to promote scalar logic ops");
+ // Bit convert each of the values to the new type.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ // Bit convert the result back the original type.
+ Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1));
+ break;
+ case ISD::SELECT:
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector()) {
+ ExtOp = ISD::BIT_CONVERT;
+ TruncOp = ISD::BIT_CONVERT;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+    // First step: figure out the appropriate operation to use.  SETCC need
+    // not be supported for all legal data types; mostly this targets FP.
+    MVT NewInTy = Node->getOperand(0).getValueType();
+    MVT OldVT = NewInTy;
+    OldVT = OldVT;  // Silence a "set but unused" warning in NDEBUG builds.
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
+
+ assert(NewInTy.isInteger() == OldVT.isInteger() &&
+ "Fell off of the edge of the integer world");
+ assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() &&
+ "Fell off of the edge of the floating point world");
+
+ // If the target supports SETCC of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy))
+ break;
+ }
+ if (NewInTy.isInteger())
+ assert(0 && "Cannot promote Legal Integer SETCC yet");
+ else {
+      Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Node->getOperand(0));
+      Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Node->getOperand(1));
+ }
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Node->getOperand(2)));
+ break;
+ }
+ }
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize(bool TypesNeedLegalizing,
+                            CodeGenOpt::Level OptLevel) {
+  SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 0000000..c3c1bea
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1388 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
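+
+// For intuition, softening relies on the invariant below (editorial sketch,
+// f32/i32 shown; AddAsBits is a hypothetical stand-in for RTLIB::ADD_F32):
+//
+//   #include <cstdint>
+//   #include <cstring>
+//   uint32_t AddAsBits(uint32_t ABits, uint32_t BBits) {
+//     float A, B, R;
+//     std::memcpy(&A, &ABits, 4);   // bitcast i32 -> f32
+//     std::memcpy(&B, &BBits, 4);
+//     R = A + B;                    // what the hardware FADD would compute
+//     uint32_t RBits;
+//     std::memcpy(&RBits, &R, 4);   // bitcast f32 -> i32
+//     return RBits;
+//   }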
+
+#include "LegalizeTypes.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(MVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to soften the result of this operator!");
+ abort();
+
+ case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP:
+ R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+ return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+ TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
+ SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
+ NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
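+
+// The mask trick above on raw bits (editorial sketch for the f32/i32 case):
+//
+//   #include <cstdint>
+//   uint32_t FabsBits(uint32_t Bits) {
+//     return Bits & ~(UINT32_C(1) << 31);  // clear the IEEE sign bit
+//   }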
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT LVT = LHS.getValueType();
+ MVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+ DAG.getConstant(RSize - 1,
+ TLI.getShiftAmountTy()));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift right or sign-extend it if the two operands have different types.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy()));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+ DAG.getConstant(LSize - 1,
+ TLI.getShiftAmountTy()));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
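+
+// The same bit manipulation on concrete types (editorial sketch, both
+// operands f32 so no size adjustment is needed):
+//
+//   #include <cstdint>
+//   uint32_t CopySignBits(uint32_t Mag, uint32_t Sgn) {
+//     uint32_t SignBit = Sgn & (UINT32_C(1) << 31);        // sign of op 2
+//     uint32_t Abs     = Mag & ((UINT32_C(1) << 31) - 1);  // clear sign bit
+//     return Abs | SignBit;
+//   }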
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
+ NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getSrcValue(), L->getSrcValueOffset(), NVT,
+ L->isVolatile(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD,
+ L->getMemoryVT(), L->getChain(),
+ L->getBasePtr(), L->getOffset(),
+ L->getSrcValue(), L->getSrcValueOffset(),
+ L->getMemoryVT(),
+ L->isVolatile(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ DebugLoc dl = N->getDebugLoc();
+
+  SDValue NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ MVT SVT = N->getOperand(0).getValueType();
+ MVT RVT = N->getValueType(0);
+ MVT NVT = MVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to
+ // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+    // The source type needs to be big enough to hold the operand.
+    if (NVT.bitsGE(SVT))
+      LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT)
+                  : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl);
+}
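+
+// Example (editorial): for an i8 -> f32 conversion there is no i8 libcall,
+// so the loop above settles on the first integer type with a matching call
+// (i32 -> f32, __floatsisf in compiler-rt) and the operand is sign/zero
+// extended to i32 first.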
+
+
+//===----------------------------------------------------------------------===//
+// Operand Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to soften this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl) {
+ SDValue LHSInt = GetSoftenedFloat(NewLHS);
+ SDValue RHSInt = GetSoftenedFloat(NewRHS);
+ MVT VT = NewLHS.getValueType();
+
+ assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ default: assert(false && "Do not know how to soften this setcc!");
+ }
+ }
+
+ MVT RetVT = MVT::i32; // FIXME: is this the correct return type?
+ SDValue Ops[2] = { LHSInt, RHSInt };
+ NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = TLI.getCmpLibcallCC(LC1);
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
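+
+// Example of the two-call expansion above (editorial): SETUEQ ("unordered
+// or equal") has no single comparison libcall, so LC1 = UO_F32 and
+// LC2 = OEQ_F32 are combined as
+//
+//   (__unordsf2(a, b) != 0) | (__eqsf2(a, b) == 0)
+//
+// which is exactly the OR of the two SETCC nodes built above.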
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ MVT SVT = N->getOperand(0).getValueType();
+ MVT RVT = N->getValueType(0);
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+ MVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+ MVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), ST->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion.  At this point, the node may
+/// also have invalid operands or other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to expand the result of this operator!");
+ abort();
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ assert(NVT.getSizeInBits() == integerPartWidth &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[1])), NVT);
+ Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[0])), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ DAG.getCondCode(ISD::SETEQ));
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+                                         RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+                                         RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->getAlignment());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ SDValue Src = N->getOperand(0);
+ MVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ DebugLoc dl = N->getDebugLoc();
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ const uint64_t *Parts = 0;
+
+ switch (SrcVT.getSimpleVT()) {
+ default:
+ assert(false && "Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+ MVT::ppcf128));
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+ Lo, Hi, DAG.getCondCode(ISD::SETLT));
+ GetPairElements(Lo, Lo, Hi);
+}
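+
+// Rationale for the 2^N fix-up above (editorial): the value was converted
+// as if signed, so a source with its top bit set came out 2^N too small.
+// A scalar analogue for N = 32:
+//
+//   #include <cstdint>
+//   double UIntToFP(uint32_t X) {
+//     double D = (double)(int32_t)X;                  // signed conversion
+//     return (int32_t)X >= 0 ? D : D + 4294967296.0;  // add 2^32 back
+//   }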
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
+ == TargetLowering::Custom)
+ Res = TLI.LowerOperation(SDValue(N, 0), DAG);
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ cerr << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+ #endif
+ assert(0 && "Do not know how to expand this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ MVT VT = NewLHS.getValueType();
+ assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
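+  // In boolean terms, the sequence below computes
+  //   (hi1 == hi2 && lo1 CC lo2) || (hi1 != hi2 && hi1 CC hi2),
+  // i.e. compare the low halves when the high halves are equal, and the
+  // high halves themselves otherwise.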
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If FloatExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ MVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+ N->getOperand(0), DAG.getValueType(MVT::f64));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ MVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+ APFloat APF = APFloat(APInt(128, 2, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
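+    // For example, X = 0xFFFFFFFF: X-2^31 converts exactly to 0x7FFFFFFF,
+    // and adding 0x80000000 back recovers 0xFFFFFFFF.  (0x41e0000000000000
+    // is the IEEE-754 double 2^31, biased exponent 1023+31.)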
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ DAG.getNode(ISD::FSUB, dl,
+ MVT::ppcf128,
+ N->getOperand(0),
+ Tmp)),
+ DAG.getConstant(0x80000000, MVT::i32)),
+ DAG.getNode(ISD::FP_TO_SINT, dl,
+ MVT::i32, N->getOperand(0)),
+ DAG.getCondCode(ISD::SETGE));
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If FloatExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If FloatExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 0000000..eb9342c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,2382 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to promote this operator!");
+ abort();
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getSrcValue(), N->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+ Op2, Op3, N->getSrcValue(), N->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ MVT InVT = InOp.getValueType();
+ MVT NInVT = TLI.getTypeToTransformTo(InVT);
+ MVT OutVT = N->getValueType(0);
+ MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case Legal:
+ break;
+ case PromoteInteger:
+ if (NOutVT.bitsEq(NInVT))
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl,
+ NOutVT, GetPromotedInteger(InOp));
+ break;
+ case SoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case ExpandInteger:
+ case ExpandFloat:
+ break;
+ case ScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ case SplitVector: {
+    // For example, i32 = BIT_CONVERT v2i16 on Alpha.  Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ MVT::getIntegerVT(NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+ }
+ case WidenVector:
+ if (OutVT.bitsEq(NInVT))
+ // The input is widened to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+ }
+
+ // Otherwise, lower the bit-convert to a store/load from the stack.
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+ // Result is an extending load from the stack slot.
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ MVT OVT = N->getValueType(0);
+ MVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
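+  // For example, an i16 promoted to i32: BSWAP turns 0x????AABB into
+  // 0xBBAA????, and the SRL by DiffBits = 16 brings the two meaningful
+  // bytes back down, giving 0x0000BBAA.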
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(N->getValueType(0)),
+ JoinIntegers(N->getOperand(0), N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ MVT VT = N->getValueType(0);
+  // FIXME: there is no actual debug info here.
+ DebugLoc dl = N->getDebugLoc();
+  // Zero extend things like i1, sign extend everything else.  In theory it
+  // shouldn't matter which one we pick, but in practice this choice seems to
+  // give better code.
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+ "can only promote integers");
+ MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ MVT OVT = N->getValueType(0);
+ MVT NVT = Op.getValueType();
+ Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
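+  // For example, i8 0x10 zero extended to i32: CTLZ gives 27 leading zeros,
+  // and subtracting 32-8 = 24 yields the correct i8 answer of 3.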
+ return DAG.getNode(ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ MVT OVT = N->getValueType(0);
+ MVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
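+  // For example, an i8 zero promoted to i32: OR-ing in bit 8 makes CTTZ
+  // return 8, which is exactly CTTZ(0) in the original 8-bit type.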
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.set(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ MVT OldVT = N->getValueType(0);
+ SDValue OldVec = N->getOperand(0);
+ if (getTypeAction(OldVec.getValueType()) == WidenVector)
+ OldVec = GetWidenedVector(N->getOperand(0));
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (OldElts == 1) {
+ assert(!isTypeLegal(OldVec.getValueType()) &&
+ "Legal one-element vector of a type needing promotion!");
+ // It is tempting to follow GetScalarizedVector by a call to
+ // GetPromotedInteger, but this would be wrong because the
+ // scalarized value may not yet have been processed.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT),
+ GetScalarizedVector(OldVec));
+ }
+
+ // Convert to a vector half as long with an element type of twice the width,
+ // for example <4 x i16> -> <2 x i32>.
+ assert(!(OldElts & 1) && "Odd length vectors not supported!");
+ MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits());
+ assert(OldVT.isSimple() && NewVT.isSimple());
+
+ SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::getVectorVT(NewVT, OldElts / 2),
+ OldVec);
+
+ // Extract the element at OldIdx / 2 from the new vector.
+ SDValue OldIdx = N->getOperand(1);
+ SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx,
+ DAG.getConstant(1, TLI.getPointerTy()));
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx);
+
+ // Select the appropriate half of the element: Lo if OldIdx was even,
+ // Hi if it was odd.
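+  // For example, extracting element 3 of <4 x i16>: NewIdx = 3/2 = 1 picks
+  // the second i32, and since 3 is odd the i16 sits in its high half (Lo
+  // and Hi having been swapped first on big-endian targets).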
+ SDValue Lo = Elt;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt,
+ DAG.getConstant(OldVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Extend to the promoted type.
+ SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx);
+ SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we're promoting a UINT to a larger size, check to see if the new node
+ // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since
+ // we can use that instead. This allows us to generate better code for
+ // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not
+ // legal, such as PowerPC.
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (eg: because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ ISD::AssertZext : ISD::AssertSext, dl,
+ NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT(), N->isVolatile(),
+ N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// PromoteIntRes_Overflow - Promote the overflow flag of an overflowing
+/// arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Simply change the return type of the boolean result.
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1));
+ MVT ValueVTs[] = { N->getValueType(0), NVT };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+ // Modified the sum result - switch anything that used the old sum to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
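+  // For example, i8 SADDO promoted to i32: 100 + 100 gives 200, whose sign
+  // extended i8 truncation is -56, so overflow is detected.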
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ MVT OVT = N->getOperand(0).getValueType();
+ MVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+ assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+
+ // Convert to the expected type.
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(N->getValueType(0)),
+ GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue Res = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(),
+ Res.getValueType(), Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Res;
+
+ switch (getTypeAction(N->getOperand(0).getValueType())) {
+ default: assert(0 && "Unknown type action!");
+ case Legal:
+ case ExpandInteger:
+ Res = N->getOperand(0);
+ break;
+ case PromoteInteger:
+ Res = GetPromotedInteger(N->getOperand(0));
+ break;
+ }
+
+ // Truncate to NVT instead of VT
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
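+  // For example, i8 UADDO promoted to i32: 200 + 100 gives 300, whose zero
+  // extended i8 truncation is 44, so overflow is detected.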
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ MVT OVT = N->getOperand(0).getValueType();
+ MVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT RegVT = TLI.getRegisterType(VT);
+ unsigned NumRegs = TLI.getNumRegisters(VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ assert(ResNo == 1 && "Only boolean result promotion currently supported!");
+ return PromoteIntRes_Overflow(N);
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ cerr << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+ #endif
+ assert(0 && "Do not know how to promote this operator's operand!");
+ abort();
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions, but zero extend is cheaper on
+ // many machines (an AND instead of two shifts), so prefer it.
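+  // For example, two i8 values compared in i32: zero extension maps [0,255]
+  // to the same numeric values, so equality and the unsigned orderings are
+  // preserved; the signed orderings need sign extension instead.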
+ switch (CCCode) {
+ default: assert(0 && "Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+  // The chain (Op#0), CC (Op#1) and basic block destination (Op#4) always
+  // have legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ MVT SVT = TLI.getSetCCResultType(MVT::Other);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+  // The chain (Op#0) and basic block destination (Op#2) always have legal
+  // types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond,
+ N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
+ MVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ DebugLoc dl = N->getDebugLoc();
+
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+ // type does not have a strange size (eg: it is not i1).
+ MVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+ // Promote the inserted value. The type does not need to match the
+ // vector element type. Check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+ "can only promote integer arguments");
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2));
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ N->getOperand(1), Idx);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+ SDValue NewOps[6];
+ DebugLoc dl = N->getDebugLoc();
+ NewOps[0] = N->getOperand(0);
+ for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+ SDValue Flag = GetPromotedInteger(N->getOperand(i));
+ NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+ }
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewOps,
+                                array_lengthof(NewOps));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ GetPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ SExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
+ SVOffset, N->getMemoryVT(),
+ isVolatile, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ ZExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we
+/// just know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to expand the result of this operator!");
+ abort();
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ MVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ MVT ShTy = N->getOperand(1).getValueType();
+
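+  // For example, an i64 SHL by a constant 40 on a 32-bit target takes the
+  // Amt > NVTBits path: the low word becomes zero and the high word is the
+  // original low word shifted left by 40-32 = 8 bits.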
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt > VTBits) {
+ Lo = Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getNode(ISD::SHL, dl,
+ NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = InL;
+ } else if (Amt == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::ADDC,
+ TLI.getTypeToExpandTo(NVT))) {
+ // Emit this X << 1 as X+X.
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDValue LoOps[2] = { InL, InL };
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRL, dl,
+ NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt > VTBits) {
+ Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(Amt-NVTBits, ShTy));
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bits of the shift amount.  If we
+/// can tell this, we know that the amount is >= NVTBits or < NVTBits, without
+/// knowing the actual shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ MVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
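+  // For example, with NVTBits = 32 and an i8 shift amount this masks bits
+  // 7..5: if any of them is known one the amount is >= 32, and if all are
+  // known zero it is < 32.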
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+
+ // If we don't know anything about the high bits, exit.
+ if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (KnownOne.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, ShTy));
+
+ switch (N->getOpcode()) {
+ default: assert(0 && "Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+#if 0
+ // FIXME: This code is broken for shifts with a zero amount!
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if ((KnownZero & HighBitMask) == HighBitMask) {
+ // Compute 32-amt.
+ SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
+ DAG.getConstant(NVTBits, ShTy),
+ Amt);
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: assert(0 && "Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(Op1, NVT, InH, Amt),
+ DAG.getNode(Op2, NVT, InL, Amt2));
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ MVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+ SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+
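+  // Compute both the "short" result (Amt < NVTBits) and the "long" result
+  // (Amt >= NVTBits), and let Cmp select between them.  The long forms
+  // shift by the unadjusted amount, which relies on the target reducing
+  // shift amounts of NVTBits or more modulo NVTBits.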
+ SDValue Lo1, Hi1, Lo2, Hi2;
+ switch (N->getOpcode()) {
+ default: assert(0 && "Unknown shift");
+  case ISD::SHL:
+    // ShAmt < NVTBits
+    Lo1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+    Hi1 = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2));
+
+    // ShAmt >= NVTBits
+    Lo2 = DAG.getConstant(0, NVT);                  // Low part is zero.
+    Hi2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+    return true;
+  case ISD::SRL:
+    // ShAmt < NVTBits
+    Lo1 = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2));
+    Hi1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+
+    // ShAmt >= NVTBits
+    Lo2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+    Hi2 = DAG.getConstant(0, NVT);                  // Hi part is zero.
+
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+    return true;
+  case ISD::SRA:
+    // ShAmt < NVTBits
+    Lo1 = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2));
+    Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+
+    // ShAmt >= NVTBits
+    Lo2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+    Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH,       // Sign extend high part.
+                      DAG.getConstant(NVTBits-1, ShTy));
+
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+    return true;
+ }
+
+ return false;
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ MVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Flag, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+ } else {
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
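+ // Detect a carry out of the low half without a carry flag: Lo wraps modulo
+ // 2^NVTBits, so a carry occurred exactly when the truncated sum is
+ // (unsigned) smaller than either addend; hence the SETULT compares below.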
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
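+ // A borrow out of the low half occurred exactly when the low half of the
+ // LHS is (unsigned) smaller than the low half of the RHS.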
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+
+ // The flag result has been legalized; switch anything that used the old
+ // flag to use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
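+ // The low halves consume the node's incoming carry (operand 2); the carry
+ // produced by the low-half ADDE/SUBE then feeds the high halves.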
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+
+ // The flag result has been legalized; switch anything that used the old
+ // flag to use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+ MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo, make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+ MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
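+ // If the asserted width exceeds the low half, the assertion straddles both
+ // halves: the high half is itself zero-extended from EVTBits - NVTBits
+ // bits. Otherwise every asserted bit lives in the low half, so the high
+ // half must be zero.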
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero, make it explicit.
+ Hi = DAG.getConstant(0, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
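+ // Split the constant into two half-width constants. For instance, an i64
+ // constant 0x0123456789ABCDEF expanded to i32 halves becomes
+ // Lo = 0x89ABCDEF and Hi = 0x01234567.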
+ Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctlz(HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // cttz(HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoTZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); // Trailing zeros of Lo.
+ SDValue HiTZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); // Trailing zeros of Hi.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoTZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiTZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ MVT EVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ EVT, isVolatile, Alignment);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by shifting the low part right arithmetically
+ // by all of its bits but one, replicating the sign bit across the half.
+ unsigned LoSize = Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ MVT NEVT = MVT::getIntegerVT(ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize, NEVT,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
+ MVT EVT = N->getMemoryVT();
+ unsigned EBytes = EVT.getStoreSizeInBits()/8;
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
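+ // For instance (a sketch, not a path every target exercises): an i48
+ // extending load expanded to i32 parts has EBytes = 6, IncrementSize = 4
+ // and ExcessBits = 16, so the first load below reads the top 32 bits and
+ // the second reads the remaining low 16 bits.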
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits),
+ isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize,
+ MVT::getIntegerVT(ExcessBits),
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
+ NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ }
+ }
+
+ // The chain result has been legalized; switch anything that used the old
+ // chain to use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = NVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
+ return;
+ }
+ }
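+ // General case: writing B = 2^NVTBits, the product is
+ // (LH*B + LL) * (RH*B + RL) = LL*RL + (LL*RH + LH*RL)*B (mod B^2),
+ // so the high half is mulhu(LL, RL) + LL*RH + LH*RL and the low half is
+ // the truncation of LL*RL.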
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(NVT, NVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ }
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
+ // Next check to see if the target supports this SHL_PARTS operation or if it
+ // will custom expand it.
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+
+ SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
+ MVT PartsVT = LHSL.getValueType(); // The half-width type of each part.
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(PartsVT, PartsVT), Ops, 3);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ assert(0 && "Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is the low part shifted right arithmetically by all of
+ // its bits but one, i.e. every bit of Hi equals the sign bit.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(MVT::getIntegerVT(ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+ TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits =
+ EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(MVT::getIntegerVT(ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
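+ // The low result half is the truncation of the source; the high half is
+ // the source shifted right by NVT's width and then truncated. For
+ // instance, truncating to an i64 that itself expands to i32 takes bits
+ // [31:0] and [63:32] of the source.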
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl,
+ N->getOperand(0).getValueType(), N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits));
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ cerr << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+ #endif
+ assert(0 && "Do not know how to expand this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ MVT VT = NewLHS.getValueType();
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
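+ // General equality: XOR the halves pairwise, OR the results together, and
+ // compare the whole against zero, since
+ // a == b <=> ((aLo ^ bLo) | (aHi ^ bHi)) == 0.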
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ return;
+ }
+
+ // If this is a comparison of the sign bit, just look at the top part:
+ // both X > -1 and X < 0 depend only on the high half.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+ // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL);
+ SDValue Tmp1, Tmp2;
+ Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!Tmp1.getNode())
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC);
+ Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!Tmp2.getNode())
+ Tmp2 = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+ if ((Tmp1C && Tmp1C->isNullValue()) ||
+ (Tmp2C && Tmp2C->isNullValue() &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+ // If the low-part comparison is known false, the result is just the
+ // high-part comparison. For LE / GE, if the high part is known false,
+ // ignore the low part. For LT / GT, if the high part is known true,
+ // ignore the low part.
+ NewLHS = Tmp2;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ, false,
+ DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+ NewLHS, Tmp1, Tmp2);
+ NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If IntegerExpandSetCCOperands returned a single value, compare it
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If IntegerExpandSetCCOperands returned a single value, compare it
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If IntegerExpandSetCCOperands returned a single value, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
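+ // (Any defined shift of an N-bit value uses an amount below N, which
+ // always fits in the low half of the expanded amount.)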
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ MVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ MVT VT = N->getOperand(1).getValueType();
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ N->getMemoryVT(), isVolatile, Alignment);
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ MVT NEVT = MVT::getIntegerVT(ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize, NEVT,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ MVT EVT = N->getMemoryVT();
+ unsigned EBytes = EVT.getStoreSizeInBits()/8;
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+ SVOffset, HiVT, isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize,
+ MVT::getIntegerVT(ExcessBits),
+ isVolatile, MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ MVT SrcVT = Op.getValueType();
+ MVT DstVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
+
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
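+ // These are 2^32 and 2^64 encoded as IEEE single precision; 2^128
+ // overflows f32, so 0x7F800000 is actually the +infinity bit pattern.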
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ assert(false && "Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(Hi.getValueType()),
+ Hi, DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
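+ // SignSet is true exactly when the source's top bit is set, in which case
+ // the signed conversion saw V - 2^N instead of V and its result is 2^N
+ // too low; the fudge factor added below compensates.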
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)),
+ TLI.getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ if (TLI.isBigEndian()) std::swap(Zero, Four);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr, NULL, 0, MVT::f32,
+ false, Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 0000000..00d71e1
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1074 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may momentarily fail to hold while a node is
+ // being processed: the node being processed may be put in a map before being
+ // marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes. By iterating
+ // over the DAG we only consider non-deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (I->getNodeId() == NewNode)
+ NewNodes.push_back(I);
+
+ for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+ SDValue Res(I, i);
+ bool Failed = false;
+
+ unsigned Mapped = 0;
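+ // Accumulate one bit per map that contains this value; the bits are used
+ // below to diagnose values that are unmapped or multiply mapped.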
+ if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ SDValue NewVal = ReplacedValues[Res];
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ while (I != ReplacedValues.end()) {
+ NewVal = I->second;
+ I = ReplacedValues.find(NewVal);
+ }
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (SplitVectors.find(Res) != SplitVectors.end())
+ Mapped |= 64;
+ if (WidenedVectors.find(Res) != WidenedVectors.end())
+ Mapped |= 128;
+
+ if (I->getNodeId() != Processed) {
+ if (Mapped != 0) {
+ cerr << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ if (Mapped > 1) {
+ cerr << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ cerr << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ cerr << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ cerr << " ReplacedValues";
+ if (Mapped & 2)
+ cerr << " PromotedIntegers";
+ if (Mapped & 4)
+ cerr << " SoftenedFloats";
+ if (Mapped & 8)
+ cerr << " ScalarizedVectors";
+ if (Mapped & 16)
+ cerr << " ExpandedIntegers";
+ if (Mapped & 32)
+ cerr << " ExpandedFloats";
+ if (Mapped & 64)
+ cerr << " SplitVectors";
+ if (Mapped & 128)
+ cerr << " WidenedVectors";
+ cerr << "\n";
+ abort();
+ }
+ }
+ }
+
+ // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
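+ // (ReadyToProcess is NodeId zero; NewNode, Unanalyzed and Processed are
+ // negative sentinels. During the walk, a positive NodeId counts how many
+ // of a node's operands have not yet been processed.)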
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (I->getNumOperands() == 0) {
+ I->setNodeId(ReadyToProcess);
+ Worklist.push_back(I);
+ } else {
+ I->setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ if (IgnoreNodeResults(N))
+ goto ScanOperands;
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ MVT ResultVT = N->getValueType(i);
+ switch (getTypeAction(ResultVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case Legal:
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case PromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case SoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case SplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case WidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ MVT OpVT = N->getOperand(i).getValueType();
+ switch (getTypeAction(OpVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case Legal:
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case PromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case ExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case SoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case ExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case ScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case SplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case WidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't add all users
+ // to the worklist etc.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now. However
+ // there is no need to remember the replacement - morphing will make sure
+ // it is never used non-trivially.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value. Do the
+ // replacement without recording it in ReplacedValues. This does not
+ // expunge From but that is fine - it is not really a new node.
+ ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // There are three possibilities: the user's NodeId may be a positive count
+ // of its operands that are not yet ready, or the user may be a new node,
+ // or it may not have been analyzed yet.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this user has not been analyzed yet: this is the first of its
+ // operands to become ready. Its new NodeId is the number of operands it
+ // has minus 1 (one operand, N, has just been processed).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do both for cleanliness and before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can leave unreachable nodes around with their NodeId still
+ // set to NewNode.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(I))
+ for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(I->getValueType(i))) {
+ cerr << "Result type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+ !isTypeLegal(I->getOperand(i).getValueType())) {
+ cerr << "Operand type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ if (I->getNodeId() != Processed) {
+ if (I->getNodeId() == NewNode)
+ cerr << "New node not analyzed?\n";
+ else if (I->getNodeId() == Unanalyzed)
+ cerr << "Unanalyzed node not noticed?\n";
+ else if (I->getNodeId() > 0)
+ cerr << "Operand not processed?\n";
+ else if (I->getNodeId() == ReadyToProcess)
+ cerr << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ I->dump(&DAG); cerr << "\n";
+ abort();
+ }
+ }
+#endif
+
+ return Changed;
+}
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes. Correct any processed operands (this may change the node) and
+/// calculate the NodeId. If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Remove any stale map entries.
+ ExpungeNode(N);
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting of nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ SmallVector<SDValue, 8> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ for (unsigned j = 0; j < i; ++j)
+ NewOps.push_back(N->getOperand(j));
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0],
+ NewOps.size()).getNode();
+ if (M != N) {
+ // The node morphed into a different node. Normally for this to happen
+ // the original node would have to be marked NewNode. However this can
+ // in theory momentarily not be the case while ReplaceValueWith is doing
+ // its stuff. Mark the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ ExpungeNode(N);
+ }
+ }
+
+ // Calculate the NodeId.
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
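+/// For example (illustrative): if node A is deleted and its memory is reused
+/// for a new node B, a stale entry "A -> X" in ReplacedValues would now read
+/// as "B -> X" and silently corrupt later remappings unless B is expunged.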
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+ if (N->getNodeId() != NewNode)
+ return;
+
+ // If N is not remapped by ReplacedValues then there is nothing to do.
+ unsigned i, e;
+ for (i = 0, e = N->getNumValues(); i != e; ++i)
+ if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+ break;
+
+ if (i == e)
+ return;
+
+ // Remove N from all maps - this is expensive but rare.
+
+ for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+ E = PromotedIntegers.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+ E = SoftenedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+ E = ScalarizedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+ E = WidenedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+ E = ReplacedValues.end(); I != E; ++I)
+ RemapValue(I->second);
+
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
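+/// Mapping chains are compressed as they are followed: given entries A -> B
+/// and B -> C in ReplacedValues, remapping A rewrites the stored target to C
+/// (leaving A -> C), so later lookups of A resolve in a single step.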
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+ if (I != ReplacedValues.end()) {
+    // Use path compression to speed up future lookups if values get replaced
+    // with other values multiple times.
+ RemapValue(I->second);
+ N = I->second;
+ assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+ }
+}
+
+namespace {
+ /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+ /// updates to nodes and recomputes their ready state.
+ class VISIBILITY_HIDDEN NodeUpdateListener :
+ public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : DTL(dtl), NodesToAnalyze(nta) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything. It is possible, for
+      // example, that an operand was set to something already processed, in
+      // which case this node could become ready. Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW update!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith. Updates the
+/// DAG causing any uses of From to use To instead, but without expunging From
+/// or recording the replacement in ReplacedValues. Do not call directly unless
+/// you really know what you are doing!
+void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ AnalyzeNewValue(To); // Expunges To.
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe to
+ // skip. Note that this is not a morphing node - otherwise it would still
+ // be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new node
+ // instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+}
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds so that any uses of From use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode()->getNodeId() == ReadyToProcess &&
+ "Only the node being processed may be remapped!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
+ AnalyzeNewValue(To); // Expunges To.
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Do the replacement.
+ ReplaceValueWithHelper(From, To);
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedIntegers[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = SoftenedFloats[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = ScalarizedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+  // Lo/Hi may be newly allocated nodes; if so, assign NodeIds as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+  // Lo/Hi may be newly allocated nodes; if so, assign NodeIds as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't split");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+  // Lo/Hi may be newly allocated nodes; if so, assign NodeIds as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ assert(Entry.first.getNode() == 0 && "Node already split");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = WidenedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node already widened!");
+ OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
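+/// For example (illustrative): an f32 is reinterpreted as an i32 with the
+/// same bit pattern, and a v2f32 as an i64.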
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getSizeInBits();
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ MVT::getIntegerVT(BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
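+/// For example (illustrative): a v4f32 becomes a v4i32 with identical bits.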
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ MVT EltNVT = MVT::getIntegerVT(EltWidth);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ MVT::getVectorVT(EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ MVT DestVT) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is true, the node has illegal result types and VT is the
+/// type of the illegal result. Otherwise the node has legal result types but
+/// an illegal operand, and VT is the type of that illegal operand.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ return true;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
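+/// For example (illustrative): v8i32 splits evenly into two v4i32's, while
+/// the non-power-of-two v7i32 splits into LoVT = v4i32 and HiVT = v3i32.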
+void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) {
+ if (!InVT.isVector()) {
+ LoVT = HiVT = TLI.getTypeToTransformTo(InVT);
+ } else {
+ MVT NewEltVT = InVT.getVectorElementType();
+ unsigned NumElements = InVT.getVectorNumElements();
+ if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector.
+ NumElements >>= 1;
+ LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements);
+ } else { // Non-power-of-two vectors.
+ unsigned NewNumElts_Lo = 1 << Log2_32(NumElements);
+ unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo;
+ LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo);
+ HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi);
+ }
+ }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Pair.getDebugLoc();
+ MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1));
+}
+
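+/// GetVectorElementPointer - Compute the address of the Index'th element of
+/// the vector starting at VecPtr, where each element has type EltVT. As an
+/// illustration, element 3 of a vector of i32 (4 bytes per element) lives at
+/// byte offset 12 from VecPtr.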
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT,
+ SDValue Index) {
+ DebugLoc dl = Index.getDebugLoc();
+ // Make sure the index type is big enough to compute in.
+ if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+ Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EltSize, Index.getValueType()));
+ return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
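+/// For example (illustrative): joining an i32 Lo = 0x89ABCDEF with an i32
+/// Hi = 0x01234567 yields the i64 0x0123456789ABCDEF: Hi is shifted left by
+/// 32 and OR'd with the zero-extended Lo.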
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result DebugLoc
+ DebugLoc dlHi = Hi.getDebugLoc();
+ DebugLoc dlLo = Lo.getDebugLoc();
+ MVT LVT = Lo.getValueType();
+ MVT HVT = Hi.getValueType();
+ MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits());
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ if (NumOps == 0) {
+ return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
+}
+
+/// MakeLibCall - Generate a libcall taking the given operands as arguments and
+/// returning a result of type RetVT.
+SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForMVT();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForMVT();
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, CallingConv::C, false, Callee, Args, DAG, dl);
+ return CallInfo.first;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type. A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
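+/// For example (illustrative): promoting an i1 "true" to i32 yields 1 under
+/// ZeroOrOneBooleanContent, but 0xFFFFFFFF under
+/// ZeroOrNegativeOneBooleanContent, since the sign bit is copied.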
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) {
+ DebugLoc dl = Bool.getDebugLoc();
+ ISD::NodeType ExtendCode;
+ switch (TLI.getBooleanContents()) {
+ default:
+ assert(false && "Unknown BooleanContent!");
+ case TargetLowering::UndefinedBooleanContent:
+ // Extend to VT by adding rubbish bits.
+ ExtendCode = ISD::ANY_EXTEND;
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // Extend to VT by adding zero bits.
+ ExtendCode = ISD::ZERO_EXTEND;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent: {
+ // Extend to VT by copying the sign bit.
+ ExtendCode = ISD::SIGN_EXTEND;
+ break;
+ }
+ }
+ return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ MVT LoVT, MVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
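+/// For example (illustrative): an i64 Op splits into two i32's, with
+/// Lo = trunc(Op) and Hi = trunc(Op >> 32).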
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target. Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 0000000..75c8924
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,736 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared by the code that implements the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class VISIBILITY_HIDDEN DAGTypeLegalizer {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+  // about the state of the node. The enum below lists the possible states.
+ enum NodeIdFlags {
+ /// ReadyToProcess - All operands have been processed, so this node is ready
+ /// to be handled.
+ ReadyToProcess = 0,
+
+ /// NewNode - This is a new node, not before seen, that was created in the
+ /// process of legalizing some other node.
+ NewNode = -1,
+
+ /// Unanalyzed - This node's ID needs to be set to the number of its
+ /// unprocessed operands.
+ Unanalyzed = -2,
+
+ /// Processed - This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
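+
+  // Illustrative lifecycle (a sketch of how these flags are used below): a
+  // node created during legalization starts out as NewNode; AnalyzeNewNode
+  // sets its ID to the number of still-unprocessed operands, making it
+  // ReadyToProcess once that count reaches zero; after being legalized it is
+  // marked Processed.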
+private:
+ enum LegalizeAction {
+ Legal, // The target natively supports this type.
+ PromoteInteger, // Replace this integer type with a larger one.
+ ExpandInteger, // Split this integer type into two of half the size.
+ SoftenFloat, // Convert this float type to a same size integer type.
+ ExpandFloat, // Split this float type into two of half the size.
+ ScalarizeVector, // Replace this one-element vector with its element type.
+ SplitVector, // This vector type should be split into smaller vectors.
+ WidenVector // This vector type should be widened into a larger vector.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// simple value type, where the two bits correspond to the LegalizeAction
+ /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// getTypeAction - Return how we should legalize values of this type.
+ LegalizeAction getTypeAction(MVT VT) const {
+ switch (ValueTypeActions.getTypeAction(VT)) {
+ default:
+ assert(false && "Unknown legalize action!");
+ case TargetLowering::Legal:
+ return Legal;
+ case TargetLowering::Promote:
+ // Promote can mean
+ // 1) For integers, use a larger integer type (e.g. i8 -> i32).
+ // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
+ if (!VT.isVector())
+ return PromoteInteger;
+ else
+ return WidenVector;
+ case TargetLowering::Expand:
+ // Expand can mean
+ // 1) split scalar in half, 2) convert a float to an integer,
+ // 3) scalarize a single-element vector, 4) split a vector in two.
+ if (!VT.isVector()) {
+ if (VT.isInteger())
+ return ExpandInteger;
+ else if (VT.getSizeInBits() ==
+ TLI.getTypeToTransformTo(VT).getSizeInBits())
+ return SoftenFloat;
+ else
+ return ExpandFloat;
+ } else if (VT.getVectorNumElements() == 1) {
+ return ScalarizeVector;
+ } else {
+ return SplitVector;
+ }
+ }
+ }
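+
+  // For instance (an illustrative, target-dependent sketch): a 32-bit target
+  // usually maps i64 to ExpandInteger and v1i64 to ScalarizeVector, while a
+  // soft-float target maps f64 to SoftenFloat since its transformed type
+  // (i64) has the same size.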
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ bool isTypeLegal(MVT VT) const {
+ return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal;
+ }
+
+ /// IgnoreNodeResults - Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant;
+ }
+
+ /// PromotedIntegers - For integer nodes that are below legal width, this map
+ /// indicates what promoted value to use.
+ DenseMap<SDValue, SDValue> PromotedIntegers;
+
+  /// ExpandedIntegers - For integer nodes that need to be expanded this map
+  /// indicates the Lo/Hi pair that is the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+
+ /// SoftenedFloats - For floating point nodes converted to integers of
+ /// the same size, this map indicates the converted value to use.
+ DenseMap<SDValue, SDValue> SoftenedFloats;
+
+  /// ExpandedFloats - For float nodes that need to be expanded this map
+  /// indicates the Lo/Hi pair that is the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+
+ /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+ /// scalar value of type 'ty' to use.
+ DenseMap<SDValue, SDValue> ScalarizedVectors;
+
+  /// SplitVectors - For vector nodes that need to be split this map indicates
+  /// the Lo/Hi pair that is the split version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+
+ /// WidenedVectors - For vector nodes that need to be widened, indicates
+ /// the widened value to use.
+ DenseMap<SDValue, SDValue> WidenedVectors;
+
+ /// ReplacedValues - For values that have been replaced with another,
+ /// indicates the replacement value to use.
+ DenseMap<SDValue, SDValue> ReplacedValues;
+
+ /// Worklist - This defines a worklist of nodes to process. In order to be
+ /// pushed onto this worklist, all operands of a node must have already been
+ /// processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// run - This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ ExpungeNode(Old);
+ ExpungeNode(New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void ExpungeNode(SDNode *N);
+ void PerformExpensiveChecks();
+ void RemapValue(SDValue &N);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT);
+ bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult);
+ SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index);
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+ SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT,
+ const SDValue *Ops, unsigned NumOps, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteTargetBoolean(SDValue Bool, MVT VT);
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void ReplaceValueWithHelper(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+ /// larger integer type, this returns the promoted value. The low bits of the
+ /// promoted value corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ SDValue &PromotedOp = PromotedIntegers[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+ /// final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ MVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
+ /// final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ MVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT);
+ }
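+
+  // Illustrative: if an i8 value 0x80 was promoted to i32, the upper 24 bits
+  // of the promoted value are rubbish; SExtPromotedInteger yields 0xFFFFFF80
+  // while ZExtPromotedInteger yields 0x00000080.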
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SDIV(SDNode *N);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UDIV(SDNode *N);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedInteger - Given a processed operand Op which was expanded into
+ /// two integers of half the size, this returns the two halves. The low bits
+ /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ SDValue &SoftenedOp = SoftenedFloats[Op];
+ RemapValue(SoftenedOp);
+ assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Result Float to Integer Conversion.
+ void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Operand Float to Integer Conversion.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedFloat - Given a processed operand Op which was expanded into
+ /// two floating point values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetScalarizedVector - Given a processed one-element vector Op which was
+ /// scalarized to its element type, this returns the element. For example,
+ /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ SDValue &ScalarizedOp = ScalarizedVectors[Op];
+ RemapValue(ScalarizedOp);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_ShiftOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSplitVector - Given a processed vector Op which was split into smaller
+ /// vectors, this method returns the smaller vectors. The first elements of
+ /// Op coincide with the elements of Lo; the remaining elements of Op coincide
+ /// with the elements of Hi: Op is what you would get by concatenating Lo and
+ /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
+ /// this method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+ void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of loads that
+  /// load a vector with a resulting wider type. It takes:
+  ///   LdChain: list of chains for the loads we have generated.
+  ///   Chain: incoming chain for the load vector.
+  ///   BasePtr: base pointer to load from.
+  ///   SV: memory disambiguation source value.
+  ///   SVOffset: memory disambiguation offset.
+  ///   Alignment: alignment of the memory.
+  ///   isVolatile: true if the load is volatile.
+  ///   LdWidth: width of memory that we want to load.
+  ///   ResType: the wider result type for the resulting vector.
+  ///   dl: DebugLoc to be applied to new nodes.
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
+ SDValue BasePtr, const Value *SV,
+ int SVOffset, unsigned Alignment,
+ bool isVolatile, unsigned LdWidth,
+ MVT ResType, DebugLoc dl);
+
+  /// GenWidenVectorStores - Helper function to generate a set of stores that
+  /// store a widened vector into non-widened memory. It takes:
+  ///   StChain: list of chains for the stores we have generated.
+  ///   Chain: incoming chain for the store vector.
+  ///   BasePtr: base pointer to store to.
+  ///   SV: memory disambiguation source value.
+  ///   SVOffset: memory disambiguation offset.
+  ///   Alignment: alignment of the memory.
+  ///   isVolatile: true if the store is volatile.
+  ///   ValOp: value to store.
+  ///   StWidth: width of memory that we want to store.
+  ///   dl: DebugLoc to be applied to new nodes.
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
+ SDValue BasePtr, const Value *SV,
+ int SVOffset, unsigned Alignment,
+ bool isVolatile, SDValue ValOp,
+ unsigned StWidth, DebugLoc dl);
+
+  /// ModifyToType - Modifies a vector input (widens or narrows it) to a
+  /// vector of type WidenVT. The input vector must have the same element
+  /// type as WidenVT.
+ SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+  // Legalization methods which rely only on the fact that the illegal type
+  // is split into two not necessarily identical types. As such they can be
+  // used for splitting vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
+ void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+  // Legalization methods which rely only on the fact that the illegal type
+  // is split into two identical types of half the size, and that the Lo/Hi
+  // part is stored first in memory on little/big-endian machines, followed
+  // by the Hi/Lo part. As such they can be used for expanding integers and
+  // floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ // Generic Result Expansion.
+ void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BIT_CONVERT (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 0000000..e8ff3fc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,453 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+
+void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT OutVT = N->getValueType(0);
+ MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
+ SDValue InOp = N->getOperand(0);
+ MVT InVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case Legal:
+ case PromoteInteger:
+ break;
+ case SoftenFloat:
+ // Convert the integer operand instead.
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case ExpandInteger:
+ case ExpandFloat:
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case SplitVector:
+ // Convert the split parts of the input if it was split in two.
+ GetSplitVector(InOp, Lo, Hi);
+ if (Lo.getValueType() == Hi.getValueType()) {
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ }
+ break;
+ case ScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case WidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+ InOp = GetWidenedVector(InOp);
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ InVT.getVectorNumElements()/2);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT());
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+ MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
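+
+// A worked example of the store/load fallback above (hypothetical types):
+// expanding i64 = BIT_CONVERT f64 on a 32-bit little-endian target where
+// f64 is legal produces
+//
+//   ch = store f64 %InOp, %slot
+//   Lo = i32 load %slot            (offset 0)
+//   Hi = i32 load %slot + 4        (alignment MinAlign(Alignment, 4))
+//
+// On a big-endian target the two loaded halves are swapped afterwards.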
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
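+
+// For example, on a 32-bit target where both i128 and i64 are illegal, the
+// i128 operand is first expanded into i64 Lo/Hi parts; EXTRACT_ELEMENT 1
+// selects the Hi i64, which is itself expanded, so GetPairElements returns
+// its two i32 halves.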
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ MVT OldVT = N->getValueType(0);
+ MVT NewVT = TLI.getTypeToTransformTo(OldVT);
+
+ SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::getVectorVT(NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ // Make sure the type of Idx is big enough to hold the new values.
+ if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ DebugLoc dl = N->getDebugLoc();
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset+IncrementSize,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
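+
+// For illustration (hypothetical types): an i64 load on a 32-bit target
+// becomes
+//
+//   Lo = i32 load %ptr             (offset 0)
+//   Hi = i32 load %ptr + 4         (alignment MinAlign(Alignment, 4))
+//   ch = TokenFactor Lo.1, Hi.1
+//
+// with Lo/Hi swapped on big-endian targets, and all users of the original
+// load's chain rewired to the TokenFactor.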
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0).isVector()) {
+ // An illegal type that needs expansion is being converted to a legal
+ // vector type. Make a two-element vector out of the expanded parts and
+ // convert that instead, but only if the new vector type is legal
+ // (otherwise there is no point, and it might create expansion loops).
+ // For example, on x86 this turns v1i64 = BIT_CONVERT i64 into
+ // v1i64 = BIT_CONVERT v2i32.
+ MVT OVT = N->getOperand(0).getValueType();
+ MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue Parts[2];
+ GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+ }
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ MVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ MVT OldVT = N->getOperand(0).getValueType();
+ MVT NewVT = TLI.getTypeToTransformTo(OldVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ std::vector<SDValue> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::getVectorVT(NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
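+
+// As a concrete example: BUILD_VECTOR v2i64 <a, b> on a 32-bit little-endian
+// target becomes
+//
+//   t      = BUILD_VECTOR v4i32 <aLo, aHi, bLo, bHi>
+//   result = BIT_CONVERT v2i64 t
+//
+// On big-endian targets each (Lo, Hi) pair is swapped instead.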
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ MVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = N->getOperand(1);
+ MVT OldEVT = Val.getValueType();
+ MVT NewEVT = TLI.getTypeToTransformTo(OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
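+
+// For example, inserting an expanded i64 value at index Idx of a v2i64
+// vector bitconverts the vector to v4i32 and inserts the two halves at
+// indices 2*Idx and 2*Idx+1 (with Lo/Hi swapped on big-endian targets)
+// before converting back to v2i64.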
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ DebugLoc dl = N->getDebugLoc();
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType());
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ int SVOffset = St->getSrcValueOffset();
+ unsigned Alignment = St->getAlignment();
+ bool isVolatile = St->isVolatile();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
+ SVOffset + IncrementSize,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
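+
+// For illustration (hypothetical types): storing an expanded i64 on a 32-bit
+// target becomes
+//
+//   chLo = i32 store %Lo, %ptr     (offset 0)
+//   chHi = i32 store %Hi, %ptr + 4 (alignment MinAlign(Alignment, 4))
+//   ch   = TokenFactor chLo, chHi
+//
+// Note that big-endian targets swap which half is stored first, not the
+// addresses the halves are stored to.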
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // A MERGE_VALUES node can produce any number of values. We know that the
+ // first illegal one needs to be split into Lo/Hi.
+ unsigned i;
+
+ // The leading run of legal results is replaced directly by the
+ // corresponding input operands, which have the same types.
+ for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+
+ // The first illegal result must be the one that needs to be split.
+ GetSplitOp(N->getOperand(i), Lo, Hi);
+
+ // Legalize the rest of the results into the input operands whether they are
+ // legal or not.
+ unsigned e = N->getNumValues();
+ for (++i; i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL);
+ Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 0000000..df9af21
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,335 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR, or
+// operations that happen to take a vector and are custom-lowered, like
+// ISD::CALL; the legalization of such operations never produces nodes with
+// illegal types, so it is okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ TargetLowering& TLI;
+ bool Changed; // Keep track of whether anything changed
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what legalized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ // Adds a node to the translation cache
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ // Legalizes the given node
+ SDValue LegalizeOp(SDValue Op);
+ // Assuming the node is legal, "legalize" the results
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ // Implements unrolling a generic vector operation, i.e. turning it into
+ // scalar operations.
+ SDValue UnrollVectorOp(SDValue Op);
+ // Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+ // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+ // isn't legal.
+ SDValue ExpandFNEG(SDValue Op);
+ // Implements vector promotion; this is essentially just bitcasting the
+ // operands to a different type and bitcasting the result back to the
+ // original type.
+ SDValue PromoteVectorOp(SDValue Op);
+
+ public:
+ bool Run();
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we must always cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+ SDValue Result =
+ DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size());
+
+ bool HasVectorValue = false;
+ for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+ J != E;
+ ++J)
+ HasVectorValue |= J->isVector();
+ if (!HasVectorValue)
+ return TranslateLegalizeResults(Op, Result);
+
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ case ISD::VSETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ case TargetLowering::Promote:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom: {
+ SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.getNode()) {
+ Result = Tmp1;
+ break;
+ }
+ // FALL THROUGH
+ }
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::FNEG)
+ Result = ExpandFNEG(Op);
+ else if (Node->getOpcode() == ISD::VSETCC)
+ Result = UnrollVSETCC(Op);
+ else
+ Result = UnrollVectorOp(Op);
+ break;
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we must always cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
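+
+// To illustrate the three non-trivial actions above (hypothetical targets):
+// an ISD::AND on v2i32 that the target marks Promote is bitcast to v1i64,
+// performed there, and bitcast back; an operation marked Custom is handed
+// to TLI.LowerOperation, falling back to Expand if it declines; and an
+// ISD::SDIV on v2i64 marked Expand is unrolled into scalar i64 divides,
+// whose illegal i64 type is legalized later by the type legalizer.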
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+ // Vector "promotion" is basically just bitcasting and doing the operation
+ // in a different type. For example, x86 promotes ISD::AND on v2i32 to
+ // v1i64.
+ MVT VT = Op.getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+ SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return UnrollVectorOp(Op);
+}
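+
+// The -0.0 constant above matters: IEEE subtraction (-0.0) - x negates every
+// value including zero (e.g. (-0.0) - (+0.0) == -0.0), whereas (0.0) - x
+// would map +0.0 to +0.0 rather than to -0.0.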
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ MVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ MVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 8> Ops(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getIntPtrConstant(i));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+ LHSElem, RHSElem, CC);
+ Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+ DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ EltVT),
+ DAG.getConstant(0, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
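+
+// For example, unrolling a v4i32 VSETCC produces, for each element i,
+//
+//   b[i] = setcc lhs[i], rhs[i], cc
+//   r[i] = select b[i], 0xFFFFFFFF, 0
+//
+// followed by a BUILD_VECTOR of the four results, preserving VSETCC's
+// all-ones/all-zeros per-element semantics.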
+
+/// UnrollVectorOp - We know that the given vector has a legal type, however
+/// the operation it performs is not legal, and the target has requested that
+/// the operation be expanded. "Unroll" the vector, splitting out the scalars
+/// and operating on each element individually.
+SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) {
+ MVT VT = Op.getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+ unsigned NE = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+ for (unsigned i = 0; i != NE; ++i) {
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ SDValue Operand = Op.getOperand(j);
+ MVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ MVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperandEltVT,
+ Operand,
+ DAG.getConstant(i, MVT::i32));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (Op.getOpcode()) {
+ default:
+ Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0],
+ DAG.getShiftAmountOperand(Operands[1])));
+ break;
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size());
+}
+
+} // end anonymous namespace
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 0000000..68967cc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2151 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in multiple vectors of a smaller type. For example,
+// implementing <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to scalarize the result of this operator!");
+ abort();
+
+ case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
+ case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCOS:
+ case ISD::FNEG:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, ShiftAmt);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+ MVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+ MVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+ Op0, DAG.getValueType(NewVT),
+ DAG.getValueType(Op0.getValueType()),
+ N->getOperand(3),
+ N->getOperand(4),
+ cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ MVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(),
+ N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(),
+ N->getChain(), N->getBasePtr(),
+ DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ MVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ MVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ MVT NVT = N->getValueType(0).getVectorElementType();
+ MVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+ DebugLoc dl = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2));
+
+ // VSETCC always returns a sign-extended value, while SETCC may not. The
+ // SETCC result type may not match the vector element type. Correct these.
+ if (NVT.bitsLE(SVT)) {
+ // The SETCC result type is bigger than the vector element type.
+ // Ensure the SETCC result is sign-extended.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res,
+ DAG.getValueType(MVT::i1));
+ // Truncate to the final type.
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+ } else {
+ // The SETCC result type is smaller than the vector element type.
+ // If the SetCC result is not sign-extended, chop it down to MVT::i1.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res);
+ // Sign extend to the final type.
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res);
+ }
+}
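+
+// For illustration (hypothetical types): with a v1i8 VSETCC whose scalar
+// SETCC result type is i32, the i32 result is sign-extended in-register
+// from bit 0 if the target's booleans are not already all-ones/all-zeros,
+// then truncated to i8; if instead the SETCC result type were i1, it would
+// be sign-extended up to i8.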
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to scalarize this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT:
+ Res = ScalarizeVecOp_BIT_CONVERT(N); break;
+
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N); break;
+
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break;
+
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ return GetScalarizedVector(N->getOperand(0));
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(N->getChain(), dl,
+ GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->getAlignment());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+ N->isVolatile(), N->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization; we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n");
+ SDValue Lo, Hi;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to split the result of this operator!");
+ abort();
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break;
+ case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ MVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue InOp = N->getOperand(0);
+ MVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case Legal:
+ case PromoteInteger:
+ case SoftenFloat:
+ case ScalarizeVector:
+ break;
+ case ExpandInteger:
+ case ExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case SplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits());
+ MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits());
+ if (TLI.isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ MVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ // Split the input.
+ SDValue VLo, VHi;
+ MVT InVT = N->getOperand(0).getValueType();
+ switch (getTypeAction(InVT)) {
+ default: assert(0 && "Unexpected type action!");
+ case Legal: {
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case SplitVector:
+ GetSplitVector(N->getOperand(0), VLo, VHi);
+ break;
+ case WidenVector: {
+ // If the result needs to be split and the input needs to be widened,
+ // the two types must have different lengths. Use the widened result
+ // and extract from it to do the split.
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ }
+
+ SDValue STyOpLo = DAG.getValueType(VLo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(VHi.getValueType());
+
+ Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ MVT IdxVT = Idx.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ // The indices are not guaranteed to be a multiple of the new vector
+ // size unless the original vector type was split in two.
+ assert(LoVT == HiVT && "Non power-of-two vectors not supported!");
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
+ DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getIntPtrConstant(IdxVal - LoNumElts));
+ return;
+ }
+
+ // Spill the vector to the stack.
+ MVT VecVT = Vec.getValueType();
+ MVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT());
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
+ MinAlign(Alignment, IncrementSize));
+}
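+
+// For example, with a non-constant index the split halves cannot be updated
+// piecewise, so the whole vector is spilled; for v4i32 this produces
+//
+//   ch = store v4i32 %vec, %slot
+//   ch = truncstore %elt, %slot + 4*%idx   (via GetVectorElementPointer)
+//   Lo = v2i32 load %slot
+//   Hi = v2i32 load %slot + 8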
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ MVT LoVT, HiVT;
+ DebugLoc dl = LD->getDebugLoc();
+ GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ const Value *SV = LD->getSrcValue();
+ int SVOffset = LD->getSrcValueOffset();
+ MVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ MVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
+ SV, SVOffset, LoMemVT, isVolatile, Alignment);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ SVOffset += IncrementSize;
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
+ SV, SVOffset, HiMemVT, isVolatile, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ MVT InVT = N->getOperand(0).getValueType();
+ switch (getTypeAction(InVT)) {
+ default: assert(0 && "Unexpected type action!");
+ case Legal: {
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case SplitVector:
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ break;
+ case WidenVector: {
+ // If the result needs to be split and the input needs to be widened,
+ // the two types must have different lengths. Use the widened result
+ // and extract from it to do the split.
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ MVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+ assert(NewVT == Inputs[1].getValueType() &&
+ "Non power-of-two vectors not supported!");
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ MVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input], DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+ Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size());
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ }
+
+ Ops.clear();
+ }
+}
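+
+// Example: splitting a v4i32 shuffle of X and Y with mask <0,4,1,5> yields
+// two v2i32 halves.  The low half's mask elements <0,4> touch only split
+// inputs 0 and 2, so it becomes a two-operand shuffle of those inputs; a
+// half whose mask touches three or more inputs takes the BUILD_VECTOR path.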
+
+void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SDValue LL, LH, RL, RH;
+ GetSplitVector(N->getOperand(0), LL, LH);
+ GetSplitVector(N->getOperand(1), RL, RH);
+
+ Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to split this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ MVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ assert(Lo.getValueType() == Hi.getValueType() &&
+ "Returns legal non-power-of-two vector type?");
+ MVT InVT = Lo.getValueType();
+
+ MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
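+
+// Example: for "v4i16 = trunc v4i32 X" where only the input is split, each
+// v2i32 half of X is truncated to v2i16 and the halves are glued back
+// together with CONCAT_VECTORS to form the legal v4i16 result.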
+
+SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
+ // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
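+
+// Example: "i64 = bit_convert v4i16 X" with X split into two v2i16 halves:
+// each half is bit-converted to an i32, the halves are swapped on
+// big-endian targets, and JoinIntegers pastes them into the final i64.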
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal. For now, assume the index
+ // is a constant.
+ MVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+ }
+}
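+
+// Example: extracting v2i32 at constant index 6 from a v8i32 split into two
+// v4i32 halves lands entirely in Hi; the index is rebased to 6 - 4 = 2 and
+// the extract is redone on the high half.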
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ MVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx);
+ else
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType()));
+ }
+
+ // Store the vector to the stack.
+ MVT EltVT = VecVT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0);
+
+ // Load back the required element.
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ DebugLoc dl = N->getDebugLoc();
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ MVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getAlignment();
+ bool isVol = N->isVolatile();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ MVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ LoMemVT, isVol, Alignment);
+ else
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ isVol, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getSrcValue(), SVOffset+IncrementSize,
+ HiMemVT,
+ isVol, MinAlign(Alignment, IncrementSize));
+ else
+ Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize,
+ isVol, MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
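+
+// Example: storing a split v8i32 becomes two v4i32 stores; the second
+// store's address is the base pointer plus IncrementSize = 4*32/8 = 16
+// bytes, and the two chains are joined with a TokenFactor.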
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to widen the result of this operator!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break;
+ case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::BSWAP:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FPOWI:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::SUB:
+ case ISD::XOR: Res = WidenVecRes_Binary(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: Res = WidenVecRes_Shift(N); break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCOS:
+ case ISD::FNEG:
+ case ISD::FSIN:
+ case ISD::FSQRT: Res = WidenVecRes_Unary(N); break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
+}
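+
+// Example (assuming the narrowest legal vector is v4i32): "v3i32 = add X, Y"
+// is rewritten as a v4i32 add of the widened operands; the extra lane holds
+// an undefined value that is never used.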
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ MVT InVT = InOp.getValueType();
+ MVT InEltVT = InVT.getVectorElementType();
+ MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == WidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getNode(Opcode, dl, WidenVT, InOp);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(Opcode, dl, WidenVT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
+ &Ops[0], NumConcat));
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+      // Extract a subvector of the input and convert the shortened vector.
+ return DAG.getNode(Opcode, dl, WidenVT,
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0)));
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ MVT EltVT = WidenVT.getVectorElementType();
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
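+
+// Example: widening "v2f32 = sint_to_fp v2i32 X" to v4f32 concatenates X
+// with an undef v2i32 to form a v4i32 (when v4i32 is legal) and converts
+// that in one node; when no such legal input type exists, the convert is
+// unrolled into per-element scalar converts padded out with undefs.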
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ MVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == WidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+ MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ MVT InVT = InOp.getValueType();
+ MVT VT = N->getValueType(0);
+ MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case Legal:
+ break;
+ case PromoteInteger:
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ break;
+ case SoftenFloat:
+ case ExpandInteger:
+ case ExpandFloat:
+ case ScalarizeVector:
+ case SplitVector:
+ break;
+ case WidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ // The input widens to the same size. Convert to the widen value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ if (WidenSize % InSize == 0) {
+    // Determine the new input vector type.  The new input vector type will
+    // use the same element type (if it's a vector) or use the input type as
+    // a vector.  It is the same size as the type to widen to.
+ MVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ MVT InEltVT = InVT.getVectorElementType();
+ NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = MVT::getVectorVT(InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+ }
+ }
+
+ // This should occur rarely. Lower the bit-convert to a store/load
+ // from the stack. Create the stack frame object. Make sure it is aligned
+ // for both the source and destination types.
+ SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0);
+}
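+
+// Example: widening "v2i32 = bit_convert i64 X" to v4i32 builds the v2i64
+// vector <X, undef> (when v2i64 is legal) and bit-converts it to v4i32,
+// avoiding the store/load fallback through a stack slot.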
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // Build a vector with undefined for the new nodes.
+ MVT VT = N->getValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ NewOps.reserve(WidenNumElts);
+ for (unsigned i = NumElts; i < WidenNumElts; ++i)
+ NewOps.push_back(DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ MVT InVT = N->getOperand(0).getValueType();
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != WidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(InVT)) {
+      // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ break;
+
+      if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
+ SmallVector<int, 16> MaskOps(WidenNumElts);
+ for (unsigned i=0; i < WidenNumElts/2; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ &MaskOps[0]);
+ }
+ }
+ }
+
+  // Fall back to extracting each element and rebuilding with BUILD_VECTOR.
+ MVT EltVT = WidenVT.getVectorElementType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
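+
+// Example: "v4i32 = concat_vectors v2i32 X, v2i32 Y" widened to v8i32, with
+// v2i32 still legal, pads with undef operands since 8 % 2 == 0:
+//   v8i32 = concat_vectors X, Y, undef, undef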
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ MVT InVT = InOp.getValueType();
+ MVT InEltVT = InVT.getVectorElementType();
+ MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == WidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i) {
+ Ops[i] = UndefVal;
+ }
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+      // Extract a subvector of the input and convert the shortened vector.
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getIntPtrConstant(0));
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ MVT EltVT = WidenVT.getVectorElementType();
+ DTyOp = DAG.getValueType(EltVT);
+ STyOp = DAG.getValueType(InEltVT);
+
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ MVT InVT = InOp.getValueType();
+
+ ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
+ if (CIdx) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ // Check if we can just return the input vector after widening.
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+ }
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ MVT EltVT = VT.getVectorElementType();
+ MVT IdxVT = Idx.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ if (CIdx) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(IdxVal+i, IdxVT));
+ } else {
+ Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
+ for (i=1; i < NumElts; ++i) {
+ SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(i, IdxVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
+ }
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
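+
+// Example: extracting v2i32 from a v8i32 at index 0, with the result widened
+// to v4i32: the index is a multiple of the widened length and 0 + 4 < 8, so
+// a single v4i32 EXTRACT_SUBVECTOR of the input suffices; otherwise the
+// elements are pulled out one at a time and rebuilt with BUILD_VECTOR.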
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+ MVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = N->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ const Value *SV = LD->getSrcValue();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+  SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
+ if (ExtType != ISD::NON_EXTLOAD) {
+    // For extension loads, we cannot play the trick of chopping into legal
+    // vector types and bit-casting to the right type.  Instead, we unroll
+    // the load and build a vector.
+ MVT EltVT = WidenVT.getVectorElementType();
+ MVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+ LdEltVT, isVolatile, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+ SVOffset + Offset, LdEltVT, isVolatile, Align);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+ } else {
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+ unsigned int LdWidth = LdVT.getSizeInBits();
+ Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset,
+ Align, isVolatile, LdWidth, WidenVT, dl);
+ }
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0],
+ LdChain.size());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
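+
+// Example: an extending load of v2i16 widened to v4i32 is unrolled into two
+// i16 extloads two bytes apart, padded with undefs, and rebuilt with
+// BUILD_VECTOR; a plain load goes through GenWidenVectorLoads instead.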
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ MVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ MVT CondEltVT = CondVT.getVectorElementType();
+ MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == WidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
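+
+// Example: widening a v2i32 shuffle with mask <1,2> to v4i32 remaps mask
+// element 2 (which points into the second input) to 2 - 2 + 4 = 4 and pads
+// with -1, giving <1,4,-1,-1> over the two widened inputs.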
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ MVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "Cannot widen a non-vector type");
+ MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts);
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately.  If not,
+  // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ return DAG.getNode(ISD::VSETCC, N->getDebugLoc(),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(cerr << "Widen node operand " << OpNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "WidenVectorOperand op #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to widen this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+  // Since the result is legal and the input is illegal, it is unlikely that
+  // we can fix the input to a legal type, so unroll the convert into some
+  // scalar code and create a nasty build vector.
+ MVT VT = N->getValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+ MVT InVT = InOp.getValueType();
+ MVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ for (unsigned i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
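+
+// Example: "v2i32 = fp_to_sint v2f32 X" where X was widened to v4f32 is
+// unrolled into two scalar fp_to_sint nodes on the first two lanes and
+// rebuilt as a v2i32 BUILD_VECTOR; no attempt is made to legalize the
+// widened convert directly.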
+
+SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ MVT InWidenVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check if we can convert between two legal vector types and extract.
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ if (InWidenSize % Size == 0 && !VT.isVector()) {
+ unsigned NewNumElts = InWidenSize / Size;
+ MVT NewVT = MVT::getVectorVT(VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack. Create the stack
+ // frame object. Make sure it is aligned for both the source and destination
+ // types.
+ SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0);
+}
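+
+// Example: "i32 = bit_convert v2i16 X" where X is widened to v8i16: the
+// widened vector is bit-converted to v4i32 (when that type is legal) and
+// element 0, the lanes holding the original 32 bits, is extracted;
+// otherwise the value takes a round trip through a stack slot.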
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ // If the input vector is not legal, it is likely that we will not find a
+  // legal vector of the same size.  Replace the concatenation with a nasty
+  // build vector.
+ MVT VT = N->getValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ MVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ MVT EltVT = InOp.getValueType().getVectorElementType();
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ EltVT, InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value, but we only want to store the original
+  // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ const Value *SV = ST->getSrcValue();
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT StVT = ST->getMemoryVT();
+ MVT ValVT = ValOp.getValueType();
+  // It must be the case that the widened vector type is bigger than the
+  // memory type we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore()) {
+    // For truncating stores, we cannot play the trick of chopping into legal
+    // vector types and bit-casting to the right type.  Instead, we unroll
+    // the store.
+ MVT StEltVT = StVT.getVectorElementType();
+ MVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset, StEltVT,
+ isVolatile, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                                DAG.getIntPtrConstant(i));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+ SVOffset + Offset, StEltVT,
+ isVolatile, MinAlign(Align, Offset)));
+ }
+ }
+ else {
+ assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
+ // Store value
+ GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
+ Align, isVolatile, ValOp, StVT.getSizeInBits(), dl);
+ }
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                       &StChain[0], StChain.size());
+}
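+
+// Example: a non-truncating store of a v3i32 value widened to v4i32 writes
+// only 96 bits: GenWidenVectorStores emits, say, an i64 store of the first
+// two lanes (assuming v2i64 is legal) followed by an i32 store of the third
+// lane, never touching the widened fourth lane.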
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find a legal vector type and its associated element
+// type, given a preferred element width; the resulting vector type must be
+// the same size as VecVT.
+// TLI:   Target lowering used to determine legal types.
+// Width: Preferred width of the element type to load or store.
+// VecVT: Vector value type whose size must be matched.
+// Returns NewVecVT and NewEltVT - the vector type and its associated
+// element type.
+static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
+ MVT VecVT,
+ MVT& NewEltVT, MVT& NewVecVT) {
+ unsigned EltWidth = Width + 1;
+ if (TLI.isTypeLegal(VecVT)) {
+    // We start with the preferred width, round it down to a power of 2, and
+    // look for a legal vector type of that width.  If that fails, we halve
+    // the width and try again.  Since the incoming type is legal, this
+    // process must terminate: a vector of the smallest loadable type should
+    // always be legal.
+ do {
+ assert(EltWidth > 0);
+ EltWidth = 1 << Log2_32(EltWidth - 1);
+ NewEltVT = MVT::getIntegerVT(EltWidth);
+ unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+ NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+ } while (!TLI.isTypeLegal(NewVecVT) ||
+ VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+ } else {
+    // The incoming vector type is illegal and is the result of widening
+    // a vector to a power of 2.  In this case, we will use the preferred
+    // width as long as it is a multiple of the incoming vector length.
+    // The legalization process will eventually make this into a legal type
+    // and remove the illegal bit converts (which would turn into stack
+    // converts if they were allowed to exist).
+ do {
+ assert(EltWidth > 0);
+ EltWidth = 1 << Log2_32(EltWidth - 1);
+ NewEltVT = MVT::getIntegerVT(EltWidth);
+ unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+ NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+ } while (!TLI.isTypeLegal(NewEltVT) ||
+ VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+ }
+}
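+
+// Example: FindAssocWidenVecType(TLI, 96, v4i32, ...) first tries element
+// width 1 << Log2_32(96) == 64; if v2i64 is legal and 128 bits wide, it
+// returns NewEltVT = i64 and NewVecVT = v2i64, otherwise it keeps halving
+// the width (i32, i16, ...) until a matching legal vector type is found.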
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ SDValue Chain,
+ SDValue BasePtr,
+ const Value *SV,
+ int SVOffset,
+ unsigned Alignment,
+ bool isVolatile,
+ unsigned LdWidth,
+ MVT ResType,
+ DebugLoc dl) {
+  // The strategy assumes that we can efficiently load power-of-two widths.
+  // The routine chops the vector into loads of the largest power-of-2 width
+  // that can be inserted into a legal vector, and then casts the result
+  // into the vector type we want.  This avoids unnecessary stack converts.
+
+  // TODO: If LdWidth is legal, the alignment is the same as LdWidth, and
+  // the load is non-volatile, we can use a wider load for the value.
+
+  // Find a vector type we can load from.
+ MVT NewEltVT, NewVecVT;
+ unsigned NewEltVTWidth;
+ FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+ NewEltVTWidth = NewEltVT.getSizeInBits();
+
+ SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
+ isVolatile, Alignment);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the element with one instruction
+ if (LdWidth == NewEltVTWidth) {
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+ }
+
+ unsigned Idx = 1;
+ LdWidth -= NewEltVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ unsigned Increment = NewEltVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ if (LdWidth < NewEltVTWidth) {
+      // The type we are currently using is too large; switch to the next
+      // smaller power-of-2 width.
+ unsigned oNewEltVTWidth = NewEltVTWidth;
+ FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+ NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the insertion index and the vector for the new load type.
+ Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+ VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ }
+
+ SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV,
+ SVOffset+Offset, isVolatile,
+ MinAlign(Alignment, Offset));
+ LdChain.push_back(LdOp.getValue(1));
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp,
+ DAG.getIntPtrConstant(Idx++));
+
+ LdWidth -= NewEltVTWidth;
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+}
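+
+// Example: loading 96 bits into a widened v4i32 (assuming v2i64 is legal)
+// starts with an i64 load inserted into a v2i64 via SCALAR_TO_VECTOR, then
+// bit-converts to v4i32 and inserts an i32 load of the remaining 32 bits at
+// index 2 before the final bit_convert to the result type.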
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ SDValue Chain,
+ SDValue BasePtr,
+ const Value *SV,
+ int SVOffset,
+ unsigned Alignment,
+ bool isVolatile,
+ SDValue ValOp,
+ unsigned StWidth,
+ DebugLoc dl) {
+  // Break the store into a series of power-of-2-width stores.  For each
+  // width, we convert the vector to a vector of the element size that we
+  // want to store.  This avoids requiring a stack convert.
+
+ // Find a width of the element type we can store with
+ MVT WidenVT = ValOp.getValueType();
+ MVT NewEltVT, NewVecVT;
+
+ FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+ unsigned NewEltVTWidth = NewEltVT.getSizeInBits();
+
+ SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
+ DAG.getIntPtrConstant(0));
+ SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset,
+ isVolatile, Alignment);
+ StChain.push_back(StOp);
+
+ // Check if we are done
+ if (StWidth == NewEltVTWidth) {
+ return;
+ }
+
+ unsigned Idx = 1;
+ StWidth -= NewEltVTWidth;
+ unsigned Offset = 0;
+
+ while (StWidth > 0) {
+ unsigned Increment = NewEltVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ if (StWidth < NewEltVTWidth) {
+      // The type we are currently using is too large; switch to the next
+      // smaller power-of-2 width.
+ unsigned oNewEltVTWidth = NewEltVTWidth;
+ FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+ NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the extraction index and the vector for the new store type.
+ Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+ VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ }
+
+ EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
+ DAG.getIntPtrConstant(Idx++));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset + Offset, isVolatile,
+ MinAlign(Alignment, Offset)));
+ StWidth -= NewEltVTWidth;
+ }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT.  The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) {
+  // Note that InOp might have been widened, so it might already have
+  // the right width or it might need to be narrowed.
+ MVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ DebugLoc dl = InOp.getDebugLoc();
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getIntPtrConstant(0));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ MVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(Idx));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
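+
+// Example: ModifyToType(v2i32 X, v4i32) concatenates X with an undef v2i32;
+// in the narrowing direction, a wider input is either trimmed with
+// EXTRACT_SUBVECTOR or rebuilt element by element when the lane counts do
+// not line up.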
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..185222a
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 0000000..af73b28
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,635 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct VISIBILITY_HIDDEN FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
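+
+  // Note: this is effectively a LIFO stack; pop() returns the most recently
+  // pushed SUnit, so "priority" here is just reverse insertion order.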
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and the SUnits that define
+  /// them which are currently "live".  These defining nodes must be
+  /// scheduled before any other node that modifies one of these registers.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+  /// AddPred - adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+  /// RemovePred - removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleBottomUp();
+
+ /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph();
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  // NumSuccsLeft is unsigned, so check for over-release before decrementing
+  // rather than testing for a negative count afterwards.
+  if (PredSU->NumSuccsLeft == 0) {
+    cerr << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
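+
+// Example: if node A is the only unscheduled successor of node B, then
+// scheduling A drives B's NumSuccsLeft to zero here and B is pushed onto
+// the available queue (bottom-up: successors are scheduled before their
+// predecessors).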
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  if (!N)
+    return NULL;
+
+  if (N->getFlaggedNode())
+    return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getValueType(i);
+ if (VT == MVT::Flag)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ MVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Flag)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = NewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = NewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPred = *I;
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
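+
+// Illustrative sketch (not from the original source): the unfold path above
+// splits a load-folded instruction such as a hypothetical (add x, [mem])
+// back into
+//   t = load [mem]
+//   add x, t
+// so that LoadSU and NewSU become separately schedulable units, with an
+// order edge keeping the operation after the load.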
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = TID.getNumDefs();
+ for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
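+
+// Illustrative sketch (hypothetical registers): for an instruction with one
+// explicit def and the implicit def list {RA, RB}, asking for RB starts at
+// NumRes = 1, walks past RA (NumRes = 2), and returns N->getValueType(2),
+// the value type at RB's position among the node's results.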
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, the
+/// caller does whatever is necessary (i.e. duplication or copy insertion) to
+/// make scheduling it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ unsigned Reg = I->getReg();
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
+ if (RegAdded.insert(Reg))
+ LRegs.push_back(Reg);
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg);
+ *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
+ if (RegAdded.insert(*Alias))
+ LRegs.push_back(*Alias);
+ }
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ if (!Node->isMachineOpcode())
+ continue;
+ const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+ if (!TID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+ if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
+ if (RegAdded.insert(*Reg))
+ LRegs.push_back(*Reg);
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(*Reg);
+ *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias))
+ LRegs.push_back(*Alias);
+ }
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+    if (Delayed && !CurSU) {
+      // Try duplicating the nodes that produce these
+      // "expensive to copy" values to break the dependency. In case even
+      // that doesn't work, insert cross class copies.
+      SUnit *TrySU = NotReady[0];
+      SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+      assert(LRegs.size() == 1 && "Can't handle this yet!");
+      unsigned Reg = LRegs[0];
+      SUnit *LRDef = LiveRegDefs[Reg];
+      MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+      const TargetRegisterClass *RC =
+        TRI->getPhysicalRegisterRegClass(Reg, VT);
+      const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+      // If the cross-copy register class is null, then it must be possible
+      // to copy the value directly. Do not try to duplicate the def.
+      SUnit *NewDef = 0;
+      if (DestRC)
+        NewDef = CopyAndMoveSuccessors(LRDef);
+      else
+        DestRC = RC;
+      if (!NewDef) {
+        // Issue copies; these can be expensive cross register class copies.
+        SmallVector<SUnit*, 2> Copies;
+        InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+        DOUT << "Adding an edge from SU #" << TrySU->NodeNum
+             << " to SU #" << Copies.front()->NodeNum << "\n";
+        AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+                            /*Reg=*/0, /*isNormalMemory=*/false,
+                            /*isMustAlias=*/false, /*isArtificial=*/true));
+        NewDef = Copies.back();
+      }
+
+      DOUT << "Adding an edge from SU #" << NewDef->NodeNum
+           << " to SU #" << TrySU->NodeNum << "\n";
+      LiveRegDefs[Reg] = NewDef;
+      AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+                           /*Reg=*/0, /*isNormalMemory=*/false,
+                           /*isMustAlias=*/false, /*isArtificial=*/true));
+      TrySU->isAvailable = false;
+      CurSU = NewDef;
+
+      if (!CurSU) {
+        assert(false && "Unable to resolve live physical register dependencies!");
+        abort();
+      }
+    }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+  // Reverse the order, since we scheduled bottom-up.
+  std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ // Verify that all SUnits were scheduled.
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ cerr << "*** List scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ cerr << "*** List scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(!AnyNotSched);
+ assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 0000000..c432534
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,268 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
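+
+// A rough sketch of the loop this file implements (illustrative only, with
+// hypothetical helper names; the real loop below also handles pipeline
+// hazards and may emit noops):
+//
+//   while (!Available.empty()) {
+//     SUnit *SU = Available.pop();        // highest priority first
+//     if (hasHazard(SU)) { defer(SU); continue; }
+//     emit(SU);                           // append SU to the schedule
+//     releaseSuccessors(SU);              // may make new SUnits available
+//   }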
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ tdListDAGScheduler("list-td", "Top-down list scheduler",
+ createTDListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation. This supports
+/// top-down scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+public:
+ ScheduleDAGList(MachineFunction &mf,
+ SchedulingPriorityQueue *availqueue,
+ ScheduleHazardRecognizer *HR)
+ : ScheduleDAGSDNodes(mf),
+ AvailableQueue(availqueue), HazardRec(HR) {
+ }
+
+ ~ScheduleDAGList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void ReleaseSucc(SUnit *SU, const SDep &D);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ // Build the scheduling graph.
+ BuildSchedGraph();
+
+ AvailableQueue->initNodes(SUnits);
+
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+ --SuccSU->NumPredsLeft;
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, *I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ DOUT << "*** Advancing cycle, no work to do\n";
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DOUT << "*** Emitting noop\n";
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with a
+/// new hazard recognizer. This scheduler takes ownership of the hazard
+/// recognizer and deletes it when done.
+ScheduleDAGSDNodes *
+llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGList(*IS->MF,
+ new LatencyPriorityQueue(),
+ IS->CreateTargetHazardRecognizer());
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..c97e2a8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,1533 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ tdrListrDAGScheduler("list-tdrr",
+ "Top-down register reduction list scheduling",
+ createTDRRListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+ /// it is top-down.
+ bool isBottomUp;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other nodes
+  /// that modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf,
+ bool isbottomup,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
+ AvailableQueue(availqueue), Topo(SUnits) {
+ }
+
+ ~ScheduleDAGRRList() {
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+  /// AddPred - adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+  /// RemovePred - removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void CapturePred(SDep *PredEdge);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ void ScheduleNodeTopDown(SUnit*, unsigned);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleTopDown();
+ void ListScheduleBottomUp();
+
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ /// Updates the topological ordering if required.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = NewSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ /// Updates the topological ordering if required.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// ForceUnitLatencies - Return true, since register-pressure-reducing
+ /// scheduling doesn't need actual latency information.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph();
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ Topo.InitDAGTopologicalSorting();
+
+ AvailableQueue->initNodes(SUnits);
+
+ // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+ if (isBottomUp)
+ ListScheduleBottomUp();
+ else
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ --PredSU->NumSuccsLeft;
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue->push(PredSU);
+ }
+}
+
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+/// state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
+ DEBUG(SU->dump(this));
+
+ AvailableQueue->UnscheduledNode(SU);
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ CapturePred(&*I);
+ if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()]) {
+ LiveRegDefs[I->getReg()] = SU;
+ ++NumLiveRegs;
+ }
+ if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
+ LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+ }
+ }
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ AvailableQueue->push(SU);
+}
+
+/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
+/// BtCycle in order to schedule a specific node.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
+ unsigned &CurCycle) {
+ SUnit *OldSU = NULL;
+ while (CurCycle > BtCycle) {
+ OldSU = Sequence.back();
+ Sequence.pop_back();
+ if (SU->isSucc(OldSU))
+ // Don't try to remove SU from AvailableQueue.
+ SU->isAvailable = false;
+ UnscheduleNodeBottomUp(OldSU);
+ --CurCycle;
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ ++NumBacktracks;
+}
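+
+// Illustrative sketch: with CurCycle == 6 and BtCycle == 3, the loop above
+// pops and unschedules the three most recently scheduled nodes, winding
+// CurCycle back to 3 so that SU can be scheduled at that cycle instead.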
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  if (!N)
+    return NULL;
+
+  if (N->getFlaggedNode())
+    return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getValueType(i);
+ if (VT == MVT::Flag)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ MVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Flag)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ ComputeLatency(LoadSU);
+ }
+
+    NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ NewSU->isCommutable = true;
+ ComputeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPreds.push_back(*I);
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
+ const SDep &Pred = ChainPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(NULL);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(NULL);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = TID.getNumDefs();
+ for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+ if (RegAdded.insert(Reg)) {
+ LRegs.push_back(Reg);
+ Added = true;
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias)) {
+ LRegs.push_back(*Alias);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, the
+/// caller does whatever is necessary (i.e. backtracking or cloning) to make
+/// scheduling it possible.
+bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep())
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
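+      // Each operand group begins with a flag word: as decoded below, the low
+      // three bits encode the operand kind (2 = register def, 6 = earlyclobber
+      // register def) and bits 3-15 hold the number of values in the group.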
+ for (unsigned i = 2; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = (Flags & 0xffff) >> 3;
+
+ ++i; // Skip the ID value.
+ if ((Flags & 7) == 2 || (Flags & 7) == 6) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+ if (!TID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ SUnit *TrySU = NotReady[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ unsigned LiveCycle = CurCycle;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ unsigned LCycle = LiveRegCycles[Reg];
+ LiveCycle = std::min(LiveCycle, LCycle);
+ }
+ SUnit *OldSU = Sequence[LiveCycle];
+ if (!WillCreateCycle(TrySU, OldSU)) {
+ BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (OldSU->isAvailable) {
+ OldSU->isAvailable = false;
+ AvailableQueue->remove(OldSU);
+ }
+ AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+            // If one or more successors have been unscheduled, then the
+            // current node is no longer available. Schedule a successor
+            // that's now available instead.
+ if (!TrySU->isAvailable)
+ CurSU = AvailableQueue->pop();
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ NotReady.erase(NotReady.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+        // Can't backtrack. If it's too expensive to copy the value, then try
+        // duplicating the nodes that produce these "too expensive to copy"
+        // values to break the dependency. In case even that doesn't work,
+        // insert cross class copies.
+        // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is null, then it must be possible
+        // to copy the value directly. Do not try to duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DOUT << "Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n";
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DOUT << "Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n";
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue->push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+  // Reverse the order, since we scheduled bottom-up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+ --SuccSU->NumPredsLeft;
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ SuccSU->isAvailable = true;
+ AvailableQueue->push(SuccSU);
+ }
+}
+
+void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-tdrr scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSU = AvailableQueue->pop();
+
+ if (CurSU)
+ ScheduleNodeTopDown(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+ template<class SF>
+ class RegReductionPriorityQueue;
+
+ /// Sorting functions for the Available queue.
+ struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
+ bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+
+ struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
+ td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+} // end anonymous namespace
+
+/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
+/// A smaller number means higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
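+
+// Worked example (illustrative): a leaf with no non-chain predecessors gets
+// number 1.  A node whose two predecessors both have number 1 gets
+// max(1, 1) + 1 = 2, because the tie bumps Extra; a node with predecessor
+// numbers 2 and 1 stays at max(2, 1) = 2, since only ties add to Extra.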
+
+namespace {
+ template<class SF>
+ class VISIBILITY_HIDDEN RegReductionPriorityQueue
+ : public SchedulingPriorityQueue {
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
+ unsigned currentQueueId;
+
+ protected:
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ public:
+ RegReductionPriorityQueue(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri) :
+ Queue(SF(this)), currentQueueId(0),
+ TII(tii), TRI(tri), scheduleDAG(NULL) {}
+
+ void initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+ }
+
+ void addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+ }
+
+ void updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+ }
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+        // If SU does not have a register use, i.e. it doesn't produce a value
+        // that would be consumed (e.g. a store), then it terminates a chain of
+        // computation. Give it a large Sethi-Ullman number so it will be
+        // scheduled right before its predecessors, so that it doesn't
+        // lengthen their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+ return SethiUllmanNumbers[SU->NodeNum];
+ }
+
+ unsigned size() const { return Queue.size(); }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++currentQueueId;
+ Queue.push(U);
+ }
+
+ void push_all(const std::vector<SUnit *> &Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+ push(Nodes[i]);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.top();
+ Queue.pop();
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ Queue.erase_one(SU);
+ SU->NodeQueueId = 0;
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+ };
+
+ typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+ BURegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<td_ls_rr_sort>
+ TDRegReductionPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+    // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+  // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+template<class SF>
+bool
+RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ unsigned NumRes = TID.getNumDefs();
+ unsigned NumOps = TID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
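+
+// For illustration (hypothetical x86-flavored example): given the
+// two-address instruction t1 = ADD32rr t0, t2, operand t0 is tied to the
+// result, so canClobber(SU(add), SU(def of t0)) returns true -- the add
+// will overwrite t0's register when the copy is coalesced away.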
+
+/// hasCopyToRegUse - Return true if SU has a value successor that is a
+/// CopyToReg node.
+static bool hasCopyToRegUse(const SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
+ return true;
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getFlaggedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const unsigned *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ if (!SUImpDefs)
+ return false;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getValueType(i);
+ if (VT == MVT::Flag || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
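+
+// Illustrative example (assumed): if one of SuccSU's extra results lives in
+// a physical register such as EFLAGS, and a node glued into SU also
+// implicitly defines EFLAGS, the registers overlap and this returns true.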
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+    // Avoid prescheduling copies from virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DOUT << "Prescheduling SU # " << SU->NodeNum
+ << " next to PredSU # " << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n";
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(SU, Edge);
+ Edge.setSUnit(SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
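+///
+/// For illustration (hedged, x86-flavored example): if t0 has two uses,
+///   t1 = ADD32ri t0, 1   // two-address: t1 is tied to t0
+///   t2 = MOV32rr t0      // a plain use
+/// the artificial edge forces the MOV above the ADD in the final order, so
+/// the ADD is the last use of t0 and t0/t1 can share a register after
+/// coalescing.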
+template<class SF>
+void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->getNode();
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ unsigned NumRes = TID.getNumDefs();
+ unsigned NumOps = TID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU) continue;
+ for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+ E = DUSU->Succs.end(); I != E; ++I) {
+ if (I->isCtrl()) continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetInstrInfo::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
+ SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
+ SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
+ continue;
+ if ((!canClobber(SuccSU, DUSU) ||
+ (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+ (!SU->isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, SU)) {
+ DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
+ << " to SU #" << SuccSU->NodeNum << "\n";
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ }
+ }
+ }
+ }
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
+/// predecessors of the successors of the SUnit SU. Stop when the provided
+/// limit is exceeded.
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+ unsigned Limit) {
+ unsigned Sum = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ const SUnit *SuccSU = I->getSUnit();
+ for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+ EE = SuccSU->Preds.end(); II != EE; ++II) {
+ SUnit *PredSU = II->getSUnit();
+ if (!PredSU->isScheduled)
+ if (++Sum > Limit)
+ return Sum;
+ }
+ }
+ return Sum;
+}
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
+ bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
+ bool LIsFloater = LIsTarget && left->NumPreds == 0;
+ bool RIsFloater = RIsTarget && right->NumPreds == 0;
+ unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
+ unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
+
+ if (left->NumSuccs == 0 && right->NumSuccs != 0)
+ return false;
+ else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+ return true;
+
+ if (LIsFloater)
+ LBonus -= 2;
+ if (RIsFloater)
+ RBonus -= 2;
+ if (left->NumSuccs == 1)
+ LBonus += 2;
+ if (right->NumSuccs == 1)
+ RBonus += 2;
+
+ if (LPriority+LBonus != RPriority+RBonus)
+ return LPriority+LBonus < RPriority+RBonus;
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ if (left->NumSuccsLeft != right->NumSuccsLeft)
+ return left->NumSuccsLeft > right->NumSuccsLeft;
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
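+
+// Illustrative usage (assumed, not part of this file): these factories are
+// normally registered with the scheduler registry and selected via llc's
+// -pre-RA-sched option, e.g. -pre-RA-sched=list-burr for the bottom-up
+// register-reduction scheduler created below.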
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD =
+ new ScheduleDAGRRList(*IS->MF, true, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD =
+ new ScheduleDAGRRList(*IS->MF, false, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 0000000..7aa15bc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,294 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGSDNodes class, which is a base class used
+// by the SDNode-based scheduling implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf) {
+}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ DAG = dag;
+ ScheduleDAG::Run(bb, insertPos);
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = NewSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ PhysReg = Reg;
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+ }
+}
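+
+// Illustrative example (x86-flavored, assumed): IDIV32r implicitly defines
+// EAX and EDX; if its remainder result is consumed by a CopyToReg of EDX,
+// the edge is recorded as a physical register dependency with GR32's copy
+// cost.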
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ NI->setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = NewSUnit(NI);
+
+    // See if anything is flagged to this node; if so, add it to the flagged
+    // nodes. Nodes can have at most one flag input and one flag output. Flags
+ // are required to be the last operand and result of a node.
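+    //
+    // For example (assumed illustration): an ISD::ADDC node feeds its carry,
+    // an MVT::Flag value, into an ISD::ADDE node; the two nodes are grouped
+    // into a single SUnit here so they are always scheduled together.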
+
+ // Scan up to find flagged preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ }
+
+ // Scan down to find any flagged succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+ SDValue FlagVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Flag result.
+ bool HasFlagUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (FlagVal.isOperandOf(*UI)) {
+ HasFlagUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ break;
+ }
+ if (!HasFlagUse) break;
+ }
+
+ // If there are flag operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are flagged together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ if (UnitLatencies)
+ NodeSUnit->Latency = 1;
+ else
+ ComputeLatency(NodeSUnit);
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ MVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the
+        // scheduler emits a copy from the physical register to a virtual
+        // register unless it requires a cross-class copy (cost < 0). That
+        // means we only treat an "expensive to copy" register dependency as
+        // a physical register dependency. This may change in the future.
+ if (Cost >= 0)
+ PhysReg = 0;
+ SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpSU->Latency, PhysReg));
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
+/// are given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// flagged together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph() {
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes flagged together into this SUnit.
+ SU->Latency = 0;
+ bool SawMachineOpcode = false;
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+ if (N->isMachineOpcode()) {
+ SawMachineOpcode = true;
+ SU->Latency +=
+ InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass());
+ }
+}
+
+/// CountResults - The results of target nodes have register or immediate
+/// values first, then an optional chain, and optional flag results (which do
+/// not go into the resulting MachineInstr).
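+/// For illustration: a node with the value list (i32, i32, ch, flag) has
+/// CountResults(Node) == 2 -- the chain and flag results are skipped.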
+unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Flag)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by special operands that describe memory references, then an
+/// optional chain operand, then an optional flag operand. Compute the number
+/// of actual operands that will go into the resulting MachineInstr.
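+/// For illustration: for the operand list (t0, t1, MemOperand, ch, flag)
+/// this returns 2 -- the memory-reference, chain, and flag operands are not
+/// counted.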
+unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) {
+ unsigned N = ComputeMemOperandsEnd(Node);
+ while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode()))
+ --N; // Ignore MEMOPERAND nodes
+ return N;
+}
+
+/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode
+/// operand
+unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+ return N;
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+ if (!SU->getNode()) {
+ cerr << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
+ cerr << "\n";
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ cerr << " ";
+ FlaggedNodes.back()->dump(DAG);
+ cerr << "\n";
+ FlaggedNodes.pop_back();
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 0000000..2a278b7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,179 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Flag operands are grouped along with the flagged
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ SelectionDAG *DAG; // DAG of the current basic block
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ virtual ~ScheduleDAGSDNodes() {}
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<MemOperandSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken) return true;
+ return false;
+ }
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessors / successors info nor the temporary scheduling states.
+ ///
+ SUnit *Clone(SUnit *N);
+
+ /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
+    /// are given as input. This SUnit graph is similar to the SelectionDAG, but
+ /// excludes nodes that aren't interesting to scheduling, and represents
+ /// flagged together nodes with a single SUnit.
+ virtual void BuildSchedGraph();
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ /// CountResults - The results of target nodes have register or immediate
+    /// values first, then an optional chain, and optional flag results
+    /// (which do not go into the machine instrs).
+ static unsigned CountResults(SDNode *Node);
+
+ /// CountOperands - The inputs to target nodes have any actual inputs first,
+ /// followed by special operands that describe memory references, then an
+ /// optional chain operand, then flag operands. Compute the number of
+ /// actual operands that will go into the resulting MachineInstr.
+ static unsigned CountOperands(SDNode *Node);
+
+ /// ComputeMemOperandsEnd - Find the index one past the last
+ /// MemOperandSDNode operand
+ static unsigned ComputeMemOperandsEnd(SDNode *Node);
+
+    /// EmitNode - Generate machine code for a node and needed dependencies.
+ /// VRBaseMap contains, for each already emitted node, the first virtual
+ /// register number for the results of the node.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool HasClone,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ virtual MachineBasicBlock *EmitSchedule();
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ private:
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS
+ /// nodes.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getDstOfCopyToRegUse - If the only use of the specified result number of
+ /// node is a CopyToReg, return its destination register. Return 0 otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const;
+
+ void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum, const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+ bool IsCloned, unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const TargetInstrDesc &II, bool IsClone,
+ bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
new file mode 100644
index 0000000..fb5e207
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -0,0 +1,668 @@
+//===- ScheduleDAGSDNodesEmit.cpp - Emit routines for ScheduleDAGSDNodes -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAGSDNodes class, which
+// creates MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = NULL;
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Flag)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC =
+ getInstrOperandRegClass(TRI, II, i+II.getNumDefs());
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ if (UseRC->hasSuperClass(RC))
+ UseRC = RC;
+ else
+ assert((UseRC == RC || RC->hasSuperClass(UseRC)) &&
+ "Multiple uses expecting different register classes!");
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ MVT VT = Node->getValueType(ResNo);
+ const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI.getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI.createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg,
+ DstRC, SrcRC);
+
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfCopyToRegUse - If the only use of the specified result number of
+/// node is a CopyToReg, return its destination register. Return 0 otherwise.
+unsigned ScheduleDAGSDNodes::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void ScheduleDAGSDNodes::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const TargetInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, II, i);
+
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI.createVirtualRegister(RC);
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned ScheduleDAGSDNodes::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+ VReg = MRI.createVirtualRegister(RC);
+ }
+ BuildMI(BB, Op.getDebugLoc(), TII->get(TargetInstrInfo::IMPLICIT_DEF),VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+ScheduleDAGSDNodes::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ bool isOptDef = IIOpNum < TID.getNumOperands() &&
+ TID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it.
+ if (II) {
+ const TargetRegisterClass *SrcRC =
+ MRI.getRegClass(VReg);
+ const TargetRegisterClass *DstRC =
+ getInstrOperandRegClass(TRI, *II, IIOpNum);
+ assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+ "Don't have operand info for this instruction!");
+ if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ VReg = NewVReg;
+ }
+ }
+
+ MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// assertions only.
+void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateImm(C->getZExtValue()));
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ const ConstantFP *CFP = F->getConstantFPValue();
+ MI->addOperand(MachineOperand::CreateFPImm(CFP));
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA->getOffset()));
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateJTI(JT->getIndex()));
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ const Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM.getTargetData()->getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM.getTargetData()->getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ if (CP->isMachineConstantPoolEntry())
+ Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align);
+ MI->addOperand(MachineOperand::CreateCPI(Idx, Offset));
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateES(ES->getSymbol()));
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
+/// "SubIdx"'th sub-register class is the specified register class and whose
+/// type matches the specified type.
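+/// For illustration (x86-flavored, assumed): with TRC = GR32, SubIdx = the
+/// 32-bit sub-register index, and VT = i64, this returns GR64, since GR64's
+/// 32-bit sub-register class is GR32 and GR64 has type i64.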
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+ unsigned SubIdx, MVT VT) {
+  // Pick the register class of the superregister for this type.
+ for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+ E = TRC->superregclasses_end(); I != E; ++I)
+ if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+ return *I;
+ assert(false && "Couldn't find the register class");
+ return 0;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG) {
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(),
+ TII->get(TargetInstrInfo::EXTRACT_SUBREG));
+
+ // Figure out the register class to create for the destreg.
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI.createVirtualRegister(SRC);
+ }
+
+ // Add def, source, and subreg index
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ BB->insert(InsertPos, MI);
+ } else if (Opc == TargetInstrInfo::INSERT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubReg = getVR(N1, VRBaseMap);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ const TargetRegisterClass *TRC = MRI.getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ getSuperRegisterRegClass(TRC, SubIdx,
+ Node->getValueType(0));
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI.createVirtualRegister(SRC);
+ }
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), TII->get(Opc));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+ // If creating a subreg_to_reg, then the first input operand
+ // is an implicit value immediate, otherwise it's a register
+ if (Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+ } else
+ AddOperand(MI, N0, 0, 0, VRBaseMap);
+    // Add the subregister being inserted.
+ AddOperand(MI, N1, 0, 0, VRBaseMap);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ BB->insert(InsertPos, MI);
+ } else
+ assert(0 && "Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+ScheduleDAGSDNodes::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(VReg);
+
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted &&
+ "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
+ (void) Emitted;
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+  // If this is a machine instruction, emit it directly.
+ if (Node->isMachineOpcode()) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ if (Opc == TargetInstrInfo::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const TargetInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ unsigned MemOperandsEnd = ComputeMemOperandsEnd(Node);
+ bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+ II.getImplicitDefs() != 0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ assert((II.getNumOperands() == NumMIOperands ||
+ HasPhysRegOuts || II.isVariadic()) &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ for (unsigned i = 0; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i+II.getNumDefs(), &II, VRBaseMap);
+
+ // Emit all of the memory operands of this instruction
+ for (unsigned i = NodeOperands; i != MemOperandsEnd; ++i)
+ AddMemOperand(MI, cast<MemOperandSDNode>(Node->getOperand(i))->MO);
+
+ if (II.usesCustomDAGSchedInsertionHook()) {
+      // Insert this instruction into the basic block using a target-specific
+      // inserter, which may return a new basic block.
+ BB = TLI->EmitInstrWithCustomInserter(MI, BB);
+ InsertPos = BB->end();
+ } else {
+ BB->insert(InsertPos, MI);
+ }
+
+    // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (Node->hasAnyUseOfValue(i))
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ }
+ }
+ return;
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump(DAG);
+#endif
+ assert(0 && "This target-independent node should have been selected!");
+ break;
+ case ISD::EntryToken:
+ assert(0 && "EntryToken should have been excluded from the schedule!");
+ break;
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0;
+ // Get the register classes of the src/dst.
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+ SrcTRC = MRI.getRegClass(SrcReg);
+ else
+ SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType());
+
+ if (TargetRegisterInfo::isVirtualRegister(DestReg))
+ DstTRC = MRI.getRegClass(DestReg);
+ else
+ DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
+ Node->getOperand(1).getValueType());
+
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, DestReg, SrcReg,
+ DstTRC, SrcTRC);
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(),
+ TII->get(TargetInstrInfo::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ const char *AsmStr =
+ cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+ MI->addOperand(MachineOperand::CreateES(AsmStr));
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = 2; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ MI->addOperand(MachineOperand::CreateImm(Flags));
+ ++i; // Skip the ID value.
+
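+      // Flag word encoding (as consumed by the cases below): the low 3 bits
+      // hold the operand kind (1 = register use, 2 = register def,
+      // 3 = immediate, 4 = addressing mode, 6 = earlyclobber def) and the
+      // remaining bits hold the number of registers in the group.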
+ switch (Flags & 7) {
+ default: assert(0 && "Bad flags!");
+ case 2: // Def of register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ }
+ break;
+ case 6: // Def of earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false,
+ false, 0, true));
+ }
+ break;
+ case 1: // Use of register.
+ case 3: // Immediate.
+ case 4: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (; NumVals; --NumVals, ++i)
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+ break;
+ }
+ }
+ BB->insert(InsertPos, MI);
+ break;
+ }
+ }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any flagged SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
+ N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,VRBaseMap);
+ FlaggedNodes.pop_back();
+ }
+ EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+ }
+
+ return BB;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..195896e
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,5743 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+static const fltSemantics *MVTToAPFloatSemantics(MVT VT) {
+ switch (VT.getSimpleVT()) {
+ default: assert(0 && "Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
+SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(MVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // PPC long double cannot be converted to any other type.
+ if (VT == MVT::ppcf128 ||
+ &Val.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &losesInfo);
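+  // e.g. an f64 0.5 converts to f32 exactly (losesInfo stays false), while
+  // an f64 0.1 is inexact in f32 and reports lost information.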
+ return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
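+/// For example, (BUILD_VECTOR -1, undef, -1, -1) qualifies: undef elements
+/// are skipped and every other element must match the ~0 constant.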
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BIT_CONVERT)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements.
+ SDValue NotZero = N->getOperand(i);
+ if (isa<ConstantSDNode>(NotZero)) {
+ if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(NotZero)) {
+ if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
+ bitcastToAPInt().isAllOnesValue())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BIT_CONVERT)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+  // Do not accept build_vectors that aren't all constants or which have non-0
+  // elements.
+ SDValue Zero = N->getOperand(i);
+ if (isa<ConstantSDNode>(Zero)) {
+ if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(Zero)) {
+ if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+ return false;
+ } else
+ return false;
+
+  // Okay, we have at least one 0 value, check to see if the rest match or are
+  // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != Zero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+ return false;
+ unsigned NumElems = N->getNumOperands();
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = N->getOperand(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ return true;
+}
+
+
+/// isDebugLabel - Return true if the specified node represents a debug
+/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node).
+bool ISD::isDebugLabel(const SDNode *N) {
+ if (N->getOpcode() == ISD::DBG_LABEL)
+ return true;
+ if (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL)
+ return true;
+ return false;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
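+/// For example, getSetCCSwappedOperands(ISD::SETOLT) returns ISD::SETOGT,
+/// since (X < Y) holds exactly when (Y > X) does.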
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
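+/// For example, the integer inverse of ISD::SETEQ is ISD::SETNE, and the
+/// floating point inverse of ISD::SETOLT is ISD::SETUGE (the unordered
+/// complement of an ordered comparison).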
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation, 2 if it is an unsigned comparison, and 0 if the
+/// operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: assert(0 && "Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
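+/// For example, getSetCCOrOperation(ISD::SETGT, ISD::SETEQ, true) ORs the
+/// G and E bits to yield ISD::SETGE, since (X > Y) | (X == Y) == (X >= Y).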
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+    Op &= ~16;     // Clear the N bit.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between
+/// different comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns SETCC_INVALID if it is not possible to represent the
+/// resultant comparison.
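+/// For example, getSetCCAndOperation(ISD::SETGE, ISD::SETNE, true) ANDs the
+/// condition bits to yield ISD::SETGT, since (X >= Y) & (X != Y) == (X > Y).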
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
+
+const TargetMachine &SelectionDAG::getTarget() const {
+ return MF->getTarget();
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDValue *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDUse *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+ unsigned short OpC, SDVTList VTList,
+ const SDValue *OpList, unsigned N) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break; // Normal nodes don't need extra info.
+ case ISD::ARG_FLAGS:
+ ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits());
+ break;
+ case ISD::TargetConstant:
+ case ISD::Constant:
+ ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+ break;
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ }
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+ case ISD::DBG_STOPPOINT: {
+ const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(N);
+ ID.AddInteger(DSP->getLine());
+ ID.AddInteger(DSP->getColumn());
+ ID.AddPointer(DSP->getCompileUnit());
+ break;
+ }
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::MEMOPERAND: {
+ const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO;
+ MO.Profile(ID);
+ break;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ break;
+ }
+ case ISD::CALL: {
+ const CallSDNode *Call = cast<CallSDNode>(N);
+ ID.AddInteger(Call->getCallingConv());
+ ID.AddInteger(Call->isVarArg());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries alignment, volatility, indexing mode, and
+/// extension/truncation information.
+///
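+/// For example, an unindexed (AM == 0), non-extending (ConvType == 0),
+/// non-volatile access with 4-byte alignment encodes as
+/// (Log2_32(4) + 1) << 6 == 0xC0; making it volatile also sets bit 5.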
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM,
+ bool isVolatile, unsigned Alignment) {
+ assert((ConvType & 3) == ConvType &&
+ "ConvType may not require more than 2 bits!");
+ assert((AM & 7) == AM &&
+ "AM may not require more than 3 bits!");
+ return ConvType |
+ (AM << 2) |
+ (isVolatile << 5) |
+ ((Log2_32(Alignment) + 1) << 6);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Flag)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::DBG_LABEL:
+ case ISD::DBG_STOPPOINT:
+ case ISD::EH_LABEL:
+ case ISD::DECLARE:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Flag)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes) that adds a
+  // reference to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty())
+ DeadNodes.push_back(I);
+
+ RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+ DAGUpdateListener *UpdateListener) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, 0);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+ RemoveDeadNodes(DeadNodes, UpdateListener);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ N->NodeType = ISD::DELETED_NODE;
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::EntryToken:
+ assert(0 && "EntryToken should not be in CSEMaps!");
+ return false;
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol:
+ Erased =
+ TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::VALUETYPE: {
+ MVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT()] != 0;
+ ValueTypeNodes[VT.getSimpleVT()] = 0;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ cerr << "\n";
+ assert(0 && "Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
+ DAGUpdateListener *UpdateListener) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing, UpdateListener);
+
+ // N is now dead. Inform the listener if it exists and delete it.
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If the node doesn't already exist, we updated it. Inform a listener if
+ // it exists.
+ if (UpdateListener)
+ UpdateListener->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ AddNodeIDCustom(ID, N);
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ AddNodeIDCustom(ID, N);
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ const SDValue *Ops,unsigned NumOps,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+ AddNodeIDCustom(ID, N);
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+/// VerifyNode - Sanity check the given node. Aborts if it is invalid.
+void SelectionDAG::VerifyNode(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ MVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ MVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ break;
+ }
+ }
+}
+
+/// getMVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getMVTAlignment(MVT VT) const {
+ const Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::Int8Ty, 0) :
+ VT.getTypeForMVT();
+
+ return TLI.getTargetData()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
+ : TLI(tli), FLI(fli), DW(0),
+ EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(),
+ getVTList(MVT::Other)), Root(getEntryNode()) {
+ AllNodes.push_back(&EntryNode);
+}
+
+void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
+ DwarfWriter *dw) {
+ MF = &mf;
+ MMI = mmi;
+ DW = dw;
+}
+
+SelectionDAG::~SelectionDAG() {
+ allnodes_clear();
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(0));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(0));
+
+ EntryNode.UseList = 0;
+ AllNodes.push_back(&EntryNode);
+ Root = getEntryNode();
+}
+
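+/// getZeroExtendInReg - Return Op with all bits above VT's width masked off,
+/// e.g. zero extending the low 8 bits of an i32 value in place yields
+/// (AND x, 255).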
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) {
+ if (Op.getValueType() == VT) return Op;
+ APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(),
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, Op.getValueType()));
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) {
+ MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) {
+ MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) {
+ return getConstant(*ConstantInt::get(Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&Val);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+ if (!N) {
+ N = NodeAllocator.Allocate<ConstantSDNode>();
+ new (N) ConstantSDNode(isT, &Val, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+ return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) {
+ return getConstantFP(*ConstantFP::get(V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ MVT EltVT =
+ VT.isVector() ? VT.getVectorElementType() : VT;
+
+  // Do the map lookup using the actual bit pattern for the floating point
+  // value, so that we don't have problems with 0.0 comparing equal to -0.0,
+  // and so that we avoid issues with SNaNs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&V);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+ if (!N) {
+ N = NodeAllocator.Allocate<ConstantFPSDNode>();
+ new (N) ConstantFPSDNode(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ // FIXME DebugLoc info might be appropriate here
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) {
+ MVT EltVT =
+ VT.isVector() ? VT.getVectorElementType() : VT;
+ if (EltVT==MVT::f32)
+ return getConstantFP(APFloat((float)Val), VT, isTarget);
+ else
+ return getConstantFP(APFloat(Val), VT, isTarget);
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+ MVT VT, int64_t Offset,
+ bool isTargetGA) {
+ unsigned Opc;
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
+ if (BitWidth < 64)
+ Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
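+  // e.g. with 32-bit pointers an Offset of 0xFFFFFFFF becomes -1: the shift
+  // pair keeps the low 32 bits and restores the sign.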
+
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ if (GVar && GVar->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
+ new (N) GlobalAddressSDNode(isTargetGA, GV, VT, Offset);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(FI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
+ new (N) FrameIndexSDNode(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(JTI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
+ new (N) JumpTableSDNode(JTI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget) {
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+ new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget) {
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->AddSelectionDAGCSEId(ID);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+ new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MBB);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
+ new (N) BasicBlockSDNode(MBB);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0);
+ ID.AddInteger(Flags.getRawBits());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>();
+ new (N) ARG_FLAGSSDNode(Flags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(MVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT()+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()];
+
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<VTSDNode>();
+ new (N) VTSDNode(VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+ new (N) ExternalSymbolSDNode(false, Sym, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT) {
+ SDNode *&N = TargetExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+ new (N) ExternalSymbolSDNode(true, Sym, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (CondCodeNodes[Cond] == 0) {
+ CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>();
+ new (N) CondCodeSDNode(Cond);
+ CondCodeNodes[Cond] = N;
+ AllNodes.push_back(N);
+ }
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - Swap the values of N1 and N2, and adjust the shuffle mask
+// M so that indices that pointed at N1 now point at N2, and indices that
+// pointed at N2 now point at N1.
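+// For example, with 4-element vectors, commuting shuffle(A, B, <0,5,2,7>)
+// yields shuffle(B, A, <4,1,6,3>).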
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+ std::swap(N1, N2);
+ int NElts = M.size();
+ for (int i = 0; i != NElts; ++i) {
+ if (M[i] >= NElts)
+ M[i] -= NElts;
+ else if (M[i] >= 0)
+ M[i] += NElts;
+ }
+}
+
+SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
+ SDValue N2, const int *Mask) {
+ assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+  assert(VT.isVector() && N1.getValueType().isVector() &&
+         "Vector Shuffle VTs must be vectors");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+ && "Vector Shuffle VTs must have same element type");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NElts; ++i) {
+ assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+ MaskVec.push_back(Mask[i]);
+ }
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (unsigned i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.getOpcode() == ISD::UNDEF)
+ commuteShuffle(N1, N2, MaskVec);
+
+  // Canonicalize: if all indices select the lhs, -> shuffle lhs, undef;
+  // if all indices select the rhs, -> shuffle rhs, undef.
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= (int)NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
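+  // At this point a shuffle like shuffle(undef, V, <4,5,6,7>) has become
+  // shuffle(V, undef, <0,1,2,3>), which the identity check below folds
+  // to V itself.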
+
+  // If this is an identity shuffle, or if every element is undef, fold to
+  // the appropriate node.
+ bool AllUndef = true;
+ bool Identity = true;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] >= 0) AllUndef = false;
+ }
+ if (Identity)
+ return N1;
+ if (AllUndef)
+ return getUNDEF(VT);
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+ for (unsigned i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+  // the node is deallocated, but recovered when OperandAllocator is reset.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+ ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
+ new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
+ SDValue Val, SDValue DTy,
+ SDValue STy, SDValue Rnd, SDValue Sat,
+ ISD::CvtCode Code) {
+ // If the src and dest types are the same and the conversion is between
+ // integer types of the same sign or two floats, no conversion is necessary.
+ if (DTy == STy &&
+ (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+ return Val;
+
+  SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+  FoldingSetNodeID ID;
+  // Seed the ID with the opcode, value types and operands; without this the
+  // CSE lookup below could never match an existing CONVERT_RNDSAT node.
+  AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
+  new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ ID.AddInteger(RegNo);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
+ new (N) RegisterSDNode(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root,
+ unsigned Line, unsigned Col,
+ Value *CU) {
+ SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>();
+ new (N) DbgStopPointSDNode(Root, Line, Col, CU);
+ N->setDebugLoc(DL);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
+ SDValue Root,
+ unsigned LabelID) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
+ ID.AddInteger(LabelID);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
+ new (N) LabelSDNode(Opcode, dl, Root, LabelID);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || isa<PointerType>(V->getType())) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(V);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
+ new (N) SrcValueSDNode(V);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) {
+#ifndef NDEBUG
+ const Value *v = MO.getValue();
+ assert((!v || isa<PointerType>(v->getType())) &&
+ "SrcValue is not a pointer?");
+#endif
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0);
+ MO.Profile(ID);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>();
+ new (N) MemOperandSDNode(MO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getShiftAmountOperand - Return the specified value cast to the target's
+/// desired shift amount type.
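+/// For example, if the target's shift amount type is i8, an i32 shift count
+/// is truncated to i8 here, while an i1 count would be zero extended.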
+SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+ MVT OpTy = Op.getValueType();
+ MVT ShTy = TLI.getShiftAmountTy();
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) {
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSizeInBits()/8;
+ const Type *Ty = VT.getTypeForMVT();
+ unsigned StackAlign =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+ VT2.getStoreSizeInBits())/8;
+ const Type *Ty1 = VT1.getTypeForMVT();
+ const Type *Ty2 = VT2.getTypeForMVT();
+ const TargetData *TD = TLI.getTargetData();
+ unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+ TD->getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1,
+ SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getConstant(1, VT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+ // No compile time operations on this type yet.
+ if (N1C->getValueType(0) == MVT::ppcf128)
+ return SDValue();
+
+ APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT);
+ case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+ case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
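+/// For example, if Op is (AND x, 0xFF), then
+/// MaskedValueIsZero(Op, 0xFFFFFF00) returns true, since ComputeMaskedBits
+/// proves the top 24 bits of the result are zero.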
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth) const {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) const {
+ unsigned BitWidth = Mask.getBitWidth();
+ assert(BitWidth == Op.getValueType().getSizeInBits() &&
+ "Mask size mismatches value type size!");
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ if (Depth == 6 || Mask == 0)
+ return; // Limit search depth.
+
+ APInt KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+    // Output known-0 bits are known to be clear if they are clear in either
+    // the LHS or the RHS.
+ KnownZero |= KnownZero2;
+ return;
+ case ISD::OR:
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+    // Output known-1 bits are known to be set if they are set in either
+    // the LHS or the RHS.
+ KnownOne |= KnownOne2;
+ return;
+ case ISD::XOR: {
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are those known set on one side and known clear
+    // on the other.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case ISD::MUL: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clear();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
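+    // e.g. if each operand is known to be a multiple of 4 (two trailing
+    // zero bits), the product is a multiple of 16 (four trailing zero bits).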
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clear();
+ KnownZero2.clear();
+ ComputeMaskedBits(Op.getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case ISD::SELECT:
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SELECT_CC:
+ ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ return;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShAmt;
+ KnownOne <<= ShAmt;
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ }
+ return;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ return;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ APInt InDemandedMask = (Mask << ShAmt);
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ if (HighBits.getBoolValue())
+ InDemandedMask |= APInt::getSignBit(BitWidth);
+
+ ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bits.
+ APInt SignBit = APInt::getSignBit(BitWidth);
+ SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
+
+ if (KnownZero.intersects(SignBit)) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne.intersects(SignBit)) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ return;
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+
+ APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignBit.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
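+    // The result of cttz/ctlz/ctpop on an N-bit value is at most N, which
+    // fits in Log2_32(N)+1 bits, so all higher result bits are known zero.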
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownOne.clear();
+ return;
+ }
+ case ISD::LOAD: {
+ if (ISD::isZEXTLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ MVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ }
+ return;
+ }
+ case ISD::ZERO_EXTEND: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
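+    // Recurse at the narrower input width, then widen the results back out;
+    // the bits created by the zero extension are known zero (NewBits).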
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt InSignBit = APInt::getSignBit(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded. Temporarily set this bit in the mask for our callee.
+ if (NewBits.getBoolValue())
+ InMask |= InSignBit;
+
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+ // Note if the sign bit is known to be zero or one.
+ bool SignBitKnownZero = KnownZero.isNegative();
+ bool SignBitKnownOne = KnownOne.isNegative();
+ assert(!(SignBitKnownZero && SignBitKnownOne) &&
+ "Sign bit can't be known to be both zero and one!");
+
+ // If the sign bit wasn't actually demanded by our caller, we don't
+ // want it set in the KnownZero and KnownOne result values. Reset the
+ // mask and reapply it to the result values.
+ InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero &= InMask;
+ KnownOne &= InMask;
+
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero or one, the top bits match.
+ if (SignBitKnownZero)
+ KnownZero |= NewBits;
+ else if (SignBitKnownOne)
+ KnownOne |= NewBits;
+ return;
+ }
+ case ISD::ANY_EXTEND: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ return;
+ }
+ case ISD::TRUNCATE: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt InMask = Mask;
+ InMask.zext(InBits);
+ KnownZero.zext(InBits);
+ KnownOne.zext(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.trunc(BitWidth);
+ KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= (~InMask) & Mask;
+ return;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+      // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+        // NLZ can't be BitWidth: C is non-negative, so C+1 is nonzero.
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+ Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case ISD::ADD: {
+    // Low output bits are known zero out to the shorter of the two operands'
+    // runs of low known-zero bits.  For example, 8+(X<<3) is known to have
+    // the low 3 bits clear.
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ KnownZeroOut = std::min(KnownZeroOut,
+ KnownZero2.countTrailingOnes());
+
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+ APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+
+ // If the sign bit of the first operand is zero, the sign bit of
+ // the result is zero. If the first operand has no one bits below
+ // the second operand's single 1 bit, its sign will be zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero2 |= ~LowBits;
+
+ KnownZero |= KnownZero2 & Mask;
+
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+ return;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+ Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clear();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ return;
+ }
+ default:
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
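+    // Note: the case labels below sit inside this 'if' so that intrinsic
+    // opcodes reach the target hook even though they are < BUILTIN_OP_END.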
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this);
+ }
+ return;
+ }
+}
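+// Usage sketch for ComputeMaskedBits (illustrative; Val and VT stand for an
+// arbitrary SDValue and its integer type):
+//
+//   APInt KnownZero, KnownOne;
+//   APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+//   DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+//   if (KnownZero.isNegative()) { /* the sign bit of Val is known zero */ }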
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
+ MVT VT = Op.getValueType();
+ assert(VT.isInteger() && "Invalid VT!");
+ unsigned VTBits = VT.getSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+ // If negative, return # leading ones.
+ if (Val.isNegative())
+ return Val.countLeadingOnes();
+
+ // Return # leading zeros.
+ return Val.countLeadingZeros();
+ }
+
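+  // e.g. (sext i8 X to i32) has 24 + ComputeNumSignBits(X) sign bits:
+  // every bit added by the extension is a copy of X's sign bit.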
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getZExtValue() >= VTBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI.getBooleanContents() ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+      // Handle rotate right by N like a rotate left by VTBits-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+ break;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ break;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+    case ISD::SEXTLOAD:    // e.g. i16->i32 sextload: 17 sign bits known
+ Tmp = LD->getMemoryVT().getSizeInBits();
+ return VTBits-Tmp+1;
+    case ISD::ZEXTLOAD:    // e.g. i16->i32 zextload: 16 sign bits known
+ Tmp = LD->getMemoryVT().getSizeInBits();
+ return VTBits-Tmp;
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+  // Return # leading zeros.  We use 'min' here in case Mask was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
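+// For example, ComputeNumSignBits of (sra i32 X, 27) is at least 28, enough
+// to prove that the value fits in a signed 5-bit integer.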
+
+
+bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+ if (!GA) return false;
+ if (GA->getOffset() != 0) return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+ if (!GV) return false;
+ MachineModuleInfo *MMI = getMachineModuleInfo();
+ return MMI && MMI->hasDebugInfo();
+}
+
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
+ unsigned i) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getMaskElt(i) < 0)
+ return getUNDEF(VT.getVectorElementType());
+ unsigned Index = N->getMaskElt(i);
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ Index %= NumElems;
+
+ if (V.getOpcode() == ISD::BIT_CONVERT) {
+ V = V.getOperand(0);
+ MVT VVT = V.getValueType();
+ if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
+ return SDValue();
+ }
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : getUNDEF(VT.getVectorElementType());
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+ if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V))
+ return getShuffleScalarElt(SVN, Index);
+ return SDValue();
+}
+
+
+/// getNode - Gets or creates the specified node.
+///
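+/// Nodes are uniqued through the CSE map, so requesting an identical node
+/// again returns the existing SDNode instead of allocating a new one.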
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ MVT VT, SDValue Operand) {
+ // Constant fold unary operations with an integer constant operand.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+ const APInt &Val = C->getAPIntValue();
+ unsigned BitWidth = VT.getSizeInBits();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ const uint64_t zero[] = {0, 0};
+ // No compile time operations on this type.
+ if (VT==MVT::ppcf128)
+ break;
+ APFloat apf = APFloat(APInt(BitWidth, 2, zero));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, VT);
+ }
+ case ISD::BIT_CONVERT:
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(Val.bitsToFloat(), VT);
+ else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(Val.bitsToDouble(), VT);
+ break;
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), VT);
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), VT);
+ case ISD::CTLZ:
+ return getConstant(Val.countLeadingZeros(), VT);
+ case ISD::CTTZ:
+ return getConstant(Val.countTrailingZeros(), VT);
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+ APFloat V = C->getValueAPF(); // make copy
+ if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*MVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ APInt api(VT.getSizeInBits(), 2, x);
+ return getConstant(api, VT);
+ }
+ case ISD::BIT_CONVERT:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ if (Operand.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().bitsLT(VT)
+ && "Invalid sext node, dst < src!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().bitsLT(VT)
+ && "Invalid zext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT,
+ Operand.getNode()->getOperand(0));
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().bitsLT(VT)
+ && "Invalid anyext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType().bitsGT(VT)
+ && "Invalid truncate node, src < dst!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else
+ return Operand.getNode()->getOperand(0);
+ }
+ break;
+ case ISD::BIT_CONVERT:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BIT_CONVERT between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+ Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getNode()->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ SDValue Ops[1] = { Operand };
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
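+// For example, getNode(ISD::TRUNCATE, dl, MVT::i8, X) where X is
+// (zext i8 Y to i32) returns Y directly rather than building a new node.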
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
+ MVT VT,
+ ConstantSDNode *Cst1,
+ ConstantSDNode *Cst2) {
+ const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD: return getConstant(C1 + C2, VT);
+ case ISD::SUB: return getConstant(C1 - C2, VT);
+ case ISD::MUL: return getConstant(C1 * C2, VT);
+ case ISD::UDIV:
+ if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
+ break;
+ case ISD::UREM:
+ if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
+ break;
+ case ISD::SDIV:
+ if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
+ break;
+ case ISD::SREM:
+ if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
+ break;
+ case ISD::AND: return getConstant(C1 & C2, VT);
+ case ISD::OR: return getConstant(C1 | C2, VT);
+ case ISD::XOR: return getConstant(C1 ^ C2, VT);
+ case ISD::SHL: return getConstant(C1 << C2, VT);
+ case ISD::SRL: return getConstant(C1.lshr(C2), VT);
+ case ISD::SRA: return getConstant(C1.ashr(C2), VT);
+ case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
+ case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
+ default: break;
+ }
+
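+  // Division or remainder by a zero constant, and any opcode not handled
+  // above, deliberately folds to nothing; the caller keeps the original node.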
+ return SDValue();
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue N1, SDValue N2) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTORS with all BUILD_VECTOR operands can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::AND:
+ assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ // fall through
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ }
+ }
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG: {
+ MVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT.bitsLE(VT) && "Not rounding down!");
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ }
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ MVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (VT == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
+ if (N1C) {
+ APInt Val = N1C->getAPIntValue();
+ unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits();
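+      // Sign-extend the low FromBits bits in place: shift them up to the
+      // top of the value, then arithmetic-shift them back down.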
+ Val <<= Val.getBitWidth()-FromBits;
+ Val = Val.ashr(Val.getBitWidth()-FromBits);
+ return getConstant(Val, VT);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+ if (Elt.getValueType() != VT) {
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ assert(VT.isInteger() && Elt.getValueType().isInteger() &&
+ VT.bitsLE(Elt.getValueType()) &&
+ "Bad type for BUILD_VECTOR operand");
+ Elt = getNode(ISD::TRUNCATE, DL, VT, Elt);
+ }
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ // If the indices are the same, return the inserted element.
+ if (N1.getOperand(2) == N2)
+ return N1.getOperand(1);
+ // If the indices are known different, extract the element from
+ // the original vector.
+ else if (isa<ConstantSDNode>(N1.getOperand(2)) &&
+ isa<ConstantSDNode>(N2))
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ if (N1.getValueType() == VT) // Trivial extraction.
+ return N1;
+ break;
+ }
+
+ if (N1C) {
+ if (N2C) {
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+ if (SV.getNode()) return SV;
+    } else {  // Canonicalize constant to RHS if commutative
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+ }
+ }
+
+ // Constant fold FP operations.
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP) {
+ if (!N2CFP && isCommutativeBinOp(Opcode)) {
+      // Canonicalize constant to RHS if commutative
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ } else if (N2CFP && VT != MVT::ppcf128) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (s != APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FSUB:
+ s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FMUL:
+ s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FDIV:
+ s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FREM :
+ s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, VT);
+ default: break;
+ }
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(undef, arg2) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.getOpcode() == ISD::UNDEF)
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ SDValue Ops[] = { N1, N2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ // Perform various simplifications.
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTORS with all BUILD_VECTOR operands can be simplified to
+ // one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR &&
+ N3.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+ if (Simp.getNode()) return Simp;
+ break;
+ }
+ case ISD::SELECT:
+ if (N1C) {
+ if (N1C->getZExtValue())
+ return N2; // select true, X, Y -> X
+ else
+ return N3; // select false, X, Y -> Y
+ }
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::BRCOND:
+ if (N2C) {
+ if (N2C->getZExtValue()) // Unconditional branch
+ return getNode(ISD::BR, DL, MVT::Other, N1, N3);
+ else
+ return N1; // Never-taken branch
+ }
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ assert(0 && "should use getVectorShuffle constructor!");
+ break;
+ case ISD::BIT_CONVERT:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ SDValue Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getMemsetValue - Build a value of type VT in which every byte equals the
+/// low byte of the memset value operand.
+static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned NumBits = VT.isVector() ?
+ VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = APInt(NumBits, C->getZExtValue() & 255);
+ unsigned Shift = 8;
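+    // Splat the low byte across the value by repeated doubling, e.g. for
+    // i32 and byte 0xAB: 0xAB -> 0xABAB -> 0xABABABAB.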
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ if (VT.isInteger())
+ return DAG.getConstant(Val, VT);
+ return DAG.getConstantFP(APFloat(Val), VT);
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Value = DAG.getNode(ISD::OR, dl, VT,
+ DAG.getNode(ISD::SHL, dl, VT, Value,
+ DAG.getConstant(Shift,
+ TLI.getShiftAmountTy())),
+ Value);
+ Shift <<= 1;
+ }
+
+ return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
+ // Handle vector with all elements zero.
+ if (Str.empty()) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts)));
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumBits = VT.getSizeInBits();
+ unsigned MSB = NumBits / 8;
+ uint64_t Val = 0;
+ if (TLI.isLittleEndian())
+ Offset = Offset + MSB - 1;
+ for (unsigned i = 0; i != MSB; ++i) {
+ Val = (Val << 8) | (unsigned char)Str[Offset];
+ Offset += TLI.isLittleEndian() ? -1 : 1;
+ }
+ return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns a node that computes Base plus a constant
+/// byte Offset.
+///
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ MVT VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+ VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+ return true;
+
+ return false;
+}
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static
+bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
+ SDValue Dst, SDValue Src,
+ unsigned Limit, uint64_t Size, unsigned &Align,
+ std::string &Str, bool &isSrcStr,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ isSrcStr = isMemSrcFromString(Src, Str);
+ bool isSrcConst = isa<ConstantSDNode>(Src);
+ bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses();
+ MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr);
+ if (VT != MVT::iAny) {
+ unsigned NewAlign = (unsigned)
+ TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT());
+ // If source is a string constant, this will require an unaligned load.
+ if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
+ if (Dst.getOpcode() != ISD::FrameIndex) {
+        // Can't change destination alignment.  It would require an
+        // unaligned store.
+ if (AllowUnalign)
+ VT = MVT::iAny;
+ } else {
+ int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (MFI->isFixedObjectIndex(FI)) {
+          // Can't change destination alignment.  It would require an
+          // unaligned store.
+ if (AllowUnalign)
+ VT = MVT::iAny;
+ } else {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI) < NewAlign)
+ MFI->setObjectAlignment(FI, NewAlign);
+ Align = NewAlign;
+ }
+ }
+ }
+ }
+
+ if (VT == MVT::iAny) {
+ if (AllowUnalign) {
+ VT = MVT::i64;
+ } else {
+ switch (Align & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
+
+ MVT LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1);
+ assert(LVT.isInteger());
+
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+ if (VT.isVector()) {
+ VT = MVT::i64;
+ while (!TLI.isTypeLegal(VT))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+ VTSize = VT.getSizeInBits() / 8;
+ } else {
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+ VTSize >>= 1;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
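+// Illustrative decomposition (assuming the target reports no preferred type
+// and i64/i32/i16/i8 are all legal): an 11-byte, 8-byte-aligned copy becomes
+// one i64, one i16 and one i8 operation, i.e. three memory ops.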
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ std::vector<MVT> MemOps;
+ uint64_t Limit = -1ULL;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemcpy();
+ unsigned DstAlign = Align; // Destination alignment can change.
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ bool isZeroStr = CopyFromStr && Str.empty();
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+ // It's unlikely a store of a vector immediate can be done in a single
+      // instruction.  It would require a load from a constant pool first.
+      // We also handle the case of storing a vector of all zeros.
+      // FIXME: Handle other cases where a store of a vector immediate is
+      // done in a single instruction.
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ } else {
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcSVOff + SrcOff, false, Align);
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ std::vector<MVT> MemOps;
+ uint64_t Limit = -1ULL;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemmove();
+ unsigned DstAlign = Align; // Destination alignment can change.
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ uint64_t SrcOff = 0, DstOff = 0;
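+  // Unlike memcpy, issue every load before any store: the source and
+  // destination ranges may overlap, so each byte must be read before the
+  // stores can clobber it.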
+
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcSVOff + SrcOff, false, Align);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memset to a series of load/store ops if the size operand
+ // falls below a certain threshold.
+ std::vector<MVT> MemOps;
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+ Size, Align, Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+ SDValue Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ AlwaysInline,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, true,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ }
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, false, CallingConv::C, false,
+ getExternalSymbol("memcpy", TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, false, CallingConv::C, false,
+ getExternalSymbol("memmove", TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff) {
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, DstSV, DstSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align,
+ DstSV, DstSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+ Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, false, CallingConv::C, false,
+ getExternalSymbol("memset", TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, const Value* PtrVal,
+ unsigned Alignment) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ MVT VT = Cmp.getValueType();
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(MemVT);
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+ new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
+ Chain, Ptr, Cmp, Swp, PtrVal, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
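+
+// Illustrative sketch (assumed caller context): creating a 32-bit
+// compare-and-swap. Result 0 is the value loaded from memory, result 1 is
+// the output chain; alignment 0 requests the memory type's natural
+// alignment, as ensured above.
+//
+//   SDValue CAS = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, MVT::i32,
+//                               Chain, Ptr, Cmp, Swp, PtrVal, 0);
+//   SDValue Loaded   = CAS.getValue(0);
+//   SDValue OutChain = CAS.getValue(1);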
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ const Value* PtrVal,
+ unsigned Alignment) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_SWAP) &&
+ "Invalid Atomic Op");
+
+ MVT VT = Val.getValueType();
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(MemVT);
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Val};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+ new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
+ Chain, Ptr, Val, PtrVal, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+/// If only a single operand is given, that operand is returned directly.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
+ DebugLoc dl) {
+ if (NumOps == 1)
+ return Ops[0];
+
+ SmallVector<MVT, 4> VTs;
+ VTs.reserve(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
+ Ops, NumOps);
+}
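+
+// For instance (hypothetical caller code), packaging a value and a chain
+// into a single multi-result node:
+//
+//   SDValue Parts[] = { LoadedVal, OutChain };
+//   SDValue Merged = DAG.getMergeValues(Parts, 2, dl);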
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+ const MVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps,
+ MVT MemVT, const Value *srcValue, int SVOff,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
+ MemVT, srcValue, SVOff, Align, Vol,
+ ReadMem, WriteMem);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ MVT MemVT, const Value *srcValue, int SVOff,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ // Memoize the node unless it returns a flag.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
+ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT,
+ srcValue, SVOff, Align, Vol, ReadMem, WriteMem);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
+ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT,
+ srcValue, SVOff, Align, Vol, ReadMem, WriteMem);
+ }
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs,
+ bool IsTailCall, bool IsInreg, SDVTList VTs,
+ const SDValue *Operands, unsigned NumOperands) {
+ // Do not include isTailCall in the folding set profile.
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands);
+ ID.AddInteger(CallingConv);
+ ID.AddInteger(IsVarArgs);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ // Instead of including isTailCall in the folding set, we just
+ // set the flag of the existing node.
+ if (!IsTailCall)
+ cast<CallSDNode>(E)->setNotTailCall();
+ return SDValue(E, 0);
+ }
+ SDNode *N = NodeAllocator.Allocate<CallSDNode>();
+ new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg,
+ VTs, Operands, NumOperands);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
+ ISD::LoadExtType ExtType, MVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ const Value *SV, int SVOffset, MVT EVT,
+ bool isVolatile, unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(VT);
+
+ if (VT == EVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == EVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ if (VT.isVector())
+ assert(EVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Invalid vector extload!");
+ else
+ assert(EVT.bitsLT(VT) &&
+ "Should only be an extending load, not truncating!");
+ assert((ExtType == ISD::EXTLOAD || VT.isInteger()) &&
+ "Cannot sign/zero extend a FP/Vector load!");
+ assert(VT.isInteger() == EVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+ "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(EVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
+ new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset,
+ Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ const Value *SV, int SVOffset,
+ bool isVolatile, unsigned Alignment) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
+ SV, SVOffset, VT, isVolatile, Alignment);
+}
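+
+// Minimal usage sketch (illustrative only; Chain, Ptr and SV are assumed
+// from context): a plain, non-volatile i32 load, with alignment 0
+// requesting the type's natural alignment.
+//
+//   SDValue Ld = DAG.getLoad(MVT::i32, dl, Chain, Ptr, SV, 0, false, 0);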
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT,
+ SDValue Chain, SDValue Ptr,
+ const Value *SV,
+ int SVOffset, MVT EVT,
+ bool isVolatile, unsigned Alignment) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
+ SV, SVOffset, EVT, isVolatile, Alignment);
+}
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+ "Load is already a indexed load!");
+ return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
+ LD->getChain(), Base, Offset, LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->isVolatile(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, const Value *SV, int SVOffset,
+ bool isVolatile, unsigned Alignment) {
+ MVT VT = Val.getValueType();
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(VT);
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED,
+ isVolatile, Alignment));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false,
+ VT, SV, SVOffset, Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
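+
+// Illustrative counterpart to the load sketch above (names assumed from
+// context); alignment 0 again selects the value type's natural alignment.
+//
+//   SDValue St = DAG.getStore(OutChain, dl, Val, Ptr, SV, 0, false, 0);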
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, const Value *SV,
+ int SVOffset, MVT SVT,
+ bool isVolatile, unsigned Alignment) {
+ MVT VT = Val.getValueType();
+
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment);
+
+ assert(VT.bitsGT(SVT) && "Not a truncation?");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(VT);
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED,
+ isVolatile, Alignment));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true,
+ SVT, SV, SVOffset, Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+ "Store is already a indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, AM,
+ ST->isTruncatingStore(), ST->getMemoryVT(),
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getAlignment(), ST->isVolatile());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ SDValue SV) {
+ SDValue Ops[] = { Chain, Ptr, SV };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ const SDUse *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+ return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+ void *IP = 0;
+
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const std::vector<MVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+ Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const MVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps) {
+ if (NumVTs == 1)
+ return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+ return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+#if 0
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+#endif
+ }
+
+ // Memoize the node unless it returns a flag.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ if (NumOps == 1) {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ CSEMap.InsertNode(N, IP);
+ } else {
+ if (NumOps == 1) {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, 0, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops, 1);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops, 2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops, 5);
+}
+
+SDVTList SelectionDAG::getVTList(MVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
+ return *I;
+
+ MVT *Array = Allocator.Allocate<MVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ SDVTList Result = makeVTList(Array, 2);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3)
+ return *I;
+
+ MVT *Array = Allocator.Allocate<MVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ SDVTList Result = makeVTList(Array, 3);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+  MVT *Array = Allocator.Allocate<MVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: assert(0 && "Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ bool NoMatch = false;
+ for (unsigned i = 2; i != NumVTs; ++i)
+ if (VTs[i] != I->VTs[i]) {
+ NoMatch = true;
+ break;
+ }
+ if (!NoMatch)
+ return *I;
+ }
+
+ MVT *Array = Allocator.Allocate<MVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node; instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return InN; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
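+
+// Hedged usage sketch: re-pointing a binary node at new operands. If an
+// identical node already exists, the CSE'd node is returned instead of
+// InN, so callers must always use the returned value (names hypothetical):
+//
+//   SDValue Upd = DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS);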
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2, MVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2, MVT VT3, MVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2, MVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return MorphNodeTo(N, Opc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2, MVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 3);
+}
+
+/// MorphNodeTo - These *mutate* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ // If an identical node already exists, use it.
+ void *IP = 0;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = 0;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > N->NumOperands) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ if (N->isMachineOpcode()) {
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ N->OperandList = OperandAllocator.Allocate<SDUse>(NumOps);
+ N->OperandsNeedDelete = false;
+ } else {
+ N->OperandList = new SDUse[NumOps];
+ N->OperandsNeedDelete = true;
+ }
+ }
+
+ // Assign the new operands.
+ N->NumOperands = NumOps;
+ for (unsigned i = 0, e = NumOps; i != e; ++i) {
+ N->OperandList[i].setUser(N);
+ N->OperandList[i].setInitial(Ops[i]);
+ }
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
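+
+// Illustrative only: instruction selection normally reaches this routine
+// through the SelectNodeTo wrappers above, which bitwise-complement the
+// machine opcode before delegating, e.g. (hypothetical target opcode and
+// operands):
+//
+//   SDNode *New = CurDAG->SelectNodeTo(N, TargetOpc, MVT::i32, Op0, Op1);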
+
+
+/// getTargetNode - These are used for target selectors to create a new node
+/// with specified return type(s), target opcode, and operands.
+///
+/// Note that getTargetNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) {
+ return getNode(~Opcode, dl, VT).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
+ SDValue Op1) {
+ return getNode(~Opcode, dl, VT, Op1).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
+ SDValue Op1, SDValue Op2) {
+ return getNode(~Opcode, dl, VT, Op1, Op2).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(~Opcode, dl, VT, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+  return getNode(~Opcode, dl, VTs, (SDValue *)0, 0).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+ MVT VT2, SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getNode(~Opcode, dl, VTs, &Op1, 1).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+ MVT VT2, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getNode(~Opcode, dl, VTs, Ops, 2).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+ MVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getNode(~Opcode, dl, VTs, Ops, 3).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2, MVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getNode(~Opcode, dl, VTs, Ops, 2).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2, MVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getNode(~Opcode, dl, VTs, Ops, 3).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2, MVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+ MVT VT2, MVT VT3, MVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ const std::vector<MVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode();
+}
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return E;
+ }
+ return NULL;
+}
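+
+// Sketch (assumed caller context): probing for an existing node without
+// creating one, e.g. in a combine that must not introduce new nodes:
+//
+//   SDValue Ops[] = { LHS, RHS };
+//   if (SDNode *E = DAG.getNodeIfExists(ISD::ADD, DAG.getVTList(VT),
+//                                       Ops, 2))
+//     /* reuse SDValue(E, 0) */;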
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
+ DAGUpdateListener *UpdateListener) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+ assert(From != To.getNode() && "Cannot replace uses of with self");
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+  // is replaced by To, we don't want to replace all of its uses with To
+  // as well. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
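+
+// For example (hypothetical combine): replace every use of a node's only
+// result with a simplified value, passing no update listener:
+//
+//   DAG.ReplaceAllUsesWith(SDValue(N, 0), Simplified, 0);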
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+ DAGUpdateListener *UpdateListener) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+ const SDValue *To,
+ DAGUpdateListener *UpdateListener) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. Deleted and
+/// modified nodes are reported through the optional UpdateListener, as in
+/// ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
+ DAGUpdateListener *UpdateListener){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial, case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To, UpdateListener);
+ return;
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
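+
+// Sketch (names assumed): redirect only result 1 of a multi-result node,
+// e.g. its chain, leaving uses of its other results untouched:
+//
+//   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewChain, 0);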
+
+namespace {
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. Deleted and modified nodes
+/// are reported through the optional UpdateListener, as in
+/// ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num,
+ DAGUpdateListener *UpdateListener){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+
+ // Read up all the uses and make records of them. This helps
+ // processing new uses that are introduced during the
+ // replacement process.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ std::sort(Uses.begin(), Uses.end());
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the number of nodes assigned
+/// (one past the maximum id) and leaves the AllNodes list itself sorted into
+/// topological order.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+  // Node Id fields for nodes at SortedPos and after will contain the
+ // count of outstanding operands.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+ SDNode *N = I++;
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+      // A node with no operands is trivially sorted; move it to the front.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q = N;
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+  // Visit all the nodes. As we iterate, move nodes into sorted order,
+  // such that by the time the end is reached all nodes will be sorted.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ --Degree;
+ if (Degree == 0) {
+        // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+         "Last node in topological sort has unexpected id!");
+  assert(AllNodes.back().use_empty() &&
+         "Last node in topological sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
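+
+// After this runs, a forward walk over the node list visits every operand
+// before its users (illustrative; 'visit' is a hypothetical callback):
+//
+//   DAG.AssignTopologicalOrder();
+//   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+//        E = DAG.allnodes_end(); I != E; ++I)
+//     visit(I);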
+
+
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
+ MVT VT, int64_t o)
+ : SDNode(isa<GlobalVariable>(GA) &&
+ cast<GlobalVariable>(GA)->isThreadLocal() ?
+ // Thread Local
+ (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
+ // Non Thread Local
+ (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
+ DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o) {
+ TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt,
+ const Value *srcValue, int SVO,
+ unsigned alignment, bool vol)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
+ assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
+ assert(getAlignment() == alignment && "Alignment representation error!");
+ assert(isVolatile() == vol && "Volatile representation error!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+ const SDValue *Ops,
+ unsigned NumOps, MVT memvt, const Value *srcValue,
+ int SVO, unsigned alignment, bool vol)
+ : SDNode(Opc, dl, VTs, Ops, NumOps),
+ MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
+ assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
+ assert(getAlignment() == alignment && "Alignment representation error!");
+ assert(isVolatile() == vol && "Volatile representation error!");
+}
+
+/// getMemOperand - Return a MachineMemOperand object describing the memory
+/// reference performed by this memory access.
+MachineMemOperand MemSDNode::getMemOperand() const {
+ int Flags = 0;
+ if (isa<LoadSDNode>(this))
+ Flags = MachineMemOperand::MOLoad;
+ else if (isa<StoreSDNode>(this))
+ Flags = MachineMemOperand::MOStore;
+ else if (isa<AtomicSDNode>(this)) {
+ Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ }
+ else {
+ const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this);
+ assert(MemIntrinNode && "Unknown MemSDNode opcode!");
+ if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad;
+ if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore;
+ }
+
+ int Size = (getMemoryVT().getSizeInBits() + 7) >> 3;
+ if (isVolatile()) Flags |= MachineMemOperand::MOVolatile;
+
+ // Check if the memory reference references a frame index
+ const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(getBasePtr().getNode());
+ if (!getSrcValue() && FI)
+ return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()),
+ Flags, 0, Size, getAlignment());
+ else
+ return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(),
+ Size, getAlignment());
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const MVT *SDNode::getValueTypeList(MVT VT) {
+ if (VT.isExtended()) {
+ static std::set<MVT, MVT::compareRawBits> EVTs;
+ return &(*EVTs.insert(VT).first);
+ } else {
+ static MVT VTs[MVT::LAST_VALUETYPE];
+ VTs[VT.getSimpleVT()] = VT;
+ return &VTs[VT.getSimpleVT()];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
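+
+// E.g., checking that exactly one use consumes a load's value result
+// (result 0) while ignoring users of its chain result (illustrative):
+//
+//   if (Ld->hasNUsesOfValue(1, 0)) { /* ... */ }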
+
+
+/// hasAnyUseOfValue - Return true if there are any uses of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperandOf - Return true if this value is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (*this == N->getOperand(i))
+ return true;
+ return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+ if (this == N->OperandList[i].getNode())
+ return true;
+ return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions. In practice, this looks through token
+/// factors and non-volatile loads. In order to remain efficient, this only
+/// looks a couple of nodes in; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply, we just want to be able to see through
+ // TokenFactor's etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel. If any
+ // of the operands of the TF reach dest, then we can do the xform.
+ if (getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return true;
+ return false;
+ }
+
+ // Loads don't have side effects, look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (!Ld->isVolatile())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
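+
+// Illustrative check (assumed context): test whether one chain value
+// reaches another across only token factors and non-volatile loads, using
+// a small fixed search depth:
+//
+//   if (StoreChain.reachesChainWithoutSideEffects(LoadChain, 2))
+//     /* the two chains are separated only by side-effect-free nodes */;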
+
+
+static void findPredecessor(SDNode *N, const SDNode *P, bool &found,
+ SmallPtrSet<SDNode *, 32> &Visited) {
+ if (found || !Visited.insert(N))
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) {
+ SDNode *Op = N->getOperand(i).getNode();
+ if (Op == P) {
+ found = true;
+ return;
+ }
+ findPredecessor(Op, P, found, Visited);
+ }
+}
+
+/// isPredecessorOf - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by recursively traversing
+/// up the operands.
+/// NOTE: this is an expensive method. Use it carefully.
+bool SDNode::isPredecessorOf(SDNode *N) const {
+ SmallPtrSet<SDNode *, 32> Visited;
+ bool found = false;
+ findPredecessor(N, this, found, Visited);
+ return found;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
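+    // Target-independent opcodes (below BUILTIN_OP_END) with no case below
+    // have no printable name.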
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->get(getMachineOpcode()).getName();
+ return "<<Unknown Machine Node>>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node>>";
+ }
+ return "<<Unknown Node>>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE:
+ return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MEMOPERAND: return "MemOperand";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::ARG_FLAGS: return "ArgFlags";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(getOperand(0))->getZExtValue();
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ }
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(getOperand(1))->getZExtValue();
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+  case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::DBG_LABEL: return "dbg_label";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::DECLARE: return "declare";
+ case ISD::HANDLENODE: return "handlenode";
+ case ISD::FORMAL_ARGUMENTS: return "formal_arguments";
+ case ISD::CALL: return "call";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FPOWI: return "fpowi";
+ case ISD::FPOW: return "fpow";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+
+ case ISD::SETCC: return "setcc";
+ case ISD::VSETCC: return "vsetcc";
+ case ISD::SELECT: return "select";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BIT_CONVERT: return "bit_convert";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: assert(0 && "Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::RET: return "ret";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTLZ: return "ctlz";
+
+ // Debug info
+ case ISD::DBG_STOPPOINT: return "dbg_stoppoint";
+ case ISD::DEBUG_LOC: return "debug_loc";
+
+ // Trampolines
+ case ISD::TRAMPOLINE: return "trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: assert(0 && "Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default:
+ return "";
+ case ISD::PRE_INC:
+ return "<pre-inc>";
+ case ISD::PRE_DEC:
+ return "<pre-dec>";
+ case ISD::POST_INC:
+ return "<post-inc>";
+ case ISD::POST_DEC:
+ return "<post-dec>";
+ }
+}
+
+std::string ISD::ArgFlagsTy::getArgFlagsString() {
+ std::string S = "< ";
+
+ if (isZExt())
+ S += "zext ";
+ if (isSExt())
+ S += "sext ";
+ if (isInReg())
+ S += "inreg ";
+ if (isSRet())
+ S += "sret ";
+ if (isByVal())
+ S += "byval ";
+ if (isNest())
+ S += "nest ";
+ if (getByValAlign())
+ S += "byval-align:" + utostr(getByValAlign()) + " ";
+ if (getOrigAlign())
+ S += "orig-align:" + utostr(getOrigAlign()) + " ";
+ if (getByValSize())
+ S += "byval-size:" + utostr(getByValSize()) + " ";
+ return S + ">";
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(errs(), G);
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getMVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this);
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ }
+
+ if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ if (G && R->getReg() &&
+ TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ OS << " " << G->getTarget().getRegisterInfo()->getName(R->getReg());
+ } else {
+ OS << " #" << R->getReg();
+ }
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(this)) {
+ if (M->MO.getValue())
+ OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">";
+ else
+ OS << "<null:" << M->MO.getOffset() << ">";
+ } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(this)) {
+ OS << N->getArgFlags().getArgFlagsString();
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getMVTString();
+  } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ const Value *SrcValue = LD->getSrcValue();
+ int SrcOffset = LD->getSrcValueOffset();
+ OS << " <";
+ if (SrcValue)
+ OS << SrcValue;
+ else
+ OS << "null";
+ OS << ":" << SrcOffset << ">";
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << " <anyext "; break;
+ case ISD::SEXTLOAD: OS << " <sext "; break;
+ case ISD::ZEXTLOAD: OS << " <zext "; break;
+ }
+ if (doExt)
+ OS << LD->getMemoryVT().getMVTString() << ">";
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << " " << AM;
+ if (LD->isVolatile())
+ OS << " <volatile>";
+ OS << " alignment=" << LD->getAlignment();
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ const Value *SrcValue = ST->getSrcValue();
+ int SrcOffset = ST->getSrcValueOffset();
+ OS << " <";
+ if (SrcValue)
+ OS << SrcValue;
+ else
+ OS << "null";
+ OS << ":" << SrcOffset << ">";
+
+ if (ST->isTruncatingStore())
+ OS << " <trunc " << ST->getMemoryVT().getMVTString() << ">";
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << " " << AM;
+ if (ST->isVolatile())
+ OS << " <volatile>";
+ OS << " alignment=" << ST->getAlignment();
+ } else if (const AtomicSDNode* AT = dyn_cast<AtomicSDNode>(this)) {
+ const Value *SrcValue = AT->getSrcValue();
+ int SrcOffset = AT->getSrcValueOffset();
+ OS << " <";
+ if (SrcValue)
+ OS << SrcValue;
+ else
+ OS << "null";
+ OS << ":" << SrcOffset << ">";
+ if (AT->isVolatile())
+ OS << " <volatile>";
+ OS << " alignment=" << AT->getAlignment();
+ }
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ OS << " ";
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ cerr << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ cerr << "\n" << std::string(indent, ' ');
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+
+ cerr << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+ // Dump the current SDNode, but don't end the line yet.
+ OS << std::string(indent, ' ');
+ N->printr(OS, G);
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ if (i) OS << ",";
+ OS << " ";
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+ OS << "\n";
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(errs(), this, 0, 0, once);
+}
+
+
+/// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+
+const Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
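+/// isConstantSplat - Return true if this BUILD_VECTOR is a constant splat.
+/// On success, SplatValue holds the replicated bits, SplatUndef marks bits
+/// contributed by undef elements, HasAnyUndefs reports whether any element
+/// was undef, and SplatBitSize is the smallest element width (no smaller
+/// than MinSplatBits) at which the value repeats.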
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+ APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits) {
+ MVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned sz = VT.getSizeInBits();
+ if (MinSplatBits > sz)
+ return false;
+
+ SplatValue = APInt(sz, 0);
+ SplatUndef = APInt(sz, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int nOps = getNumOperands();
+ assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+ for (unsigned i = 0; i < nOps; ++i) {
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = i * EltBitSize;
+
+ if (OpVal.getOpcode() == ISD::UNDEF)
+      SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos);
+ else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+      SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz)
+                    << BitPos;
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+
+ HasAnyUndefs = (SplatUndef != 0);
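+  // For example, a <4 x i8> build_vector of 0x55s starts with sz == 32 and
+  // SplatValue == 0x55555555; each pass matches the two halves (0x5555,
+  // then 0x55) until sz reaches the 8-bit floor, giving SplatBitSize == 8.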
+  while (sz > 8) {
+ unsigned HalfSize = sz / 2;
+ APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = APInt(SplatValue).trunc(HalfSize);
+ APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ sz = HalfSize;
+ }
+
+ SplatBitSize = sz;
+ return true;
+}
+
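+/// isSplatMask - Return true if every defined element of Mask selects the
+/// same source element; e.g. the mask <2,u,2,2> is a splat of element 2.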
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
new file mode 100644
index 0000000..889d7f5
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -0,0 +1,6052 @@
+//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SelectionDAGBuild.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision),
+ cl::init(0));
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
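+/// For example, in { i32, { i32, i32 }, i32 }, the index path {1, 1} names
+/// the second element of the nested struct, which linearizes to index 2
+/// (slot 0 is the leading i32, slot 1 the nested struct's first element).
+///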
+static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex = 0) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+  // A non-aggregate leaf occupies one slot in the linearized layout, so
+  // count it and move on.
+ return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// MVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
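+/// For example, { i32, [2 x float] } yields ValueVTs = {i32, f32, f32} and,
+/// under a typical 32-bit layout, Offsets = {0, 4, 8}.
+///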
+static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<MVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets = 0,
+ uint64_t StartingOffset = 0) {
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty == Type::VoidTy)
+ return;
+ // Base case: we can get an MVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
+
+namespace llvm {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information about
+ /// the value. The most common situation is to represent one value at a time,
+ /// but struct or array values are handled element-wise as multiple values.
+ /// The splitting of aggregates is performed recursively, so that we never
+ /// have aggregate-typed registers. The values at this point do not necessarily
+ /// have legal types, so each value may require one or more registers of some
+ /// legal type.
+ ///
+ struct VISIBILITY_HIDDEN RegsForValue {
+ /// TLI - The TargetLowering object.
+ ///
+ const TargetLowering *TLI;
+
+ /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<MVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+    /// With virtual registers, the contents of RegVTs are redundant with
+    /// TLI's getRegisterType member function; with physical registers,
+    /// however, it is necessary to keep a separate record of the types.
+ ///
+ SmallVector<MVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() : TLI(0) {}
+
+ RegsForValue(const TargetLowering &tli,
+ const SmallVector<unsigned, 4> &regs,
+ MVT regvt, MVT valuevt)
+ : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+ RegsForValue(const TargetLowering &tli,
+ const SmallVector<unsigned, 4> &regs,
+ const SmallVector<MVT, 4> &regvts,
+ const SmallVector<MVT, 4> &valuevts)
+ : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
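+    /// This constructor flattens Ty with ComputeValueVTs and assumes that
+    /// the registers for all of the resulting values were allocated
+    /// consecutively starting at Reg, mirroring CreateRegForValue.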
+ RegsForValue(const TargetLowering &tli,
+ unsigned Reg, const Type *Ty) : TLI(&tli) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(ValueVT);
+ MVT RegisterVT = TLI->getRegisterType(ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ TLI = RHS.TLI;
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from
+    /// this value and return the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copy the
+    /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Code,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+ };
+}
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is a PHI
+/// node, is used by a PHI node, or is used outside of the basic block that
+/// defines it.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+ if (isa<PHINode>(I)) return true;
+ BasicBlock *BB = I->getParent();
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
+ return true;
+ return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ // Don't force virtual registers for byval arguments though, because
+ // fast-isel can't handle those in all cases.
+ if (EnableFastISel && !A->hasByValAttr())
+ return A->use_empty();
+
+ BasicBlock *Entry = A->getParent()->begin();
+ for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+ return false; // Use not in entry block.
+ return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
+ : TLI(tli) {
+}
+
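+/// set - Initialize this object for the given function and MachineFunction:
+/// allocate virtual registers for values live across blocks, assign frame
+/// slots to static allocas, and create a MachineBasicBlock (with empty PHI
+/// instructions) for every LLVM basic block.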
+void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
+ SelectionDAG &DAG,
+ bool EnableFastISel) {
+ Fn = &fn;
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+
+ // Create a vreg for each argument register that is not dead and is used
+ // outside of the entry block for the function.
+ for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+ AI != E; ++AI)
+ if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
+ InitializeRegForValue(AI);
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ const Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF->push_back(MBB);
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ PHINode *PN;
+ DebugLoc DL;
+ for (BasicBlock::iterator
+ I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (Function *F = CI->getCalledFunction()) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::dbg_stoppoint: {
+ DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
+
+ if (DIDescriptor::ValidDebugInfo(SPI->getContext(),
+ CodeGenOpt::Default)) {
+ DICompileUnit CU(cast<GlobalVariable>(SPI->getContext()));
+ unsigned idx = MF->getOrCreateDebugLocID(CU.getGV(),
+ SPI->getLine(),
+ SPI->getColumn());
+ DL = DebugLoc::get(idx);
+ }
+
+ break;
+ }
+ case Intrinsic::dbg_func_start: {
+ DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
+ Value *SP = FSI->getSubprogram();
+
+ if (DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::Default)) {
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DICompileUnit CU(Subprogram.getCompileUnit());
+ unsigned Line = Subprogram.getLineNumber();
+ DL = DebugLoc::get(MF->getOrCreateDebugLocID(CU.getGV(),
+ Line, 0));
+ }
+
+ break;
+ }
+ }
+ }
+ }
+
+ PN = dyn_cast<PHINode>(I);
+ if (!PN || PN->use_empty()) continue;
+
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ MVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(VT);
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+}
+
+unsigned FunctionLoweringInfo::MakeReg(MVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, V->getType(), ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT ValueVT = ValueVTs[Value];
+ MVT RegisterVT = TLI.getRegisterType(ValueVT);
+
+ unsigned NumRegs = TLI.getNumRegisters(ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = MakeReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT, then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
+ const SDValue *Parts,
+ unsigned NumParts, MVT PartVT, MVT ValueVT,
+ ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (!ValueVT.isVector() && ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
+ unsigned RoundParts = NumParts & (NumParts - 1) ?
+ 1 << Log2_32(NumParts) : NumParts;
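+      // RoundParts is the largest power of two not exceeding NumParts; e.g.
+      // six parts round down to four, and the two leftover parts are
+      // combined in afterwards.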
+ unsigned RoundBits = PartBits * RoundParts;
+ MVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : MVT::getIntegerVT(RoundBits);
+ SDValue Lo, Hi;
+
+ MVT HalfVT = MVT::getIntegerVT(RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT);
+ Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2,
+ PartVT, HalfVT);
+ } else {
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+ }
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ MVT OddVT = MVT::getIntegerVT(OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, dl,
+ Parts+RoundParts, OddParts, PartVT, OddVT);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(),
+ TLI.getPointerTy()));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
+ }
+ } else if (ValueVT.isVector()) {
+ // Handle a multi-element vector.
+ MVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate operands
+ // from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
+ // operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
+ ValueVT, &Ops[0], NumIntermediates);
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isVector()) {
+ assert(ValueVT.isVector() && "Unknown vector conversion!");
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+ }
+
+ if (ValueVT.isVector()) {
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial scalar-to-vector conversions should get here!");
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
+ }
+
+ if (PartVT.isInteger() &&
+ ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp != ISD::DELETED_NODE)
+ Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ } else {
+ return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+ }
+ }
+
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ if (ValueVT.bitsLT(Val.getValueType()))
+ // FP_ROUND's are always exact here.
+ return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+ }
+
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+
+ assert(0 && "Unknown mismatch!");
+ return SDValue();
+}
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy();
+ MVT ValueVT = Val.getValueType();
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+ if (!NumParts)
+ return;
+
+ if (!ValueVT.isVector()) {
+ if (PartVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
+ } else if (PartVT.isInteger() && ValueVT.isInteger()) {
+ ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
+ } else {
+ assert(0 && "Unknown mismatch!");
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartVT != ValueVT);
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+      // If the parts cover fewer bits than the value has, truncate the value.
+ if (PartVT.isInteger() && ValueVT.isInteger()) {
+ ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ } else {
+ assert(0 && "Unknown mismatch!");
+ }
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ assert(PartVT == ValueVT && "Type conversion failed!");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
+ DAG.getConstant(RoundBits,
+ TLI.getPointerTy()));
+ getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT);
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+ NumParts = RoundParts;
+ ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::getIntegerVT(ValueVT.getSizeInBits()),
+ Val);
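+    // For example, an i128 split into four i32 parts is first bisected into
+    // two i64 halves, and each half is bisected into two i32s on the next
+    // pass of the loop.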
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+        MVT ThisVT = MVT::getIntegerVT(ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ ThisVT, Part0,
+ DAG.getConstant(1, PtrVT));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ ThisVT, Part0,
+ DAG.getConstant(0, PtrVT));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ PartVT, Part1);
+ }
+ }
+ }
+
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+
+ return;
+ }
+
+ // Vector ValueVT.
+ if (NumParts == 1) {
+ if (PartVT != ValueVT) {
+ if (PartVT.isVector()) {
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+ } else {
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ PartVT, Val,
+ DAG.getConstant(0, PtrVT));
+ }
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ MVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+  unsigned NumRegs =
+    TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+                               RegisterVT);
+ unsigned NumElements = ValueVT.getVectorNumElements();
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ if (IntermediateVT.isVector())
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ IntermediateVT, Val,
+ DAG.getConstant(i * (NumElements / NumIntermediates),
+ PtrVT));
+ else
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ IntermediateVT, Val,
+ DAG.getConstant(i, PtrVT));
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
+ } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each intermediate value
+    // into legal parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT);
+ }
+}
+
+
+void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+ AA = &aa;
+ GFI = gfi;
+ TD = DAG.getTarget().getTargetData();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGLowering object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGLowering::clear() {
+ NodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ DAG.clear();
+ CurDebugLoc = DebugLoc::getUnknownLoc();
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGLowering::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingLoads[0], PendingLoads.size());
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGLowering::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingExports[0],
+ PendingExports.size());
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+void SelectionDAGLowering::visit(Instruction &I) {
+ visit(I.getOpcode(), I);
+}
+
+void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+  // ConstantExprs in addition to instructions.
+ switch (Opcode) {
+ default: assert(0 && "Unknown instruction type encountered!");
+ abort();
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
+#include "llvm/Instruction.def"
+ }
+}
+
+void SelectionDAGLowering::visitAdd(User &I) {
+ if (I.getType()->isFPOrFPVector())
+ visitBinary(I, ISD::FADD);
+ else
+ visitBinary(I, ISD::ADD);
+}
+
+void SelectionDAGLowering::visitMul(User &I) {
+ if (I.getType()->isFPOrFPVector())
+ visitBinary(I, ISD::FMUL);
+ else
+ visitBinary(I, ISD::MUL);
+}
+
+SDValue SelectionDAGLowering::getValue(const Value *V) {
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+ MVT VT = TLI.getValueType(V->getType(), true);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return N = DAG.getConstant(*CI, VT);
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return N = DAG.getGlobalAddress(GV, VT);
+
+ if (isa<ConstantPointerNull>(C))
+ return N = DAG.getConstant(0, TLI.getPointerTy());
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return N = DAG.getConstantFP(*CFP, VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return N = DAG.getUNDEF(VT);
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the ValueMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+ return DAG.getMergeValues(&Constants[0], Constants.size(),
+ getCurDebugLoc());
+ }
+
+ if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ MVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, EltVT);
+ }
+ return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc());
+ }
+
+ const VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CP->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ MVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, EltVT);
+ else
+ Op = DAG.getConstant(0, EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ }
+
+ unsigned InReg = FuncInfo.ValueMap[V];
+ assert(InReg && "Value not in map!");
+
+ RegsForValue RFV(TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
+}
+
+
+void SelectionDAGLowering::visitRet(ReturnInst &I) {
+ if (I.getNumOperands() == 0) {
+ DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(),
+ MVT::Other, getControlRoot()));
+ return;
+ }
+
+ SmallVector<SDValue, 8> NewValues;
+ NewValues.push_back(getControlRoot());
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) continue;
+
+ SDValue RetOp = getValue(I.getOperand(i));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ MVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->paramHasAttr(0, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(VT);
+ MVT PartVT = TLI.getRegisterType(VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->paramHasAttr(0, Attribute::InReg))
+ Flags.setInReg();
+ for (unsigned i = 0; i < NumParts; ++i) {
+ NewValues.push_back(Parts[i]);
+ NewValues.push_back(DAG.getArgFlags(Flags));
+ }
+ }
+ }
+ DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other,
+ &NewValues[0], NewValues.size()));
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) {
+ if (!V->use_empty()) {
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
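+/// For example, FCMP_UEQ normally lowers to SETUEQ, but when finite-only
+/// FP math is enabled (no NaNs), unordered results cannot occur and the
+/// ordered form SETEQ is used instead.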
+///
+static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
+ ISD::CondCode FPC, FOC;
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+ case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+ case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+ case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+ case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+ case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+ case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+ case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
+ case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
+ case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+ case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+ case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+ case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+ case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+ case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+ case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
+ default:
+ assert(0 && "Invalid FCmp predicate opcode!");
+ FOC = FPC = ISD::SETFALSE;
+ break;
+ }
+ if (FiniteOnlyFPMath())
+ return FOC;
+ else
+ return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ assert(0 && "Invalid ICmp predicate opcode!");
+ return ISD::SETNE;
+ }
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == CurMBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = getICmpCondCode(IC->getPredicate());
+ } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ Condition = getFCmpCondCode(FC->getPredicate());
+ } else {
+ Condition = ISD::SETEQ; // silence warning.
+ assert(0 && "Unknown compare instruction");
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0),
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(),
+ NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// recursively lower the two subexpressions, emitting a branch for each leaf
+/// of the and/or tree.
+void SelectionDAGLowering::FindMergedConditions(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ unsigned Opc) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ Instruction *BOp = dyn_cast<Instruction>(Cond);
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI = CurBB;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGLowering::visitBr(BranchInst &I) {
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ CurMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch, emit the branch.
+ if (Succ0MBB != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (BOp->hasOneUse() &&
+ (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0]);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ CurMBB->getParent()->erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(),
+ NULL, Succ0MBB, Succ1MBB, CurMBB);
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ DebugLoc dl = getCurDebugLoc();
+
+ // Build the setcc now.
+ if (CB.CmpMHS == NULL) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
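+    // A case range [Low, High] is lowered as one unsigned comparison:
+    // (X - Low) <=u (High - Low). For example, [5, 10] becomes
+    // (X - 5) <=u 5, so a single branch covers the whole range. When Low is
+    // the minimum signed value the subtraction is skipped and a plain
+    // X <=s High suffices.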
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ MVT VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+ ISD::SETLE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ CurMBB->addSuccessor(CB.TrueBB);
+ CurMBB->addSuccessor(CB.FalseBB);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+ // If the branch was constant folded, fix up the CFG.
+ if (BrCond.getOpcode() == ISD::BR) {
+ CurMBB->removeSuccessor(CB.FalseBB);
+ DAG.setRoot(BrCond);
+ } else {
+ // Otherwise, go ahead and insert the false branch.
+ if (BrCond == getControlRoot())
+ CurMBB->removeSuccessor(CB.TrueBB);
+
+ if (CB.FalseBB == NextBlock)
+ DAG.setRoot(BrCond);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB)));
+ }
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGLowering::visitJumpTable(JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ MVT PTy = TLI.getPointerTy();
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index));
+}
+
+/// visitJumpTableHeader - This function emits the necessary code to produce
+/// an index into the jump table from the value being switched on.
+void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH) {
+ // Subtract the lowest switch case value from the value being switched on and
+ // conditional branch to default mbb if the result is greater than the
+ // difference between smallest and largest cases.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ MVT VT = SwitchOp.getValueType();
+ SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(JTH.First, VT));
+
+  // The SDNode we just created, which holds the value being switched on minus
+  // the smallest case value, needs to be copied to a virtual register so it
+  // can be used as an index into the jump table in a subsequent basic block.
+  // This value may be smaller or larger than the target's pointer type, and
+  // therefore may require extension or truncation.
+ if (VT.bitsGT(TLI.getPointerTy()))
+ SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(), SUB);
+ else
+ SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(), SUB);
+
+ unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the largest
+ // case in the switch.
+ SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(SUB.getValueType()), SUB,
+ DAG.getConstant(JTH.Last-JTH.First,VT),
+ ISD::SETUGT);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ if (JT.MBB == NextBlock)
+ DAG.setRoot(BrCond);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB)));
+}
+
+/// visitBitTestHeader - This function emits the necessary code to produce a
+/// value suitable for "bit tests"
+void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
+ // Subtract the minimum value
+ SDValue SwitchOp = getValue(B.SValue);
+ MVT VT = SwitchOp.getValueType();
+ SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(B.First, VT));
+
+ // Check range
+ SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(SUB.getValueType()),
+ SUB, DAG.getConstant(B.Range, VT),
+ ISD::SETUGT);
+
+ SDValue ShiftOp;
+ if (VT.bitsGT(TLI.getPointerTy()))
+ ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(), SUB);
+ else
+ ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(), SUB);
+
+ B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ B.Reg, ShiftOp);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ CurMBB->addSuccessor(B.Default);
+ CurMBB->addSuccessor(MBB);
+
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ if (MBB == NextBlock)
+ DAG.setRoot(BrRange);
+ else
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrRange,
+                            DAG.getBasicBlock(MBB)));
+}
+
+/// visitBitTestCase - This function produces one "bit test"
+void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B) {
+ // Make desired shift
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
+ TLI.getPointerTy());
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(1, TLI.getPointerTy()),
+ ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ TLI.getPointerTy(), SwitchVal,
+ DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(AndOp.getValueType()),
+ AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ ISD::SETNE);
+
+ CurMBB->addSuccessor(B.TargetBB);
+ CurMBB->addSuccessor(NextMBB);
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ if (NextMBB == NextBlock)
+ DAG.setRoot(BrAnd);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB)));
+}
+
+void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ const Value *Callee(I.getCalledValue());
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else
+ LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ CopyToExportRegsIfNeeded(&I);
+
+ // Update successor info
+ CurMBB->addSuccessor(Return);
+ CurMBB->addSuccessor(LandingPad);
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Size is the number of Cases represented by this range.
+ size_t Size = CR.Range.second - CR.Range.first;
+ if (Size > 3)
+ return false;
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+  // TODO: If any two of the cases have the same destination, and if one value
+ // is the same as the other, but has one bit unset that the other has set,
+ // use bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+ // Rearrange the case blocks so that the last one falls through if possible.
+ if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ // The last case block won't fall through into 'NextBlock' if we emit the
+ // branches in this order. See if rearranging a case value would help.
+ for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+ if (I->BB == NextBlock) {
+ std::swap(*I, BackCase);
+ break;
+ }
+ }
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the Case's target mbb if the value being switched on SV is equal
+ // to C.
+ MachineBasicBlock *CurBlock = CR.CaseBB;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ MachineBasicBlock *FallThrough;
+ if (I != E-1) {
+ FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+ CurMF->insert(BBI, FallThrough);
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ } else {
+ // If the last case doesn't match, go to the default block.
+ FallThrough = Default;
+ }
+
+ Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->High == I->Low) {
+      // This is just a small case range containing exactly one case.
+ CC = ISD::SETEQ;
+ LHS = SV; RHS = I->High; MHS = NULL;
+ } else {
+ CC = ISD::SETLE;
+ LHS = I->Low; MHS = SV; RHS = I->High;
+ }
+ CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+ // If emitting the first comparison, just call visitSwitchCase to emit the
+ // code into the current block. Otherwise, push the CaseBlock onto the
+ // vector to be later processed by SDISel, and insert the node's MBB
+ // before the next MBB.
+ if (CurBlock == CurMBB)
+ visitSwitchCase(CB);
+ else
+ SwitchCases.push_back(CB);
+
+ CurBlock = FallThrough;
+ }
+
+ return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return !DisableJumpTables &&
+ (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
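+/// ComputeRange - Return the number of values in [First, Last] as an APInt
+/// wide enough not to overflow. For example, with 4-bit First == -8 and
+/// Last == 7, both are sign-extended to 5 bits and the result is
+/// 7 - (-8) + 1 == 16.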
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+ APInt LastExt(Last), FirstExt(First);
+ uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+ LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+ return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range
+bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+ size_t TSize = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ if (!areJTsAllowed(TLI) || TSize <= 3)
+ return false;
+
+ APInt Range = ComputeRange(First, Last);
+ double Density = (double)TSize / Range.roundToDouble();
+ if (Density < 0.4)
+ return false;
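+  // For example, four single cases {0, 1, 2, 100} give TSize == 4 over a
+  // range of 101 values (density ~0.04, rejected), while the clustered
+  // range [0, 4] gives TSize == 5 over 5 values (density 1.0, accepted).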
+
+  DEBUG(errs() << "Lowering jump table\n"
+               << "First entry: " << First << ". Last entry: " << Last << '\n'
+               << "Range: " << Range << ". "
+               << "Size: " << TSize << ". Density: " << Density << "\n\n");
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Create a new basic block to hold the code for loading the address
+ // of the jump table, and jumping to it. Update successor information;
+ // we will either branch to the default case for the switch, or the jump
+ // table.
+ MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, JumpTableBB);
+ CR.CaseBB->addSuccessor(Default);
+ CR.CaseBB->addSuccessor(JumpTableBB);
+
+ // Build a vector of destination BBs, corresponding to each target
+ // of the jump table. If the value of the jump table slot corresponds to
+ // a case statement, push the case's BB onto the vector, otherwise, push
+ // the default BB.
+ std::vector<MachineBasicBlock*> DestBBs;
+ APInt TEI = First;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+ const APInt& Low = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& High = cast<ConstantInt>(I->High)->getValue();
+
+ if (Low.sle(TEI) && TEI.sle(High)) {
+ DestBBs.push_back(I->BB);
+ if (TEI==High)
+ ++I;
+ } else {
+ DestBBs.push_back(Default);
+ }
+ }
+
+ // Update successor info. Add one edge to each unique successor.
+ BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+ for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+ E = DestBBs.end(); I != E; ++I) {
+ if (!SuccsHandled[(*I)->getNumber()]) {
+ SuccsHandled[(*I)->getNumber()] = true;
+ JumpTableBB->addSuccessor(*I);
+ }
+ }
+
+ // Create a jump table index for this jump table, or return an existing
+ // one.
+ unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
+
+ // Set the jump table information so that we can codegen it as a second
+ // MachineBasicBlock
+ JumpTable JT(-1U, JTI, JumpTableBB, Default);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
+ if (CR.CaseBB == CurMBB)
+ visitJumpTableHeader(JT, JTH);
+
+ JTCases.push_back(JumpTableBlock(JTH, JT));
+
+ return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+
+ const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+ double FMetric = 0;
+ CaseItr Pivot = CR.Range.first + Size/2;
+
+ // Select optimal pivot, maximizing sum density of LHS and RHS. This will
+  // (heuristically) allow us to emit JumpTables later.
+ size_t TSize = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ size_t LSize = FrontCase.size();
+ size_t RSize = TSize-LSize;
+ DEBUG(errs() << "Selecting best pivot: \n"
+ << "First: " << First << ", Last: " << Last <<'\n'
+ << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+ J!=E; ++I, ++J) {
+ const APInt& LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue();
+ APInt Range = ComputeRange(LEnd, RBegin);
+ assert((Range - 2ULL).isNonNegative() &&
+ "Invalid case distance");
+ double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble();
+ double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble();
+ double Metric = Range.logBase2()*(LDensity+RDensity);
+ // Should always split in some non-trivial place
+ DEBUG(errs() <<"=>Step\n"
+ << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+ << "LDensity: " << LDensity
+ << ", RDensity: " << RDensity << '\n'
+ << "Metric: " << Metric << '\n');
+ if (FMetric < Metric) {
+ Pivot = J;
+ FMetric = Metric;
+ DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
+ }
+
+ LSize += J->size();
+ RSize -= J->size();
+ }
+ if (areJTsAllowed(TLI)) {
+ // If our case is dense we *really* should handle it earlier!
+ assert((FMetric > 0) && "Should handle dense range earlier!");
+ } else {
+ Pivot = CR.Range.first + Size/2;
+ }
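+  // For example, given the single cases {0, 2, 4, 100, 102, 104}, the metric
+  // peaks at the split between 4 and 100: both halves stay fairly dense and
+  // Range.logBase2() rewards splitting across the large hole in the middle.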
+
+ CaseRange LHSR(CR.Range.first, Pivot);
+ CaseRange RHSR(Pivot, CR.Range.second);
+ Constant *C = Pivot->Low;
+ MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+ // We know that we branch to the LHS if the Value being switched on is
+ // less than the Pivot value, C. We use this to optimize our binary
+ // tree a bit, by recognizing that if SV is greater than or equal to the
+ // LHS's Case Value, and that Case Value is exactly one less than the
+ // Pivot's Value, then we can branch directly to the LHS's Target,
+ // rather than creating a leaf node for it.
+ if ((LHSR.second - LHSR.first) == 1 &&
+ LHSR.first->High == CR.GE &&
+ cast<ConstantInt>(C)->getValue() ==
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ TrueBB = LHSR.first->BB;
+ } else {
+ TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, TrueBB);
+ WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Similar to the optimization above, if the Value being switched on is
+ // known to be less than the Constant CR.LT, and the current Case Value
+ // is CR.LT - 1, then we can branch directly to the target block for
+ // the current Case Value, rather than emitting a RHS leaf node for it.
+ if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+ cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ FalseBB = RHSR.first->BB;
+ } else {
+ FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, FalseBB);
+ WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to the RHS node (FalseBB).
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+ if (CR.CaseBB == CurMBB)
+ visitSwitchCase(CB);
+ else
+ SwitchCases.push_back(CB);
+
+ return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and spans less than the machine word bitwidth, encode the case range into
+/// a series of masks and emit bit tests with these masks.
+bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default){
+ unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits();
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+ return false;
+
+ size_t numCmps = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I) {
+    // A single case counts as one comparison, a case range as two.
+ numCmps += (I->Low == I->High ? 1 : 2);
+ }
+
+ // Count unique destinations
+ SmallSet<MachineBasicBlock*, 4> Dests;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ Dests.insert(I->BB);
+ if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+ return false;
+ }
+ DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
+ << "Total number of comparisons: " << numCmps << '\n');
+
+ // Compute span of values.
+ const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ APInt cmpRange = maxValue - minValue;
+
+ DEBUG(errs() << "Compare range: " << cmpRange << '\n'
+ << "Low bound: " << minValue << '\n'
+ << "High bound: " << maxValue << '\n');
+
+ if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+ (!(Dests.size() == 1 && numCmps >= 3) &&
+ !(Dests.size() == 2 && numCmps >= 5) &&
+ !(Dests.size() >= 3 && numCmps >= 6)))
+ return false;
+
+ DEBUG(errs() << "Emitting bit tests\n");
+ APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the case where all the case values fit in a word without
+  // having to subtract minValue: then we can skip emitting the subtraction.
+ if (minValue.isNonNegative() &&
+ maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+ cmpRange = maxValue;
+ } else {
+ lowBound = minValue;
+ }
+
+ CaseBitsVector CasesBits;
+ unsigned i, count = 0;
+
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ MachineBasicBlock* Dest = I->BB;
+ for (i = 0; i < count; ++i)
+ if (Dest == CasesBits[i].BB)
+ break;
+
+ if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+ CasesBits.push_back(CaseBits(0, Dest, 0));
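+      // For example, an i8 return value marked signext is widened here to
+      // the 32-bit register type before being split into parts, matching
+      // what C callers expect.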
+ count++;
+ }
+
+ const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+ uint64_t lo = (lowValue - lowBound).getZExtValue();
+ uint64_t hi = (highValue - lowBound).getZExtValue();
+
+ for (uint64_t j = lo; j <= hi; j++) {
+ CasesBits[i].Mask |= 1ULL << j;
+ CasesBits[i].Bits++;
+ }
+
+ }
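+  // For example, with lowBound == 0 a case range [1, 3] sets mask bits 1
+  // through 3 for its destination, i.e. Mask |= 0xE and Bits becomes 3.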
+ std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+ BitTestInfo BTC;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ DEBUG(errs() << "Cases:\n");
+ for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+ DEBUG(errs() << "Mask: " << CasesBits[i].Mask
+ << ", Bits: " << CasesBits[i].Bits
+ << ", BB: " << CasesBits[i].BB << '\n');
+
+ MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, CaseBB);
+ BTC.push_back(BitTestCase(CasesBits[i].Mask,
+ CaseBB,
+ CasesBits[i].BB));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ BitTestBlock BTB(lowBound, cmpRange, SV,
+ -1U, (CR.CaseBB == CurMBB),
+ CR.CaseBB, Default, BTC);
+
+ if (CR.CaseBB == CurMBB)
+ visitBitTestHeader(BTB);
+
+ BitTestCases.push_back(BTB);
+
+ return true;
+}
+
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges
+size_t SelectionDAGLowering::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
+ size_t numCmps = 0;
+
+ // Start with "simple" cases
+ for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+ Cases.push_back(Case(SI.getSuccessorValue(i),
+ SI.getSuccessorValue(i),
+ SMBB));
+ }
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters
+ if (Cases.size() >= 2)
+ // Must recompute end() each iteration because it may be
+ // invalidated by erase if we hold on to it
+ for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+ const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+ const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+ MachineBasicBlock* nextBB = J->BB;
+ MachineBasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
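+  // For example, the neighboring cases 1 -> BB1 and 2 -> BB1 are merged into
+  // the single cluster [1, 2] -> BB1, which can later be lowered as a range.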
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+ MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // If there is only the default destination, branch to it if it is not the
+ // next basic block. Otherwise, just fall through.
+ if (SI.getNumOperands() == 2) {
+ // Update machine-CFG edges.
+
+ // If this is not a fall-through branch, emit the branch.
+ CurMBB->addSuccessor(Default);
+ if (Default != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Default)));
+ return;
+ }
+
+ // If there are any non-default case statements, create a vector of Cases
+ // representing each one, and sort the vector so that we can efficiently
+ // create a binary search tree from them.
+ CaseVector Cases;
+ size_t numCmps = Clusterify(Cases, SI);
+ DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n');
+ numCmps = 0;
+
+ // Get the Value to be switched on and default basic blocks, which will be
+ // inserted into CaseBlock records, representing basic blocks in the binary
+ // search tree.
+ Value *SV = SI.getOperand(0);
+
+ // Push the initial CaseRec onto the worklist
+ CaseRecVector WorkList;
+ WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+ while (!WorkList.empty()) {
+ // Grab a record representing a case range to process off the worklist
+ CaseRec CR = WorkList.back();
+ WorkList.pop_back();
+
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+ continue;
+
+    // If the range has few cases (three or fewer), emit a series of specific
+    // tests.
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+ continue;
+
+    // If the switch covers more than 3 case values, is at least 40% dense,
+    // and the target supports indirect branches, then emit a jump table
+    // rather than lowering the switch to a binary tree of conditional
+    // branches.
+ if (handleJTSwitchCase(CR, WorkList, SV, Default))
+ continue;
+
+ // Emit binary tree. We need to pick a pivot, and push left and right ranges
+  // onto the worklist. Leaves are handled via the handleSmallSwitchRange()
+  // call.
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+ }
+}
+
+
+void SelectionDAGLowering::visitSub(User &I) {
+ // -0.0 - X --> fneg
+ const Type *Ty = I.getType();
+ if (isa<VectorType>(Ty)) {
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+ const VectorType *DestTy = cast<VectorType>(I.getType());
+ const Type *ElTy = DestTy->getElementType();
+ if (ElTy->isFloatingPoint()) {
+ unsigned VL = DestTy->getNumElements();
+ std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
+ Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+ if (CV == CNZ) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+ }
+ }
+ }
+ if (Ty->isFloatingPoint()) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+ }
+
+ visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB);
+}
+
+void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ if (!isa<VectorType>(I.getType()) &&
+ Op2.getValueType() != TLI.getShiftAmountTy()) {
+ // If the operand is smaller than the shift count type, promote it.
+ if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+ TLI.getShiftAmountTy(), Op2);
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (TLI.getShiftAmountTy().getSizeInBits() >=
+ Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getShiftAmountTy(), Op2);
+ // Otherwise we'll need to temporarily settle for some other
+ // convenient type; type legalization will make adjustments as
+ // needed.
+ else if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(), Op2);
+ else if (TLI.getPointerTy().bitsGT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(), Op2);
+ }
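+  // For example, shifting an i64 by an i64 amount on a target whose shift
+  // amount type is i8: legal shift amounts 0..63 need only 6 bits, and
+  // Log2_32_Ceil(64) == 6 <= 8, so the amount is safely truncated to i8.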
+
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitICmp(User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitFCmp(User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitVICmp(User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (VICmpInst *IC = dyn_cast<VICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+ setValue(&I, DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(),
+ Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitVFCmp(User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ MVT DestVT = TLI.getValueType(I.getType());
+
+ setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitSelect(User &I) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues != 0) {
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue TrueVal = getValue(I.getOperand(1));
+ SDValue FalseVal = getValue(I.getOperand(2));
+
+    for (unsigned i = 0; i != NumValues; ++i)
+      Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
+                       TrueVal.getNode()->getValueType(TrueVal.getResNo() + i),
+                       Cond,
+                       SDValue(TrueVal.getNode(), TrueVal.getResNo() + i),
+                       SDValue(FalseVal.getNode(), FalseVal.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+ }
+}
+
+
+void SelectionDAGLowering::visitTrunc(User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitZExt(User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason, so there is
+  // nothing more to check.
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitSExt(User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason, so there is
+  // nothing more to check.
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPTrunc(User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+ DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGLowering::visitFPExt(User &I){
+  // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToUI(User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToSI(User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitUIToFP(User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitSIToFP(User &I){
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitPtrToInt(User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ MVT SrcVT = N.getValueType();
+ MVT DestVT = TLI.getValueType(I.getType());
+ SDValue Result;
+ if (DestVT.bitsLT(SrcVT))
+ Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N);
+ else
+ // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+ Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N);
+ setValue(&I, Result);
+}
+
+void SelectionDAGLowering::visitIntToPtr(User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ MVT SrcVT = N.getValueType();
+ MVT DestVT = TLI.getValueType(I.getType());
+ if (DestVT.bitsLT(SrcVT))
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+ else
+ // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ DestVT, N));
+}
+
+void SelectionDAGLowering::visitBitCast(User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+
+ // BitCast assures us that source and destination are the same size so this
+ // is either a BIT_CONVERT or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ DestVT, N)); // convert types
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGLowering::visitInsertElement(User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(2)));
+
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGLowering::visitExtractElement(User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+
+// Utility for visitShuffleVector - Returns true if the mask is a sequential
+// mask starting from SIndx and increasing to the element length (undefs are
+// allowed).
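+// For example, Mask = <4, 5, -1, 7> is sequential from SIndx == 4, since the
+// undef (-1) entry is skipped.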
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+ unsigned MaskNumElts = Mask.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i)
+ if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+ return false;
+ return true;
+}
+
+void SelectionDAGLowering::visitShuffleVector(User &I) {
+ SmallVector<int, 8> Mask;
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+
+ // Convert the ConstantVector mask operand into an array of ints, with -1
+ // representing undef values.
+ SmallVector<Constant*, 8> MaskElts;
+ cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+ unsigned MaskNumElts = MaskElts.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (isa<UndefValue>(MaskElts[i]))
+ Mask.push_back(-1);
+ else
+ Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+ }
+
+ MVT VT = TLI.getValueType(I.getType());
+ MVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &Mask[0]));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+    // The mask is longer than the source vectors, and its length is a
+    // multiple of the source vector length. We can use CONCAT_VECTORS to
+    // make the source vectors match the mask length.
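+    // For example, shuffling two <2 x i32> sources with the mask <0, 1, 2, 3>
+    // is exactly a concatenation of the two sources.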
+ if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+ bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps1[0], NumConcat);
+ Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps2[0], NumConcat);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx);
+ else
+ MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ }
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+ // Analyze the access pattern of the vector to see if we can extract
+ // two subvectors and do the shuffle. The analysis is done by calculating
+    // the range of elements the mask accesses in both vectors.
+ int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+ int MaxRange[2] = {-1, -1};
+
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ int Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
+ }
+
+    // Check if the accessed range is smaller than the vector size and whether
+    // we can find a reasonable extract index.
+    int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Cannot extract.
+ int StartIdx[2]; // StartIdx to extract from
+ for (int Input=0; Input < 2; ++Input) {
+ if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+ // Fits within range but we should see if we can find a good
+ // start index that is a multiple of the mask length.
+ if (MaxRange[Input] < (int)MaskNumElts) {
+ RangeUse[Input] = 1; // Extract from beginning of the vector
+ StartIdx[Input] = 0;
+ } else {
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts < SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ }
+ }
+ }
+
+    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (int Input=0; Input < 2; ++Input) {
+ SDValue& Src = Input == 0 ? Src1 : Src2;
+ if (RangeUse[Input] == 0) {
+ Src = DAG.getUNDEF(VT);
+ } else {
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+ Src, DAG.getIntPtrConstant(StartIdx[Input]));
+ }
+ }
+ // Calculate new mask.
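+      // Rebase each index against the start of its extracted subvector;
+      // indices into Src2 also move into the second operand's range
+      // [MaskNumElts, 2*MaskNumElts).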
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ MappedOps.push_back(Idx);
+ else if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx - StartIdx[0]);
+ else
+ MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ }
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+ }
+
+  // We can't use either concat vectors or extract subvectors, so fall back
+  // to replacing the shuffle with an extract-and-build-vector sequence.
+ MVT EltVT = VT.getVectorElementType();
+ MVT PtrVT = TLI.getPointerTy();
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ } else {
+ int Idx = Mask[i];
+ if (Idx < (int)SrcNumElts)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src2,
+ DAG.getConstant(Idx - SrcNumElts, PtrVT)));
+ }
+ }
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ const Type *AggTy = I.getType();
+ const Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
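+  // Compute the flattened position of the inserted value among the
+  // aggregate's scalar members.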
+ unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+ I.idx_begin(), I.idx_end());
+
+ SmallVector<MVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ SmallVector<MVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ SDValue Agg = getValue(Op0);
+ SDValue Val = getValue(Op1);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&AggValueVTs[0], NumAggValues),
+ &Values[0], NumAggValues));
+}
+
+void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Type *AggTy = Op0->getType();
+ const Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+ I.idx_begin(), I.idx_end());
+
+ SmallVector<MVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValValueVTs[0], NumValValues),
+ &Values[0], NumValValues));
+}
+
+
+void SelectionDAGLowering::visitGetElementPtr(User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ const Type *Ty = I.getOperand(0)->getType();
+
+ for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+ OI != E; ++OI) {
+ Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ DAG.getIntPtrConstant(Offset));
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
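+        // e.g. a constant index of 3 into an array of i32 (4-byte alloc
+        // size on common targets) folds to a byte offset of 3 * 4 = 12.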
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->getZExtValue() == 0) continue;
+ uint64_t Offs =
+ TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ SDValue OffsVal;
+ unsigned PtrBits = TLI.getPointerTy().getSizeInBits();
+ if (PtrBits < 64) {
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(Offs, MVT::i64));
+ } else
+ OffsVal = DAG.getIntPtrConstant(Offs);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ OffsVal);
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD->getTypeAllocSize(Ty);
+ SDValue IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ if (IdxN.getValueType().bitsLT(N.getValueType()))
+ IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(),
+ N.getValueType(), IdxN);
+ else if (IdxN.getValueType().bitsGT(N.getValueType()))
+ IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ N.getValueType(), IdxN);
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementSize != 1) {
+ if (isPowerOf2_64(ElementSize)) {
+ unsigned Amt = Log2_64(ElementSize);
+ IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, TLI.getPointerTy()));
+ } else {
+ SDValue Scale = DAG.getIntPtrConstant(ElementSize);
+ IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ N.getValueType(), N, IdxN);
+ }
+ }
+ setValue(&I, N);
+}
+
+void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
+  // If this is a fixed-size alloca in the entry block of the function,
+  // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ const Type *Ty = I.getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
+ AllocSize,
+ DAG.getConstant(TySize, AllocSize.getValueType()));
+
+ MVT IntPtr = TLI.getPointerTy();
+ if (IntPtr.bitsLT(AllocSize.getValueType()))
+ AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ IntPtr, AllocSize);
+ else if (IntPtr.bitsGT(AllocSize.getValueType()))
+ AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ IntPtr, AllocSize);
+
+  // Handle alignment.  If the requested alignment is less than or equal to
+  // the stack alignment, ignore it.  If it is greater, we note the requested
+  // alignment in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign =
+ TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding StackAlign-1 to the size.
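+  // e.g. with StackAlign = 16, a size of 20 becomes (20 + 15) & ~15 = 32.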
+ AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(StackAlign-1));
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+ VTs, Ops, 3);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ // Inform the Frame Information that we have just allocated a variable-sized
+ // object.
+ CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGLowering::visitLoad(LoadInst &I) {
+ const Value *SV = I.getOperand(0);
+ SDValue Ptr = getValue(SV);
+
+ const Type *Ty = I.getType();
+ bool isVolatile = I.isVolatile();
+ unsigned Alignment = I.getAlignment();
+
+ SmallVector<MVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (I.isVolatile())
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA->pointsToConstantMemory(SV)) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+ MVT PtrVT = Ptr.getValueType();
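+  // Emit one load per member value at Ptr + Offsets[i]; the individual
+  // chains are merged with a TokenFactor below.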
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+ DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ SV, Offsets[i],
+ isVolatile, Alignment);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other,
+ &Chains[0], NumValues);
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+
+void SelectionDAGLowering::visitStore(StoreInst &I) {
+ Value *SrcV = I.getOperand(0);
+ Value *PtrV = I.getOperand(1);
+
+ SmallVector<MVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+  // Get the lowered operands.  Note that we do this after
+  // checking if NumValues is zero, because with zero values
+  // the operands won't have entries in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(NumValues);
+ MVT PtrVT = Ptr.getValueType();
+ bool isVolatile = I.isVolatile();
+ unsigned Alignment = I.getAlignment();
+ for (unsigned i = 0; i != NumValues; ++i)
+ Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ PtrV, Offsets[i],
+ isVolatile, Alignment);
+
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues));
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !I.doesNotAccessMemory();
+ bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+  // Info is set by getTgtMemIntrinsic.
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+ // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+ if (!IsTgtIntrinsic)
+ Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getOperand(i));
+ assert(TLI.isTypeLegal(Op.getValueType()) &&
+ "Intrinsic uses a non-legal type?");
+ Ops.push_back(Op);
+ }
+
+ std::vector<MVT> VTArray;
+ if (I.getType() != Type::VoidTy) {
+ MVT VT = TLI.getValueType(I.getType());
+ if (VT.isVector()) {
+ const VectorType *DestTy = cast<VectorType>(I.getType());
+ MVT EltVT = TLI.getValueType(DestTy->getElementType());
+
+ VT = MVT::getVectorVT(EltVT, DestTy->getNumElements());
+ assert(VT != MVT::Other && "Intrinsic uses a non-legal type?");
+ }
+
+ assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?");
+ VTArray.push_back(VT);
+ }
+ if (HasChain)
+ VTArray.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size());
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+    // This is a target intrinsic that touches memory.
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size(),
+ Info.memVT, Info.ptrVal, Info.offset,
+ Info.align, Info.vol,
+ Info.readMem, Info.writeMem);
+ }
+ else if (!HasChain)
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ else if (I.getType() != Type::VoidTy)
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ else
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+ if (I.getType() != Type::VoidTy) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ MVT VT = TLI.getValueType(PTy);
+ Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+ }
+ setValue(&I, Result);
+ }
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+static GlobalVariable *ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+ assert ((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+namespace llvm {
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<GlobalVariable *> TyInfo;
+ unsigned N = I.getNumOperands();
+
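+  // Operands after the personality are typeinfos; a ConstantInt operand
+  // introduces a filter of that length, so scan backwards to delimit each
+  // filter and the trailing catch typeinfos.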
+ for (unsigned i = N - 1; i > 2; --i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert (FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 3) {
+ TyInfo.reserve(N - 3);
+ for (unsigned j = 3; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
+
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with exponent of 1:
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit pattern of the floating-point value.
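+///
+/// e.g. for Op = 8.0f (bits 0x41000000) the result is 0x3f800000 = 1.0f.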
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, MVT::i32));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit pattern of the floating-point value.
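+///
+/// e.g. for Op = 8.0f (bits 0x41000000) this computes
+/// (float)(0x82 - 127) = 3.0f.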
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+ DebugLoc dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, MVT::i32));
+ SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get a 32-bit floating-point constant from its raw
+/// IEEE-754 bit pattern.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+ return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// Inlined utility function to implement binary input atomic intrinsics for
+/// visitIntrinsicCall: I is the call instruction and Op is the associated
+/// NodeType for I.
+const char *
+SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+ SDValue Root = getRoot();
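+  // Operand 1 is the pointer and operand 2 the value; the atomic node
+  // produces the loaded result plus an output chain.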
+ SDValue L =
+ DAG.getAtomic(Op, getCurDebugLoc(),
+ getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ I.getOperand(1));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+
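+  // The node produces the arithmetic result plus an i1 overflow flag as a
+  // second result value.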
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2);
+
+ setValue(&I, Result);
+ return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitExp(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OFe 1.4426950f
+ // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x3fb8aa3b));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
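+    // Shifting left by 23 places IntegerPartOfX in the exponent field of an
+    // IEEE-754 single, so adding it to the result bits multiplies the final
+    // value by 2^IntegerPartOfX.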
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // 0.000107046256 error, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::i32, t13);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitLog(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2) [0.69314718f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3f317218));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
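+    // log(x) = Exponent*log(2) + log(Mantissa), with Mantissa in [1,2); the
+    // polynomials below approximate log(Mantissa) on that interval.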
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogofMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitLog2(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ // Different possible minimax approximations of significand in
+ // floating-point for various degrees of accuracy over [1,2].
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG2, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitLog10(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
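+    // log10(x) = Exponent*log10(2) + log10(Mantissa), with Mantissa in [1,2).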
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG10, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitExp2(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+ // IntegerPartOfX <<= 23;
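+    // 2^x = 2^IntegerPartOfX * 2^FractionalPartOfX; the shifted integer part
+    // lands in the exponent field, and the polynomials below approximate
+    // 2^FractionalPartOfX over [0,1).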
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP2, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitPow - Lower a pow intrinsic.  Handles the special sequences for
+/// limited-precision mode when the base is 10.0f.
+void
+SelectionDAGLowering::visitPow(CallInst &I) {
+ SDValue result;
+ Value *Val = I.getOperand(1);
+ DebugLoc dl = getCurDebugLoc();
+ bool IsExp10 = false;
+
+ if (getValue(Val).getValueType() == MVT::f32 &&
+ getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    if (Constant *C = dyn_cast<Constant>(Val)) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ APFloat Ten(10.0f);
+ IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
+ }
+ }
+ }
+
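+  // When the base is exactly 10.0f, pow(10, x) is lowered as
+  // 2^(x * log2(10)) below.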
+ if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(2));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // IntegerPartOfX = (int32_t)(x * LOG2OF10);
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x40549a78));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // twoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FPOW, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function.
+/// If we want to emit this as a call to a named external function, return
+/// the name; otherwise lower it and return null.
+const char *
+SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+ DebugLoc dl = getCurDebugLoc();
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+ getValue(I.getOperand(1))));
+ return 0;
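+  // Note: "_setjmp"+1 points at "setjmp"; the +! below drops the leading
+  // underscore on targets that do not use the underscore-prefixed name.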
+  case Intrinsic::setjmp:
+    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+  case Intrinsic::longjmp:
+    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ case Intrinsic::memcpy: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+ case Intrinsic::memset: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
+ I.getOperand(1), 0));
+ return 0;
+ }
+ case Intrinsic::memmove: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+
+ // If the source and destination are known to not be aliases, we can
+ // lower memmove as memcpy.
+ uint64_t Size = -1ULL;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
+ Size = C->getZExtValue();
+ if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+ AliasAnalysis::NoAlias) {
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+
+ DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+ case Intrinsic::dbg_stoppoint: {
+ DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+ if (DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLevel)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ DICompileUnit CU(cast<GlobalVariable>(SPI.getContext()));
+ DebugLoc Loc = DebugLoc::get(MF.getOrCreateDebugLocID(CU.getGV(),
+ SPI.getLine(), SPI.getColumn()));
+ setCurDebugLoc(Loc);
+
+ if (OptLevel == CodeGenOpt::None)
+ DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(),
+ SPI.getLine(),
+ SPI.getColumn(),
+ SPI.getContext()));
+ }
+ return 0;
+ }
+ case Intrinsic::dbg_region_start: {
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
+
+ if (DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLevel) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ unsigned LabelID =
+ DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext()));
+ DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+ getRoot(), LabelID));
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_region_end: {
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
+
+ if (DIDescriptor::ValidDebugInfo(REI.getContext(), OptLevel) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext()));
+
+ if (Subprogram.isNull() || Subprogram.describes(MF.getFunction())) {
+ unsigned LabelID =
+ DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext()));
+ DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+ getRoot(), LabelID));
+ } else {
+        // This is the end of an inlined function.  Debugging information for
+        // inlined functions is not handled yet (only supported by FastISel).
+ if (OptLevel == CodeGenOpt::None) {
+ unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
+ if (ID != 0)
+          // The returned ID is 0 if this is an unbalanced "end of inlined
+          // scope".  This could happen if the optimizer eats dbg intrinsics
+          // or the "beginning of inlined scope" is not recognized due to
+          // missing location info.  In such cases, ignore this region.end.
+ DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+ getRoot(), ID));
+ }
+ }
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_func_start: {
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
+ Value *SP = FSI.getSubprogram();
+ if (!DIDescriptor::ValidDebugInfo(SP, OptLevel))
+ return 0;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (OptLevel == CodeGenOpt::None) {
+ // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what
+ // (most?) gdb expects.
+ DebugLoc PrevLoc = CurDebugLoc;
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+ if (!Subprogram.describes(MF.getFunction())) {
+        // This is the beginning of an inlined function.
+
+ // If llvm.dbg.func.start is seen in a new block before any
+        // llvm.dbg.stoppoint intrinsic, then the location info is unknown.
+        // FIXME: Why is DebugLoc reset at the beginning of each block?
+ if (PrevLoc.isUnknown())
+ return 0;
+
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ setCurDebugLoc(DebugLoc::get(
+ MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
+ unsigned LabelID = DW->RecordInlinedFnStart(Subprogram,
+ DICompileUnit(PrevLocTpl.CompileUnit),
+ PrevLocTpl.Line,
+ PrevLocTpl.Col);
+ DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+ getRoot(), LabelID));
+ }
+ } else {
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ MF.setDefaultDebugLoc(DebugLoc::get(
+ MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ // llvm.dbg.func_start also defines beginning of function scope.
+ DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram()));
+ }
+ }
+ } else {
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+
+ std::string SPName;
+ Subprogram.getLinkageName(SPName);
+ if (!SPName.empty()
+ && strcmp(SPName.c_str(), MF.getFunction()->getNameStart())) {
+        // This is the beginning of an inlined function.  Debugging
+        // information for inlined functions is not handled yet (only
+        // supported by FastISel).
+ return 0;
+ }
+
+ // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is
+ // what (most?) gdb expects.
+ DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+      // Record the source line but do not create a label for the normal
+      // function start; it will be emitted at asm emission time.  However,
+      // create a label if this is the beginning of an inlined function.
+ unsigned Line = Subprogram.getLineNumber();
+ setCurDebugLoc(DebugLoc::get(
+ MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+ // FIXME - Start new region because llvm.dbg.func_start also defines
+ // beginning of function scope.
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ if (OptLevel == CodeGenOpt::None) {
+ DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ Value *Variable = DI.getVariable();
+ if (DIDescriptor::ValidDebugInfo(Variable, OptLevel))
+ DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(),
+ getValue(DI.getAddress()), getValue(Variable)));
+ } else {
+ // FIXME: Do something sensible here when we support debug declare.
+ }
+ return 0;
+ }
+ case Intrinsic::eh_exception: {
+ // Insert the EXCEPTIONADDR instruction.
+ assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[1];
+ Ops[0] = DAG.getRoot();
+ SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+
+ case Intrinsic::eh_selector_i32:
+ case Intrinsic::eh_selector_i64: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ?
+ MVT::i32 : MVT::i64);
+
+ if (MMI) {
+ if (CurMBB->isLandingPad())
+ AddCatchInfo(I, MMI, CurMBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(&I);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) CurMBB->addLiveIn(Reg);
+ }
+
+ // Insert the EHSELECTION instruction.
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = getValue(I.getOperand(1));
+ Ops[1] = getRoot();
+ SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ } else {
+ setValue(&I, DAG.getConstant(0, VT));
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for_i32:
+ case Intrinsic::eh_typeid_for_i64: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ?
+ MVT::i32 : MVT::i64);
+
+ if (MMI) {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+
+ unsigned TypeID = MMI->getTypeIDFor(GV);
+ setValue(&I, DAG.getConstant(TypeID, VT));
+ } else {
+      // Return something different from what eh_selector returns.
+ setValue(&I, DAG.getConstant(1, VT));
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+ MMI->setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2))));
+ } else {
+ setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+ }
+
+ return 0;
+ case Intrinsic::eh_unwind_init:
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+ MMI->setCallsUnwindInit(true);
+ }
+
+ return 0;
+
+ case Intrinsic::eh_dwarf_cfa: {
+ MVT VT = getValue(I.getOperand(1)).getValueType();
+ SDValue CfaArg;
+ if (VT.bitsGT(TLI.getPointerTy()))
+ CfaArg = DAG.getNode(ISD::TRUNCATE, dl,
+ TLI.getPointerTy(), getValue(I.getOperand(1)));
+ else
+ CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl,
+ TLI.getPointerTy(), getValue(I.getOperand(1)));
+
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ TLI.getPointerTy()),
+ CfaArg);
+ setValue(&I, DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAMEADDR, dl,
+ TLI.getPointerTy(),
+ DAG.getConstant(0,
+ TLI.getPointerTy())),
+ Offset));
+ return 0;
+ }
+
+ case Intrinsic::convertff:
+ case Intrinsic::convertfsi:
+ case Intrinsic::convertfui:
+ case Intrinsic::convertsif:
+ case Intrinsic::convertuif:
+ case Intrinsic::convertss:
+ case Intrinsic::convertsu:
+ case Intrinsic::convertus:
+ case Intrinsic::convertuu: {
+ ISD::CvtCode Code = ISD::CVT_INVALID;
+ switch (Intrinsic) {
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break;
+ case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+ case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+ case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+ case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+ case Intrinsic::convertss: Code = ISD::CVT_SS; break;
+ case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
+ case Intrinsic::convertus: Code = ISD::CVT_US; break;
+ case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
+ }
+ MVT DestVT = TLI.getValueType(I.getType());
+ Value* Op1 = I.getOperand(1);
+ setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+ DAG.getValueType(DestVT),
+ DAG.getValueType(getValue(Op1).getValueType()),
+ getValue(I.getOperand(2)),
+ getValue(I.getOperand(3)),
+ Code));
+ return 0;
+ }
+
+ case Intrinsic::sqrt:
+ setValue(&I, DAG.getNode(ISD::FSQRT, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::powi:
+ setValue(&I, DAG.getNode(ISD::FPOWI, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2))));
+ return 0;
+ case Intrinsic::sin:
+ setValue(&I, DAG.getNode(ISD::FSIN, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::cos:
+ setValue(&I, DAG.getNode(ISD::FCOS, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::log:
+ visitLog(I);
+ return 0;
+ case Intrinsic::log2:
+ visitLog2(I);
+ return 0;
+ case Intrinsic::log10:
+ visitLog10(I);
+ return 0;
+ case Intrinsic::exp:
+ visitExp(I);
+ return 0;
+ case Intrinsic::exp2:
+ visitExp2(I);
+ return 0;
+ case Intrinsic::pow:
+ visitPow(I);
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+ DAG.getVTList(MVT::i64, MVT::Other),
+ &Op, 1);
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+ case Intrinsic::part_select: {
+ // Currently not implemented: just abort
+ assert(0 && "part_select intrinsic not implemented");
+ abort();
+ }
+ case Intrinsic::part_set: {
+ // Currently not implemented: just abort
+ assert(0 && "part_set intrinsic not implemented");
+ abort();
+ }
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
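+  // The bit-counting intrinsics map 1-1 onto ISD nodes; e.g. (sketch)
+  //   %n = call i32 @llvm.ctpop.i32(i32 %x)
+  // becomes an ISD::CTPOP node typed to match its operand.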
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getOperand(1));
+ MVT Ty = Arg.getValueType();
+ SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg);
+ setValue(&I, result);
+ return 0;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getOperand(1));
+ MVT Ty = Arg.getValueType();
+ SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg);
+ setValue(&I, result);
+ return 0;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getOperand(1));
+ MVT Ty = Arg.getValueType();
+ SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg);
+ setValue(&I, result);
+ return 0;
+ }
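+  // llvm.stacksave/llvm.stackrestore bracket dynamic stack allocation; e.g.
+  //   %sp = call i8* @llvm.stacksave()
+  //   ...
+  //   call void @llvm.stackrestore(i8* %sp)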
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+ case Intrinsic::stackrestore: {
+ SDValue Tmp = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MVT PtrTy = TLI.getPointerTy();
+
+ SDValue Src = getValue(I.getOperand(1)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI->setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+ PseudoSourceValue::getFixedStack(FI),
+ 0, true);
+ setValue(&I, Result);
+ DAG.setRoot(Result);
+ return 0;
+ }
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes
+ return 0;
+
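+  // Rough usage sketch:
+  //   %p = call i8* @llvm.init.trampoline(i8* %tramp, i8* %func, i8* %nval)
+  // where %func must strip down to a Function, as the cast<Function> below
+  // requires.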
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getOperand(1));
+ Ops[2] = getValue(I.getOperand(2));
+ Ops[3] = getValue(I.getOperand(3));
+ Ops[4] = DAG.getSrcValue(I.getOperand(1));
+ Ops[5] = DAG.getSrcValue(F);
+
+ SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+ Ops, 6);
+
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+
+ case Intrinsic::gcroot:
+ if (GFI) {
+ Value *Alloca = I.getOperand(1);
+ Constant *TypeMap = cast<Constant>(I.getOperand(2));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ }
+ return 0;
+
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ assert(0 && "GC failed to lower gcread/gcwrite intrinsics!");
+ return 0;
+
+ case Intrinsic::flt_rounds: {
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+ return 0;
+ }
+
+ case Intrinsic::trap: {
+    DAG.setRoot(DAG.getNode(ISD::TRAP, dl, MVT::Other, getRoot()));
+ return 0;
+ }
+
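+  // Each *.with.overflow intrinsic returns a {result, overflow} pair, e.g.
+  //   %s = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+  // and maps onto the corresponding ISD node below.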
+ case Intrinsic::uadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::UADDO);
+ case Intrinsic::sadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::SADDO);
+ case Intrinsic::usub_with_overflow:
+ return implVisitAluOverflow(I, ISD::USUBO);
+ case Intrinsic::ssub_with_overflow:
+ return implVisitAluOverflow(I, ISD::SSUBO);
+ case Intrinsic::umul_with_overflow:
+ return implVisitAluOverflow(I, ISD::UMULO);
+ case Intrinsic::smul_with_overflow:
+ return implVisitAluOverflow(I, ISD::SMULO);
+
+ case Intrinsic::prefetch: {
+ SDValue Ops[4];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getOperand(1));
+ Ops[2] = getValue(I.getOperand(2));
+ Ops[3] = getValue(I.getOperand(3));
+ DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+ return 0;
+ }
+
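+  // In this revision llvm.memory.barrier takes five i1 flags (load-load,
+  // load-store, store-load, store-store, device); they become Ops[1..5].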
+ case Intrinsic::memory_barrier: {
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ for (int x = 1; x < 6; ++x)
+ Ops[x] = getValue(I.getOperand(x));
+
+ DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
+ return 0;
+ }
+ case Intrinsic::atomic_cmp_swap: {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+ getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ getValue(I.getOperand(3)),
+ I.getOperand(1));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+ }
+ case Intrinsic::atomic_load_add:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
+ case Intrinsic::atomic_load_sub:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
+ case Intrinsic::atomic_load_or:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+ case Intrinsic::atomic_load_xor:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+ case Intrinsic::atomic_load_and:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+ case Intrinsic::atomic_load_nand:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
+ case Intrinsic::atomic_load_max:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+ case Intrinsic::atomic_load_min:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+ case Intrinsic::atomic_load_umin:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+ case Intrinsic::atomic_load_umax:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+ case Intrinsic::atomic_swap:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+ }
+}
+
+
+void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
+ bool IsTailCall,
+ MachineBasicBlock *LandingPad) {
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ unsigned BeginLabel = 0, EndLabel = 0;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+ for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ SDValue ArgNode = getValue(*i);
+ Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+
+ unsigned attrInd = i - CS.arg_begin() + 1;
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
+ Args.push_back(Entry);
+ }
+
+ if (LandingPad && MMI) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI->NextLabelID();
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+ getControlRoot(), BeginLabel));
+ }
+
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), CS.getType(),
+ CS.paramHasAttr(0, Attribute::SExt),
+ CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
+ CS.paramHasAttr(0, Attribute::InReg),
+ CS.getCallingConv(),
+ IsTailCall && PerformTailCallOpt,
+ Callee, Args, DAG, getCurDebugLoc());
+ if (CS.getType() != Type::VoidTy)
+ setValue(CS.getInstruction(), Result.first);
+ DAG.setRoot(Result.second);
+
+ if (LandingPad && MMI) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ EndLabel = MMI->NextLabelID();
+ DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+ getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
+}
+
+
+void SelectionDAGLowering::visitCall(CallInst &I) {
+ const char *RenameFn = 0;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
+ if (II) {
+ if (unsigned IID = II->getIntrinsicID(F)) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call.
+ unsigned NameLen = F->getNameLen();
+ if (!F->hasLocalLinkage() && NameLen) {
+ const char *NameStr = F->getNameStart();
+ if (NameStr[0] == 'c' &&
+ ((NameLen == 8 && !strcmp(NameStr, "copysign")) ||
+ (NameLen == 9 && !strcmp(NameStr, "copysignf")))) {
+ if (I.getNumOperands() == 3 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType() &&
+ I.getType() == I.getOperand(2)->getType()) {
+ SDValue LHS = getValue(I.getOperand(1));
+ SDValue RHS = getValue(I.getOperand(2));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ } else if (NameStr[0] == 'f' &&
+ ((NameLen == 4 && !strcmp(NameStr, "fabs")) ||
+ (NameLen == 5 && !strcmp(NameStr, "fabsf")) ||
+ (NameLen == 5 && !strcmp(NameStr, "fabsl")))) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (NameStr[0] == 's' &&
+ ((NameLen == 3 && !strcmp(NameStr, "sin")) ||
+ (NameLen == 4 && !strcmp(NameStr, "sinf")) ||
+ (NameLen == 4 && !strcmp(NameStr, "sinl")))) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (NameStr[0] == 'c' &&
+ ((NameLen == 3 && !strcmp(NameStr, "cos")) ||
+ (NameLen == 4 && !strcmp(NameStr, "cosf")) ||
+ (NameLen == 4 && !strcmp(NameStr, "cosl")))) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ }
+ }
+ } else if (isa<InlineAsm>(I.getOperand(0))) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ SDValue Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getOperand(0));
+ else
+ Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain,
+ SDValue *Flag) const {
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ MVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0)
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+ Chain = P.getValue(1);
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
+ RegisterVT.isInteger() && !RegisterVT.isVector()) {
+ unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+ if (FLI.LiveOutRegInfo.size() > SlotNo) {
+ FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI.NumSignBits;
+ unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ MVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+
+ if (FromVT != MVT::Other) {
+ P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+ }
+ }
+
+ Parts[i] = P;
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
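+
+// E.g. (sketch): an i64 value living in two i32 registers on a 32-bit target
+// yields two CopyFromReg nodes above, which getCopyFromParts glues back into
+// a single i64 before MERGE_VALUES combines the results.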
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+  // Get the list of the value's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI->getNumRegisters(ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0)
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+    // If NumRegs > 1 and Flag is used, then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+    // a single scheduling unit. If we create a TokenFactor and return it as
+    // the chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
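+/// The flag word pushed below is encoded as (matching the shifts used):
+/// bits 0-2 hold the operand code, bits 3-15 the number of registers, and,
+/// when HasMatching is set, bit 31 plus the matching index in bits 16-30.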
+void RegsForValue::AddInlineAsmOperands(unsigned Code,
+                                        bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+ assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
+ unsigned Flag = Code | (Regs.size() << 3);
+ if (HasMatching)
+ Flag |= 0x80000000 | (MatchingIdx << 16);
+ Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]);
+ MVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register. Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo *TRI) {
+ MVT FoundVT = MVT::Other;
+ const TargetRegisterClass *FoundRC = 0;
+ for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+ E = TRI->regclass_end(); RCI != E; ++RCI) {
+ MVT ThisVT = MVT::Other;
+
+ const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (TLI.isTypeLegal(*I)) {
+ // If we have already found this register in a different register class,
+ // choose the one with the largest VT specified. For example, on
+ // PowerPC, we favor f64 register classes over f32.
+ if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+ ThisVT = *I;
+ break;
+ }
+ }
+ }
+
+ if (ThisVT == MVT::Other) continue;
+
+    // NOTE: This isn't ideal. In particular, this might allocate the
+    // frame pointer in functions that need it (the frame pointer is only
+    // taken out of the allocation order once a variable sized allocation
+    // has been seen). This is a slight code pessimization, but should
+    // still work.
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ if (*I == Reg) {
+ // We found a matching register class. Keep looking at others in case
+ // we find one with larger registers that this physreg is also in.
+ FoundRC = RC;
+ FoundVT = ThisVT;
+ break;
+ }
+ }
+ return FoundRC;
+}
+
+
+namespace llvm {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+ public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+ /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+ }
+
+ /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+ /// busy in OutputRegs/InputRegs.
+ void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs,
+ const TargetRegisterInfo &TRI) const {
+ if (isOutReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+ }
+ if (isInReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+ }
+ }
+
+ /// getCallOperandValMVT - Return the MVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ MVT getCallOperandValMVT(const TargetLowering &TLI,
+ const TargetData *TD) const {
+ if (CallOperandVal == 0) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy();
+
+ const llvm::Type *OpTy = CallOperandVal->getType();
+
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect)
+ OpTy = cast<PointerType>(OpTy)->getElementType();
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(OpTy, true);
+ }
+
+private:
+ /// MarkRegAndAliases - Mark the specified register and all aliases in the
+ /// specified set.
+ static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+ Regs.insert(Reg);
+ if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+ for (; *Aliases; ++Aliases)
+ Regs.insert(*Aliases);
+ }
+};
+} // end llvm namespace.
+
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm uses
+/// features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+/// Input and OutputRegs are the set of already allocated physical registers.
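+/// For example, a "{ax}" constraint names one specific physreg, an "r"
+/// constraint picks any register in a class, and constraints with no
+/// corresponding LLVM register class fall through to the
+/// consecutive-allocation loop at the bottom.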
+///
+void SelectionDAGLowering::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) {
+ // Compute whether this value requires an input register, an output register,
+ // or both.
+ bool isOutReg = false;
+ bool isInReg = false;
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ isOutReg = true;
+
+ // If there is an input constraint that matches this, we need to reserve
+ // the input register so no other inputs allocate to it.
+ isInReg = OpInfo.hasMatchingInput();
+ break;
+ case InlineAsm::isInput:
+ isInReg = true;
+ isOutReg = false;
+ break;
+ case InlineAsm::isClobber:
+ isOutReg = true;
+ isInReg = true;
+ break;
+ }
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP input in an integer register (or vice versa), insert a bit
+ // cast of the input value. More generally, handle any case where the input
+ // value disagrees with the register class we plan to stick this in.
+ if (OpInfo.Type == InlineAsm::isInput &&
+ PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ // Try to convert to the first MVT that the reg class contains. If the
+      // types are of identical size, use a bitcast to convert (e.g. two differing
+ // vector types).
+ MVT RegVT = *PhysReg.second->vt_begin();
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ // If the input is a FP value and we want it in FP registers, do a
+ // bitcast to the corresponding integer type. This turns an f64 value
+ // into i64, which can be passed with two i32 values on a 32-bit
+ // machine.
+ RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ }
+ }
+
+ NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+ }
+
+ MVT RegVT;
+ MVT ValueVT = OpInfo.ConstraintVT;
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (unsigned AssignedReg = PhysReg.first) {
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *RC->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+ Regs.push_back(AssignedReg);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = RC->begin();
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ if (const TargetRegisterClass *RC = PhysReg.second) {
+ RegVT = *RC->vt_begin();
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // This is a reference to a register class that doesn't directly correspond
+  // to an LLVM register class. Allocate NumRegs consecutive, available
+ // registers from the class.
+ std::vector<unsigned> RegClassRegs
+ = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ unsigned NumAllocated = 0;
+ for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+ unsigned Reg = RegClassRegs[i];
+ // See if this register is available.
+ if ((isOutReg && OutputRegs.count(Reg)) || // Already used.
+ (isInReg && InputRegs.count(Reg))) { // Already used.
+ // Make sure we find consecutive registers.
+ NumAllocated = 0;
+ continue;
+ }
+
+ // Check to see if this register is allocatable (i.e. don't give out the
+ // stack pointer).
+ const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
+ if (!RC) { // Couldn't allocate this register.
+ // Reset NumAllocated to make sure we return consecutive registers.
+ NumAllocated = 0;
+ continue;
+ }
+
+ // Okay, this register is good, we can use it.
+ ++NumAllocated;
+
+ // If we allocated enough consecutive registers, succeed.
+ if (NumAllocated == NumRegs) {
+ unsigned RegStart = (i-NumAllocated)+1;
+ unsigned RegEnd = i+1;
+ // Mark all of the allocated registers used.
+ for (unsigned i = RegStart; i != RegEnd; ++i)
+ Regs.push_back(RegClassRegs[i]);
+
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
+ OpInfo.ConstraintVT);
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+ return;
+ }
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+static bool
+hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+    // Indirect operands access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
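+
+// E.g. (sketch): call void asm "movl $$0, $0", "=*m"(i32* %p) uses an 'm'
+// constraint, and any indirect operand likewise counts as touching memory.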
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
+ InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+
+ std::set<unsigned> OutputRegs, InputRegs;
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ std::vector<InlineAsm::ConstraintInfo>
+ ConstraintInfos = IA->ParseConstraints();
+
+ bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
+
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ MVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+    // BasicBlocks are labels, currently appearing only in asms.
+ if (OpInfo.CallOperandVal) {
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValMVT(TLI, TD);
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+ }
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ cerr << "llvm: error: Unsupported asm: input constraint with a "
+ << "matching output constraint of incompatible type!\n";
+ exit(1);
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+
+ // If this is a memory input, and if the operand is not indirect, do what we
+    // need to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert(OpInfo.Type == InlineAsm::isInput &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+      // an indirect input, put it in the constant pool if we can, otherwise spill
+ // it to a stack slot.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ const Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, getCurDebugLoc(),
+ OpInfo.CallOperand, StackSlot, NULL, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+ }
+ ConstraintInfos.clear();
+
+  // Third pass: loop over all of the operands, assigning virtual or physregs
+ // to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned ResOpType = 4/*MEM*/ | (1<<3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ cerr << "llvm: error: Couldn't allocate output reg for constraint '"
+ << OpInfo.ConstraintCode << "'!\n";
+ exit(1);
+ }
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+ 6 /* EARLYCLOBBER REGDEF */ :
+ 2 /* REGDEF */ ,
+ false,
+ 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+ // Scan until we find the definition we already emitted of this operand.
+ // When we find it, create a RegsForValue operand.
+ unsigned CurOp = 2; // The first operand.
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
+ (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
+ (OpFlag & 7) == 4 /*MEM*/) &&
+ "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if ((OpFlag & 7) == 2 /*REGDEF*/
+ || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle tied indirect register inputs yet!");
+ RegsForValue MatchedRegs;
+ MatchedRegs.TLI = &TLI;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ MVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.
+ push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+        // Use the produced MatchedRegs object to copy the input into the regs.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+ MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+ true, OpInfo.getMatchedOperand(),
+ DAG, AsmNodeOperands);
+ break;
+ } else {
+ assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
+ assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect other inputs yet!");
+
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+ hasMemory, Ops, DAG);
+ if (Ops.empty()) {
+ cerr << "llvm: error: Invalid operand for inline asm constraint '"
+ << OpInfo.ConstraintCode << "'!\n";
+ exit(1);
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = 4/*MEM*/ | (1<<3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect register inputs yet!");
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+        cerr << "llvm: error: Couldn't allocate input reg for constraint '"
+ << OpInfo.ConstraintCode << "'!\n";
+ exit(1);
+ }
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+ false, 0, DAG,AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands.
+ AsmNodeOperands[0] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+ DAG.getVTList(MVT::Other, MVT::Flag),
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ Chain, &Flag);
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ MVT ResultType = TLI.getValueType(CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/num elts. This can happen for register classes that can
+ // contain multiple different value types. The preg or vreg allocated may
+ // not have the same VT as was expected. Convert it to the right type
+ // with bit_convert.
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ ResultType, Val);
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
+ std::vector<std::pair<SDValue, Value*> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ Chain, &Flag);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
+ OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(),
+ StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ StoresToEmit[i].second, 0));
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &OutChains[0], OutChains.size());
+ DAG.setRoot(Chain);
+}
+
+
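+// In this revision malloc is still an IR instruction (see the FIXME below);
+// e.g. (sketch) "%p = malloc i32, i32 %n" is lowered here to a libc call
+// equivalent to malloc(%n * 4).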
+void SelectionDAGLowering::visitMalloc(MallocInst &I) {
+ SDValue Src = getValue(I.getOperand(0));
+
+  // Scale up by the type size, in the original i32 type width. Various
+  // mid-level optimizers may make assumptions about demanded bits etc. from
+  // the i32-ness of the malloc size: we do not want to promote to i64 and
+  // then multiply on 64-bit targets.
+ // FIXME: Malloc inst should go away: PR715.
+ uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType());
+ if (ElementSize != 1)
+ Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(),
+ Src, DAG.getConstant(ElementSize, Src.getValueType()));
+
+ MVT IntPtr = TLI.getPointerTy();
+
+ if (IntPtr.bitsLT(Src.getValueType()))
+ Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src);
+ else if (IntPtr.bitsGT(Src.getValueType()))
+ Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Src;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Args.push_back(Entry);
+
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false,
+ CallingConv::C, PerformTailCallOpt,
+ DAG.getExternalSymbol("malloc", IntPtr),
+ Args, DAG, getCurDebugLoc());
+ setValue(&I, Result.first); // Pointers always fit in registers
+ DAG.setRoot(Result.second);
+}
+
+void SelectionDAGLowering::visitFree(FreeInst &I) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(I.getOperand(0));
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Args.push_back(Entry);
+ MVT IntPtr = TLI.getPointerTy();
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false,
+ CallingConv::C, PerformTailCallOpt,
+ DAG.getExternalSymbol("free", IntPtr), Args, DAG,
+ getCurDebugLoc());
+ DAG.setRoot(Result.second);
+}
+
+void SelectionDAGLowering::visitVAStart(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(1))));
+}
+
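+// E.g. (sketch): "%v = va_arg i8** %ap, i32" becomes an ISD::VAARG node;
+// result 0 is the loaded value and result 1 is the updated chain, which is
+// installed as the new root below.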
+void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+ getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGLowering::visitVAEnd(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVACopy(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ DAG.getSrcValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(2))));
+}
+
+/// TargetLowering::LowerArguments - This is the default LowerArguments
+/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all
+/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be
+/// integrated into SDISel.
+void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl) {
+ // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node.
+ SmallVector<SDValue, 3+16> Ops;
+ Ops.push_back(DAG.getRoot());
+ Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy()));
+ Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy()));
+
+ // Add one result value for each formal argument.
+ SmallVector<MVT, 16> RetVals;
+ unsigned j = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++j) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(*this, I->getType(), ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ MVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForMVT();
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(ArgTy);
+
+ if (F.paramHasAttr(j, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.paramHasAttr(j, Attribute::SExt))
+ Flags.setSExt();
+ if (F.paramHasAttr(j, Attribute::InReg))
+ Flags.setInReg();
+ if (F.paramHasAttr(j, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.paramHasAttr(j, Attribute::ByVal)) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(I->getType());
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+ unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy);
+        // For ByVal, the alignment should be passed from the front end; the
+        // back end will guess if this info is not there, but there are cases
+        // it cannot get right.
+ if (F.getParamAlignment(j))
+ FrameAlign = F.getParamAlignment(j);
+ Flags.setByValAlign(FrameAlign);
+ Flags.setByValSize(FrameSize);
+ }
+ if (F.paramHasAttr(j, Attribute::Nest))
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT RegisterVT = getRegisterType(VT);
+ unsigned NumRegs = getNumRegisters(VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ RetVals.push_back(RegisterVT);
+ ISD::ArgFlagsTy MyFlags = Flags;
+ if (NumRegs > 1 && i == 0)
+ MyFlags.setSplit();
+        // If it isn't the first piece, the alignment must be 1.
+ else if (i > 0)
+ MyFlags.setOrigAlign(1);
+ Ops.push_back(DAG.getArgFlags(MyFlags));
+ }
+ }
+ }
+
+ RetVals.push_back(MVT::Other);
+
+ // Create the node.
+ SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl,
+ DAG.getVTList(&RetVals[0], RetVals.size()),
+ &Ops[0], Ops.size()).getNode();
+
+ // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but
+ // allows exposing the loads that may be part of the argument access to the
+ // first DAGCombiner pass.
+ SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG);
+
+ // The number of results should match up, except that the lowered one may have
+ // an extra flag result.
+ assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() ||
+ (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() &&
+ TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag))
+ && "Lowering produced unexpected number of results!");
+
+ // The FORMAL_ARGUMENTS node itself is likely no longer needed.
+ if (Result != TmpRes.getNode() && Result->use_empty()) {
+ HandleSDNode Dummy(DAG.getRoot());
+ DAG.RemoveDeadNode(Result);
+ }
+
+ Result = TmpRes.getNode();
+
+ unsigned NumArgRegs = Result->getNumValues() - 1;
+ DAG.setRoot(SDValue(Result, NumArgRegs));
+
+ // Set up the return result vector.
+ unsigned i = 0;
+ unsigned Idx = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(*this, I->getType(), ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ MVT VT = ValueVTs[Value];
+ MVT PartVT = getRegisterType(VT);
+
+ unsigned NumParts = getNumRegisters(VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ for (unsigned j = 0; j != NumParts; ++j)
+ Parts[j] = SDValue(Result, i++);
+
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.paramHasAttr(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts,
+ PartVT, VT, AssertOp));
+ }
+ }
+ assert(i == NumArgRegs && "Argument register count mismatch!");
+}
+
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just inserts an ISD::CALL node, which is later custom
+/// lowered by the target to something concrete. FIXME: When all targets are
+/// migrated to using ISD::CALL, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg,
+ unsigned CallingConv, bool isTailCall,
+ SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
+ assert((!isTailCall || PerformTailCallOpt) &&
+ "isTailCall set when tail-call optimizations are disabled!");
+
+ SmallVector<SDValue, 32> Ops;
+ Ops.push_back(Chain); // Op#0 - Chain
+ Ops.push_back(Callee);
+
+ // Handle all of the outgoing arguments.
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ MVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForMVT();
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(ArgTy);
+
+ if (Args[i].isZExt)
+ Flags.setZExt();
+ if (Args[i].isSExt)
+ Flags.setSExt();
+ if (Args[i].isInReg)
+ Flags.setInReg();
+ if (Args[i].isSRet)
+ Flags.setSRet();
+ if (Args[i].isByVal) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+ unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy);
+        // For ByVal, the alignment should come from the front end; the back
+        // end will guess if this info is not there, but there are cases it
+        // cannot get right.
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ Flags.setByValAlign(FrameAlign);
+ Flags.setByValSize(FrameSize);
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT PartVT = getRegisterType(VT);
+ unsigned NumParts = getNumRegisters(VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind);
+
+ for (unsigned i = 0; i != NumParts; ++i) {
+        // If it isn't the first piece, the alignment must be 1.
+ ISD::ArgFlagsTy MyFlags = Flags;
+ if (NumParts > 1 && i == 0)
+ MyFlags.setSplit();
+ else if (i != 0)
+ MyFlags.setOrigAlign(1);
+
+ Ops.push_back(Parts[i]);
+ Ops.push_back(DAG.getArgFlags(MyFlags));
+ }
+ }
+ }
+
+ // Figure out the result value types. We start by making a list of
+ // the potentially illegal return value types.
+ SmallVector<MVT, 4> LoweredRetTys;
+ SmallVector<MVT, 4> RetTys;
+ ComputeValueVTs(*this, RetTy, RetTys);
+
+ // Then we translate that to a list of legal types.
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ MVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(VT);
+ unsigned NumRegs = getNumRegisters(VT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ LoweredRetTys.push_back(RegisterVT);
+ }
+
+ LoweredRetTys.push_back(MVT::Other); // Always has a chain.
+
+ // Create the CALL node.
+ SDValue Res = DAG.getCall(CallingConv, dl,
+ isVarArg, isTailCall, isInreg,
+ DAG.getVTList(&LoweredRetTys[0],
+ LoweredRetTys.size()),
+ &Ops[0], Ops.size()
+ );
+ Chain = Res.getValue(LoweredRetTys.size() - 1);
+
+ // Gather up the call result into a single value.
+ if (RetTy != Type::VoidTy && !RetTys.empty()) {
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+
+ if (RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (RetZExt)
+ AssertOp = ISD::AssertZext;
+
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned RegNo = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ MVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(VT);
+ unsigned NumRegs = getNumRegisters(VT);
+ unsigned RegNoEnd = NumRegs + RegNo;
+ SmallVector<SDValue, 4> Results;
+ for (; RegNo != RegNoEnd; ++RegNo)
+ Results.push_back(Res.getValue(RegNo));
+ SDValue ReturnValue =
+ getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT,
+ AssertOp);
+ ReturnValues.push_back(ReturnValue);
+ }
+ Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+ }
+
+ return std::make_pair(Res, Chain);
+}
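+
+// The argument loop above consumes TargetLowering::ArgListEntry records.
+// As a minimal sketch (illustrative only; 'ArgValue' and 'V' are
+// placeholders for an already-lowered SDValue and its IR value), a caller
+// would populate one entry per IR-level argument before invoking this
+// routine:
+//
+//   TargetLowering::ArgListTy Args;
+//   TargetLowering::ArgListEntry Entry;
+//   Entry.Node = ArgValue;
+//   Entry.Ty = V->getType();            // IR type of the argument
+//   Entry.isSExt = Entry.isZExt = false;
+//   Args.push_back(Entry);
+//
+// The remaining flags read above (isInReg, isSRet, isByVal, isNest,
+// Alignment) stay in their default zero state unless the call site
+// requires them.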
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ assert(0 && "LowerOperation not implemented for this target!");
+ abort();
+ return SDValue();
+}
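+
+// Targets that set an operation action to Custom are expected to override
+// LowerOperation. A minimal sketch of such an override (hypothetical
+// target; LowerSELECT is an illustrative helper, not a real API):
+//
+//   SDValue MyTargetLowering::LowerOperation(SDValue Op,
+//                                            SelectionDAG &DAG) {
+//     switch (Op.getOpcode()) {
+//     case ISD::SELECT: return LowerSELECT(Op, DAG);
+//     default: assert(0 && "unexpected custom-lowered operation");
+//     }
+//     return SDValue();
+//   }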
+
+
+void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+ SDValue Op = getValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ RegsForValue RFV(TLI, Reg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::
+LowerArguments(BasicBlock *LLVMBB) {
+ // If this is the entry block, emit arguments.
+ Function &F = *LLVMBB->getParent();
+ SDValue OldRoot = SDL->DAG.getRoot();
+ SmallVector<SDValue, 16> Args;
+ TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc());
+
+ unsigned a = 0;
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, AI->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (!AI->use_empty()) {
+ SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues,
+ SDL->getCurDebugLoc()));
+      // If this argument is live outside of the entry block, insert a copy
+      // from wherever we got it to the vreg that other BBs will reference
+      // it as.
+ SDL->CopyToExportRegsIfNeeded(AI);
+ }
+ a += NumValues;
+ }
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ // FIXME: this should insert code into the DAG!
+ EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction());
+}
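+
+// Note that 'a' advances by NumValues per IR argument, so aggregate
+// arguments consume several consecutive entries of Args. For example
+// (illustrative), a parameter of type {i32, float} makes ComputeValueVTs
+// return two value types; the getMergeValues call above then consumes
+// Args[a] and Args[a+1], and 'a' advances by 2.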
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
+ TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+ PHINode *PN;
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::iterator I = SuccBB->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ unsigned Reg;
+ Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = SDL->ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo->CreateRegForValue(C);
+ SDL->CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ Reg = FuncInfo->ValueMap[PHIOp];
+ if (Reg == 0) {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo->CreateRegForValue(PHIOp);
+ SDL->CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+      // Remember that this register needs to be added to the machine PHI
+      // node as the input for this MBB.
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ MVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+ SDL->ConstantsOut.clear();
+}
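+
+// Worked example (illustrative): on a target where i64 is legalized to two
+// i32 registers, one LLVM PHI of type i64 corresponds to two machine PHI
+// nodes. NumRegisters == 2, so the loop above records (MBBI, Reg) and
+// (MBBI+1, Reg+1), advancing MBBI past both machine PHIs.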
+
+/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
+/// supports legal types, and it emits MachineInstrs directly instead of
+/// creating SelectionDAG nodes.
+///
+bool
+SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
+ FastISel *F) {
+ TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+ PHINode *PN;
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::iterator I = SuccBB->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegForValue to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(VT);
+ else {
+ SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ unsigned Reg = F->getRegForValue(PHIOp);
+ if (Reg == 0) {
+ SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ }
+ }
+
+ return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
new file mode 100644
index 0000000..578aa591
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -0,0 +1,558 @@
+//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILD_H
+#define SELECTIONDAGBUILD_H
+
+#include "llvm/Constants.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetMachine.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class FreeInst;
+class Function;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class MallocInst;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VICmpInst;
+class VFCmpInst;
+class VAArgInst;
+class ZExtInst;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function that is used when lowering a region of the function.
+///
+class FunctionLoweringInfo {
+public:
+ TargetLowering &TLI;
+ Function *Fn;
+ MachineFunction *MF;
+ MachineRegisterInfo *RegInfo;
+
+ explicit FunctionLoweringInfo(TargetLowering &TLI);
+
+ /// set - Initialize this FunctionLoweringInfo with the given Function
+ /// and its associated MachineFunction.
+ ///
+ void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG,
+ bool EnableFastISel);
+
+ /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+ DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+ /// ValueMap - Since we emit code for the function a basic block at a time,
+ /// we must remember which virtual registers hold the values for
+ /// cross-basic-block values.
+ DenseMap<const Value*, unsigned> ValueMap;
+
+ /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+ /// the entry block. This allows the allocas to be efficiently referenced
+ /// anywhere in the function.
+ DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+ SmallSet<Instruction*, 8> CatchInfoLost;
+ SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+ unsigned MakeReg(MVT VT);
+
+ /// isExportedInst - Return true if the specified value is an instruction
+ /// exported from its block.
+ bool isExportedInst(const Value *V) {
+ return ValueMap.count(V);
+ }
+
+ unsigned CreateRegForValue(const Value *V);
+
+ unsigned InitializeRegForValue(const Value *V) {
+ unsigned &R = ValueMap[V];
+ assert(R == 0 && "Already initialized this value register!");
+ return R = CreateRegForValue(V);
+ }
+
+ struct LiveOutInfo {
+ unsigned NumSignBits;
+ APInt KnownOne, KnownZero;
+ LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+ };
+
+ /// LiveOutRegInfo - Information about live out vregs, indexed by their
+ /// register number offset by 'FirstVirtualRegister'.
+ std::vector<LiveOutInfo> LiveOutRegInfo;
+
+ /// clear - Clear out all the function-specific state. This returns this
+ /// FunctionLoweringInfo to an empty state, ready to be used for a
+ /// different function.
+ void clear() {
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+#ifndef NDEBUG
+ CatchInfoLost.clear();
+ CatchInfoFound.clear();
+#endif
+ LiveOutRegInfo.clear();
+ }
+};
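+
+// Typical per-function lifetime of a FunctionLoweringInfo, as a sketch of
+// how SelectionDAGISel drives it (the exact call sites live in
+// SelectionDAGISel.cpp):
+//
+//   FunctionLoweringInfo FLI(TLI);
+//   FLI.set(Fn, MF, DAG, EnableFastISel);      // per-function setup
+//   unsigned R = FLI.InitializeRegForValue(V); // first definition of V
+//   ...
+//   FLI.clear();                               // ready for the next function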
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLowering - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can override any lowering method.
+///
+class SelectionDAGLowering {
+ MachineBasicBlock *CurMBB;
+
+ /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+ DebugLoc CurDebugLoc;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+  /// instruction, but they have no other ordering requirements. We bunch
+  /// them up and then emit a single token factor for them just before
+  /// terminator instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// Case - A struct to record the Value for a switch case, and the
+ /// case's target basic block.
+ struct Case {
+ Constant* Low;
+ Constant* High;
+ MachineBasicBlock* BB;
+
+ Case() : Low(0), High(0), BB(0) { }
+ Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+ Low(low), High(high), BB(bb) { }
+ uint64_t size() const {
+ uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
+ uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue();
+ return (rHigh - rLow + 1ULL);
+ }
+ };
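+
+  // For example, a Case with Low == 2 and High == 5 covers the values
+  // {2, 3, 4, 5}, so size() returns (5 - 2 + 1) == 4.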
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+ Mask(mask), BB(bb), Bits(bits) { }
+ };
+
+ typedef std::vector<Case> CaseVector;
+ typedef std::vector<CaseBits> CaseBitsVector;
+ typedef CaseVector::iterator CaseItr;
+ typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+ /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+ /// of conditional branches.
+ struct CaseRec {
+ CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+ CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+ /// CaseBB - The MBB in which to emit the compare and branch
+ MachineBasicBlock *CaseBB;
+ /// LT, GE - If nonzero, we know the current case value must be less-than or
+ /// greater-than-or-equal-to these Constants.
+ Constant *LT;
+ Constant *GE;
+ /// Range - A pair of iterators representing the range of case values to be
+ /// processed at this point in the binary search tree.
+ CaseRange Range;
+ };
+
+ typedef std::vector<CaseRec> CaseRecVector;
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const Case& C1, const Case& C2) {
+ assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+
+ struct CaseBitsCmp {
+ bool operator () (const CaseBits& C1, const CaseBits& C2) {
+ return C1.Bits > C2.Bits;
+ }
+ };
+
+ size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
+
+ /// CaseBlock - This structure is used to communicate between SDLowering and
+ /// SDISel for the code generation of additional basic blocks needed by multi-
+ /// case switch statements.
+ struct CaseBlock {
+ CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
+ MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+ MachineBasicBlock *me)
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+ // CC - the condition code to use for the case block's setcc node
+ ISD::CondCode CC;
+ // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+ // Emit by default LHS op RHS. MHS is used for range comparisons:
+ // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+ Value *CmpLHS, *CmpMHS, *CmpRHS;
+ // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+ // ThisBB - the block into which to emit the code for the setcc and branches
+ MachineBasicBlock *ThisBB;
+ };
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+    /// Reg - the virtual register containing the index of the jump table
+    /// entry to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+    /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H,
+ bool E = false):
+ First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ APInt First;
+ APInt Last;
+ Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+ Mask(M), ThisBB(T), TargetBB(Tr) { }
+ uint64_t Mask;
+ MachineBasicBlock* ThisBB;
+ MachineBasicBlock* TargetBB;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, Value* SV,
+ unsigned Rg, bool E,
+ MachineBasicBlock* P, MachineBasicBlock* D,
+ const BitTestInfo& C):
+ First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+ Parent(P), Default(D), Cases(C) { }
+ APInt First;
+ APInt Range;
+ Value *SValue;
+ unsigned Reg;
+ bool Emitted;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ };
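+
+  // Illustration: lowering "switch (x) { case 0: case 2: case 4: goto T; }"
+  // with bit tests could emit a single BitTestCase whose Mask is
+  // (1 << 0) | (1 << 2) | (1 << 4) == 0x15 and whose TargetBB is T's MBB;
+  // the header block emits the range check on SValue first.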
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const TargetData *TD;
+ AliasAnalysis *AA;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<Constant*, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// OptLevel - What optimization level we're generating code for.
+ ///
+ CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
+ FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
+ : CurDebugLoc(DebugLoc::getUnknownLoc()),
+ TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) {
+ }
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+  /// clear - Clear out the current SelectionDAG and the associated
+ /// state and prepare this SelectionDAGLowering object to be used
+ /// for a new block. This doesn't clear out information about
+ /// additional blocks that are needed to complete switch lowering
+ /// or PHI node updating; that information is cleared out as it is
+ /// consumed.
+ void clear();
+
+ /// getRoot - Return the current virtual root of the Selection DAG,
+ /// flushing any PendingLoad items. This must be done before emitting
+ /// a store or any other node that may need to be ordered after any
+ /// prior load instructions.
+ ///
+ SDValue getRoot();
+
+ /// getControlRoot - Similar to getRoot, but instead of flushing all the
+ /// PendingLoad items, flush all the PendingExports items. It is necessary
+ /// to do this before emitting a terminator instruction.
+ ///
+ SDValue getControlRoot();
+
+ DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+ void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
+
+ void CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+ void visit(Instruction &I);
+
+ void visit(unsigned Opcode, User &I);
+
+ void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+ SDValue getValue(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs);
+
+ void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ unsigned Opc);
+ void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(Value *V);
+ void ExportFromCurrentBlock(Value *V);
+ void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+private:
+ // Terminator instructions.
+ void visitRet(ReturnInst &I);
+ void visitBr(BranchInst &I);
+ void visitSwitch(SwitchInst &I);
+ void visitUnreachable(UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+public:
+ void visitSwitchCase(CaseBlock &CB);
+ void visitBitTestHeader(BitTestBlock &B);
+ void visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(InvokeInst &I);
+ void visitUnwind(UnwindInst &I);
+
+ void visitBinary(User &I, unsigned OpCode);
+ void visitShift(User &I, unsigned Opcode);
+ void visitAdd(User &I);
+ void visitSub(User &I);
+ void visitMul(User &I);
+ void visitURem(User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
+ void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(User &I);
+ void visitFCmp(User &I);
+ void visitVICmp(User &I);
+ void visitVFCmp(User &I);
+ // Visit the conversion instructions
+ void visitTrunc(User &I);
+ void visitZExt(User &I);
+ void visitSExt(User &I);
+ void visitFPTrunc(User &I);
+ void visitFPExt(User &I);
+ void visitFPToUI(User &I);
+ void visitFPToSI(User &I);
+ void visitUIToFP(User &I);
+ void visitSIToFP(User &I);
+ void visitPtrToInt(User &I);
+ void visitIntToPtr(User &I);
+ void visitBitCast(User &I);
+
+ void visitExtractElement(User &I);
+ void visitInsertElement(User &I);
+ void visitShuffleVector(User &I);
+
+ void visitExtractValue(ExtractValueInst &I);
+ void visitInsertValue(InsertValueInst &I);
+
+ void visitGetElementPtr(User &I);
+ void visitSelect(User &I);
+
+ void visitMalloc(MallocInst &I);
+ void visitFree(FreeInst &I);
+ void visitAlloca(AllocaInst &I);
+ void visitLoad(LoadInst &I);
+ void visitStore(StoreInst &I);
+ void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
+ void visitCall(CallInst &I);
+ void visitInlineAsm(CallSite CS);
+ const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+
+ void visitPow(CallInst &I);
+ void visitExp2(CallInst &I);
+ void visitExp(CallInst &I);
+ void visitLog(CallInst &I);
+ void visitLog2(CallInst &I);
+ void visitLog10(CallInst &I);
+
+ void visitVAStart(CallInst &I);
+ void visitVAArg(VAArgInst &I);
+ void visitVAEnd(CallInst &I);
+ void visitVACopy(CallInst &I);
+
+ void visitUserOp1(Instruction &I) {
+ assert(0 && "UserOp1 should not exist at instruction selection time!");
+ abort();
+ }
+ void visitUserOp2(Instruction &I) {
+ assert(0 && "UserOp2 should not exist at instruction selection time!");
+ abort();
+ }
+
+ const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
+ const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
+};
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..9d72a12
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,1347 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuild.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableLegalizeTypes("disable-legalize-types", cl::Hidden);
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+ cl::desc("Enable verbose messages in the \"fast\" "
+ "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction fails"));
+static cl::opt<bool>
+SchedLiveInCopies("schedule-livein-copies",
+ cl::desc("Schedule copies of livein registers"),
+ cl::init(false));
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler),
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering &TLI = IS->getTargetLowering();
+
+ if (OptLevel == CodeGenOpt::None)
+ return createFastDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+ return createTDListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() ==
+ TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
+ return createBURRListDAGScheduler(IS, OptLevel);
+ }
+}
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomDAGSchedInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and the scheduler passes ownership of it to this method.
+MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ cerr << "If a target marks an instruction with "
+ << "'usesCustomDAGSchedInserter', it must implement "
+ << "TargetLowering::EmitInstrWithCustomInserter!\n";
+ abort();
+ return 0;
+}
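+
+// A target override typically expands the pseudo instruction into a small
+// control-flow diamond. Hedged sketch (hypothetical target; the body is
+// elided):
+//
+//   MachineBasicBlock *
+//   MyTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+//                                                 MachineBasicBlock *BB) const {
+//     // Split BB at MI, emit the compare-and-branch, add a PHI in the
+//     // join block, erase MI, and return the block where the scheduler
+//     // should resume emitting code.
+//     ...
+//   }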
+
+/// EmitLiveInCopy - Emit a copy for a live-in physical register. If the
+/// physical register has only a single copy use, then coalesce the copy
+/// if possible.
+static void EmitLiveInCopy(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &InsertPos,
+ unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterClass *RC,
+ DenseMap<MachineInstr*, unsigned> &CopyRegMap,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ unsigned NumUses = 0;
+ MachineInstr *UseMI = NULL;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
+ UE = MRI.use_end(); UI != UE; ++UI) {
+ UseMI = &*UI;
+ if (++NumUses > 1)
+ break;
+ }
+
+ // If the number of uses is not one, or the use is not a move instruction,
+ // don't coalesce. Also, only coalesce away a virtual register to virtual
+ // register copy.
+ bool Coalesced = false;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (NumUses == 1 &&
+ TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ VirtReg = DstReg;
+ Coalesced = true;
+ }
+
+ // Now find an ideal location to insert the copy.
+ MachineBasicBlock::iterator Pos = InsertPos;
+ while (Pos != MBB->begin()) {
+ MachineInstr *PrevMI = prior(Pos);
+ DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
+ // copyRegToReg might emit multiple instructions to do a copy.
+ unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
+ if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
+ // This is what the BB looks like right now:
+ // r1024 = mov r0
+ // ...
+ // r1 = mov r1024
+ //
+ // We want to insert "r1025 = mov r1". Inserting this copy below the
+ // move to r1024 makes it impossible for that move to be coalesced.
+ //
+ // r1025 = mov r1
+ // r1024 = mov r0
+ // ...
+      //   r1 = mov r1024
+      //   r2 = mov r1025
+ break; // Woot! Found a good location.
+ --Pos;
+ }
+
+ TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+ CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
+ if (Coalesced) {
+ if (&*InsertPos == UseMI) ++InsertPos;
+ MBB->erase(UseMI);
+ }
+}
+
+/// EmitLiveInCopies - If this is the first basic block in the function,
+/// and if it has live ins that need to be copied into vregs, emit the
+/// copies into the block.
+static void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ if (SchedLiveInCopies) {
+ // Emit the copies at a heuristically-determined location in the block.
+ DenseMap<MachineInstr*, unsigned> CopyRegMap;
+ MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ E = MRI.livein_end(); LI != E; ++LI)
+ if (LI->second) {
+ const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
+ EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
+ RC, CopyRegMap, MRI, TRI, TII);
+ }
+ } else {
+ // Emit the copies into the top of the block.
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ E = MRI.livein_end(); LI != E; ++LI)
+ if (LI->second) {
+ const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
+ TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
+ LI->second, LI->first, RC, RC);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
+ FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
+ FuncInfo(new FunctionLoweringInfo(TLI)),
+ CurDAG(new SelectionDAG(TLI, *FuncInfo)),
+ SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)),
+ GFI(),
+ OptLevel(OL),
+ DAGSize(0)
+{}
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDL;
+ delete CurDAG;
+ delete FuncInfo;
+}
+
+unsigned SelectionDAGISel::MakeReg(MVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ AU.setPreservesAll();
+}
+
+bool SelectionDAGISel::runOnFunction(Function &Fn) {
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || EnableFastISel) &&
+ "-fast-isel-abort requires -fast-isel");
+
+  // Do not codegen any 'available_externally' functions at all; they have
+  // definitions outside the translation unit.
+ if (Fn.hasAvailableExternallyLinkage())
+ return false;
+
+ // Get alias analysis for load/store combining.
+ AA = &getAnalysis<AliasAnalysis>();
+
+ TargetMachine &TM = TLI.getTargetMachine();
+ MF = &MachineFunction::construct(&Fn, TM);
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ if (MF->getFunction()->hasGC())
+ GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF->getFunction());
+ else
+ GFI = 0;
+ RegInfo = &MF->getRegInfo();
+ DOUT << "\n\n\n=== " << Fn.getName() << "\n";
+
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ CurDAG->init(*MF, MMI, DW);
+ FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel);
+ SDL->init(GFI, *AA);
+
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
+ // Mark landing pad.
+ FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+
+ SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII);
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII);
+
+ // Add function live-ins to entry block live-in set.
+ for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); I != E; ++I)
+ MF->begin()->addLiveIn(I->first);
+
+#ifndef NDEBUG
+ assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+#endif
+
+ FuncInfo->clear();
+
+ return true;
+}
+
+static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+ for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+ if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ // Apply the catch info to DestBB.
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+ if (!FLI.MBBMap[SrcBB]->isLandingPad())
+ FLI.CatchInfoFound.insert(EHSel);
+#endif
+ }
+}
+
+/// IsFixedFrameObjectWithPosOffset - Check whether the object is a fixed
+/// frame object with an offset >= 0.
+static bool
+IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) {
+ if (!isa<FrameIndexSDNode>(Op)) return false;
+
+ FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op);
+ int FrameIdx = FrameIdxNode->getIndex();
+ return MFI->isFixedObjectIndex(FrameIdx) &&
+ MFI->getObjectOffset(FrameIdx) >= 0;
+}
+
+/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
+/// possibly be overwritten when lowering the outgoing arguments in a tail
+/// call. Currently the implementation of this check is very conservative
+/// and assumes that all arguments sourced from FORMAL_ARGUMENTS or from a
+/// CopyFromReg of a virtual register would be overwritten by direct lowering.
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op,
+ MachineFrameInfo *MFI) {
+ RegisterSDNode * OpReg = NULL;
+ if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+ (Op.getOpcode()== ISD::CopyFromReg &&
+ (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
+ (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
+ (Op.getOpcode() == ISD::LOAD &&
+ IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) ||
+ (Op.getOpcode() == ISD::MERGE_VALUES &&
+ Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD &&
+ IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()).
+ getOperand(1))))
+ return true;
+ return false;
+}
+
+/// CheckDAGForTailCallsAndFixThem - This function looks for CALL nodes in
+/// the DAG and fixes their tail call attribute operands.
+static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
+ const TargetLowering& TLI) {
+ SDNode * Ret = NULL;
+ SDValue Terminator = DAG.getRoot();
+
+ // Find RET node.
+ if (Terminator.getOpcode() == ISD::RET) {
+ Ret = Terminator.getNode();
+ }
+
+ // Fix tail call attribute of CALL nodes.
+ for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(),
+ BI = DAG.allnodes_end(); BI != BE; ) {
+ --BI;
+ if (CallSDNode *TheCall = dyn_cast<CallSDNode>(BI)) {
+ SDValue OpRet(Ret, 0);
+ SDValue OpCall(BI, 0);
+ bool isMarkedTailCall = TheCall->isTailCall();
+      // If the CALL node has its tail call attribute set to true but the
+      // call is not eligible (there is no RET, or the target rejects it),
+      // the attribute is fixed to false. The
+      // TargetLowering::IsEligibleForTailCallOptimization function must
+      // correctly identify tail call optimizable calls.
+ if (!isMarkedTailCall) continue;
+ if (Ret==NULL ||
+ !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) {
+ // Not eligible. Mark CALL node as non tail call. Note that we
+ // can modify the call node in place since calls are not CSE'd.
+ TheCall->setNotTailCall();
+ } else {
+ // Look for tail call clobbered arguments. Emit a series of
+ // copyto/copyfrom virtual register nodes to protect them.
+ SmallVector<SDValue, 32> Ops;
+ SDValue Chain = TheCall->getChain(), InFlag;
+ Ops.push_back(Chain);
+ Ops.push_back(TheCall->getCallee());
+ for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+ bool isByVal = TheCall->getArgFlags(i).isByVal();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (!isByVal &&
+ IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
+ MVT VT = Arg.getValueType();
+ unsigned VReg = MF.getRegInfo().
+ createVirtualRegister(TLI.getRegClassFor(VT));
+ Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(),
+ VReg, Arg, InFlag);
+ InFlag = Chain.getValue(1);
+ Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(),
+ VReg, VT, InFlag);
+ Chain = Arg.getValue(1);
+ InFlag = Arg.getValue(2);
+ }
+ Ops.push_back(Arg);
+ Ops.push_back(TheCall->getArgFlagsVal(i));
+ }
+ // Link in chain of CopyTo/CopyFromReg.
+ Ops[0] = Chain;
+ DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
+ }
+ }
+ }
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
+ BasicBlock::iterator Begin,
+ BasicBlock::iterator End) {
+ SDL->setCurrentBasicBlock(BB);
+
+ // Lower all of the non-terminator instructions.
+ for (BasicBlock::iterator I = Begin; I != End; ++I)
+ if (!isa<TerminatorInst>(I))
+ SDL->visit(*I);
+
+ // Ensure that all instructions which are used outside of their defining
+ // blocks are available as virtual registers. Invoke is handled elsewhere.
+ for (BasicBlock::iterator I = Begin; I != End; ++I)
+ if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
+ SDL->CopyToExportRegsIfNeeded(I);
+
+ // Handle PHI nodes in successor blocks.
+ if (End == LLVMBB->end()) {
+ HandlePHINodesInSuccessorBlocks(LLVMBB);
+
+ // Lower the terminator after the copies are emitted.
+ SDL->visit(*LLVMBB->getTerminator());
+ }
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDL->getControlRoot());
+
+  // Check whether calls in this block are real tail calls. Fix up CALL
+  // nodes with the correct tailcall attribute so that the target can rely
+  // on that attribute to tell whether a call is really eligible for tail
+  // call optimization.
+ if (PerformTailCallOpt)
+ CheckDAGForTailCallsAndFixThem(*CurDAG, TLI);
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+ SDL->clear();
+}
+
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 128> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ APInt Mask;
+ APInt KnownZero;
+ APInt KnownOne;
+
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N))
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ Worklist.push_back(N->getOperand(i).getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-scalar or non-integer values.
+ SDValue Src = N->getOperand(2);
+ MVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger() || SrcVT.isVector())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
+ CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+
+ // Only install this information if it tells us something.
+ if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
+ DestReg -= TargetRegisterInfo::FirstVirtualRegister;
+ FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo();
+ if (DestReg >= FLI.LiveOutRegInfo.size())
+ FLI.LiveOutRegInfo.resize(DestReg+1);
+ FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg];
+ LOI.NumSignBits = NumSignBits;
+ LOI.KnownOne = KnownOne;
+ LOI.KnownZero = KnownZero;
+ }
+ }
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ std::string GroupName;
+ if (TimePassesIsEnabled)
+ GroupName = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+ BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' +
+ BB->getBasicBlock()->getName();
+
+ DOUT << "Initial selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining 1", GroupName);
+ CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ }
+
+ DOUT << "Optimized lowered selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (!DisableLegalizeTypes) {
+ if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+ BlockName);
+
+ bool Changed;
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Type Legalization", GroupName);
+ Changed = CurDAG->LegalizeTypes();
+ } else {
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DOUT << "Type-legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (Changed) {
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName);
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ }
+
+ DOUT << "Optimized type-legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+ }
+
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Vector Legalization", GroupName);
+ Changed = CurDAG->LegalizeVectors();
+ } else {
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Type Legalization 2", GroupName);
+ Changed = CurDAG->LegalizeTypes();
+ } else {
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DOUT << "Optimized vector-legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+ }
+ }
+
+ if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Legalization", GroupName);
+ CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
+ } else {
+ CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
+ }
+
+ DOUT << "Legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining 2", GroupName);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DOUT << "Optimized legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Selection", GroupName);
+ InstructionSelect();
+ } else {
+ InstructionSelect();
+ }
+
+ DOUT << "Selected selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Scheduling", GroupName);
+ Scheduler->Run(CurDAG, BB, BB->end());
+ } else {
+ Scheduler->Run(CurDAG, BB, BB->end());
+ }
+
+ if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Creation", GroupName);
+ BB = Scheduler->EmitSchedule();
+ } else {
+ BB = Scheduler->EmitSchedule();
+ }
+
+ // Free the scheduler state.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName);
+ delete Scheduler;
+ } else {
+ delete Scheduler;
+ }
+
+ DOUT << "Selected machine code:\n";
+ DEBUG(BB->dump());
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
+ MachineFunction &MF,
+ MachineModuleInfo *MMI,
+ DwarfWriter *DW,
+ const TargetInstrInfo &TII) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = 0;
+ if (EnableFastISel)
+ FastIS = TLI.createFastISel(MF, MMI, DW,
+ FuncInfo->ValueMap,
+ FuncInfo->MBBMap,
+ FuncInfo->StaticAllocaMap
+#ifndef NDEBUG
+ , FuncInfo->CatchInfoLost
+#endif
+ );
+
+ // Iterate over all basic blocks in the function.
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ BasicBlock *LLVMBB = &*I;
+ BB = FuncInfo->MBBMap[LLVMBB];
+
+ BasicBlock::iterator const Begin = LLVMBB->begin();
+ BasicBlock::iterator const End = LLVMBB->end();
+ BasicBlock::iterator BI = Begin;
+
+ // Lower any arguments needed in this block if this is the entry block.
+ bool SuppressFastISel = false;
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ LowerArguments(LLVMBB);
+
+ // If any of the arguments has the byval attribute, forgo
+ // fast-isel in the entry block.
+ if (FastIS) {
+ unsigned j = 1;
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I, ++j)
+ if (Fn.paramHasAttr(j, Attribute::ByVal)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort)
+ cerr << "FastISel skips entry block due to byval argument\n";
+ SuppressFastISel = true;
+ break;
+ }
+ }
+ }
+
+ if (MMI && BB->isLandingPad()) {
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ unsigned LabelID = MMI->addLandingPad(BB);
+
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL);
+ BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // FIXME: Hack around an exception handling flaw (PR1508): the personality
+ // function and list of typeids logically belong to the invoke (or, if you
+ // like, the basic block containing the invoke), and need to be associated
+ // with it in the dwarf exception handling tables. Currently however the
+ // information is provided by an intrinsic (eh.selector) that can be moved
+ // to unexpected places by the optimizers: if the unwind edge is critical,
+ // then breaking it can result in the intrinsics being in the successor of
+ // the landing pad, not the landing pad itself. This results in exceptions
+ // not being caught because no typeids are associated with the invoke.
+      // This may not be the only way things can go wrong, but it is the
+      // only one we try to work around for the moment.
+ BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+ if (Br && Br->isUnconditional()) { // Critical edge?
+ BasicBlock::iterator I, E;
+ for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+ if (isa<EHSelectorInst>(I))
+ break;
+
+ if (I == E)
+ // No catch info found - try to extract some from the successor.
+ copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
+ }
+ }
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS && !SuppressFastISel) {
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ CurDAG->setRoot(SDL->getControlRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+ }
+ FastIS->startNewBlock(BB);
+ // Do FastISel on as many instructions as possible.
+ for (; BI != End; ++BI) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(BI))
+ if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ cerr << "FastISel miss: ";
+ BI->dump();
+ }
+ if (EnableFastISelAbort)
+ assert(0 && "FastISel didn't handle a PHI in a successor");
+ break;
+ }
+
+ // First try normal tablegen-generated "fast" selection.
+ if (FastIS->SelectInstruction(BI))
+ continue;
+
+ // Next, try calling the target to attempt to handle the instruction.
+ if (FastIS->TargetSelectInstruction(BI))
+ continue;
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(BI)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ cerr << "FastISel missed call: ";
+ BI->dump();
+ }
+
+ if (BI->getType() != Type::VoidTy) {
+ unsigned &R = FuncInfo->ValueMap[BI];
+ if (!R)
+ R = FuncInfo->CreateRegForValue(BI);
+ }
+
+ SDL->setCurDebugLoc(FastIS->getCurDebugLoc());
+ SelectBasicBlock(LLVMBB, BI, next(BI));
+ // If the instruction was codegen'd with multiple blocks,
+ // inform the FastISel object where to resume inserting.
+ FastIS->setCurrentBlock(BB);
+ continue;
+ }
+
+ // Otherwise, give up on FastISel for the rest of the block.
+ // For now, be a little lenient about non-branch terminators.
+ if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ cerr << "FastISel miss: ";
+ BI->dump();
+ }
+ if (EnableFastISelAbort)
+ // The "fast" selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ assert(0 && "FastISel didn't select the entire block");
+ }
+ break;
+ }
+ }
+
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ if (BI != End) {
+ // If FastISel is run and it has known DebugLoc then use it.
+ if (FastIS && !FastIS->getCurDebugLoc().isUnknown())
+ SDL->setCurDebugLoc(FastIS->getCurDebugLoc());
+ SelectBasicBlock(LLVMBB, BI, End);
+ }
+
+ FinishBasicBlock();
+ }
+
+ delete FastIS;
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+ DOUT << "Target-post-processed machine code:\n";
+ DEBUG(BB->dump());
+
+  DOUT << "Total number of PHI nodes to update: "
+ << SDL->PHINodesToUpdate.size() << "\n";
+ DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i)
+ DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first
+ << ", " << SDL->PHINodesToUpdate[i].second << ")\n";);
+
+  // Next, now that we know which MBB the LLVM BB was last expanded into,
+  // update the PHI nodes in the successors.
+ if (SDL->SwitchCases.empty() &&
+ SDL->JTCases.empty() &&
+ SDL->BitTestCases.empty()) {
+ for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = SDL->PHINodesToUpdate[i].first;
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ SDL->PHINodesToUpdate.clear();
+ return;
+ }
+
+ for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) {
+    // Lower the header first, if it wasn't already lowered.
+ if (!SDL->BitTestCases[i].Emitted) {
+      // Set the current basic block to the MBB we wish to insert the code into.
+ BB = SDL->BitTestCases[i].Parent;
+ SDL->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDL->visitBitTestHeader(SDL->BitTestCases[i]);
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+ }
+
+ for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) {
+      // Set the current basic block to the MBB we wish to insert the code into.
+ BB = SDL->BitTestCases[i].Cases[j].ThisBB;
+ SDL->setCurrentBasicBlock(BB);
+ // Emit the code
+ if (j+1 != ej)
+ SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB,
+ SDL->BitTestCases[i].Reg,
+ SDL->BitTestCases[i].Cases[j]);
+ else
+ SDL->visitBitTestCase(SDL->BitTestCases[i].Default,
+ SDL->BitTestCases[i].Reg,
+ SDL->BitTestCases[i].Cases[j]);
+
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+      // This is the "default" BB. We have two jumps to it: from the "header"
+      // BB and from the last "case" BB.
+ if (PHIBB == SDL->BitTestCases[i].Default) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent));
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases.
+ back().ThisBB));
+ }
+      // One of the "cases" BBs.
+ for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB;
+ if (cBB->succ_end() !=
+ std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(cBB));
+ }
+ }
+ }
+ }
+ SDL->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+  // whether the PHI is a successor of the range check MBB or the jump table MBB.
+ for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) {
+    // Lower the header first, if it wasn't already lowered.
+ if (!SDL->JTCases[i].first.Emitted) {
+      // Set the current basic block to the MBB we wish to insert the code into.
+ BB = SDL->JTCases[i].first.HeaderBB;
+ SDL->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first);
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+ }
+
+    // Set the current basic block to the MBB we wish to insert the code into.
+ BB = SDL->JTCases[i].second.MBB;
+ SDL->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDL->visitJumpTable(SDL->JTCases[i].second);
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+      // The "default" BB; we can only reach it from the header BB.
+ if (PHIBB == SDL->JTCases[i].second.Default) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB));
+ }
+      // The jump table BB. Just iterate over its successors here.
+ if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ }
+ }
+ SDL->JTCases.clear();
+
+ // If the switch block involved a branch to one of the actual successors, we
+ // need to update PHI nodes in that block.
+ for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = SDL->PHINodesToUpdate[i].first;
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ if (BB->isSuccessor(PHI->getParent())) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ }
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) {
+    // Set the current basic block to the MBB we wish to insert the code into.
+ BB = SDL->SwitchCases[i].ThisBB;
+ SDL->setCurrentBasicBlock(BB);
+
+ // Emit the code
+ SDL->visitSwitchCase(SDL->SwitchCases[i]);
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS.
+ for (MachineBasicBlock::iterator Phi = BB->begin();
+ Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
+        // The value for this PHI node is recorded in PHINodesToUpdate; find it.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != SDL->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (SDL->PHINodesToUpdate[pn].first == Phi) {
+ Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn].
+ second, false));
+ Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB));
+ break;
+ }
+ }
+ }
+
+ // Don't process RHS if same block as LHS.
+ if (BB == SDL->SwitchCases[i].FalseBB)
+ SDL->SwitchCases[i].FalseBB = 0;
+
+ // If we haven't handled the RHS, do so now. Otherwise, we're done.
+ SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB;
+ SDL->SwitchCases[i].FalseBB = 0;
+ }
+ assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0);
+ }
+ SDL->SwitchCases.clear();
+
+ SDL->PHINodesToUpdate.clear();
+}
+
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it; otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+ if (!Ctor) {
+ Ctor = ISHeuristic;
+ RegisterScheduler::setDefault(Ctor);
+ }
+
+ return Ctor(this, OptLevel);
+}
+
+ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
+ return new ScheduleHazardRecognizer();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
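+/// For example (an illustrative case, not taken from a particular target): if
+/// the pattern wants (and X, 255) but the DAG contains (and X, 254) because
+/// bit 0 of X is known zero, the pattern should still match, since the masked
+/// result is the same.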
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual AND mask allows disallowed bits, this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
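+/// For example (an illustrative case): if the pattern wants (or X, 255) but
+/// the DAG contains (or X, 128) because bits 0-6 of X are already known to be
+/// one, the pattern should still match, since the resulting value is the same.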
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual OR mask sets disallowed bits, this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[0]); // input chain.
+ Ops.push_back(InOps[1]); // input asm string.
+
+ unsigned i = 2, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Flag)
+ --e; // Don't process a flag operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
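+    // The low 3 bits of the flag word encode the operand kind (4 == MEM); the
+    // remaining bits hold the number of register operands that follow, which
+    // is how the non-memory case below knows how many operands to copy.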
+ if ((Flags & 7) != 4 /*MEM*/) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) {
+ cerr << "Could not match memory address. Inline asm failure!\n";
+ exit(1);
+ }
+
+ // Add this to the output node.
+ MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy();
+ Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3),
+ IntPtrTy));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the flag input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findFlagUse - Return the user of the MVT::Flag value produced by the
+/// specified SDNode, or NULL if there is none.
+///
+static SDNode *findFlagUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root,
+ SmallPtrSet<SDNode*, 16> &Visited) {
+ if (Use->getNodeId() < Def->getNodeId() ||
+ !Visited.insert(Use))
+ return false;
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ SDNode *N = Use->getOperand(i).getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
+ return true;
+ }
+ return false;
+}
+
+/// isNonImmUse - Start searching from Root up the DAG to check if Def can
+/// be reached. Return true if that's the case. However, ignore direct uses
+/// by ImmedUse (which would be U in the example illustrated in
+/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
+/// case).
+/// FIXME: to be really generic, we should allow direct use by any node
+/// that is being folded. But realistically, since we only fold loads which
+/// have one non-chain use, we only need to watch out for load/op/store
+/// and load/op/cmp case where the root (store / cmp) may reach the load via
+/// its chain operand.
+static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
+ SmallPtrSet<SDNode*, 16> Visited;
+ return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+}
+
+/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
+/// U can be folded during the instruction selection that starts at Root, and
+/// folding N is profitable.
+bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+  // If the Root use can somehow reach N through a path that doesn't contain
+  // U, then folding N would create a cycle. E.g., in the following
+  // diagram, Root can reach N through X. If N is folded into Root, then
+ // X is both a predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces a flag, then it gets (even more) interesting. Since it
+ // will be "glued" together with its flag use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [FU] //
+ //
+ // If FU (flag use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of FU and a successor of
+ // Fold. But since Fold and FU are flagged together, this will create
+ // a cycle in the scheduling graph.
+
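+  // Walk up the flag chain first: whenever Root produces a flag, its flag
+  // user will be glued to it by the scheduler, so treat that user as the
+  // effective Root for the reachability check below.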
+ MVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Flag) {
+ SDNode *FU = findFlagUse(Root);
+ if (FU == NULL)
+ break;
+ Root = FU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+ }
+
+ return !isNonImmUse(Root, N, U);
+}
+
+
+char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000..3eec684
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,416 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getMVTString();
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method is
+ /// implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ MVT VT = Op.getValueType();
+ if (VT == MVT::Flag)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getNodeLabel(const SDNode *Node,
+ const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Op = Node->getOperationName(G);
+
+ if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) {
+ Op += ": " + utostr(CSDN->getZExtValue());
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) {
+ Op += ": " + ftostr(CSDN->getValueAPF());
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(Node)) {
+ Op += ": " + GADN->getGlobal()->getName();
+ if (int64_t Offset = GADN->getOffset()) {
+ if (Offset > 0)
+ Op += "+" + itostr(Offset);
+ else
+ Op += itostr(Offset);
+ }
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) {
+ Op += " " + itostr(FIDN->getIndex());
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) {
+ Op += " " + itostr(JTDN->getIndex());
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){
+ if (CP->isMachineConstantPoolEntry()) {
+ Op += '<';
+ {
+ raw_string_ostream OSS(Op);
+ OSS << *CP->getMachineCPVal();
+ }
+ Op += '>';
+ } else {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ Op += "<" + ftostr(CFP->getValueAPF()) + ">";
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
+ Op += "<" + utostr(CI->getZExtValue()) + ">";
+ else {
+ Op += '<';
+ {
+ raw_string_ostream OSS(Op);
+ WriteAsOperand(OSS, CP->getConstVal(), false);
+ }
+ Op += '>';
+ }
+ }
+ Op += " A=" + itostr(CP->getAlignment());
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
+ Op = "BB: ";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ Op += LBB->getName();
+ //Op += " " + (const void*)BBDN->getBasicBlock();
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) {
+ if (G && R->getReg() != 0 &&
+ TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ Op = Op + " " +
+ G->getTarget().getRegisterInfo()->getName(R->getReg());
+ } else {
+ Op += " #" + utostr(R->getReg());
+ }
+ } else if (const DbgStopPointSDNode *D = dyn_cast<DbgStopPointSDNode>(Node)) {
+ DICompileUnit CU(cast<GlobalVariable>(D->getCompileUnit()));
+ std::string FN;
+ Op += ": " + CU.getFilename(FN);
+ Op += ":" + utostr(D->getLine());
+ if (D->getColumn() != 0)
+ Op += ":" + utostr(D->getColumn());
+ } else if (const LabelSDNode *L = dyn_cast<LabelSDNode>(Node)) {
+ Op += ": LabelID=" + utostr(L->getLabelID());
+ } else if (const CallSDNode *C = dyn_cast<CallSDNode>(Node)) {
+ Op += ": CallingConv=" + utostr(C->getCallingConv());
+ if (C->isVarArg())
+ Op += ", isVarArg";
+ if (C->isTailCall())
+ Op += ", isTailCall";
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(Node)) {
+ Op += "'" + std::string(ES->getSymbol()) + "'";
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) {
+ if (M->getValue())
+ Op += "<" + M->getValue()->getName() + ">";
+ else
+ Op += "<null>";
+ } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(Node)) {
+ const Value *V = M->MO.getValue();
+ Op += '<';
+ if (!V) {
+ Op += "(unknown)";
+ } else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // PseudoSourceValues don't have names, so use their print method.
+ raw_string_ostream OSS(Op);
+ PSV->print(OSS);
+ } else {
+ Op += V->getName();
+ }
+ Op += '+' + itostr(M->MO.getOffset()) + '>';
+ } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(Node)) {
+ Op = Op + " AF=" + N->getArgFlags().getArgFlagsString();
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) {
+ Op = Op + " VT=" + N->getVT().getMVTString();
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) {
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD:
+ Op = Op + "<anyext ";
+ break;
+ case ISD::SEXTLOAD:
+ Op = Op + " <sext ";
+ break;
+ case ISD::ZEXTLOAD:
+ Op = Op + " <zext ";
+ break;
+ }
+ if (doExt)
+ Op += LD->getMemoryVT().getMVTString() + ">";
+ if (LD->isVolatile())
+ Op += "<V>";
+ Op += LD->getIndexedModeName(LD->getAddressingMode());
+ if (LD->getAlignment() > 1)
+ Op += " A=" + utostr(LD->getAlignment());
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) {
+ if (ST->isTruncatingStore())
+ Op += "<trunc " + ST->getMemoryVT().getMVTString() + ">";
+ if (ST->isVolatile())
+ Op += "<V>";
+ Op += ST->getIndexedModeName(ST->getAddressingMode());
+ if (ST->getAlignment() > 1)
+ Op += " A=" + utostr(ST->getAlignment());
+ }
+
+#if 0
+ Op += " Id=" + itostr(Node->getNodeId());
+#endif
+
+ return Op;
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
+ Title);
+#else
+ cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (e.g. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node (e.g. "color=red").
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ cerr << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node (e.g. "color=red").
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ cerr << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string("");
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ cerr << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color,
+                                          DenseSet<SDNode *> &visited,
+                                          int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ DOUT << "setSubgraphColor hit max level\n";
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+    for (SDNodeIterator i = SDNodeIterator::begin(N),
+                        iend = SDNodeIterator::end(N);
+         i != iend; ++i) {
+      hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1,
+                                         printed) || hit_limit;
+ }
+ }
+#else
+ cerr << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ }
+ else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ cerr << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(), DAG);
+ FlaggedNodes.pop_back();
+ if (!FlaggedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000..3334e53
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,2592 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+namespace llvm {
+TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
+ bool isLocal = GV->hasLocalLinkage();
+ bool isDeclaration = GV->isDeclaration();
+ // FIXME: what should we do for protected and internal visibility?
+ // For variables, is internal different from hidden?
+ bool isHidden = GV->hasHiddenVisibility();
+
+ if (reloc == Reloc::PIC_) {
+ if (isLocal || isHidden)
+ return TLSModel::LocalDynamic;
+ else
+ return TLSModel::GeneralDynamic;
+ } else {
+ if (!isDeclaration || isHidden)
+ return TLSModel::LocalExec;
+ else
+ return TLSModel::InitialExec;
+ }
+}
+}
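+
+// A compact summary of the mapping above, derived from the code: under PIC,
+// local or hidden values get LocalDynamic and everything else GeneralDynamic;
+// otherwise, defined or hidden values get LocalExec and everything else
+// InitialExec.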
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+}
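+
+// For example (illustrative): a 64-bit shift-left that the target cannot
+// lower natively is turned into a call to "__ashldi3" via the table above.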
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
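+
+// For example (illustrative): an ordered f32 equality comparison is lowered
+// to a call to __eqsf2, and the call's integer result is then compared
+// against zero using the SETEQ condition recorded above.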
+
+TargetLowering::TargetLowering(TargetMachine &tm)
+ : TM(tm), TD(TM.getTargetData()) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(ConvertActions, 0, sizeof(ConvertActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use addLegalFPImmediate
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ UsesGlobalOffsetTable = false;
+ ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType());
+ ShiftAmtHandling = Undefined;
+ memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ allowUnalignedMemoryAccesses = false;
+ benefitFromCodePlacementOpt = false;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = SchedulingForLatency;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ IfCvtBlockSizeLimit = 2;
+ IfCvtDupBlockSizeLimit = 0;
+ PrefLoopAlignment = 0;
+
+ InitLibcallNames(LibcallRoutineNames);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+
+ // Tell Legalize whether the assembler supports DEBUG_LOC.
+ const TargetAsmInfo *TASM = TM.getTargetAsmInfo();
+ if (!TASM || !TASM->hasDotLocAndDotFile())
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+}
+
+TargetLowering::~TargetLowering() {}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLowering::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
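+  // For example (an illustrative case): if i32 is the largest type with a
+  // register class, i64 takes two registers and i128 takes four, with each
+  // type transforming to the next smaller one.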
+ for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
+ MVT EVT = (MVT::SimpleValueType)ExpandedReg;
+ if (!EVT.isInteger())
+ break;
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction(EVT, Expand);
+ }
+
+  // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, Promote);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
+ }
+
+  // Decide how to handle f64. If the target does not have native f64 support,
+  // expand it to i64, and we will generate soft-float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, Expand);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, Promote);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, Expand);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (!isTypeLegal(VT)) {
+ MVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdown(VT,
+ IntermediateVT, NumIntermediates,
+ RegisterVT);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ // Determine if there is a legal wider type.
+ bool IsLegalWiderType = false;
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts) {
+ TransformToType[i] = SVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (!IsLegalWiderType) {
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ }
+ }
+ }
+ }
+}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
+}
+
+
+MVT TargetLowering::getSetCCResultType(MVT VT) const {
+ return getValueType(TD->getIntPtrType());
+}
+
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(MVT VT,
+ MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
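+  // For example (illustrative): breaking down v8f32 on a target whose widest
+  // legal vector type is v4f32 halves NumElts from 8 to 4 and doubles
+  // NumVectorRegs from 1 to 2.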
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (DestVT.bitsLT(NewVT)) {
+ // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ } else {
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+ }
+}
+
+/// getWidenVectorType: given a vector type, returns the type to widen to
+/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other.
+/// When and where to widen is target dependent, based on the cost of
+/// scalarizing vs. using the wider vector type.
+MVT TargetLowering::getWidenVectorType(MVT VT) const {
+ assert(VT.isVector());
+ if (isTypeLegal(VT))
+ return VT;
+
+  // The default is not to widen until this code is moved to LegalizeTypes.
+ return MVT::Other;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (usesGlobalOffsetTable())
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ return Table;
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // Assume that everything is safe in static mode.
+ if (getTargetMachine().getRelocationModel() == Reloc::Static)
+ return true;
+
+ // In dynamic-no-pic mode, assume that known defined values are safe.
+ if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+ GA &&
+ !GA->getGlobal()->isDeclaration() &&
+ !GA->getGlobal()->isWeakForLinker())
+ return true;
+
+ // Otherwise assume nothing is safe.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
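+/// For example (an illustrative case): (or X, 0x1FF) where only the low 8
+/// bits are demanded can have its constant shrunk to 0xFF.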
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+ // If the constant has bits set that are not demanded, clear them.
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ MVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
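+///
+/// For example (illustrative): if only the low 8 bits of an i32 add are
+/// demanded and the i8 casts are free, (i32 add x, y) can be rewritten as
+/// (zero_extend (i8 add (trunc x), (trunc y))).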
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ DebugLoc dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ MVT SmallVT = MVT::getIntegerVT(SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(1)));
+ SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+ return CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
+
+/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream. If we can
+/// use this information to simplify Op, create a new simplified DAG node,
+/// record the replacement via TLO, and return true. Otherwise, analyze the
+/// expression and return a mask of KnownOne and KnownZero bits for the
+/// expression (used to simplify the caller). The KnownZero/One bits may only
+/// be accurate for those bits in the DemandedMask.
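+///
+/// For example (illustrative): if only bit 0 of (or (and x, 1), (shl y, 1))
+/// is demanded, the shifted term is known zero in that bit, so the whole
+/// expression simplifies to (and x, 1).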
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ assert(Op.getValueSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+ APInt NewMask = DemandedMask;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+
+ // Other users may use these bits.
+ if (!Op.getNode()->hasOneUse()) {
+ if (Depth != 0) {
+ // If not at the root, just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the NewMask to all bits.
+ NewMask = APInt::getAllOnesValue(BitWidth);
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (Op.getOpcode() != ISD::UNDEF)
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
+ KnownZero = ~KnownOne & NewMask;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS, here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt LHSZero, LHSOne;
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
+ LHSZero, LHSOne, Depth+1);
+ // If the LHS already has zeros where RHSC does, this 'and' is dead.
+ if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 bits are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((KnownZero & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((KnownZero2 & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
+ if ((KnownOne & KnownOne2) == KnownOne) {
+ MVT VT = Op.getValueType();
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ Op.getOperand(0), ANDC));
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // For XOR, we prefer to force bits to 1 if that will produce a -1.
+ // If we can't force bits, try to shrink the constant.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+ // If we can expand it to have all bits set, do it.
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ MVT VT = Op.getValueType();
+ SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, VT));
+ return TLO.CombineTo(Op, New);
+ }
+ // If it already has all the bits set, there is nothing to change,
+ // but don't shrink the constant either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
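+ // For example (illustrative): with the low five bits not demanded,
+ // ((x >>u 2) << 5) can be rewritten as the single shift (x << 3).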
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ MVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero <<= SA->getZExtValue();
+ KnownOne <<= SA->getZExtValue();
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ MVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+ unsigned VTSize = VT.getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
+
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ MVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ if (HighBits.intersects(NewMask))
+ InDemandedMask |= APInt::getSignBit(VT.getSizeInBits());
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bit, adjusted to where it is now in the mask.
+ APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0),
+ Op.getOperand(1)));
+ } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getSizeInBits()) &
+ NewMask;
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits());
+ InSignBit.zext(BitWidth);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth,
+ EVT.getSizeInBits()) &
+ NewMask;
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+
+ if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+ APInt InMask = NewMask;
+ InMask.trunc(OperandBitWidth);
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+ if (!NewBits.intersects(NewMask))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
+ APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt NewBits = ~InMask & NewMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ APInt InDemandedBits = InMask & NewMask;
+ InDemandedBits |= InSignBit;
+ InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne.intersects(InSignBit)) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+ APInt InMask = NewMask;
+ InMask.trunc(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ APInt TruncMask = NewMask;
+ TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero.trunc(BitWidth);
+ KnownOne.trunc(BitWidth);
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).getNode()->hasOneUse()) {
+ SDValue In = Op.getOperand(0);
+ unsigned InBitWidth = In.getValueSizeInBits();
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+ APInt HighBits = APInt::getHighBitsSet(InBitWidth,
+ InBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue());
+ HighBits.trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ In.getOperand(1)));
+ }
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= ~InMask & NewMask;
+ break;
+ }
+ case ISD::BIT_CONVERT:
+#if 0
+ // If this is an FP->Int bitcast and if the sign bit is the only thing that
+ // is demanded, turn this into a FGETSIGN.
+ if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) &&
+ MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
+ !MVT::isVector(Op.getOperand(0).getValueType())) {
+ // Only do this xform if FGETSIGN is valid or if before legalize.
+ if (!TLO.AfterLegalize ||
+ isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+ Op.getOperand(0));
+ unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+ Sign, ShAmt));
+ }
+ }
+#endif
+ break;
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
+ APInt LoMask = APInt::getLowBitsSet(BitWidth,
+ BitWidth - NewMask.countLeadingZeros());
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ // See if the operation should be performed at a smaller bit width.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+ }
+ // FALL THROUGH
+ default:
+ // Just use ComputeMaskedBits to compute output bits.
+ TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+ return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
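+/// For example (illustrative): (shl 1, x) is recognized as having exactly
+/// one bit set for any x, as is (srl c, x) when c is a sign-bit constant.
+///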
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+ // A left-shift of a constant one will have exactly one bit set, because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue() == 1)
+ return true;
+
+ // Similarly, a right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue().isSignBit())
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to ComputeMaskedBits to catch other known cases.
+ MVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
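+///
+/// For example (illustrative): with an i8 operand x, (setult (zext x), 256)
+/// folds to the constant 1, and an integer (seteq x, x) likewise folds to 1.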
+SDValue
+TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI, DebugLoc dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ }
+
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+ if (isa<ConstantSDNode>(N0.getNode())) {
+ return DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ } else {
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
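+ // For example (illustrative): on a little-endian target, the test
+ // ((i32 load p) & 0xFF00) == 0 can be narrowed to an i8 load from
+ // p+1, masked and compared against 0.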
+ if (DCI.isBeforeLegalize() &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ uint64_t bestMask = 0;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed() &&
+ // FIXME: This uses getZExtValue() below so it only works on i64 and
+ // below.
+ N0.getValueType().getSizeInBits() <= 64) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ // We can narrow (e.g.) 16-bit extending loads on 32-bit targets to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ uint64_t newMask = (1ULL << width) - 1;
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!TD->isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask >> (offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask = newMask << width;
+ }
+ }
+ }
+ if (bestWidth) {
+ MVT newVT = MVT::getIntegerVT(bestWidth);
+ if (newVT.isRound()) {
+ MVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getSrcValue(),
+ Lod->getSrcValueOffset() + bestOffset,
+ false, NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask, newVT)),
+ DAG.getConstant(0LL, newVT), Cond);
+ }
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(APInt(C1).trunc(InSize),
+ N0.getOperand(0).getValueType()),
+ Cond);
+ default:
+ break; // TODO: be more careful with signed comparisons.
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ MVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the extended part has any inconsistent bits, it cannot ever
+ // compare equal. In other words, they have to be all ones or all
+ // zeros.
+ APInt ExtBits =
+ APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
+ if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+ SDValue ZextOp;
+ MVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1);
+ if (TrueWhenTrue)
+ return N0;
+
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ MVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy();
+ if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (isPowerOf2_64(AndRHS->getZExtValue())) {
+ return DAG.getNode(ISD::SRL, dl, VT, N0,
+ DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
+ ShiftTy));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::SRL, dl, VT, N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy));
+ }
+ }
+ }
+ }
+ } else if (isa<ConstantSDNode>(N0.getNode())) {
+ // Ensure that the constant occurs on the RHS.
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: assert(0 && "Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, VT);
+ case 2: // Undefined.
+ return DAG.getUNDEF(VT);
+ }
+ }
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+ }
+
+ if (N0 == N1) {
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger())
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(UOF, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond)
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 == 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+ }
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
+ N1,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(1),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(0),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getNode()->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // X == (Z-X) --> X<<1 == Z
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+
+ // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+ // Note that where y is variable and is known to have at most
+ // one bit set (for example, if it is z&1) we cannot do this;
+ // the expressions are not equivalent when y==0.
+ if (N0.getOpcode() == ISD::AND)
+ if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+ if (ValueHasExactlyOneBitSet(N1, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
+ }
+ if (N1.getOpcode() == ISD::AND)
+ if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+ if (ValueHasExactlyOneBitSet(N0, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDValue Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ switch (Cond) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
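+///
+/// For example (illustrative): for the node (add (GlobalAddress @g, 8), 4),
+/// this returns true with GA set to @g and Offset increased by 12.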
+bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
+ int64_t &Offset) const {
+ if (isa<GlobalAddressSDNode>(N)) {
+ GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
+/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
+/// location that the 'Base' load is loading from.
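+///
+/// For example (illustrative): two i32 loads of the same chain from fixed
+/// stack objects at offsets 16 and 20 are consecutive for Bytes == 4 and
+/// Dist == 1.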
+bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
+ unsigned Bytes, int Dist,
+ const MachineFrameInfo *MFI) const {
+ if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+ return false;
+ MVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ GlobalValue *GV1 = NULL;
+ GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ // FIXME: lots more standard ones to handle.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': // Relocatable Constant
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ return C_Other;
+ }
+ }
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
+ Constraint[Constraint.size()-1] == '}')
+ return C_Register;
+ return C_Unknown;
+}
+
+/// LowerXConstraint - Try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C is missing.
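+ // For example (illustrative): "@g + 4" satisfies 'i' and 's', while a
+ // bare constant such as 42 satisfies 'i' and 'n' but not 's'.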
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint[0] != '{')
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (StringsEqualNoCase(RegName, RI->get(*I).AsmName))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ default: assert(0 && "Unknown constraint type!");
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then using the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this, the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
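+///
+/// For example (illustrative): given the constraint string "imr", a constant
+/// operand such as 42 is matched by 'i' under rule 1, while a non-constant
+/// operand falls through to rule 2 and picks the more general 'm'.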
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ bool hasMemory, const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // If this constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
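+
+// For example (an illustrative sketch): given the constraint string "imr"
+// on X86, an operand that is the constant 4 satisfies the 'i' ('other')
+// constraint and is chosen by rule 1 above; a non-constant operand falls
+// through to rule 2, which picks 'm' (Memory, generality 3) over 'r'
+// (RegisterClass, generality 2).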
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ bool hasMemory,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels).
+ if (isa<BasicBlock>(OpInfo.CallOperandVal) ||
+ isa<ConstantInt>(OpInfo.CallOperandVal))
+ return;
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // The default implementation supports a conservative RISC-style r+r and
+ // r+i addressing mode.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default: // Conservatively reject any larger scale (n*r).
+ return false;
+ }
+
+ return true;
+}
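+
+// For example (an illustrative sketch of the default rules above):
+//   reg + 100       -> legal   (Scale = 0, small immediate)
+//   reg + reg       -> legal   (Scale = 1, BaseOffs = 0)
+//   reg + reg + 4   -> illegal (Scale = 1 with both base reg and offset)
+//   2*reg           -> legal   (treated as reg + reg)
+//   2*reg + reg     -> illegal
+//   reg + (1 << 20) -> illegal (offset outside the 16-bit field)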
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl= N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No MULHS or equivalent.
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy()));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
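+
+// For example (an illustrative sketch, using the well-known 32-bit magic
+// constants for d = 7, where d.magic() yields m = 0x92492493 and s = 2),
+// the expansion built above is:
+//   Q = MULHS(N0, 0x92492493)
+//   Q = ADD(Q, N0)  // d > 0 and m < 0, so add the numerator
+//   Q = SRA(Q, 2)   // shift right by s
+//   T = SRL(Q, 31)  // extract the sign bit
+//   Q = ADD(Q, T)   // e.g. 7/7 = 1 and -7/7 = -1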
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+ APInt::mu magics = N1C->getAPIntValue().magicu();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No MULHU or equivalent.
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy()));
+ }
+}
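+
+// For example (an illustrative sketch, using the well-known 32-bit magic
+// constants for d = 7, where magicu() yields m = 0x24924925, a = 1, s = 3),
+// the expansion built above takes the magics.a != 0 path:
+//   Q   = MULHU(N0, 0x24924925)
+//   NPQ = SUB(N0, Q)
+//   NPQ = SRL(NPQ, 1)
+//   NPQ = ADD(NPQ, Q)
+//   Q   = SRL(NPQ, 2)  // shift by s-1; e.g. 7/7 = 1, 0xFFFFFFFF/7 = 0x24924924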
+
+/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET
+/// node that don't prevent tail call optimization.
+static SDValue IgnoreHarmlessInstructions(SDValue node) {
+ // Found call return.
+ if (node.getOpcode() == ISD::CALL) return node;
+ // Ignore MERGE_VALUES. Will have at least one operand.
+ if (node.getOpcode() == ISD::MERGE_VALUES)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
+ // Ignore ANY_EXTEND node.
+ if (node.getOpcode() == ISD::ANY_EXTEND)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
+ if (node.getOpcode() == ISD::TRUNCATE)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
+ // Any other node type.
+ return node;
+}
+
+bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
+ SDValue Ret) {
+ unsigned NumOps = Ret.getNumOperands();
+ // ISD::CALL results:(value0, ..., valuen, chain)
+ // ISD::RET operands:(chain, value0, flag0, ..., valuen, flagn)
+ // Value return:
+ // Check that the operand of the RET node sources from the CALL node. The RET node
+ // has at least two operands. Operand 0 holds the chain. Operand 1 holds the
+ // value.
+ if (NumOps > 1 &&
+ IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0))
+ return true;
+ // void return: The RET node has the chain result value of the CALL node as
+ // input.
+ if (NumOps == 1 &&
+ Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
+ return true;
+
+ return false;
+}
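+
+// For example (an illustrative sketch): for a function whose body is just
+// "return f(x);", the CALL node produces results (value0, chain) and the
+// RET node takes operands (chain, value0, flag). Ret.getOperand(1), traced
+// through any MERGE_VALUES/ANY_EXTEND/TRUNCATE nodes, is SDValue(TheCall, 0),
+// so the value-return check above succeeds.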
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..2402f81
--- /dev/null
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,439 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// runtime/GC/SemiSpace.cpp contains a prototype runtime which is compatible
+// with ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// co-allocated on the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN ShadowStackGC : public GCStrategy {
+ /// Head - The head of the global linked list ("llvm_gc_root_chain") that
+ /// contains the chain of GC roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ const StructType *StackEntryTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ const Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators. This transform allows it to be non-allocating.
+ class VISIBILITY_HIDDEN EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
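+ // Fall through into state 1.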
+
+ case 1:
+ // Find all 'return' and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ BasicBlock *CleanupBB = BasicBlock::Create(CleanupBBName, &F);
+ UnwindInst *UI = new UnwindInst(CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ Args.append(CI->op_begin() + 1, CI->op_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getOperand(0),
+ NewBB, CleanupBB,
+ Args.begin(), Args.end(),
+ CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(UI->getParent(), UI);
+ return &Builder;
+ }
+ }
+ };
+}
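+
+// Typical use of EscapeEnumerator (an illustrative sketch; the real use is
+// in performCustomLowering below): drain the enumerator and emit
+// "finally"-style code at every escape point:
+//   EscapeEnumerator EE(F, "cleanup");
+//   while (IRBuilder<> *AtExit = EE.Next())
+//     /* emit cleanup code through *AtExit */;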
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // initializeCustomLowering creates the abstract type of this value.
+
+ Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*,16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Type::Int32Ty, Roots.size(), false),
+ ConstantInt::get(Type::Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(BaseElts, 2),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
+ Metadata.begin(), NumMeta)
+ };
+
+ Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2);
+
+ std::string TypeName("gc_map.");
+ TypeName += utostr(NumMeta);
+ F.getParent()->addTypeName(TypeName, FrameMap->getType());
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName(),
+ F.getParent());
+
+ Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0),
+ ConstantInt::get(Type::Int32Ty, 0) };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+}
+
+const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+ // initializeCustomLowering creates the generic version of this type.
+ std::vector<const Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+ Type *Ty = StructType::get(EltTys);
+
+ std::string TypeName("gc_stackentry.");
+ TypeName += F.getName();
+ F.getParent()->addTypeName(TypeName, Ty);
+
+ return Ty;
+}
+
+/// initializeCustomLowering - Create the FrameMap and StackEntry types for
+/// this module and ensure the llvm_gc_root_chain global exists.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<const Type*> EltTys;
+ EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array.
+ StructType *FrameMapTy = StructType::get(EltTys);
+ M.addTypeName("gc_map", FrameMapTy);
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+ OpaqueType *RecursiveTy = OpaqueType::get();
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(RecursiveTy));
+ EltTys.push_back(FrameMapPtrTy);
+ PATypeHolder LinkTyH = StructType::get(EltTys);
+
+ RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
+ StackEntryTy = cast<StructType>(LinkTyH.get());
+ const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+ M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from
+ // a FunctionPass?
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(StackEntryPtrTy, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy),
+ "llvm_gc_root_chain", &M);
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
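+
+// An illustrative sketch (not part of this pass) of how a collector
+// runtime, such as the prototype in runtime/GC/SemiSpace.cpp, can walk the
+// layout built above (C-level view of gc_map/gc_stackentry):
+//   void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
+//     for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
+//       unsigned i = 0;
+//       // Roots with metadata come first (see CollectRoots below).
+//       for (unsigned e = R->Map->NumMeta; i != e; ++i)
+//         Visitor(&R->Roots[i], R->Map->Meta[i]);
+//       // The remaining roots have null metadata.
+//       for (unsigned e = R->Map->NumRoots; i != e; ++i)
+//         Visitor(&R->Roots[i], NULL);
+//     }
+//   }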
+
+bool ShadowStackGC::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGC::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst*,AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getOperand(1)->stripPointerCasts()));
+ if (IsNullValue(CI->getOperand(2)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
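+
+// For reference, CollectRoots matches IR of the form (an illustrative
+// sketch):
+//   %x = alloca i8*
+//   call void @llvm.gcroot(i8** %x, i8* null)   ; no metadata -> Roots
+//   call void @llvm.gcroot(i8** %y, i8* %m)     ; metadata -> MetaRoots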
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
+ ConstantInt::get(Type::Int32Ty, Idx),
+ ConstantInt::get(Type::Int32Ty, Idx2) };
+ Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
+ ConstantInt::get(Type::Int32Ty, Idx) };
+ Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// performCustomLowering - Insert code to maintain the shadow stack.
+bool ShadowStackGC::performCustomLowering(Function &F) {
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+ "gc_frame");
+
+ while (isa<AllocaInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
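+
+// The net effect of performCustomLowering, as pseudo-IR (an illustrative
+// sketch):
+//   entry:  %gc_frame = alloca { StackEntry, roots... }
+//           %gc_frame.map  <- FrameMap constant
+//           %gc_frame.next <- *llvm_gc_root_chain   ; link
+//           *llvm_gc_root_chain <- %gc_frame        ; push
+//   each return/unwind:
+//           *llvm_gc_root_chain <- %gc_frame.next   ; pop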
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 0000000..e44a138
--- /dev/null
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1141 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+// loop the spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
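+// For example (an illustrative sketch): in the diamond CFG
+//
+//        Entry
+//        /   \
+//      B1     B2        (only B1 uses a CSR, say %r31)
+//        \   /
+//         Ret
+//
+// the spill of %r31 is placed at the top of B1 and its restore at the
+// bottom of B1, so the path Entry -> B2 -> Ret pays no save/restore cost.
+//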
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getFunction()->getName();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save, restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ DOUT << "Place CSR spills/restores for "
+ << MF->getFunction()->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
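+ // N.B. SparseBitVector's operator|= returns true iff the left-hand set
+ // gained bits, i.e. iff AnticIn[MBB] grew since the previous iteration.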
+ if (prevAnticIn |= AnticIn[MBB])
+ changed = true;
+ return changed;
+}
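+
+// For example (an illustrative sketch): if MBB has successors S1 and S2
+// with AnticIn[S1] = {%r30,%r31} and AnticIn[S2] = {%r31}, then
+// AnticOut[MBB] = {%r31}; if CSRUsed[MBB] = {%r30}, the new AnticIn[MBB]
+// is {%r30,%r31}.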
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
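+ // As above, operator|= returns true iff AvailOut[MBB] gained bits since
+ // the previous iteration.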
+ if (prevAvailOut |= AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - build the sets anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB);
+ }
+ }
+
+ DEBUG(if (ShrinkWrapDebugging >= Details) {
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << " Antic/Avail Sets:\n";
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "iterations = " << iterations << "\n";
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
+ DOUT << "-----------------------------------------------------------\n";
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets(MBB);
+ }
+ DOUT << "-----------------------------------------------------------\n";
+ });
+}
+
+/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
+/// of the loop given by LP and its parent loops. This prevents spills/restores
+/// from being placed in the bodies of loops.
+///
+void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
+ if (! MBB || !LP)
+ return;
+
+ std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
+ for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* LBB = loopBlocks[i];
+ if (LBB == MBB)
+ continue;
+ if (CSRUsed[LBB].contains(CSRUsed[MBB]))
+ continue;
+ CSRUsed[LBB] |= CSRUsed[MBB];
+ }
+}
+
+/// calculateSets - collect the CSRs used in this function, compute
+/// the DF sets that describe the initial minimal regions in the
+/// Machine CFG around which CSR spills and restores must be placed.
+///
+/// Additionally, this function decides if shrink wrapping should
+/// be disabled for the current function, checking the following:
+/// 1. the current function has more than 500 MBBs: heuristic limit
+/// on function size to reduce compile time impact of the current
+/// iterative algorithm.
+/// 2. all CSRs are used in the entry block.
+/// 3. all CSRs are used in all immediate successors of the entry block.
+/// 4. all CSRs are used in a subset of blocks, each of which dominates
+/// all return blocks. These blocks, taken as a subgraph of the MCFG,
+/// are equivalent to the entry block since all execution paths pass
+/// through them.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+ // Sets used to compute spill, restore placement sets.
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty()) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": uses no callee-saved registers\n");
+ return false;
+ }
+
+ // Save refs to entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (isReturnBlock(MBB))
+ ReturnBlocks.push_back(MBB);
+
+ // Determine if this function has fast exit paths.
+ DEBUG(if (ShrinkWrapThisFunction)
+ findFastExitPath());
+
+ // Limit shrink wrapping via the current iterative bit vector
+ // implementation to functions with <= 500 MBBs.
+ if (Fn.size() > 500) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": too large (" << Fn.size() << " MBBs)\n");
+ ShrinkWrapThisFunction = false;
+ }
+
+ // Return now if not shrink wrapping.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ // Collect set of used CSRs.
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ UsedCSRegs.set(inx);
+ }
+
+ // Walk instructions in all MBBs, create CSRUsed[] sets, choose
+ // whether or not to shrink wrap this function.
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ bool allCSRUsesInEntryBlock = true;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ unsigned Reg = CSI[inx].getReg();
+ // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+ // CSRUsed map for the current block.
+ for (unsigned opInx = 0, opEnd = I->getNumOperands();
+ opInx != opEnd; ++opInx) {
+ const MachineOperand &MO = I->getOperand(opInx);
+ if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(Reg, MOReg))) {
+ // CSR Reg is defined/used in block MBB.
+ CSRUsed[MBB].set(inx);
+ // Check for uses in EntryBlock.
+ if (MBB != EntryBlock)
+ allCSRUsesInEntryBlock = false;
+ }
+ }
+ }
+ }
+
+ if (CSRUsed[MBB].empty())
+ continue;
+
+ // Propagate CSRUsed[MBB] in loops
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ // Add top level loop to work list.
+ MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
+ MachineLoop* PLP = getTopLevelLoopParent(LP);
+
+ if (! HDR) {
+ HDR = PLP->getHeader();
+ assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
+ MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
+ HDR = *PI;
+ }
+ TLLoops[HDR] = PLP;
+
+ // Push uses from inside loop to its parent loops,
+ // or to all other MBBs in its loop.
+ if (LP->getLoopDepth() > 1) {
+ for (MachineLoop* PLP = LP->getParentLoop(); PLP;
+ PLP = PLP->getParentLoop()) {
+ propagateUsesAroundLoop(MBB, PLP);
+ }
+ } else {
+ propagateUsesAroundLoop(MBB, LP);
+ }
+ }
+ }
+
+ if (allCSRUsesInEntryBlock) {
+ DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ } else {
+ bool allCSRsUsedInEntryFanout = true;
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (CSRUsed[SUCC] != UsedCSRegs)
+ allCSRsUsedInEntryFanout = false;
+ }
+ if (allCSRsUsedInEntryFanout) {
+ DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in imm successors of EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ }
+ }
+
+ if (ShrinkWrapThisFunction) {
+ // Check if MBB uses CSRs and dominates all exit nodes.
+ // Such nodes are equiv. to the entry node w.r.t.
+ // CSR uses: every path through the function must
+ // pass through this node. If each CSR is used at least
+ // once by these nodes, shrink wrapping is disabled.
+ CSRegSet CSRUsedInChokePoints;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
+ continue;
+ bool dominatesExitNodes = true;
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ if (! DT.dominates(MBB, ReturnBlocks[ri])) {
+ dominatesExitNodes = false;
+ break;
+ }
+ if (dominatesExitNodes) {
+ CSRUsedInChokePoints |= CSRUsed[MBB];
+ if (CSRUsedInChokePoints == UsedCSRegs) {
+ DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in choke point(s) at "
+ << getBasicBlockName(MBB) << "\n");
+ ShrinkWrapThisFunction = false;
+ break;
+ }
+ }
+ }
+ }
+
+ // Return now if we have decided not to apply shrink wrapping
+ // to the current function.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ DEBUG({
+ DOUT << "ENABLED: " << Fn.getFunction()->getName();
+ if (HasFastExitPath)
+ DOUT << " (fast exit path)";
+ DOUT << "\n";
+ if (ShrinkWrapDebugging >= BasicInfo) {
+ DOUT << "------------------------------"
+ << "-----------------------------\n";
+ DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ if (ShrinkWrapDebugging >= Details) {
+ DOUT << "------------------------------"
+ << "-----------------------------\n";
+ dumpAllUsed();
+ }
+ }
+ });
+
+ // Build initial DF sets to determine minimal regions in the
+ // Machine CFG around which CSRs must be spilled and restored.
+ calculateAnticAvail(Fn);
+
+ return true;
+}
+
+/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
+/// multi-entry, multi-exit (MEME) regions so spill and restore
+/// placement will not break code that enters or leaves a
+/// shrink-wrapped region by inducing spills with no matching
+/// restores or restores with no matching spills. A MEME region
+/// is a subgraph of the MCFG with multiple entry edges, multiple
+/// exit edges, or both. This code propagates use information
+/// through the MCFG until all paths requiring spills and restores
+/// _outside_ the computed minimal placement regions have been covered.
+///
+bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks) {
+ if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
+ bool processThisBlock = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC->pred_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED->succ_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ }
+ if (! processThisBlock)
+ return false;
+ }
+
+ CSRegSet prop;
+ if (!CSRSave[MBB].empty())
+ prop = CSRSave[MBB];
+ else if (!CSRRestore[MBB].empty())
+ prop = CSRRestore[MBB];
+ else
+ prop = CSRUsed[MBB];
+ if (prop.empty())
+ return false;
+
+ // Propagate selected bits to successors, predecessors of MBB.
+ bool addedUses = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ // Self-loop
+ if (SUCC == MBB)
+ continue;
+ if (! CSRUsed[SUCC].contains(prop)) {
+ CSRUsed[SUCC] |= prop;
+ addedUses = true;
+ blks.push_back(SUCC);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ DOUT << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "successor " << getBasicBlockName(SUCC) << "\n");
+ }
+ }
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ // Self-loop
+ if (PRED == MBB)
+ continue;
+ if (! CSRUsed[PRED].contains(prop)) {
+ CSRUsed[PRED] |= prop;
+ addedUses = true;
+ blks.push_back(PRED);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ DOUT << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "predecessor " << getBasicBlockName(PRED) << "\n");
+ }
+ }
+ return addedUses;
+}
+
+/// addUsesForTopLevelLoops - add uses for CSRs used inside top
+/// level loops to the exit blocks of those loops.
+///
+bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
+ bool addedUses = false;
+
+ // Place restores for top level loops where needed.
+ for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
+ I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
+ MachineBasicBlock* MBB = I->first;
+ MachineLoop* LP = I->second;
+ MachineBasicBlock* HDR = LP->getHeader();
+ SmallVector<MachineBasicBlock*, 4> exitBlocks;
+ CSRegSet loopSpills;
+
+ loopSpills = CSRSave[MBB];
+ if (CSRSave[MBB].empty()) {
+ loopSpills = CSRUsed[HDR];
+ assert(!loopSpills.empty() && "No CSRs used in loop?");
+ } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
+ continue;
+
+ LP->getExitBlocks(exitBlocks);
+ assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
+ for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* EXB = exitBlocks[i];
+ if (! CSRUsed[EXB].contains(loopSpills)) {
+ CSRUsed[EXB] |= loopSpills;
+ addedUses = true;
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ DOUT << "LOOP " << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(loopSpills) << ")->"
+ << getBasicBlockName(EXB) << "\n");
+ if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+ blks.push_back(EXB);
+ }
+ }
+ }
+ return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills) {
+ bool placedSpills = false;
+ // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+ CSRegSet anticInPreds;
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ MachineBasicBlock* PRED = predecessors[i];
+ anticInPreds = UsedCSRegs - AnticIn[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+ }
+ } else {
+ // Handle uses in entry blocks (which have no predecessors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ anticInPreds = UsedCSRegs;
+ }
+ // Compute spills required at MBB:
+ CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+ if (! CSRSave[MBB].empty()) {
+ if (MBB == EntryBlock) {
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+ } else {
+ // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+ if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+ CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+ }
+ }
+ }
+ placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+ prevSpills[MBB] = CSRSave[MBB];
+ // Remember this block for adding restores to successor
+ // blocks for multi-entry region.
+ if (placedSpills)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+ return placedSpills;
+}
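+
+// For example (an illustrative sketch): with UsedCSRegs = {%r30,%r31} and
+// a block MBB where AnticIn[MBB] = {%r30}, AvailIn[MBB] = {}, and a single
+// predecessor P with AnticIn[P] = {}: anticInPreds = {%r30,%r31}, so
+// CSRSave[MBB] |= ({%r30} - {}) & {%r30,%r31} = {%r30}; %r30 is first
+// anticipated at MBB and not already available there, so it is saved here.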
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores) {
+ bool placedRestores = false;
+ // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+ CSRegSet availOutSucc;
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ MachineBasicBlock* SUCC = successors[i];
+ availOutSucc = UsedCSRegs - AvailOut[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+ }
+ } else {
+ if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+ // Handle uses in return blocks (which have no successors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ availOutSucc = UsedCSRegs;
+ }
+ }
+ // Compute restores required at MBB:
+ CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+ // Postprocess restore placements at MBB.
+ // Remove the CSRs that are restored in the return blocks.
+ // Lest this be confusing, note that:
+ // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+ if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+ if (! CSRSave[EntryBlock].empty())
+ CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+ }
+ placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+ prevRestores[MBB] = CSRRestore[MBB];
+ // Remember this block for adding saves to predecessor
+ // blocks for multi-entry region.
+ if (placedRestores)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ return placedRestores;
+}
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+ CSRegBlockMap prevCSRSave;
+ CSRegBlockMap prevCSRRestore;
+ SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+ bool changed = true;
+ unsigned iterations = 0;
+
+ // Iterate computation of spill and restore placements in the MCFG until:
+ // 1. CSR use info has been fully propagated around the MCFG, and
+ // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
+ while (changed) {
+ changed = false;
+ ++iterations;
+
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ DOUT << "iter " << iterations
+ << " --------------------------------------------------\n");
+
+ // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+ // which determines the placements of spills and restores.
+ // Keep track of changes to spills, restores in each iteration to
+ // minimize the total iterations.
+ bool SRChanged = false;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Place spills for CSRs in MBB.
+ SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+ // Place restores for CSRs in MBB.
+ SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+ }
+
+ // Add uses of CSRs used inside loops where needed.
+ changed |= addUsesForTopLevelLoops(cvBlocks);
+
+ // Add uses for CSRs spilled or restored at branch, join points.
+ if (changed || SRChanged) {
+ while (! cvBlocks.empty()) {
+ MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+ changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+ }
+ if (! ncvBlocks.empty()) {
+ cvBlocks = ncvBlocks;
+ ncvBlocks.clear();
+ }
+ }
+
+ if (changed) {
+ calculateAnticAvail(Fn);
+ CSRSave.clear();
+ CSRRestore.clear();
+ }
+ }
+
+ // Check for effectiveness:
+ // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+ // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+ // Gives a measure of how many CSR spills have been moved from EntryBlock
+ // to minimal regions enclosing their uses.
+ CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+ unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+ numSRReduced += numSRReducedThisFunc;
+ DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "total iterations = " << iterations << " ( "
+ << Fn.getFunction()->getName()
+ << " " << numSRReducedThisFunc
+ << " " << Fn.size()
+ << " )\n";
+ DOUT << "-----------------------------------------------------------\n";
+ dumpSRSets();
+ DOUT << "-----------------------------------------------------------\n";
+ if (numSRReducedThisFunc)
+ verifySpillRestorePlacement();
+ });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one path from the entry block directly to a
+/// return block, or with a path that has very few edges.
+///
+void PEI::findFastExitPath() {
+ if (! EntryBlock)
+ return;
+ // Find a path from EntryBlock to any return block that does not branch:
+ // Entry
+ // | ...
+ // v |
+ // B1<-----+
+ // |
+ // v
+ // Return
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+
+ // Assume positive, disprove existence of fast path.
+ HasFastExitPath = true;
+
+ // Check the immediate successors.
+ if (isReturnBlock(SUCC)) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << getBasicBlockName(SUCC) << "\n";
+ break;
+ }
+ // Traverse df from SUCC, look for a branch block.
+ std::string exitPath = getBasicBlockName(SUCC);
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+ BE = df_end(SUCC); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ // Reject paths with branch nodes.
+ if (SBB->succ_size() > 1) {
+ HasFastExitPath = false;
+ break;
+ }
+ exitPath += "->" + getBasicBlockName(SBB);
+ }
+ if (HasFastExitPath) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << exitPath << "\n";
+ break;
+ }
+ }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+/// all CSRs spilled at MBB are restored on all paths
+/// from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+/// all CSRs restored at MBB are spilled on all paths
+/// reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+ unsigned numReturnBlocks = 0;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+ ++numReturnBlocks;
+ }
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet spilled = BI->second;
+ CSRegSet restored;
+
+ if (spilled.empty())
+ continue;
+
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(spilled)
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+
+ if (CSRRestore[MBB].intersects(spilled)) {
+ restored |= (CSRRestore[MBB] & spilled);
+ }
+
+ // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+ // we must find restores for all spills w/no intervening spills on all
+ // paths from MBB to all return blocks.
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+ BE = df_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ if (SBB == MBB)
+ continue;
+ // Stop when we encounter spills of any CSRs spilled at MBB that
+ // have not yet been seen to be restored.
+ if (CSRSave[SBB].intersects(spilled) &&
+ !restored.contains(CSRSave[SBB] & spilled))
+ break;
+ // Collect the CSRs spilled at MBB that are restored
+ // at this DF successor of MBB.
+ if (CSRRestore[SBB].intersects(spilled))
+ restored |= (CSRRestore[SBB] & spilled);
+ // If we are at a return block, check that the restores
+ // we have seen so far exhaust the spills at MBB, then
+ // reset the restores.
+ if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+ if (restored != spilled) {
+ CSRegSet notRestored = (spilled - restored);
+ DOUT << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notRestored)
+ << " spilled at " << getBasicBlockName(MBB)
+ << " are never restored on path to return "
+ << getBasicBlockName(SBB) << "\n";
+ }
+ restored.clear();
+ }
+ }
+ }
+
+ // Check restore placements.
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restored = BI->second;
+ CSRegSet spilled;
+
+ if (restored.empty())
+ continue;
+
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB])
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restored) << "\n";
+
+ if (CSRSave[MBB].intersects(restored)) {
+ spilled |= (CSRSave[MBB] & restored);
+ }
+ // Walk inverse depth first from MBB to find spills of all
+ // CSRs restored at MBB:
+ for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+ BE = idf_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* PBB = *BI;
+ if (PBB == MBB)
+ continue;
+ // Stop when we encounter restores of any CSRs restored at MBB that
+ // have not yet been seen to be spilled.
+ if (CSRRestore[PBB].intersects(restored) &&
+ !spilled.contains(CSRRestore[PBB] & restored))
+ break;
+ // Collect the CSRs restored at MBB that are spilled
+ // at this DF predecessor of MBB.
+ if (CSRSave[PBB].intersects(restored))
+ spilled |= (CSRSave[PBB] & restored);
+ }
+ if (spilled != restored) {
+ CSRegSet notSpilled = (restored - spilled);
+ DOUT << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notSpilled)
+ << " restored at " << getBasicBlockName(MBB)
+ << " are never spilled\n";
+ }
+ }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
+ std::ostringstream name;
+ if (MBB) {
+ if (MBB->getBasicBlock())
+ name << MBB->getBasicBlock()->getName();
+ else
+ name << "_MBB_" << MBB->getNumber();
+ }
+ return name.str();
+}
+
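+/// stringifyCSRegSet - Render a CSRegSet using the target's register names,
+/// e.g. (hypothetically, on an x86 target) "[EBX,ESI,EDI]"; when the
+/// function has no callee-saved info, the result is "[]".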
+std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> &CSI =
+ MF->getFrameInfo()->getCalleeSavedInfo();
+
+ std::ostringstream srep;
+ if (CSI.size() == 0) {
+ srep << "[]";
+ return srep.str();
+ }
+ srep << "[";
+ CSRegSet::iterator I = s.begin(), E = s.end();
+ if (I != E) {
+ unsigned reg = CSI[*I].getReg();
+ srep << TRI->getName(reg);
+ for (++I; I != E; ++I) {
+ reg = CSI[*I].getReg();
+ srep << ",";
+ srep << TRI->getName(reg);
+ }
+ }
+ srep << "]";
+ return srep.str();
+}
+
+void PEI::dumpSet(const CSRegSet& s) {
+ DOUT << stringifyCSRegSet(s) << "\n";
+}
+
+void PEI::dumpUsed(MachineBasicBlock* MBB) {
+ if (MBB) {
+ DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
+ }
+}
+
+void PEI::dumpAllUsed() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpUsed(MBB);
+ }
+}
+
+void PEI::dumpSets(MachineBasicBlock* MBB) {
+ if (MBB) {
+ DOUT << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << "\n";
+ }
+}
+
+void PEI::dumpSets1(MachineBasicBlock* MBB) {
+ if (MBB) {
+ DOUT << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << " | "
+ << stringifyCSRegSet(CSRSave[MBB]) << " | "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+}
+
+void PEI::dumpAllSets() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets1(MBB);
+ }
+}
+
+void PEI::dumpSRSets() {
+ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
+ MBB != E; ++MBB) {
+ if (! CSRSave[MBB].empty()) {
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]);
+ if (CSRRestore[MBB].empty())
+ DOUT << "\n";
+ }
+ if (! CSRRestore[MBB].empty()) {
+ if (! CSRSave[MBB].empty())
+ DOUT << " ";
+ DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+ }
+}
+#endif
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
new file mode 100644
index 0000000..2bc234f
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -0,0 +1,2827 @@
+//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register coalescing pass that attempts to
+// aggressively coalesce every register copy that it can.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regcoalescing"
+#include "SimpleRegisterCoalescing.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instructions commuted");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
+STATISTIC(numDeadValNo, "Number of valno defs marked dead");
+
+char SimpleRegisterCoalescing::ID = 0;
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+static cl::opt<bool>
+NewHeuristic("new-coalescer-heuristic",
+ cl::desc("Use new coalescer heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+CrossClassJoin("join-cross-class-copies",
+ cl::desc("Coalesce cross register class copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PhysJoinTweak("tweak-phys-join-heuristics",
+ cl::desc("Tweak heuristics for joining phys reg with vr"),
+ cl::init(false), cl::Hidden);
+
+static RegisterPass<SimpleRegisterCoalescing>
+X("simple-register-coalescing", "Simple Register Coalescing");
+
+// Declare that we implement the RegisterCoalescer interface
+static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X);
+
+const PassInfo *const llvm::SimpleRegisterCoalescingID = &X;
+
+void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
+ LiveInterval &IntB,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ assert(BLR != IntB.end() && "Live range not found!");
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->copy) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+ assert(ALR != IntA.end() && "Live range not found!");
+ VNInfo *AValNo = ALR->valno;
+ // If it's re-defined by an early clobber somewhere in the live range, then
+ // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149:
+ // 172 %ECX<def> = MOV32rr %reg1039<kill>
+ // 180 INLINEASM <es:subl $5,$1
+ // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, %EAX<kill>,
+ // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+ // 188 %EAX<def> = MOV32rr %EAX<kill>
+ // 196 %ECX<def> = MOV32rr %ECX<kill>
+ // 204 %ECX<def> = MOV32rr %ECX<kill>
+ // 212 %EAX<def> = MOV32rr %EAX<kill>
+ // 220 %EAX<def> = MOV32rr %EAX
+ // 228 %reg1039<def> = MOV32rr %ECX<kill>
+ // The early clobber operand ties ECX input to the ECX def.
+ //
+ // The live interval of ECX is represented as this:
+ // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
+ // The coalescer has no idea there was a def in the middle of [174,230].
+ if (AValNo->redefByEC)
+ return false;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ unsigned SrcReg = li_->getVNInfoSourceReg(AValNo);
+ if (!SrcReg) return false; // Not defined by a copy.
+
+ // If the source register comes from an interval other than IntB, we can't
+ // handle this.
+ if (SrcReg != IntB.reg) return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def-1);
+ assert(ValLR != IntB.end() && "Live range not found!");
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1);
+ if (!ValLREndInst ||
+ ValLREndInst->getParent() != CopyMI->getParent()) return false;
+
+ // Okay, we now know that ValLR ends in the same block in which the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its sub-registers is overlapping the live interval of the virtual
+ // register. If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) &&
+ *tri_->getSubRegisters(IntB.reg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false;
+ }
+ }
+
+ DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
+
+ unsigned FillerStart = ValLR->end, FillerEnd = BLR->start;
+ // We are about to delete CopyMI, so we need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+ BValNo->copy = NULL;
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+ SRLI.getNextValue(FillerStart, 0, li_->getVNInfoAllocator())));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno) {
+ IntB.addKills(ValLR->valno, BValNo->kills);
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ }
+ DOUT << " result = "; IntB.print(DOUT, tri_);
+ DOUT << "\n";
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ IntB.removeKill(ValLR->valno, FillerStart);
+ }
+
+ ++numExtends;
+ return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValNo val# of IntA.
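+/// For illustration (hypothetical ranges): if AValNo covers [20,40) in IntA
+/// while IntB holds a range [10,30) under some val# other than BValNo, the
+/// two overlap at [20,30), so this returns true and the caller gives up.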
+bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a commutable
+/// instruction and its other operand is coalesced to the copy dest register,
+/// see if we can transform the copy into a noop by commuting the definition. For
+/// example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+/// B1 = B2 <- now an identity copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
+ LiveInterval &IntB,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+ // FIXME: For now, only eliminate the copy by commuting its def when the
+ // source register is a virtual register. We want to guard against cases
+ // where the copy is a back edge copy and commuting the def lengthens the
+ // live interval of the source register to the entire loop.
+ if (TargetRegisterInfo::isPhysicalRegister(IntA.reg))
+ return false;
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ assert(BLR != IntB.end() && "Live range not found!");
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->copy) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+ assert(ALR != IntA.end() && "Live range not found!");
+ VNInfo *AValNo = ALR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (AValNo->def == ~0U || AValNo->def == ~1U || AValNo->hasPHIKill)
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ unsigned NewDstIdx;
+ if (!TID.isCommutable() ||
+ !tii_->CommuteChangesDestination(DefMI, NewDstIdx))
+ return false;
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !NewDstMO.isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+ // If some of the uses of IntA.reg are already coalesced away, return false;
+ // it's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+ UE = mri_->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ unsigned UseIdx = li_->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end())
+ continue;
+ if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ return false;
+ }
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (NewMI != DefMI) {
+ li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MBB->insert(DefMI, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+ bool BHasPHIKill = BValNo->hasPHIKill;
+ SmallVector<VNInfo*, 4> BDeadValNos;
+ SmallVector<unsigned, 4> BKills;
+ std::map<unsigned, unsigned> BExtend;
+
+ // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+ //
+ // then do not add kills of A to the newly created B interval.
+ bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
+ if (Extended)
+ BExtend[ALR->end] = BLR->end;
+
+ // Update uses of IntA of the specific Val# with IntB.
+ bool BHasSubRegs = false;
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ BHasSubRegs = *tri_->getSubRegisters(IntB.reg);
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ unsigned UseIdx = li_->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (UseMO.isKill()) {
+ if (Extended)
+ UseMO.setIsKill(false);
+ else
+ BKills.push_back(li_->getUseIndex(UseIdx)+1);
+ }
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ continue;
+ if (DstReg == IntB.reg) {
+ // This copy will become a noop. If it's defining a new val#,
+ // remove that val# as well. However this live range is being
+ // extended to the end of the existing live range defined by the copy.
+ unsigned DefIdx = li_->getDefIndex(UseIdx);
+ const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
+ BHasPHIKill |= DLR->valno->hasPHIKill;
+ assert(DLR->valno->def == DefIdx);
+ BDeadValNos.push_back(DLR->valno);
+ BExtend[DLR->start] = DLR->end;
+ JoinedCopies.insert(UseMI);
+ // If this is a kill but it's going to be removed, the last use
+ // of the same val# is the new kill.
+ if (UseMO.isKill())
+ BKills.pop_back();
+ }
+ }
+
+ // We need to insert a new liverange: [ALR.start, LastUse). We may be able to
+ // simply extend BLR if CopyMI doesn't end the range.
+ DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
+
+ // Remove val#'s defined by copies that will be coalesced away.
+ for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
+ VNInfo *DeadVNI = BDeadValNos[i];
+ if (BHasSubRegs) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def);
+ SRLI.removeValNo(SRLR->valno);
+ }
+ }
+ IntB.removeValNo(BDeadValNos[i]);
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated. Kills are also updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ ValNo->copy = NULL;
+ for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) {
+ unsigned Kill = ValNo->kills[j];
+ if (Kill != BLR->end)
+ BKills.push_back(Kill);
+ }
+ ValNo->kills.clear();
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ unsigned End = AI->end;
+ std::map<unsigned, unsigned>::iterator EI = BExtend.find(End);
+ if (EI != BExtend.end())
+ End = EI->second;
+ IntB.addRange(LiveRange(AI->start, End, ValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (BHasSubRegs) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.MergeInClobberRange(AI->start, End, li_->getVNInfoAllocator());
+ }
+ }
+ }
+ IntB.addKills(ValNo, BKills);
+ ValNo->hasPHIKill = BHasPHIKill;
+
+ DOUT << " result = "; IntB.print(DOUT, tri_);
+ DOUT << "\n";
+
+ DOUT << "\nShortening: "; IntA.print(DOUT, tri_);
+ IntA.removeValNo(AValNo);
+ DOUT << " result = "; IntA.print(DOUT, tri_);
+ DOUT << "\n";
+
+ ++numCommutes;
+ return true;
+}
+
+/// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply
+/// falls through to SuccMBB.
+static bool isSameOrFallThroughBB(MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccMBB,
+ const TargetInstrInfo *tii_) {
+ if (MBB == SuccMBB)
+ return true;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB &&
+ MBB->isSuccessor(SuccMBB);
+}
+
+/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range
+/// from a physical register live interval as well as from the live intervals
+/// of its sub-registers.
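+/// For example (hypothetical): removing [Start,End) from %EAX's interval
+/// also trims whatever part of [Start,End) is covered by the intervals of
+/// %AX, %AH and %AL.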
+static void removeRange(LiveInterval &li, unsigned Start, unsigned End,
+ LiveIntervals *li_, const TargetRegisterInfo *tri_) {
+ li.removeRange(Start, End, true);
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ unsigned RemoveEnd = Start;
+ while (RemoveEnd != End) {
+ LiveInterval::iterator LR = sli.FindLiveRangeContaining(Start);
+ if (LR == sli.end())
+ break;
+ RemoveEnd = (LR->end < End) ? LR->end : End;
+ sli.removeRange(Start, RemoveEnd, true);
+ Start = RemoveEnd;
+ }
+ }
+ }
+}
+
+/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block
+/// as the copy instruction, trim the live interval to the last use and return
+/// true.
+bool
+SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &li,
+ const LiveRange *LR) {
+ unsigned MBBStart = li_->getMBBStartIdx(CopyMBB);
+ unsigned LastUseIdx;
+ MachineOperand *LastUse = lastRegisterUse(LR->start, CopyIdx-1, li.reg,
+ LastUseIdx);
+ if (LastUse) {
+ MachineInstr *LastUseMI = LastUse->getParent();
+ if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) {
+ // r1024 = op
+ // ...
+ // BB1:
+ // = r1024
+ //
+ // BB2:
+ // r1025<dead> = r1024<kill>
+ if (MBBStart < LR->end)
+ removeRange(li, MBBStart, LR->end, li_, tri_);
+ return true;
+ }
+
+ // There are uses before the copy, just shorten the live range to the end
+ // of the last use.
+ LastUse->setIsKill();
+ removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_);
+ li.addKill(LR->valno, LastUseIdx+1);
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ DstReg == li.reg) {
+ // Last use is itself an identity copy.
+ int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+ LastUseMI->getOperand(DeadIdx).setIsDead();
+ }
+ return true;
+ }
+
+ // Is it livein?
+ if (LR->start <= MBBStart && LR->end > MBBStart) {
+ if (LR->start == 0) {
+ assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+ // Live-in to the function but dead. Remove it from entry live-in set.
+ mf_->begin()->removeLiveIn(li.reg);
+ }
+ // FIXME: Shorten intervals in BBs that reach this BB.
+ }
+
+ return false;
+}
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ unsigned DstReg,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI));
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (ValNo->def == ~0U || ValNo->def == ~1U || ValNo->hasPHIKill)
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ if (!TID.isAsCheapAsAMove())
+ return false;
+ if (!DefMI->getDesc().isRematerializable() ||
+ !tii_->isTriviallyReMaterializable(DefMI))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(tii_, SawStore))
+ return false;
+
+ unsigned DefIdx = li_->getDefIndex(CopyIdx);
+ const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
+ DLR->valno->copy = NULL;
+ // Don't forget to update sub-register intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+ if (DLR && DLR->valno->copy == CopyMI)
+ DLR->valno->copy = NULL;
+ }
+ }
+
+ // If the copy kills the source register, find the last use and propagate
+ // the kill.
+ bool checkForDeadDef = false;
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ if (CopyMI->killsRegister(SrcInt.reg))
+ if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
+ checkForDeadDef = true;
+ }
+
+ MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
+ CopyMI->removeFromParent();
+ tii_->reMaterialize(*MBB, MII, DstReg, DefMI);
+ MachineInstr *NewMI = prior(MII);
+
+ if (checkForDeadDef) {
+ // PR4090 fix: Trim interval failed because there was no use of the
+ // source interval in this MBB. If the def is in this MBB too then we
+ // should mark it dead:
+ if (DefMI->getParent() == MBB) {
+ DefMI->addRegisterDead(SrcInt.reg, tri_);
+ SrcLR->end = SrcLR->start + 1;
+ }
+ }
+
+ // CopyMI may have implicit operands, transfer them over to the newly
+ // rematerialized instruction. And update implicit def interval valnos.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ if (MO.isDef() && li_->hasInterval(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
+ if (DLR && DLR->valno->copy == CopyMI)
+ DLR->valno->copy = NULL;
+ }
+ }
+
+ li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ MBB->getParent()->DeleteMachineInstr(CopyMI);
+ ReMatCopies.insert(CopyMI);
+ ReMatDefs.insert(DefMI);
+ ++NumReMats;
+ return true;
+}
+
+/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy.
+///
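+/// For illustration (hypothetical loop): a copy in the loop latch whose
+/// dest val# has a single kill at the index just past the latch and which
+/// carries a PHI kill is treated as a back edge copy.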
+bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI,
+ unsigned DstReg) const {
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ const MachineLoop *L = loopInfo->getLoopFor(MBB);
+ if (!L)
+ return false;
+ if (MBB != L->getLoopLatch())
+ return false;
+
+ LiveInterval &LI = li_->getInterval(DstReg);
+ unsigned DefIdx = li_->getInstructionIndex(CopyMI);
+ LiveInterval::const_iterator DstLR =
+ LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx));
+ if (DstLR == LI.end())
+ return false;
+ unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1;
+ if (DstLR->valno->kills.size() == 1 &&
+ DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill)
+ return true;
+ return false;
+}
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
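+/// For example (hypothetical, following the x86 convention noted below): a
+/// use of %reg1024:1 rewritten to physical %EAX becomes a direct use of
+/// %AL, with the sub-register index cleared.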
+void
+SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (DstIsPhys && SubIdx) {
+ // Figure out the real physical register we are updating with.
+ DstReg = tri_->getSubReg(DstReg, SubIdx);
+ SubIdx = 0;
+ }
+
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
+ E = mri_->reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *UseMI = &*I;
+ ++I;
+ unsigned OldSubIdx = O.getSubReg();
+ if (DstIsPhys) {
+ unsigned UseDstReg = DstReg;
+ if (OldSubIdx)
+ UseDstReg = tri_->getSubReg(DstReg, OldSubIdx);
+
+ unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+ if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+ CopySrcSubIdx, CopyDstSubIdx) &&
+ CopySrcReg != CopyDstReg &&
+ CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
+ // If the use is a copy and it won't be coalesced away, and its source
+ // is defined by a trivial computation, try to rematerialize it instead.
+ if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,UseMI))
+ continue;
+ }
+
+ O.setReg(UseDstReg);
+ O.setSubReg(0);
+ continue;
+ }
+
+ // Sub-register indexes go from small to large. e.g.
+ // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
+ // EAX: 1 -> AL, 2 -> AX
+ // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
+ // sub-register 2 is also AX.
+ if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
+ assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
+ else if (SubIdx)
+ O.setSubReg(SubIdx);
+ // Remove would-be duplicated kill marker.
+ if (O.isKill() && UseMI->killsRegister(DstReg))
+ O.setIsKill(false);
+ O.setReg(DstReg);
+
+ // After updating the operand, check if the machine instruction has
+ // become a copy. If so, update its val# information.
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+ if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 &&
+ tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+ CopySrcSubIdx, CopyDstSubIdx) &&
+ CopySrcReg != CopyDstReg &&
+ (TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
+ allocatableRegs_[CopyDstReg])) {
+ LiveInterval &LI = li_->getInterval(CopyDstReg);
+ unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI));
+ const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx);
+ if (DLR->valno->def == DefIdx)
+ DLR->valno->copy = UseMI;
+ }
+ }
+}
+
+/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining"
+/// registers due to insert_subreg coalescing. e.g.
+/// r1024 = op
+/// r1025 = implicit_def
+/// r1025 = insert_subreg r1025, r1024
+/// = op r1025
+/// =>
+/// r1025 = op
+/// r1025 = implicit_def
+/// r1025 = insert_subreg r1025, r1025
+/// = op r1025
+void
+SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) {
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+ E = mri_->reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *DefMI = &*I;
+ ++I;
+ if (!O.isDef())
+ continue;
+ if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+ continue;
+ if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI)))
+ continue;
+ li_->RemoveMachineInstrFromMaps(DefMI);
+ DefMI->eraseFromParent();
+ }
+}
+
+/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+/// due to live range lengthening as the result of coalescing.
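+/// e.g. (hypothetical) a use of %reg1024 that was its final use before
+/// coalescing may no longer end the merged interval, so its kill marker
+/// must be cleared.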
+void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
+ LiveInterval &LI) {
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+ UE = mri_->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (UseMO.isKill()) {
+ MachineInstr *UseMI = UseMO.getParent();
+ unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+ const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
+ if (!LR || !LI.isKill(LR->valno, UseIdx+1))
+ UseMO.setIsKill(false);
+ }
+ }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty; if so, remove it and also remove the empty intervals of its
+/// sub-registers. Return true if the live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+ const TargetRegisterInfo *tri_) {
+ if (li.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ if (sli.empty())
+ li_->removeInterval(*SR);
+ }
+ li_->removeInterval(li.reg);
+ return true;
+ }
+ return false;
+}
+
+/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+/// Return true if live interval is removed.
+bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+ LiveInterval::iterator MLR =
+ li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx));
+ if (MLR == li.end())
+ return false; // Already removed by ShortenDeadCopySrcLiveRange.
+ unsigned RemoveStart = MLR->start;
+ unsigned RemoveEnd = MLR->end;
+ // Remove the liverange that's defined by this.
+ if (RemoveEnd == li_->getDefIndex(CopyIdx)+1) {
+ removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+ }
+ return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
+ MachineInstr *DefMI) {
+ unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI));
+ LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+ if (DefIdx != MLR->valno->def)
+ return false;
+ li.removeValNo(MLR->valno);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// PropagateDeadness - Propagate the dead marker to the instruction which
+/// defines the val#.
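+/// e.g. (hypothetical) if "%reg1024 = op" was kept alive only by the dead
+/// copy, its def operand is marked <dead> and LRStart is bumped so the dead
+/// def keeps a single-cycle interval.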
+static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
+ unsigned &LRStart, LiveIntervals *li_,
+ const TargetRegisterInfo* tri_) {
+ MachineInstr *DefMI =
+ li_->getInstructionFromIndex(li_->getDefIndex(LRStart));
+ if (DefMI && DefMI != CopyMI) {
+ int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+ if (DeadIdx != -1) {
+ DefMI->getOperand(DeadIdx).setIsDead();
+ // A dead def should have a single cycle interval.
+ ++LRStart;
+ }
+ }
+}
+
+/// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+/// extended by a dead copy. Mark the last use (if any) of the val# as a kill,
+/// as it ends the live range there. If there isn't another use, then this
+/// live range is dead. Return true if the live interval is removed.
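+/// For example (hypothetical): once "%reg1025 = %reg1024" is known to be
+/// dead, %reg1024's range is cut back to its last remaining use, or removed
+/// entirely if the copy was its only user.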
+bool
+SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+ if (CopyIdx == 0) {
+ // FIXME: special case: function live-in. This could become the general
+ // case if the first instruction index started at a value > 0.
+ assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+ // Live-in to the function but dead. Remove it from entry live-in set.
+ if (mf_->begin()->isLiveIn(li.reg))
+ mf_->begin()->removeLiveIn(li.reg);
+ const LiveRange *LR = li.getLiveRangeContaining(CopyIdx);
+ removeRange(li, LR->start, LR->end, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+ }
+
+ LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx-1);
+ if (LR == li.end())
+ // Livein but defined by a phi.
+ return false;
+
+ unsigned RemoveStart = LR->start;
+ unsigned RemoveEnd = li_->getDefIndex(CopyIdx)+1;
+ if (LR->end > RemoveEnd)
+ // More uses past this copy? Nothing to do.
+ return false;
+
+ // If there is a last use in the same bb, we can't remove the live range.
+ // Shorten the live interval and return.
+ MachineBasicBlock *CopyMBB = CopyMI->getParent();
+ if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR))
+ return false;
+
+ MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart);
+ if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_))
+ // If the live range starts in another mbb and the copy mbb is not a fall
+ // through mbb, then we can only cut the range from the beginning of the
+ // copy mbb.
+ RemoveStart = li_->getMBBStartIdx(CopyMBB) + 1;
+
+ if (LR->valno->def == RemoveStart) {
+ // If the def MI defines the val# and this copy is the only kill of the
+ // val#, then propagate the dead marker.
+ if (li.isOnlyLROfValNo(LR)) {
+ PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
+ ++numDeadValNo;
+ }
+ if (li.isKill(LR->valno, RemoveEnd))
+ li.removeKill(LR->valno, RemoveEnd);
+ }
+
+ removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+/// from an implicit def to another register can be coalesced away.
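+/// e.g. (hypothetical) with "%reg1025 = IMPLICIT_DEF" feeding
+/// "%reg1024 = %reg1025<kill>", the copy can be coalesced away provided
+/// every use of the copy's val# is itself a copy or a matching
+/// insert_subreg.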
+bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
+ LiveInterval &li,
+ LiveInterval &ImpLi) const{
+ if (!CopyMI->killsRegister(ImpLi.reg))
+ return false;
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx);
+ if (LR == li.end())
+ return false;
+ if (LR->valno->hasPHIKill)
+ return false;
+ if (LR->valno->def != CopyIdx)
+ return false;
+ // Make sure all of the val#'s uses are copies.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+ LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
+ if (ULR == li.end() || ULR->valno != LR->valno)
+ continue;
+ // If the use is not a copy, then it's not safe to coalesce the move.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG &&
+ UseMI->getOperand(1).getReg() == li.reg)
+ continue;
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// RemoveCopiesFromValNo - The specified value# is defined by an implicit
+/// def and it is being removed. Turn all copies from this value# into
+/// identity copies so they will be removed.
+void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
+ VNInfo *VNI) {
+ SmallVector<MachineInstr*, 4> ImpDefs;
+ MachineOperand *LastUse = NULL;
+ unsigned LastUseIdx = li_->getUseIndex(VNI->def);
+ for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg),
+ RE = mri_->reg_end(); RI != RE;) {
+ MachineOperand *MO = &RI.getOperand();
+ MachineInstr *MI = &*RI;
+ ++RI;
+ if (MO->isDef()) {
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ ImpDefs.push_back(MI);
+ }
+ continue;
+ }
+ if (JoinedCopies.count(MI))
+ continue;
+ unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(MI));
+ LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
+ if (ULR == li.end() || ULR->valno != VNI)
+ continue;
+ // If the use is a copy, turn it into an identity copy.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == li.reg) {
+ // Each use MI may have multiple uses of this register. Change them all.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == li.reg)
+ MO.setReg(DstReg);
+ }
+ JoinedCopies.insert(MI);
+ } else if (UseIdx > LastUseIdx) {
+ LastUseIdx = UseIdx;
+ LastUse = MO;
+ }
+ }
+ if (LastUse) {
+ LastUse->setIsKill();
+ li.addKill(VNI, LastUseIdx+1);
+ } else {
+ // Remove dead implicit_def's.
+ while (!ImpDefs.empty()) {
+ MachineInstr *ImpDef = ImpDefs.back();
+ ImpDefs.pop_back();
+ li_->RemoveMachineInstrFromMaps(ImpDef);
+ ImpDef->eraseFromParent();
+ }
+ }
+}
+
+/// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
+/// virtual destination register with a physical source register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt,
+ LiveInterval &SrcInt) {
+ // If the virtual register live interval is long but has low use density,
+ // do not join them; instead mark the physical register as its allocation
+ // preference.
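+ // Worked example for the check below (hypothetical numbers): a class with
+ // 8 allocatable registers gives Threshold = 16; an interval spanning ~100
+ // instructions with only 3 uses has density 0.03 < 1/16, so we decline.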
+ const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(DstInt);
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_begin(DstInt.reg),
+ mri_->use_end()) / Length) < (1.0 / Threshold)))
+ return false;
+
+ // If the virtual register live interval extends into a loop, turn down
+ // aggressiveness.
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
+ if (!L) {
+ // Let's see if the virtual register live interval extends into the loop.
+ LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
+ assert(DLR != DstInt.end() && "Live range not found!");
+ DLR = DstInt.FindLiveRangeContaining(DLR->end+1);
+ if (DLR != DstInt.end()) {
+ CopyMBB = li_->getMBBFromIndex(DLR->start);
+ L = loopInfo->getLoopFor(CopyMBB);
+ }
+ }
+
+ if (!L || Length <= Threshold)
+ return true;
+
+ unsigned UseIdx = li_->getUseIndex(CopyIdx);
+ LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+ MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+ if (loopInfo->getLoopFor(SMBB) != L) {
+ if (!loopInfo->isLoopHeader(CopyMBB))
+ return false;
+ // If vr's live interval extends past the loop header, do not join.
+ for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(),
+ SE = CopyMBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB == CopyMBB)
+ continue;
+ if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
+ li_->getMBBEndIdx(SuccMBB)+1))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+/// copy from a virtual source register to a physical destination register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt,
+ LiveInterval &SrcInt) {
+ // If the virtual register live interval is long but has low use density,
+ // do not join them; instead mark the physical register as its allocation
+ // preference.
+ const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(SrcInt);
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_begin(SrcInt.reg),
+ mri_->use_end()) / Length) < (1.0 / Threshold)))
+ return false;
+
+ if (SrcInt.empty())
+ // Must be implicit_def.
+ return false;
+
+ // If the virtual register live interval is defined in or crosses a loop,
+ // turn down aggressiveness.
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ unsigned UseIdx = li_->getUseIndex(CopyIdx);
+ LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+ assert(SLR != SrcInt.end() && "Live range not found!");
+ SLR = SrcInt.FindLiveRangeContaining(SLR->start-1);
+ if (SLR == SrcInt.end())
+ return true;
+ MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+ const MachineLoop *L = loopInfo->getLoopFor(SMBB);
+
+ if (!L || Length <= Threshold)
+ return true;
+
+ if (loopInfo->getLoopFor(CopyMBB) != L) {
+ if (SMBB != L->getLoopLatch())
+ return false;
+ // If vr's live interval is extended from before the loop latch, do not
+ // join.
+ for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(),
+ PE = SMBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredMBB = *PI;
+ if (PredMBB == SMBB)
+ continue;
+ if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
+ li_->getMBBEndIdx(PredMBB)+1))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
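+/// For illustration (hypothetical counts): once either interval exceeds the
+/// threshold, a SmallReg with 2 uses over 40 instructions (density 0.05)
+/// against a LargeReg with 8 uses over 50 (density 0.16) is rejected.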
+bool
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
+ unsigned SmallReg,
+ unsigned Threshold) {
+ // Make sure the intervals are *short*.
+ LiveInterval &LargeInt = li_->getInterval(LargeReg);
+ LiveInterval &SmallInt = li_->getInterval(SmallReg);
+ unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
+ unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
+ if (SmallSize > Threshold || LargeSize > Threshold)
+ if ((float)std::distance(mri_->use_begin(SmallReg),
+ mri_->use_end()) / SmallSize <
+ (float)std::distance(mri_->use_begin(LargeReg),
+ mri_->use_end()) / LargeSize)
+ return false;
+ return true;
+}
+
+/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+/// register with a physical register, check if any of the virtual register's
+/// operands is a sub-register use or def. If so, make sure it won't result
+/// in an illegal extract_subreg or insert_subreg instruction. e.g.
+/// vr1024 = extract_subreg vr1025, 1
+/// ...
+/// vr1024 = mov8rr AH
+/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since
+/// AH does not have a super-reg whose sub-register 1 is AH.
+bool
+SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+ unsigned VirtReg,
+ unsigned PhysReg) {
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = &*I;
+ if (MI == CopyMI || JoinedCopies.count(MI))
+ continue;
+ unsigned SubIdx = O.getSubReg();
+ if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ SubIdx = MI->getOperand(2).getImm();
+ if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ if (O.isDef()) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ ? tri_->getPhysicalRegisterRegClass(SrcReg)
+ : mri_->getRegClass(SrcReg);
+ if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+ return true;
+ }
+ }
+ if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ SubIdx = MI->getOperand(3).getImm();
+ if (VirtReg == MI->getOperand(0).getReg()) {
+ if (!tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ } else {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isPhysicalRegister(DstReg)
+ ? tri_->getPhysicalRegisterRegClass(DstReg)
+ : mri_->getRegClass(DstReg);
+ if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+/// an extract_subreg where dst is a physical register, e.g.
+/// cl = EXTRACT_SUBREG reg1024, 1
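+/// (Hypothetically, joining the example requires RealDstReg = ECX, the
+/// matching super-register of CL at sub-index 1, and succeeds only if
+/// reg1024's interval overlaps neither ECX nor any of its sub-registers.)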
+bool
+SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
+ unsigned SrcReg, unsigned SubIdx,
+ unsigned &RealDstReg) {
+ const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
+ RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
+ assert(RealDstReg && "Invalid extract_subreg instruction!");
+
+ // For this type of EXTRACT_SUBREG, conservatively
+ // check if the live interval of the source register interferes with the
+ // actual super physical register we are trying to coalesce with.
+ LiveInterval &RHS = li_->getInterval(SrcReg);
+ if (li_->hasInterval(RealDstReg) &&
+ RHS.overlaps(li_->getInterval(RealDstReg))) {
+ DOUT << "Interfere with register ";
+ DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_));
+ return false; // Not coalescable
+ }
+ for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false; // Not coalescable
+ }
+ return true;
+}
+
+/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+/// an insert_subreg where src is a physical register, e.g.
+/// reg1024 = INSERT_SUBREG reg1024, c1, 0
+bool
+SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
+ unsigned SrcReg, unsigned SubIdx,
+ unsigned &RealSrcReg) {
+ const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
+ RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
+ assert(RealSrcReg && "Invalid extract_subreg instruction!");
+
+ LiveInterval &RHS = li_->getInterval(DstReg);
+ if (li_->hasInterval(RealSrcReg) &&
+ RHS.overlaps(li_->getInterval(RealSrcReg))) {
+ DOUT << "Interfere with register ";
+ DEBUG(li_->getInterval(RealSrcReg).print(DOUT, tri_));
+ return false; // Not coalescable
+ }
+ for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false; // Not coalescable
+ }
+ return true;
+}
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
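+/// For example (hypothetical): a cross-class join that looks unprofitable
+/// right now sets Again so the coalescer retries this copy on a later
+/// round over the work list.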
+bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
+ MachineInstr *CopyMI = TheCopy.MI;
+
+ Again = false;
+ if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+ return false; // Already done.
+
+ DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
+
+ unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
+ bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
+ bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG;
+ bool isSubRegToReg = CopyMI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG;
+ unsigned SubIdx = 0;
+ if (isExtSubReg) {
+ DstReg = CopyMI->getOperand(0).getReg();
+ DstSubIdx = CopyMI->getOperand(0).getSubReg();
+ SrcReg = CopyMI->getOperand(1).getReg();
+ SrcSubIdx = CopyMI->getOperand(2).getImm();
+ } else if (isInsSubReg || isSubRegToReg) {
+ if (CopyMI->getOperand(2).getSubReg()) {
+ DOUT << "\tSource of insert_subreg is already coalesced "
+ << "to another register.\n";
+ return false; // Not coalescable.
+ }
+ DstReg = CopyMI->getOperand(0).getReg();
+ DstSubIdx = CopyMI->getOperand(3).getImm();
+ SrcReg = CopyMI->getOperand(2).getReg();
+ } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
+ assert(0 && "Unrecognized copy instruction!");
+ return false;
+ }
+
+ // If they are already joined, there is nothing more to do.
+ if (SrcReg == DstReg) {
+ DOUT << "\tCopy already coalesced.\n";
+ return false; // Not coalescable.
+ }
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+
+ // If they are both physical registers, we cannot join them.
+ if (SrcIsPhys && DstIsPhys) {
+ DOUT << "\tCan not coalesce physregs.\n";
+ return false; // Not coalescable.
+ }
+
+ // We only join virtual registers with allocatable physical registers.
+ if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
+ DOUT << "\tSrc reg is unallocatable physreg.\n";
+ return false; // Not coalescable.
+ }
+ if (DstIsPhys && !allocatableRegs_[DstReg]) {
+ DOUT << "\tDst reg is unallocatable physreg.\n";
+ return false; // Not coalescable.
+ }
+
+ // Check that a physical source register is compatible with dst regclass
+ if (SrcIsPhys) {
+ unsigned SrcSubReg = SrcSubIdx ?
+ tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg;
+ const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+ const TargetRegisterClass *DstSubRC = DstRC;
+ if (DstSubIdx)
+ DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
+ assert(DstSubRC && "Illegal subregister index");
+ if (!DstSubRC->contains(SrcSubReg)) {
+ DOUT << "\tIncompatible destination regclass: "
+ << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName()
+ << ".\n";
+ return false; // Not coalescable.
+ }
+ }
+
+ // Check that a physical dst register is compatible with source regclass
+ if (DstIsPhys) {
+ unsigned DstSubReg = DstSubIdx ?
+ tri_->getSubReg(DstReg, DstSubIdx) : DstReg;
+ const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg);
+ const TargetRegisterClass *SrcSubRC = SrcRC;
+ if (SrcSubIdx)
+ SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
+ assert(SrcSubRC && "Illegal subregister index");
+ if (!SrcSubRC->contains(DstSubReg)) {
+ DOUT << "\tIncompatible source regclass: "
+ << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName()
+ << ".\n";
+ return false; // Not coalescable.
+ }
+ }
+
+ bool CrossRC = false;
+ // Should be non-null only when coalescing to a sub-register class.
+ const TargetRegisterClass *NewRC = NULL;
+ MachineBasicBlock *CopyMBB = CopyMI->getParent();
+ unsigned RealDstReg = 0;
+ unsigned RealSrcReg = 0;
+ if (isExtSubReg || isInsSubReg || isSubRegToReg) {
+ SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm();
+ if (SrcIsPhys && isExtSubReg) {
+ // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
+ // coalesced with AX.
+ unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg();
+ if (DstSubIdx) {
+ // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ if (DstSubIdx != SubIdx) {
+ DOUT << "\t Sub-register indices mismatch.\n";
+ return false; // Not coalescable.
+ }
+ } else
+ SrcReg = tri_->getSubReg(SrcReg, SubIdx);
+ SubIdx = 0;
+ } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) {
+ // EAX = INSERT_SUBREG EAX, r1024, 0
+ unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg();
+ if (SrcSubIdx) {
+      // EAX = INSERT_SUBREG EAX, r1024<2>, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ if (SrcSubIdx != SubIdx) {
+ DOUT << "\t Sub-register indices mismatch.\n";
+ return false; // Not coalescable.
+ }
+ } else
+ DstReg = tri_->getSubReg(DstReg, SubIdx);
+ SubIdx = 0;
+ } else if ((DstIsPhys && isExtSubReg) ||
+ (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
+ if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
+ DOUT << "\tSrc of extract_subreg already coalesced with reg"
+ << " of a super-class.\n";
+ return false; // Not coalescable.
+ }
+
+ if (isExtSubReg) {
+ if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
+ return false; // Not coalescable
+ } else {
+ if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+ return false; // Not coalescable
+ }
+ SubIdx = 0;
+ } else {
+ unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg()
+ : CopyMI->getOperand(2).getSubReg();
+ if (OldSubIdx) {
+ if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg))
+ // r1024<2> = EXTRACT_SUBREG r1025, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ // Also check if the other larger register is of the same register
+ // class as the would be resulting register.
+ SubIdx = 0;
+ else {
+ DOUT << "\t Sub-register indices mismatch.\n";
+ return false; // Not coalescable.
+ }
+ }
+ if (SubIdx) {
+ unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
+ unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
+ unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
+ if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ }
+ } else if (differingRegisterClasses(SrcReg, DstReg)) {
+ if (!CrossClassJoin)
+ return false;
+ CrossRC = true;
+
+    // FIXME: What if the result of an EXTRACT_SUBREG is then coalesced
+    // with another? If it's the resulting destination register, then
+    // the subidx must be propagated to uses (but only those defined
+    // by the EXTRACT_SUBREG). If it's being coalesced into another
+    // register, it should be safe because the register is assumed to
+    // have the register class of the super-register.
+
+    // Process moves where one of the registers has a sub-register index.
+ MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
+ MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
+ SubIdx = DstMO->getSubReg();
+ if (SubIdx) {
+ if (SrcMO->getSubReg())
+ // FIXME: can we handle this?
+ return false;
+ // This is not an insert_subreg but it looks like one.
+ // e.g. %reg1024:4 = MOV32rr %EAX
+ isInsSubReg = true;
+ if (SrcIsPhys) {
+ if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+ return false; // Not coalescable
+ SubIdx = 0;
+ }
+ } else {
+ SubIdx = SrcMO->getSubReg();
+ if (SubIdx) {
+        // This is not an extract_subreg but it looks like one.
+ // e.g. %cl = MOV16rr %reg1024:1
+ isExtSubReg = true;
+ if (DstIsPhys) {
+ if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
+ return false; // Not coalescable
+ SubIdx = 0;
+ }
+ }
+ }
+
+ const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
+ const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
+ unsigned LargeReg = SrcReg;
+ unsigned SmallReg = DstReg;
+ unsigned Limit = 0;
+
+ // Now determine the register class of the joined register.
+ if (isExtSubReg) {
+ if (SubIdx && DstRC && DstRC->isASubClass()) {
+ // This is a move to a sub-register class. However, the source is a
+        // sub-register of a larger register class. We don't know what the
+        // register class should be. FIXME.
+ Again = true;
+ return false;
+ }
+ Limit = allocatableRCRegs_[DstRC].count();
+ } else if (!SrcIsPhys && !DstIsPhys) {
+ NewRC = getCommonSubClass(SrcRC, DstRC);
+ if (!NewRC) {
+ DOUT << "\tDisjoint regclasses: "
+ << SrcRC->getName() << ", "
+ << DstRC->getName() << ".\n";
+ return false; // Not coalescable.
+ }
+ if (DstRC->getSize() > SrcRC->getSize())
+ std::swap(LargeReg, SmallReg);
+ }
+
+ // If we are joining two virtual registers and the resulting register
+    // class is more restrictive (fewer registers, smaller size), check if it's
+    // worth doing the merge.
+ if (!SrcIsPhys && !DstIsPhys &&
+ (isExtSubReg || DstRC->isASubClass()) &&
+ !isWinToJoinCrossClass(LargeReg, SmallReg,
+ allocatableRCRegs_[NewRC].count())) {
+ DOUT << "\tSrc/Dest are different register classes.\n";
+ // Allow the coalescer to try again in case either side gets coalesced to
+ // a physical register that's compatible with the other side. e.g.
+ // r1024 = MOV32to32_ r1025
+ // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+
+ // Will it create illegal extract_subreg / insert_subreg?
+ if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg))
+ return false;
+ if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg))
+ return false;
+
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg &&
+ "Register mapping is horribly broken!");
+
+ DOUT << "\t\tInspecting "; SrcInt.print(DOUT, tri_);
+ DOUT << " and "; DstInt.print(DOUT, tri_);
+ DOUT << ": ";
+
+ // Save a copy of the virtual register live interval. We'll manually
+  // merge this into the "real" physical register live interval it is
+ // coalesced with.
+ LiveInterval *SavedLI = 0;
+ if (RealDstReg)
+ SavedLI = li_->dupInterval(&SrcInt);
+ else if (RealSrcReg)
+ SavedLI = li_->dupInterval(&DstInt);
+
+ // Check if it is necessary to propagate "isDead" property.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
+ MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
+ bool isDead = mopd->isDead();
+
+ // We need to be careful about coalescing a source physical register with a
+ // virtual register. Once the coalescing is done, it cannot be broken and
+ // these are not spillable! If the destination interval uses are far away,
+ // think twice about coalescing them!
+ if (!isDead && (SrcIsPhys || DstIsPhys)) {
+ // If the copy is in a loop, take care not to coalesce aggressively if the
+ // src is coming in from outside the loop (or the dst is out of the loop).
+      // If it's not in a loop, then determine whether to join them based
+      // purely on the length of the interval.
+ if (PhysJoinTweak) {
+ if (SrcIsPhys) {
+ if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+ DstInt.preference = SrcReg;
+ ++numAborts;
+ DOUT << "\tMay tie down a physical register, abort!\n";
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ } else {
+ if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+ SrcInt.preference = DstReg;
+ ++numAborts;
+ DOUT << "\tMay tie down a physical register, abort!\n";
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ } else {
+        // If the virtual register live interval is long but it has low use
+        // density, do not join them; instead mark the physical register as its
+        // allocation preference.
+ LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+ unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
+ unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
+ const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ if (TheCopy.isBackEdge)
+ Threshold *= 2; // Favors back edge copies.
+
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ float Ratio = 1.0 / Threshold;
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_begin(JoinVReg),
+ mri_->use_end()) / Length) < Ratio)) {
+ JoinVInt.preference = JoinPReg;
+ ++numAborts;
+ DOUT << "\tMay tie down a physical register, abort!\n";
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ }
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ bool Swapped = false;
+ // If SrcInt is implicitly defined, it's safe to coalesce.
+ bool isEmpty = SrcInt.empty();
+ if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
+ // Only coalesce an empty interval (defined by implicit_def) with
+ // another interval which has a valno defined by the CopyMI and the CopyMI
+ // is a kill of the implicit def.
+ DOUT << "Not profitable!\n";
+ return false;
+ }
+
+ if (!isEmpty && !JoinIntervals(DstInt, SrcInt, Swapped)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+ ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+ (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
+ RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
+ JoinedCopies.insert(CopyMI);
+ return true;
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DOUT << "Interference!\n";
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ LiveInterval *ResSrcInt = &SrcInt;
+ LiveInterval *ResDstInt = &DstInt;
+ if (Swapped) {
+ std::swap(SrcReg, DstReg);
+ std::swap(ResSrcInt, ResDstInt);
+ }
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "LiveInterval::join didn't work right!");
+
+ // If we're about to merge live ranges into a physical register live interval,
+ // we have to update any aliased register's live ranges to indicate that they
+ // have clobbered values for this range.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    // If this is an extract_subreg where dst is a physical register, e.g.
+ // cl = EXTRACT_SUBREG reg1024, 1
+ // then create and update the actual physical register allocated to RHS.
+ if (RealDstReg || RealSrcReg) {
+ LiveInterval &RealInt =
+ li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg);
+ for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
+ E = SavedLI->vni_end(); I != E; ++I) {
+ const VNInfo *ValNo = *I;
+ VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy,
+ li_->getVNInfoAllocator());
+ NewValNo->hasPHIKill = ValNo->hasPHIKill;
+ NewValNo->redefByEC = ValNo->redefByEC;
+ RealInt.addKills(NewValNo, ValNo->kills);
+ RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
+ }
+ RealInt.weight += SavedLI->weight;
+ DstReg = RealDstReg ? RealDstReg : RealSrcReg;
+ }
+
+ // Update the liveintervals of sub-registers.
+ for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
+ li_->getOrCreateInterval(*AS).MergeInClobberRanges(*ResSrcInt,
+ li_->getVNInfoAllocator());
+ }
+
+  // If this is an EXTRACT_SUBREG, make sure the result of coalescing is the
+ // larger super-register.
+ if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
+ !SrcIsPhys && !DstIsPhys) {
+ if ((isExtSubReg && !Swapped) ||
+ ((isInsSubReg || isSubRegToReg) && Swapped)) {
+ ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
+ std::swap(SrcReg, DstReg);
+ std::swap(ResSrcInt, ResDstInt);
+ }
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CrossRC) {
+ ++numCrossRCs;
+ if (NewRC)
+ mri_->setRegClass(DstReg, NewRC);
+ }
+
+ if (NewHeuristic) {
+ // Add all copies that define val# in the source interval into the queue.
+ for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(),
+ e = ResSrcInt->vni_end(); i != e; ++i) {
+ const VNInfo *vni = *i;
+ if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ continue;
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx;
+ if (CopyMI &&
+ JoinedCopies.count(CopyMI) == 0 &&
+ tii_->isMoveInstr(*CopyMI, NewSrcReg, NewDstReg,
+ NewSrcSubIdx, NewDstSubIdx)) {
+ unsigned LoopDepth = loopInfo->getLoopDepth(CopyMBB);
+ JoinQueue->push(CopyRec(CopyMI, LoopDepth,
+ isBackEdgeCopy(CopyMI, DstReg)));
+ }
+ }
+ }
+
+ // Remember to delete the copy instruction.
+ JoinedCopies.insert(CopyMI);
+
+  // Some live range has been lengthened due to coalescing, eliminate the
+ // unnecessary kills.
+ RemoveUnnecessaryKills(SrcReg, *ResDstInt);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ RemoveUnnecessaryKills(DstReg, *ResDstInt);
+
+ if (isInsSubReg)
+ // Avoid:
+ // r1024 = op
+ // r1024 = implicit_def
+ // ...
+ // = r1024
+ RemoveDeadImpDef(DstReg, *ResDstInt);
+ UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
+
+  // SrcReg is guaranteed to be the register whose live interval is being
+  // merged.
+ li_->removeInterval(SrcReg);
+
+  // Manually delete the live interval copy.
+ if (SavedLI) {
+ SavedLI->clear();
+ delete SavedLI;
+ }
+
+ if (isEmpty) {
+    // Now that the copy is being coalesced away, the val# previously defined
+ // by the copy is being defined by an IMPLICIT_DEF which defines a zero
+ // length interval. Remove the val#.
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ const LiveRange *LR = ResDstInt->getLiveRangeContaining(CopyIdx);
+ VNInfo *ImpVal = LR->valno;
+ assert(ImpVal->def == CopyIdx);
+ unsigned NextDef = LR->end;
+ RemoveCopiesFromValNo(*ResDstInt, ImpVal);
+ ResDstInt->removeValNo(ImpVal);
+ LR = ResDstInt->FindLiveRangeContaining(NextDef);
+ if (LR != ResDstInt->end() && LR->valno->def == NextDef) {
+ // Special case: vr1024 = implicit_def
+ // vr1024 = insert_subreg vr1024, vr1025, c
+ // The insert_subreg becomes a "copy" that defines a val# which can itself
+ // be coalesced away.
+ MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef);
+ if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG)
+ LR->valno->copy = DefMI;
+ }
+ }
+
+  // If the resulting interval has a preference that no longer fits because of
+  // subreg coalescing, just clear the preference.
+ if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
+ TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
+ const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
+ if (!RC->contains(ResDstInt->preference))
+ ResDstInt->preference = 0;
+ }
+
+ DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_);
+ DOUT << "\n";
+
+ ++numJoins;
+ return true;
+}
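+
+// A worked sketch of the JoinCopy flow above (hypothetical vregs): given
+//   %reg1025 = MOV32rr %reg1024
+// with non-interfering intervals, JoinIntervals merges the two live
+// intervals, the MOV is recorded in JoinedCopies for later deletion,
+// UpdateRegDefsUses rewrites operands of the register being merged away, and
+// its interval is removed from LiveIntervals.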
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the two input
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VNI is the value number that we're trying to resolve. NewVNInfo keeps
+/// track of the new value numbers assigned to the result LiveInterval.
+/// ThisFromOther/OtherFromThis are maps that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+ DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+ SmallVector<int, 16> &ThisValNoAssignments,
+ SmallVector<int, 16> &OtherValNoAssignments) {
+ unsigned VN = VNI->id;
+
+ // If the VN has already been computed, just return it.
+ if (ThisValNoAssignments[VN] >= 0)
+ return ThisValNoAssignments[VN];
+// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+
+ // If this val is not a copy from the other val, then it must be a new value
+ // number in the destination.
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+ if (I == ThisFromOther.end()) {
+ NewVNInfo.push_back(VNI);
+ return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+ }
+ VNInfo *OtherValNo = I->second;
+
+ // Otherwise, this *is* a copy from the RHS. If the other side has already
+ // been computed, return it.
+ if (OtherValNoAssignments[OtherValNo->id] >= 0)
+ return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+ // Mark this value number as currently being computed, then ask what the
+ // ultimate value # of the other value is.
+ ThisValNoAssignments[VN] = -2;
+ unsigned UltimateVN =
+ ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+ OtherValNoAssignments, ThisValNoAssignments);
+ return ThisValNoAssignments[VN] = UltimateVN;
+}
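+
+// A minimal trace of ComputeUltimateVN (hypothetical val#s): suppose LHS
+// val#0 is defined as a copy from RHS val#0, so ThisFromOther = {L0 -> R0}
+// and OtherFromThis is empty. Resolving L0 first marks
+// ThisValNoAssignments[0] = -2, recurses on R0 (not a copy, so it becomes
+// new value number 0), and finally maps both L0 and R0 to ultimate value
+// number 0.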
+
+static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
+ return std::find(V.begin(), V.end(), Val) != V.end();
+}
+
+/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+/// the specified live interval is defined by a copy from the specified
+/// register.
+bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
+ LiveRange *LR,
+ unsigned Reg) {
+ unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
+ if (SrcReg == Reg)
+ return true;
+ if (LR->valno->def == ~0U &&
+ TargetRegisterInfo::isPhysicalRegister(li.reg) &&
+ *tri_->getSuperRegisters(li.reg)) {
+ // It's a sub-register live interval, we may not have precise information.
+ // Re-compute it.
+ MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (DefMI &&
+ tii_->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ DstReg == li.reg && SrcReg == Reg) {
+ // Cache computed info.
+ LR->valno->def = LR->start;
+ LR->valno->copy = DefMI;
+ return true;
+ }
+ }
+ return false;
+}
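+
+// For example (hypothetical vregs): if LR->valno is defined by
+//   %reg1025 = MOV32rr %reg1024
+// then a query with Reg == %reg1024 returns true via the getVNInfoSourceReg
+// check; the re-computation path above only runs for imprecise physical
+// sub-register intervals.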
+
+/// SimpleJoin - Attempt to join the specified interval into this one. The
+/// caller of this method must guarantee that the RHS only contains a single
+/// value number and that the RHS is not defined by a copy from this
+/// interval. This returns false if the intervals are not joinable, or it
+/// joins them and returns true.
+bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
+ assert(RHS.containsOneValue());
+
+  // Some number (potentially more than one) of the value numbers in the
+  // current interval may be defined as copies from the RHS. Scan the
+  // overlapping portions of the LHS and RHS, keeping track of this and
+  // looking for overlapping live ranges that are NOT defined as copies. If
+  // these exist, we cannot coalesce.
+
+ LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
+ LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
+
+ if (LHSIt->start < RHSIt->start) {
+ LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
+ if (LHSIt != LHS.begin()) --LHSIt;
+ } else if (RHSIt->start < LHSIt->start) {
+ RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
+ if (RHSIt != RHS.begin()) --RHSIt;
+ }
+
+ SmallVector<VNInfo*, 8> EliminatedLHSVals;
+
+ while (1) {
+ // Determine if these live intervals overlap.
+ bool Overlaps = false;
+ if (LHSIt->start <= RHSIt->start)
+ Overlaps = LHSIt->end > RHSIt->start;
+ else
+ Overlaps = RHSIt->end > LHSIt->start;
+
+ // If the live intervals overlap, there are two interesting cases: if the
+ // LHS interval is defined by a copy from the RHS, it's ok and we record
+ // that the LHS value # is the same as the RHS. If it's not, then we cannot
+ // coalesce these live ranges and we bail out.
+ if (Overlaps) {
+ // If we haven't already recorded that this value # is safe, check it.
+ if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
+ // Copy from the RHS?
+ if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
+ return false; // Nope, bail out.
+
+ if (LHSIt->contains(RHSIt->valno->def))
+ // Here is an interesting situation:
+ // BB1:
+ // vr1025 = copy vr1024
+ // ..
+ // BB2:
+ // vr1024 = op
+ // = vr1025
+ // Even though vr1025 is copied from vr1024, it's not safe to
+ // coalesce them since the live range of vr1025 intersects the
+ // def of vr1024. This happens because vr1025 is assigned the
+ // value of the previous iteration of vr1024.
+ return false;
+ EliminatedLHSVals.push_back(LHSIt->valno);
+ }
+
+ // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break;
+ continue;
+ }
+
+ if (LHSIt->end < RHSIt->end) {
+ if (++LHSIt == LHSEnd) break;
+ } else {
+ // One interesting case to check here. It's possible that we have
+ // something like "X3 = Y" which defines a new value number in the LHS,
+      // and is the last use of this live range of the RHS. In this case, we
+ // want to notice this copy (so that it gets coalesced away) even though
+ // the live ranges don't actually overlap.
+ if (LHSIt->start == RHSIt->end) {
+ if (InVector(LHSIt->valno, EliminatedLHSVals)) {
+ // We already know that this value number is going to be merged in
+ // if coalescing succeeds. Just skip the liverange.
+ if (++LHSIt == LHSEnd) break;
+ } else {
+ // Otherwise, if this is a copy from the RHS, mark it as being merged
+ // in.
+ if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
+ if (LHSIt->contains(RHSIt->valno->def))
+ // Here is an interesting situation:
+ // BB1:
+ // vr1025 = copy vr1024
+ // ..
+ // BB2:
+ // vr1024 = op
+ // = vr1025
+ // Even though vr1025 is copied from vr1024, it's not safe to
+                // coalesce them since the live range of vr1025 intersects the
+ // def of vr1024. This happens because vr1025 is assigned the
+ // value of the previous iteration of vr1024.
+ return false;
+ EliminatedLHSVals.push_back(LHSIt->valno);
+
+ // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break;
+ }
+ }
+ }
+
+ if (++RHSIt == RHSEnd) break;
+ }
+ }
+
+ // If we got here, we know that the coalescing will be successful and that
+ // the value numbers in EliminatedLHSVals will all be merged together. Since
+ // the most common case is that EliminatedLHSVals has a single number, we
+ // optimize for it: if there is more than one value, we merge them all into
+ // the lowest numbered one, then handle the interval as if we were merging
+ // with one value number.
+ VNInfo *LHSValNo = NULL;
+ if (EliminatedLHSVals.size() > 1) {
+ // Loop through all the equal value numbers merging them into the smallest
+ // one.
+ VNInfo *Smallest = EliminatedLHSVals[0];
+ for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) {
+ if (EliminatedLHSVals[i]->id < Smallest->id) {
+ // Merge the current notion of the smallest into the smaller one.
+ LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]);
+ Smallest = EliminatedLHSVals[i];
+ } else {
+ // Merge into the smallest.
+ LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest);
+ }
+ }
+ LHSValNo = Smallest;
+ } else if (EliminatedLHSVals.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
+ *tri_->getSuperRegisters(LHS.reg))
+ // Imprecise sub-register information. Can't handle it.
+ return false;
+ assert(0 && "No copies from the RHS?");
+ } else {
+ LHSValNo = EliminatedLHSVals[0];
+ }
+
+ // Okay, now that there is a single LHS value number that we're merging the
+ // RHS into, update the value number info for the LHS to indicate that the
+ // value number is defined where the RHS value number was.
+ const VNInfo *VNI = RHS.getValNumInfo(0);
+ LHSValNo->def = VNI->def;
+ LHSValNo->copy = VNI->copy;
+
+ // Okay, the final step is to loop over the RHS live intervals, adding them to
+ // the LHS.
+ LHSValNo->hasPHIKill |= VNI->hasPHIKill;
+ LHS.addKills(LHSValNo, VNI->kills);
+ LHS.MergeRangesInAsValue(RHS, LHSValNo);
+ LHS.weight += RHS.weight;
+ if (RHS.preference && !LHS.preference)
+ LHS.preference = RHS.preference;
+
+ // Update the liveintervals of sub-registers.
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
+ for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS)
+ li_->getOrCreateInterval(*AS).MergeInClobberRanges(LHS,
+ li_->getVNInfoAllocator());
+
+ return true;
+}
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false. Otherwise, if one of the intervals being joined is a
+/// physreg, this method always canonicalizes LHS to be it. The output
+/// "RHS" will not have been modified, so we can use this information
+/// below to update aliases.
+bool
+SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
+ bool &Swapped) {
+ // Compute the final value assignment, assuming that the live ranges can be
+ // coalesced.
+ SmallVector<int, 16> LHSValNoAssignments;
+ SmallVector<int, 16> RHSValNoAssignments;
+ DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+ DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+ SmallVector<VNInfo*, 16> NewVNInfo;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its sub-registers is overlapping the live interval of the virtual
+ // register. If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
+ *tri_->getSubRegisters(LHS.reg)) {
+ // If it's coalescing a virtual register to a physical register, estimate
+ // its live interval length. This is the *cost* of scanning an entire live
+ // interval. If the cost is low, we'll do an exhaustive check instead.
+
+ // If this is something like this:
+ // BB1:
+ // v1024 = op
+ // ...
+ // BB2:
+ // ...
+ // RAX = v1024
+ //
+    // That is, the live interval of v1024 crosses a bb. Then we can't rely on
+    // the less conservative check. It's possible a sub-register is defined
+    // before v1024 (or live in) and live out of BB1.
+ if (RHS.containsOneValue() &&
+ li_->intervalIsInOneMBB(RHS) &&
+ li_->getApproximateInstructionCount(RHS) <= 10) {
+ // Perform a more exhaustive check for some common cases.
+ if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
+ return false;
+ } else {
+ for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false;
+ }
+ }
+ } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) &&
+ *tri_->getSubRegisters(RHS.reg)) {
+ if (LHS.containsOneValue() &&
+ li_->getApproximateInstructionCount(LHS) <= 10) {
+ // Perform a more exhaustive check for some common cases.
+ if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
+ return false;
+ } else {
+ for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false;
+ }
+ }
+ }
+
+ // Compute ultimate value numbers for the LHS and RHS values.
+ if (RHS.containsOneValue()) {
+    // Copies from a live interval with a single value are simple to handle and
+ // very common, handle the special case here. This is important, because
+ // often RHS is small and LHS is large (e.g. a physreg).
+
+ // Find out if the RHS is defined as a copy from some value in the LHS.
+ int RHSVal0DefinedFromLHS = -1;
+ int RHSValID = -1;
+ VNInfo *RHSValNoInfo = NULL;
+ VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0);
+ unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0);
+ if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) {
+ // If RHS is not defined as a copy from the LHS, we can use simpler and
+ // faster checks to see if the live ranges are coalescable. This joiner
+ // can't swap the LHS/RHS intervals though.
+ if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+ return SimpleJoin(LHS, RHS);
+ } else {
+ RHSValNoInfo = RHSValNoInfo0;
+ }
+ } else {
+ // It was defined as a copy from the LHS, find out what value # it is.
+ RHSValNoInfo = LHS.getLiveRangeContaining(RHSValNoInfo0->def-1)->valno;
+ RHSValID = RHSValNoInfo->id;
+ RHSVal0DefinedFromLHS = RHSValID;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.resize(LHS.getNumValNums(), NULL);
+
+ // Okay, *all* of the values in LHS that are defined as a copy from RHS
+ // should now get updated.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) {
+ if (LHSSrcReg != RHS.reg) {
+ // If this is not a copy from the RHS, its value number will be
+ // unmodified by the coalescing.
+ NewVNInfo[VN] = VNI;
+ LHSValNoAssignments[VN] = VN;
+ } else if (RHSValID == -1) {
+ // Otherwise, it is a copy from the RHS, and we don't already have a
+ // value# for it. Keep the current value number, but remember it.
+ LHSValNoAssignments[VN] = RHSValID = VN;
+ NewVNInfo[VN] = RHSValNoInfo;
+ LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
+ } else {
+ // Otherwise, use the specified value #.
+ LHSValNoAssignments[VN] = RHSValID;
+ if (VN == (unsigned)RHSValID) { // Else this val# is dead.
+ NewVNInfo[VN] = RHSValNoInfo;
+ LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
+ }
+ }
+ } else {
+ NewVNInfo[VN] = VNI;
+ LHSValNoAssignments[VN] = VN;
+ }
+ }
+
+ assert(RHSValID != -1 && "Didn't find value #?");
+ RHSValNoAssignments[0] = RHSValID;
+ if (RHSVal0DefinedFromLHS != -1) {
+ // This path doesn't go through ComputeUltimateVN so just set
+ // it to anything.
+ RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1;
+ }
+ } else {
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy?
+ continue;
+
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ if (li_->getVNInfoSourceReg(VNI) != RHS.reg)
+ continue;
+
+ // Figure out the value # from the RHS.
+ LHSValsDefinedFromRHS[VNI]=RHS.getLiveRangeContaining(VNI->def-1)->valno;
+ }
+
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy?
+ continue;
+
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ if (li_->getVNInfoSourceReg(VNI) != LHS.reg)
+ continue;
+
+ // Figure out the value # from the LHS.
+ RHSValsDefinedFromLHS[VNI]=LHS.getLiveRangeContaining(VNI->def-1)->valno;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->def == ~1U)
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->def == ~1U)
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
+ }
+
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
+ }
+ }
+
+ // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+ // interval lists to see if these intervals are coalescable.
+ LiveInterval::const_iterator I = LHS.begin();
+ LiveInterval::const_iterator IE = LHS.end();
+ LiveInterval::const_iterator J = RHS.begin();
+ LiveInterval::const_iterator JE = RHS.end();
+
+ // Skip ahead until the first place of potential sharing.
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
+
+ while (1) {
+ // Determine if these two live ranges overlap.
+ bool Overlaps;
+ if (I->start < J->start) {
+ Overlaps = I->end > J->start;
+ } else {
+ Overlaps = J->end > I->start;
+ }
+
+ // If so, check value # info to determine if they are really different.
+ if (Overlaps) {
+ // If the live range overlap will map to the same value number in the
+ // result liverange, we can still coalesce them. If not, we can't.
+ if (LHSValNoAssignments[I->valno->id] !=
+ RHSValNoAssignments[J->valno->id])
+ return false;
+ }
+
+ if (I->end < J->end) {
+ ++I;
+ if (I == IE) break;
+ } else {
+ ++J;
+ if (J == JE) break;
+ }
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+ E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned LHSValID = LHSValNoAssignments[VNI->id];
+ LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
+ NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill;
+ RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+ E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned RHSValID = RHSValNoAssignments[VNI->id];
+ LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
+ NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill;
+ LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
+ }
+
+ // If we get here, we know that we can coalesce the live ranges. Ask the
+ // intervals to coalesce themselves now.
+ if ((RHS.ranges.size() > LHS.ranges.size() &&
+ TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
+ TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+ RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo);
+ Swapped = true;
+ } else {
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo);
+ Swapped = false;
+ }
+ return true;
+}
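+
+// Illustrative trace (hypothetical value numbers): if LHS has val#0 and
+// val#1 (neither a copy) and RHS has val#0 (not a copy) plus val#1 defined
+// as a copy from LHS val#1, the general path yields
+// LHSValNoAssignments = {0, 1} and RHSValNoAssignments = {2, 1}: the
+// copy-related numbers collapse onto ultimate value #1, and overlapping
+// ranges are accepted only where the assigned numbers agree.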
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+ // depth of the basic block (the unsigned), and then on the MBB number.
+ struct DepthMBBCompare {
+ typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+ bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+ if (LHS.first > RHS.first) return true; // Deeper loops first
+ return LHS.first == RHS.first &&
+ LHS.second->getNumber() < RHS.second->getNumber();
+ }
+ };
+}
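+
+// For example, blocks (depth 2, MBB#5), (depth 1, MBB#0) and (depth 2,
+// MBB#3) sort as (2,#3), (2,#5), (1,#0): deeper loops first, ties broken by
+// lower block number.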
+
+/// getRepIntervalSize - Returns the size of the interval that represents the
+/// specified register.
+template<class SF>
+unsigned JoinPriorityQueue<SF>::getRepIntervalSize(unsigned Reg) {
+ return Rc->getRepIntervalSize(Reg);
+}
+
+/// CopyRecSort::operator() - Join priority queue sorting function.
+///
+bool CopyRecSort::operator()(CopyRec left, CopyRec right) const {
+ // Inner loops first.
+ if (left.LoopDepth > right.LoopDepth)
+ return false;
+ else if (left.LoopDepth == right.LoopDepth)
+ if (left.isBackEdge && !right.isBackEdge)
+ return false;
+ return true;
+}
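+
+// Note that std::priority_queue treats operator() above as less-than, so
+// pop() yields the copy at the greatest loop depth first, with back-edge
+// copies ahead of others at the same depth.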
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<CopyRec> &TryAgain) {
+ DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+ std::vector<CopyRec> VirtCopies;
+ std::vector<CopyRec> PhysCopies;
+ std::vector<CopyRec> ImpDefCopies;
+ unsigned LoopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E;) {
+ MachineInstr *Inst = MII++;
+
+    // If this isn't a copy or an extract_subreg, we can't join intervals.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(1).getReg();
+ } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ continue;
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (NewHeuristic) {
+ JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg)));
+ } else {
+ if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+ ImpDefCopies.push_back(CopyRec(Inst, 0, false));
+ else if (SrcIsPhys || DstIsPhys)
+ PhysCopies.push_back(CopyRec(Inst, 0, false));
+ else
+ VirtCopies.push_back(CopyRec(Inst, 0, false));
+ }
+ }
+
+ if (NewHeuristic)
+ return;
+
+ // Try coalescing implicit copies first, followed by copies to / from
+ // physical registers, then finally copies from virtual registers to
+ // virtual registers.
+ for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = ImpDefCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = PhysCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = VirtCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+}
+
+void SimpleRegisterCoalescing::joinIntervals() {
+ DOUT << "********** JOINING INTERVALS ***********\n";
+
+ if (NewHeuristic)
+ JoinQueue = new JoinPriorityQueue<CopyRecSort>(this);
+
+ std::vector<CopyRec> TryAgainList;
+ if (loopInfo->empty()) {
+ // If there are no loops in the function, join intervals in function order.
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ I != E; ++I)
+ CopyCoalesceInMBB(I, TryAgainList);
+ } else {
+ // Otherwise, join intervals in inner loops before other intervals.
+    // Unfortunately we can't just iterate over the loop hierarchy here because
+ // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+ // Join intervals in the function prolog first. We want to join physical
+    // registers with virtual registers before the intervals get too long.
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+ }
+
+ // Sort by loop depth.
+ std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+ // Finally, join intervals in loop nest order.
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ }
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ if (NewHeuristic) {
+ SmallVector<CopyRec, 16> TryAgain;
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+ while (!JoinQueue->empty()) {
+ CopyRec R = JoinQueue->pop();
+ bool Again = false;
+ bool Success = JoinCopy(R, Again);
+ if (Success)
+ ProgressMade = true;
+ else if (Again)
+ TryAgain.push_back(R);
+ }
+
+ if (ProgressMade) {
+ while (!TryAgain.empty()) {
+ JoinQueue->push(TryAgain.back());
+ TryAgain.pop_back();
+ }
+ }
+ }
+ } else {
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ CopyRec &TheCopy = TryAgainList[i];
+ if (TheCopy.MI) {
+ bool Again = false;
+ bool Success = JoinCopy(TheCopy, Again);
+ if (Success || !Again) {
+ TheCopy.MI = 0; // Mark this one as done.
+ ProgressMade = true;
+ }
+ }
+ }
+ }
+ }
+
+ if (NewHeuristic)
+ delete JoinQueue;
+}
+
+/// Return true if the two specified registers belong to different register
+/// classes. The registers may be either phys or virt regs.
+bool
+SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
+ unsigned RegB) const {
+ // Get the register classes for the first reg.
+ if (TargetRegisterInfo::isPhysicalRegister(RegA)) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "Shouldn't consider two physregs!");
+ return !mri_->getRegClass(RegB)->contains(RegA);
+ }
+
+ // Compare against the regclass for the second reg.
+ const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA);
+ if (TargetRegisterInfo::isVirtualRegister(RegB)) {
+ const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB);
+ return RegClassA != RegClassB;
+ }
+ return !RegClassA->contains(RegB);
+}
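+
+// For example (x86, hypothetical vregs): a GR32 virtual register and the
+// physical register EAX do not differ, since GR32 contains EAX; a GR32
+// virtual register and a GR16 virtual register do.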
+
+/// lastRegisterUse - Returns the last use of the specified register between
+/// cycles Start and End or NULL if there are no uses.
+MachineOperand *
+SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End,
+ unsigned Reg, unsigned &UseIdx) const{
+ UseIdx = 0;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineOperand *LastUse = NULL;
+ for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg),
+ E = mri_->use_end(); I != E; ++I) {
+ MachineOperand &Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == DstReg)
+ // Ignore identity copies.
+ continue;
+ unsigned Idx = li_->getInstructionIndex(UseMI);
+ if (Idx >= Start && Idx < End && Idx >= UseIdx) {
+ LastUse = &Use;
+ UseIdx = li_->getUseIndex(Idx);
+ }
+ }
+ return LastUse;
+ }
+
+ int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM;
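+  // e.g. with InstrSlots::NUM == 4 and End == 14, e starts at 12, the base
+  // index of the instruction containing slot 13; the loop below then scans
+  // backwards one instruction (NUM slots) at a time.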
+ int s = Start;
+ while (e >= s) {
+ // Skip deleted instructions
+ MachineInstr *MI = li_->getInstructionFromIndex(e);
+ while ((e - InstrSlots::NUM) >= s && !MI) {
+ e -= InstrSlots::NUM;
+ MI = li_->getInstructionFromIndex(e);
+ }
+ if (e < s || MI == NULL)
+ return NULL;
+
+ // Ignore identity copies.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == DstReg))
+ for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+ MachineOperand &Use = MI->getOperand(i);
+ if (Use.isReg() && Use.isUse() && Use.getReg() &&
+ tri_->regsOverlap(Use.getReg(), Reg)) {
+ UseIdx = li_->getUseIndex(e);
+ return &Use;
+ }
+ }
+
+ e -= InstrSlots::NUM;
+ }
+
+ return NULL;
+}
+
+
+void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
+ if (TargetRegisterInfo::isPhysicalRegister(reg))
+ cerr << tri_->getName(reg);
+ else
+ cerr << "%reg" << reg;
+}
+
+void SimpleRegisterCoalescing::releaseMemory() {
+ JoinedCopies.clear();
+ ReMatCopies.clear();
+ ReMatDefs.clear();
+}
+
+static bool isZeroLengthInterval(LiveInterval *li) {
+ for (LiveInterval::Ranges::const_iterator
+ i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
+ if (i->end - i->start > LiveInterval::InstrSlots::NUM)
+ return false;
+ return true;
+}
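+
+// Each instruction occupies InstrSlots::NUM (4) consecutive index slots, so
+// a range no longer than NUM, e.g. [4,8), lives across a single instruction:
+// the use immediately follows the def, which is what "zero length" means
+// here.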
+
+/// TurnCopyIntoImpDef - If the source of the specified copy is an implicit
+/// def, turn the copy into an implicit def.
+bool
+SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
+ MachineBasicBlock *MBB,
+ unsigned DstReg, unsigned SrcReg) {
+ MachineInstr *CopyMI = &*I;
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ if (!li_->hasInterval(SrcReg))
+ return false;
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ if (!SrcInt.empty())
+ return false;
+ if (!li_->hasInterval(DstReg))
+ return false;
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx);
+ DstInt.removeValNo(DstLR->valno);
+ CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+ for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i)
+ CopyMI->RemoveOperand(i);
+ bool NoUse = mri_->use_empty(SrcReg);
+ if (NoUse) {
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
+ E = mri_->reg_end(); I != E; ) {
+ assert(I.getOperand().isDef());
+ MachineInstr *DefMI = &*I;
+ ++I;
+ // The implicit_def source has no other uses, delete it.
+ assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF);
+ li_->RemoveMachineInstrFromMaps(DefMI);
+ DefMI->eraseFromParent();
+ }
+ }
+ ++I;
+ return true;
+}
+
+
+bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ li_ = &getAnalysis<LiveIntervals>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ DOUT << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n';
+
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
+ E = tri_->regclass_end(); I != E; ++I)
+ allocatableRCRegs_.insert(std::make_pair(*I,
+ tri_->getAllocatableSet(fn, *I)));
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining) {
+ joinIntervals();
+ DEBUG({
+ DOUT << "********** INTERVALS POST JOINING **********\n";
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){
+ I->second->print(DOUT, tri_);
+ DOUT << "\n";
+ }
+ });
+ }
+
+ // Perform a final pass over the instructions and compute spill weights
+ // and remove identity moves.
+ SmallVector<unsigned, 4> DeadDefs;
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ unsigned loopDepth = loopInfo->getLoopDepth(mbb);
+
+ for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ) {
+ MachineInstr *MI = mii;
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (JoinedCopies.count(MI)) {
+ // Delete all coalesced copies.
+ if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) &&
+ "Unrecognized copy instruction");
+ DstReg = MI->getOperand(0).getReg();
+ }
+ if (MI->registerDefIsDead(DstReg)) {
+ LiveInterval &li = li_->getInterval(DstReg);
+ if (!ShortenDeadCopySrcLiveRange(li, MI))
+ ShortenDeadCopyLiveRange(li, MI);
+ }
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ continue;
+ }
+
+ // Now check if this is a remat'ed def instruction which is now dead.
+ if (ReMatDefs.count(MI)) {
+ bool isDead = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ DeadDefs.push_back(Reg);
+ if (MO.isDead())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !mri_->use_empty(Reg)) {
+ isDead = false;
+ break;
+ }
+ }
+ if (isDead) {
+ while (!DeadDefs.empty()) {
+ unsigned DeadDef = DeadDefs.back();
+ DeadDefs.pop_back();
+ RemoveDeadDef(li_->getInterval(DeadDef), MI);
+ }
+ li_->RemoveMachineInstrFromMaps(mii);
+ mii = mbbi->erase(mii);
+ continue;
+ } else
+ DeadDefs.clear();
+ }
+
+      // If the move will be an identity move, delete it.
+ bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+ if (isMove && SrcReg == DstReg) {
+ if (li_->hasInterval(SrcReg)) {
+ LiveInterval &RegInt = li_->getInterval(SrcReg);
+          // If the def of this move instruction is dead, remove its live range
+          // from the destination register's live interval.
+ if (MI->registerDefIsDead(DstReg)) {
+ if (!ShortenDeadCopySrcLiveRange(RegInt, MI))
+ ShortenDeadCopyLiveRange(RegInt, MI);
+ }
+ }
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ } else if (!isMove || !TurnCopyIntoImpDef(mii, mbb, DstReg, SrcReg)) {
+ SmallSet<unsigned, 4> UniqueUses;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &mop = MI->getOperand(i);
+ if (mop.isReg() && mop.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(mop.getReg())) {
+ unsigned reg = mop.getReg();
+ // Multiple uses of reg by the same instruction. It should not
+ // contribute to spill weight again.
+ if (UniqueUses.count(reg) != 0)
+ continue;
+ LiveInterval &RegInt = li_->getInterval(reg);
+ RegInt.weight +=
+ li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth);
+ UniqueUses.insert(reg);
+ }
+ }
+ ++mii;
+ }
+ }
+ }
+
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+ LiveInterval &LI = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // If the live interval length is essentially zero, i.e. in every live
+ // range the use follows def immediately, it doesn't make sense to spill
+ // it and hope it will be easier to allocate for this li.
+ if (isZeroLengthInterval(&LI))
+ LI.weight = HUGE_VALF;
+ else {
+ bool isLoad = false;
+ SmallVector<LiveInterval*, 4> SpillIs;
+ if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
+ // If all of the definitions of the interval are re-materializable,
+            // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isLoad)
+ LI.weight *= 0.9F;
+ else
+ LI.weight *= 0.5F;
+ }
+ }
+
+ // Slightly prefer live interval that has been assigned a preferred reg.
+ if (LI.preference)
+ LI.weight *= 1.01F;
+
+ // Divide the weight of the interval by its size. This encourages
+ // spilling of intervals that are large and have few uses, and
+ // discourages spilling of small intervals with many uses.
+ LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
+ }
+ }
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const {
+ li_->print(O, m);
+}
+
+RegisterCoalescer* llvm::createSimpleRegisterCoalescer() {
+ return new SimpleRegisterCoalescing();
+}
+
+// Make sure that anything that uses RegisterCoalescer pulls in this file...
+DEFINING_FILE_FOR(SimpleRegisterCoalescing)
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
new file mode 100644
index 0000000..a495bfd
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -0,0 +1,313 @@
+//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register copy coalescing phase.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/ADT/BitVector.h"
+#include <queue>
+
+namespace llvm {
+ class SimpleRegisterCoalescing;
+ class LiveVariables;
+ class TargetRegisterInfo;
+ class TargetInstrInfo;
+ class VirtRegMap;
+ class MachineLoopInfo;
+
+ /// CopyRec - Representation for copy instructions in coalescer queue.
+ ///
+ struct CopyRec {
+ MachineInstr *MI;
+ unsigned LoopDepth;
+ bool isBackEdge;
+ CopyRec(MachineInstr *mi, unsigned depth, bool be)
+      : MI(mi), LoopDepth(depth), isBackEdge(be) {}
+ };
+
+ template<class SF> class JoinPriorityQueue;
+
+ /// CopyRecSort - Sorting function for coalescer queue.
+ ///
+ struct CopyRecSort : public std::binary_function<CopyRec,CopyRec,bool> {
+ JoinPriorityQueue<CopyRecSort> *JPQ;
+ explicit CopyRecSort(JoinPriorityQueue<CopyRecSort> *jpq) : JPQ(jpq) {}
+ CopyRecSort(const CopyRecSort &RHS) : JPQ(RHS.JPQ) {}
+ bool operator()(CopyRec left, CopyRec right) const;
+ };
+
+ /// JoinQueue - A priority queue of copy instructions the coalescer is
+ /// going to process.
+ template<class SF>
+ class JoinPriorityQueue {
+ SimpleRegisterCoalescing *Rc;
+ std::priority_queue<CopyRec, std::vector<CopyRec>, SF> Queue;
+
+ public:
+ explicit JoinPriorityQueue(SimpleRegisterCoalescing *rc)
+ : Rc(rc), Queue(SF(this)) {}
+
+ bool empty() const { return Queue.empty(); }
+ void push(CopyRec R) { Queue.push(R); }
+ CopyRec pop() {
+ if (empty()) return CopyRec(0, 0, false);
+ CopyRec R = Queue.top();
+ Queue.pop();
+ return R;
+ }
+
+ // Callbacks to SimpleRegisterCoalescing.
+ unsigned getRepIntervalSize(unsigned Reg);
+ };
+
+ class SimpleRegisterCoalescing : public MachineFunctionPass,
+ public RegisterCoalescer {
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ LiveIntervals *li_;
+ const MachineLoopInfo* loopInfo;
+
+ BitVector allocatableRegs_;
+ DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
+
+ /// JoinQueue - A priority queue of copy instructions the coalescer is
+ /// going to process.
+ JoinPriorityQueue<CopyRecSort> *JoinQueue;
+
+ /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+ ///
+ SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+ /// ReMatCopies - Keep track of copies eliminated due to remat.
+ ///
+ SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+ /// ReMatDefs - Keep track of definition instructions which have
+ /// been remat'ed.
+ SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+ public:
+    static char ID; // Pass identification, replacement for typeid
+ SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {}
+
+ struct InstrSlots {
+ enum {
+ LOAD = 0,
+ USE = 1,
+ DEF = 2,
+ STORE = 3,
+ NUM = 4
+ };
+ };
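+    // Assuming this mirrors LiveIntervals' slot numbering: an instruction's
+    // base index is a multiple of NUM, and its load/use/def/store points sit
+    // at base+LOAD, base+USE, base+DEF and base+STORE respectively.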
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
+ // This runs as an independent pass, so don't do anything.
+ return false;
+    }
+
+ /// getRepIntervalSize - Called from join priority queue sorting function.
+    /// It returns the size of the interval that represents the given register.
+ unsigned getRepIntervalSize(unsigned Reg) {
+ if (!li_->hasInterval(Reg))
+ return 0;
+ return li_->getApproximateInstructionCount(li_->getInterval(Reg)) *
+ LiveInterval::InstrSlots::NUM;
+ }
+
+ /// print - Implement the dump method.
+ virtual void print(std::ostream &O, const Module* = 0) const;
+ void print(std::ostream *O, const Module* M = 0) const {
+ if (O) print(*O, M);
+ }
+
+ private:
+ /// joinIntervals - join compatible live intervals
+ void joinIntervals();
+
+ /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into the "TryAgain" list.
+ void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<CopyRec> &TryAgain);
+
+ /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns true
+ /// if the copy was successfully coalesced away. If it is not currently
+ /// possible to coalesce this interval, but it may be possible if other
+ /// things get coalesced, then it returns true by reference in 'Again'.
+ bool JoinCopy(CopyRec &TheCopy, bool &Again);
+
+ /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. Otherwise, if one of the intervals being joined is a
+    /// physreg, this method always canonicalizes LHS to be it. The output
+    /// "RHS" will not have been modified, so we can use this information
+ /// below to update aliases.
+ bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped);
+
+ /// SimpleJoin - Attempt to join the specified interval into this one. The
+ /// caller of this method must guarantee that the RHS only contains a single
+ /// value number and that the RHS is not defined by a copy from this
+ /// interval. This returns false if the intervals are not joinable, or it
+ /// joins them and returns true.
+ bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS);
+
+ /// Return true if the two specified registers belong to different register
+ /// classes. The registers may be either phys or virt regs.
+ bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
+
+
+    /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+    /// the source value number is defined by a copy from the destination reg,
+    /// see if we can merge these two destination reg value numbers into a
+    /// single value number, eliminating a copy.
+ bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
+ MachineInstr *CopyMI);
+
+ /// HasOtherReachingDefs - Return true if there are definitions of IntB
+ /// other than BValNo val# that can reach uses of AValno val# of IntA.
+ bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB,
+ MachineInstr *CopyMI);
+
+ /// TrimLiveIntervalToLastUse - If there is a last use in the same basic
+    /// block as the copy instruction, trim the live interval to the last use
+ /// and return true.
+ bool TrimLiveIntervalToLastUse(unsigned CopyIdx,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &li, const LiveRange *LR);
+
+ /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+    /// computation, replace the copy by rematerializing the definition.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
+ MachineInstr *CopyMI);
+
+ /// TurnCopyIntoImpDef - If source of the specified copy is an implicit def,
+ /// turn the copy into an implicit def.
+ bool TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
+ MachineBasicBlock *MBB,
+ unsigned DstReg, unsigned SrcReg);
+
+ /// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+ /// from an implicit def to another register can be coalesced away.
+ bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
+ LiveInterval &li, LiveInterval &ImpLi) const;
+
+ /// RemoveCopiesFromValNo - The specified value# is defined by an implicit
+ /// def and it is being removed. Turn all copies from this value# into
+ /// identity copies so they will be removed.
+ void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI);
+
+    /// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join
+    /// a virtual destination register with a physical source register.
+ bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt, LiveInterval &SrcInt);
+
+    /// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+ /// copy from a virtual source register to a physical destination register.
+ bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt, LiveInterval &SrcInt);
+
+ /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+ /// two virtual registers from different register classes.
+ bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
+ unsigned Threshold);
+
+ /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+ /// register with a physical register, check if any of the virtual register
+    /// operands is a sub-register use or def. If so, make sure it won't result
+ /// in an illegal extract_subreg or insert_subreg instruction.
+ bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+ /// an extract_subreg where dst is a physical register, e.g.
+ /// cl = EXTRACT_SUBREG reg1024, 1
+ bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
+ unsigned SubIdx, unsigned &RealDstReg);
+
+ /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+ /// an insert_subreg where src is a physical register, e.g.
+ /// reg1024 = INSERT_SUBREG reg1024, c1, 0
+ bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
+ unsigned SubIdx, unsigned &RealDstReg);
+
+ /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+ /// the specified live interval is defined by a copy from the specified
+ /// register.
+ bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR,
+ unsigned Reg);
+
+ /// isBackEdgeCopy - Return true if CopyMI is a back edge copy.
+ ///
+ bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const;
+
+ /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+ /// RemoveDeadImpDef - Remove implicit_def instructions which are
+    /// "re-defining" registers due to insert_subreg coalescing.
+ void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI);
+
+ /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+ /// due to live range lengthening as the result of coalescing.
+ void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI);
+
+ /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+    /// Return true if the live interval is removed.
+ bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+    /// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+    /// extended by a dead copy. Mark the last use (if any) of the val# as kill
+    /// and end the live range there. If there isn't another use, then this
+    /// live range is dead. Return true if the live interval is removed.
+ bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+ /// RemoveDeadDef - If a def of a live interval is now determined dead,
+ /// remove the val# it defines. If the live interval becomes empty, remove
+ /// it as well.
+ bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+    /// lastRegisterUse - Returns the last use of the specified register between
+ /// cycles Start and End or NULL if there are no uses.
+ MachineOperand *lastRegisterUse(unsigned Start, unsigned End, unsigned Reg,
+ unsigned &LastUseIdx) const;
+
+ void printRegName(unsigned reg) const;
+ };
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
new file mode 100644
index 0000000..ce63121
--- /dev/null
+++ b/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,229 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ LiveStacks *ls;
+ MachineFrameInfo *mfi;
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ VirtRegMap *vrm;
+
+ /// Construct a spiller base.
+ SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) :
+ mf(mf), lis(lis), ls(ls), vrm(vrm)
+ {
+ mfi = mf->getFrameInfo();
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ }
+
+ /// Insert a store of the given vreg to the given stack slot immediately
+ /// after the given instruction. Returns the base index of the inserted
+ /// instruction. The caller is responsible for adding an appropriate
+ /// LiveInterval to the LiveIntervals analysis.
+ unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
+ unsigned newVReg,
+ const TargetRegisterClass *trc) {
+ MachineBasicBlock::iterator nextInstItr(mi);
+ ++nextInstItr;
+
+ if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
+ lis->scaleNumbering(2);
+ ls->scaleNumbering(2);
+ }
+
+ unsigned miIdx = lis->getInstructionIndex(mi);
+
+ assert(lis->hasGapAfterInstr(miIdx));
+
+ tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, newVReg,
+ true, ss, trc);
+ MachineBasicBlock::iterator storeInstItr(mi);
+ ++storeInstItr;
+ MachineInstr *storeInst = &*storeInstItr;
+ unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
+
+ assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
+ "Store inst index already in use.");
+
+ lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
+
+ return storeInstIdx;
+ }
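+
+  /// A sketch of the renumbering idea used above (illustrative, not part of
+  /// the imported code): scaling every index by 2 moves an instruction from
+  /// index i to 2*i, so the slot block at 2*i + InstrSlots::NUM between two
+  /// previously adjacent instructions is guaranteed free for the new store
+  /// (or, symmetrically, the new load in insertLoadFor below).
+  static unsigned scaledIndexSketch(unsigned oldIdx) {
+    return oldIdx * 2; // where an instruction lands after scaleNumbering(2)
+  }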
+
+  /// Insert a load of the given vreg from the given stack slot immediately
+ /// before the given instruction. Returns the base index of the inserted
+ /// instruction. The caller is responsible for adding an appropriate
+ /// LiveInterval to the LiveIntervals analysis.
+ unsigned insertLoadFor(MachineInstr *mi, unsigned ss,
+ unsigned newVReg,
+ const TargetRegisterClass *trc) {
+ MachineBasicBlock::iterator useInstItr(mi);
+
+ if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
+ lis->scaleNumbering(2);
+ ls->scaleNumbering(2);
+ }
+
+ unsigned miIdx = lis->getInstructionIndex(mi);
+
+ assert(lis->hasGapBeforeInstr(miIdx));
+
+ tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, newVReg, ss, trc);
+ MachineBasicBlock::iterator loadInstItr(mi);
+ --loadInstItr;
+ MachineInstr *loadInst = &*loadInstItr;
+ unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
+
+ assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
+ "Load inst index already in use.");
+
+ lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
+
+ return loadInstIdx;
+ }
+
+
+ /// Add spill ranges for every use/def of the live interval, inserting loads
+ /// immediately before each use, and stores after each def. No folding is
+ /// attempted.
+ std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
+ DOUT << "Spilling everywhere " << *li << "\n";
+
+ assert(li->weight != HUGE_VALF &&
+ "Attempting to spill already spilled value.");
+
+ assert(!li->isStackSlot() &&
+ "Trying to spill a stack slot.");
+
+ std::vector<LiveInterval*> added;
+
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+ for (MachineRegisterInfo::reg_iterator
+ regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+ MachineInstr *mi = &*regItr;
+ do {
+ ++regItr;
+ } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+ SmallVector<unsigned, 2> indices;
+ bool hasUse = false;
+ bool hasDef = false;
+
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+ MachineOperand &op = mi->getOperand(i);
+
+ if (!op.isReg() || op.getReg() != li->reg)
+ continue;
+
+ hasUse |= mi->getOperand(i).isUse();
+ hasDef |= mi->getOperand(i).isDef();
+
+ indices.push_back(i);
+ }
+
+ unsigned newVReg = mri->createVirtualRegister(trc);
+ vrm->grow();
+ vrm->assignVirt2StackSlot(newVReg, ss);
+
+ LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ newLI->weight = HUGE_VALF;
+
+ for (unsigned i = 0; i < indices.size(); ++i) {
+ mi->getOperand(indices[i]).setReg(newVReg);
+
+ if (mi->getOperand(indices[i]).isUse()) {
+ mi->getOperand(indices[i]).setIsKill(true);
+ }
+ }
+
+ assert(hasUse || hasDef);
+
+ if (hasUse) {
+ unsigned loadInstIdx = insertLoadFor(mi, ss, newVReg, trc);
+ unsigned start = lis->getDefIndex(loadInstIdx),
+ end = lis->getUseIndex(lis->getInstructionIndex(mi));
+
+ VNInfo *vni =
+ newLI->getNextValue(loadInstIdx, 0, lis->getVNInfoAllocator());
+ vni->kills.push_back(lis->getInstructionIndex(mi));
+ LiveRange lr(start, end, vni);
+
+ newLI->addRange(lr);
+ }
+
+ if (hasDef) {
+ unsigned storeInstIdx = insertStoreFor(mi, ss, newVReg, trc);
+ unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
+ end = lis->getUseIndex(storeInstIdx);
+
+ VNInfo *vni =
+ newLI->getNextValue(storeInstIdx, 0, lis->getVNInfoAllocator());
+ vni->kills.push_back(storeInstIdx);
+ LiveRange lr(start, end, vni);
+
+ newLI->addRange(lr);
+ }
+
+ added.push_back(newLI);
+ }
+
+
+ return added;
+ }
+
+};
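+
+// An illustrative walkthrough (not part of the imported code) of what
+// trivialSpillEverywhere does to one instruction that both reads and writes
+// the spilled vreg %v, assigned stack slot SS:
+//
+//   before:  %v = ADD %v, 1
+//
+//   after:   %tmp = LOAD SS        ; inserted by insertLoadFor
+//            %tmp = ADD %tmp, 1    ; operands rewritten to the fresh vreg
+//            STORE %tmp, SS        ; inserted by insertStoreFor
+//
+// Every use/def site gets its own short-lived vreg, so each interval added
+// to 'added' spans little more than a load, the instruction, and/or a store.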
+
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+ TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) :
+ SpillerBase(mf, lis, ls, vrm) {}
+
+ std::vector<LiveInterval*> spill(LiveInterval *li) {
+ return trivialSpillEverywhere(li);
+ }
+
+};
+
+}
+
+llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
+ LiveStacks *ls, VirtRegMap *vrm) {
+ return new TrivialSpiller(mf, lis, ls, vrm);
+}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
new file mode 100644
index 0000000..cad054d
--- /dev/null
+++ b/lib/CodeGen/Spiller.h
@@ -0,0 +1,37 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+#include <vector>
+
+namespace llvm {
+ class LiveInterval;
+ class LiveIntervals;
+ class LiveStacks;
+ class MachineFunction;
+ class VirtRegMap;
+
+ /// Spiller interface.
+ ///
+ /// Implementations are utility classes which insert spill or remat code on
+ /// demand.
+ class Spiller {
+ public:
+ virtual ~Spiller() = 0;
+ virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0;
+ };
+
+ /// Create and return a spiller object, as specified on the command line.
+ Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li,
+ LiveStacks *ls, VirtRegMap *vrm);
+}
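+
+// A hedged usage sketch (not part of this header): a register allocator that
+// has decided to spill an interval might drive the interface like this,
+// assuming it already owns the analyses handed to createSpiller and that
+// enqueueForAllocation is its own hypothetical hook:
+//
+//   llvm::Spiller *spiller = llvm::createSpiller(mf, lis, ls, vrm);
+//   std::vector<llvm::LiveInterval*> added = spiller->spill(li);
+//   for (unsigned i = 0, e = added.size(); i != e; ++i)
+//     enqueueForAllocation(added[i]);
+//   delete spiller;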
+
+#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 0000000..c179f1e
--- /dev/null
+++ b/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,224 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the block, the stored value is checked. If it's
+// changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// SSPBufferSize - The lower bound for a buffer to be considered for stack
+// smashing protection.
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+ cl::desc("Lower bound for a buffer to be considered for "
+ "stack protection"));
+
+namespace {
+ class VISIBILITY_HIDDEN StackProtector : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ Function *F;
+ Module *M;
+
+ /// InsertStackProtectors - Insert code into the prologue and epilogue of
+ /// the function.
+ ///
+ /// - The prologue code loads and stores the stack guard onto the stack.
+ /// - The epilogue checks the value stored in the prologue against the
+ /// original value. It calls __stack_chk_fail if they differ.
+ bool InsertStackProtectors();
+
+ /// CreateFailBB - Create a basic block to jump to when the stack protector
+ /// check fails.
+ BasicBlock *CreateFailBB();
+
+ /// RequiresStackProtector - Check whether or not this function needs a
+ /// stack protector based upon the stack protector level.
+ bool RequiresStackProtector() const;
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ StackProtector() : FunctionPass(&ID), TLI(0) {}
+ StackProtector(const TargetLowering *tli)
+ : FunctionPass(&ID), TLI(tli) {}
+
+ virtual bool runOnFunction(Function &Fn);
+ };
+} // end anonymous namespace
+
+char StackProtector::ID = 0;
+static RegisterPass<StackProtector>
+X("stack-protector", "Insert stack protectors");
+
+FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+ return new StackProtector(tli);
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+
+ if (!RequiresStackProtector()) return false;
+
+ return InsertStackProtectors();
+}
+
+/// RequiresStackProtector - Check whether or not this function needs a stack
+/// protector based upon the stack protector level. The heuristic we use is to
+/// add a guard variable to functions that call alloca, and functions with
+/// buffers larger than SSPBufferSize bytes.
+bool StackProtector::RequiresStackProtector() const {
+ if (F->hasFnAttr(Attribute::StackProtectReq))
+ return true;
+
+ if (!F->hasFnAttr(Attribute::StackProtect))
+ return false;
+
+ const TargetData *TD = TLI->getTargetData();
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ for (BasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (AI->isArrayAllocation())
+ // This is a call to alloca with a variable size. Emit stack
+ // protectors.
+ return true;
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
+          // If an array has at least SSPBufferSize bytes of allocated space,
+          // then we emit stack protectors.
+ if (SSPBufferSize <= TD->getTypeAllocSize(AT))
+ return true;
+ }
+ }
+
+ return false;
+}
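+
+// Illustration only (assuming the default -stack-protector-buffer-size=8):
+// in a function carrying the ssp attribute,
+//   char buf[16];   // 16 >= 8 bytes          -> protector emitted
+//   char buf[4];    //  4 <  8 bytes          -> no protector on its own
+//   alloca(n);      // variable-sized alloca  -> protector emitted
+// while sspreq forces a protector before any of these checks run.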
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ AllocaInst *AI = 0; // Place on stack that stores the stack guard.
+ Constant *StackGuardVar = 0; // The stack guard variable.
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI) continue;
+
+ if (!FailBB) {
+ // Insert code into the entry block that stores the __stack_chk_guard
+ // variable onto the stack:
+ //
+ // entry:
+ // StackGuardSlot = alloca i8*
+ // StackGuard = load __stack_chk_guard
+ // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+ //
+ PointerType *PtrTy = PointerType::getUnqual(Type::Int8Ty);
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+
+ BasicBlock &Entry = F->getEntryBlock();
+ Instruction *InsPt = &Entry.front();
+
+ AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+ LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+
+ Value *Args[] = { LI, AI };
+ CallInst::
+ Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ &Args[0], array_endof(Args), "", InsPt);
+
+ // Create the basic block to jump to when the guard check fails.
+ FailBB = CreateFailBB();
+ }
+
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = load __stack_chk_guard
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
+
+ // Move the newly created basic block to the point right after the old basic
+ // block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
+
+ // Generate the stack protector instructions in the old basic block.
+ LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
+ LoadInst *LI2 = new LoadInst(AI, "", true, BB);
+ ICmpInst *Cmp = new ICmpInst(CmpInst::ICMP_EQ, LI1, LI2, "", BB);
+ BranchInst::Create(NewBB, FailBB, Cmp, BB);
+ }
+
+ // Return if we didn't modify any basic blocks. I.e., there are no return
+ // statements in the function.
+ if (!FailBB) return false;
+
+ return true;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ BasicBlock *FailBB = BasicBlock::Create("CallStackCheckFailBlk", F);
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail", Type::VoidTy, NULL);
+ CallInst::Create(StackChkFail, "", FailBB);
+ new UnreachableInst(FailBB);
+ return FailBB;
+}
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 0000000..5824644
--- /dev/null
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,733 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<bool>
+ColorWithRegsOpt("color-ss-with-regs",
+ cl::init(false), cl::Hidden,
+ cl::desc("Color stack slots with free registers"));
+
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
+STATISTIC(NumLoadElim, "Number of loads eliminated");
+STATISTIC(NumStoreElim, "Number of stores eliminated");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+ class VISIBILITY_HIDDEN StackSlotColoring : public MachineFunctionPass {
+ bool ColorWithRegs;
+ LiveStacks* LS;
+ VirtRegMap* VRM;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineLoopInfo *loopInfo;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of frame index references for each spill slot.
+ SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigAlignments;
+
+    // OrigSizes - Sizes of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+ // AllColors - If index is set, it's a spill slot, i.e. color.
+    // FIXME: This assumes PEI locates spill slots with smaller indices
+    // closest to the stack pointer / frame pointer. Therefore, smaller
+ // index == better color.
+ BitVector AllColors;
+
+ // NextColor - Next "color" that's not yet used.
+ int NextColor;
+
+ // UsedColors - "Colors" that have been assigned.
+ BitVector UsedColors;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+ StackSlotColoring() :
+ MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {}
+ StackSlotColoring(bool RegColor) :
+ MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char* getPassName() const {
+ return "Stack Slot Coloring";
+ }
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg);
+ void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+ MachineFunction &MF);
+ bool PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ bool PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg, const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF);
+ bool AllMemRefsCanBeUnfolded(int SS);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+static RegisterPass<StackSlotColoring>
+X("stack-slot-coloring", "Stack Slot Coloring");
+
+FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
+ return new StackSlotColoring(RegColor);
+}
+
+namespace {
+  // IntervalSorter - Comparison predicate that sorts live intervals by
+  // their weight.
+ struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+ };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+ MII != EE; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+ SSRefs[FI].push_back(MI);
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot live intervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors.resize(LastFI);
+ UsedColors.resize(LastFI);
+ Assignments.resize(LastFI);
+
+ // Gather all spill slots into a list.
+ DOUT << "Spill slot intervals:\n";
+ for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+ LiveInterval &li = i->second;
+ DEBUG(li.dump());
+ int FI = li.getStackSlotIndex();
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+ AllColors.set(FI);
+ }
+ DOUT << '\n';
+
+ // Sort them by weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+ // Get first "color".
+ NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+ const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+ for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+ LiveInterval *OtherLI = OtherLIs[i];
+ if (OtherLI->overlaps(*li))
+ return true;
+ }
+ return false;
+}
+
+/// ColorSlotsWithFreeRegs - If there are any free registers available, try
+/// replacing spill slot references with registers instead.
+bool
+StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg) {
+ if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
+ return false;
+
+ bool Changed = false;
+ DOUT << "Assigning unused registers to spill slots:\n";
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ if (!UsedColors[SS] || li->weight < 20)
+ // If the weight is < 20, i.e. two references in a loop with depth 1,
+ // don't bother with it.
+ continue;
+
+    // These slots may share the same registers.
+ bool AllColored = true;
+ SmallVector<unsigned, 4> ColoredRegs;
+ for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
+ int RSS = RevMap[SS][j];
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
+ // If it's not colored to another stack slot, try coloring it
+ // to a "free" register.
+ if (!RC) {
+ AllColored = false;
+ continue;
+ }
+ unsigned Reg = VRM->getFirstUnusedRegister(RC);
+ if (!Reg) {
+ AllColored = false;
+ continue;
+ }
+ if (!AllMemRefsCanBeUnfolded(RSS)) {
+ AllColored = false;
+ continue;
+ } else {
+ DOUT << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n';
+ ColoredRegs.push_back(Reg);
+ SlotMapping[RSS] = Reg;
+ SlotIsReg.set(RSS);
+ Changed = true;
+ }
+ }
+
+ // Register and its sub-registers are no longer free.
+ while (!ColoredRegs.empty()) {
+ unsigned Reg = ColoredRegs.back();
+ ColoredRegs.pop_back();
+ VRM->setRegisterUsed(Reg);
+ // If reg is a callee-saved register, it will have to be spilled in
+ // the prologue.
+ MRI->setPhysRegUsed(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ VRM->setRegisterUsed(*AS);
+ MRI->setPhysRegUsed(*AS);
+ }
+ }
+ // This spill slot is dead after the rewrites
+ if (AllColored) {
+ MFI->RemoveStackObject(SS);
+ ++NumEliminated;
+ }
+ }
+ DOUT << '\n';
+
+ return Changed;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ if (!DisableSharing) {
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors.find_first();
+ while (Color != -1) {
+ if (!OverlapWithAssignments(li, Color)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors.find_next(Color);
+ }
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColor != -1 && "No more spill slots?");
+ Color = NextColor;
+ UsedColors.set(Color);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ // Record the assignment.
+ Assignments[Color].push_back(li);
+ int FI = li->getStackSlotIndex();
+ DOUT << "Assigning fi#" << FI << " to fi#" << Color << "\n";
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ unsigned Align = OrigAlignments[FI];
+ if (!Share || Align > MFI->getObjectAlignment(Color))
+ MFI->setObjectAlignment(Color, Align);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
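+
+// A distilled sketch of the first-fit policy above (illustrative only, using
+// toy [start, end) intervals instead of LiveIntervals): reuse the first used
+// color whose occupants the new interval does not overlap, else signal that
+// a fresh color is needed.
+static int firstFitColorSketch(
+    const std::vector<std::vector<std::pair<int, int> > > &byColor,
+    std::pair<int, int> li) {
+  for (unsigned c = 0, e = byColor.size(); c != e; ++c) {
+    bool overlaps = false;
+    for (unsigned i = 0, ee = byColor[c].size(); i != ee; ++i)
+      if (li.first < byColor[c][i].second && byColor[c][i].first < li.second)
+        overlaps = true;  // shares a live point with an occupant
+    if (!overlaps)
+      return (int)c;      // share this color
+  }
+  return -1;              // caller opens a fresh color (NextColor)
+}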
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector SlotIsReg(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ DOUT << "Color spill slot intervals:\n";
+ bool Changed = false;
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight;
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ DOUT << "\nSpill slots after coloring:\n";
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ li->weight = SlotWeights[SS];
+ }
+ // Sort them by new weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+ DEBUG(SSIntervals[i]->dump());
+ DOUT << '\n';
+#endif
+
+  // Can we "color" a stack slot with an unused register?
+ Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MO_FrameIndex operands.
+ SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ bool isReg = SlotIsReg[SS];
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+ continue;
+
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+ if (!isReg)
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+ else {
+ // Rewrite to use a register instead.
+ unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+ SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+ UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+ }
+ }
+
+ // Delete unused stack slots.
+ while (NextColor != -1) {
+ DOUT << "Removing unused stack object fi#" << NextColor << "\n";
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = RefMIs[i];
+ if (TII->isLoadFromStackSlot(MI, SS) ||
+ TII->isStoreToStackSlot(MI, SS))
+ // Restore and spill will become copies.
+ return true;
+ if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+ return false;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (MO.isFI() && MO.getIndex() != SS)
+        // If it uses another frameindex, we currently can't unfold it.
+ return false;
+ }
+ }
+ return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+ int NewFI, MachineFunction &MF) {
+ for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI != OldFI)
+ continue;
+ MO.setIndex(NewFI);
+ }
+
+ // Update the MachineMemOperand for the new memory location.
+ // FIXME: We need a better method of managing these too.
+ SmallVector<MachineMemOperand, 2> MMOs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ MI->clearMemOperands(MF);
+ const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+ for (unsigned i = 0, ee = MMOs.size(); i != ee; ++i) {
+ if (MMOs[i].getValue() != OldSV)
+ MI->addMemOperand(MF, MMOs[i]);
+ else {
+ MachineMemOperand MMO(PseudoSourceValue::getFixedStack(NewFI),
+ MMOs[i].getFlags(), MMOs[i].getOffset(),
+ MMOs[i].getSize(), MMOs[i].getAlignment());
+ MI->addMemOperand(MF, MMO);
+ }
+ }
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->begin())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ SmallVector<MachineOperand*, 4> Refs;
+ while (--MII != MBB->begin()) {
+ bool FoundDef = false; // Not counting 2address def.
+
+ Uses.clear();
+ const TargetInstrDesc &TID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isImplicit())
+ return false;
+ const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i);
+ if (RC && !RC->contains(NewReg))
+ return false;
+
+ if (MO.isUse()) {
+ Uses.push_back(&MO);
+ } else {
+ Refs.push_back(&MO);
+ if (!MII->isRegTiedToUseOperand(i))
+ FoundDef = true;
+ }
+ } else if (TRI->regsOverlap(Reg, NewReg)) {
+ return false;
+ } else if (TRI->regsOverlap(Reg, OldReg)) {
+ if (!MO.isUse() || !MO.isKill())
+ return false;
+ }
+ }
+
+ if (FoundDef) {
+ // Found non-two-address def. Stop here.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+ Refs[i]->setReg(NewReg);
+ return true;
+ }
+
+ // Two-address uses must be updated as well.
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Refs.push_back(Uses[i]);
+ }
+ return false;
+}
+
+/// PropagateForward - Traverse forward and look for the kill of OldReg. If
+/// it can successfully update all of the uses with NewReg, do so and
+/// return true.
+bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->end())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ while (++MII != MBB->end()) {
+ bool FoundUse = false;
+ bool FoundKill = false;
+ const TargetInstrDesc &TID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isDef() || MO.isImplicit())
+ return false;
+
+ const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i);
+ if (RC && !RC->contains(NewReg))
+ return false;
+ FoundUse = true;
+ if (MO.isKill())
+ FoundKill = true;
+ Uses.push_back(&MO);
+ } else if (TRI->regsOverlap(Reg, NewReg) ||
+ TRI->regsOverlap(Reg, OldReg))
+ return false;
+ }
+ if (FoundKill) {
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Uses[i]->setReg(NewReg);
+ return true;
+ }
+ }
+ return false;
+}
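+
+// An illustrative case (not from the import) of the forward walk: given
+//   %r1 = LOAD <fi#2>        ; slot 2 is being colored into physreg %R
+//   ...
+//   USE %r1<kill>
+// if every use of %r1 up to and including its kill can legally take %R, the
+// uses are rewritten to %R and the reload becomes dead, letting
+// UnfoldAndRewriteInstruction below erase it (counted by NumLoadElim).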
+
+/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
+/// folded memory references and replacing those references with register
+/// references instead.
+void
+StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg,
+ const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF) {
+ MachineBasicBlock *MBB = MI->getParent();
+ if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
+ if (PropagateForward(MI, MBB, DstReg, Reg)) {
+ DOUT << "Eliminated load: ";
+ DEBUG(MI->dump());
+ ++NumLoadElim;
+ } else {
+ TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC);
+ ++NumRegRepl;
+ }
+
+ if (!Defs.count(Reg)) {
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
+ if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
+ DOUT << "Eliminated store: ";
+ DEBUG(MI->dump());
+ ++NumStoreElim;
+ } else {
+ TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC);
+ ++NumRegRepl;
+ }
+
+ // Remember reg has been defined in MBB.
+ Defs.insert(Reg);
+ } else {
+ SmallVector<MachineInstr*, 4> NewMIs;
+ bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
+ Success = Success; // Silence compiler warning.
+ assert(Success && "Failed to unfold!");
+ MachineInstr *NewMI = NewMIs[0];
+ MBB->insert(MI, NewMI);
+ ++NumRegRepl;
+
+ if (NewMI->readsRegister(Reg)) {
+ if (!Defs.count(Reg))
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ }
+ MBB->erase(MI);
+}
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+
+ MachineBasicBlock::iterator NextMI = next(I);
+ if (NextMI == MBB->end()) continue;
+
+ int FirstSS, SecondSS;
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+ ++NumDead;
+ changed = true;
+
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+ ++NumDead;
+ toErase.push_back(I);
+ }
+
+ toErase.push_back(NextMI);
+ ++I;
+ }
+
+ for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed;
+}
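+
+// e.g. (illustration, not part of the import): the adjacent pair
+//   %r = MOV32rm <fi#3>   ; reload from slot 3
+//   MOV32mr <fi#3>, %r    ; immediately stores the same value back
+// makes the store trivially dead; and if that store was also the reload's
+// only (killing) use, the reload is erased along with it.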
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "********** Stack Slot Coloring **********\n";
+
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ LS = &getAnalysis<LiveStacks>();
+ VRM = &getAnalysis<VirtRegMap>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots < 2) {
+ if (NumSlots == 0 || !VRM->HasUnusedRegisters())
+ // Nothing to do!
+ return false;
+ }
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ NextColor = -1;
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+ Assignments[i].clear();
+ Assignments.clear();
+
+ if (Changed) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= RemoveDeadStores(I);
+ }
+
+ return Changed;
+}
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 0000000..a2c1255
--- /dev/null
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,1053 @@
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions, using an intelligent copy-folding technique based on
+// dominator information. This technique is derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+// DOI= http://doi.acm.org/10.1145/512529.512534
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN StrongPHIElimination : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ StrongPHIElimination() : MachineFunctionPass(&ID) {}
+
+ // Waiting stores, for each MBB, the set of copies that need to
+ // be inserted into that MBB
+ DenseMap<MachineBasicBlock*,
+ std::multimap<unsigned, unsigned> > Waiting;
+
+ // Stacks holds the renaming stack for each register
+ std::map<unsigned, std::vector<unsigned> > Stacks;
+
+ // Registers in UsedByAnother are PHI nodes that are themselves
+    // used as operands to another PHI node
+ std::set<unsigned> UsedByAnother;
+
+    // RenameSets is a map from a PHI-defined register
+ // to the input registers to be coalesced along with the
+ // predecessor block for those input registers.
+ std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
+
+ // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
+ // eliminating, indexed by the register defined by that phi.
+ std::map<unsigned, unsigned> PhiValueNumber;
+
+ // Store the DFS-in number of each block
+ DenseMap<MachineBasicBlock*, unsigned> preorder;
+
+ // Store the DFS-out number of each block
+ DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+
+ // TODO: Actually make this true.
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<RegisterCoalescer>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ preorder.clear();
+ maxpreorder.clear();
+
+ Waiting.clear();
+ Stacks.clear();
+ UsedByAnother.clear();
+ RenameSets.clear();
+ }
+
+ private:
+
+ /// DomForestNode - Represents a node in the "dominator forest". This is
+ /// a forest in which the nodes represent registers and the edges
+ /// represent a dominance relation in the block defining those registers.
+ struct DomForestNode {
+ private:
+ // Store references to our children
+ std::vector<DomForestNode*> children;
+ // The register we represent
+ unsigned reg;
+
+ // Add another node as our child
+ void addChild(DomForestNode* DFN) { children.push_back(DFN); }
+
+ public:
+ typedef std::vector<DomForestNode*>::iterator iterator;
+
+ // Create a DomForestNode by providing the register it represents, and
+ // the node to be its parent. The virtual root node has register 0
+ // and a null parent.
+ DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
+ if (parent)
+ parent->addChild(this);
+ }
+
+ ~DomForestNode() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ }
+
+      /// getReg - Return the register that this node represents
+ inline unsigned getReg() { return reg; }
+
+ // Provide iterator access to our children
+ inline DomForestNode::iterator begin() { return children.begin(); }
+ inline DomForestNode::iterator end() { return children.end(); }
+ };
+
+ void computeDFS(MachineFunction& MF);
+ void processBlock(MachineBasicBlock* MBB);
+
+ std::vector<DomForestNode*> computeDomForest(
+ std::map<unsigned, MachineBasicBlock*>& instrs,
+ MachineRegisterInfo& MRI);
+ void processPHIUnion(MachineInstr* Inst,
+ std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+ std::vector<StrongPHIElimination::DomForestNode*>& DF,
+ std::vector<std::pair<unsigned, unsigned> >& locals);
+ void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
+ void InsertCopies(MachineDomTreeNode* MBB,
+ SmallPtrSet<MachineBasicBlock*, 16>& v);
+ bool mergeLiveIntervals(unsigned primary, unsigned secondary);
+ };
+}
+
+char StrongPHIElimination::ID = 0;
+static RegisterPass<StrongPHIElimination>
+X("strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently");
+
+const PassInfo *const llvm::StrongPHIEliminationID = &X;
+
+/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
+/// of the given MachineFunction. These numbers are then used in other parts
+/// of the PHI elimination process.
+void StrongPHIElimination::computeDFS(MachineFunction& MF) {
+ SmallPtrSet<MachineDomTreeNode*, 8> frontier;
+ SmallPtrSet<MachineDomTreeNode*, 8> visited;
+
+ unsigned time = 0;
+
+ MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
+
+ MachineDomTreeNode* node = DT.getRootNode();
+
+ std::vector<MachineDomTreeNode*> worklist;
+ worklist.push_back(node);
+
+ while (!worklist.empty()) {
+ MachineDomTreeNode* currNode = worklist.back();
+
+ if (!frontier.count(currNode)) {
+ frontier.insert(currNode);
+ ++time;
+ preorder.insert(std::make_pair(currNode->getBlock(), time));
+ }
+
+ bool inserted = false;
+ for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
+ I != E; ++I)
+ if (!frontier.count(*I) && !visited.count(*I)) {
+ worklist.push_back(*I);
+ inserted = true;
+ break;
+ }
+
+ if (!inserted) {
+ frontier.erase(currNode);
+ visited.insert(currNode);
+ maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
+
+ worklist.pop_back();
+ }
+ }
+}
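+
+// A hedged note (added, not upstream): interval numbering of this kind
+// supports constant-time dominance queries, which is how the numbers are
+// consumed later. A sketch of the test, assuming both maps were filled in
+// by computeDFS above:
+static bool dominatesSketch(MachineBasicBlock *A, MachineBasicBlock *B,
+                            DenseMap<MachineBasicBlock*, unsigned> &pre,
+                            DenseMap<MachineBasicBlock*, unsigned> &maxpre) {
+  // A dominates B iff B's DFS window nests inside A's.
+  return pre[A] <= pre[B] && maxpre[B] <= maxpre[A];
+}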
+
+namespace {
+
+/// PreorderSorter - a helper class that is used to sort registers
+/// according to the preorder number of their defining blocks
+class PreorderSorter {
+private:
+ DenseMap<MachineBasicBlock*, unsigned>& preorder;
+ MachineRegisterInfo& MRI;
+
+public:
+ PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
+ MachineRegisterInfo& M) : preorder(p), MRI(M) { }
+
+ bool operator()(unsigned A, unsigned B) {
+ if (A == B)
+ return false;
+
+ MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
+ MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
+
+ if (preorder[ABlock] < preorder[BBlock])
+ return true;
+ else if (preorder[ABlock] > preorder[BBlock])
+ return false;
+
+ return false;
+ }
+};
+
+}
+
+/// computeDomForest - compute the subforest of the DomTree corresponding
+/// to the defining blocks of the registers in question
+std::vector<StrongPHIElimination::DomForestNode*>
+StrongPHIElimination::computeDomForest(
+ std::map<unsigned, MachineBasicBlock*>& regs,
+ MachineRegisterInfo& MRI) {
+ // Begin by creating a virtual root node, since the actual results
+ // may well be a forest. Assume this node has maximum DFS-out number.
+ DomForestNode* VirtualRoot = new DomForestNode(0, 0);
+ maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
+
+ // Populate a worklist with the registers
+ std::vector<unsigned> worklist;
+ worklist.reserve(regs.size());
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
+ E = regs.end(); I != E; ++I)
+ worklist.push_back(I->first);
+
+ // Sort the registers by the DFS-in number of their defining block
+ PreorderSorter PS(preorder, MRI);
+ std::sort(worklist.begin(), worklist.end(), PS);
+
+ // Create a "current parent" stack, and put the virtual root on top of it
+ DomForestNode* CurrentParent = VirtualRoot;
+ std::vector<DomForestNode*> stack;
+ stack.push_back(VirtualRoot);
+
+ // Iterate over all the registers in the previously computed order
+ for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end();
+ I != E; ++I) {
+ unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
+ MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
+ MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+ 0;
+
+ // If the DFS-in number of the register is greater than the DFS-out number
+ // of the current parent, repeatedly pop the parent stack until it isn't.
+ while (pre > maxpreorder[parentBlock]) {
+ stack.pop_back();
+ CurrentParent = stack.back();
+
+ parentBlock = CurrentParent->getReg() ?
+ MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+ 0;
+ }
+
+ // Now that we've found the appropriate parent, create a DomForestNode for
+ // this register and attach it to the forest
+ DomForestNode* child = new DomForestNode(*I, CurrentParent);
+
+ // Push this new node on the "current parent" stack
+ stack.push_back(child);
+ CurrentParent = child;
+ }
+
+ // Return a vector containing the children of the virtual root node
+ std::vector<DomForestNode*> ret;
+ ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
+ return ret;
+}
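+
+// Worked illustration (not in the original): for registers r1, r2, r3 whose
+// defining blocks have (preorder, maxpreorder) = (1,6), (2,3) and (4,5), the
+// sort yields r1, r2, r3. r2 attaches under r1 (2 <= 3 <= 6); then r3's
+// preorder 4 exceeds r2's maxpreorder 3, so r2 is popped and r3 also
+// attaches under r1 -- a forest with r1 over the siblings {r2, r3}.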
+
+/// isLiveIn - helper method that determines, from a regno, if a register
+/// is live into a block
+static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
+ LiveIntervals& LI) {
+ LiveInterval& I = LI.getOrCreateInterval(r);
+ unsigned idx = LI.getMBBStartIdx(MBB);
+ return I.liveAt(idx);
+}
+
+/// isLiveOut - helper method that determines, from a regno, if a register is
+/// live out of a block.
+static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
+ LiveIntervals& LI) {
+ for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
+ E = MBB->succ_end(); PI != E; ++PI)
+ if (isLiveIn(r, *PI, LI))
+ return true;
+
+ return false;
+}
+
+/// interferes - checks for local interferences by scanning a block. The only
+/// tricky parameter is 'mode', which tells it the relationship of the two
+/// registers. 0 - defined in the same block, 1 - first properly dominates
+/// second, 2 - second properly dominates first
+static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
+ LiveIntervals& LV, unsigned mode) {
+ MachineInstr* def = 0;
+ MachineInstr* kill = 0;
+
+ // The code is still in SSA form at this point, so there is only one
+ // definition per VReg. Thus we can safely use MRI->getVRegDef().
+ const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
+
+ bool interference = false;
+
+  // Walk the block, checking for interferences
+ for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
+ MBI != MBE; ++MBI) {
+ MachineInstr* curr = MBI;
+
+ // Same defining block...
+ if (mode == 0) {
+ if (curr == MRI->getVRegDef(a)) {
+ // If we find our first definition, save it
+ if (!def) {
+ def = curr;
+ // If there's already an unkilled definition, then
+ // this is an interference
+ } else if (!kill) {
+ interference = true;
+ break;
+ // If there's a definition followed by a KillInst, then
+ // they can't interfere
+ } else {
+ interference = false;
+ break;
+ }
+ // Symmetric with the above
+ } else if (curr == MRI->getVRegDef(b)) {
+ if (!def) {
+ def = curr;
+ } else if (!kill) {
+ interference = true;
+ break;
+ } else {
+ interference = false;
+ break;
+ }
+ // Store KillInsts if they match up with the definition
+      } else if (curr->killsRegister(a)) {
+        if (def == MRI->getVRegDef(a)) {
+          kill = curr;
+        }
+      } else if (curr->killsRegister(b)) {
+        if (def == MRI->getVRegDef(b)) {
+          kill = curr;
+        }
+      }
+ // First properly dominates second...
+ } else if (mode == 1) {
+ if (curr == MRI->getVRegDef(b)) {
+ // Definition of second without kill of first is an interference
+ if (!kill) {
+ interference = true;
+ break;
+ // Definition after a kill is a non-interference
+ } else {
+ interference = false;
+ break;
+ }
+ // Save KillInsts of First
+ } else if (curr->killsRegister(a)) {
+ kill = curr;
+ }
+ // Symmetric with the above
+ } else if (mode == 2) {
+ if (curr == MRI->getVRegDef(a)) {
+ if (!kill) {
+ interference = true;
+ break;
+ } else {
+ interference = false;
+ break;
+ }
+ } else if (curr->killsRegister(b)) {
+ kill = curr;
+ }
+ }
+ }
+
+ return interference;
+}
+
+/// processBlock - Determine how to break up PHIs in the current block. Each
+/// PHI is broken up by some combination of renaming its operands and inserting
+/// copies. This method is responsible for determining which operands receive
+/// which treatment.
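+///
+/// For illustration (hypothetical vregs): given %d = PHI %a<BB1>, %b<BB2>,
+/// an operand that passes the interference checks below joins the renaming
+/// set for %d, while a conflicting operand instead has a copy to %d
+/// scheduled at the end of its source block.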
+void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+
+ // Holds names that have been added to a set in any PHI within this block
+ // before the current one.
+ std::set<unsigned> ProcessedNames;
+
+ // Iterate over all the PHI nodes in this block
+ MachineBasicBlock::iterator P = MBB->begin();
+ while (P != MBB->end() && P->getOpcode() == TargetInstrInfo::PHI) {
+ unsigned DestReg = P->getOperand(0).getReg();
+
+    // Don't bother doing PHI elimination for dead PHIs.
+ if (P->registerDefIsDead(DestReg)) {
+ ++P;
+ continue;
+ }
+
+ LiveInterval& PI = LI.getOrCreateInterval(DestReg);
+ unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P));
+ VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
+ PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
+
+ // PHIUnion is the set of incoming registers to the PHI node that
+    // are going to be renamed rather than having copies inserted. This set
+    // is refined over the course of this function. UnionedBlocks is the set
+ // of corresponding MBBs.
+ std::map<unsigned, MachineBasicBlock*> PHIUnion;
+ SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
+
+ // Iterate over the operands of the PHI node
+ for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+ unsigned SrcReg = P->getOperand(i-1).getReg();
+
+ // Don't need to try to coalesce a register with itself.
+ if (SrcReg == DestReg) {
+ ProcessedNames.insert(SrcReg);
+ continue;
+ }
+
+ // We don't need to insert copies for implicit_defs.
+ MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
+ if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ ProcessedNames.insert(SrcReg);
+
+ // Check for trivial interferences via liveness information, allowing us
+ // to avoid extra work later. Any registers that interfere cannot both
+ // be in the renaming set, so choose one and add copies for it instead.
+ // The conditions are:
+ // 1) if the operand is live into the PHI node's block OR
+ // 2) if the PHI node is live out of the operand's defining block OR
+ // 3) if the operand is itself a PHI node and the original PHI is
+ // live into the operand's defining block OR
+ // 4) if the operand is already being renamed for another PHI node
+ // in this block OR
+ // 5) if any two operands are defined in the same block, insert copies
+ // for one of them
+ if (isLiveIn(SrcReg, P->getParent(), LI) ||
+ isLiveOut(P->getOperand(0).getReg(),
+ MRI.getVRegDef(SrcReg)->getParent(), LI) ||
+ ( MRI.getVRegDef(SrcReg)->getOpcode() == TargetInstrInfo::PHI &&
+ isLiveIn(P->getOperand(0).getReg(),
+ MRI.getVRegDef(SrcReg)->getParent(), LI) ) ||
+ ProcessedNames.count(SrcReg) ||
+ UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) {
+
+ // Add a copy for the selected register
+ MachineBasicBlock* From = P->getOperand(i).getMBB();
+ Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+ UsedByAnother.insert(SrcReg);
+ } else {
+ // Otherwise, add it to the renaming set
+ PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB()));
+ UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent());
+ }
+ }
+
+ // Compute the dominator forest for the renaming set. This is a forest
+ // where the nodes are the registers and the edges represent dominance
+ // relations between the defining blocks of the registers
+ std::vector<StrongPHIElimination::DomForestNode*> DF =
+ computeDomForest(PHIUnion, MRI);
+
+ // Walk DomForest to resolve interferences at an inter-block level. This
+ // will remove registers from the renaming set (and insert copies for them)
+ // if interferences are found.
+ std::vector<std::pair<unsigned, unsigned> > localInterferences;
+ processPHIUnion(P, PHIUnion, DF, localInterferences);
+
+ // If one of the inputs is defined in the same block as the current PHI
+ // then we need to check for a local interference between that input and
+ // the PHI.
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
+ E = PHIUnion.end(); I != E; ++I)
+ if (MRI.getVRegDef(I->first)->getParent() == P->getParent())
+ localInterferences.push_back(std::make_pair(I->first,
+ P->getOperand(0).getReg()));
+
+ // The dominator forest walk may have returned some register pairs whose
+ // interference cannot be determined from dominator analysis. We now
+ // examine these pairs for local interferences.
+ for (std::vector<std::pair<unsigned, unsigned> >::iterator I =
+ localInterferences.begin(), E = localInterferences.end(); I != E; ++I) {
+ std::pair<unsigned, unsigned> p = *I;
+
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+
+ // Determine the block we need to scan and the relationship between
+ // the two registers
+ MachineBasicBlock* scan = 0;
+ unsigned mode = 0;
+ if (MRI.getVRegDef(p.first)->getParent() ==
+ MRI.getVRegDef(p.second)->getParent()) {
+ scan = MRI.getVRegDef(p.first)->getParent();
+ mode = 0; // Same block
+ } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(),
+ MRI.getVRegDef(p.second)->getParent())) {
+ scan = MRI.getVRegDef(p.second)->getParent();
+ mode = 1; // First dominates second
+ } else {
+ scan = MRI.getVRegDef(p.first)->getParent();
+ mode = 2; // Second dominates first
+ }
+
+ // If there's an interference, we need to insert copies
+ if (interferes(p.first, p.second, scan, LI, mode)) {
+ // Insert copies for First
+ for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+ if (P->getOperand(i-1).getReg() == p.first) {
+ unsigned SrcReg = p.first;
+ MachineBasicBlock* From = P->getOperand(i).getMBB();
+
+ Waiting[From].insert(std::make_pair(SrcReg,
+ P->getOperand(0).getReg()));
+ UsedByAnother.insert(SrcReg);
+
+ PHIUnion.erase(SrcReg);
+ }
+ }
+ }
+ }
+
+ // Add the renaming set for this PHI node to our overall renaming information
+ for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
+ QE = PHIUnion.end(); QI != QE; ++QI) {
+ DOUT << "Adding Renaming: " << QI->first << " -> "
+ << P->getOperand(0).getReg() << "\n";
+ }
+
+ RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
+
+ // Remember which registers are already renamed, so that we don't try to
+ // rename them for another PHI node in this block
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
+ E = PHIUnion.end(); I != E; ++I)
+ ProcessedNames.insert(I->first);
+
+ ++P;
+ }
+}
+
+/// processPHIUnion - Take a set of candidate registers to be coalesced when
+/// decomposing the PHI instruction. Use the DominanceForest to remove the ones
+/// that are known to interfere, and flag others that need to be checked for
+/// local interferences.
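+///
+/// A sketch of the rule applied below: if a parent register in the forest is
+/// live-out of a child's defining block, their ranges must overlap, so copies
+/// are inserted for the parent and it leaves the union; if it is merely
+/// live-in (or defined in the same block), the pair is only flagged for the
+/// local-interference check.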
+void StrongPHIElimination::processPHIUnion(MachineInstr* Inst,
+ std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+ std::vector<StrongPHIElimination::DomForestNode*>& DF,
+ std::vector<std::pair<unsigned, unsigned> >& locals) {
+
+ std::vector<DomForestNode*> worklist(DF.begin(), DF.end());
+ SmallPtrSet<DomForestNode*, 4> visited;
+
+ // Code is still in SSA form, so we can use MRI::getVRegDef()
+ MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo();
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ unsigned DestReg = Inst->getOperand(0).getReg();
+
+  // Do a depth-first walk of the DomForest
+ while (!worklist.empty()) {
+ DomForestNode* DFNode = worklist.back();
+
+ visited.insert(DFNode);
+
+ bool inserted = false;
+ for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end();
+ CI != CE; ++CI) {
+ DomForestNode* child = *CI;
+
+ // If the current node is live-out of the defining block of one of its
+ // children, insert a copy for it. NOTE: The paper actually calls for
+ // a more elaborate heuristic for determining whether to insert copies
+ // for the child or the parent. In the interest of simplicity, we're
+ // just always choosing the parent.
+ if (isLiveOut(DFNode->getReg(),
+ MRI.getVRegDef(child->getReg())->getParent(), LI)) {
+ // Insert copies for parent
+ for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
+ if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
+ unsigned SrcReg = DFNode->getReg();
+ MachineBasicBlock* From = Inst->getOperand(i).getMBB();
+
+ Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+ UsedByAnother.insert(SrcReg);
+
+ PHIUnion.erase(SrcReg);
+ }
+ }
+
+ // If a node is live-in to the defining block of one of its children, but
+ // not live-out, then we need to scan that block for local interferences.
+ } else if (isLiveIn(DFNode->getReg(),
+ MRI.getVRegDef(child->getReg())->getParent(), LI) ||
+ MRI.getVRegDef(DFNode->getReg())->getParent() ==
+ MRI.getVRegDef(child->getReg())->getParent()) {
+ // Add (p, c) to possible local interferences
+ locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
+ }
+
+ if (!visited.count(child)) {
+ worklist.push_back(child);
+ inserted = true;
+ }
+ }
+
+ if (!inserted) worklist.pop_back();
+ }
+}
+
+/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
+/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
+/// problems.
+///
+/// Based on "Practical Improvements to the Construction and Destruction
+/// of Static Single Assignment Form" by Briggs, et al.
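+///
+/// Roughly: the 'virtual swap' problem arises when the pending copies form a
+/// cycle (e.g. a = b together with b = a), and the 'lost copy' problem when
+/// the value currently in a copy's destination is still live out of the
+/// block. Both are handled below by first saving the endangered value in a
+/// fresh temporary.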
+void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
+ std::set<unsigned>& pushed) {
+ // FIXME: This function needs to update LiveIntervals
+  std::multimap<unsigned, unsigned>& copy_set = Waiting[MBB];
+
+ std::multimap<unsigned, unsigned> worklist;
+ std::map<unsigned, unsigned> map;
+
+ // Setup worklist of initial copies
+ for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+ E = copy_set.end(); I != E; ) {
+ map.insert(std::make_pair(I->first, I->first));
+ map.insert(std::make_pair(I->second, I->second));
+
+ if (!UsedByAnother.count(I->second)) {
+ worklist.insert(*I);
+
+ // Avoid iterator invalidation
+ std::multimap<unsigned, unsigned>::iterator OI = I;
+ ++I;
+ copy_set.erase(OI);
+ } else {
+ ++I;
+ }
+ }
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ MachineFunction* MF = MBB->getParent();
+ MachineRegisterInfo& MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
+
+ // Iterate over the worklist, inserting copies
+ while (!worklist.empty() || !copy_set.empty()) {
+ while (!worklist.empty()) {
+ std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
+ std::pair<unsigned, unsigned> curr = *WI;
+ worklist.erase(WI);
+
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
+
+ if (isLiveOut(curr.second, MBB, LI)) {
+ // Create a temporary
+ unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+
+ // Insert copy from curr.second to a temporary at
+ // the Phi defining curr.second
+ MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
+ TII->copyRegToReg(*PI->getParent(), PI, t,
+ curr.second, RC, RC);
+
+ DOUT << "Inserted copy from " << curr.second << " to " << t << "\n";
+
+ // Push temporary on Stacks
+ Stacks[curr.second].push_back(t);
+
+ // Insert curr.second in pushed
+ pushed.insert(curr.second);
+
+ // Create a live interval for this temporary
+ InsertedPHIDests.push_back(std::make_pair(t, --PI));
+ }
+
+ // Insert copy from map[curr.first] to curr.second
+ TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
+ map[curr.first], RC, RC);
+ map[curr.first] = curr.second;
+ DOUT << "Inserted copy from " << curr.first << " to "
+ << curr.second << "\n";
+
+      // Push this copy onto InsertedPHIDests so we can
+      // update LiveIntervals with it.
+ MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
+ InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
+
+ // If curr.first is a destination in copy_set...
+ for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+ E = copy_set.end(); I != E; )
+ if (curr.first == I->second) {
+ std::pair<unsigned, unsigned> temp = *I;
+ worklist.insert(temp);
+
+ // Avoid iterator invalidation
+ std::multimap<unsigned, unsigned>::iterator OI = I;
+ ++I;
+ copy_set.erase(OI);
+
+ break;
+ } else {
+ ++I;
+ }
+ }
+
+ if (!copy_set.empty()) {
+ std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
+ std::pair<unsigned, unsigned> curr = *CI;
+ worklist.insert(curr);
+ copy_set.erase(CI);
+
+ LiveInterval& I = LI.getInterval(curr.second);
+ MachineBasicBlock::iterator term = MBB->getFirstTerminator();
+ unsigned endIdx = 0;
+ if (term != MBB->end())
+ endIdx = LI.getInstructionIndex(term);
+ else
+ endIdx = LI.getMBBEndIdx(MBB);
+
+ if (I.liveAt(endIdx)) {
+ const TargetRegisterClass *RC =
+ MF->getRegInfo().getRegClass(curr.first);
+
+        // Insert a copy from dest to a new temporary t at the end of the block
+ unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+ TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
+ curr.second, RC, RC);
+ map[curr.second] = t;
+
+ MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
+ InsertedPHIDests.push_back(std::make_pair(t, --TI));
+ }
+ }
+ }
+
+ // Renumber the instructions so that we can perform the index computations
+ // needed to create new live intervals.
+ LI.computeNumbering();
+
+ // For copies that we inserted at the ends of predecessors, we construct
+ // live intervals. This is pretty easy, since we know that the destination
+  // register cannot have been live at that point previously. We just have
+ // to make sure that, for registers that serve as inputs to more than one
+ // PHI, we don't create multiple overlapping live intervals.
+ std::set<unsigned> RegHandled;
+ for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I =
+ InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
+ if (RegHandled.insert(I->first).second) {
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ unsigned instrIdx = LI.getInstructionIndex(I->second);
+ if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx)))
+ Int.removeRange(LiveIntervals::getDefIndex(instrIdx),
+ LI.getMBBEndIdx(I->second->getParent())+1,
+ true);
+
+ LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
+ R.valno->copy = I->second;
+ R.valno->def =
+ LiveIntervals::getDefIndex(LI.getInstructionIndex(I->second));
+ }
+ }
+}
+
+/// InsertCopies - insert copies into MBB and recursively into all of its
+/// dominator tree children.
+void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
+ SmallPtrSet<MachineBasicBlock*, 16>& visited) {
+ MachineBasicBlock* MBB = MDTN->getBlock();
+ visited.insert(MBB);
+
+ std::set<unsigned> pushed;
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ // Rewrite register uses from Stacks
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->getOpcode() == TargetInstrInfo::PHI)
+ continue;
+
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ if (I->getOperand(i).isReg() &&
+ Stacks[I->getOperand(i).getReg()].size()) {
+ // Remove the live range for the old vreg.
+ LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
+ LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining(
+ LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
+ if (OldLR != OldInt.end())
+ OldInt.removeRange(*OldLR, true);
+
+ // Change the register
+ I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back());
+
+ // Add a live range for the new vreg
+ LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
+ VNInfo* FirstVN = *Int.vni_begin();
+ FirstVN->hasPHIKill = false;
+ if (I->getOperand(i).isKill())
+ FirstVN->kills.push_back(
+ LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
+
+ LiveRange LR (LI.getMBBStartIdx(I->getParent()),
+ LiveIntervals::getUseIndex(LI.getInstructionIndex(I))+1,
+ FirstVN);
+
+ Int.addRange(LR);
+ }
+ }
+
+ // Schedule the copies for this block
+ ScheduleCopies(MBB, pushed);
+
+ // Recur down the dominator tree.
+ for (MachineDomTreeNode::iterator I = MDTN->begin(),
+ E = MDTN->end(); I != E; ++I)
+ if (!visited.count((*I)->getBlock()))
+ InsertCopies(*I, visited);
+
+ // As we exit this block, pop the names we pushed while processing it
+ for (std::set<unsigned>::iterator I = pushed.begin(),
+ E = pushed.end(); I != E; ++I)
+ Stacks[*I].pop_back();
+}
+
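+/// mergeLiveIntervals - Try to merge the live interval of a secondary
+/// register into that of a primary register. Returns false, making no
+/// changes, if any live range of the secondary overlaps one of the primary.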
+bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
+ unsigned secondary) {
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ LiveInterval& LHS = LI.getOrCreateInterval(primary);
+ LiveInterval& RHS = LI.getOrCreateInterval(secondary);
+
+ LI.computeNumbering();
+
+ DenseMap<VNInfo*, VNInfo*> VNMap;
+ for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ LiveRange R = *I;
+
+ unsigned Start = R.start;
+ unsigned End = R.end;
+ if (LHS.getLiveRangeContaining(Start))
+ return false;
+
+ if (LHS.getLiveRangeContaining(End))
+ return false;
+
+ LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R);
+ if (RI != LHS.end() && RI->start < End)
+ return false;
+ }
+
+ for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ LiveRange R = *I;
+ VNInfo* OldVN = R.valno;
+ VNInfo*& NewVN = VNMap[OldVN];
+ if (!NewVN) {
+ NewVN = LHS.getNextValue(OldVN->def,
+ OldVN->copy,
+ LI.getVNInfoAllocator());
+ NewVN->kills = OldVN->kills;
+ }
+
+ LiveRange LR (R.start, R.end, NewVN);
+ LHS.addRange(LR);
+ }
+
+ LI.removeInterval(RHS.reg);
+
+ return true;
+}
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+
+ // Compute DFS numbers of each block
+ computeDFS(Fn);
+
+ // Determine which phi node operands need copies
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ if (!I->empty() &&
+ I->begin()->getOpcode() == TargetInstrInfo::PHI)
+ processBlock(I);
+
+ // Break interferences where two different phis want to coalesce
+ // in the same register.
+ std::set<unsigned> seen;
+ typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
+ RenameSetType;
+ for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
+ I != E; ++I) {
+ for (std::map<unsigned, MachineBasicBlock*>::iterator
+ OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
+ if (!seen.count(OI->first)) {
+ seen.insert(OI->first);
+ ++OI;
+ } else {
+ Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
+ unsigned reg = OI->first;
+ ++OI;
+ I->second.erase(reg);
+ DOUT << "Removing Renaming: " << reg << " -> " << I->first << "\n";
+ }
+ }
+ }
+
+ // Insert copies
+ // FIXME: This process should probably preserve LiveIntervals
+ SmallPtrSet<MachineBasicBlock*, 16> visited;
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+ InsertCopies(MDT.getRootNode(), visited);
+
+ // Perform renaming
+ for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
+ I != E; ++I)
+ while (I->second.size()) {
+ std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
+
+ DOUT << "Renaming: " << SI->first << " -> " << I->first << "\n";
+
+ if (SI->first != I->first) {
+ if (mergeLiveIntervals(I->first, SI->first)) {
+ Fn.getRegInfo().replaceRegWith(SI->first, I->first);
+
+ if (RenameSets.count(SI->first)) {
+ I->second.insert(RenameSets[SI->first].begin(),
+ RenameSets[SI->first].end());
+ RenameSets.erase(SI->first);
+ }
+ } else {
+ // Insert a last-minute copy if a conflict was detected.
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
+ TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
+ I->first, SI->first, RC, RC);
+
+ LI.computeNumbering();
+
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ unsigned instrIdx =
+ LI.getInstructionIndex(--SI->second->getFirstTerminator());
+ if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx)))
+ Int.removeRange(LiveIntervals::getDefIndex(instrIdx),
+ LI.getMBBEndIdx(SI->second)+1, true);
+
+ LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
+ --SI->second->getFirstTerminator());
+ R.valno->copy = --SI->second->getFirstTerminator();
+ R.valno->def = LiveIntervals::getDefIndex(instrIdx);
+
+ DOUT << "Renaming failed: " << SI->first << " -> "
+ << I->first << "\n";
+ }
+ }
+
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ const LiveRange* LR =
+ Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
+ LR->valno->hasPHIKill = true;
+
+ I->second.erase(SI->first);
+ }
+
+ // Remove PHIs
+ std::vector<MachineInstr*> phis;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
+ BI != BE; ++BI)
+ if (BI->getOpcode() == TargetInstrInfo::PHI)
+ phis.push_back(BI);
+ }
+
+ for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
+ I != E; ) {
+ MachineInstr* PInstr = *(I++);
+
+ // If this is a dead PHI node, then remove it from LiveIntervals.
+ unsigned DestReg = PInstr->getOperand(0).getReg();
+ LiveInterval& PI = LI.getInterval(DestReg);
+ if (PInstr->registerDefIsDead(DestReg)) {
+ if (PI.containsOneValue()) {
+ LI.removeInterval(DestReg);
+ } else {
+ unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+ PI.removeRange(*PI.getLiveRangeContaining(idx), true);
+ }
+ } else {
+ // Trim live intervals of input registers. They are no longer live into
+ // this block if they died after the PHI. If they lived after it, don't
+ // trim them because they might have other legitimate uses.
+ for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
+ unsigned reg = PInstr->getOperand(i).getReg();
+
+ MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
+ LiveInterval& InputI = LI.getInterval(reg);
+ if (MBB != PInstr->getParent() &&
+ InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
+ InputI.expiredAt(LI.getInstructionIndex(PInstr) +
+ LiveInterval::InstrSlots::NUM))
+ InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
+ LI.getInstructionIndex(PInstr),
+ true);
+ }
+
+ // If the PHI is not dead, then the valno defined by the PHI
+ // now has an unknown def.
+ unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+ const LiveRange* PLR = PI.getLiveRangeContaining(idx);
+ PLR->valno->def = ~0U;
+ LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
+ PLR->start, PLR->valno);
+ PI.addRange(R);
+ }
+
+ LI.RemoveMachineInstrFromMaps(PInstr);
+ PInstr->eraseFromParent();
+ }
+
+ LI.computeNumbering();
+
+ return true;
+}
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
new file mode 100644
index 0000000..a5e1ee4
--- /dev/null
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -0,0 +1,194 @@
+//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class; it just provides default
+// implementations of various methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+// commuteInstruction - The default implementation of this method just exchanges
+// operands 1 and 2.
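+//
+// For illustration (hypothetical opcode):
+//   %reg1026 = ADD %reg1024, %reg1025
+// becomes
+//   %reg1026 = ADD %reg1025, %reg1024
+// with the kill flags of the two source operands swapped along with them.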
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ if (MI->getOperand(0).getReg() == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ MachineFunction &MF = *MI->getParent()->getParent();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+      .addReg(Reg1, getKillRegState(Reg1IsKill));
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+ return MI;
+}
+
+/// CommuteChangesDestination - Return true if commuting the specified
+/// instruction will also change the destination operand. Also return the
+/// current operand index of the would-be new destination register by
+/// reference. This can happen when the commutable instruction is also a
+/// two-address instruction.
+bool TargetInstrInfoImpl::CommuteChangesDestination(MachineInstr *MI,
+ unsigned &OpIdx) const{
+ assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+ "This only knows how to commute register operands so far");
+ if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ OpIdx = 2;
+ return true;
+ }
+ return false;
+}
+
+
+bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (TID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
+void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+unsigned
+TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
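+///
+/// For illustration (hypothetical target opcodes): folding a reload from
+/// stack slot FI into the second operand of
+///   %r1 = ADDrr %r1, %r2
+/// could yield
+///   %r1 = ADDrm %r1, <fi FI>
+/// with a MachineMemOperand describing the stack slot attached to the
+/// new instruction, as done below.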
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ unsigned Flags = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI->getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ if (!NewMI) return 0;
+
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->getDesc().mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->getDesc().mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FrameIndex) != -1);
+ MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FrameIndex),
+ Flags,
+ MFI.getObjectOffset(FrameIndex),
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
+ NewMI->addMemOperand(MF, MMO);
+
+ return NewMI;
+}
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ // Copy the memoperands from the load to the folded instruction.
+ for (std::list<MachineMemOperand>::iterator I = LoadMI->memoperands_begin(),
+ E = LoadMI->memoperands_end(); I != E; ++I)
+ NewMI->addMemOperand(MF, *I);
+
+ return NewMI;
+}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 0000000..3c40404
--- /dev/null
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,997 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to be
+// capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class VISIBILITY_HIDDEN TwoAddressInstructionPass
+ : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+
+    // DistanceMap - Keep track of the distance of an MI from the start of the
+ // current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // SrcRegMap - A map from virtual registers to physical registers which
+    // they are likely to be coalesced with, due to copies from physical
+ // registers to virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+ // DstRegMap - A map from virtual registers to physical registers which
+    // they are likely to be coalesced with, due to copies to physical
+ // registers from virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
+
+ bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc);
+
+ bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef);
+
+ MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA);
+
+ bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned Dist);
+
+ void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+ };
+}
+
+char TwoAddressInstructionPass::ID = 0;
+static RegisterPass<TwoAddressInstructionPass>
+X("twoaddressinstruction", "Two-Address instruction pass");
+
+const PassInfo *const llvm::TwoAddressInstructionPassID = &X;
+
+/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above-mentioned register to reduce
+/// register pressure.
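+///
+/// A sketch (hypothetical vregs): if the new instruction defines %reg1027
+/// from %reg1024, and %reg1024 is killed several instructions later, sinking
+/// the definition of %reg1027 down to that kill means %reg1024 and %reg1027
+/// are no longer both live across the intervening instructions.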
+bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
+ MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(TII, SeenStore))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = NULL;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = NULL;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill()) {
+ if (OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// isTwoAddrUse - Return true if the specified MI is using the specified
+/// register as a two-address operand.
+static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg &&
+ (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
+ // Earlier use is a two-address one.
+ return true;
+ }
+ return false;
+}
+
+/// isProfitableToReMat - Return true if the heuristic determines it is likely
+/// to be profitable to re-materialize the definition of Reg rather than copy
+/// the register.
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+ const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc) {
+ bool OtherUse = false;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = UseMO.getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end() && DI->second == Loc)
+ continue; // Current use.
+ OtherUse = true;
+ // There is at least one other use in the MBB that will clobber the
+ // register.
+ if (isTwoAddrUse(UseMI, Reg))
+ return true;
+ }
+ }
+
+ // If other uses in MBB are not two-address uses, then don't remat.
+ if (OtherUse)
+ return false;
+
+ // No other uses in the same block, remat if it's defined in the same
+ // block so it does not unnecessarily extend the live range.
+ return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed. It also returns the last
+/// def location by reference.
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+ MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+ MachineBasicBlock *MBB,
+ unsigned Dist) {
+ unsigned LastUseDist = 0;
+ MachineInstr *LastUse = 0;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (DI->second >= Dist)
+ continue;
+
+ if (MO.isUse() && DI->second > LastUseDist) {
+ LastUse = DI->first;
+ LastUseDist = DI->second;
+ }
+ }
+ return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or an
+/// extract_subreg, insert_subreg, or subreg_to_reg instruction. It also
+/// returns the source and destination registers and whether they are
+/// physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ unsigned SrcSubIdx, DstSubIdx;
+ if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ if (MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else if (MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ }
+ }
+
+ if (DstReg) {
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+ }
+ return false;
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ if (!DefMI->killsRegister(Reg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (next(Begin) != MRI->def_end())
+ return true;
+ DefMI = &*Begin;
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned NumOps = (MI.getOpcode() == TargetInstrInfo::INLINEASM)
+ ? MI.getNumOperands() : TID.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single use that
+/// lies within the given basic block, return the use instruction if it's a
+/// copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg);
+ if (UI == MRI->use_end())
+ return 0;
+ MachineInstr &UseMI = *UI;
+ if (++UI != MRI->use_end())
+ // More than one use.
+ return 0;
+ if (UseMI.getParent() != MBB)
+ return 0;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
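+///
+/// For illustration (hypothetical registers): with v1025 -> v1024 and
+/// v1024 -> r0 in the map, getMappedReg(v1025, RegMap) follows the chain and
+/// returns r0; a chain that ends at an unmapped virtual register returns 0.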
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to commute
+/// the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist) {
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!MI->killsRegister(regC))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ unsigned ToRegB = getMappedReg(regB, DstRegMap);
+ unsigned ToRegC = getMappedReg(regC, DstRegMap);
+ if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
+ (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ regsAreCompatible(FromRegC, ToRegB, TRI)))
+ return true;
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+ return true;
+
+  // Since there are no intervening uses of either register, commute if the
+  // def of regC is closer; its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool
+TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
+ MachineInstr *MI = mi;
+ DOUT << "2addr: COMMUTING : " << *MI;
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+ if (NewMI == 0) {
+ DOUT << "2addr: COMMUTING FAILED!\n";
+ return false;
+ }
+
+ DOUT << "2addr: COMMUTED TO: " << *NewMI;
+  // If commuting created a new instruction, update live variables.
+ if (NewMI != MI) {
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(RegC, MI, NewMI);
+
+ mbbi->insert(mi, NewMI); // Insert the new inst
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = NewMI;
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ }
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned Dist) {
+ MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+ if (NewMI) {
+ DOUT << "2addr: CONVERTING 2-ADDR: " << *mi;
+ DOUT << "2addr: TO 3-ADDR: " << *NewMI;
+ bool Sunk = false;
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+ // FIXME: Temporary workaround. If the new instruction doesn't
+      // use RegB, convertToThreeAddress must have created more
+      // than one instruction.
+ Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+ mbbi->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = next(mi);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys)
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII,
+ IsCopy, NewReg, IsDstPhys)) {
+ if (IsCopy) {
+ if (!Processed.insert(UseMI))
+ break;
+ }
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+        // Earlier in the same MBB; reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == DstReg &&
+ "Can't map to two src physical registers!");
+ VirtRegPairs.push_back(NewReg);
+ DstReg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&
+ "Can't map to two dst physical registers!");
+ ToReg = FromReg;
+ }
+ }
+ }
+
+ Processed.insert(MI);
+}
+
+/// isSafeToDelete - If the specified instruction does not produce any side
+/// effects and all of its defs are dead, then it's safe to delete.
+static bool isSafeToDelete(MachineInstr *MI, unsigned Reg,
+ const TargetInstrInfo *TII,
+ SmallVector<unsigned, 4> &Kills) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayStore() || TID.isCall())
+ return false;
+ if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.getReg() != Reg && MO.isKill())
+ Kills.push_back(MO.getReg());
+ }
+
+ return true;
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "Machine Function\n";
+ const TargetMachine &TM = MF.getTarget();
+ MRI = &MF.getRegInfo();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+
+ bool MadeChange = false;
+
+ DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+
+ // ReMatRegs - Keep track of the registers whose defs are remat'ed.
+ BitVector ReMatRegs;
+ ReMatRegs.resize(MRI->getLastVirtReg()+1);
+
+ SmallPtrSet<MachineInstr*, 8> Processed;
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = next(mi);
+ const TargetInstrDesc &TID = mi->getDesc();
+ bool FirstTied = true;
+
+ DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+ ProcessCopy(&*mi, &*mbbi, Processed);
+
+ unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
+ ? mi->getNumOperands() : TID.getNumOperands();
+ for (unsigned si = 0; si < NumOps; ++si) {
+ unsigned ti = 0;
+ if (!mi->isRegTiedToDefOperand(si, &ti))
+ continue;
+
+ if (FirstTied) {
+ ++NumTwoAddressInstrs;
+ DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM));
+ }
+
+ FirstTied = false;
+
+ assert(mi->getOperand(si).isReg() && mi->getOperand(si).getReg() &&
+ mi->getOperand(si).isUse() && "two address instruction invalid");
+
+ // If the two operands are the same we just remove the use
+ // and mark the def as def&use; otherwise we have to insert a copy.
+ if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) {
+ // Rewrite:
+ // a = b op c
+ // to:
+ // a = b
+ // a = a op c
+ unsigned regA = mi->getOperand(ti).getReg();
+ unsigned regB = mi->getOperand(si).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot update physical register live information");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of a in the instruction (a =
+ // b + a for example) because our transformation will not work. This
+ // should never occur because we are in SSA form.
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+ assert(i == ti ||
+ !mi->getOperand(i).isReg() ||
+ mi->getOperand(i).getReg() != regA);
+#endif
+
+ // If this instruction is not the killing user of B, see if we can
+ // rearrange the code to make it so. Making it the killing user will
+ // allow us to coalesce A and B together, eliminating the copy we are
+ // about to insert.
+ if (!isKilled(*mi, regB, MRI, TII)) {
+ // If regA is dead and the instruction can be deleted, just delete
+ // it so it doesn't clobber regB.
+ SmallVector<unsigned, 4> Kills;
+ if (mi->getOperand(ti).isDead() &&
+ isSafeToDelete(mi, regB, TII, Kills)) {
+ SmallVector<std::pair<std::pair<unsigned, bool>,
+ MachineInstr*>, 4> NewKills;
+ bool ReallySafe = true;
+ // If this instruction kills some virtual registers, we need to
+ // update the kill information. If it's not possible to do so,
+ // then bail out.
+ while (!Kills.empty()) {
+ unsigned Kill = Kills.back();
+ Kills.pop_back();
+ if (TargetRegisterInfo::isPhysicalRegister(Kill)) {
+ ReallySafe = false;
+ break;
+ }
+ MachineInstr *LastKill = FindLastUseInMBB(Kill, &*mbbi, Dist);
+ if (LastKill) {
+ bool isModRef = LastKill->modifiesRegister(Kill);
+ NewKills.push_back(std::make_pair(std::make_pair(Kill,isModRef),
+ LastKill));
+ } else {
+ ReallySafe = false;
+ break;
+ }
+ }
+
+ if (ReallySafe) {
+ if (LV) {
+ while (!NewKills.empty()) {
+ MachineInstr *NewKill = NewKills.back().second;
+ unsigned Kill = NewKills.back().first.first;
+ bool isDead = NewKills.back().first.second;
+ NewKills.pop_back();
+ if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+ if (isDead)
+ LV->addVirtualRegisterDead(Kill, NewKill);
+ else
+ LV->addVirtualRegisterKilled(Kill, NewKill);
+ }
+ }
+ }
+
+ // We're really going to nuke the old inst. If regB was marked
+ // as a kill we need to update its Kills list.
+ if (LV && mi->getOperand(si).isKill())
+ LV->removeVirtualRegisterKilled(regB, mi);
+
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = nmi;
+ ++NumDeletes;
+ break; // Done with this instruction.
+ }
+ }
+
+ // If this instruction is commutative, check to see if C dies. If
+ // so, swap the B and C operands. This makes the live ranges of A
+ // and C joinable.
+ // FIXME: This code also works for A := B op C instructions.
+ if (TID.isCommutable() && mi->getNumOperands() >= 3) {
+ assert(mi->getOperand(3-si).isReg() &&
+ "Not a proper commutative instruction!");
+ unsigned regC = mi->getOperand(3-si).getReg();
+ if (isKilled(*mi, regC, MRI, TII)) {
+ if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ ++NumCommuted;
+ regB = regC;
+ goto InstructionRearranged;
+ }
+ }
+ }
+
+ // If this instruction is potentially convertible to a true
+ // three-address instruction, try to convert it.
+ if (TID.isConvertibleTo3Addr()) {
+ // FIXME: This assumes there are no more operands which are tied
+ // to another register.
+#ifndef NDEBUG
+ for (unsigned i = si + 1, e = TID.getNumOperands(); i < e; ++i)
+ assert(TID.getOperandConstraint(i, TOI::TIED_TO) == -1);
+#endif
+
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ break; // Done with this instruction.
+ }
+ }
+ }
+
+ // If it's profitable to commute the instruction, do so.
+ if (TID.isCommutable() && mi->getNumOperands() >= 3) {
+ unsigned regC = mi->getOperand(3-si).getReg();
+ if (isProfitableToCommute(regB, regC, mi, mbbi, Dist))
+ if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ ++NumAggrCommuted;
+ ++NumCommuted;
+ regB = regC;
+ goto InstructionRearranged;
+ }
+ }
+
+ // If it's profitable to convert the 2-address instruction to a
+ // 3-address one, do so.
+ if (TID.isConvertibleTo3Addr() && isProfitableToConv3Addr(regA)) {
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ break; // Done with this instruction.
+ }
+ }
+
+ InstructionRearranged:
+ const TargetRegisterClass* rc = MRI->getRegClass(regB);
+ MachineInstr *DefMI = MRI->getVRegDef(regB);
+ // If it's safe and profitable, remat the definition instead of
+ // copying it.
+ if (DefMI &&
+ DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isSafeToReMat(TII, regB) &&
+ isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
+ DEBUG(cerr << "2addr: REMATTING : " << *DefMI << "\n");
+ TII->reMaterialize(*mbbi, mi, regA, DefMI);
+ ReMatRegs.set(regB);
+ ++NumReMats;
+ } else {
+ bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
+ (void)Emitted;
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ }
+
+ MachineBasicBlock::iterator prevMI = prior(mi);
+ // Update DistanceMap.
+ DistanceMap.insert(std::make_pair(prevMI, Dist));
+ DistanceMap[mi] = ++Dist;
+
+ // Update live variables for regB.
+ if (LV) {
+ if (LV->removeVirtualRegisterKilled(regB, mi))
+ LV->addVirtualRegisterKilled(regB, prevMI);
+
+ if (LV->removeVirtualRegisterDead(regB, mi))
+ LV->addVirtualRegisterDead(regB, prevMI);
+ }
+
+ DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM));
+
+ // Replace all occurrences of regB with regA.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ if (mi->getOperand(i).isReg() &&
+ mi->getOperand(i).getReg() == regB)
+ mi->getOperand(i).setReg(regA);
+ }
+ }
+
+ assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse());
+ mi->getOperand(ti).setReg(mi->getOperand(si).getReg());
+ MadeChange = true;
+
+ DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM));
+ }
+
+ mi = nmi;
+ }
+ }
+
+ // Some remat'ed instructions are dead.
+ int VReg = ReMatRegs.find_first();
+ while (VReg != -1) {
+ if (MRI->use_empty(VReg)) {
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ DefMI->eraseFromParent();
+ }
+ VReg = ReMatRegs.find_next(VReg);
+ }
+
+ return MadeChange;
+}
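+
+// A minimal before/after sketch of the rewrite performed above (registers and
+// opcodes are illustrative only):
+//   before:  v3 = ADD v1, v2   ; v3 tied to v1
+//   after:   v3 = MOV v1       ; copy issued by copyRegToReg()
+//            v3 = ADD v3, v2   ; tied use rewritten to the def
+// When v1's definition is as cheap as a move and safe to re-materialize, the
+// MOV is replaced by re-issuing the defining instruction via reMaterialize().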
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..c3b213c
--- /dev/null
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,199 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN UnreachableBlockElim : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElim() : FunctionPass(&ID) {}
+ };
+}
+char UnreachableBlockElim::ID = 0;
+static RegisterPass<UnreachableBlockElim>
+X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Reachable;
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+ df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(I)) {
+ BasicBlock *BB = I;
+ DeadBlocks.push_back(BB);
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ DeadBlocks[i]->eraseFromParent();
+
+ return DeadBlocks.size();
+}
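+
+// A sketch of the effect on hypothetical IR (not taken from a test case):
+//   entry:
+//     br label %live
+//   dead:                          ; never reached from entry, so unvisited
+//     br label %live
+//   live:
+//     %p = phi i32 [ 0, %entry ], [ 1, %dead ]
+// The pass deletes %dead, and removePredecessor() drops the [ 1, %dead ]
+// entry from the phi in %live.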
+
+
+namespace {
+ class VISIBILITY_HIDDEN UnreachableMachineBlockElim :
+ public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &F);
+ MachineModuleInfo *MMI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+static RegisterPass<UnreachableMachineBlockElim>
+Y("unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks");
+
+const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y;
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+ I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+
+ // Test for deadness.
+ if (!Reachable.count(BB)) {
+ DeadBlocks.push_back(BB);
+
+ while (BB->succ_begin() != BB->succ_end()) {
+ MachineBasicBlock* succ = *BB->succ_begin();
+
+ MachineBasicBlock::iterator start = succ->begin();
+ while (start != succ->end() &&
+ start->getOpcode() == TargetInstrInfo::PHI) {
+ for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+ if (start->getOperand(i).isMBB() &&
+ start->getOperand(i).getMBB() == BB) {
+ start->RemoveOperand(i);
+ start->RemoveOperand(i-1);
+ }
+
+ start++;
+ }
+
+ BB->removeSuccessor(BB->succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = DeadBlocks[i];
+ // If there are any labels in the basic block, unregister them from
+ // MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->end(); I != E; ++I) {
+ if (I->isLabel())
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+ MBB->eraseFromParent();
+ }
+
+ // Cleanup PHI nodes.
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+ BB->pred_end());
+ MachineBasicBlock::iterator phi = BB->begin();
+ while (phi != BB->end() &&
+ phi->getOpcode() == TargetInstrInfo::PHI) {
+ for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+ if (!preds.count(phi->getOperand(i).getMBB())) {
+ phi->RemoveOperand(i);
+ phi->RemoveOperand(i-1);
+ }
+
+ if (phi->getNumOperands() == 3) {
+ unsigned Input = phi->getOperand(1).getReg();
+ unsigned Output = phi->getOperand(0).getReg();
+
+ MachineInstr* temp = phi;
+ ++phi;
+ temp->eraseFromParent();
+
+ if (Input != Output)
+ F.getRegInfo().replaceRegWith(Output, Input);
+
+ continue;
+ }
+
+ ++phi;
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return DeadBlocks.size();
+}
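+
+// Sketch of the phi cleanup above on made-up machine code: once dead
+// predecessors are pruned, a phi reduced to a single incoming value, e.g.
+//   %v1 = PHI %v2, <mbb0>
+// has exactly three operands left (def, value, block), so it is erased and
+// all uses of %v1 are rewritten to %v2 through replaceRegWith().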
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 0000000..29637b9
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,269 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregmap"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpills , "Number of register spills");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+static RegisterPass<VirtRegMap>
+X("virtregmap", "Virtual Register Map");
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ TII = mf.getTarget().getInstrInfo();
+ TRI = mf.getTarget().getRegisterInfo();
+ MF = &mf;
+
+ ReMatId = MAX_STACK_SLOT+1;
+ LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2ReMatIdMap.clear();
+ Virt2SplitMap.clear();
+ Virt2SplitKillMap.clear();
+ ReMatMap.clear();
+ ImplicitDefed.clear();
+ SpillSlotToUsesMap.clear();
+ MI2VirtMap.clear();
+ SpillPt2VirtMap.clear();
+ RestorePt2VirtMap.clear();
+ EmergencySpillMap.clear();
+ EmergencySpillSlots.clear();
+
+ SpillSlotToUsesMap.resize(8);
+ ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1-
+ TargetRegisterInfo::FirstVirtualRegister);
+
+ allocatableRCRegs.clear();
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ allocatableRCRegs.insert(std::make_pair(*I,
+ TRI->getAllocatableSet(mf, *I)));
+
+ grow();
+
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ Virt2PhysMap.grow(LastVirtReg);
+ Virt2StackSlotMap.grow(LastVirtReg);
+ Virt2ReMatIdMap.grow(LastVirtReg);
+ Virt2SplitMap.grow(LastVirtReg);
+ Virt2SplitKillMap.grow(LastVirtReg);
+ ReMatMap.grow(LastVirtReg);
+ ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ unsigned Idx = SS-LowSpillSlot;
+ while (Idx >= SpillSlotToUsesMap.size())
+ SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ Virt2StackSlotMap[virtReg] = SS;
+ ++NumSpills;
+ return SS;
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = SS;
+}
+
+int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = ReMatId;
+ return ReMatId++;
+}
+
+void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = id;
+}
+
+int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
+ std::map<const TargetRegisterClass*, int>::iterator I =
+ EmergencySpillSlots.find(RC);
+ if (I != EmergencySpillSlots.end())
+ return I->second;
+ int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ EmergencySpillSlots[RC] = SS;
+ return SS;
+}
+
+void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
+ if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
+ // If FI < LowSpillSlot, this stack reference was produced by
+ // instruction selection and is not a spill
+ if (FI >= LowSpillSlot) {
+ assert(FI >= 0 && "Spill slot index should not be negative!");
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
+ }
+ }
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
+ MachineInstr *NewMI, ModRef MRInfo) {
+ // Move previous memory references folded to new instruction.
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
+ for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
+ E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
+ MI2VirtMap.erase(I++);
+ }
+
+ // add new memory reference
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
+ MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (MF->getFrameInfo()->isFixedObjectIndex(FI))
+ continue;
+ // This stack reference was produced by instruction selection and
+ // is not a spill
+ if (FI < LowSpillSlot)
+ continue;
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
+ }
+ MI2VirtMap.erase(MI);
+ SpillPt2VirtMap.erase(MI);
+ RestorePt2VirtMap.erase(MI);
+ EmergencySpillMap.erase(MI);
+}
+
+/// FindUnusedRegisters - Gather a list of allocatable registers that
+/// have not been allocated to any virtual register.
+bool VirtRegMap::FindUnusedRegisters(const TargetRegisterInfo *TRI,
+ LiveIntervals* LIs) {
+ unsigned NumRegs = TRI->getNumRegs();
+ UnusedRegs.reset();
+ UnusedRegs.resize(NumRegs);
+
+ BitVector Used(NumRegs);
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ Used.set(Virt2PhysMap[i]);
+
+ BitVector Allocatable = TRI->getAllocatableSet(*MF);
+ bool AnyUnused = false;
+ for (unsigned Reg = 1; Reg < NumRegs; ++Reg) {
+ if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) {
+ bool ReallyUnused = true;
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ if (Used[*AS] || LIs->hasInterval(*AS)) {
+ ReallyUnused = false;
+ break;
+ }
+ }
+ if (ReallyUnused) {
+ AnyUnused = true;
+ UnusedRegs.set(Reg);
+ }
+ }
+ }
+
+ return AnyUnused;
+}
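+
+// Example of the alias check above (x86 names used purely for illustration):
+// EAX counts as unused only if EAX itself and each alias (AX, AH, AL) is
+// neither mapped in Virt2PhysMap nor present in the live intervals.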
+
+void VirtRegMap::print(std::ostream &OS, const Module* M) const {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
+ if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
+ << "]\n";
+ }
+
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
+ OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n";
+ OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+ print(cerr);
+}
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
new file mode 100644
index 0000000..507557d
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.h
@@ -0,0 +1,495 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map. This maps virtual registers to
+// physical registers and virtual registers to stack slots. It is created and
+// updated by a register allocator and then used by a machine code rewriter that
+// adds spill code and rewrites virtual into physical register references.
+//
+//===----------------------------------------------------------------------===//
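+
+// A hypothetical caller-side sketch (the allocator names are assumed; only
+// the VirtRegMap calls are declared in this header):
+//   VirtRegMap &VRM = getAnalysis<VirtRegMap>();
+//   if (PhysReg) // allocation succeeded
+//     VRM.assignVirt2Phys(VirtReg, PhysReg);
+//   else // spill
+//     int SS = VRM.assignVirt2StackSlot(VirtReg);
+// A rewriter then queries VRM.getPhys() / VRM.getStackSlot() while replacing
+// virtual register references.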
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Streams.h"
+#include <map>
+
+namespace llvm {
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineFunction;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+
+ class VirtRegMap : public MachineFunctionPass {
+ public:
+ enum {
+ NO_PHYS_REG = 0,
+ NO_STACK_SLOT = (1L << 30)-1,
+ MAX_STACK_SLOT = (1L << 18)-1
+ };
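+
+ // The values above partition a single integer space: real stack slots fall
+ // in [0, MAX_STACK_SLOT] = [0, 262143], rematerialization ids start at
+ // MAX_STACK_SLOT+1 = 262144 (see ReMatId below), and NO_STACK_SLOT =
+ // (1L << 30)-1 is a sentinel safely above both ranges.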
+
+ enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+ typedef std::multimap<MachineInstr*,
+ std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+ private:
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MF;
+
+ DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
+
+ /// Virt2PhysMap - This is a virtual to physical register
+ /// mapping. Each virtual register is required to have an entry in
+ /// it; even spilled virtual registers (the register mapped to a
+ /// spilled register is the temporary used to load it from the
+ /// stack).
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+
+ /// Virt2StackSlotMap - This is virtual register to stack slot
+ /// mapping. Each spilled virtual register has an entry in it
+ /// which corresponds to the stack slot this register is spilled
+ /// at.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+
+ /// Virt2ReMatIdMap - This is virtual register to rematerialization id
+ /// mapping. Each spilled virtual register that should be remat'd has an
+ /// entry in it which corresponds to the remat id.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
+
+ /// Virt2SplitMap - This is virtual register to split virtual register
+ /// mapping.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+
+ /// Virt2SplitKillMap - This maps a split virtual register to its last
+ /// use (kill) index.
+ IndexedMap<unsigned> Virt2SplitKillMap;
+
+ /// ReMatMap - This is virtual register to re-materialized instruction
+ /// mapping. Each virtual register whose definition is going to be
+ /// re-materialized has an entry in it.
+ IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
+
+ /// MI2VirtMap - This is MachineInstr to virtual register
+ /// mapping. In the case of memory spill code being folded into
+ /// instructions, we need to know which virtual register was
+ /// read/written by this instruction.
+ MI2VirtMapTy MI2VirtMap;
+
+ /// SpillPt2VirtMap - This records the virtual registers which should
+ /// be spilled right after the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
+ SpillPt2VirtMap;
+
+ /// RestorePt2VirtMap - This records the virtual registers which should
+ /// be restored right before the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
+
+ /// EmergencySpillMap - This records the physical registers that should
+ /// be spilled / restored around the MachineInstr since the register
+ /// allocator has run out of registers.
+ std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
+
+ /// EmergencySpillSlots - This records emergency spill slots used to
+ /// spill physical registers when the register allocator runs out of
+ /// registers. Ideally only one stack slot is used per function per
+ /// register class.
+ std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
+
+ /// ReMatId - Instead of assigning a stack slot to a virtual register that
+ /// is to be rematerialized, a unique id is assigned. This keeps track of
+ /// the highest id used so far. Note, this starts at (1<<18) to avoid
+ /// conflicts with stack slot numbers.
+ int ReMatId;
+
+ /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
+ int LowSpillSlot, HighSpillSlot;
+
+ /// SpillSlotToUsesMap - Records uses for each register spill slot.
+ SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
+
+ /// ImplicitDefed - One bit for each virtual register. If set it indicates
+ /// the register is implicitly defined.
+ BitVector ImplicitDefed;
+
+ /// UnusedRegs - A list of physical registers that have not been used.
+ BitVector UnusedRegs;
+
+ VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
+ void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+ public:
+ static char ID;
+ VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
+ Virt2StackSlotMap(NO_STACK_SLOT),
+ Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
+ Virt2SplitKillMap(0), ReMatMap(NULL),
+ ReMatId(MAX_STACK_SLOT+1),
+ LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void grow();
+
+ /// @brief returns true if the specified virtual register is
+ /// mapped to a physical register
+ bool hasPhys(unsigned virtReg) const {
+ return getPhys(virtReg) != NO_PHYS_REG;
+ }
+
+ /// @brief returns the physical register mapped to the specified
+ /// virtual register
+ unsigned getPhys(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2PhysMap[virtReg];
+ }
+
+ /// @brief creates a mapping for the specified virtual register to
+ /// the specified physical register
+ void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ Virt2PhysMap[virtReg] = physReg;
+ }
+
+ /// @brief clears the specified virtual register's physical
+ /// register mapping
+ void clearVirt(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+ "attempt to clear a not assigned virtual register");
+ Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ }
+
+ /// @brief clears all virtual to physical register mappings
+ void clearAllVirt() {
+ Virt2PhysMap.clear();
+ grow();
+ }
+
+ /// @brief records virtReg is a split live interval from SReg.
+ void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
+ Virt2SplitMap[virtReg] = SReg;
+ }
+
+ /// @brief returns the live interval virtReg is split from.
+ unsigned getPreSplitReg(unsigned virtReg) {
+ return Virt2SplitMap[virtReg];
+ }
+
+ /// @brief returns true if the specified virtual register is not
+ /// mapped to a stack slot or rematerialized.
+ bool isAssignedReg(unsigned virtReg) const {
+ if (getStackSlot(virtReg) == NO_STACK_SLOT &&
+ getReMatId(virtReg) == NO_STACK_SLOT)
+ return true;
+ // A split register can be assigned a physical register as well as a
+ // stack slot or remat id.
+ return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+ }
+
+ /// @brief returns the stack slot mapped to the specified virtual
+ /// register
+ int getStackSlot(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2StackSlotMap[virtReg];
+ }
+
+ /// @brief returns the rematerialization id mapped to the specified virtual
+ /// register
+ int getReMatId(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2ReMatIdMap[virtReg];
+ }
+
+ /// @brief create a mapping for the specified virtual register to
+ /// the next available stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+ /// @brief create a mapping for the specified virtual register to
+ /// the specified stack slot
+ void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+ /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ int assignVirtReMatId(unsigned virtReg);
+ /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ void assignVirtReMatId(unsigned virtReg, int id);
+
+ /// @brief returns true if the specified virtual register is being
+ /// re-materialized.
+ bool isReMaterialized(unsigned virtReg) const {
+ return ReMatMap[virtReg] != NULL;
+ }
+
+ /// @brief returns the original machine instruction being re-issued
+ /// to re-materialize the specified virtual register.
+ MachineInstr *getReMaterializedMI(unsigned virtReg) const {
+ return ReMatMap[virtReg];
+ }
+
+ /// @brief records that the specified virtual register will be
+ /// re-materialized and the original instruction which will be re-issued
+ /// for this purpose.
+ void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+ ReMatMap[virtReg] = def;
+ }
+
+ /// @brief record the last use (kill) of a split virtual register.
+ void addKillPoint(unsigned virtReg, unsigned index) {
+ Virt2SplitKillMap[virtReg] = index;
+ }
+
+ unsigned getKillPoint(unsigned virtReg) const {
+ return Virt2SplitKillMap[virtReg];
+ }
+
+ /// @brief remove the last use (kill) of a split virtual register.
+ void removeKillPoint(unsigned virtReg) {
+ Virt2SplitKillMap[virtReg] = 0;
+ }
+
+ /// @brief returns true if the specified MachineInstr is a spill point.
+ bool isSpillPt(MachineInstr *Pt) const {
+ return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be spilled due to
+ /// splitting right after the specified MachineInstr.
+ std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
+ return SpillPt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a spill point for virtReg.
+ void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Pt);
+ if (I != SpillPt2VirtMap.end())
+ I->second.push_back(std::make_pair(virtReg, isKill));
+ else {
+ std::vector<std::pair<unsigned,bool> > Virts;
+ Virts.push_back(std::make_pair(virtReg, isKill));
+ SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief transfer spill point information from one instruction to
+ /// another.
+ void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Old);
+ if (I == SpillPt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back().first;
+ bool isKill = I->second.back().second;
+ I->second.pop_back();
+ addSpillPoint(virtReg, isKill, New);
+ }
+ SpillPt2VirtMap.erase(I);
+ }
+
+ /// @brief returns true if the specified MachineInstr is a restore point.
+ bool isRestorePt(MachineInstr *Pt) const {
+ return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be restored due to
+ /// splitting right before the specified MachineInstr.
+ std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
+ return RestorePt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a restore point for virtReg.
+ void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Pt);
+ if (I != RestorePt2VirtMap.end())
+ I->second.push_back(virtReg);
+ else {
+ std::vector<unsigned> Virts;
+ Virts.push_back(virtReg);
+ RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief transfer restore point information from one instruction to
+ /// another.
+ void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Old);
+ if (I == RestorePt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addRestorePoint(virtReg, New);
+ }
+ RestorePt2VirtMap.erase(I);
+ }
+
+ /// @brief records that the specified physical register must be spilled
+ /// around the specified machine instr.
+ void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
+ if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
+ EmergencySpillMap[MI].push_back(PhysReg);
+ else {
+ std::vector<unsigned> PhysRegs;
+ PhysRegs.push_back(PhysReg);
+ EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
+ }
+ }
+
+ /// @brief returns true if one or more physical registers must be spilled
+ /// around the specified instruction.
+ bool hasEmergencySpills(MachineInstr *MI) const {
+ return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
+ }
+
+ /// @brief returns the physical registers to be spilled and restored around
+ /// the instruction.
+ std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
+ return EmergencySpillMap[MI];
+ }
+
+ /// @brief transfer emergency spill information from one instruction to
+ /// another.
+ void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
+ EmergencySpillMap.find(Old);
+ if (I == EmergencySpillMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addEmergencySpill(virtReg, New);
+ }
+ EmergencySpillMap.erase(I);
+ }
+
+ /// @brief return or create an emergency spill slot for the register class.
+ int getEmergencySpillSlot(const TargetRegisterClass *RC);
+
+ /// @brief Return lowest spill slot index.
+ int getLowSpillSlot() const {
+ return LowSpillSlot;
+ }
+
+ /// @brief Return highest spill slot index.
+ int getHighSpillSlot() const {
+ return HighSpillSlot;
+ }
+
+ /// @brief Records a spill slot use.
+ void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
+
+ /// @brief Returns true if spill slot has been used.
+ bool isSpillSlotUsed(int FrameIndex) const {
+ assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
+ return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
+ }
+
+ /// @brief Mark the specified register as being implicitly defined.
+ void setIsImplicitlyDefined(unsigned VirtReg) {
+ ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+ }
+
+ /// @brief Returns true if the virtual register is implicitly defined.
+ bool isImplicitlyDefined(unsigned VirtReg) const {
+ return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+ }
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into newMI machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
+ ModRef MRInfo);
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into the specified machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
+
+ /// @brief returns the virtual registers' values folded in memory
+ /// operands of this instruction
+ std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+ getFoldedVirts(MachineInstr* MI) const {
+ return MI2VirtMap.equal_range(MI);
+ }
+
+ /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
+ /// folded instruction map and the spill, restore, and emergency spill maps.
+ void RemoveMachineInstrFromMaps(MachineInstr *MI);
+
+ /// FindUnusedRegisters - Gather a list of allocatable registers that
+ /// have not been allocated to any virtual register.
+ bool FindUnusedRegisters(const TargetRegisterInfo *TRI,
+ LiveIntervals* LIs);
+
+ /// HasUnusedRegisters - Return true if there are any allocatable registers
+ /// that have not been allocated to any virtual register.
+ bool HasUnusedRegisters() const {
+ return !UnusedRegs.none();
+ }
+
+ /// setRegisterUsed - Remember the physical register is now used.
+ void setRegisterUsed(unsigned Reg) {
+ UnusedRegs.reset(Reg);
+ }
+
+ /// isRegisterUnused - Return true if the physical register has not been
+ /// used.
+ bool isRegisterUnused(unsigned Reg) const {
+ return UnusedRegs[Reg];
+ }
+
+ /// getFirstUnusedRegister - Return the first physical register that has not
+ /// been used.
+ unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
+ int Reg = UnusedRegs.find_first();
+ while (Reg != -1) {
+ if (allocatableRCRegs[RC][Reg])
+ return (unsigned)Reg;
+ Reg = UnusedRegs.find_next(Reg);
+ }
+ return 0;
+ }
+
+ void print(std::ostream &OS, const Module* M = 0) const;
+ void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void dump() const;
+ };
+
+ inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+ inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 0000000..b4c8bc1
--- /dev/null
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2225 @@
+//===-- llvm/CodeGen/VirtRegRewriter.cpp - Rewriter ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDSE , "Number of dead stores elided");
+STATISTIC(NumDSS , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM , "Number of re-materializable defs elided");
+STATISTIC(NumStores , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats , "Number of re-materializations");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumReused , "Number of values reused");
+STATISTIC(NumDCE , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+ enum RewriterName { simple, local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+ cl::desc("Rewriter to use: (default: local)"),
+ cl::Prefix,
+ cl::values(clEnumVal(simple, "simple rewriter"),
+ clEnumVal(local, "local rewriter"),
+ clEnumVal(trivial, "trivial rewriter"),
+ clEnumValEnd),
+ cl::init(local));
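+
+// Usage sketch (assumed llc invocation; the driver wiring lives outside this
+// file): the rewriter is selected on the command line, e.g.
+//   llc -rewriter=trivial foo.bc
+// and defaults to the local rewriter via cl::init(local) above.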
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+
+// ****************************** //
+// Simple Rewriter Implementation //
+// ****************************** //
+
+struct VISIBILITY_HIDDEN SimpleRewriter : public VirtRegRewriter {
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ DOUT << "********** REWRITE MACHINE CODE **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+
+ // LoadedRegs - Keep track of which vregs are loaded, so that we only load
+ // each vreg once (in the case where a spilled vreg is used by multiple
+ // operands). This is always smaller than the number of operands to the
+ // current machine instr, so it should be small.
+ std::vector<unsigned> LoadedRegs;
+
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ DOUT << MBBI->getBasicBlock()->getName() << ":\n";
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MII != E; ++MII) {
+ MachineInstr &MI = *MII;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.getReg()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned VirtReg = MO.getReg();
+ unsigned SubIdx = MO.getSubReg();
+ unsigned PhysReg = VRM.getPhys(VirtReg);
+ unsigned RReg = SubIdx ? TRI.getSubReg(PhysReg, SubIdx) : PhysReg;
+ if (!VRM.isAssignedReg(VirtReg)) {
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass* RC =
+ MF.getRegInfo().getRegClass(VirtReg);
+
+ if (MO.isUse() &&
+ std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg)
+ == LoadedRegs.end()) {
+ TII.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(StackSlot, LoadMI);
+ LoadedRegs.push_back(VirtReg);
+ ++NumLoads;
+ DOUT << '\t' << *LoadMI;
+ }
+
+ if (MO.isDef()) {
+ TII.storeRegToStackSlot(MBB, next(MII), PhysReg, true,
+ StackSlot, RC);
+ MachineInstr *StoreMI = next(MII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ ++NumStores;
+ }
+ }
+ MF.getRegInfo().setPhysRegUsed(RReg);
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ } else {
+ MF.getRegInfo().setPhysRegUsed(MO.getReg());
+ }
+ }
+ }
+
+ DOUT << '\t' << MI;
+ LoadedRegs.clear();
+ }
+ }
+ return true;
+ }
+
+};
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter {
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ DOUT << "********** REWRITE MACHINE CODE **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+ MachineRegisterInfo *mri = &MF.getRegInfo();
+
+ bool changed = false;
+
+ for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+ liItr != liEnd; ++liItr) {
+
+ if (TargetRegisterInfo::isVirtualRegister(liItr->first)) {
+ if (VRM.hasPhys(liItr->first)) {
+ unsigned preg = VRM.getPhys(liItr->first);
+ mri->replaceRegWith(liItr->first, preg);
+ mri->setPhysRegUsed(preg);
+ changed = true;
+ }
+ }
+ else {
+ if (!liItr->second->empty()) {
+ mri->setPhysRegUsed(liItr->first);
+ }
+ }
+ }
+
+ return changed;
+ }
+
+};
+
+// ************************************************************************ //
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slot or remat'ed values are
+/// available in each physical register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs hold values that the register-allocated program is using
+/// and that we must not CHANGE, though we may read them. We keep track of
+/// this on a per-stack-slot / remat id basis as the low bit in the value of
+/// the SpillSlotsOrReMatsAvailable entries. The predicates canClobberPhysReg()
+/// and canClobberPhysRegForSS() check this bit; addAvailable() sets it when
+/// its CanClobber argument is true.
+class VISIBILITY_HIDDEN AvailableSpills {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+ // or remat'ed virtual register values that are still available, due to
+ // being loaded or stored to, but not invalidated yet.
+ std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+ // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+ // indicating which stack slot values are currently held by a physreg. This
+ // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+ // physreg is modified.
+ std::multimap<unsigned, int> PhysRegsAvailable;
+
+ void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+ void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+ AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+ : TRI(tri), TII(tii) {
+ }
+
+ /// clear - Reset the state.
+ void clear() {
+ SpillSlotsOrReMatsAvailable.clear();
+ PhysRegsAvailable.clear();
+ }
+
+ const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+ /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+ /// available in a physical register, return that PhysReg, otherwise
+ /// return 0.
+ unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+ std::map<int, unsigned>::const_iterator I =
+ SpillSlotsOrReMatsAvailable.find(Slot);
+ if (I != SpillSlotsOrReMatsAvailable.end()) {
+ return I->second >> 1; // Remove the CanClobber bit.
+ }
+ return 0;
+ }
+
+ /// addAvailable - Mark that the specified stack slot / remat is available
+ /// in the specified physreg. If CanClobber is true, the physreg can be
+ /// modified at any time without changing the semantics of the program.
+ void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+ // If this stack slot is thought to be available in some other physreg,
+ // remove its record.
+ ModifyStackSlotOrReMat(SlotOrReMat);
+
+ PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+ SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
+ (unsigned)CanClobber;
+
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Remembering SS#" << SlotOrReMat;
+ DOUT << " in physreg " << TRI->getName(Reg) << "\n";
+ }
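+
+ // Worked example of the encoding (numbers assumed): addAvailable(3, 5, true)
+ // stores SpillSlotsOrReMatsAvailable[3] = (5 << 1) | 1 == 11;
+ // getSpillSlotOrReMatPhysReg(3) recovers the physreg as 11 >> 1 == 5, and
+ // canClobberPhysRegForSS(3) reads the low bit: 11 & 1 == 1.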
+
+ /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+ /// the value of the specified stackslot register if it desires. The
+ /// specified stack slot must be available in a physreg for this query to
+ /// make sense.
+ bool canClobberPhysRegForSS(int SlotOrReMat) const {
+ assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+ "Value not available!");
+ return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+ }
+
+ /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+ /// physical register where values for some stack slot(s) might be
+ /// available.
+ bool canClobberPhysReg(unsigned PhysReg) const {
+ std::multimap<unsigned, int>::const_iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ if (!canClobberPhysRegForSS(SlotOrReMat))
+ return false;
+ }
+ return true;
+ }
+
+ /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+ /// stackslot register. The register is still available but is no longer
+ /// allowed to be modified.
+ void disallowClobberPhysReg(unsigned PhysReg);
+
+ /// ClobberPhysReg - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff that lives in
+ /// it and any of its aliases.
+ void ClobberPhysReg(unsigned PhysReg);
+
+ /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+ /// slot changes. This removes information about which register the
+ /// previous value for this slot lives in (as the previous value is dead
+ /// now).
+ void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+ /// AddAvailableRegsToLiveIn - Availability information is carried into the
+ /// specified MBB. Add available physical registers as potential live-ins.
+ /// If they are reused in the MBB, they will be added to the live-in set;
+ /// this keeps the register scavenger and the post-allocation scheduler
+ /// consistent.
+ void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+};
+
+// ************************************************************************ //
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to roll back upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+ // The MachineInstr operand that reused an available value.
+ unsigned Operand;
+
+ // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+ unsigned StackSlotOrReMat;
+
+ // PhysRegReused - The physical register the value was available in.
+ unsigned PhysRegReused;
+
+ // AssignedPhysReg - The physreg that was assigned for use by the reload.
+ unsigned AssignedPhysReg;
+
+ // VirtReg - The virtual register itself.
+ unsigned VirtReg;
+
+ ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+ unsigned vreg)
+ : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
+ AssignedPhysReg(apr), VirtReg(vreg) {}
+};
+
+/// ReuseInfo - This maintains a collection of ReusedOps for each operand that
+/// is reused instead of reloaded.
+class VISIBILITY_HIDDEN ReuseInfo {
+ MachineInstr &MI;
+ std::vector<ReusedOp> Reuses;
+ BitVector PhysRegsClobbered;
+public:
+ ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
+ PhysRegsClobbered.resize(tri->getNumRegs());
+ }
+
+ bool hasReuses() const {
+ return !Reuses.empty();
+ }
+
+ /// addReuse - If we choose to reuse a virtual register that is already
+ /// available instead of reloading it, remember that we did so.
+ void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
+ unsigned PhysRegReused, unsigned AssignedPhysReg,
+ unsigned VirtReg) {
+ // If the reload is to the assigned register anyway, no undo will be
+ // required.
+ if (PhysRegReused == AssignedPhysReg) return;
+
+ // Otherwise, remember this.
+ Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
+ AssignedPhysReg, VirtReg));
+ }
+
+ void markClobbered(unsigned PhysReg) {
+ PhysRegsClobbered.set(PhysReg);
+ }
+
+ bool isClobbered(unsigned PhysReg) const {
+ return PhysRegsClobbered.test(PhysReg);
+ }
+
+ /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+ /// is some other operand that is using the specified register, either pick
+ /// a new register to use, or evict the previous reload and use this reg.
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
+
+ /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+ /// 'Rejected' set to remember which registers have been considered and
+ /// rejected for the reload. This avoids infinite looping in cases like
+ /// this:
+ /// t1 := op t2, t3
+ /// t2 <- assigned r0 for use by the reload but ended up reuse r1
+ /// t3 <- assigned r1 for use by the reload but ended up reuse r0
+ /// t1 <- desires r1
+ /// sees r1 is taken by t2, tries t2's reload register r0
+ /// sees r0 is taken by t3, tries t3's reload register r1
+ /// sees r1 is taken by t2, tries t2's reload register r0 ...
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallSet<unsigned, 8> Rejected;
+ return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps, VRM);
+ }
+};
+
+
+// ****************** //
+// Utility Functions //
+// ****************** //
+
+/// findSinglePredSuccessor - Return via reference a vector of machine basic
+/// blocks each of which is a successor of the specified BB and has no other
+/// predecessor.
+static void findSinglePredSuccessor(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &Succs) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->pred_size() == 1)
+ Succs.push_back(SuccMBB);
+ }
+}
+
+/// InvalidateKill - Invalidate register kill information for a specific
+/// register. This also unsets the kill marker on the last kill operand.
+static void InvalidateKill(unsigned Reg,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg]) {
+ KillOps[Reg]->setIsKill(false);
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ KillOps[*SR]->setIsKill(false);
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+}
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ SmallVector<unsigned, 2> *KillRegs = NULL) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (KillRegs)
+ KillRegs->push_back(Reg);
+ assert(Reg < KillOps.size());
+ if (KillOps[Reg] == &MO) {
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+ }
+}
+
+/// InvalidateRegDef - If the def operand of the specified def MI is now dead
+/// (since its spill instruction is removed), mark it isDead. Also checks if
+/// the def MI has other definition operands that are not dead, returning that
+/// by reference in HasLiveDef.
+static bool InvalidateRegDef(MachineBasicBlock::iterator I,
+ MachineInstr &NewDef, unsigned Reg,
+ bool &HasLiveDef) {
+ // Due to remat, it's possible this reg isn't being reused. That is,
+ // the def of this reg (by prev MI) is now dead.
+ MachineInstr *DefMI = I;
+ MachineOperand *DefOp = NULL;
+ for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = DefMI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ if (MO.getReg() == Reg)
+ DefOp = &MO;
+ else if (!MO.isDead())
+ HasLiveDef = true;
+ }
+ }
+ if (!DefOp)
+ return false;
+
+ bool FoundUse = false, Done = false;
+ MachineBasicBlock::iterator E = &NewDef;
+ ++I; ++E;
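+ // Scan the instructions after the old def up to and including NewDef for a
+ // use of Reg; stop at the first instruction that mentions it at all.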
+ for (; !Done && I != E; ++I) {
+ MachineInstr *NMI = I;
+ for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = NMI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (MO.isUse())
+ FoundUse = true;
+ Done = true; // Stop after scanning all the operands of this MI.
+ }
+ }
+ if (!FoundUse) {
+ // Def is dead!
+ DefOp->setIsDead();
+ return true;
+ }
+ return false;
+}
+
+/// UpdateKills - Track and update kill info. If an MI reads a register that
+/// is marked kill, then it must be due to register reuse. Transfer the kill
+/// info over.
+static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ // That can't be right. Register is killed but not re-defined and it's
+ // being reused. Let's fix that.
+ KillOps[Reg]->setIsKill(false);
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ if (!MI.isRegTiedToDefOperand(i))
+ // Unless it's a two-address operand, this is the new kill.
+ MO.setIsKill();
+ }
+ if (MO.isKill()) {
+ RegKills.set(Reg);
+ KillOps[Reg] = &MO;
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.set(*SR);
+ KillOps[*SR] = &MO;
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ RegKills.reset(Reg);
+ KillOps[Reg] = NULL;
+ // It also defines (or partially defines) aliases.
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ }
+}
+
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
+///
+static void ReMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned DestReg, unsigned Reg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ TII->reMaterialize(MBB, MII, DestReg, VRM.getReMaterializedMI(Reg));
+ MachineInstr *NewMI = prior(MII);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
+ continue;
+ assert(MO.isUse());
+ unsigned SubIdx = MO.getSubReg();
+ unsigned Phys = VRM.getPhys(VirtReg);
+ assert(Phys);
+ unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
+ MO.setReg(RReg);
+ MO.setSubReg(0);
+ }
+ ++NumReMats;
+}
+
+/// findSuperReg - Find SubReg's super-register in the given register class,
+/// i.e. the register whose SubIdx sub-register is SubReg.
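+/// For example (illustrative), on x86 asking for the GR32 super-register
+/// whose low-byte sub-register is AL would return EAX.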
+static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
+ unsigned SubIdx, const TargetRegisterInfo *TRI) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ unsigned Reg = *I;
+ if (TRI->getSubReg(Reg, SubIdx) == SubReg)
+ return Reg;
+ }
+ return 0;
+}
+
+// ******************************** //
+// Available Spills Implementation //
+// ******************************** //
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+/// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
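+ // SpillSlotsOrReMatsAvailable maps a slot (or remat id) to
+ // (PhysReg << 1) | CanClobber; clearing bit 0 below revokes the clobber
+ // permission while keeping the value available.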
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+ DOUT << "PhysReg " << TRI->getName(PhysReg)
+ << " copied, it is available for use but can no longer be modified\n";
+ }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ disallowClobberPhysRegOnly(*AS);
+ disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ PhysRegsAvailable.erase(I++);
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+ DOUT << "PhysReg " << TRI->getName(PhysReg)
+ << " clobbered, invalidating ";
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 << "\n";
+ else
+ DOUT << "SS#" << SlotOrReMat << "\n";
+ }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ ClobberPhysRegOnly(*AS);
+ ClobberPhysRegOnly(PhysReg);
+}
+
+/// AddAvailableRegsToLiveIn - Availability information is being kept as we
+/// enter the specified MBB. Add available physical registers as potential
+/// live-ins. If they are reused in the MBB, they will be added to the
+/// live-in set so the register scavenger and post-allocation scheduler see
+/// correct liveness information.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ std::set<unsigned> NotAvailable;
+ for (std::multimap<unsigned, int>::iterator
+ I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+ I != E; ++I) {
+ unsigned Reg = I->first;
+ const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg);
+ // FIXME: A temporary workaround. We can't reuse an available value if it's
+ // not safe to move defs of the virtual register's class, e.g. the
+ // X86::RFP* register classes. Do not add it as a live-in.
+ if (!TII->isSafeToMoveRegClassDefs(RC))
+ // This is no longer available.
+ NotAvailable.insert(Reg);
+ else {
+ MBB.addLiveIn(Reg);
+ InvalidateKill(Reg, TRI, RegKills, KillOps);
+ }
+
+ // Skip over the same register.
+ std::multimap<unsigned, int>::iterator NI = next(I);
+ while (NI != E && NI->first == Reg) {
+ ++I;
+ ++NI;
+ }
+ }
+
+ for (std::set<unsigned>::iterator I = NotAvailable.begin(),
+ E = NotAvailable.end(); I != E; ++I) {
+ ClobberPhysReg(*I);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
+ *SubRegs; ++SubRegs)
+ ClobberPhysReg(*SubRegs);
+ }
+}
+
+/// ModifyStackSlotOrReMat - This method is called when the value in a stack
+/// slot changes. This removes information about which register the previous
+/// value for this slot lives in (as the previous value is dead now).
+void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
+ std::map<int, unsigned>::iterator It =
+ SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
+ if (It == SpillSlotsOrReMatsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+ SpillSlotsOrReMatsAvailable.erase(It);
+
+ // This register may hold the value of multiple stack slots, only remove this
+ // stack slot from the set of values the register contains.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ for (; ; ++I) {
+ assert(I != PhysRegsAvailable.end() && I->first == Reg &&
+ "Map inverse broken!");
+ if (I->second == SlotOrReMat) break;
+ }
+ PhysRegsAvailable.erase(I);
+}
+
+// ************************** //
+// Reuse Info Implementation //
+// ************************** //
+
+/// GetRegForReload - We are about to emit a reload into PhysReg. If there
+/// is some other operand that is using the specified register, either pick
+/// a new register to use, or evict the previous reload and use this reg.
+unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ const TargetInstrInfo* TII = MI->getParent()->getParent()->getTarget()
+ .getInstrInfo();
+
+ if (Reuses.empty()) return PhysReg; // This is most often empty.
+
+ for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+ ReusedOp &Op = Reuses[ro];
+ // If we find some other reuse that was supposed to use this register
+ // exactly for its reload, we can change this reload to use ITS reload
+ // register. That is, unless its reload register has already been
+ // considered and subsequently rejected because it has also been reused
+ // by another operand.
+ if (Op.PhysRegReused == PhysReg &&
+ Rejected.count(Op.AssignedPhysReg) == 0) {
+ // Yup, use the reload register that we didn't use before.
+ unsigned NewReg = Op.AssignedPhysReg;
+ Rejected.insert(PhysReg);
+ return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps, VRM);
+ } else {
+ // Otherwise, we might also have a problem if a previously reused
+ // value aliases the new register. If so, codegen the previous reload
+ // and use this one.
+ unsigned PRRU = Op.PhysRegReused;
+ const TargetRegisterInfo *TRI = Spills.getRegInfo();
+ if (TRI->areAliases(PRRU, PhysReg)) {
+ // Okay, we found out that an alias of a reused register
+ // was used. This isn't good because it means we have
+ // to undo a previous reuse.
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *AliasRC =
+ MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
+
+ // Copy Op out of the vector and remove it, we're going to insert an
+ // explicit load for it.
+ ReusedOp NewOp = Op;
+ Reuses.erase(Reuses.begin()+ro);
+
+ // Ok, we're going to try to reload into the assigned physreg from the
+ // slot that we were supposed to use in the first place. However, that
+ // register could hold a reuse. Check to see if it conflicts or
+ // would prefer us to use a different register.
+ unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg,
+ MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+
+ MachineBasicBlock::iterator MII = MI;
+ if (NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT) {
+ ReMaterialize(*MBB, MII, NewPhysReg, NewOp.VirtReg, TII, TRI,VRM);
+ } else {
+ TII->loadRegFromStackSlot(*MBB, MII, NewPhysReg,
+ NewOp.StackSlotOrReMat, AliasRC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
+ // Any stores to this stack slot are not dead anymore.
+ MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
+ ++NumLoads;
+ }
+ Spills.ClobberPhysReg(NewPhysReg);
+ Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+ unsigned SubIdx = MI->getOperand(NewOp.Operand).getSubReg();
+ unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg;
+ MI->getOperand(NewOp.Operand).setReg(RReg);
+ MI->getOperand(NewOp.Operand).setSubReg(0);
+
+ Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
+ --MII;
+ UpdateKills(*MII, TRI, RegKills, KillOps);
+ DOUT << '\t' << *MII;
+
+ DOUT << "Reuse undone!\n";
+ --NumReused;
+
+ // Finally, PhysReg is now available, go ahead and use it.
+ return PhysReg;
+ }
+ }
+ }
+ return PhysReg;
+}
+
+// ************************************************************************ //
+
+/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
+/// stack slot mod/ref. It also checks if it's possible to unfold the
+/// instruction by having it define a specified physical register instead.
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+ return false;
+
+ bool Found = false;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ if (MR & VirtRegMap::isModRef)
+ if (VRM.getStackSlot(VirtReg) == SS) {
+ Found = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+ break;
+ }
+ }
+ if (!Found)
+ return false;
+
+ // Does the instruction use a register that overlaps the scratch register?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!VRM.hasPhys(Reg))
+ continue;
+ Reg = VRM.getPhys(Reg);
+ }
+ if (TRI->regsOverlap(PhysReg, Reg))
+ return false;
+ }
+ return true;
+}
+
+/// FindFreeRegister - Find a free register of a given register class by looking
+/// at (at most) the last two machine instructions.
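+/// A register counts as free here if one of those instructions killed it and
+/// nothing has defined or used it since, e.g. (illustrative):
+///   xorq %r12<kill>, %r13   ; %r12 dies here
+///   addq %rax, -184(%rbp)   ; %r12 neither used nor redefined
+/// so %r12 may be handed out as a scratch register.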
+static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
+ MachineBasicBlock &MBB,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ BitVector &AllocatableRegs) {
+ BitVector Defs(TRI->getNumRegs());
+ BitVector Uses(TRI->getNumRegs());
+ SmallVector<unsigned, 4> LocalUses;
+ SmallVector<unsigned, 4> Kills;
+
+ // Take a look at 2 instructions at most.
+ for (unsigned Count = 0; Count < 2; ++Count) {
+ if (MII == MBB.begin())
+ break;
+ MachineInstr *PrevMI = prior(MII);
+ for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = PrevMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef()) {
+ Defs.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.set(*AS);
+ } else {
+ LocalUses.push_back(Reg);
+ if (MO.isKill() && AllocatableRegs[Reg])
+ Kills.push_back(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned Kill = Kills[i];
+ if (!Defs[Kill] && !Uses[Kill] &&
+ TRI->getPhysicalRegisterRegClass(Kill) == RC)
+ return Kill;
+ }
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.set(*AS);
+ }
+
+ MII = PrevMI;
+ }
+
+ return 0;
+}
+
+static
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == VirtReg)
+ MO.setReg(PhysReg);
+ }
+}
+
+namespace {
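+ // Orders (instruction, distance) pairs by ascending distance; callers pop
+ // from the back of a sorted vector to visit the latest reference first.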
+ struct RefSorter {
+ bool operator()(const std::pair<MachineInstr*, int> &A,
+ const std::pair<MachineInstr*, int> &B) {
+ return A.second < B.second;
+ }
+ };
+}
+
+// ***************************** //
+// Local Spiller Implementation //
+// ***************************** //
+
+class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter {
+ MachineRegisterInfo *RegInfo;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ BitVector AllocatableRegs;
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+public:
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ RegInfo = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ DOUT << "\n**** Local spiller rewriting function '"
+ << MF.getFunction()->getName() << "':\n";
+ DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)"
+ " ****\n";
+ DEBUG(MF.dump());
+
+ // Spills - Keep track of which spilled values are available in physregs
+ // so that we can choose to reuse the physregs instead of emitting
+ // reloads. This is usually refreshed per basic block.
+ AvailableSpills Spills(TRI, TII);
+
+ // Keep track of kill information.
+ BitVector RegKills(TRI->getNumRegs());
+ std::vector<MachineOperand*> KillOps;
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ // SingleEntrySuccs - Successor blocks which have a single predecessor.
+ SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+ SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+ // Traverse the basic blocks depth first.
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ if (!EarlyVisited.count(MBB))
+ RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+
+ // If this MBB is the only predecessor of a successor, keep the
+ // availability information and visit it next.
+ do {
+ // Keep visiting single predecessor successor as long as possible.
+ SinglePredSuccs.clear();
+ findSinglePredSuccessor(MBB, SinglePredSuccs);
+ if (SinglePredSuccs.empty())
+ MBB = 0;
+ else {
+ // FIXME: There may be more than one successor, each of which has
+ // MBB as its only predecessor.
+ MBB = SinglePredSuccs[0];
+ if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+ Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+ RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+ }
+ }
+ } while (MBB);
+
+ // Clear the availability info.
+ Spills.clear();
+ }
+
+ DOUT << "**** Post Machine Instrs ****\n";
+ DEBUG(MF.dump());
+
+ // Mark unused spill slots.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int SS = VRM.getLowSpillSlot();
+ if (SS != VirtRegMap::NO_STACK_SLOT)
+ for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS)
+ if (!VRM.isSpillSlotUsed(SS)) {
+ MFI->RemoveStackObject(SS);
+ ++NumDSS;
+ }
+
+ return true;
+ }
+
+private:
+
+ /// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+ /// a scratch register is available.
+ /// xorq %r12<kill>, %r13
+ /// addq %rax, -184(%rbp)
+ /// addq %r13, -184(%rbp)
+ /// ==>
+ /// xorq %r12<kill>, %r13
+ /// movq -184(%rbp), %r12
+ /// addq %rax, %r12
+ /// addq %r13, %r12
+ /// movq %r12, -184(%rbp)
+ bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+
+ MachineBasicBlock::iterator NextMII = next(MII);
+ if (NextMII == MBB.end())
+ return false;
+
+ if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+ return false;
+
+ // Now let's see if the last couple of instructions happen to have freed
+ // up a register.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs);
+ if (!PhysReg)
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ TRI = MF.getTarget().getRegisterInfo();
+ MachineInstr &MI = *MII;
+ if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM))
+ return false;
+
+ // If the next instruction also folds the same SS modref and can be unfolded,
+ // then it's worthwhile to issue a load from SS into the free register and
+ // then unfold these instructions.
+ if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
+ return false;
+
+ // Load from SS to the spare physical register.
+ TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+ // Remember it's available.
+ Spills.addAvailable(SS, PhysReg);
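+ // Any stores to this stack slot are not dead anymore.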
+ MaybeDeadStores[SS] = NULL;
+
+ // Unfold current MI.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+ assert(0 && "Unable unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+ VRM.transferRestorePts(&MI, NewMIs[0]);
+ MII = MBB.insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ ++NumModRefUnfold;
+
+ // Unfold next instructions that fold the same SS.
+ do {
+ MachineInstr &NextMI = *NextMII;
+ NextMII = next(NextMII);
+ NewMIs.clear();
+ if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+ assert(0 && "Unable unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+ VRM.transferRestorePts(&NextMI, NewMIs[0]);
+ MBB.insert(NextMII, NewMIs[0]);
+ InvalidateKills(NextMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&NextMI);
+ MBB.erase(&NextMI);
+ ++NumModRefUnfold;
+ } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM));
+
+ // Store the value back into SS.
+ TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC);
+ MachineInstr *StoreMI = prior(NextMII);
+ VRM.addSpillSlotUse(SS, StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+ return true;
+ }
+
+ /// OptimizeByUnfold - Turn a store folding instruction into a load folding
+ /// instruction. e.g.
+ /// xorl %edi, %eax
+ /// movl %eax, -32(%ebp)
+ /// movl -36(%ebp), %eax
+ /// orl %eax, -32(%ebp)
+ /// ==>
+ /// xorl %edi, %eax
+ /// orl -36(%ebp), %eax
+ /// mov %eax, -32(%ebp)
+ /// This enables unfolding optimization for a subsequent instruction which will
+ /// also eliminate the newly introduced store instruction.
+ bool OptimizeByUnfold(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr &MI = *MII;
+ unsigned UnfoldedOpc = 0;
+ unsigned UnfoldPR = 0;
+ unsigned UnfoldVR = 0;
+ int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
+ // Only transform an MI that folds a single register.
+ if (UnfoldedOpc)
+ return false;
+ UnfoldVR = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ // MI2VirtMap can be updated, which invalidates the iterator.
+ // Increment the iterator first.
+ ++I;
+ if (VRM.isAssignedReg(UnfoldVR))
+ continue;
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ FoldedSS = VRM.getStackSlot(UnfoldVR);
+ MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+ if (DeadStore && (MR & VirtRegMap::isModRef)) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+ if (!PhysReg || !DeadStore->readsRegister(PhysReg))
+ continue;
+ UnfoldPR = PhysReg;
+ UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ false, true);
+ }
+ }
+
+ if (!UnfoldedOpc) {
+ if (!UnfoldVR)
+ return false;
+
+ // Look for other unfolding opportunities.
+ return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII,
+ MaybeDeadStores, Spills, RegKills, KillOps, VRM);
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
+ continue;
+ if (VRM.isAssignedReg(VirtReg)) {
+ unsigned PhysReg = VRM.getPhys(VirtReg);
+ if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ } else if (VRM.isReMaterialized(VirtReg))
+ continue;
+ int SS = VRM.getStackSlot(VirtReg);
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ if (PhysReg) {
+ if (TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ continue;
+ }
+ if (VRM.hasPhys(VirtReg)) {
+ PhysReg = VRM.getPhys(VirtReg);
+ if (!TRI->regsOverlap(PhysReg, UnfoldPR))
+ continue;
+ }
+
+ // Ok, we'll need to reload the value into a register which makes
+ // it impossible to perform the store unfolding optimization later.
+ // Let's see if it is possible to fold the load if the store is
+ // unfolded. This allows us to perform the store unfolding
+ // optimization.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+ assert(NewMIs.size() == 1);
+ MachineInstr *NewMI = NewMIs.back();
+ NewMIs.clear();
+ int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
+ assert(Idx != -1);
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(Idx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
+ if (FoldedMI) {
+ VRM.addSpillSlotUse(SS, FoldedMI);
+ if (!VRM.hasPhys(UnfoldVR))
+ VRM.assignVirt2Phys(UnfoldVR, UnfoldPR);
+ VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ MII = MBB.insert(MII, FoldedMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ MF.DeleteMachineInstr(NewMI);
+ return true;
+ }
+ MF.DeleteMachineInstr(NewMI);
+ }
+ }
+
+ return false;
+ }
+
+ /// CommuteToFoldReload -
+ /// Look for
+ /// r1 = load fi#1
+ /// r1 = op r1, r2<kill>
+ /// store r1, fi#1
+ ///
+ /// If op is commutable and r2 is killed, then we can xform these to
+ /// r2 = op r2, fi#1
+ /// store r2, fi#1
+ bool CommuteToFoldReload(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned VirtReg, unsigned SrcReg, int SS,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (MII == MBB.begin() || !MII->killsRegister(SrcReg))
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr &MI = *MII;
+ MachineBasicBlock::iterator DefMII = prior(MII);
+ MachineInstr *DefMI = DefMII;
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ unsigned NewDstIdx;
+ if (DefMII != MBB.begin() &&
+ TID.isCommutable() &&
+ TII->CommuteChangesDestination(DefMI, NewDstIdx)) {
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+ return false;
+ MachineInstr *ReloadMI = prior(DefMII);
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+ if (DestReg != SrcReg || FrameIdx != SS)
+ return false;
+ int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+ if (UseIdx == -1)
+ return false;
+ unsigned DefIdx;
+ if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
+ return false;
+ assert(DefMI->getOperand(DefIdx).isReg() &&
+ DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+ // Now commute def instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+ if (!CommutedMI)
+ return false;
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(NewDstIdx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+ // Not needed since foldMemoryOperand returns a new MI.
+ MF.DeleteMachineInstr(CommutedMI);
+ if (!FoldedMI)
+ return false;
+
+ VRM.addSpillSlotUse(SS, FoldedMI);
+ VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ // Insert new def MI and spill MI.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC);
+ MII = prior(MII);
+ MachineInstr *StoreMI = MII;
+ VRM.addSpillSlotUse(SS, StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ MII = MBB.insert(MII, FoldedMI); // Update MII to backtrack.
+
+ // Delete all 3 old instructions.
+ InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(ReloadMI);
+ MBB.erase(ReloadMI);
+ InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DefMI);
+ MBB.erase(DefMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+
+ // If NewReg was previously holding value of some SS, it's now clobbered.
+ // This has to be done now because it's a physical register. When this
+ // instruction is re-visited, it's ignored.
+ Spills.ClobberPhysReg(NewReg);
+
+ ++NumCommutes;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+ /// the last store to the same slot is now dead. If so, remove the last store.
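+ /// e.g. (illustrative):
+ ///   store %eax, fi#1   ; never read before the next store
+ ///   ...
+ ///   store %ebx, fi#1   ; makes the first store dead, so it is removed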
+ void SpillRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+
+ TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC);
+ MachineInstr *StoreMI = next(MII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ DOUT << "Store:\t" << *StoreMI;
+
+ // If there is a dead store to this stack slot, nuke it now.
+ if (LastStore) {
+ DOUT << "Removed dead store:\t" << *LastStore;
+ ++NumDSE;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+ MachineBasicBlock::iterator PrevMII = LastStore;
+ bool CheckDef = PrevMII != MBB.begin();
+ if (CheckDef)
+ --PrevMII;
+ VRM.RemoveMachineInstrFromMaps(LastStore);
+ MBB.erase(LastStore);
+ if (CheckDef) {
+ // Look at defs of killed registers on the store. Mark the defs
+ // as dead since the store has been deleted and they aren't
+ // being reused.
+ for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+ bool HasOtherDef = false;
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) {
+ MachineInstr *DeadDef = PrevMII;
+ if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+ // FIXME: This assumes a remat def does not have side
+ // effects.
+ VRM.RemoveMachineInstrFromMaps(DeadDef);
+ MBB.erase(DeadDef);
+ ++NumDRM;
+ }
+ }
+ }
+ }
+ }
+
+ LastStore = next(MII);
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register available,
+ // in PhysReg.
+ Spills.ModifyStackSlotOrReMat(StackSlot);
+ Spills.ClobberPhysReg(PhysReg);
+ Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+ ++NumStores;
+ }
+
+ /// TransferDeadness - An identity copy definition is dead and is being
+ /// removed. Find the last def or use and mark it as dead / kill.
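+ /// e.g. (illustrative):
+ ///   %eax = op ...      ; last reference to %eax before the copy
+ ///   %eax = %eax        ; dead identity copy being deleted
+ /// => the def above is marked dead (or a later use is marked kill).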
+ void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
+ unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallPtrSet<MachineInstr*, 4> Seens;
+ SmallVector<std::pair<MachineInstr*, int>,8> Refs;
+ for (MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(Reg),
+ RE = RegInfo->reg_end(); RI != RE; ++RI) {
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ if (DI == DistanceMap.end() || DI->second > CurDist)
+ continue;
+ if (Seens.insert(UDMI))
+ Refs.push_back(std::make_pair(UDMI, DI->second));
+ }
+
+ if (Refs.empty())
+ return;
+ std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+ while (!Refs.empty()) {
+ MachineInstr *LastUDMI = Refs.back().first;
+ Refs.pop_back();
+
+ MachineOperand *LastUD = NULL;
+ for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastUDMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (!LastUD || (LastUD->isUse() && MO.isDef()))
+ LastUD = &MO;
+ if (LastUDMI->isRegTiedToDefOperand(i))
+ break;
+ }
+ if (LastUD->isDef()) {
+ // If the instruction has no side effects, delete it and propagate
+ // backward further. Otherwise, mark it dead and we are done.
+ const TargetInstrDesc &TID = LastUDMI->getDesc();
+ if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.hasUnmodeledSideEffects()) {
+ LastUD->setIsDead();
+ break;
+ }
+ VRM.RemoveMachineInstrFromMaps(LastUDMI);
+ MBB->erase(LastUDMI);
+ } else {
+ LastUD->setIsKill();
+ RegKills.set(Reg);
+ KillOps[Reg] = LastUD;
+ break;
+ }
+ }
+ }
+
+ /// RewriteMBB - Keep track of which spills are available even after the
+ /// register allocator is done with them. If possible, avoid reloading vregs.
+ void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+ LiveIntervals *LIs,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ DOUT << "\n**** Local spiller rewriting MBB '"
+ << MBB.getBasicBlock()->getName() << "':\n";
+
+ MachineFunction &MF = *MBB.getParent();
+
+ // MaybeDeadStores - When we need to write a value back into a stack slot,
+ // keep track of the inserted store. If the stack slot value is never read
+ // (because the value was used from some available register, for example), and
+ // subsequently stored to, the original store is dead. This map keeps track
+ // of inserted stores that are not used. If we see a subsequent store to the
+ // same stack slot, the original store is deleted.
+ std::vector<MachineInstr*> MaybeDeadStores;
+ MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+ // ReMatDefs - These are rematerializable def MIs which are not deleted.
+ SmallSet<MachineInstr*, 4> ReMatDefs;
+
+ // Clear kill info.
+ SmallSet<unsigned, 2> KilledMIRegs;
+ RegKills.reset();
+ KillOps.clear();
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MII != E; ) {
+ MachineBasicBlock::iterator NextMII = next(MII);
+
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ bool Erased = false;
+ bool BackTracked = false;
+ if (OptimizeByUnfold(MBB, MII,
+ MaybeDeadStores, Spills, RegKills, KillOps, VRM))
+ NextMII = next(MII);
+
+ MachineInstr &MI = *MII;
+
+ if (VRM.hasEmergencySpills(&MI)) {
+ // Spill physical register(s) in the rare case the allocator has run out
+ // of registers to allocate.
+ SmallSet<int, 4> UsedSS;
+ std::vector<unsigned> &EmSpills = VRM.getEmergencySpills(&MI);
+ for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+ unsigned PhysReg = EmSpills[i];
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(PhysReg);
+ assert(RC && "Unable to determine register class!");
+ int SS = VRM.getEmergencySpillSlot(RC);
+ if (UsedSS.count(SS))
+ assert(0 && "Need to spill more than one physical registers!");
+ UsedSS.insert(SS);
+ TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC);
+ MachineInstr *StoreMI = prior(MII);
+ VRM.addSpillSlotUse(SS, StoreMI);
+ TII->loadRegFromStackSlot(MBB, next(MII), PhysReg, SS, RC);
+ MachineInstr *LoadMI = next(MII);
+ VRM.addSpillSlotUse(SS, LoadMI);
+ ++NumPSpills;
+ }
+ NextMII = next(MII);
+ }
+
+ // Insert restores here if asked to.
+ if (VRM.isRestorePt(&MI)) {
+ std::vector<unsigned> &RestoreRegs = VRM.getRestorePtRestores(&MI);
+ for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+ unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
+ if (!VRM.getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ unsigned Phys = VRM.getPhys(VirtReg);
+ RegInfo->setPhysRegUsed(Phys);
+
+ // Check if the value being restored is available. If so, it must be
+ // from a predecessor BB that falls through into this BB. We do not
+ // expect:
+ // BB1:
+ // r1 = load fi#1
+ // ...
+ // = r1<kill>
+ // ... # r1 not clobbered
+ // ...
+ // = load fi#1
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+ if (InReg == Phys) {
+ // If the value is already available in the expected register, save
+ // a reload / remat.
+ if (SSorRMId > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << SSorRMId;
+ DOUT << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(Phys) << "\n";
+ ++NumOmitted;
+ continue;
+ } else if (InReg && InReg != Phys) {
+ if (SSorRMId > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << SSorRMId;
+ DOUT << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" by copying it into physreg "
+ << TRI->getName(Phys) << "\n";
+
+ // If the reloaded / remat value is available in another register,
+ // copy it to the desired register.
+ TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC);
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ // Mark it killed.
+ MachineInstr *CopyMI = prior(MII);
+ MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ DOUT << '\t' << *CopyMI;
+ ++NumCopified;
+ continue;
+ }
+
+ if (VRM.isReMaterialized(VirtReg)) {
+ ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM);
+ } else {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ }
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ UpdateKills(*prior(MII), TRI, RegKills, KillOps);
+ DOUT << '\t' << *prior(MII);
+ }
+ }
+
+ // Insert spills here if asked to.
+ if (VRM.isSpillPt(&MI)) {
+ std::vector<std::pair<unsigned,bool> > &SpillRegs =
+ VRM.getSpillPtSpills(&MI);
+ for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+ unsigned VirtReg = SpillRegs[i].first;
+ bool isKill = SpillRegs[i].second;
+ if (!VRM.getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+ unsigned Phys = VRM.getPhys(VirtReg);
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC);
+ MachineInstr *StoreMI = next(MII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ DOUT << "Store:\t" << *StoreMI;
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ }
+ NextMII = next(MII);
+ }
+
+ /// ReusedOperands - Keep track of operand reuse in case we need to undo
+ /// reuse.
+ ReuseInfo ReusedOperands(MI, TRI);
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ RegInfo->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+ // If the virtual register is implicitly defined, emit an implicit_def
+ // before so the scavenger knows it's "defined".
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+ }
+
+ // Process all of the spilled uses and all non spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ MachineOperand &MO = MI.getOperand(i);
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MO.getSubReg();
+ if (VRM.isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM.getPhys(VirtReg);
+ RegInfo->setPhysRegUsed(Phys);
+ if (MO.isDef())
+ ReusedOperands.markClobbered(Phys);
+ unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ if (VRM.isImplicitlyDefined(VirtReg))
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MO.isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = false;
+ if (LIs->hasInterval(VirtReg)) {
+ LiveInterval &LI = LIs->getInterval(VirtReg);
+ if (!LI.liveAt(LIs->getUseIndex(LI.beginNumber())))
+ // Must be defined by an implicit def. It should not be spilled. Note,
+ // this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and both target registers would overlap.
+ AvoidReload = true;
+ }
+
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly so for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because it's not in
+ // the right register file.
+ if (PhysReg && !AvoidReload &&
+ (SubIdx || MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)) {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << ReuseSlot;
+ DOUT << " from physreg "
+ << TRI->getName(PhysReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(VRM.getPhys(VirtReg)) << "\n";
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // The only technical detail we have is that we don't know that
+ // PhysReg won't be clobbered by a reloaded stack slot that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM.getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot, we'll
+ // remove it after the other operands are processed if valid.
+
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+ // Mark it isKill if there are no other uses of the same virtual
+ // register and it's not a two-address operand. IsKill will be
+ // unset if the reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
+ unsigned DesignatedReg = VRM.getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+
+ // If the mapped designated register is actually the physreg we have
+ // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << ReuseSlot;
+ DOUT << " from physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg
+ << " instead of reloading into same physreg.\n";
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ RegInfo->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+ TII->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC);
+
+ MachineInstr *CopyMI = prior(MII);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DOUT << '\t' << *prior(MII);
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM.getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+
+ RegInfo->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ if (DoReMat) {
+ ReMaterialize(MBB, MII, PhysReg, VirtReg, TII, TRI, VRM);
+ } else {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(MII), TRI, RegKills, KillOps);
+ DOUT << '\t' << *prior(MII);
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DOUT << "Removed dead store:\t" << *DeadStore;
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DeadStore);
+ MBB.erase(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+
+
+ DOUT << '\t' << MI;
+
+
+ // If we have folded references to memory operands, make sure we clear all
+ // physical registers that may contain the value of the spilled virtual
+ // register.
+ SmallSet<int, 2> FoldedSS;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ DOUT << "Folded vreg: " << VirtReg << " MR: " << MR;
+
+ // MI2VirtMap can be updated, which invalidates the iterator.
+ // Increment the iterator first.
+ ++I;
+ int SS = VRM.getStackSlot(VirtReg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ continue;
+ FoldedSS.insert(SS);
+ DOUT << " - StackSlot: " << SS << "\n";
+
+ // If this folded instruction is just a use, check to see if it's a
+ // straight load from the virt reg slot.
+ if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+ if (DestReg && FrameIdx == SS) {
+ // If this spill slot is available, turn it into a copy (or nothing)
+ // instead of leaving it as a load!
+ if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+ DOUT << "Promoted Load To Copy: " << MI;
+ if (DestReg != InReg) {
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+ TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
+ MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+ unsigned SubIdx = DefMO->getSubReg();
+ // Revisit the copy so we make sure to notice the effects of the
+ // operation on the destreg (either needing to RA it if it's
+ // virtual or needing to clobber any values if it's physical).
+ NextMII = &MI;
+ --NextMII; // backtrack to the copy.
+ // Propagate the sub-register index over.
+ if (SubIdx) {
+ DefMO = NextMII->findRegisterDefOperand(DestReg);
+ DefMO->setSubReg(SubIdx);
+ }
+
+ // Mark it killed.
+ MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+
+ BackTracked = true;
+ } else {
+ DOUT << "Removing now-noop copy: " << MI;
+ // Unset last kill since it's being reused.
+ InvalidateKill(InReg, TRI, RegKills, KillOps);
+ Spills.disallowClobberPhysReg(InReg);
+ }
+
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ goto ProcessNextInst;
+ }
+ } else {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (PhysReg &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+ MBB.insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+ }
+ }
+
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ MachineInstr* DeadStore = MaybeDeadStores[SS];
+ if (DeadStore) {
+ bool isDead = !(MR & VirtRegMap::isRef);
+ MachineInstr *NewStore = NULL;
+ if (MR & VirtRegMap::isModRef) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ // We can reuse this physreg as long as we are allowed to clobber
+ // the value and there isn't an earlier def that has already clobbered
+ // the physreg.
+ if (PhysReg &&
+ !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg) &&
+ !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+ MachineOperand *KillOpnd =
+ DeadStore->findRegisterUseOperand(PhysReg, true);
+ // Note, if the store is storing a sub-register, it's possible the
+ // super-register is needed below.
+ if (KillOpnd && !KillOpnd->getSubReg() &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
+ MBB.insert(MII, NewMIs[0]);
+ NewStore = NewMIs[1];
+ MBB.insert(MII, NewStore);
+ VRM.addSpillSlotUse(SS, NewStore);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ --NextMII;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ isDead = true;
+ ++NumSUnfold;
+ }
+ }
+ }
+
+ if (isDead) { // Previous store is dead.
+ // If we get here, the store is dead, nuke it now.
+ DOUT << "Removed dead store:\t" << *DeadStore;
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DeadStore);
+ MBB.erase(DeadStore);
+ if (!NewStore)
+ ++NumDSE;
+ }
+
+ MaybeDeadStores[SS] = NULL;
+ if (NewStore) {
+ // Treat this store as a spill merged into a copy. That makes the
+ // stack slot value available.
+ VRM.virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
+ goto ProcessNextInst;
+ }
+ }
+
+ // If the spill slot value is available, and this is a new definition of
+ // the value, the value is not available anymore.
+ if (MR & VirtRegMap::isMod) {
+ // Notice that the value in this stack slot has been modified.
+ Spills.ModifyStackSlotOrReMat(SS);
+
+ // If this is *just* a mod of the value, check to see if this is just a
+ // store to the spill slot (i.e. the spill got merged into the copy). If
+ // so, realize that the vreg is available now, and add the store to the
+ // MaybeDeadStore info.
+ int StackSlot;
+ if (!(MR & VirtRegMap::isRef)) {
+ if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Src hasn't been allocated yet?");
+
+ if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
+ Spills, RegKills, KillOps, TRI, VRM)) {
+ NextMII = next(MII);
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+
+ // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
+ // this as a potentially dead store in case there is a subsequent
+ // store into the stack slot without a read from it.
+ MaybeDeadStores[StackSlot] = &MI;
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register
+ // available in PhysReg.
+ Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
+ }
+ }
+ }
+ }
+
+ // Process all of the spilled defs.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!(MO.isReg() && MO.getReg() && MO.isDef()))
+ continue;
+
+ unsigned VirtReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ unsigned Src, Dst, SrcSR, DstSR;
+ if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+ ++NumDCE;
+ DOUT << "Removing now-noop copy: " << MI;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+ if (MO.isDead() && !KillRegs.empty()) {
+ // Source register or an implicit super/sub-register use is killed.
+ assert(KillRegs[0] == Dst ||
+ TRI->isSubRegister(KillRegs[0], Dst) ||
+ TRI->isSuperRegister(KillRegs[0], Dst));
+ // Last def is now dead.
+ TransferDeadness(&MBB, Dist, Src, RegKills, KillOps, VRM);
+ }
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
+
+ // If it's not a no-op copy, it clobbers the value in the destreg.
+ Spills.ClobberPhysReg(VirtReg);
+ ReusedOperands.markClobbered(VirtReg);
+
+ // Check to see if this instruction is a load from a stack slot into
+ // a register. If so, this provides the stack slot value in the reg.
+ int FrameIdx;
+ if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ assert(DestReg == VirtReg && "Unknown load situation!");
+
+ // If it is a folded reference, then it's not safe to clobber.
+ bool Folded = FoldedSS.count(FrameIdx);
+ // Otherwise, if it wasn't available, remember that it is now!
+ Spills.addAvailable(FrameIdx, DestReg, !Folded);
+ goto ProcessNextInst;
+ }
+
+ continue;
+ }
+
+ unsigned SubIdx = MO.getSubReg();
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ if (DoReMat)
+ ReMatDefs.insert(&MI);
+
+ // The only vregs left are stack slot definitions.
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+
+ // If this def is part of a two-address operand, make sure to execute
+ // the store from the correct physical register.
+ unsigned PhysReg;
+ unsigned TiedOp;
+ if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+ PhysReg = MI.getOperand(TiedOp).getReg();
+ if (SubIdx) {
+ unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+ assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+ "Can't find corresponding super-register!");
+ PhysReg = SuperReg;
+ }
+ } else {
+ PhysReg = VRM.getPhys(VirtReg);
+ if (ReusedOperands.isClobbered(PhysReg)) {
+ // Another def has taken the assigned physreg. It must have been a
+ // use&def which got it due to reuse. Undo the reuse!
+ PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+ }
+ }
+
+ assert(PhysReg && "VR not assigned a physical register?");
+ RegInfo->setPhysRegUsed(PhysReg);
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ ReusedOperands.markClobbered(RReg);
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ if (!MO.isDead()) {
+ MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+ SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true,
+ LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM);
+ NextMII = next(MII);
+
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ {
+ unsigned Src, Dst, SrcSR, DstSR;
+ if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+ ++NumDCE;
+ DOUT << "Removing now-noop copy: " << MI;
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ UpdateKills(*LastStore, TRI, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
+ }
+ }
+ }
+ ProcessNextInst:
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+ if (!Erased && !BackTracked) {
+ for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+ UpdateKills(*II, TRI, RegKills, KillOps);
+ }
+ MII = NextMII;
+ }
+
+ }
+
+};
+
+llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
+ switch (RewriterOpt) {
+ default: assert(0 && "Unreachable!");
+ case local:
+ return new LocalRewriter();
+ case simple:
+ return new SimpleRewriter();
+ case trivial:
+ return new TrivialRewriter();
+ }
+}
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
new file mode 100644
index 0000000..bc830f7
--- /dev/null
+++ b/lib/CodeGen/VirtRegRewriter.h
@@ -0,0 +1,55 @@
+//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
+#define LLVM_CODEGEN_VIRTREGREWRITER_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "VirtRegMap.h"
+#include <map>
+
+// TODO:
+// - Finish renaming Spiller -> Rewriter
+// - SimpleSpiller
+// - LocalSpiller
+
+namespace llvm {
+
+ /// VirtRegRewriter interface: Implementations of this interface assign
+ /// spilled virtual registers to stack slots, rewriting the code.
+ struct VirtRegRewriter {
+ virtual ~VirtRegRewriter();
+ virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) = 0;
+ };
+
+ /// createVirtRegRewriter - Create and return a rewriter object, as specified
+ /// on the command line.
+ VirtRegRewriter* createVirtRegRewriter();
+
+}
+
+#endif
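Usage sketch (annotation between patches, not applied by the diff): the factory above is the sole entry point to this interface. A minimal sketch of a caller, assuming it already owns a populated VirtRegMap and LiveIntervals; the helper name runRewriter is hypothetical:

    // Minimal sketch; ownership of the returned rewriter passes to the caller.
    #include "VirtRegRewriter.h"

    static bool runRewriter(llvm::MachineFunction &MF, llvm::VirtRegMap &VRM,
                            llvm::LiveIntervals *LIs) {
      // Picks the local/simple/trivial implementation selected on the
      // command line (the RewriterOpt option in VirtRegRewriter.cpp).
      llvm::VirtRegRewriter *RW = llvm::createVirtRegRewriter();
      bool Changed = RW->runOnMachineFunction(MF, VRM, LIs);
      delete RW;
      return Changed;
    }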
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
new file mode 100644
index 0000000..c0a1b84
--- /dev/null
+++ b/lib/CompilerDriver/Action.cpp
@@ -0,0 +1,78 @@
+//===--- Action.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Action class - implementation and auxiliary functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/Action.h"
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/System/Program.h"
+
+#include <iostream>
+#include <stdexcept>
+
+using namespace llvm;
+using namespace llvmc;
+
+extern cl::opt<bool> DryRun;
+extern cl::opt<bool> VerboseMode;
+
+namespace {
+ int ExecuteProgram(const std::string& name,
+ const StrVector& args) {
+ sys::Path prog = sys::Program::FindProgramByName(name);
+
+ if (prog.isEmpty())
+ throw std::runtime_error("Can't find program '" + name + "'");
+ if (!prog.canExecute())
+ throw std::runtime_error("Program '" + name + "' is not executable.");
+
+ // Build the command line vector and the redirects array.
+ const sys::Path* redirects[3] = {0,0,0};
+ sys::Path stdout_redirect;
+
+ std::vector<const char*> argv;
+ argv.reserve(args.size() + 2);
+ argv.push_back(name.c_str());
+
+ for (StrVector::const_iterator B = args.begin(), E = args.end();
+ B!=E; ++B) {
+ if (*B == ">") {
+ ++B;
+ stdout_redirect.set(*B);
+ redirects[1] = &stdout_redirect;
+ }
+ else {
+ argv.push_back((*B).c_str());
+ }
+ }
+ argv.push_back(0); // null terminate list.
+
+ // Invoke the program.
+ return sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
+ }
+
+ void print_string (const std::string& str) {
+ std::cerr << str << ' ';
+ }
+}
+
+int llvmc::Action::Execute() const {
+ if (DryRun || VerboseMode) {
+ std::cerr << Command_ << " ";
+ std::for_each(Args_.begin(), Args_.end(), print_string);
+ std::cerr << '\n';
+ }
+ if (DryRun)
+ return 0;
+ else
+ return ExecuteProgram(Command_, Args_);
+}
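Usage sketch (annotation between patches, not applied by the diff): Execute() runs Command_ with Args_, honoring the '>' stdout-redirection convention implemented in ExecuteProgram() above. The Action constructor signature and the StrVector typedef are assumed from the members and parameter types used in this file; the tool and file names are illustrative:

    // Minimal sketch, assuming Action(command, args) fills Command_/Args_.
    #include "llvm/CompilerDriver/Action.h"

    int runExample() {
      llvmc::StrVector Args;
      Args.push_back("-c");
      Args.push_back("hello.c");
      Args.push_back(">");          // redirect the tool's stdout...
      Args.push_back("hello.log");  // ...into the following file
      llvmc::Action A("gcc", Args);
      return A.Execute();           // tool's exit code; 0 when DryRun is set
    }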
diff --git a/lib/CompilerDriver/CMakeLists.txt b/lib/CompilerDriver/CMakeLists.txt
new file mode 100644
index 0000000..153dd44
--- /dev/null
+++ b/lib/CompilerDriver/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS support system)
+set(LLVM_REQUIRES_EH 1)
+
+add_llvm_tool(llvmc
+ Action.cpp
+ CompilationGraph.cpp
+ llvmc.cpp
+ Plugin.cpp
+ Tool.cpp
+ )
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
new file mode 100644
index 0000000..dece4e8
--- /dev/null
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -0,0 +1,536 @@
+//===--- CompilationGraph.cpp - The LLVM Compiler Driver --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Compilation graph - implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/CompilationGraph.h"
+#include "llvm/CompilerDriver/Error.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/GraphWriter.h"
+
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <queue>
+#include <stdexcept>
+
+using namespace llvm;
+using namespace llvmc;
+
+extern cl::list<std::string> InputFilenames;
+extern cl::list<std::string> Languages;
+
+namespace llvmc {
+
+ const std::string& LanguageMap::GetLanguage(const sys::Path& File) const {
+ LanguageMap::const_iterator Lang = this->find(File.getSuffix());
+ if (Lang == this->end())
+ throw std::runtime_error("Unknown suffix: " + File.getSuffix());
+ return Lang->second;
+ }
+}
+
+namespace {
+
+ /// ChooseEdge - Return the edge with the maximum weight.
+ template <class C>
+ const Edge* ChooseEdge(const C& EdgesContainer,
+ const InputLanguagesSet& InLangs,
+ const std::string& NodeName = "root") {
+ const Edge* MaxEdge = 0;
+ unsigned MaxWeight = 0;
+ bool SingleMax = true;
+
+ for (typename C::const_iterator B = EdgesContainer.begin(),
+ E = EdgesContainer.end(); B != E; ++B) {
+ const Edge* e = B->getPtr();
+ unsigned EW = e->Weight(InLangs);
+ if (EW > MaxWeight) {
+ MaxEdge = e;
+ MaxWeight = EW;
+ SingleMax = true;
+ } else if (EW == MaxWeight) {
+ SingleMax = false;
+ }
+ }
+
+ if (!SingleMax)
+ throw std::runtime_error("Node " + NodeName +
+ ": multiple maximal outward edges found!"
+ " Most probably a specification error.");
+ if (!MaxEdge)
+ throw std::runtime_error("Node " + NodeName +
+ ": no maximal outward edge found!"
+ " Most probably a specification error.");
+ return MaxEdge;
+ }
+
+}
+
+void Node::AddEdge(Edge* Edg) {
+ // If there already was an edge between two nodes, modify it instead
+ // of adding a new edge.
+ const std::string& ToolName = Edg->ToolName();
+ for (container_type::iterator B = OutEdges.begin(), E = OutEdges.end();
+ B != E; ++B) {
+ if ((*B)->ToolName() == ToolName) {
+ llvm::IntrusiveRefCntPtr<Edge>(Edg).swap(*B);
+ return;
+ }
+ }
+ OutEdges.push_back(llvm::IntrusiveRefCntPtr<Edge>(Edg));
+}
+
+CompilationGraph::CompilationGraph() {
+ NodesMap["root"] = Node(this);
+}
+
+Node& CompilationGraph::getNode(const std::string& ToolName) {
+ nodes_map_type::iterator I = NodesMap.find(ToolName);
+ if (I == NodesMap.end())
+ throw std::runtime_error("Node " + ToolName + " is not in the graph");
+ return I->second;
+}
+
+const Node& CompilationGraph::getNode(const std::string& ToolName) const {
+ nodes_map_type::const_iterator I = NodesMap.find(ToolName);
+ if (I == NodesMap.end())
+ throw std::runtime_error("Node " + ToolName + " is not in the graph!");
+ return I->second;
+}
+
+// Find the tools list corresponding to the given language name.
+const CompilationGraph::tools_vector_type&
+CompilationGraph::getToolsVector(const std::string& LangName) const
+{
+ tools_map_type::const_iterator I = ToolsMap.find(LangName);
+ if (I == ToolsMap.end())
+ throw std::runtime_error("No tool corresponding to the language "
+ + LangName + " found");
+ return I->second;
+}
+
+void CompilationGraph::insertNode(Tool* V) {
+ if (NodesMap.count(V->Name()) == 0)
+ NodesMap[V->Name()] = Node(this, V);
+}
+
+void CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
+ Node& B = getNode(Edg->ToolName());
+ if (A == "root") {
+ const char** InLangs = B.ToolPtr->InputLanguages();
+ for (;*InLangs; ++InLangs)
+ ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg));
+ NodesMap["root"].AddEdge(Edg);
+ }
+ else {
+ Node& N = getNode(A);
+ N.AddEdge(Edg);
+ }
+ // Increase the inward edge counter.
+ B.IncrInEdges();
+}
+
+// Pass input file through the chain until we bump into a Join node or
+// a node that says that it is the last.
+void CompilationGraph::PassThroughGraph (const sys::Path& InFile,
+ const Node* StartNode,
+ const InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) const {
+ sys::Path In = InFile;
+ const Node* CurNode = StartNode;
+
+ while (true) {
+ Tool* CurTool = CurNode->ToolPtr.getPtr();
+
+ if (CurTool->IsJoin()) {
+ JoinTool& JT = dynamic_cast<JoinTool&>(*CurTool);
+ JT.AddToJoinList(In);
+ break;
+ }
+
+ Action CurAction = CurTool->GenerateAction(In, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap);
+
+ if (int ret = CurAction.Execute())
+ throw error_code(ret);
+
+ if (CurAction.StopCompilation())
+ return;
+
+ CurNode = &getNode(ChooseEdge(CurNode->OutEdges,
+ InLangs,
+ CurNode->Name())->ToolName());
+ In = CurAction.OutFile();
+ }
+}
+
+// Find the head of the toolchain corresponding to the given file.
+// Also, insert an input language into InLangs.
+const Node* CompilationGraph::
+FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
+ InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
+
+ // Determine the input language.
+ const std::string& InLanguage =
+ ForceLanguage ? *ForceLanguage : LangMap.GetLanguage(In);
+
+ // Add the current input language to the input language set.
+ InLangs.insert(InLanguage);
+
+ // Find the toolchain for the input language.
+ const tools_vector_type& TV = getToolsVector(InLanguage);
+ if (TV.empty())
+ throw std::runtime_error("No toolchain corresponding to language "
+ + InLanguage + " found");
+ return &getNode(ChooseEdge(TV, InLangs)->ToolName());
+}
+
+// Helper function used by Build().
+// Traverses initial portions of the toolchains (up to the first Join node).
+// This function is also responsible for handling the -x option.
+void CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) {
+ // This is related to -x option handling.
+ cl::list<std::string>::const_iterator xIter = Languages.begin(),
+ xBegin = xIter, xEnd = Languages.end();
+ bool xEmpty = true;
+ const std::string* xLanguage = 0;
+ unsigned xPos = 0, xPosNext = 0, filePos = 0;
+
+ if (xIter != xEnd) {
+ xEmpty = false;
+ xPos = Languages.getPosition(xIter - xBegin);
+ cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
+ xPosNext = (xNext == xEnd) ? std::numeric_limits<unsigned>::max()
+ : Languages.getPosition(xNext - xBegin);
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
+
+ // For each input file:
+ for (cl::list<std::string>::const_iterator B = InputFilenames.begin(),
+ CB = B, E = InputFilenames.end(); B != E; ++B) {
+ sys::Path In = sys::Path(*B);
+
+ // Code for handling the -x option.
+ // Output: std::string* xLanguage (can be NULL).
+ if (!xEmpty) {
+ filePos = InputFilenames.getPosition(B - CB);
+
+ if (xPos < filePos) {
+ if (filePos < xPosNext) {
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
+ else { // filePos >= xPosNext
+ // Skip xIters while filePos > xPosNext
+ while (filePos > xPosNext) {
+ ++xIter;
+ xPos = xPosNext;
+
+ cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
+ if (xNext == xEnd)
+ xPosNext = std::numeric_limits<unsigned>::max();
+ else
+ xPosNext = Languages.getPosition(xNext - xBegin);
+ xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+ }
+ }
+ }
+ }
+
+ // Find the toolchain corresponding to this file.
+ const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap);
+ // Pass file through the chain starting at head.
+ PassThroughGraph(In, N, InLangs, TempDir, LangMap);
+ }
+}
+
+// Sort the nodes in topological order.
+void CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
+ std::queue<const Node*> Q;
+ Q.push(&getNode("root"));
+
+ while (!Q.empty()) {
+ const Node* A = Q.front();
+ Q.pop();
+ Out.push_back(A);
+ for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
+ EB != EE; ++EB) {
+ Node* B = &getNode((*EB)->ToolName());
+ B->DecrInEdges();
+ if (B->HasNoInEdges())
+ Q.push(B);
+ }
+ }
+}
+
+namespace {
+ bool NotJoinNode(const Node* N) {
+ return N->ToolPtr ? !N->ToolPtr->IsJoin() : true;
+ }
+}
+
+// Call TopologicalSort and filter the resulting list to include
+// only Join nodes.
+void CompilationGraph::
+TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) {
+ std::vector<const Node*> TopSorted;
+ TopologicalSort(TopSorted);
+ std::remove_copy_if(TopSorted.begin(), TopSorted.end(),
+ std::back_inserter(Out), NotJoinNode);
+}
+
+int CompilationGraph::Build (const sys::Path& TempDir,
+ const LanguageMap& LangMap) {
+
+ InputLanguagesSet InLangs;
+
+ // Traverse initial parts of the toolchains and fill in InLangs.
+ BuildInitial(InLangs, TempDir, LangMap);
+
+ std::vector<const Node*> JTV;
+ TopologicalSortFilterJoinNodes(JTV);
+
+ // For all join nodes in topological order:
+ for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end();
+ B != E; ++B) {
+
+ const Node* CurNode = *B;
+ JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
+
+ // Are there any files in the join list?
+ if (JT->JoinListEmpty())
+ continue;
+
+ Action CurAction = JT->GenerateAction(CurNode->HasChildren(),
+ TempDir, InLangs, LangMap);
+
+ if (int ret = CurAction.Execute())
+ throw error_code(ret);
+
+ if (CurAction.StopCompilation())
+ return 0;
+
+ const Node* NextNode = &getNode(ChooseEdge(CurNode->OutEdges, InLangs,
+ CurNode->Name())->ToolName());
+ PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
+ InLangs, TempDir, LangMap);
+ }
+
+ return 0;
+}
+
+int CompilationGraph::CheckLanguageNames() const {
+ int ret = 0;
+ // Check that names for output and input languages on all edges do match.
+ for (const_nodes_iterator B = this->NodesMap.begin(),
+ E = this->NodesMap.end(); B != E; ++B) {
+
+ const Node & N1 = B->second;
+ if (N1.ToolPtr) {
+ for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd();
+ EB != EE; ++EB) {
+ const Node& N2 = this->getNode((*EB)->ToolName());
+
+ if (!N2.ToolPtr) {
+ ++ret;
+ std::cerr << "Error: there is an edge from '" << N1.ToolPtr->Name()
+ << "' back to the root!\n\n";
+ continue;
+ }
+
+ const char* OutLang = N1.ToolPtr->OutputLanguage();
+ const char** InLangs = N2.ToolPtr->InputLanguages();
+ bool eq = false;
+ for (;*InLangs; ++InLangs) {
+ if (std::strcmp(OutLang, *InLangs) == 0) {
+ eq = true;
+ break;
+ }
+ }
+
+ if (!eq) {
+ ++ret;
+ std::cerr << "Error: Output->input language mismatch in the edge '" <<
+ N1.ToolPtr->Name() << "' -> '" << N2.ToolPtr->Name() << "'!\n";
+
+ std::cerr << "Expected one of { ";
+
+ InLangs = N2.ToolPtr->InputLanguages();
+ for (;*InLangs; ++InLangs) {
+ std::cerr << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'");
+ }
+
+ std::cerr << " }, but got '" << OutLang << "'!\n\n";
+ }
+
+ }
+ }
+ }
+
+ return ret;
+}
+
+int CompilationGraph::CheckMultipleDefaultEdges() const {
+ int ret = 0;
+ InputLanguagesSet Dummy;
+
+ // For all nodes, just iterate over the outgoing edges and check if there is
+ // more than one edge with maximum weight.
+ for (const_nodes_iterator B = this->NodesMap.begin(),
+ E = this->NodesMap.end(); B != E; ++B) {
+ const Node& N = B->second;
+ unsigned MaxWeight = 0;
+
+ // Ignore the root node.
+ if (!N.ToolPtr)
+ continue;
+
+ for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd();
+ EB != EE; ++EB) {
+ unsigned EdgeWeight = (*EB)->Weight(Dummy);
+ if (EdgeWeight > MaxWeight) {
+ MaxWeight = EdgeWeight;
+ }
+ else if (EdgeWeight == MaxWeight) {
+ ++ret;
+ std::cerr
+ << "Error: there are multiple maximal edges stemming from the '"
+ << N.ToolPtr->Name() << "' node!\n\n";
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+int CompilationGraph::CheckCycles() {
+ unsigned deleted = 0;
+ std::queue<Node*> Q;
+ Q.push(&getNode("root"));
+
+ // Try to delete all nodes that have no incoming edges, starting from the
+ // root. If there are any nodes left after this operation, then we have a
+ // cycle. This relies on '--check-graph' not performing the topological sort.
+ while (!Q.empty()) {
+ Node* A = Q.front();
+ Q.pop();
+ ++deleted;
+
+ for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
+ EB != EE; ++EB) {
+ Node* B = &getNode((*EB)->ToolName());
+ B->DecrInEdges();
+ if (B->HasNoInEdges())
+ Q.push(B);
+ }
+ }
+
+ if (deleted != NodesMap.size()) {
+ std::cerr << "Error: there are cycles in the compilation graph!\n"
+ << "Try inspecting the diagram produced by "
+ "'llvmc --view-graph'.\n\n";
+ return 1;
+ }
+
+ return 0;
+}
+
+int CompilationGraph::Check () {
+ // We try to catch as many errors as we can in one go.
+ int ret = 0;
+
+ // Check that output/input language names match.
+ ret += this->CheckLanguageNames();
+
+ // Check for multiple default edges.
+ ret += this->CheckMultipleDefaultEdges();
+
+ // Check for cycles.
+ ret += this->CheckCycles();
+
+ return ret;
+}
+
+// Code related to graph visualization.
+
+namespace llvm {
+ template <>
+ struct DOTGraphTraits<llvmc::CompilationGraph*>
+ : public DefaultDOTGraphTraits
+ {
+
+ template<typename GraphType>
+ static std::string getNodeLabel(const Node* N, const GraphType&)
+ {
+ if (N->ToolPtr)
+ if (N->ToolPtr->IsJoin())
+ return N->Name() + "\n (join" +
+ (N->HasChildren() ? ")"
+ : std::string(": ") + N->ToolPtr->OutputLanguage() + ')');
+ else
+ return N->Name();
+ else
+ return "root";
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) {
+ if (N->ToolPtr) {
+ return N->ToolPtr->OutputLanguage();
+ }
+ else {
+ const char** InLangs = I->ToolPtr->InputLanguages();
+ std::string ret;
+
+ for (; *InLangs; ++InLangs) {
+ if (*(InLangs + 1)) {
+ ret += *InLangs;
+ ret += ", ";
+ }
+ else {
+ ret += *InLangs;
+ }
+ }
+
+ return ret;
+ }
+ }
+ };
+
+}
+
+void CompilationGraph::writeGraph(const std::string& OutputFilename) {
+ std::ofstream O(OutputFilename.c_str());
+
+ if (O.good()) {
+ std::cerr << "Writing '"<< OutputFilename << "' file...";
+ llvm::WriteGraph(O, this);
+ std::cerr << "done.\n";
+ O.close();
+ }
+ else {
+ throw std::runtime_error("Error opening file '" + OutputFilename
+ + "' for writing!");
+ }
+}
+
+void CompilationGraph::viewGraph() {
+ llvm::ViewGraph(this, "compilation-graph");
+}
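Usage sketch (annotation between patches, not applied by the diff): building and validating a graph by hand with the public pieces above. The Tool instances are supplied by the caller, and SimpleEdge is assumed to be the weight-1 Edge subclass declared in CompilationGraph.h:

    // Minimal sketch: wire root -> compiler -> linker, then run the same
    // checks '--check-graph' uses.
    #include "llvm/CompilerDriver/CompilationGraph.h"

    int checkToolchain(llvmc::Tool *Compiler, llvmc::Tool *Linker) {
      llvmc::CompilationGraph G;
      G.insertNode(Compiler);   // no-op if a node for this tool exists
      G.insertNode(Linker);
      // An edge from "root" also registers the target tool for each of
      // its input languages (see insertEdge() above).
      G.insertEdge("root", new llvmc::SimpleEdge(Compiler->Name()));
      G.insertEdge(Compiler->Name(), new llvmc::SimpleEdge(Linker->Name()));
      return G.Check();  // number of problems found; 0 means the graph is sane
    }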
diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile
new file mode 100644
index 0000000..e5bf3e1
--- /dev/null
+++ b/lib/CompilerDriver/Makefile
@@ -0,0 +1,19 @@
+##===- lib/CompilerDriver/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open
+# Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+
+# We don't want this library to appear in `llvm-config --libs` output, so its
+# name doesn't start with "LLVM".
+
+LIBRARYNAME = CompilerDriver
+LINK_COMPONENTS = support system
+REQUIRES_EH := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp
new file mode 100644
index 0000000..75abbd0
--- /dev/null
+++ b/lib/CompilerDriver/Plugin.cpp
@@ -0,0 +1,73 @@
+//===--- Plugin.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Plugin support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/Plugin.h"
+
+#include <algorithm>
+#include <vector>
+
+namespace {
+
+ // Registry::Add<> does not do lifetime management (probably issues
+ // with static constructor/destructor ordering), so we have to
+ // implement it here.
+ //
+ // All this static registration/life-before-main model seems
+ // unnecessarily convoluted to me.
+
+ static bool pluginListInitialized = false;
+ typedef std::vector<const llvmc::BasePlugin*> PluginList;
+ static PluginList Plugins;
+
+ struct ByPriority {
+ bool operator()(const llvmc::BasePlugin* lhs,
+ const llvmc::BasePlugin* rhs) {
+ return lhs->Priority() < rhs->Priority();
+ }
+ };
+}
+
+namespace llvmc {
+
+ PluginLoader::PluginLoader() {
+ if (!pluginListInitialized) {
+ for (PluginRegistry::iterator B = PluginRegistry::begin(),
+ E = PluginRegistry::end(); B != E; ++B)
+ Plugins.push_back(B->instantiate());
+ std::sort(Plugins.begin(), Plugins.end(), ByPriority());
+ }
+ pluginListInitialized = true;
+ }
+
+ PluginLoader::~PluginLoader() {
+ if (pluginListInitialized) {
+ for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
+ B != E; ++B)
+ delete (*B);
+ }
+ pluginListInitialized = false;
+ }
+
+ void PluginLoader::PopulateLanguageMap(LanguageMap& langMap) {
+ for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
+ B != E; ++B)
+ (*B)->PopulateLanguageMap(langMap);
+ }
+
+ void PluginLoader::PopulateCompilationGraph(CompilationGraph& graph) {
+ for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
+ B != E; ++B)
+ (*B)->PopulateCompilationGraph(graph);
+ }
+
+}
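Usage sketch (annotation between patches, not applied by the diff): what a plugin looks like from the loader's point of view. The virtual methods are inferred from the calls PluginLoader makes above; RegisterPlugin<> is assumed to be the PluginRegistry::Add<> wrapper declared in Plugin.h:

    // Minimal sketch of a plugin. Priority() drives the std::sort above;
    // lower-priority plugins populate the map and graph first.
    #include "llvm/CompilerDriver/Plugin.h"

    namespace {
      struct HelloPlugin : public llvmc::BasePlugin {
        int Priority() const { return 10; }
        void PopulateLanguageMap(llvmc::LanguageMap &M) const {
          M["c"] = "c";  // map the ".c" suffix to the "c" language
        }
        void PopulateCompilationGraph(llvmc::CompilationGraph &) const {
          // insertNode()/insertEdge() calls would go here.
        }
      };

      llvmc::RegisterPlugin<HelloPlugin> X("Hello", "toy example plugin");
    }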
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
new file mode 100644
index 0000000..886b26b
--- /dev/null
+++ b/lib/CompilerDriver/Tool.cpp
@@ -0,0 +1,74 @@
+//===--- Tool.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tool base class - implementation details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/Tool.h"
+
+#include "llvm/System/Path.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvmc;
+
+extern cl::opt<std::string> OutputFilename;
+
+namespace {
+ sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName,
+ const std::string& Suffix) {
+ sys::Path Out;
+
+ // Make sure we don't end up with path names like '/file.o' if the
+ // TempDir is empty.
+ if (TempDir.empty()) {
+ Out.set(BaseName);
+ }
+ else {
+ Out = TempDir;
+ Out.appendComponent(BaseName);
+ }
+ Out.appendSuffix(Suffix);
+ // NOTE: makeUnique always *creates* a unique temporary file,
+ // which is good, since there will be no races. However, some
+ // tools do not like it when the output file already exists, so
+ // they have to be placated with -f or something like that.
+ Out.makeUnique(true, NULL);
+ return Out;
+ }
+}
+
+sys::Path Tool::OutFilename(const sys::Path& In,
+ const sys::Path& TempDir,
+ bool StopCompilation,
+ const char* OutputSuffix) const {
+ sys::Path Out;
+
+ if (StopCompilation) {
+ if (!OutputFilename.empty()) {
+ Out.set(OutputFilename);
+ }
+ else if (IsJoin()) {
+ Out.set("a");
+ Out.appendSuffix(OutputSuffix);
+ }
+ else {
+ Out.set(In.getBasename());
+ Out.appendSuffix(OutputSuffix);
+ }
+ }
+ else {
+ if (IsJoin())
+ Out = MakeTempFile(TempDir, "tmp", OutputSuffix);
+ else
+ Out = MakeTempFile(TempDir, In.getBasename(), OutputSuffix);
+ }
+ return Out;
+}
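For reference (annotation between patches, not applied by the diff), the four cases OutFilename() distinguishes, taken from the code above; the wrapper function and paths are illustrative only:

    // Minimal sketch of the OutFilename() cases.
    void demo(llvmc::Tool *SomeTool) {
      llvm::sys::Path In("foo.c"), TempDir("/tmp/llvmc");

      // Last step, -o given:       the OutputFilename value, verbatim.
      // Last step, join tool:      "a" + suffix (e.g. "a.out").
      // Last step, ordinary tool:  input basename + suffix ("foo.o").
      llvm::sys::Path FinalOut =
          SomeTool->OutFilename(In, TempDir, /*StopCompilation=*/true, "o");

      // Intermediate step: a unique temp file under TempDir, based on
      // "tmp" for join tools and on the input basename otherwise.
      llvm::sys::Path TempOut =
          SomeTool->OutFilename(In, TempDir, /*StopCompilation=*/false, "o");
    }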
diff --git a/lib/Debugger/CMakeLists.txt b/lib/Debugger/CMakeLists.txt
new file mode 100644
index 0000000..d2508cf
--- /dev/null
+++ b/lib/Debugger/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_llvm_library(LLVMDebugger
+ Debugger.cpp
+ ProgramInfo.cpp
+ RuntimeInfo.cpp
+ SourceFile.cpp
+ SourceLanguage-CFamily.cpp
+ SourceLanguage-CPlusPlus.cpp
+ SourceLanguage-Unknown.cpp
+ SourceLanguage.cpp
+ )
diff --git a/lib/Debugger/Debugger.cpp b/lib/Debugger/Debugger.cpp
new file mode 100644
index 0000000..b12d90a
--- /dev/null
+++ b/lib/Debugger/Debugger.cpp
@@ -0,0 +1,230 @@
+//===-- Debugger.cpp - LLVM debugger library implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the main implementation of the LLVM debugger library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debugger/Debugger.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Debugger/InferiorProcess.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cstdlib>
+#include <memory>
+using namespace llvm;
+
+/// Debugger constructor - Initialize the debugger to its initial, empty, state.
+///
+Debugger::Debugger() : Environment(0), Program(0), Process(0) {
+}
+
+Debugger::~Debugger() {
+ // Killing the program could throw an exception. We don't want to propagate
+ // the exception out of our destructor though.
+ try {
+ killProgram();
+ } catch (const char *) {
+ } catch (const std::string &) {
+ }
+
+ unloadProgram();
+}
+
+/// getProgramPath - Get the path of the currently loaded program, or an
+/// empty string if none is loaded.
+std::string Debugger::getProgramPath() const {
+ return Program ? Program->getModuleIdentifier() : "";
+}
+
+static Module *
+getMaterializedModuleProvider(const std::string &Filename) {
+ std::auto_ptr<MemoryBuffer> Buffer;
+ Buffer.reset(MemoryBuffer::getFileOrSTDIN(Filename.c_str()));
+ if (Buffer.get())
+ return ParseBitcodeFile(Buffer.get());
+ return 0;
+}
+
+/// loadProgram - If a program is currently loaded, unload it. Then search
+/// the PATH for the specified program, loading it when found. If the
+/// specified program cannot be found, an exception is thrown to indicate the
+/// error.
+void Debugger::loadProgram(const std::string &Filename) {
+ if ((Program = getMaterializedModuleProvider(Filename)) ||
+ (Program = getMaterializedModuleProvider(Filename+".bc")))
+ return; // Successfully loaded the program.
+
+ // Search the program path for the file...
+ if (const char *PathS = getenv("PATH")) {
+ std::string Path = PathS;
+
+ std::string Directory = getToken(Path, ":");
+ while (!Directory.empty()) {
+ if ((Program = getMaterializedModuleProvider(Directory +"/"+ Filename)) ||
+ (Program = getMaterializedModuleProvider(Directory +"/"+ Filename
+ + ".bc")))
+ return; // Successfully loaded the program.
+
+ Directory = getToken(Path, ":");
+ }
+ }
+
+ throw "Could not find program '" + Filename + "'!";
+}
+
+/// unloadProgram - If a program is running, kill it, then unload all traces
+/// of the current program. If no program is loaded, this method silently
+/// succeeds.
+void Debugger::unloadProgram() {
+ if (!isProgramLoaded()) return;
+ killProgram();
+ delete Program;
+ Program = 0;
+}
+
+
+/// createProgram - Create an instance of the currently loaded program,
+/// killing off any existing one. This creates the program and stops it at
+/// the first possible moment. If there is no program loaded or if there is a
+/// problem starting the program, this method throws an exception.
+void Debugger::createProgram() {
+ if (!isProgramLoaded())
+ throw "Cannot start program: none is loaded.";
+
+ // Kill any existing program.
+ killProgram();
+
+ // Add argv[0] to the arguments vector.
+ std::vector<std::string> Args(ProgramArguments);
+ Args.insert(Args.begin(), getProgramPath());
+
+ // Start the new program... this could throw if the program cannot be started.
+ Process = InferiorProcess::create(Program, Args, Environment);
+}
+
+InferiorProcess *
+InferiorProcess::create(Module *M, const std::vector<std::string> &Arguments,
+ const char * const *envp) {
+ throw"No supported binding to inferior processes (debugger not implemented).";
+}
+
+/// killProgram - If the program is currently executing, kill off the
+/// process and free up any state related to the currently running program. If
+/// there is no program currently running, this just silently succeeds.
+void Debugger::killProgram() {
+ // The destructor takes care of the dirty work.
+ try {
+ delete Process;
+ } catch (...) {
+ Process = 0;
+ throw;
+ }
+ Process = 0;
+}
+
+/// stepProgram - Implement the 'step' command, continuing execution until
+/// the next possible stop point.
+void Debugger::stepProgram() {
+ assert(isProgramRunning() && "Cannot step if the program isn't running!");
+ try {
+ Process->stepProgram();
+ } catch (InferiorProcessDead &IPD) {
+ killProgram();
+ throw NonErrorException("The program stopped with exit code " +
+ itostr(IPD.getExitCode()));
+ } catch (...) {
+ killProgram();
+ throw;
+ }
+}
+
+/// nextProgram - Implement the 'next' command, continuing execution until
+/// the next possible stop point that is in the current function.
+void Debugger::nextProgram() {
+ assert(isProgramRunning() && "Cannot next if the program isn't running!");
+ try {
+ // This should step the process. If the process enters a function, then it
+ // should 'finish' it. However, figuring this out is tricky. In
+ // particular, the program can do any of:
+ // 0. Not change current frame.
+ // 1. Entering or exiting a region within the current function
+ // (which changes the frame ID, but which we shouldn't 'finish')
+ // 2. Exiting the current function (which changes the frame ID)
+ // 3. Entering a function (which should be 'finish'ed)
+ // For this reason, we have to be very careful about when we decide to do
+ // the 'finish'.
+
+ // Get the current frame, but don't trust it. It could change...
+ void *CurrentFrame = Process->getPreviousFrame(0);
+
+ // Don't trust the current frame: get the caller frame.
+ void *ParentFrame = Process->getPreviousFrame(CurrentFrame);
+
+ // Ok, we have some information, run the program one step.
+ Process->stepProgram();
+
+ // Where is the new frame? The most common case, by far is that it has not
+ // been modified (Case #0), in which case we don't need to do anything more.
+ void *NewFrame = Process->getPreviousFrame(0);
+ if (NewFrame != CurrentFrame) {
+ // Ok, the frame changed. If we are case #1, then the parent frame will
+ // be identical.
+ void *NewParentFrame = Process->getPreviousFrame(NewFrame);
+ if (ParentFrame != NewParentFrame) {
+ // Ok, now we know we aren't case #0 or #1. Check to see if we entered
+ // a new function. If so, the parent frame will be "CurrentFrame".
+ if (CurrentFrame == NewParentFrame)
+ Process->finishProgram(NewFrame);
+ }
+ }
+
+ } catch (InferiorProcessDead &IPD) {
+ killProgram();
+ throw NonErrorException("The program stopped with exit code " +
+ itostr(IPD.getExitCode()));
+ } catch (...) {
+ killProgram();
+ throw;
+ }
+}
+
+/// finishProgram - Implement the 'finish' command, continuing execution
+/// until the specified frame ID returns.
+void Debugger::finishProgram(void *Frame) {
+ assert(isProgramRunning() && "Cannot cont if the program isn't running!");
+ try {
+ Process->finishProgram(Frame);
+ } catch (InferiorProcessDead &IPD) {
+ killProgram();
+ throw NonErrorException("The program stopped with exit code " +
+ itostr(IPD.getExitCode()));
+ } catch (...) {
+ killProgram();
+ throw;
+ }
+}
+
+/// contProgram - Implement the 'cont' command, continuing execution until
+/// the next breakpoint is encountered.
+void Debugger::contProgram() {
+ assert(isProgramRunning() && "Cannot cont if the program isn't running!");
+ try {
+ Process->contProgram();
+ } catch (InferiorProcessDead &IPD) {
+ killProgram();
+ throw NonErrorException("The program stopped with exit code " +
+ itostr(IPD.getExitCode()));
+ } catch (...) {
+ killProgram();
+ throw;
+ }
+}
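Usage sketch (annotation between patches, not applied by the diff): a minimal driver over the API above. The catch clauses mirror what this file actually throws: std::string messages and plain string literals:

    // Minimal sketch: load a program, start it, run to the first stop.
    #include "llvm/Debugger/Debugger.h"
    #include <iostream>

    void runToFirstStop(llvm::Debugger &Dbg, const std::string &Prog) {
      try {
        Dbg.loadProgram(Prog);  // tries Prog, Prog.bc, then searches PATH
        Dbg.createProgram();    // kills any old inferior, stops at entry
        Dbg.contProgram();      // continue to the first breakpoint
      } catch (const std::string &Msg) {
        std::cerr << Msg << '\n';
      } catch (const char *Msg) {
        std::cerr << Msg << '\n';
      }
    }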
diff --git a/lib/Debugger/Makefile b/lib/Debugger/Makefile
new file mode 100644
index 0000000..8290e30
--- /dev/null
+++ b/lib/Debugger/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Debugger/Makefile -------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMDebugger
+EXTRA_DIST = README.txt
+REQUIRES_EH := 1
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Debugger/ProgramInfo.cpp b/lib/Debugger/ProgramInfo.cpp
new file mode 100644
index 0000000..125ff55
--- /dev/null
+++ b/lib/Debugger/ProgramInfo.cpp
@@ -0,0 +1,377 @@
+//===-- ProgramInfo.cpp - Compute and cache info about a program ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ProgramInfo and related classes, by sorting through
+// the loaded Module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debugger/ProgramInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Debugger/SourceFile.h"
+#include "llvm/Debugger/SourceLanguage.h"
+#include "llvm/Support/SlowOperationInformer.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// getGlobalVariablesUsing - Return all of the global variables which have the
+/// specified value in their initializer somewhere.
+static void getGlobalVariablesUsing(Value *V,
+ std::vector<GlobalVariable*> &Found) {
+ for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I))
+ Found.push_back(GV);
+ else if (Constant *C = dyn_cast<Constant>(*I))
+ getGlobalVariablesUsing(C, Found);
+ }
+}
+
+/// getNextStopPoint - Follow the def-use chains of the specified LLVM value,
+/// traversing the use chains until we get to a stoppoint. When we do, return
+/// the source location of the stoppoint. If we don't find a stoppoint, return
+/// null.
+static const GlobalVariable *getNextStopPoint(const Value *V, unsigned &LineNo,
+ unsigned &ColNo) {
+ // The use-def chains can fork. As such, we pick the lowest numbered one we
+ // find.
+ const GlobalVariable *LastDesc = 0;
+ unsigned LastLineNo = ~0;
+ unsigned LastColNo = ~0;
+
+ for (Value::use_const_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ bool ShouldRecurse = true;
+ if (cast<Instruction>(*UI)->getOpcode() == Instruction::PHI) {
+ // Infinite loops == bad, ignore PHI nodes.
+ ShouldRecurse = false;
+ } else if (const CallInst *CI = dyn_cast<CallInst>(*UI)) {
+
+ // If we found a stop point, check to see if it is earlier than what we
+ // already have. If so, remember it.
+ if (CI->getCalledFunction())
+ if (const DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(CI)) {
+ unsigned CurLineNo = SPI->getLine();
+ unsigned CurColNo = SPI->getColumn();
+ const GlobalVariable *CurDesc = 0;
+ const Value *Op = SPI->getContext();
+
+ if ((CurDesc = dyn_cast<GlobalVariable>(Op)) &&
+ (CurLineNo < LastLineNo ||
+ (CurLineNo == LastLineNo && CurColNo < LastColNo))) {
+ LastDesc = CurDesc;
+ LastLineNo = CurLineNo;
+ LastColNo = CurColNo;
+ }
+ ShouldRecurse = false;
+ }
+ }
+
+ // If this is not a phi node or a stopping point, recursively scan the users
+ // of this instruction to skip over region.begin's and the like.
+ if (ShouldRecurse) {
+ unsigned CurLineNo, CurColNo;
+ if (const GlobalVariable *GV = getNextStopPoint(*UI, CurLineNo,CurColNo)){
+ if (CurLineNo < LastLineNo || (CurLineNo == LastLineNo && CurColNo < LastColNo)) {
+ LastDesc = GV;
+ LastLineNo = CurLineNo;
+ LastColNo = CurColNo;
+ }
+ }
+ }
+ }
+
+ if (LastDesc) {
+ LineNo = LastLineNo != ~0U ? LastLineNo : 0;
+ ColNo = LastColNo != ~0U ? LastColNo : 0;
+ }
+ return LastDesc;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SourceFileInfo implementation
+//
+
+SourceFileInfo::SourceFileInfo(const GlobalVariable *Desc,
+ const SourceLanguage &Lang)
+ : Language(&Lang), Descriptor(Desc) {
+ Version = 0;
+ SourceText = 0;
+
+ if (Desc && Desc->hasInitializer())
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
+ if (CS->getNumOperands() > 4) {
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(CS->getOperand(1)))
+ Version = CUI->getZExtValue();
+
+ if (!GetConstantStringInfo(CS->getOperand(3), BaseName))
+ BaseName = "";
+ if (!GetConstantStringInfo(CS->getOperand(4), Directory))
+ Directory = "";
+ }
+}
+
+SourceFileInfo::~SourceFileInfo() {
+ delete SourceText;
+}
+
+SourceFile &SourceFileInfo::getSourceText() const {
+ // FIXME: this should take into account the source search directories!
+ if (SourceText == 0) { // Read the file in if we haven't already.
+ sys::Path tmpPath;
+ if (!Directory.empty())
+ tmpPath.set(Directory);
+ tmpPath.appendComponent(BaseName);
+ if (tmpPath.canRead())
+ SourceText = new SourceFile(tmpPath.toString(), Descriptor);
+ else
+ SourceText = new SourceFile(BaseName, Descriptor);
+ }
+ return *SourceText;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SourceFunctionInfo implementation
+//
+SourceFunctionInfo::SourceFunctionInfo(ProgramInfo &PI,
+ const GlobalVariable *Desc)
+ : Descriptor(Desc) {
+ LineNo = ColNo = 0;
+ if (Desc && Desc->hasInitializer())
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
+ if (CS->getNumOperands() > 2) {
+ // Entry #1 is the file descriptor.
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(CS->getOperand(1)))
+ SourceFile = &PI.getSourceFile(GV);
+
+ // Entry #2 is the function name.
+ if (!GetConstantStringInfo(CS->getOperand(2), Name))
+ Name = "";
+ }
+}
+
+/// getSourceLocation - This method returns the location of the first stopping
+/// point in the function.
+void SourceFunctionInfo::getSourceLocation(unsigned &RetLineNo,
+ unsigned &RetColNo) const {
+ // If we haven't computed this yet...
+ if (!LineNo) {
+ // Look at all of the users of the function descriptor, looking for calls to
+ // %llvm.dbg.func.start.
+ for (Value::use_const_iterator UI = Descriptor->use_begin(),
+ E = Descriptor->use_end(); UI != E; ++UI)
+ if (const CallInst *CI = dyn_cast<CallInst>(*UI))
+ if (const Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::dbg_func_start) {
+ // We found the start of the function. Check to see if there are
+ // any stop points on the use-list of the function start.
+ const GlobalVariable *SD = getNextStopPoint(CI, LineNo, ColNo);
+ if (SD) { // We found the first stop point!
+ // This is just a sanity check.
+ if (getSourceFile().getDescriptor() != SD)
+ cout << "WARNING: first line of function is not in the"
+ << " file that the function descriptor claims it is in.\n";
+ break;
+ }
+ }
+ }
+ RetLineNo = LineNo; RetColNo = ColNo;
+}
+
+//===----------------------------------------------------------------------===//
+// ProgramInfo implementation
+//
+
+ProgramInfo::ProgramInfo(Module *m) : M(m), ProgramTimeStamp(0,0) {
+ assert(M && "Cannot create program information with a null module!");
+ sys::PathWithStatus ModPath(M->getModuleIdentifier());
+ const sys::FileStatus *Stat = ModPath.getFileStatus();
+ if (Stat)
+ ProgramTimeStamp = Stat->getTimestamp();
+
+ SourceFilesIsComplete = false;
+ SourceFunctionsIsComplete = false;
+}
+
+ProgramInfo::~ProgramInfo() {
+ // Delete cached information about source program objects...
+ for (std::map<const GlobalVariable*, SourceFileInfo*>::iterator
+ I = SourceFiles.begin(), E = SourceFiles.end(); I != E; ++I)
+ delete I->second;
+ for (std::map<const GlobalVariable*, SourceFunctionInfo*>::iterator
+ I = SourceFunctions.begin(), E = SourceFunctions.end(); I != E; ++I)
+ delete I->second;
+
+ // Delete the source language caches.
+ for (unsigned i = 0, e = LanguageCaches.size(); i != e; ++i)
+ delete LanguageCaches[i].second;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SourceFileInfo tracking...
+//
+
+/// getSourceFile - Return source file information for the specified source file
+/// descriptor object, adding it to the collection as needed. This method
+/// always succeeds (is unambiguous), and is always efficient.
+///
+const SourceFileInfo &
+ProgramInfo::getSourceFile(const GlobalVariable *Desc) {
+ SourceFileInfo *&Result = SourceFiles[Desc];
+ if (Result) return *Result;
+
+ // Figure out what language this source file comes from...
+ unsigned LangID = 0; // Zero is unknown language
+ if (Desc && Desc->hasInitializer())
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
+ if (CS->getNumOperands() > 2)
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(CS->getOperand(2)))
+ LangID = CUI->getZExtValue();
+
+ const SourceLanguage &Lang = SourceLanguage::get(LangID);
+ SourceFileInfo *New = Lang.createSourceFileInfo(Desc, *this);
+
+ // FIXME: this should check to see if there is already a Filename/WorkingDir
+ // pair that matches this one. If so, we shouldn't create the duplicate!
+ //
+ SourceFileIndex.insert(std::make_pair(New->getBaseName(), New));
+ return *(Result = New);
+}
+
+
+/// getSourceFiles - Index all of the source files in the program and return
+/// a mapping of it. This information is lazily computed the first time
+/// that it is requested. Since this information can take a long time to
+/// compute, the user is given a chance to cancel it. If this occurs, an
+/// exception is thrown.
+const std::map<const GlobalVariable*, SourceFileInfo*> &
+ProgramInfo::getSourceFiles(bool RequiresCompleteMap) {
+ // If we have a fully populated map, or if the client doesn't need one, just
+ // return what we have.
+ if (SourceFilesIsComplete || !RequiresCompleteMap)
+ return SourceFiles;
+
+ // Ok, all of the source file descriptors (compile_unit in DWARF terms)
+ // should be on the use list of the llvm.dbg.translation_units global.
+ //
+ GlobalVariable *Units =
+ M->getGlobalVariable("llvm.dbg.translation_units",
+ StructType::get(std::vector<const Type*>()));
+ if (Units == 0)
+ throw "Program contains no debugging information!";
+
+ std::vector<GlobalVariable*> TranslationUnits;
+ getGlobalVariablesUsing(Units, TranslationUnits);
+
+ SlowOperationInformer SOI("building source files index");
+
+ // Loop over all of the translation units found, building the SourceFiles
+ // mapping.
+ for (unsigned i = 0, e = TranslationUnits.size(); i != e; ++i) {
+ getSourceFile(TranslationUnits[i]);
+ if (SOI.progress(i+1, e))
+ throw "While building source files index, operation cancelled.";
+ }
+
+ // Ok, if we got this far, then we indexed the whole program.
+ SourceFilesIsComplete = true;
+ return SourceFiles;
+}
+
+/// getSourceFile - Look up the file with the specified name. If there is
+/// more than one match for the specified filename, prompt the user to pick
+/// one. If there is no source file that matches the specified name, throw
+/// an exception indicating that we can't find the file. Otherwise, return
+/// the file information for that file.
+const SourceFileInfo &ProgramInfo::getSourceFile(const std::string &Filename) {
+ std::multimap<std::string, SourceFileInfo*>::const_iterator Start, End;
+ getSourceFiles();
+ tie(Start, End) = SourceFileIndex.equal_range(Filename);
+
+ if (Start == End) throw "Could not find source file '" + Filename + "'!";
+ const SourceFileInfo &SFI = *Start->second;
+ ++Start;
+ if (Start == End) return SFI;
+
+ throw "FIXME: Multiple source files with the same name not implemented!";
+}
+
+
+//===----------------------------------------------------------------------===//
+// SourceFunctionInfo tracking...
+//
+
+
+/// getFunction - Return function information for the specified function
+/// descriptor object, adding it to the collection as needed. This method
+/// always succeeds (is unambiguous), and is always efficient.
+///
+const SourceFunctionInfo &
+ProgramInfo::getFunction(const GlobalVariable *Desc) {
+ SourceFunctionInfo *&Result = SourceFunctions[Desc];
+ if (Result) return *Result;
+
+ // Figure out what language this function comes from...
+ const GlobalVariable *SourceFileDesc = 0;
+ if (Desc && Desc->hasInitializer())
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
+ if (CS->getNumOperands() > 1)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(CS->getOperand(1)))
+ SourceFileDesc = GV;
+
+ const SourceLanguage &Lang = getSourceFile(SourceFileDesc).getLanguage();
+ return *(Result = Lang.createSourceFunctionInfo(Desc, *this));
+}
+
+
+// getSourceFunctions - Index all of the functions in the program and return
+// them. This information is lazily computed the first time that it is
+// requested. Since this information can take a long time to compute, the user
+// is given a chance to cancel it. If this occurs, an exception is thrown.
+const std::map<const GlobalVariable*, SourceFunctionInfo*> &
+ProgramInfo::getSourceFunctions(bool RequiresCompleteMap) {
+ if (SourceFunctionsIsComplete || !RequiresCompleteMap)
+ return SourceFunctions;
+
+ // Ok, all of the source function descriptors (subprogram in DWARF terms)
+ // should be on the use list of the llvm.dbg.globals global.
+ //
+ GlobalVariable *Units =
+ M->getGlobalVariable("llvm.dbg.globals",
+ StructType::get(std::vector<const Type*>()));
+ if (Units == 0)
+ throw "Program contains no debugging information!";
+
+ std::vector<GlobalVariable*> Functions;
+ getGlobalVariablesUsing(Units, Functions);
+
+ SlowOperationInformer SOI("building functions index");
+
+ // Loop over all of the functions found, building the SourceFunctions mapping.
+ for (unsigned i = 0, e = Functions.size(); i != e; ++i) {
+ getFunction(Functions[i]);
+ if (SOI.progress(i+1, e))
+ throw "While functions index, operation cancelled.";
+ }
+
+ // Ok, if we got this far, then we indexed the whole program.
+ SourceFunctionsIsComplete = true;
+ return SourceFunctions;
+}
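Usage sketch (annotation between patches, not applied by the diff): enumerating the compile units of a module through the lazy index above. This file throws both plain string literals and std::string messages, so the sketch catches both:

    // Minimal sketch: list every source file named by the module's
    // debug information.
    #include "llvm/Debugger/ProgramInfo.h"
    #include <iostream>

    void listCompileUnits(llvm::Module *M) {
      llvm::ProgramInfo PI(M);
      try {
        typedef std::map<const llvm::GlobalVariable*,
                         llvm::SourceFileInfo*> FileMap;
        const FileMap &Files = PI.getSourceFiles();  // builds the index lazily
        for (FileMap::const_iterator I = Files.begin(), E = Files.end();
             I != E; ++I)
          std::cout << I->second->getBaseName() << '\n';
      } catch (const char *Msg) {         // no debug info, or cancelled
        std::cerr << Msg << '\n';
      } catch (const std::string &Msg) {
        std::cerr << Msg << '\n';
      }
    }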
diff --git a/lib/Debugger/README.txt b/lib/Debugger/README.txt
new file mode 100644
index 0000000..89935c5
--- /dev/null
+++ b/lib/Debugger/README.txt
@@ -0,0 +1,7 @@
+//===-- llvm/lib/Debugger/ - LLVM Debugger interfaces ---------------------===//
+
+This directory contains the implementation of the LLVM debugger backend. This
+directory builds into a library which can be used by various debugger
+front-ends to debug LLVM programs. The command-line LLVM debugger, llvm-db,
+is currently the only client of this library, but others could be built, for
+example to provide a GUI front-end.
diff --git a/lib/Debugger/RuntimeInfo.cpp b/lib/Debugger/RuntimeInfo.cpp
new file mode 100644
index 0000000..2f0ff72
--- /dev/null
+++ b/lib/Debugger/RuntimeInfo.cpp
@@ -0,0 +1,69 @@
+//===-- RuntimeInfo.cpp - Compute and cache info about running program ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RuntimeInfo and related classes, by querying and
+// caching information from the running inferior process.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debugger/InferiorProcess.h"
+#include "llvm/Debugger/ProgramInfo.h"
+#include "llvm/Debugger/RuntimeInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// StackFrame class implementation
+
+StackFrame::StackFrame(RuntimeInfo &ri, void *ParentFrameID)
+ : RI(ri), SourceInfo(0) {
+ FrameID = RI.getInferiorProcess().getPreviousFrame(ParentFrameID);
+ if (FrameID == 0) throw "Stack frame does not exist!";
+
+ // Compute lazily as needed.
+ FunctionDesc = 0;
+}
+
+const GlobalVariable *StackFrame::getFunctionDesc() {
+ if (FunctionDesc == 0)
+ FunctionDesc = RI.getInferiorProcess().getSubprogramDesc(FrameID);
+ return FunctionDesc;
+}
+
+/// getSourceLocation - Return the source location that this stack frame is
+/// sitting at.
+void StackFrame::getSourceLocation(unsigned &lineNo, unsigned &colNo,
+ const SourceFileInfo *&sourceInfo) {
+ if (SourceInfo == 0) {
+ const GlobalVariable *SourceDesc = 0;
+ RI.getInferiorProcess().getFrameLocation(FrameID, LineNo,ColNo, SourceDesc);
+ SourceInfo = &RI.getProgramInfo().getSourceFile(SourceDesc);
+ }
+
+ lineNo = LineNo;
+ colNo = ColNo;
+ sourceInfo = SourceInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// RuntimeInfo class implementation
+
+/// materializeFrame - Create and process all frames up to and including the
+/// specified frame number. This throws an exception if the specified frame
+/// ID is nonexistent.
+void RuntimeInfo::materializeFrame(unsigned ID) {
+ assert(ID >= CallStack.size() && "no need to materialize this frame!");
+ void *CurFrame = 0;
+ if (!CallStack.empty())
+ CurFrame = CallStack.back().getFrameID();
+
+ while (CallStack.size() <= ID) {
+ CallStack.push_back(StackFrame(*this, CurFrame));
+ CurFrame = CallStack.back().getFrameID();
+ }
+}
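Usage sketch (annotation between patches, not applied by the diff): reporting where a frame stopped. getStackFrame() is an assumed RuntimeInfo accessor (the header-side counterpart of materializeFrame() above); everything else comes from this file and ProgramInfo.cpp:

    // Minimal sketch: print "file:line:column" for one stack frame.
    #include "llvm/Debugger/ProgramInfo.h"
    #include "llvm/Debugger/RuntimeInfo.h"
    #include <iostream>

    void printFrameLocation(llvm::RuntimeInfo &RI, unsigned FrameNo) {
      llvm::StackFrame &F = RI.getStackFrame(FrameNo);  // assumed accessor
      unsigned Line, Col;
      const llvm::SourceFileInfo *SFI;
      F.getSourceLocation(Line, Col, SFI);
      std::cout << SFI->getBaseName() << ':' << Line << ':' << Col << '\n';
    }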
diff --git a/lib/Debugger/SourceFile.cpp b/lib/Debugger/SourceFile.cpp
new file mode 100644
index 0000000..03c60f8
--- /dev/null
+++ b/lib/Debugger/SourceFile.cpp
@@ -0,0 +1,82 @@
+//===-- SourceFile.cpp - SourceFile implementation for the debugger -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SourceFile class for the LLVM debugger.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debugger/SourceFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cassert>
+using namespace llvm;
+
+static const char EmptyFile = 0;
+
+SourceFile::SourceFile(const std::string &fn, const GlobalVariable *Desc)
+ : Filename(fn), Descriptor(Desc) {
+ File.reset(MemoryBuffer::getFileOrSTDIN(fn));
+
+ // On error, return an empty buffer.
+ if (File.get() == 0)
+ File.reset(MemoryBuffer::getMemBuffer(&EmptyFile, &EmptyFile));
+}
+
+SourceFile::~SourceFile() {
+}
+
+
+/// calculateLineOffsets - Compute the LineOffset vector for the current file.
+///
+void SourceFile::calculateLineOffsets() const {
+ assert(LineOffset.empty() && "Line offsets already computed!");
+ const char *BufPtr = File->getBufferStart();
+ const char *FileStart = BufPtr;
+ const char *FileEnd = File->getBufferEnd();
+ do {
+ LineOffset.push_back(BufPtr-FileStart);
+
+ // Scan until we get to a newline.
+ while (BufPtr != FileEnd && *BufPtr != '\n' && *BufPtr != '\r')
+ ++BufPtr;
+
+ if (BufPtr != FileEnd) {
+ ++BufPtr; // Skip over the \n or \r
+ if (BufPtr[-1] == '\r' && BufPtr != FileEnd && BufPtr[0] == '\n')
+ ++BufPtr; // Skip over dos/windows style \r\n's
+ }
+ } while (BufPtr != FileEnd);
+}
+
+
+/// getSourceLine - Given a line number, return the start and end of the line
+/// in the file. If the line number is invalid, or if the file could not be
+/// loaded, null pointers are returned for the start and end of the line.  Note
+/// that line numbers start with 0, not 1.
+void SourceFile::getSourceLine(unsigned LineNo, const char *&LineStart,
+ const char *&LineEnd) const {
+ LineStart = LineEnd = 0;
+ if (LineOffset.empty()) calculateLineOffsets();
+
+ // Asking for an out-of-range line number?
+ if (LineNo >= LineOffset.size()) return;
+
+ // Otherwise, they are asking for a valid line, which we can fulfill.
+ LineStart = File->getBufferStart()+LineOffset[LineNo];
+
+ if (LineNo+1 < LineOffset.size())
+ LineEnd = File->getBufferStart()+LineOffset[LineNo+1];
+ else
+ LineEnd = File->getBufferEnd();
+
+ // If the line ended with a newline, strip it off.
+ while (LineEnd != LineStart && (LineEnd[-1] == '\n' || LineEnd[-1] == '\r'))
+ --LineEnd;
+
+ assert(LineEnd >= LineStart && "We somehow got our pointers swizzled!");
+}
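+
+// Illustrative only: a minimal sketch of pulling one line out with
+// getSourceLine; remember that LineNo is zero-based.
+#if 0
+#include <string>
+static std::string getLineAsString(const SourceFile &SF, unsigned LineNo) {
+  const char *Start, *End;
+  SF.getSourceLine(LineNo, Start, End);
+  if (!Start)
+    return std::string();         // invalid line or unreadable file
+  return std::string(Start, End); // trailing newline already stripped
+}
+#endif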
diff --git a/lib/Debugger/SourceLanguage-CFamily.cpp b/lib/Debugger/SourceLanguage-CFamily.cpp
new file mode 100644
index 0000000..f329db4
--- /dev/null
+++ b/lib/Debugger/SourceLanguage-CFamily.cpp
@@ -0,0 +1,28 @@
+//===-- SourceLanguage-CFamily.cpp - C family SourceLanguage impl ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SourceLanguage class for the C family of languages
+// (K&R C, C89, C99, etc).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debugger/SourceLanguage.h"
+using namespace llvm;
+
+#if 0
+namespace {
+ struct CSL : public SourceLanguage {
+ } TheCSourceLanguageInstance;
+}
+#endif
+
+const SourceLanguage &SourceLanguage::getCFamilyInstance() {
+  return get(0); // We don't have an implementation for C yet; fall back on
+                 // the generic language.
+}
diff --git a/lib/Debugger/SourceLanguage-CPlusPlus.cpp b/lib/Debugger/SourceLanguage-CPlusPlus.cpp
new file mode 100644
index 0000000..ce94ff4
--- /dev/null
+++ b/lib/Debugger/SourceLanguage-CPlusPlus.cpp
@@ -0,0 +1,27 @@
+//===-- SourceLanguage-CPlusPlus.cpp - C++ SourceLanguage impl ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SourceLanguage class for the C++ language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debugger/SourceLanguage.h"
+using namespace llvm;
+
+#if 0
+namespace {
+ struct CPPSL : public SourceLanguage {
+ } TheCPlusPlusLanguageInstance;
+}
+#endif
+
+const SourceLanguage &SourceLanguage::getCPlusPlusInstance() {
+  return get(0); // We don't have an implementation for C++ yet; fall back on
+                 // the generic language.
+}
diff --git a/lib/Debugger/SourceLanguage-Unknown.cpp b/lib/Debugger/SourceLanguage-Unknown.cpp
new file mode 100644
index 0000000..b806fc7
--- /dev/null
+++ b/lib/Debugger/SourceLanguage-Unknown.cpp
@@ -0,0 +1,138 @@
+//===-- SourceLanguage-Unknown.cpp - Implement itf for unknown languages --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// If the LLVM debugger does not have a module for a particular language, it
+// falls back on using this one to perform the source-language interface. This
+// interface is not wonderful, but it gets the job done.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debugger/SourceLanguage.h"
+#include "llvm/Debugger/ProgramInfo.h"
+#include "llvm/Support/Streams.h"
+#include <cassert>
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implement the SourceLanguage cache for the Unknown language.
+//
+
+namespace {
+ /// SLUCache - This cache allows for efficient lookup of source functions by
+ /// name.
+ ///
+ struct SLUCache : public SourceLanguageCache {
+ ProgramInfo &PI;
+ std::multimap<std::string, SourceFunctionInfo*> FunctionMap;
+ public:
+ SLUCache(ProgramInfo &pi);
+
+ typedef std::multimap<std::string, SourceFunctionInfo*>::const_iterator
+ fm_iterator;
+
+ std::pair<fm_iterator, fm_iterator>
+ getFunction(const std::string &Name) const {
+ return FunctionMap.equal_range(Name);
+ }
+
+ SourceFunctionInfo *addSourceFunction(SourceFunctionInfo *SF) {
+ FunctionMap.insert(std::make_pair(SF->getSymbolicName(), SF));
+ return SF;
+ }
+ };
+}
+
+SLUCache::SLUCache(ProgramInfo &pi) : PI(pi) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Implement SourceLanguageUnknown class, which is used to handle unrecognized
+// languages.
+//
+
+namespace {
+ static struct SLU : public SourceLanguage {
+ //===------------------------------------------------------------------===//
+ // Implement the miscellaneous methods...
+ //
+ virtual const char *getSourceLanguageName() const {
+ return "unknown";
+ }
+
+ /// lookupFunction - Given a textual function name, return the
+ /// SourceFunctionInfo descriptor for that function, or null if it cannot be
+ /// found. If the program is currently running, the RuntimeInfo object
+ /// provides information about the current evaluation context, otherwise it
+ /// will be null.
+ ///
+ virtual SourceFunctionInfo *lookupFunction(const std::string &FunctionName,
+ ProgramInfo &PI,
+ RuntimeInfo *RI = 0) const;
+
+ //===------------------------------------------------------------------===//
+ // We do use a cache for information...
+ //
+ typedef SLUCache CacheType;
+ SLUCache *createSourceLanguageCache(ProgramInfo &PI) const {
+ return new SLUCache(PI);
+ }
+
+ /// createSourceFunctionInfo - Create the new object and inform the cache of
+ /// the new function.
+ virtual SourceFunctionInfo *
+ createSourceFunctionInfo(const GlobalVariable *Desc, ProgramInfo &PI) const;
+
+ } TheUnknownSourceLanguageInstance;
+}
+
+const SourceLanguage &SourceLanguage::getUnknownLanguageInstance() {
+ return TheUnknownSourceLanguageInstance;
+}
+
+
+SourceFunctionInfo *
+SLU::createSourceFunctionInfo(const GlobalVariable *Desc,
+ ProgramInfo &PI) const {
+ SourceFunctionInfo *Result = new SourceFunctionInfo(PI, Desc);
+ return PI.getLanguageCache(this).addSourceFunction(Result);
+}
+
+
+/// lookupFunction - Given a textual function name, return the
+/// SourceFunctionInfo descriptor for that function, or null if it cannot be
+/// found. If the program is currently running, the RuntimeInfo object
+/// provides information about the current evaluation context, otherwise it will
+/// be null.
+///
+SourceFunctionInfo *SLU::lookupFunction(const std::string &FunctionName,
+ ProgramInfo &PI, RuntimeInfo *RI) const{
+ SLUCache &Cache = PI.getLanguageCache(this);
+ std::pair<SLUCache::fm_iterator, SLUCache::fm_iterator> IP
+ = Cache.getFunction(FunctionName);
+
+ if (IP.first == IP.second) {
+ if (PI.allSourceFunctionsRead())
+ return 0; // Nothing found
+
+ // Otherwise, we might be able to find the function if we read all of them
+ // in. Do so now.
+ PI.getSourceFunctions();
+ assert(PI.allSourceFunctionsRead() && "Didn't read in all functions?");
+ return lookupFunction(FunctionName, PI, RI);
+ }
+
+ SourceFunctionInfo *Found = IP.first->second;
+ ++IP.first;
+ if (IP.first != IP.second)
+ cout << "Whoa, found multiple functions with the same name. I should"
+ << " ask the user which one to use: FIXME!\n";
+ return Found;
+}
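+
+// Illustrative only: the equal_range pattern used above, shown on a plain
+// std::multimap so the cache lookup can be read in isolation.
+#if 0
+#include <map>
+#include <string>
+static int countEntries(const std::multimap<std::string, int> &M,
+                        const std::string &Key) {
+  typedef std::multimap<std::string, int>::const_iterator iter;
+  std::pair<iter, iter> IP = M.equal_range(Key);
+  int N = 0;
+  for (; IP.first != IP.second; ++IP.first)
+    ++N;  // every iterator in [first, second) shares the key
+  return N;
+}
+#endif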
diff --git a/lib/Debugger/SourceLanguage.cpp b/lib/Debugger/SourceLanguage.cpp
new file mode 100644
index 0000000..4fcc38b
--- /dev/null
+++ b/lib/Debugger/SourceLanguage.cpp
@@ -0,0 +1,54 @@
+//===-- SourceLanguage.cpp - Implement the SourceLanguage class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SourceLanguage class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debugger/SourceLanguage.h"
+#include "llvm/Debugger/ProgramInfo.h"
+using namespace llvm;
+
+const SourceLanguage &SourceLanguage::get(unsigned ID) {
+ switch (ID) {
+ case 1: // DW_LANG_C89
+ case 2: // DW_LANG_C
+ case 12: // DW_LANG_C99
+ return getCFamilyInstance();
+
+ case 4: // DW_LANG_C_plus_plus
+ return getCPlusPlusInstance();
+
+ case 3: // DW_LANG_Ada83
+ case 5: // DW_LANG_Cobol74
+ case 6: // DW_LANG_Cobol85
+ case 7: // DW_LANG_Fortran77
+ case 8: // DW_LANG_Fortran90
+ case 9: // DW_LANG_Pascal83
+ case 10: // DW_LANG_Modula2
+ case 11: // DW_LANG_Java
+ case 13: // DW_LANG_Ada95
+ case 14: // DW_LANG_Fortran95
+ default:
+ return getUnknownLanguageInstance();
+ }
+}
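+
+// Illustrative only: a debug info reader would dispatch on the DWARF language
+// code exactly as get() does above; getSourceLanguageName() is assumed to be
+// declared on SourceLanguage.
+#if 0
+static const char *languageName(unsigned DwarfLangCode) {
+  // e.g. 4 (DW_LANG_C_plus_plus) selects the C++ instance; unknown codes
+  // fall back to the generic implementation.
+  return SourceLanguage::get(DwarfLangCode).getSourceLanguageName();
+}
+#endif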
+
+
+SourceFileInfo *
+SourceLanguage::createSourceFileInfo(const GlobalVariable *Desc,
+ ProgramInfo &PI) const {
+ return new SourceFileInfo(Desc, *this);
+}
+
+SourceFunctionInfo *
+SourceLanguage::createSourceFunctionInfo(const GlobalVariable *Desc,
+ ProgramInfo &PI) const {
+ return new SourceFunctionInfo(PI, Desc);
+}
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
new file mode 100644
index 0000000..e26b98f
--- /dev/null
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_partially_linked_object(LLVMExecutionEngine
+ ExecutionEngine.cpp
+ ExecutionEngineBindings.cpp
+ )
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
new file mode 100644
index 0000000..29a05bb
--- /dev/null
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -0,0 +1,1010 @@
+//===-- ExecutionEngine.cpp - Common Implementation shared by EEs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common interface used by the various execution engine
+// subclasses.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/System/DynamicLibrary.h"
+#include "llvm/System/Host.h"
+#include "llvm/Target/TargetData.h"
+#include <cmath>
+#include <cstring>
+using namespace llvm;
+
+STATISTIC(NumInitBytes, "Number of bytes of global vars initialized");
+STATISTIC(NumGlobals , "Number of global vars initialized");
+
+ExecutionEngine::EECtorFn ExecutionEngine::JITCtor = 0;
+ExecutionEngine::EECtorFn ExecutionEngine::InterpCtor = 0;
+ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0;
+
+
+ExecutionEngine::ExecutionEngine(ModuleProvider *P) : LazyFunctionCreator(0) {
+ LazyCompilationDisabled = false;
+ GVCompilationDisabled = false;
+ SymbolSearchingDisabled = false;
+ DlsymStubsEnabled = false;
+ Modules.push_back(P);
+ assert(P && "ModuleProvider is null?");
+}
+
+ExecutionEngine::~ExecutionEngine() {
+ clearAllGlobalMappings();
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ delete Modules[i];
+}
+
+char* ExecutionEngine::getMemoryForGV(const GlobalVariable* GV) {
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
+ return new char[GVSize];
+}
+
+/// removeModuleProvider - Remove a ModuleProvider from the list of modules.
+/// Releases the Module from the ModuleProvider, materializing it in the
+/// process, and returns the materialized Module.
+Module* ExecutionEngine::removeModuleProvider(ModuleProvider *P,
+ std::string *ErrInfo) {
+  for (SmallVector<ModuleProvider *, 1>::iterator I = Modules.begin(),
+ E = Modules.end(); I != E; ++I) {
+ ModuleProvider *MP = *I;
+ if (MP == P) {
+ Modules.erase(I);
+ clearGlobalMappingsFromModule(MP->getModule());
+ return MP->releaseModule(ErrInfo);
+ }
+ }
+ return NULL;
+}
+
+/// deleteModuleProvider - Remove a ModuleProvider from the list of modules,
+/// and deletes the ModuleProvider and owned Module. Avoids materializing
+/// the underlying module.
+void ExecutionEngine::deleteModuleProvider(ModuleProvider *P,
+ std::string *ErrInfo) {
+  for (SmallVector<ModuleProvider *, 1>::iterator I = Modules.begin(),
+ E = Modules.end(); I != E; ++I) {
+ ModuleProvider *MP = *I;
+ if (MP == P) {
+ Modules.erase(I);
+ clearGlobalMappingsFromModule(MP->getModule());
+ delete MP;
+ return;
+ }
+ }
+}
+
+/// FindFunctionNamed - Search all of the active modules to find the one that
+/// defines FnName. This is a very slow operation and shouldn't be used for
+/// general code.
+Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
+ if (Function *F = Modules[i]->getModule()->getFunction(FnName))
+ return F;
+ }
+ return 0;
+}
+
+
+/// addGlobalMapping - Tell the execution engine that the specified global is
+/// at the specified location. This is used internally as functions are JIT'd
+/// and as global variables are laid out in memory. It can and should also be
+/// used by clients of the EE that want to map an LLVM global onto existing
+/// data in memory.
+void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
+ MutexGuard locked(lock);
+
+ DOUT << "JIT: Map \'" << GV->getNameStart() << "\' to [" << Addr << "]\n";
+ void *&CurVal = state.getGlobalAddressMap(locked)[GV];
+ assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
+ CurVal = Addr;
+
+ // If we are using the reverse mapping, add it too
+ if (!state.getGlobalAddressReverseMap(locked).empty()) {
+ const GlobalValue *&V = state.getGlobalAddressReverseMap(locked)[Addr];
+ assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ V = GV;
+ }
+}
+
+/// clearAllGlobalMappings - Clear all global mappings and start over again;
+/// used in dynamic compilation scenarios when you want to move globals.
+void ExecutionEngine::clearAllGlobalMappings() {
+ MutexGuard locked(lock);
+
+ state.getGlobalAddressMap(locked).clear();
+ state.getGlobalAddressReverseMap(locked).clear();
+}
+
+/// clearGlobalMappingsFromModule - Clear all global mappings that came from a
+/// particular module, because it has been removed from the JIT.
+void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
+ MutexGuard locked(lock);
+
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+ state.getGlobalAddressMap(locked).erase(FI);
+ state.getGlobalAddressReverseMap(locked).erase(FI);
+ }
+ for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+ GI != GE; ++GI) {
+ state.getGlobalAddressMap(locked).erase(GI);
+ state.getGlobalAddressReverseMap(locked).erase(GI);
+ }
+}
+
+/// updateGlobalMapping - Replace an existing mapping for GV with a new
+/// address. This updates both maps as required. If "Addr" is null, the
+/// entry for the global is removed from the mappings.
+void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
+ MutexGuard locked(lock);
+
+ std::map<const GlobalValue*, void *> &Map = state.getGlobalAddressMap(locked);
+
+ // Deleting from the mapping?
+ if (Addr == 0) {
+ std::map<const GlobalValue*, void *>::iterator I = Map.find(GV);
+ void *OldVal;
+ if (I == Map.end())
+ OldVal = 0;
+ else {
+ OldVal = I->second;
+ Map.erase(I);
+ }
+
+ if (!state.getGlobalAddressReverseMap(locked).empty())
+ state.getGlobalAddressReverseMap(locked).erase(Addr);
+ return OldVal;
+ }
+
+ void *&CurVal = Map[GV];
+ void *OldVal = CurVal;
+
+ if (CurVal && !state.getGlobalAddressReverseMap(locked).empty())
+ state.getGlobalAddressReverseMap(locked).erase(CurVal);
+ CurVal = Addr;
+
+ // If we are using the reverse mapping, add it too
+ if (!state.getGlobalAddressReverseMap(locked).empty()) {
+ const GlobalValue *&V = state.getGlobalAddressReverseMap(locked)[Addr];
+ assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ V = GV;
+ }
+ return OldVal;
+}
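+
+// Illustrative only: a sketch of relocating a global's storage with
+// updateGlobalMapping; EE and GV are assumed to come from elsewhere, and the
+// caller still owns copying the old contents to the new address if needed.
+#if 0
+static void moveGlobal(ExecutionEngine *EE, const GlobalValue *GV,
+                       void *NewAddr) {
+  void *Old = EE->updateGlobalMapping(GV, NewAddr); // returns prior address
+  (void)Old;
+}
+#endif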
+
+/// getPointerToGlobalIfAvailable - This returns the address of the specified
+/// global value if it has already been codegen'd, otherwise it returns null.
+///
+void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
+ MutexGuard locked(lock);
+
+ std::map<const GlobalValue*, void*>::iterator I =
+ state.getGlobalAddressMap(locked).find(GV);
+ return I != state.getGlobalAddressMap(locked).end() ? I->second : 0;
+}
+
+/// getGlobalValueAtAddress - Return the LLVM global value object that starts
+/// at the specified address.
+///
+const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
+ MutexGuard locked(lock);
+
+ // If we haven't computed the reverse mapping yet, do so first.
+ if (state.getGlobalAddressReverseMap(locked).empty()) {
+ for (std::map<const GlobalValue*, void *>::iterator
+ I = state.getGlobalAddressMap(locked).begin(),
+ E = state.getGlobalAddressMap(locked).end(); I != E; ++I)
+ state.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second,
+ I->first));
+ }
+
+ std::map<void *, const GlobalValue*>::iterator I =
+ state.getGlobalAddressReverseMap(locked).find(Addr);
+ return I != state.getGlobalAddressReverseMap(locked).end() ? I->second : 0;
+}
+
+// CreateArgv - Turn a vector of strings into a nice argv-style array of
+// pointers to null-terminated strings.
+//
+static void *CreateArgv(ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv) {
+ unsigned PtrSize = EE->getTargetData()->getPointerSize();
+ char *Result = new char[(InputArgv.size()+1)*PtrSize];
+
+ DOUT << "JIT: ARGV = " << (void*)Result << "\n";
+ const Type *SBytePtr = PointerType::getUnqual(Type::Int8Ty);
+
+ for (unsigned i = 0; i != InputArgv.size(); ++i) {
+ unsigned Size = InputArgv[i].size()+1;
+ char *Dest = new char[Size];
+ DOUT << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n";
+
+ std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest);
+ Dest[Size-1] = 0;
+
+ // Endian safe: Result[i] = (PointerTy)Dest;
+ EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Result+i*PtrSize),
+ SBytePtr);
+ }
+
+ // Null terminate it
+ EE->StoreValueToMemory(PTOGV(0),
+ (GenericValue*)(Result+InputArgv.size()*PtrSize),
+ SBytePtr);
+ return Result;
+}
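+
+// Illustrative only: the same layout built for the *host*, without the
+// endian-safe StoreValueToMemory indirection the target version above needs.
+#if 0
+#include <cstring>
+#include <string>
+#include <vector>
+static char **createHostArgv(const std::vector<std::string> &Args) {
+  char **Result = new char*[Args.size() + 1];
+  for (unsigned i = 0; i != Args.size(); ++i) {
+    Result[i] = new char[Args[i].size() + 1];
+    std::memcpy(Result[i], Args[i].c_str(), Args[i].size() + 1);
+  }
+  Result[Args.size()] = 0;  // argv is null terminated
+  return Result;
+}
+#endif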
+
+
+/// runStaticConstructorsDestructors - This method is used to execute all of
+/// the static constructors or destructors for a module, depending on the
+/// value of isDtors.
+void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
+                                                       bool isDtors) {
+ const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
+
+ // Execute global ctors/dtors for each module in the program.
+
+ GlobalVariable *GV = module->getNamedGlobal(Name);
+
+ // If this global has internal linkage, or if it has a use, then it must be
+ // an old-style (llvmgcc3) static ctor with __main linked in and in use. If
+ // this is the case, don't execute any of the global ctors, __main will do
+ // it.
+ if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
+
+ // Should be an array of '{ int, void ()* }' structs. The first value is
+ // the init priority, which we ignore.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList) return;
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS =
+ dyn_cast<ConstantStruct>(InitList->getOperand(i))) {
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ Constant *FP = CS->getOperand(1);
+ if (FP->isNullValue())
+ break; // Found a null terminator, exit.
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+ if (Function *F = dyn_cast<Function>(FP)) {
+ // Execute the ctor/dtor function!
+ runFunction(F, std::vector<GenericValue>());
+ }
+ }
+}
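+
+// Illustrative only: the entry shape the loop above expects, written as an
+// equivalent host-side struct (the real array lives in LLVM IR as the
+// '{ int, void ()* }' structs described above).
+#if 0
+struct CtorDtorEntry {
+  int Priority;    // init priority; ignored by the loop above
+  void (*Func)();  // ctor/dtor to run; a null Func terminates the array
+};
+#endif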
+
+/// runStaticConstructorsDestructors - This method is used to execute all of
+/// the static constructors or destructors for a program, depending on the
+/// value of isDtors.
+void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
+ // Execute global ctors/dtors for each module in the program.
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m)
+ runStaticConstructorsDestructors(Modules[m]->getModule(), isDtors);
+}
+
+#ifndef NDEBUG
+/// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
+static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
+ unsigned PtrSize = EE->getTargetData()->getPointerSize();
+ for (unsigned i = 0; i < PtrSize; ++i)
+ if (*(i + (uint8_t*)Loc))
+ return false;
+ return true;
+}
+#endif
+
+/// runFunctionAsMain - This is a helper function which wraps runFunction to
+/// handle the common task of starting up main with the specified argc, argv,
+/// and envp parameters.
+int ExecutionEngine::runFunctionAsMain(Function *Fn,
+ const std::vector<std::string> &argv,
+ const char * const * envp) {
+ std::vector<GenericValue> GVArgs;
+ GenericValue GVArgc;
+ GVArgc.IntVal = APInt(32, argv.size());
+
+ // Check main() type
+ unsigned NumArgs = Fn->getFunctionType()->getNumParams();
+ const FunctionType *FTy = Fn->getFunctionType();
+ const Type* PPInt8Ty =
+ PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty));
+ switch (NumArgs) {
+ case 3:
+ if (FTy->getParamType(2) != PPInt8Ty) {
+ cerr << "Invalid type for third argument of main() supplied\n";
+ abort();
+ }
+ // FALLS THROUGH
+ case 2:
+ if (FTy->getParamType(1) != PPInt8Ty) {
+ cerr << "Invalid type for second argument of main() supplied\n";
+ abort();
+ }
+ // FALLS THROUGH
+ case 1:
+ if (FTy->getParamType(0) != Type::Int32Ty) {
+ cerr << "Invalid type for first argument of main() supplied\n";
+ abort();
+ }
+ // FALLS THROUGH
+ case 0:
+ if (!isa<IntegerType>(FTy->getReturnType()) &&
+ FTy->getReturnType() != Type::VoidTy) {
+ cerr << "Invalid return type of main() supplied\n";
+ abort();
+ }
+ break;
+ default:
+ cerr << "Invalid number of arguments of main() supplied\n";
+ abort();
+ }
+
+ if (NumArgs) {
+ GVArgs.push_back(GVArgc); // Arg #0 = argc.
+ if (NumArgs > 1) {
+ GVArgs.push_back(PTOGV(CreateArgv(this, argv))); // Arg #1 = argv.
+ assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) &&
+ "argv[0] was null after CreateArgv");
+ if (NumArgs > 2) {
+ std::vector<std::string> EnvVars;
+ for (unsigned i = 0; envp[i]; ++i)
+ EnvVars.push_back(envp[i]);
+ GVArgs.push_back(PTOGV(CreateArgv(this, EnvVars))); // Arg #2 = envp.
+ }
+ }
+ }
+ return runFunction(Fn, GVArgs).IntVal.getZExtValue();
+}
+
+/// If possible, create a JIT, unless the caller specifically requests an
+/// Interpreter or there's an error. If even an Interpreter cannot be created,
+/// NULL is returned.
+///
+ExecutionEngine *ExecutionEngine::create(ModuleProvider *MP,
+ bool ForceInterpreter,
+ std::string *ErrorStr,
+ CodeGenOpt::Level OptLevel) {
+ ExecutionEngine *EE = 0;
+
+ // Make sure we can resolve symbols in the program as well. The zero arg
+ // to the function tells DynamicLibrary to load the program, not a library.
+ if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
+ return 0;
+
+ // Unless the interpreter was explicitly selected, try making a JIT.
+ if (!ForceInterpreter && JITCtor)
+ EE = JITCtor(MP, ErrorStr, OptLevel);
+
+ // If we can't make a JIT, make an interpreter instead.
+ if (EE == 0 && InterpCtor)
+ EE = InterpCtor(MP, ErrorStr, OptLevel);
+
+ return EE;
+}
+
+ExecutionEngine *ExecutionEngine::create(Module *M) {
+ return create(new ExistingModuleProvider(M));
+}
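+
+// Illustrative only: a sketch of constructing an engine with the fallback
+// made explicit; M is assumed to be a Module built elsewhere.
+#if 0
+static ExecutionEngine *makeEngine(Module *M, std::string *Err) {
+  // ForceInterpreter=false: try the JIT first, fall back to the interpreter.
+  return ExecutionEngine::create(new ExistingModuleProvider(M), false, Err);
+}
+#endif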
+
+/// getPointerToGlobal - This returns the address of the specified global
+/// value. This may involve code generation if it's a function.
+///
+void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
+ if (Function *F = const_cast<Function*>(dyn_cast<Function>(GV)))
+ return getPointerToFunction(F);
+
+ MutexGuard locked(lock);
+ void *p = state.getGlobalAddressMap(locked)[GV];
+ if (p)
+ return p;
+
+ // Global variable might have been added since interpreter started.
+ if (GlobalVariable *GVar =
+ const_cast<GlobalVariable *>(dyn_cast<GlobalVariable>(GV)))
+ EmitGlobalVariable(GVar);
+ else
+ assert(0 && "Global hasn't had an address allocated yet!");
+ return state.getGlobalAddressMap(locked)[GV];
+}
+
+/// This function converts a Constant* into a GenericValue. The interesting
+/// part is if C is a ConstantExpr.
+/// @brief Get a GenericValue for a Constant*
+GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
+  // If it's undefined, return garbage.
+ if (isa<UndefValue>(C))
+ return GenericValue();
+
+ // If the value is a ConstantExpr
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ Constant *Op0 = CE->getOperand(0);
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ // Compute the index
+ GenericValue Result = getConstantValue(Op0);
+ SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+ uint64_t Offset =
+ TD->getIndexedOffset(Op0->getType(), &Indices[0], Indices.size());
+
+ char* tmp = (char*) Result.PointerVal;
+ Result = PTOGV(tmp + Offset);
+ return Result;
+ }
+ case Instruction::Trunc: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.trunc(BitWidth);
+ return GV;
+ }
+ case Instruction::ZExt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.zext(BitWidth);
+ return GV;
+ }
+ case Instruction::SExt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ GV.IntVal = GV.IntVal.sext(BitWidth);
+ return GV;
+ }
+ case Instruction::FPTrunc: {
+ // FIXME long double
+ GenericValue GV = getConstantValue(Op0);
+ GV.FloatVal = float(GV.DoubleVal);
+ return GV;
+ }
+ case Instruction::FPExt:{
+ // FIXME long double
+ GenericValue GV = getConstantValue(Op0);
+ GV.DoubleVal = double(GV.FloatVal);
+ return GV;
+ }
+ case Instruction::UIToFP: {
+ GenericValue GV = getConstantValue(Op0);
+ if (CE->getType() == Type::FloatTy)
+ GV.FloatVal = float(GV.IntVal.roundToDouble());
+ else if (CE->getType() == Type::DoubleTy)
+ GV.DoubleVal = GV.IntVal.roundToDouble();
+ else if (CE->getType() == Type::X86_FP80Ty) {
+ const uint64_t zero[] = {0, 0};
+ APFloat apf = APFloat(APInt(80, 2, zero));
+ (void)apf.convertFromAPInt(GV.IntVal,
+ false,
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apf.bitcastToAPInt();
+ }
+ return GV;
+ }
+ case Instruction::SIToFP: {
+ GenericValue GV = getConstantValue(Op0);
+ if (CE->getType() == Type::FloatTy)
+ GV.FloatVal = float(GV.IntVal.signedRoundToDouble());
+ else if (CE->getType() == Type::DoubleTy)
+ GV.DoubleVal = GV.IntVal.signedRoundToDouble();
+ else if (CE->getType() == Type::X86_FP80Ty) {
+ const uint64_t zero[] = { 0, 0};
+ APFloat apf = APFloat(APInt(80, 2, zero));
+ (void)apf.convertFromAPInt(GV.IntVal,
+ true,
+ APFloat::rmNearestTiesToEven);
+ GV.IntVal = apf.bitcastToAPInt();
+ }
+ return GV;
+ }
+ case Instruction::FPToUI: // double->APInt conversion handles sign
+ case Instruction::FPToSI: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t BitWidth = cast<IntegerType>(CE->getType())->getBitWidth();
+ if (Op0->getType() == Type::FloatTy)
+ GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth);
+ else if (Op0->getType() == Type::DoubleTy)
+ GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth);
+ else if (Op0->getType() == Type::X86_FP80Ty) {
+ APFloat apf = APFloat(GV.IntVal);
+ uint64_t v;
+ bool ignored;
+ (void)apf.convertToInteger(&v, BitWidth,
+ CE->getOpcode()==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ GV.IntVal = v; // endian?
+ }
+ return GV;
+ }
+ case Instruction::PtrToInt: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t PtrWidth = TD->getPointerSizeInBits();
+ GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
+ return GV;
+ }
+ case Instruction::IntToPtr: {
+ GenericValue GV = getConstantValue(Op0);
+ uint32_t PtrWidth = TD->getPointerSizeInBits();
+ if (PtrWidth != GV.IntVal.getBitWidth())
+ GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
+ assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
+ GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue()));
+ return GV;
+ }
+ case Instruction::BitCast: {
+ GenericValue GV = getConstantValue(Op0);
+ const Type* DestTy = CE->getType();
+ switch (Op0->getType()->getTypeID()) {
+ default: assert(0 && "Invalid bitcast operand");
+ case Type::IntegerTyID:
+ assert(DestTy->isFloatingPoint() && "invalid bitcast");
+ if (DestTy == Type::FloatTy)
+ GV.FloatVal = GV.IntVal.bitsToFloat();
+ else if (DestTy == Type::DoubleTy)
+ GV.DoubleVal = GV.IntVal.bitsToDouble();
+ break;
+ case Type::FloatTyID:
+ assert(DestTy == Type::Int32Ty && "Invalid bitcast");
+ GV.IntVal.floatToBits(GV.FloatVal);
+ break;
+ case Type::DoubleTyID:
+ assert(DestTy == Type::Int64Ty && "Invalid bitcast");
+ GV.IntVal.doubleToBits(GV.DoubleVal);
+ break;
+ case Type::PointerTyID:
+ assert(isa<PointerType>(DestTy) && "Invalid bitcast");
+ break; // getConstantValue(Op0) above already converted it
+ }
+ return GV;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ GenericValue LHS = getConstantValue(Op0);
+ GenericValue RHS = getConstantValue(CE->getOperand(1));
+ GenericValue GV;
+ switch (CE->getOperand(0)->getType()->getTypeID()) {
+ default: assert(0 && "Bad add type!"); abort();
+ case Type::IntegerTyID:
+ switch (CE->getOpcode()) {
+ default: assert(0 && "Invalid integer opcode");
+ case Instruction::Add: GV.IntVal = LHS.IntVal + RHS.IntVal; break;
+ case Instruction::Sub: GV.IntVal = LHS.IntVal - RHS.IntVal; break;
+ case Instruction::Mul: GV.IntVal = LHS.IntVal * RHS.IntVal; break;
+ case Instruction::UDiv:GV.IntVal = LHS.IntVal.udiv(RHS.IntVal); break;
+ case Instruction::SDiv:GV.IntVal = LHS.IntVal.sdiv(RHS.IntVal); break;
+ case Instruction::URem:GV.IntVal = LHS.IntVal.urem(RHS.IntVal); break;
+ case Instruction::SRem:GV.IntVal = LHS.IntVal.srem(RHS.IntVal); break;
+ case Instruction::And: GV.IntVal = LHS.IntVal & RHS.IntVal; break;
+ case Instruction::Or: GV.IntVal = LHS.IntVal | RHS.IntVal; break;
+ case Instruction::Xor: GV.IntVal = LHS.IntVal ^ RHS.IntVal; break;
+ }
+ break;
+ case Type::FloatTyID:
+ switch (CE->getOpcode()) {
+ default: assert(0 && "Invalid float opcode"); abort();
+ case Instruction::Add:
+ GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break;
+ case Instruction::Sub:
+ GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break;
+ case Instruction::Mul:
+ GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break;
+ case Instruction::FDiv:
+ GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
+ case Instruction::FRem:
+ GV.FloatVal = ::fmodf(LHS.FloatVal,RHS.FloatVal); break;
+ }
+ break;
+ case Type::DoubleTyID:
+ switch (CE->getOpcode()) {
+ default: assert(0 && "Invalid double opcode"); abort();
+ case Instruction::Add:
+ GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break;
+ case Instruction::Sub:
+ GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break;
+ case Instruction::Mul:
+ GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break;
+ case Instruction::FDiv:
+ GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
+ case Instruction::FRem:
+ GV.DoubleVal = ::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
+ }
+ break;
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ APFloat apfLHS = APFloat(LHS.IntVal);
+ switch (CE->getOpcode()) {
+ default: assert(0 && "Invalid long double opcode"); abort();
+ case Instruction::Add:
+ apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::Sub:
+ apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::Mul:
+ apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FDiv:
+ apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ case Instruction::FRem:
+ apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ GV.IntVal = apfLHS.bitcastToAPInt();
+ break;
+ }
+ }
+ break;
+ }
+ return GV;
+ }
+ default:
+ break;
+ }
+ cerr << "ConstantExpr not handled: " << *CE << "\n";
+ abort();
+ }
+
+ GenericValue Result;
+ switch (C->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat();
+ break;
+ case Type::DoubleTyID:
+ Result.DoubleVal = cast<ConstantFP>(C)->getValueAPF().convertToDouble();
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ Result.IntVal = cast <ConstantFP>(C)->getValueAPF().bitcastToAPInt();
+ break;
+ case Type::IntegerTyID:
+ Result.IntVal = cast<ConstantInt>(C)->getValue();
+ break;
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(C))
+ Result.PointerVal = 0;
+ else if (const Function *F = dyn_cast<Function>(C))
+ Result = PTOGV(getPointerToFunctionOrStub(const_cast<Function*>(F)));
+ else if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
+ Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV)));
+ else
+ assert(0 && "Unknown constant pointer type!");
+ break;
+ default:
+ cerr << "ERROR: Constant unimplemented for type: " << *C->getType() << "\n";
+ abort();
+ }
+ return Result;
+}
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal.
+static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
+ unsigned StoreBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
+ uint8_t *Src = (uint8_t *)IntVal.getRawData();
+
+ if (sys::isLittleEndianHost())
+ // Little-endian host - the source is ordered from LSB to MSB. Order the
+ // destination from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, StoreBytes);
+ else {
+ // Big-endian host - the source is an array of 64 bit words ordered from
+ // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
+ // from MSB to LSB: Reverse the word order, but not the bytes in a word.
+ while (StoreBytes > sizeof(uint64_t)) {
+ StoreBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
+ Src += sizeof(uint64_t);
+ }
+
+ memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
+ }
+}
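+
+// Illustrative only: the big-endian branch above, specialized to a 10-byte
+// store from two host words (e.g. an x86 long double image) so the word
+// reversal is easier to follow; assumes a big-endian host.
+#if 0
+#include <cstdint>
+#include <cstring>
+static void storeBE10(const uint64_t Words[2], uint8_t *Dst) {
+  unsigned StoreBytes = 10;
+  const uint8_t *Src = (const uint8_t *)Words;
+  StoreBytes -= sizeof(uint64_t);                       // 10 -> 2
+  std::memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); // low word at Dst+2
+  Src += sizeof(uint64_t);
+  std::memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
+  // The high word's two significant bytes land at Dst[0..1], MSB first.
+}
+#endif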
+
+/// StoreValueToMemory - Stores the data in Val of type Ty at address Ptr. Ptr
+/// is the address of the memory at which to store Val, cast to GenericValue *.
+/// It is not a pointer to a GenericValue containing the address at which to
+/// store Val.
+void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
+ GenericValue *Ptr, const Type *Ty) {
+ const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty);
+
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes);
+ break;
+ case Type::FloatTyID:
+ *((float*)Ptr) = Val.FloatVal;
+ break;
+ case Type::DoubleTyID:
+ *((double*)Ptr) = Val.DoubleVal;
+ break;
+ case Type::X86_FP80TyID:
+ memcpy(Ptr, Val.IntVal.getRawData(), 10);
+ break;
+ case Type::PointerTyID:
+ // Ensure 64 bit target pointers are fully initialized on 32 bit hosts.
+ if (StoreBytes != sizeof(PointerTy))
+ memset(Ptr, 0, StoreBytes);
+
+ *((PointerTy*)Ptr) = Val.PointerVal;
+ break;
+ default:
+ cerr << "Cannot store value of type " << *Ty << "!\n";
+ }
+
+ if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian())
+ // Host and target are different endian - reverse the stored bytes.
+ std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
+}
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
+ uint8_t *Dst = (uint8_t *)IntVal.getRawData();
+
+ if (sys::isLittleEndianHost())
+ // Little-endian host - the destination must be ordered from LSB to MSB.
+ // The source is ordered from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, LoadBytes);
+ else {
+ // Big-endian - the destination is an array of 64 bit words ordered from
+ // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
+ // ordered from MSB to LSB: Reverse the word order, but not the bytes in
+ // a word.
+ while (LoadBytes > sizeof(uint64_t)) {
+ LoadBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
+ Dst += sizeof(uint64_t);
+ }
+
+ memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
+ }
+}
+
+/// LoadValueFromMemory - Loads a value of type Ty from the memory at Ptr
+/// into Result, byte-reversing first if host and target endianness differ.
+///
+void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
+ GenericValue *Ptr,
+ const Type *Ty) {
+ const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty);
+
+ if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) {
+ // Host and target are different endian - reverse copy the stored
+ // bytes into a buffer, and load from that.
+ uint8_t *Src = (uint8_t*)Ptr;
+ uint8_t *Buf = (uint8_t*)alloca(LoadBytes);
+ std::reverse_copy(Src, Src + LoadBytes, Buf);
+ Ptr = (GenericValue*)Buf;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ // An APInt with all words initially zero.
+ Result.IntVal = APInt(cast<IntegerType>(Ty)->getBitWidth(), 0);
+ LoadIntFromMemory(Result.IntVal, (uint8_t*)Ptr, LoadBytes);
+ break;
+ case Type::FloatTyID:
+ Result.FloatVal = *((float*)Ptr);
+ break;
+ case Type::DoubleTyID:
+ Result.DoubleVal = *((double*)Ptr);
+ break;
+ case Type::PointerTyID:
+ Result.PointerVal = *((PointerTy*)Ptr);
+ break;
+ case Type::X86_FP80TyID: {
+ // This is endian dependent, but it will only work on x86 anyway.
+ // FIXME: Will not trap if loading a signaling NaN.
+ uint64_t y[2];
+ memcpy(y, Ptr, 10);
+ Result.IntVal = APInt(80, 2, y);
+ break;
+ }
+ default:
+ cerr << "Cannot load value of type " << *Ty << "!\n";
+ abort();
+ }
+}
+
+// InitializeMemory - Recursive function to store a Constant value into the
+// specified memory location...
+//
+void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
+ DOUT << "JIT: Initializing " << Addr << " ";
+ DEBUG(Init->dump());
+ if (isa<UndefValue>(Init)) {
+ return;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
+ unsigned ElementSize =
+ getTargetData()->getTypeAllocSize(CP->getType()->getElementType());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
+ return;
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType()));
+ return;
+ } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
+ unsigned ElementSize =
+ getTargetData()->getTypeAllocSize(CPA->getType()->getElementType());
+ for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+ InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
+ return;
+ } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
+ const StructLayout *SL =
+ getTargetData()->getStructLayout(cast<StructType>(CPS->getType()));
+ for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+ InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
+ return;
+ } else if (Init->getType()->isFirstClassType()) {
+ GenericValue Val = getConstantValue(Init);
+ StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType());
+ return;
+ }
+
+ cerr << "Bad Type: " << *Init->getType() << "\n";
+ assert(0 && "Unknown constant type to initialize memory with!");
+}
+
+/// emitGlobals - Emit all of the global variables to memory, storing their
+/// addresses into GlobalAddress. This must make sure to copy the contents of
+/// their initializers into the memory.
+///
+void ExecutionEngine::emitGlobals() {
+
+ // Loop over all of the global variables in the program, allocating the memory
+ // to hold them. If there is more than one module, do a prepass over globals
+ // to figure out how the different modules should link together.
+ //
+ std::map<std::pair<std::string, const Type*>,
+ const GlobalValue*> LinkedGlobalsMap;
+
+ if (Modules.size() != 1) {
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+ Module &M = *Modules[m]->getModule();
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ const GlobalValue *GV = I;
+ if (GV->hasLocalLinkage() || GV->isDeclaration() ||
+ GV->hasAppendingLinkage() || !GV->hasName())
+ continue;// Ignore external globals and globals with internal linkage.
+
+ const GlobalValue *&GVEntry =
+ LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+
+ // If this is the first time we've seen this global, it is the canonical
+ // version.
+ if (!GVEntry) {
+ GVEntry = GV;
+ continue;
+ }
+
+ // If the existing global is strong, never replace it.
+ if (GVEntry->hasExternalLinkage() ||
+ GVEntry->hasDLLImportLinkage() ||
+ GVEntry->hasDLLExportLinkage())
+ continue;
+
+ // Otherwise, we know it's linkonce/weak, replace it if this is a strong
+ // symbol. FIXME is this right for common?
+ if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage())
+ GVEntry = GV;
+ }
+ }
+ }
+
+ std::vector<const GlobalValue*> NonCanonicalGlobals;
+ for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
+ Module &M = *Modules[m]->getModule();
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ // In the multi-module case, see what this global maps to.
+ if (!LinkedGlobalsMap.empty()) {
+ if (const GlobalValue *GVEntry =
+ LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) {
+ // If something else is the canonical global, ignore this one.
+ if (GVEntry != &*I) {
+ NonCanonicalGlobals.push_back(I);
+ continue;
+ }
+ }
+ }
+
+ if (!I->isDeclaration()) {
+ addGlobalMapping(I, getMemoryForGV(I));
+ } else {
+ // External variable reference. Try to use the dynamic loader to
+ // get a pointer to it.
+ if (void *SymAddr =
+ sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName().c_str()))
+ addGlobalMapping(I, SymAddr);
+ else {
+ cerr << "Could not resolve external global address: "
+ << I->getName() << "\n";
+ abort();
+ }
+ }
+ }
+
+ // If there are multiple modules, map the non-canonical globals to their
+ // canonical location.
+ if (!NonCanonicalGlobals.empty()) {
+ for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) {
+ const GlobalValue *GV = NonCanonicalGlobals[i];
+ const GlobalValue *CGV =
+ LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+ void *Ptr = getPointerToGlobalIfAvailable(CGV);
+ assert(Ptr && "Canonical global wasn't codegen'd!");
+ addGlobalMapping(GV, Ptr);
+ }
+ }
+
+ // Now that all of the globals are set up in memory, loop through them all
+ // and initialize their contents.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ if (!LinkedGlobalsMap.empty()) {
+ if (const GlobalValue *GVEntry =
+ LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())])
+ if (GVEntry != &*I) // Not the canonical variable.
+ continue;
+ }
+ EmitGlobalVariable(I);
+ }
+ }
+ }
+}
+
+// EmitGlobalVariable - This method emits the specified global variable to the
+// address specified in GlobalAddresses, or allocates new memory if it's not
+// already in the map.
+void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
+ void *GA = getPointerToGlobalIfAvailable(GV);
+
+ if (GA == 0) {
+ // If it's not already specified, allocate memory for the global.
+ GA = getMemoryForGV(GV);
+ addGlobalMapping(GV, GA);
+ }
+
+ // Don't initialize if it's thread local, let the client do it.
+ if (!GV->isThreadLocal())
+ InitializeMemory(GV->getInitializer(), GA);
+
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
+ NumInitBytes += (unsigned)GVSize;
+ ++NumGlobals;
+}
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
new file mode 100644
index 0000000..83397a58
--- /dev/null
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -0,0 +1,206 @@
+//===-- ExecutionEngineBindings.cpp - C bindings for EEs ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the C bindings for the ExecutionEngine library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm-c/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include <cstring>
+
+using namespace llvm;
+
+/*===-- Operations on generic values --------------------------------------===*/
+
+LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
+ unsigned long long N,
+ int IsSigned) {
+ GenericValue *GenVal = new GenericValue();
+ GenVal->IntVal = APInt(unwrap<IntegerType>(Ty)->getBitWidth(), N, IsSigned);
+ return wrap(GenVal);
+}
+
+LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P) {
+ GenericValue *GenVal = new GenericValue();
+ GenVal->PointerVal = P;
+ return wrap(GenVal);
+}
+
+LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef TyRef, double N) {
+ GenericValue *GenVal = new GenericValue();
+ switch (unwrap(TyRef)->getTypeID()) {
+ case Type::FloatTyID:
+ GenVal->FloatVal = N;
+ break;
+ case Type::DoubleTyID:
+ GenVal->DoubleVal = N;
+ break;
+ default:
+ assert(0 && "LLVMGenericValueToFloat supports only float and double.");
+ break;
+ }
+ return wrap(GenVal);
+}
+
+unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) {
+ return unwrap(GenValRef)->IntVal.getBitWidth();
+}
+
+unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef,
+ int IsSigned) {
+ GenericValue *GenVal = unwrap(GenValRef);
+ if (IsSigned)
+ return GenVal->IntVal.getSExtValue();
+ else
+ return GenVal->IntVal.getZExtValue();
+}
+
+void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal) {
+ return unwrap(GenVal)->PointerVal;
+}
+
+double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) {
+ switch (unwrap(TyRef)->getTypeID()) {
+ case Type::FloatTyID:
+ return unwrap(GenVal)->FloatVal;
+ case Type::DoubleTyID:
+ return unwrap(GenVal)->DoubleVal;
+ default:
+ assert(0 && "LLVMGenericValueToFloat supports only float and double.");
+ break;
+ }
+ return 0; // Not reached
+}
+
+void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
+ delete unwrap(GenVal);
+}
+
+/*===-- Operations on execution engines -----------------------------------===*/
+
+int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ std::string Error;
+ if (ExecutionEngine *EE = ExecutionEngine::create(unwrap(MP), false, &Error)){
+ *OutEE = wrap(EE);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ std::string Error;
+ if (ExecutionEngine *Interp =
+ ExecutionEngine::create(unwrap(MP), true, &Error)) {
+ *OutInterp = wrap(Interp);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleProviderRef MP,
+ unsigned OptLevel,
+ char **OutError) {
+ std::string Error;
+ if (ExecutionEngine *JIT =
+ ExecutionEngine::createJIT(unwrap(MP), &Error, 0,
+ (CodeGenOpt::Level)OptLevel)) {
+ *OutJIT = wrap(JIT);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) {
+ delete unwrap(EE);
+}
+
+void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE) {
+ unwrap(EE)->runStaticConstructorsDestructors(false);
+}
+
+void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) {
+ unwrap(EE)->runStaticConstructorsDestructors(true);
+}
+
+int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
+ unsigned ArgC, const char * const *ArgV,
+ const char * const *EnvP) {
+ std::vector<std::string> ArgVec;
+ for (unsigned I = 0; I != ArgC; ++I)
+ ArgVec.push_back(ArgV[I]);
+
+ return unwrap(EE)->runFunctionAsMain(unwrap<Function>(F), ArgVec, EnvP);
+}
+
+LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
+ unsigned NumArgs,
+ LLVMGenericValueRef *Args) {
+ std::vector<GenericValue> ArgVec;
+ ArgVec.reserve(NumArgs);
+ for (unsigned I = 0; I != NumArgs; ++I)
+ ArgVec.push_back(*unwrap(Args[I]));
+
+ GenericValue *Result = new GenericValue();
+ *Result = unwrap(EE)->runFunction(unwrap<Function>(F), ArgVec);
+ return wrap(Result);
+}
+
+void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) {
+ unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F));
+}
+
+void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
+ unwrap(EE)->addModuleProvider(unwrap(MP));
+}
+
+int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
+ LLVMModuleProviderRef MP,
+ LLVMModuleRef *OutMod, char **OutError) {
+ std::string Error;
+ if (Module *Gone = unwrap(EE)->removeModuleProvider(unwrap(MP), &Error)) {
+ *OutMod = wrap(Gone);
+ return 0;
+ }
+ if (OutError)
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
+int LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
+ LLVMValueRef *OutFn) {
+ if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) {
+ *OutFn = wrap(F);
+ return 0;
+ }
+ return 1;
+}
+
+LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
+ return wrap(unwrap(EE)->getTargetData());
+}
+
+void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
+ void* Addr) {
+ unwrap(EE)->addGlobalMapping(unwrap<GlobalValue>(Global), Addr);
+}
+
+void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
+ return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global));
+}
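+
+// Illustrative only: a sketch of driving these bindings from C++. It assumes
+// MP wraps a module defining `int add(int, int)` (the name is hypothetical)
+// and that LLVMInt32Type() from llvm-c/Core.h is available.
+#if 0
+#include "llvm-c/Core.h"
+#include <cstdlib>
+static unsigned long long runAdd(LLVMModuleProviderRef MP) {
+  LLVMExecutionEngineRef EE;
+  char *Err = 0;
+  if (LLVMCreateExecutionEngine(&EE, MP, &Err)) {
+    free(Err);   // creation failed; Err held the reason
+    return 0;
+  }
+  LLVMValueRef Fn;
+  if (LLVMFindFunction(EE, "add", &Fn))
+    return 0;    // no such function in any registered module
+
+  LLVMGenericValueRef Args[2] = {
+    LLVMCreateGenericValueOfInt(LLVMInt32Type(), 2, 0),
+    LLVMCreateGenericValueOfInt(LLVMInt32Type(), 3, 0)
+  };
+  LLVMGenericValueRef Res = LLVMRunFunction(EE, Fn, 2, Args);
+  unsigned long long V = LLVMGenericValueToInt(Res, 0); // 5
+  LLVMDisposeGenericValue(Res);
+  LLVMDisposeGenericValue(Args[0]);
+  LLVMDisposeGenericValue(Args[1]);
+  return V;
+}
+#endif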
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
new file mode 100644
index 0000000..626e804
--- /dev/null
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_partially_linked_object(LLVMInterpreter
+ Execution.cpp
+ ExternalFunctions.cpp
+ Interpreter.cpp
+ )
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
new file mode 100644
index 0000000..765fed2
--- /dev/null
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -0,0 +1,1382 @@
+//===-- Execution.cpp - Implement code to simulate the program ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the actual instruction interpreter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "interpreter"
+#include "Interpreter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+using namespace llvm;
+
+STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed");
+static Interpreter *TheEE = 0;
+
+static cl::opt<bool> PrintVolatile("interpreter-print-volatile", cl::Hidden,
+ cl::desc("make the interpreter print every volatile load and store"));
+
+//===----------------------------------------------------------------------===//
+// Various Helper Functions
+//===----------------------------------------------------------------------===//
+
+static inline uint64_t doSignExtension(uint64_t Val, const IntegerType* ITy) {
+ // Determine if the value is signed or not
+ bool isSigned = (Val & (1 << (ITy->getBitWidth()-1))) != 0;
+ // If its signed, extend the sign bits
+ if (isSigned)
+ Val |= ~ITy->getBitMask();
+ return Val;
+}
+
+static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
+ SF.Values[V] = Val;
+}
+
+void Interpreter::initializeExecutionEngine() {
+ TheEE = this;
+}
+
+//===----------------------------------------------------------------------===//
+// Binary Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+#define IMPLEMENT_BINARY_OPERATOR(OP, TY) \
+ case Type::TY##TyID: \
+ Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \
+ break
+
+#define IMPLEMENT_INTEGER_BINOP1(OP, TY) \
+ case Type::IntegerTyID: { \
+ Dest.IntVal = Src1.IntVal OP Src2.IntVal; \
+ break; \
+ }
+
+
+static void executeAddInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_BINOP1(+, Ty);
+ IMPLEMENT_BINARY_OPERATOR(+, Float);
+ IMPLEMENT_BINARY_OPERATOR(+, Double);
+ default:
+ cerr << "Unhandled type for Add instruction: " << *Ty << "\n";
+ abort();
+ }
+}
+
+static void executeSubInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_BINOP1(-, Ty);
+ IMPLEMENT_BINARY_OPERATOR(-, Float);
+ IMPLEMENT_BINARY_OPERATOR(-, Double);
+ default:
+ cerr << "Unhandled type for Sub instruction: " << *Ty << "\n";
+ abort();
+ }
+}
+
+static void executeMulInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_BINOP1(*, Ty);
+ IMPLEMENT_BINARY_OPERATOR(*, Float);
+ IMPLEMENT_BINARY_OPERATOR(*, Double);
+ default:
+ cerr << "Unhandled type for Mul instruction: " << *Ty << "\n";
+ abort();
+ }
+}
+
+static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_BINARY_OPERATOR(/, Float);
+ IMPLEMENT_BINARY_OPERATOR(/, Double);
+ default:
+ cerr << "Unhandled type for FDiv instruction: " << *Ty << "\n";
+ abort();
+ }
+}
+
+static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+    Dest.FloatVal = fmodf(Src1.FloatVal, Src2.FloatVal);
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal);
+ break;
+ default:
+ cerr << "Unhandled type for Rem instruction: " << *Ty << "\n";
+ abort();
+ }
+}
+
+#define IMPLEMENT_INTEGER_ICMP(OP, TY) \
+ case Type::IntegerTyID: \
+ Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \
+ break;
+
+// Handle pointers specially because they must be compared with only as much
+// width as the host has. We _do not_ want to be comparing 64 bit values when
+// running on a 32-bit host, otherwise the upper 32 bits might mess up
+// comparisons if they contain garbage.
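+// For example, on a 32-bit host only the low 32 bits of PointerVal are
+// meaningful, so both operands are narrowed through intptr_t before the
+// comparison instead of being compared as full 64-bit values.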
+#define IMPLEMENT_POINTER_ICMP(OP) \
+ case Type::PointerTyID: \
+ Dest.IntVal = APInt(1,(void*)(intptr_t)Src1.PointerVal OP \
+ (void*)(intptr_t)Src2.PointerVal); \
+ break;
+
+static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(eq,Ty);
+ IMPLEMENT_POINTER_ICMP(==);
+ default:
+ cerr << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ne,Ty);
+ IMPLEMENT_POINTER_ICMP(!=);
+ default:
+ cerr << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ult,Ty);
+ IMPLEMENT_POINTER_ICMP(<);
+ default:
+ cerr << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(slt,Ty);
+ IMPLEMENT_POINTER_ICMP(<);
+ default:
+ cerr << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ugt,Ty);
+ IMPLEMENT_POINTER_ICMP(>);
+ default:
+ cerr << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sgt,Ty);
+ IMPLEMENT_POINTER_ICMP(>);
+ default:
+ cerr << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(ule,Ty);
+ IMPLEMENT_POINTER_ICMP(<=);
+ default:
+ cerr << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sle,Ty);
+ IMPLEMENT_POINTER_ICMP(<=);
+ default:
+ cerr << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(uge,Ty);
+ IMPLEMENT_POINTER_ICMP(>=);
+ default:
+ cerr << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_INTEGER_ICMP(sge,Ty);
+ IMPLEMENT_POINTER_ICMP(>=);
+ default:
+ cerr << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+void Interpreter::visitICmpInst(ICmpInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_ULT: R = executeICMP_ULT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SLT: R = executeICMP_SLT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_UGT: R = executeICMP_UGT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SGT: R = executeICMP_SGT(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_ULE: R = executeICMP_ULE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SLE: R = executeICMP_SLE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break;
+ case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break;
+ default:
+ cerr << "Don't know how to handle this ICmp predicate!\n-->" << I;
+ abort();
+ }
+
+ SetValue(&I, R, SF);
+}
+
+#define IMPLEMENT_FCMP(OP, TY) \
+ case Type::TY##TyID: \
+ Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \
+ break
+
+static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(==, Float);
+ IMPLEMENT_FCMP(==, Double);
+ default:
+ cerr << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(!=, Float);
+ IMPLEMENT_FCMP(!=, Double);
+
+ default:
+ cerr << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(<=, Float);
+ IMPLEMENT_FCMP(<=, Double);
+ default:
+ cerr << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(>=, Float);
+ IMPLEMENT_FCMP(>=, Double);
+ default:
+ cerr << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(<, Float);
+ IMPLEMENT_FCMP(<, Double);
+ default:
+ cerr << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ switch (Ty->getTypeID()) {
+ IMPLEMENT_FCMP(>, Float);
+ IMPLEMENT_FCMP(>, Double);
+ default:
+ cerr << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
+ abort();
+ }
+ return Dest;
+}
+
+#define IMPLEMENT_UNORDERED(TY, X,Y) \
+ if (TY == Type::FloatTy) { \
+ if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \
+ Dest.IntVal = APInt(1,true); \
+ return Dest; \
+ } \
+ } else if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \
+ Dest.IntVal = APInt(1,true); \
+ return Dest; \
+ }
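+// (X != X is true only when X is a NaN, so the macro above makes any
+// unordered comparison yield true as soon as either operand is NaN.)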
+
+
+static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OEQ(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_ONE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OLE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OGE(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OLT(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ return executeFCMP_OGT(Src1, Src2, Ty);
+}
+
+static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ if (Ty == Type::FloatTy)
+ Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
+ Src2.FloatVal == Src2.FloatVal));
+ else
+ Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
+ Src2.DoubleVal == Src2.DoubleVal));
+ return Dest;
+}
+
+static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
+ const Type *Ty) {
+ GenericValue Dest;
+ if (Ty == Type::FloatTy)
+ Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
+ Src2.FloatVal != Src2.FloatVal));
+ else
+ Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
+ Src2.DoubleVal != Src2.DoubleVal));
+ return Dest;
+}
+
+void Interpreter::visitFCmpInst(FCmpInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getPredicate()) {
+ case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break;
+ case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break;
+ case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OEQ: R = executeFCMP_OEQ(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UNE: R = executeFCMP_UNE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ONE: R = executeFCMP_ONE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ULT: R = executeFCMP_ULT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OLT: R = executeFCMP_OLT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UGT: R = executeFCMP_UGT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OGT: R = executeFCMP_OGT(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_ULE: R = executeFCMP_ULE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
+ case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
+ default:
+ cerr << "Don't know how to handle this FCmp predicate!\n-->" << I;
+ abort();
+ }
+
+ SetValue(&I, R, SF);
+}
+
+static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
+ GenericValue Src2, const Type *Ty) {
+ switch (predicate) {
+ case ICmpInst::ICMP_EQ: return executeICMP_EQ(Src1, Src2, Ty);
+ case ICmpInst::ICMP_NE: return executeICMP_NE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_UGT: return executeICMP_UGT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SGT: return executeICMP_SGT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_ULT: return executeICMP_ULT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SLT: return executeICMP_SLT(Src1, Src2, Ty);
+ case ICmpInst::ICMP_UGE: return executeICMP_UGE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SGE: return executeICMP_SGE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_ULE: return executeICMP_ULE(Src1, Src2, Ty);
+ case ICmpInst::ICMP_SLE: return executeICMP_SLE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ORD: return executeFCMP_ORD(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UNO: return executeFCMP_UNO(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OEQ: return executeFCMP_OEQ(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UEQ: return executeFCMP_UEQ(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ONE: return executeFCMP_ONE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UNE: return executeFCMP_UNE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OLT: return executeFCMP_OLT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ULT: return executeFCMP_ULT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OGT: return executeFCMP_OGT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UGT: return executeFCMP_UGT(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OLE: return executeFCMP_OLE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty);
+ case FCmpInst::FCMP_FALSE: {
+ GenericValue Result;
+ Result.IntVal = APInt(1, false);
+ return Result;
+ }
+ case FCmpInst::FCMP_TRUE: {
+ GenericValue Result;
+ Result.IntVal = APInt(1, true);
+ return Result;
+ }
+ default:
+ cerr << "Unhandled Cmp predicate\n";
+ abort();
+ }
+}
+
+void Interpreter::visitBinaryOperator(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue R; // Result
+
+ switch (I.getOpcode()) {
+ case Instruction::Add: executeAddInst (R, Src1, Src2, Ty); break;
+ case Instruction::Sub: executeSubInst (R, Src1, Src2, Ty); break;
+ case Instruction::Mul: executeMulInst (R, Src1, Src2, Ty); break;
+ case Instruction::FDiv: executeFDivInst (R, Src1, Src2, Ty); break;
+ case Instruction::FRem: executeFRemInst (R, Src1, Src2, Ty); break;
+ case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
+ case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
+ case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
+ case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
+ case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break;
+ case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
+ case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
+ default:
+ cerr << "Don't know how to handle this binary operator!\n-->" << I;
+ abort();
+ }
+
+ SetValue(&I, R, SF);
+}
+
+static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2,
+ GenericValue Src3) {
+ return Src1.IntVal == 0 ? Src3 : Src2;
+}
+
+void Interpreter::visitSelectInst(SelectInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
+ GenericValue R = executeSelectInst(Src1, Src2, Src3);
+ SetValue(&I, R, SF);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Terminator Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::exitCalled(GenericValue GV) {
+ // runAtExitHandlers() assumes there are no stack frames, but
+ // if exit() was called, then it had a stack frame. Blow away
+ // the stack before interpreting atexit handlers.
+ ECStack.clear ();
+ runAtExitHandlers ();
+ exit (GV.IntVal.zextOrTrunc(32).getZExtValue());
+}
+
+/// Pop the last stack frame off of ECStack and then copy the result
+/// back into the result variable if we are not returning void. The
+/// result variable may be the ExitValue, or the Value of the calling
+/// CallInst if there was a previous stack frame. This method may
+/// invalidate any ECStack iterators you have. This method also takes
+/// care of switching to the normal destination BB, if we are returning
+/// from an invoke.
+///
+void Interpreter::popStackAndReturnValueToCaller (const Type *RetTy,
+ GenericValue Result) {
+ // Pop the current stack frame.
+ ECStack.pop_back();
+
+ if (ECStack.empty()) { // Finished main. Put result into exit code...
+ if (RetTy && RetTy->isInteger()) { // Nonvoid return type?
+ ExitValue = Result; // Capture the exit value of the program
+ } else {
+ memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
+ }
+ } else {
+ // If we have a previous stack frame, and we have a previous call,
+ // fill in the return value...
+ ExecutionContext &CallingSF = ECStack.back();
+ if (Instruction *I = CallingSF.Caller.getInstruction()) {
+ if (CallingSF.Caller.getType() != Type::VoidTy) // Save result...
+ SetValue(I, Result, CallingSF);
+ if (InvokeInst *II = dyn_cast<InvokeInst> (I))
+ SwitchToNewBasicBlock (II->getNormalDest (), CallingSF);
+ CallingSF.Caller = CallSite(); // We returned from the call...
+ }
+ }
+}
+
+void Interpreter::visitReturnInst(ReturnInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ const Type *RetTy = Type::VoidTy;
+ GenericValue Result;
+
+ // Save away the return value... (if we are not 'ret void')
+ if (I.getNumOperands()) {
+ RetTy = I.getReturnValue()->getType();
+ Result = getOperandValue(I.getReturnValue(), SF);
+ }
+
+ popStackAndReturnValueToCaller(RetTy, Result);
+}
+
+void Interpreter::visitUnwindInst(UnwindInst &I) {
+ // Unwind stack
+ Instruction *Inst;
+ do {
+ ECStack.pop_back ();
+ if (ECStack.empty ())
+ abort ();
+ Inst = ECStack.back ().Caller.getInstruction ();
+ } while (!(Inst && isa<InvokeInst> (Inst)));
+
+ // Return from invoke
+ ExecutionContext &InvokingSF = ECStack.back ();
+ InvokingSF.Caller = CallSite ();
+
+ // Go to exceptional destination BB of invoke instruction
+ SwitchToNewBasicBlock(cast<InvokeInst>(Inst)->getUnwindDest(), InvokingSF);
+}
+
+void Interpreter::visitUnreachableInst(UnreachableInst &I) {
+ cerr << "ERROR: Program executed an 'unreachable' instruction!\n";
+ abort();
+}
+
+void Interpreter::visitBranchInst(BranchInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ BasicBlock *Dest;
+
+ Dest = I.getSuccessor(0); // Uncond branches have a fixed dest...
+ if (!I.isUnconditional()) {
+ Value *Cond = I.getCondition();
+ if (getOperandValue(Cond, SF).IntVal == 0) // If false cond...
+ Dest = I.getSuccessor(1);
+ }
+ SwitchToNewBasicBlock(Dest, SF);
+}
+
+void Interpreter::visitSwitchInst(SwitchInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue CondVal = getOperandValue(I.getOperand(0), SF);
+ const Type *ElTy = I.getOperand(0)->getType();
+
+ // Check to see if any of the cases match...
+ BasicBlock *Dest = 0;
+ for (unsigned i = 2, e = I.getNumOperands(); i != e; i += 2)
+ if (executeICMP_EQ(CondVal, getOperandValue(I.getOperand(i), SF), ElTy)
+ .IntVal != 0) {
+ Dest = cast<BasicBlock>(I.getOperand(i+1));
+ break;
+ }
+
+ if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default
+ SwitchToNewBasicBlock(Dest, SF);
+}
+
+// SwitchToNewBasicBlock - This method is used to jump to a new basic block.
+// This function handles the actual updating of block and instruction iterators
+// as well as execution of all of the PHI nodes in the destination block.
+//
+// The PHI nodes are handled this way because they must all be executed
+// atomically, reading their inputs before any of the results are updated. Not
+// doing so can cause problems when PHI nodes depend on other PHI nodes for
+// their inputs: if an input PHI node were updated before it is read, incorrect
+// results would follow. Thus we use a two-phase approach.
+//
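+// As a hypothetical illustration (not taken from the surrounding code), both
+// PHI nodes below must read their incoming values before either result is
+// written, or the intended swap of %x and %y would be lost:
+//
+//   loop:
+//     %x = phi i32 [ 0, %entry ], [ %y, %loop ]
+//     %y = phi i32 [ 1, %entry ], [ %x, %loop ]
+//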
+void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){
+ BasicBlock *PrevBB = SF.CurBB; // Remember where we came from...
+ SF.CurBB = Dest; // Update CurBB to branch destination
+ SF.CurInst = SF.CurBB->begin(); // Update new instruction ptr...
+
+ if (!isa<PHINode>(SF.CurInst)) return; // Nothing fancy to do
+
+ // Loop over all of the PHI nodes in the current block, reading their inputs.
+ std::vector<GenericValue> ResultValues;
+
+ for (; PHINode *PN = dyn_cast<PHINode>(SF.CurInst); ++SF.CurInst) {
+ // Search for the value corresponding to this previous bb...
+ int i = PN->getBasicBlockIndex(PrevBB);
+ assert(i != -1 && "PHINode doesn't contain entry for predecessor??");
+ Value *IncomingValue = PN->getIncomingValue(i);
+
+ // Save the incoming value for this PHI node...
+ ResultValues.push_back(getOperandValue(IncomingValue, SF));
+ }
+
+ // Now loop over all of the PHI nodes setting their values...
+ SF.CurInst = SF.CurBB->begin();
+ for (unsigned i = 0; isa<PHINode>(SF.CurInst); ++SF.CurInst, ++i) {
+ PHINode *PN = cast<PHINode>(SF.CurInst);
+ SetValue(PN, ResultValues[i], SF);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::visitAllocationInst(AllocationInst &I) {
+ ExecutionContext &SF = ECStack.back();
+
+ const Type *Ty = I.getType()->getElementType(); // Type to be allocated
+
+  // Get the number of array elements being allocated...
+ unsigned NumElements =
+ getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue();
+
+ unsigned TypeSize = (size_t)TD.getTypeAllocSize(Ty);
+
+ // Avoid malloc-ing zero bytes, use max()...
+ unsigned MemToAlloc = std::max(1U, NumElements * TypeSize);
+
+ // Allocate enough memory to hold the type...
+ void *Memory = malloc(MemToAlloc);
+
+ DOUT << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x "
+ << NumElements << " (Total: " << MemToAlloc << ") at "
+ << uintptr_t(Memory) << '\n';
+
+ GenericValue Result = PTOGV(Memory);
+ assert(Result.PointerVal != 0 && "Null pointer returned by malloc!");
+ SetValue(&I, Result, SF);
+
+ if (I.getOpcode() == Instruction::Alloca)
+ ECStack.back().Allocas.add(Memory);
+}
+
+void Interpreter::visitFreeInst(FreeInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ assert(isa<PointerType>(I.getOperand(0)->getType()) && "Freeing nonptr?");
+ GenericValue Value = getOperandValue(I.getOperand(0), SF);
+ // TODO: Check to make sure memory is allocated
+ free(GVTOP(Value)); // Free memory
+}
+
+// executeGEPOperation - The workhorse for getelementptr.
+//
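+// The byte offset is accumulated one index at a time. As a hypothetical
+// illustration, "getelementptr {i32, double}* %p, i64 1, i32 1" contributes
+// sizeof({i32, double}) * 1 for the pointer index plus the struct layout
+// offset of field 1, and the result is %p advanced by that many bytes.
+//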
+GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E,
+ ExecutionContext &SF) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "Cannot getElementOffset of a nonpointer type!");
+
+ uint64_t Total = 0;
+
+ for (; I != E; ++I) {
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ const StructLayout *SLO = TD.getStructLayout(STy);
+
+ const ConstantInt *CPU = cast<ConstantInt>(I.getOperand());
+ unsigned Index = unsigned(CPU->getZExtValue());
+
+ Total += SLO->getElementOffset(Index);
+ } else {
+ const SequentialType *ST = cast<SequentialType>(*I);
+      // Get the index number for the array... which must be an i32 or i64.
+ GenericValue IdxGV = getOperandValue(I.getOperand(), SF);
+
+ int64_t Idx;
+ unsigned BitWidth =
+ cast<IntegerType>(I.getOperand()->getType())->getBitWidth();
+ if (BitWidth == 32)
+ Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue();
+ else {
+ assert(BitWidth == 64 && "Invalid index type for getelementptr");
+ Idx = (int64_t)IdxGV.IntVal.getZExtValue();
+ }
+ Total += TD.getTypeAllocSize(ST->getElementType())*Idx;
+ }
+ }
+
+ GenericValue Result;
+ Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total;
+ DOUT << "GEP Index " << Total << " bytes.\n";
+ return Result;
+}
+
+void Interpreter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, TheEE->executeGEPOperation(I.getPointerOperand(),
+ gep_type_begin(I), gep_type_end(I), SF), SF);
+}
+
+void Interpreter::visitLoadInst(LoadInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
+ GenericValue *Ptr = (GenericValue*)GVTOP(SRC);
+ GenericValue Result;
+ LoadValueFromMemory(Result, Ptr, I.getType());
+ SetValue(&I, Result, SF);
+ if (I.isVolatile() && PrintVolatile)
+ cerr << "Volatile load " << I;
+}
+
+void Interpreter::visitStoreInst(StoreInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Val = getOperandValue(I.getOperand(0), SF);
+ GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
+ StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC),
+ I.getOperand(0)->getType());
+ if (I.isVolatile() && PrintVolatile)
+ cerr << "Volatile store: " << I;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+void Interpreter::visitCallSite(CallSite CS) {
+ ExecutionContext &SF = ECStack.back();
+
+ // Check to see if this is an intrinsic function call...
+ Function *F = CS.getCalledFunction();
+ if (F && F->isDeclaration ())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ break;
+ case Intrinsic::vastart: { // va_start
+ GenericValue ArgIndex;
+ ArgIndex.UIntPairVal.first = ECStack.size() - 1;
+ ArgIndex.UIntPairVal.second = 0;
+ SetValue(CS.getInstruction(), ArgIndex, SF);
+ return;
+ }
+ case Intrinsic::vaend: // va_end is a noop for the interpreter
+ return;
+ case Intrinsic::vacopy: // va_copy: dest = src
+ SetValue(CS.getInstruction(), getOperandValue(*CS.arg_begin(), SF), SF);
+ return;
+ default:
+ // If it is an unknown intrinsic function, use the intrinsic lowering
+ // class to transform it into hopefully tasty LLVM code.
+ //
+ BasicBlock::iterator me(CS.getInstruction());
+ BasicBlock *Parent = CS.getInstruction()->getParent();
+ bool atBegin(Parent->begin() == me);
+ if (!atBegin)
+ --me;
+ IL->LowerIntrinsicCall(cast<CallInst>(CS.getInstruction()));
+
+ // Restore the CurInst pointer to the first instruction newly inserted, if
+ // any.
+ if (atBegin) {
+ SF.CurInst = Parent->begin();
+ } else {
+ SF.CurInst = me;
+ ++SF.CurInst;
+ }
+ return;
+ }
+
+
+ SF.Caller = CS;
+ std::vector<GenericValue> ArgVals;
+ const unsigned NumArgs = SF.Caller.arg_size();
+ ArgVals.reserve(NumArgs);
+ uint16_t pNum = 1;
+ for (CallSite::arg_iterator i = SF.Caller.arg_begin(),
+ e = SF.Caller.arg_end(); i != e; ++i, ++pNum) {
+ Value *V = *i;
+ ArgVals.push_back(getOperandValue(V, SF));
+    // Promote all integral types whose size is < sizeof(i32) into i32.
+    // We do this by zero- or sign-extending the value as appropriate
+    // according to the parameter attributes.
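+    // For example, an i8 argument holding 0xFF becomes the i32 value
+    // 0xFFFFFFFF (-1) under 'signext', but 0x000000FF under 'zeroext'.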
+ const Type *Ty = V->getType();
+ if (Ty->isInteger() && (ArgVals.back().IntVal.getBitWidth() < 32)) {
+ if (CS.paramHasAttr(pNum, Attribute::ZExt))
+ ArgVals.back().IntVal = ArgVals.back().IntVal.zext(32);
+ else if (CS.paramHasAttr(pNum, Attribute::SExt))
+ ArgVals.back().IntVal = ArgVals.back().IntVal.sext(32);
+ }
+ }
+
+ // To handle indirect calls, we must get the pointer value from the argument
+ // and treat it as a function pointer.
+ GenericValue SRC = getOperandValue(SF.Caller.getCalledValue(), SF);
+ callFunction((Function*)GVTOP(SRC), ArgVals);
+}
+
+void Interpreter::visitShl(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
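+  // Shifting by the bit width or more is undefined in LLVM IR, so guard the
+  // shift amount here and (arbitrarily) return the first operand unchanged
+  // for oversized shifts. visitLShr and visitAShr use the same guard.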
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitLShr(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitAShr(BinaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+ if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
+ Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue());
+ else
+ Dest.IntVal = Src1.IntVal;
+
+ SetValue(&I, Dest, SF);
+}
+
+GenericValue Interpreter::executeTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.trunc(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeSExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.sext(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ const IntegerType *DITy = cast<IntegerType>(DstTy);
+ unsigned DBitWidth = DITy->getBitWidth();
+ Dest.IntVal = Src.IntVal.zext(DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType() == Type::DoubleTy && DstTy == Type::FloatTy &&
+ "Invalid FPTrunc instruction");
+ Dest.FloatVal = (float) Src.DoubleVal;
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcVal->getType() == Type::FloatTy && DstTy == Type::DoubleTy &&
+ "Invalid FPTrunc instruction");
+ Dest.DoubleVal = (double) Src.FloatVal;
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ const Type *SrcTy = SrcVal->getType();
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcTy->isFloatingPoint() && "Invalid FPToUI instruction");
+
+ if (SrcTy->getTypeID() == Type::FloatTyID)
+ Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+ else
+ Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ const Type *SrcTy = SrcVal->getType();
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(SrcTy->isFloatingPoint() && "Invalid FPToSI instruction");
+
+ if (SrcTy->getTypeID() == Type::FloatTyID)
+ Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+ else
+ Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+ return Dest;
+}
+
+GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isFloatingPoint() && "Invalid UIToFP instruction");
+
+ if (DstTy->getTypeID() == Type::FloatTyID)
+ Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal);
+ else
+ Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(DstTy->isFloatingPoint() && "Invalid SIToFP instruction");
+
+ if (DstTy->getTypeID() == Type::FloatTyID)
+ Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal);
+ else
+ Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ assert(isa<PointerType>(SrcVal->getType()) && "Invalid PtrToInt instruction");
+
+ Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal);
+ return Dest;
+}
+
+GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+  assert(isa<PointerType>(DstTy) && "Invalid IntToPtr instruction");
+
+ uint32_t PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize != Src.IntVal.getBitWidth())
+ Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
+
+ Dest.PointerVal = PointerTy(intptr_t(Src.IntVal.getZExtValue()));
+ return Dest;
+}
+
+GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF) {
+
+ const Type *SrcTy = SrcVal->getType();
+ GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+ if (isa<PointerType>(DstTy)) {
+ assert(isa<PointerType>(SrcTy) && "Invalid BitCast");
+ Dest.PointerVal = Src.PointerVal;
+ } else if (DstTy->isInteger()) {
+ if (SrcTy == Type::FloatTy) {
+ Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT);
+ Dest.IntVal.floatToBits(Src.FloatVal);
+ } else if (SrcTy == Type::DoubleTy) {
+ Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT);
+ Dest.IntVal.doubleToBits(Src.DoubleVal);
+ } else if (SrcTy->isInteger()) {
+ Dest.IntVal = Src.IntVal;
+ } else
+ assert(0 && "Invalid BitCast");
+ } else if (DstTy == Type::FloatTy) {
+ if (SrcTy->isInteger())
+ Dest.FloatVal = Src.IntVal.bitsToFloat();
+ else
+ Dest.FloatVal = Src.FloatVal;
+ } else if (DstTy == Type::DoubleTy) {
+ if (SrcTy->isInteger())
+ Dest.DoubleVal = Src.IntVal.bitsToDouble();
+ else
+ Dest.DoubleVal = Src.DoubleVal;
+ } else
+ assert(0 && "Invalid Bitcast");
+
+ return Dest;
+}
+
+void Interpreter::visitTruncInst(TruncInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeTruncInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitSExtInst(SExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeSExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitZExtInst(ZExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeZExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPTruncInst(FPTruncInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPTruncInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPExtInst(FPExtInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPExtInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitUIToFPInst(UIToFPInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeUIToFPInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitSIToFPInst(SIToFPInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeSIToFPInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPToUIInst(FPToUIInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPToUIInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitFPToSIInst(FPToSIInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeFPToSIInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitPtrToIntInst(PtrToIntInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executePtrToIntInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitIntToPtrInst(IntToPtrInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeIntToPtrInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+void Interpreter::visitBitCastInst(BitCastInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ SetValue(&I, executeBitCastInst(I.getOperand(0), I.getType(), SF), SF);
+}
+
+#define IMPLEMENT_VAARG(TY) \
+ case Type::TY##TyID: Dest.TY##Val = Src.TY##Val; break
+
+void Interpreter::visitVAArgInst(VAArgInst &I) {
+ ExecutionContext &SF = ECStack.back();
+
+ // Get the incoming valist parameter. LLI treats the valist as a
+  // (ec-stack-depth, var-arg-index) pair.
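+  // For example, a valist created by va_start in the frame at index 3 of the
+  // execution stack starts out as the pair (3, 0); the second element indexes
+  // into that frame's VarArgs vector.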
+ GenericValue VAList = getOperandValue(I.getOperand(0), SF);
+ GenericValue Dest;
+ GenericValue Src = ECStack[VAList.UIntPairVal.first]
+ .VarArgs[VAList.UIntPairVal.second];
+ const Type *Ty = I.getType();
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: Dest.IntVal = Src.IntVal;
+ IMPLEMENT_VAARG(Pointer);
+ IMPLEMENT_VAARG(Float);
+ IMPLEMENT_VAARG(Double);
+ default:
+ cerr << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
+ abort();
+ }
+
+ // Set the Value of this Instruction.
+ SetValue(&I, Dest, SF);
+
+ // Move the pointer to the next vararg.
+ ++VAList.UIntPairVal.second;
+}
+
+GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
+ ExecutionContext &SF) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ return executeTruncInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::ZExt:
+ return executeZExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::SExt:
+ return executeSExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPTrunc:
+ return executeFPTruncInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPExt:
+ return executeFPExtInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::UIToFP:
+ return executeUIToFPInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::SIToFP:
+ return executeSIToFPInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPToUI:
+ return executeFPToUIInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::FPToSI:
+ return executeFPToSIInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::PtrToInt:
+ return executePtrToIntInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::IntToPtr:
+ return executeIntToPtrInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::BitCast:
+ return executeBitCastInst(CE->getOperand(0), CE->getType(), SF);
+ case Instruction::GetElementPtr:
+ return executeGEPOperation(CE->getOperand(0), gep_type_begin(CE),
+ gep_type_end(CE), SF);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ return executeCmpInst(CE->getPredicate(),
+ getOperandValue(CE->getOperand(0), SF),
+ getOperandValue(CE->getOperand(1), SF),
+ CE->getOperand(0)->getType());
+ case Instruction::Select:
+ return executeSelectInst(getOperandValue(CE->getOperand(0), SF),
+ getOperandValue(CE->getOperand(1), SF),
+ getOperandValue(CE->getOperand(2), SF));
+ default :
+ break;
+ }
+
+  // The remaining cases compute their result into a GenericValue, so we
+  // initialize one here, compute it, and then return it.
+ GenericValue Op0 = getOperandValue(CE->getOperand(0), SF);
+ GenericValue Op1 = getOperandValue(CE->getOperand(1), SF);
+ GenericValue Dest;
+ const Type * Ty = CE->getOperand(0)->getType();
+ switch (CE->getOpcode()) {
+ case Instruction::Add: executeAddInst (Dest, Op0, Op1, Ty); break;
+ case Instruction::Sub: executeSubInst (Dest, Op0, Op1, Ty); break;
+ case Instruction::Mul: executeMulInst (Dest, Op0, Op1, Ty); break;
+ case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break;
+ case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break;
+ case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break;
+ case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break;
+ case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break;
+ case Instruction::And: Dest.IntVal = Op0.IntVal.And(Op1.IntVal); break;
+ case Instruction::Or: Dest.IntVal = Op0.IntVal.Or(Op1.IntVal); break;
+ case Instruction::Xor: Dest.IntVal = Op0.IntVal.Xor(Op1.IntVal); break;
+ case Instruction::Shl:
+ Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue());
+ break;
+ case Instruction::LShr:
+ Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue());
+ break;
+ case Instruction::AShr:
+ Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
+ break;
+ default:
+ cerr << "Unhandled ConstantExpr: " << *CE << "\n";
+ abort();
+ return GenericValue();
+ }
+ return Dest;
+}
+
+GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ return getConstantExprValue(CE, SF);
+ } else if (Constant *CPV = dyn_cast<Constant>(V)) {
+ return getConstantValue(CPV);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ return PTOGV(getPointerToGlobal(GV));
+ } else {
+ return SF.Values[V];
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Dispatch and Execution Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// callFunction - Execute the specified function...
+//
+void Interpreter::callFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals) {
+ assert((ECStack.empty() || ECStack.back().Caller.getInstruction() == 0 ||
+ ECStack.back().Caller.arg_size() == ArgVals.size()) &&
+ "Incorrect number of arguments passed into function call!");
+ // Make a new stack frame... and fill it in.
+ ECStack.push_back(ExecutionContext());
+ ExecutionContext &StackFrame = ECStack.back();
+ StackFrame.CurFunction = F;
+
+ // Special handling for external functions.
+ if (F->isDeclaration()) {
+ GenericValue Result = callExternalFunction (F, ArgVals);
+ // Simulate a 'ret' instruction of the appropriate type.
+ popStackAndReturnValueToCaller (F->getReturnType (), Result);
+ return;
+ }
+
+ // Get pointers to first LLVM BB & Instruction in function.
+ StackFrame.CurBB = F->begin();
+ StackFrame.CurInst = StackFrame.CurBB->begin();
+
+ // Run through the function arguments and initialize their values...
+ assert((ArgVals.size() == F->arg_size() ||
+ (ArgVals.size() > F->arg_size() && F->getFunctionType()->isVarArg()))&&
+ "Invalid number of values passed to function invocation!");
+
+ // Handle non-varargs arguments...
+ unsigned i = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++i)
+ SetValue(AI, ArgVals[i], StackFrame);
+
+ // Handle varargs arguments...
+ StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end());
+}
+
+
+void Interpreter::run() {
+ while (!ECStack.empty()) {
+ // Interpret a single instruction & increment the "PC".
+ ExecutionContext &SF = ECStack.back(); // Current stack frame
+ Instruction &I = *SF.CurInst++; // Increment before execute
+
+ // Track the number of dynamic instructions executed.
+ ++NumDynamicInsts;
+
+ DOUT << "About to interpret: " << I;
+ visit(I); // Dispatch to one of the visit* methods...
+#if 0
+ // This is not safe, as visiting the instruction could lower it and free I.
+#ifndef NDEBUG
+ if (!isa<CallInst>(I) && !isa<InvokeInst>(I) &&
+ I.getType() != Type::VoidTy) {
+ DOUT << " --> ";
+ const GenericValue &Val = SF.Values[&I];
+ switch (I.getType()->getTypeID()) {
+ default: assert(0 && "Invalid GenericValue Type");
+ case Type::VoidTyID: DOUT << "void"; break;
+ case Type::FloatTyID: DOUT << "float " << Val.FloatVal; break;
+ case Type::DoubleTyID: DOUT << "double " << Val.DoubleVal; break;
+ case Type::PointerTyID: DOUT << "void* " << intptr_t(Val.PointerVal);
+ break;
+ case Type::IntegerTyID:
+ DOUT << "i" << Val.IntVal.getBitWidth() << " "
+ << Val.IntVal.toStringUnsigned(10)
+ << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n";
+ break;
+ }
+ }
+#endif
+#endif
+ }
+}
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
new file mode 100644
index 0000000..160f1ba
--- /dev/null
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -0,0 +1,542 @@
+//===-- ExternalFunctions.cpp - Implement External Functions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to deal with invoking "external" functions, as
+// well as code that implements "exported" external functions.
+//
+// There are currently two mechanisms for handling external functions in the
+// Interpreter. The first is to implement lle_* wrapper functions that are
+// specific to well-known library functions which manually translate the
+// arguments from GenericValues and make the call. If such a wrapper does
+// not exist, and libffi is available, then the Interpreter will attempt to
+// invoke the function using libffi, after finding its address.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interpreter.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Config/config.h" // Detect libffi
+#include "llvm/Support/Streams.h"
+#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ManagedStatic.h"
+#include <csignal>
+#include <cstdio>
+#include <map>
+#include <cmath>
+#include <cstring>
+
+#ifdef HAVE_FFI_CALL
+#ifdef HAVE_FFI_H
+#include <ffi.h>
+#define USE_LIBFFI
+#elif HAVE_FFI_FFI_H
+#include <ffi/ffi.h>
+#define USE_LIBFFI
+#endif
+#endif
+
+using namespace llvm;
+
+typedef GenericValue (*ExFunc)(const FunctionType *,
+ const std::vector<GenericValue> &);
+static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
+static std::map<std::string, ExFunc> FuncNames;
+
+#ifdef USE_LIBFFI
+typedef void (*RawFunc)(void);
+static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions;
+#endif
+
+static Interpreter *TheInterpreter;
+
+static char getTypeID(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return 'V';
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 1: return 'o';
+ case 8: return 'B';
+ case 16: return 'S';
+ case 32: return 'I';
+ case 64: return 'L';
+ default: return 'N';
+ }
+ case Type::FloatTyID: return 'F';
+ case Type::DoubleTyID: return 'D';
+ case Type::PointerTyID: return 'P';
+ case Type::FunctionTyID:return 'M';
+ case Type::StructTyID: return 'T';
+ case Type::ArrayTyID: return 'A';
+ case Type::OpaqueTyID: return 'O';
+ default: return 'U';
+ }
+}
+
+// Try to find the address of an external function, given a Function object.
+// Please note that the interpreter doesn't know how to assemble a real call
+// in the general case (that is the JIT's job); that's why it assumes that
+// all external functions have the same (and fairly "general") signature.
+// The typical examples of such functions are the "lle_X_" ones.
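+// For example, a function "void foo(i32)" is first looked up under the name
+// "lle_VI_foo" ('V' encodes the void return type, 'I' the i32 parameter),
+// and then under the generic name "lle_X_foo".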
+static ExFunc lookupFunction(const Function *F) {
+ // Function not found, look it up... start by figuring out what the
+ // composite function name should be.
+ std::string ExtName = "lle_";
+ const FunctionType *FT = F->getFunctionType();
+ for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
+ ExtName += getTypeID(FT->getContainedType(i));
+ ExtName += "_" + F->getName();
+
+ ExFunc FnPtr = FuncNames[ExtName];
+ if (FnPtr == 0)
+ FnPtr = FuncNames["lle_X_"+F->getName()];
+ if (FnPtr == 0) // Try calling a generic function... if it exists...
+ FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
+ ("lle_X_"+F->getName()).c_str());
+ if (FnPtr != 0)
+ ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
+ return FnPtr;
+}
+
+#ifdef USE_LIBFFI
+static ffi_type *ffiTypeFor(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return &ffi_type_void;
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8: return &ffi_type_sint8;
+ case 16: return &ffi_type_sint16;
+ case 32: return &ffi_type_sint32;
+ case 64: return &ffi_type_sint64;
+    }
+    break; // Unusual integer widths reach the unmapped-type error below.
+ case Type::FloatTyID: return &ffi_type_float;
+ case Type::DoubleTyID: return &ffi_type_double;
+ case Type::PointerTyID: return &ffi_type_pointer;
+ default: break;
+ }
+ // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ cerr << "Type could not be mapped for use with libffi.\n";
+ abort();
+ return NULL;
+}
+
+static void *ffiValueFor(const Type *Ty, const GenericValue &AV,
+ void *ArgDataPtr) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ case 8: {
+ int8_t *I8Ptr = (int8_t *) ArgDataPtr;
+ *I8Ptr = (int8_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 16: {
+ int16_t *I16Ptr = (int16_t *) ArgDataPtr;
+ *I16Ptr = (int16_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 32: {
+ int32_t *I32Ptr = (int32_t *) ArgDataPtr;
+ *I32Ptr = (int32_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+ case 64: {
+ int64_t *I64Ptr = (int64_t *) ArgDataPtr;
+ *I64Ptr = (int64_t) AV.IntVal.getZExtValue();
+ return ArgDataPtr;
+ }
+    }
+    break; // Unusual integer widths reach the unmapped-value error below.
+ case Type::FloatTyID: {
+ float *FloatPtr = (float *) ArgDataPtr;
+    *FloatPtr = AV.FloatVal;
+ return ArgDataPtr;
+ }
+ case Type::DoubleTyID: {
+ double *DoublePtr = (double *) ArgDataPtr;
+ *DoublePtr = AV.DoubleVal;
+ return ArgDataPtr;
+ }
+ case Type::PointerTyID: {
+ void **PtrPtr = (void **) ArgDataPtr;
+ *PtrPtr = GVTOP(AV);
+ return ArgDataPtr;
+ }
+ default: break;
+ }
+ // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
+ cerr << "Type value could not be mapped for use with libffi.\n";
+ abort();
+ return NULL;
+}
+
+static bool ffiInvoke(RawFunc Fn, Function *F,
+ const std::vector<GenericValue> &ArgVals,
+ const TargetData *TD, GenericValue &Result) {
+ ffi_cif cif;
+ const FunctionType *FTy = F->getFunctionType();
+ const unsigned NumArgs = F->arg_size();
+
+ // TODO: We don't have type information about the remaining arguments, because
+ // this information is never passed into ExecutionEngine::runFunction().
+ if (ArgVals.size() > NumArgs && F->isVarArg()) {
+ cerr << "Calling external var arg function '" << F->getName()
+ << "' is not supported by the Interpreter.\n";
+ abort();
+ }
+
+ unsigned ArgBytes = 0;
+
+ std::vector<ffi_type*> args(NumArgs);
+ for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ const unsigned ArgNo = A->getArgNo();
+ const Type *ArgTy = FTy->getParamType(ArgNo);
+ args[ArgNo] = ffiTypeFor(ArgTy);
+ ArgBytes += TD->getTypeStoreSize(ArgTy);
+ }
+
+ uint8_t *ArgData = (uint8_t*) alloca(ArgBytes);
+ uint8_t *ArgDataPtr = ArgData;
+ std::vector<void*> values(NumArgs);
+ for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ const unsigned ArgNo = A->getArgNo();
+ const Type *ArgTy = FTy->getParamType(ArgNo);
+ values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr);
+ ArgDataPtr += TD->getTypeStoreSize(ArgTy);
+ }
+
+ const Type *RetTy = FTy->getReturnType();
+ ffi_type *rtype = ffiTypeFor(RetTy);
+
+ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
+ void *ret = NULL;
+ if (RetTy->getTypeID() != Type::VoidTyID)
+ ret = alloca(TD->getTypeStoreSize(RetTy));
+ ffi_call(&cif, Fn, ret, &values[0]);
+ switch (RetTy->getTypeID()) {
+ case Type::IntegerTyID:
+ switch (cast<IntegerType>(RetTy)->getBitWidth()) {
+ case 8: Result.IntVal = APInt(8 , *(int8_t *) ret); break;
+ case 16: Result.IntVal = APInt(16, *(int16_t*) ret); break;
+ case 32: Result.IntVal = APInt(32, *(int32_t*) ret); break;
+ case 64: Result.IntVal = APInt(64, *(int64_t*) ret); break;
+ }
+ break;
+ case Type::FloatTyID: Result.FloatVal = *(float *) ret; break;
+ case Type::DoubleTyID: Result.DoubleVal = *(double*) ret; break;
+ case Type::PointerTyID: Result.PointerVal = *(void **) ret; break;
+ default: break;
+ }
+ return true;
+ }
+
+ return false;
+}
+#endif // USE_LIBFFI
+
+GenericValue Interpreter::callExternalFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals) {
+ TheInterpreter = this;
+
+ // Do a lookup to see if the function is in our cache... this should just be a
+ // deferred annotation!
+ std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F);
+ if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F)
+ : FI->second)
+ return Fn(F->getFunctionType(), ArgVals);
+
+#ifdef USE_LIBFFI
+ std::map<const Function *, RawFunc>::iterator RF = RawFunctions->find(F);
+ RawFunc RawFn;
+ if (RF == RawFunctions->end()) {
+ RawFn = (RawFunc)(intptr_t)
+ sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName());
+ if (RawFn != 0)
+ RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later
+ } else {
+ RawFn = RF->second;
+ }
+
+ GenericValue Result;
+ if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getTargetData(), Result))
+ return Result;
+#endif // USE_LIBFFI
+
+ cerr << "Tried to execute an unknown external function: "
+ << F->getType()->getDescription() << " " << F->getName() << "\n";
+ if (F->getName() != "__main")
+ abort();
+ return GenericValue();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functions "exported" to the running application...
+//
+extern "C" { // Don't add C++ manglings to llvm mangling :)
+
+// void atexit(Function*)
+GenericValue lle_X_atexit(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ assert(Args.size() == 1);
+ TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0]));
+ GenericValue GV;
+ GV.IntVal = 0;
+ return GV;
+}
+
+// void exit(int)
+GenericValue lle_X_exit(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ TheInterpreter->exitCalled(Args[0]);
+ return GenericValue();
+}
+
+// void abort(void)
+GenericValue lle_X_abort(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ raise (SIGABRT);
+ return GenericValue();
+}
+
+// int sprintf(char *, const char *, ...) - a very rough implementation to make
+// output useful.
+GenericValue lle_X_sprintf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ char *OutputBuffer = (char *)GVTOP(Args[0]);
+ const char *FmtStr = (const char *)GVTOP(Args[1]);
+ unsigned ArgNo = 2;
+
+  // sprintf should return the number of characters written. This is
+  // completely incorrect, but close enough for now.
+ GenericValue GV;
+ GV.IntVal = APInt(32, strlen(FmtStr));
+ while (1) {
+ switch (*FmtStr) {
+ case 0: return GV; // Null terminator...
+ default: // Normal nonspecial character
+ sprintf(OutputBuffer++, "%c", *FmtStr++);
+ break;
+ case '\\': { // Handle escape codes
+ sprintf(OutputBuffer, "%c%c", *FmtStr, *(FmtStr+1));
+ FmtStr += 2; OutputBuffer += 2;
+ break;
+ }
+ case '%': { // Handle format specifiers
+ char FmtBuf[100] = "", Buffer[1000] = "";
+ char *FB = FmtBuf;
+ *FB++ = *FmtStr++;
+ char Last = *FB++ = *FmtStr++;
+ unsigned HowLong = 0;
+ while (Last != 'c' && Last != 'd' && Last != 'i' && Last != 'u' &&
+ Last != 'o' && Last != 'x' && Last != 'X' && Last != 'e' &&
+ Last != 'E' && Last != 'g' && Last != 'G' && Last != 'f' &&
+ Last != 'p' && Last != 's' && Last != '%') {
+ if (Last == 'l' || Last == 'L') HowLong++; // Keep track of l's
+ Last = *FB++ = *FmtStr++;
+ }
+ *FB = 0;
+
+ switch (Last) {
+ case '%':
+ strcpy(Buffer, "%"); break;
+ case 'c':
+ sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
+ break;
+ case 'd': case 'i':
+ case 'u': case 'o':
+ case 'x': case 'X':
+ if (HowLong >= 1) {
+ if (HowLong == 1 &&
+ TheInterpreter->getTargetData()->getPointerSizeInBits() == 64 &&
+ sizeof(long) < sizeof(int64_t)) {
+ // Make sure we use %lld with a 64 bit argument because we might be
+ // compiling LLI on a 32 bit compiler.
+ unsigned Size = strlen(FmtBuf);
+ FmtBuf[Size] = FmtBuf[Size-1];
+ FmtBuf[Size+1] = 0;
+ FmtBuf[Size-1] = 'l';
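+              // e.g. "%ld" from the interpreted program becomes "%lld" here
+              // before being handed to the host's sprintf.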
+ }
+ sprintf(Buffer, FmtBuf, Args[ArgNo++].IntVal.getZExtValue());
+ } else
+ sprintf(Buffer, FmtBuf,uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
+ break;
+ case 'e': case 'E': case 'g': case 'G': case 'f':
+ sprintf(Buffer, FmtBuf, Args[ArgNo++].DoubleVal); break;
+ case 'p':
+ sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break;
+ case 's':
+ sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break;
+ default: cerr << "<unknown printf code '" << *FmtStr << "'!>";
+ ArgNo++; break;
+ }
+ strcpy(OutputBuffer, Buffer);
+ OutputBuffer += strlen(Buffer);
+ }
+ break;
+ }
+ }
+ return GV;
+}
+
+// int printf(const char *, ...) - a very rough implementation to make output
+// useful.
+GenericValue lle_X_printf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ char Buffer[10000];
+ std::vector<GenericValue> NewArgs;
+ NewArgs.push_back(PTOGV((void*)&Buffer[0]));
+ NewArgs.insert(NewArgs.end(), Args.begin(), Args.end());
+ GenericValue GV = lle_X_sprintf(FT, NewArgs);
+ cout << Buffer;
+ return GV;
+}
+
+static void ByteswapSCANFResults(const char *Fmt, void *Arg0, void *Arg1,
+ void *Arg2, void *Arg3, void *Arg4, void *Arg5,
+ void *Arg6, void *Arg7, void *Arg8) {
+ void *Args[] = { Arg0, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, 0 };
+
+ // Loop over the format string, munging read values as appropriate (performs
+ // byteswaps as necessary).
+ unsigned ArgNo = 0;
+ while (*Fmt) {
+ if (*Fmt++ == '%') {
+ // Read any flag characters that may be present...
+ bool Suppress = false;
+ bool Half = false;
+ bool Long = false;
+ bool LongLong = false; // long long or long double
+
+ while (1) {
+ switch (*Fmt++) {
+ case '*': Suppress = true; break;
+ case 'a': /*Allocate = true;*/ break; // We don't need to track this
+ case 'h': Half = true; break;
+ case 'l': Long = true; break;
+ case 'q':
+ case 'L': LongLong = true; break;
+ default:
+ if (Fmt[-1] > '9' || Fmt[-1] < '0') // Ignore field width specs
+ goto Out;
+ }
+ }
+ Out:
+
+ // Read the conversion character
+ if (!Suppress && Fmt[-1] != '%') { // Nothing to do?
+ unsigned Size = 0;
+ const Type *Ty = 0;
+
+ switch (Fmt[-1]) {
+ case 'i': case 'o': case 'u': case 'x': case 'X': case 'n': case 'p':
+ case 'd':
+ if (Long || LongLong) {
+ Size = 8; Ty = Type::Int64Ty;
+ } else if (Half) {
+ Size = 4; Ty = Type::Int16Ty;
+ } else {
+ Size = 4; Ty = Type::Int32Ty;
+ }
+ break;
+
+ case 'e': case 'g': case 'E':
+ case 'f':
+ if (Long || LongLong) {
+ Size = 8; Ty = Type::DoubleTy;
+ } else {
+ Size = 4; Ty = Type::FloatTy;
+ }
+ break;
+
+ case 's': case 'c': case '[': // No byteswap needed
+ Size = 1;
+ Ty = Type::Int8Ty;
+ break;
+
+ default: break;
+ }
+
+ if (Size) {
+ GenericValue GV;
+ void *Arg = Args[ArgNo++];
+ memcpy(&GV, Arg, Size);
+ TheInterpreter->StoreValueToMemory(GV, (GenericValue*)Arg, Ty);
+ }
+ }
+ }
+ }
+}
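+
+// A worked example (illustrative, not part of the original source): after
+//   sscanf("42", "%d", Arg0)
+// the host writes a native int through Arg0. The loop above then copies
+// those four bytes back into a GenericValue and re-stores them with
+// StoreValueToMemory as an Int32Ty value, so the result ends up with the
+// interpreter's notion of byte order rather than the host's.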
+
+// int sscanf(const char *str, const char *format, ...);
+GenericValue lle_X_sscanf(const FunctionType *FT,
+ const std::vector<GenericValue> &args) {
+ assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
+
+ char *Args[10];
+ for (unsigned i = 0; i < args.size(); ++i)
+ Args[i] = (char*)GVTOP(args[i]);
+
+ GenericValue GV;
+ GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
+ ByteswapSCANFResults(Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9], 0);
+ return GV;
+}
+
+// int scanf(const char *format, ...);
+GenericValue lle_X_scanf(const FunctionType *FT,
+ const std::vector<GenericValue> &args) {
+ assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
+
+ char *Args[10];
+ for (unsigned i = 0; i < args.size(); ++i)
+ Args[i] = (char*)GVTOP(args[i]);
+
+ GenericValue GV;
+ GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
+ ByteswapSCANFResults(Args[0], Args[1], Args[2], Args[3], Args[4],
+ Args[5], Args[6], Args[7], Args[8], Args[9]);
+ return GV;
+}
+
+// int fprintf(FILE *, const char *, ...) - a very rough implementation to make
+// output useful.
+GenericValue lle_X_fprintf(const FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ assert(Args.size() >= 2);
+ char Buffer[10000];
+ std::vector<GenericValue> NewArgs;
+ NewArgs.push_back(PTOGV(Buffer));
+ NewArgs.insert(NewArgs.end(), Args.begin()+1, Args.end());
+ GenericValue GV = lle_X_sprintf(FT, NewArgs);
+
+ fputs(Buffer, (FILE *) GVTOP(Args[0]));
+ return GV;
+}
+
+} // End extern "C"
+
+
+void Interpreter::initializeExternalFunctions() {
+ FuncNames["lle_X_atexit"] = lle_X_atexit;
+ FuncNames["lle_X_exit"] = lle_X_exit;
+ FuncNames["lle_X_abort"] = lle_X_abort;
+
+ FuncNames["lle_X_printf"] = lle_X_printf;
+ FuncNames["lle_X_sprintf"] = lle_X_sprintf;
+ FuncNames["lle_X_sscanf"] = lle_X_sscanf;
+ FuncNames["lle_X_scanf"] = lle_X_scanf;
+ FuncNames["lle_X_fprintf"] = lle_X_fprintf;
+}
+
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
new file mode 100644
index 0000000..ded65d5
--- /dev/null
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -0,0 +1,104 @@
+//===- Interpreter.cpp - Top-Level LLVM Interpreter Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the top-level functionality for the LLVM interpreter.
+// This interpreter is designed to be a very simple, portable, inefficient
+// interpreter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Interpreter.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include <cstring>
+using namespace llvm;
+
+namespace {
+
+static struct RegisterInterp {
+ RegisterInterp() { Interpreter::Register(); }
+} InterpRegistrator;
+
+}
+
+namespace llvm {
+ void LinkInInterpreter() {
+ }
+}
+
+/// create - Create a new interpreter object. This fails (returning null)
+/// only if the module cannot be materialized.
+///
+ExecutionEngine *Interpreter::create(ModuleProvider *MP, std::string* ErrStr,
+ CodeGenOpt::Level OptLevel /*unused*/) {
+ // Tell this ModuleProvider to materialize the module.
+ if (!MP->materializeModule(ErrStr))
+ // We got an error, just return 0
+ return 0;
+
+ return new Interpreter(MP);
+}
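+
+// A minimal usage sketch (illustrative; assumes `MP` wraps an already parsed
+// module, and is not part of the original file):
+//
+//   std::string Err;
+//   ExecutionEngine *EE = Interpreter::create(MP, &Err);
+//   if (!EE) { /* materialization failed; Err describes why */ }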
+
+//===----------------------------------------------------------------------===//
+// Interpreter ctor - Initialize stuff
+//
+Interpreter::Interpreter(ModuleProvider *M)
+ : ExecutionEngine(M), TD(M->getModule()) {
+
+ memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
+ setTargetData(&TD);
+ // Initialize the "backend"
+ initializeExecutionEngine();
+ initializeExternalFunctions();
+ emitGlobals();
+
+ IL = new IntrinsicLowering(TD);
+}
+
+Interpreter::~Interpreter() {
+ delete IL;
+}
+
+void Interpreter::runAtExitHandlers () {
+ while (!AtExitHandlers.empty()) {
+ callFunction(AtExitHandlers.back(), std::vector<GenericValue>());
+ AtExitHandlers.pop_back();
+ run();
+ }
+}
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue
+Interpreter::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert (F && "Function *F was null at entry to run()");
+
+ // Try extra hard not to pass extra args to a function that isn't
+ // expecting them. C programmers frequently bend the rules and
+ // declare main() with fewer parameters than it actually gets
+ // passed, and the interpreter barfs if you pass a function more
+ // parameters than it is declared to take. This does not attempt to
+ // take into account gratuitous differences in declared types,
+ // though.
+ std::vector<GenericValue> ActualArgs;
+ const unsigned ArgCount = F->getFunctionType()->getNumParams();
+ for (unsigned i = 0; i < ArgCount; ++i)
+ ActualArgs.push_back(ArgValues[i]);
+
+ // Set up the function call.
+ callFunction(F, ActualArgs);
+
+ // Start executing the function.
+ run();
+
+ return ExitValue;
+}
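+
+// Example of the trimming above (illustrative): if a program declares
+// `int main()` but the driver passes {argc, argv, envp}, ArgCount is 0, so
+// ActualArgs stays empty and callFunction never sees the extra values.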
+
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
new file mode 100644
index 0000000..8a285ec
--- /dev/null
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -0,0 +1,241 @@
+//===-- Interpreter.h ------------------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the interpreter structure
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLI_INTERPRETER_H
+#define LLI_INTERPRETER_H
+
+#include "llvm/Function.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class IntrinsicLowering;
+struct FunctionInfo;
+template<typename T> class generic_gep_type_iterator;
+class ConstantExpr;
+typedef generic_gep_type_iterator<User::const_op_iterator> gep_type_iterator;
+
+
+// AllocaHolder - Object to track all of the blocks of memory allocated by
+// alloca. When the function returns, this object is popped off the execution
+// stack; its destructor then runs and frees all of the alloca'd memory.
+//
+class AllocaHolder {
+ friend class AllocaHolderHandle;
+ std::vector<void*> Allocations;
+ unsigned RefCnt;
+public:
+ AllocaHolder() : RefCnt(0) {}
+ void add(void *mem) { Allocations.push_back(mem); }
+ ~AllocaHolder() {
+ for (unsigned i = 0; i < Allocations.size(); ++i)
+ free(Allocations[i]);
+ }
+};
+
+// AllocaHolderHandle gives AllocaHolder value semantics so we can stick it into
+// a vector...
+//
+class AllocaHolderHandle {
+ AllocaHolder *H;
+public:
+ AllocaHolderHandle() : H(new AllocaHolder()) { H->RefCnt++; }
+ AllocaHolderHandle(const AllocaHolderHandle &AH) : H(AH.H) { H->RefCnt++; }
+ ~AllocaHolderHandle() { if (--H->RefCnt == 0) delete H; }
+
+ void add(void *mem) { H->add(mem); }
+};
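+
+// Illustrative use of the handle's reference counting (not in the original
+// source):
+//
+//   AllocaHolderHandle A;          // underlying AllocaHolder, RefCnt == 1
+//   { AllocaHolderHandle B = A; }  // copy bumps RefCnt to 2; B's dtor drops it
+//   // when A is destroyed, RefCnt reaches 0 and the alloca'd blocks are freed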
+
+typedef std::vector<GenericValue> ValuePlaneTy;
+
+// ExecutionContext struct - This struct represents one stack frame currently
+// executing.
+//
+struct ExecutionContext {
+ Function *CurFunction;// The currently executing function
+ BasicBlock *CurBB; // The currently executing BB
+ BasicBlock::iterator CurInst; // The next instruction to execute
+ std::map<Value *, GenericValue> Values; // LLVM values used in this invocation
+ std::vector<GenericValue> VarArgs; // Values passed through an ellipsis
+ CallSite Caller; // Holds the call that called subframes.
+ // NULL if main func or debugger invoked fn
+ AllocaHolderHandle Allocas; // Track memory allocated by alloca
+};
+
+// Interpreter - This class represents the entirety of the interpreter.
+//
+class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
+ GenericValue ExitValue; // The return value of the called function
+ TargetData TD;
+ IntrinsicLowering *IL;
+
+ // The runtime stack of executing code. The top of the stack is the current
+ // function record.
+ std::vector<ExecutionContext> ECStack;
+
+ // AtExitHandlers - List of functions to call when the program exits,
+ // registered with the atexit() library function.
+ std::vector<Function*> AtExitHandlers;
+
+public:
+ explicit Interpreter(ModuleProvider *M);
+ ~Interpreter();
+
+ /// runAtExitHandlers - Run any functions registered by the program's calls to
+ /// atexit(3), which we intercept and store in AtExitHandlers.
+ ///
+ void runAtExitHandlers();
+
+ static void Register() {
+ InterpCtor = create;
+ }
+
+ /// create - Create an interpreter ExecutionEngine. Returns null only if
+ /// the module cannot be materialized.
+ ///
+ static ExecutionEngine *create(ModuleProvider *M, std::string *ErrorStr = 0,
+ CodeGenOpt::Level = CodeGenOpt::Default);
+
+ /// run - Start execution with the specified function and arguments.
+ ///
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// recompileAndRelinkFunction - For the interpreter, functions are always
+ /// up-to-date.
+ ///
+ virtual void *recompileAndRelinkFunction(Function *F) {
+ return getPointerToFunction(F);
+ }
+
+ /// freeMachineCodeForFunction - The interpreter does not generate any code.
+ ///
+ void freeMachineCodeForFunction(Function *F) { }
+
+ // Methods used to execute code:
+ // Place a call on the stack
+ void callFunction(Function *F, const std::vector<GenericValue> &ArgVals);
+ void run(); // Execute instructions until nothing left to do
+
+ // Opcode Implementations
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+
+ void visitBinaryOperator(BinaryOperator &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+ void visitAllocationInst(AllocationInst &I);
+ void visitFreeInst(FreeInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitPHINode(PHINode &PN) { assert(0 && "PHI nodes already handled!"); }
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitSelectInst(SelectInst &I);
+
+
+ void visitCallSite(CallSite CS);
+ void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); }
+ void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); }
+ void visitUnwindInst(UnwindInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitShl(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+
+ void visitVAArgInst(VAArgInst &I);
+ void visitInstruction(Instruction &I) {
+ cerr << I;
+ assert(0 && "Instruction not interpretable yet!");
+ }
+
+ GenericValue callExternalFunction(Function *F,
+ const std::vector<GenericValue> &ArgVals);
+ void exitCalled(GenericValue GV);
+
+ void addAtExitHandler(Function *F) {
+ AtExitHandlers.push_back(F);
+ }
+
+ GenericValue *getFirstVarArg () {
+ return &(ECStack.back ().VarArgs[0]);
+ }
+
+ //FIXME: private:
+public:
+ GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, ExecutionContext &SF);
+
+private: // Helper functions
+ // SwitchToNewBasicBlock - Start execution in a new basic block and run any
+ // PHI nodes in the top of the block. This is used for intraprocedural
+ // control flow.
+ //
+ void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF);
+
+ void *getPointerToFunction(Function *F) { return (void*)F; }
+
+ void initializeExecutionEngine();
+ void initializeExternalFunctions();
+ GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF);
+ GenericValue getOperandValue(Value *V, ExecutionContext &SF);
+ GenericValue executeTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeSExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeZExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPExtInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeBitCastInst(Value *SrcVal, const Type *DstTy,
+ ExecutionContext &SF);
+ GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
+ const Type *Ty, ExecutionContext &SF);
+ void popStackAndReturnValueToCaller(const Type *RetTy, GenericValue Result);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/ExecutionEngine/Interpreter/Makefile b/lib/ExecutionEngine/Interpreter/Makefile
new file mode 100644
index 0000000..5f937c3
--- /dev/null
+++ b/lib/ExecutionEngine/Interpreter/Makefile
@@ -0,0 +1,12 @@
+##===- lib/ExecutionEngine/Interpreter/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMInterpreter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
new file mode 100644
index 0000000..d7980d0
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -0,0 +1,11 @@
+# TODO: Support other architectures. See Makefile.
+add_definitions(-DENABLE_X86_JIT)
+
+add_partially_linked_object(LLVMJIT
+ Intercept.cpp
+ JIT.cpp
+ JITDwarfEmitter.cpp
+ JITEmitter.cpp
+ JITMemoryManager.cpp
+ TargetSelect.cpp
+ )
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
new file mode 100644
index 0000000..3dcc462
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -0,0 +1,148 @@
+//===-- Intercept.cpp - System function interception routines -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// If a function call occurs to an external function, the JIT is designed to use
+// the dynamic loader interface to find a function to call. This is useful for
+// calling system calls and library functions that are not available in LLVM.
+// Some system calls, however, need to be handled specially. For this reason,
+// we intercept some of them here and use our own stubs to handle them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+/* stat functions are redirected to __xstat with a version number. On x86-64,
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)(void)) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
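+
+// Illustrative flow (not in the original source): JITed code that calls
+// atexit(&F) resolves to jit_atexit, which queues F; a later call to exit(0)
+// resolves to jit_exit, which pops and runs F via runAtExitHandlers before
+// terminating the process.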
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ if (!isSymbolSearchingDisabled()) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ const char *NameStr = Name.c_str();
+ // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // and has an asm specifier, try again without the underscore.
+ if (Name[0] == 1 && NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ cerr << "ERROR: Program used external function '" << Name
+ << "' which could not be resolved!\n";
+ abort();
+ }
+ return 0;
+}
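+
+// Resolution order, illustrated (not in the original source): looking up a
+// Darwin-style name "\1_printf" skips the \1 sentinel, tries the dynamic
+// loader with "_printf", and if that fails retries with the leading
+// underscore stripped, i.e. "printf".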
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
new file mode 100644
index 0000000..f8ae884
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -0,0 +1,708 @@
+//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool implements a just-in-time compiler for LLVM, allowing direct
+// execution of LLVM bitcode in an efficient manner.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#ifdef __APPLE__
+// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead
+// of atexit). It passes the address of linker generated symbol __dso_handle
+// to the function.
+// This configuration change happened at version 5330.
+# include <AvailabilityMacros.h>
+# if defined(MAC_OS_X_VERSION_10_4) && \
+ ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \
+ (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \
+ __APPLE_CC__ >= 5330))
+# ifndef HAVE___DSO_HANDLE
+# define HAVE___DSO_HANDLE 1
+# endif
+# endif
+#endif
+
+#if HAVE___DSO_HANDLE
+extern void *__dso_handle __attribute__ ((__visibility__ ("hidden")));
+#endif
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { JIT::Register(); }
+} JITRegistrator;
+
+}
+
+namespace llvm {
+ void LinkInJIT() {
+ }
+}
+
+
+#if defined(__GNUC__) && !defined(__ARM_EABI__)
+
+// libgcc defines the __register_frame function to dynamically register new
+// dwarf frames for exception handling. This functionality is not portable
+// across compilers and is only provided by GCC. We use the __register_frame
+// function here so that code generated by the JIT cooperates with the unwinding
+// runtime of libgcc. When JITting with exception handling enabled, LLVM
+// generates dwarf frames and registers them with libgcc via __register_frame.
+//
+// The __register_frame function works on Linux.
+//
+// Unfortunately, this functionality seems to have been added to libgcc after
+// darwin's unwinding library was written. The darwin code overwrites the
+// value updated by __register_frame with a value fetched with "keymgr".
+// "keymgr" is obsolete functionality that should be rewritten some day.
+// In the meantime, since "keymgr" is in all libgccs shipped with apple-gcc, we
+// need a workaround in LLVM that uses "keymgr" to dynamically modify the
+// values of an opaque key used by libgcc to find dwarf tables.
+
+extern "C" void __register_frame(void*);
+
+#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050
+# define USE_KEYMGR 1
+#else
+# define USE_KEYMGR 0
+#endif
+
+#if USE_KEYMGR
+
+namespace {
+
+// LibgccObject - This is the structure defined in libgcc. There is no #include
+// provided for this structure, so we also define it here. libgcc calls it
+// "struct object". The structure is undocumented in libgcc.
+struct LibgccObject {
+ void *unused1;
+ void *unused2;
+ void *unused3;
+
+ /// frame - Pointer to the exception table.
+ void *frame;
+
+ /// encoding - The encoding of the object?
+ union {
+ struct {
+ unsigned long sorted : 1;
+ unsigned long from_array : 1;
+ unsigned long mixed_encoding : 1;
+ unsigned long encoding : 8;
+ unsigned long count : 21;
+ } b;
+ size_t i;
+ } encoding;
+
+ /// fde_end - libgcc defines this field only if some macro is defined. We
+ /// include it even though it may not be there, to make libgcc happy.
+ char *fde_end;
+
+ /// next - At least we know it's a chained list!
+ struct LibgccObject *next;
+};
+
+// "kemgr" stuff. Apparently, all frame tables are stored there.
+extern "C" void _keymgr_set_and_unlock_processwide_ptr(int, void *);
+extern "C" void *_keymgr_get_and_lock_processwide_ptr(int);
+#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */
+
+/// LibgccObjectInfo - libgcc defines this struct as km_object_info. It
+/// probably contains all dwarf tables that are loaded.
+struct LibgccObjectInfo {
+
+ /// seenObjects - LibgccObjects already parsed by the unwinding runtime.
+ ///
+ struct LibgccObject* seenObjects;
+
+ /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime.
+ ///
+ struct LibgccObject* unseenObjects;
+
+ unsigned unused[2];
+};
+
+/// DarwinRegisterFrame - Since __register_frame does not work with darwin's
+/// libgcc, we provide our own function, which "tricks" libgcc by modifying
+/// the "Dwarf2 object list" key.
+void DarwinRegisterFrame(void* FrameBegin) {
+ // Get the key.
+ LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+ _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+ assert(LOI && "This should be preallocated by the runtime");
+
+ // Allocate a new LibgccObject to represent this frame. Deallocation of this
+ // object may be impossible: since darwin code in libgcc was written after
+ // the ability to dynamically register frames, things may crash if we
+ // deallocate it.
+ struct LibgccObject* ob = (struct LibgccObject*)
+ malloc(sizeof(struct LibgccObject));
+
+ // Follow libgcc's conventions for the field values.
+ ob->unused1 = (void *)-1;
+ ob->unused2 = 0;
+ ob->unused3 = 0;
+ ob->frame = FrameBegin;
+ ob->encoding.i = 0;
+ ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit;
+
+ // Put the info in both places, as libgcc uses either the first or the second
+ // field. Note that we rely on having two pointers here. If fde_end were a
+ // char, things would get complicated.
+ ob->fde_end = (char*)LOI->unseenObjects;
+ ob->next = LOI->unseenObjects;
+
+ // Update the key's unseenObjects list.
+ LOI->unseenObjects = ob;
+
+ // Finally update the "key". Apparently, libgcc requires it.
+ _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST,
+ LOI);
+
+}
+
+}
+#endif // USE_KEYMGR
+#endif // __GNUC__
+
+/// createJIT - This is the factory method for creating a JIT for the current
+/// machine; it does not fall back to the interpreter. It takes ownership
+/// of the ModuleProvider.
+ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel) {
+ ExecutionEngine *EE = JIT::createJIT(MP, ErrorStr, JMM, OptLevel);
+ if (!EE) return 0;
+
+ // Make sure we can resolve symbols in the program as well. The zero arg
+ // to the function tells DynamicLibrary to load the program, not a library.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr);
+ return EE;
+}
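+
+// A minimal usage sketch (illustrative; `MP` is assumed to be a
+// ModuleProvider for a module the current target can JIT):
+//
+//   std::string Err;
+//   ExecutionEngine *EE =
+//     ExecutionEngine::createJIT(MP, &Err, 0, CodeGenOpt::Default);
+//   if (!EE) { /* no JIT available for this target; Err has details */ }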
+
+JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel)
+ : ExecutionEngine(MP), TM(tm), TJI(tji) {
+ setTargetData(TM.getTargetData());
+
+ jitstate = new JITState(MP);
+
+ // Initialize JCE
+ JCE = createEmitter(*this, JMM);
+
+ // Add target data
+ MutexGuard locked(lock);
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory that
+ // may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
+ cerr << "Target does not support machine code emission!\n";
+ abort();
+ }
+
+ // Register routine for informing unwinding runtime about new EH frames
+#if defined(__GNUC__) && !defined(__ARM_EABI__)
+#if USE_KEYMGR
+ struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+ _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+
+ // The key is created on demand, and libgcc creates it the first time an
+ // exception occurs. Since we need the key to register frames, we create
+ // it now.
+ if (!LOI)
+ LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1);
+ _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
+ InstallExceptionTableRegister(DarwinRegisterFrame);
+#else
+ InstallExceptionTableRegister(__register_frame);
+#endif // USE_KEYMGR
+#endif // __GNUC__
+
+ // Initialize passes.
+ PM.doInitialization();
+}
+
+JIT::~JIT() {
+ delete jitstate;
+ delete JCE;
+ delete &TM;
+}
+
+/// addModuleProvider - Add a new ModuleProvider to the JIT. If we previously
+/// removed the last ModuleProvider, we need to re-initialize jitstate with a
+/// valid ModuleProvider.
+void JIT::addModuleProvider(ModuleProvider *MP) {
+ MutexGuard locked(lock);
+
+ if (Modules.empty()) {
+ assert(!jitstate && "jitstate should be NULL if Modules vector is empty!");
+
+ jitstate = new JITState(MP);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ cerr << "Target does not support machine code emission!\n";
+ abort();
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+
+ ExecutionEngine::addModuleProvider(MP);
+}
+
+/// removeModuleProvider - If we are removing the last ModuleProvider,
+/// invalidate the jitstate since the PassManager it contains references a
+/// released ModuleProvider.
+Module *JIT::removeModuleProvider(ModuleProvider *MP, std::string *E) {
+ Module *result = ExecutionEngine::removeModuleProvider(MP, E);
+
+ MutexGuard locked(lock);
+
+ if (jitstate->getMP() == MP) {
+ delete jitstate;
+ jitstate = 0;
+ }
+
+ if (!jitstate && !Modules.empty()) {
+ jitstate = new JITState(Modules[0]);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ cerr << "Target does not support machine code emission!\n";
+ abort();
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+ return result;
+}
+
+/// deleteModuleProvider - Remove a ModuleProvider from the list of modules,
+/// and delete the ModuleProvider and its owned Module. Avoids materializing
+/// the underlying module.
+void JIT::deleteModuleProvider(ModuleProvider *MP, std::string *E) {
+ ExecutionEngine::deleteModuleProvider(MP, E);
+
+ MutexGuard locked(lock);
+
+ if (jitstate->getMP() == MP) {
+ delete jitstate;
+ jitstate = 0;
+ }
+
+ if (!jitstate && !Modules.empty()) {
+ jitstate = new JITState(Modules[0]);
+
+ FunctionPassManager &PM = jitstate->getPM(locked);
+ PM.add(new TargetData(*TM.getTargetData()));
+
+ // Turn the machine code intermediate representation into bytes in memory
+ // that may be executed.
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ cerr << "Target does not support machine code emission!\n";
+ abort();
+ }
+
+ // Initialize passes.
+ PM.doInitialization();
+ }
+}
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue JIT::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert(F && "Function *F was null at entry to run()");
+
+ void *FPtr = getPointerToFunction(F);
+ assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+ const FunctionType *FTy = F->getFunctionType();
+ const Type *RetTy = FTy->getReturnType();
+
+ assert((FTy->getNumParams() == ArgValues.size() ||
+ (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+ "Wrong number of arguments passed into function!");
+ assert(FTy->getNumParams() == ArgValues.size() &&
+ "This doesn't support passing arguments through varargs (yet)!");
+
+ // Handle some common cases first. These cases correspond to common `main'
+ // prototypes.
+ if (RetTy == Type::Int32Ty || RetTy == Type::VoidTy) {
+ switch (ArgValues.size()) {
+ case 3:
+ if (FTy->getParamType(0) == Type::Int32Ty &&
+ isa<PointerType>(FTy->getParamType(1)) &&
+ isa<PointerType>(FTy->getParamType(2))) {
+ int (*PF)(int, char **, const char **) =
+ (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1]),
+ (const char **)GVTOP(ArgValues[2])));
+ return rv;
+ }
+ break;
+ case 2:
+ if (FTy->getParamType(0) == Type::Int32Ty &&
+ isa<PointerType>(FTy->getParamType(1))) {
+ int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1])));
+ return rv;
+ }
+ break;
+ case 1:
+ if (FTy->getNumParams() == 1 &&
+ FTy->getParamType(0) == Type::Int32Ty) {
+ GenericValue rv;
+ int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+ return rv;
+ }
+ break;
+ }
+ }
+
+ // Handle cases where no arguments are passed first.
+ if (ArgValues.empty()) {
+ GenericValue rv;
+ switch (RetTy->getTypeID()) {
+ default: assert(0 && "Unknown return type for function call!");
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+ if (BitWidth == 1)
+ rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 8)
+ rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 16)
+ rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 32)
+ rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 64)
+ rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+ else
+ assert(0 && "Integer types > 64 bits not supported");
+ return rv;
+ }
+ case Type::VoidTyID:
+ rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+ return rv;
+ case Type::FloatTyID:
+ rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::DoubleTyID:
+ rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ assert(0 && "long double not supported yet");
+ return rv;
+ case Type::PointerTyID:
+ return PTOGV(((void*(*)())(intptr_t)FPtr)());
+ }
+ }
+
+ // Okay, this is not one of our quick and easy cases. Because we don't have a
+ // full FFI, we have to codegen a nullary stub function that just calls the
+ // function we are interested in, passing in constants for all of the
+ // arguments. Make this function and return.
+
+ // First, create the function.
+ FunctionType *STy=FunctionType::get(RetTy, std::vector<const Type*>(), false);
+ Function *Stub = Function::Create(STy, Function::InternalLinkage, "",
+ F->getParent());
+
+ // Insert a basic block.
+ BasicBlock *StubBB = BasicBlock::Create("", Stub);
+
+ // Convert all of the GenericValue arguments over to constants. Note that we
+ // currently don't support varargs.
+ SmallVector<Value*, 8> Args;
+ for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
+ Constant *C = 0;
+ const Type *ArgTy = FTy->getParamType(i);
+ const GenericValue &AV = ArgValues[i];
+ switch (ArgTy->getTypeID()) {
+ default: assert(0 && "Unknown argument type for function call!");
+ case Type::IntegerTyID:
+ C = ConstantInt::get(AV.IntVal);
+ break;
+ case Type::FloatTyID:
+ C = ConstantFP::get(APFloat(AV.FloatVal));
+ break;
+ case Type::DoubleTyID:
+ C = ConstantFP::get(APFloat(AV.DoubleVal));
+ break;
+ case Type::PPC_FP128TyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ C = ConstantFP::get(APFloat(AV.IntVal));
+ break;
+ case Type::PointerTyID:
+ void *ArgPtr = GVTOP(AV);
+ if (sizeof(void*) == 4)
+ C = ConstantInt::get(Type::Int32Ty, (int)(intptr_t)ArgPtr);
+ else
+ C = ConstantInt::get(Type::Int64Ty, (intptr_t)ArgPtr);
+ C = ConstantExpr::getIntToPtr(C, ArgTy); // Cast the integer to pointer
+ break;
+ }
+ Args.push_back(C);
+ }
+
+ CallInst *TheCall = CallInst::Create(F, Args.begin(), Args.end(),
+ "", StubBB);
+ TheCall->setCallingConv(F->getCallingConv());
+ TheCall->setTailCall();
+ if (TheCall->getType() != Type::VoidTy)
+ ReturnInst::Create(TheCall, StubBB); // Return result of the call.
+ else
+ ReturnInst::Create(StubBB); // Just return void.
+
+ // Finally, return the value returned by our nullary stub function.
+ return runFunction(Stub, std::vector<GenericValue>());
+}
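+
+// Illustrative sketch (not in the original source) of the stub generated
+// above for a call like runFunction(F, {3.0f}) where F has type
+// `float (float)`:
+//
+//   define internal float @""() {
+//     %r = tail call float @F(float 3.000000e+00)
+//     ret float %r
+//   }
+//
+// Running the nullary stub then reduces to the no-argument cases handled
+// above.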
+
+/// runJITOnFunction - Run the FunctionPassManager full of
+/// just-in-time compilation passes on F, hopefully filling in
+/// GlobalAddress[F] with the address of F's machine code.
+///
+void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
+ MutexGuard locked(lock);
+
+ registerMachineCodeInfo(MCI);
+
+ runJITOnFunctionUnlocked(F, locked);
+
+ registerMachineCodeInfo(0);
+}
+
+void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
+ static bool isAlreadyCodeGenerating = false;
+ assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
+
+ // JIT the function
+ isAlreadyCodeGenerating = true;
+ jitstate->getPM(locked).run(*F);
+ isAlreadyCodeGenerating = false;
+
+ // If the function referred to another function that had not yet been
+ // read from bitcode, but we are jitting non-lazily, emit it now.
+ while (!jitstate->getPendingFunctions(locked).empty()) {
+ Function *PF = jitstate->getPendingFunctions(locked).back();
+ jitstate->getPendingFunctions(locked).pop_back();
+
+ // JIT the function
+ isAlreadyCodeGenerating = true;
+ jitstate->getPM(locked).run(*PF);
+ isAlreadyCodeGenerating = false;
+
+ // Now that the function has been jitted, ask the JITEmitter to rewrite
+ // the stub with real address of the function.
+ updateFunctionStub(PF);
+ }
+
+ // If the JIT is configured to emit info so that dlsym can be used to
+ // rewrite stubs to external globals, do so now.
+ if (areDlsymStubsEnabled() && isLazyCompilationDisabled())
+ updateDlsymStubTable();
+}
+
+/// getPointerToFunction - This method is used to get the address of the
+/// specified function, compiling it if necessary.
+///
+void *JIT::getPointerToFunction(Function *F) {
+
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr; // Check if function already code gen'd
+
+ MutexGuard locked(lock);
+
+ // Make sure we read in the function if it exists in this Module.
+ if (F->hasNotBeenReadFromBitcode()) {
+ // Determine the module provider this function is provided by.
+ Module *M = F->getParent();
+ ModuleProvider *MP = 0;
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
+ if (Modules[i]->getModule() == M) {
+ MP = Modules[i];
+ break;
+ }
+ }
+ assert(MP && "Function isn't in a module we know about!");
+
+ std::string ErrorMsg;
+ if (MP->materializeFunction(F, &ErrorMsg)) {
+ cerr << "Error reading function '" << F->getName()
+ << "' from bitcode file: " << ErrorMsg << "\n";
+ abort();
+ }
+
+ // Now retry to get the address.
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr;
+ }
+
+ if (F->isDeclaration()) {
+ bool AbortOnFailure =
+ !areDlsymStubsEnabled() && !F->hasExternalWeakLinkage();
+ void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+ addGlobalMapping(F, Addr);
+ return Addr;
+ }
+
+ runJITOnFunctionUnlocked(F, locked);
+
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+ return Addr;
+}
+
+/// getOrEmitGlobalVariable - Return the address of the specified global
+/// variable, possibly emitting it to memory if needed. This is used by the
+/// Emitter.
+void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
+ MutexGuard locked(lock);
+
+ void *Ptr = getPointerToGlobalIfAvailable(GV);
+ if (Ptr) return Ptr;
+
+ // If the global is external, just remember the address.
+ if (GV->isDeclaration()) {
+#if HAVE___DSO_HANDLE
+ if (GV->getName() == "__dso_handle")
+ return (void*)&__dso_handle;
+#endif
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName().c_str());
+ if (Ptr == 0 && !areDlsymStubsEnabled()) {
+ cerr << "Could not resolve external global address: "
+ << GV->getName() << "\n";
+ abort();
+ }
+ addGlobalMapping(GV, Ptr);
+ } else {
+ // GlobalVariables that are not "constant" will cause trouble in a server
+ // situation, since they are returned in the same block of memory as code,
+ // which may not be writable.
+ if (isGVCompilationDisabled() && !GV->isConstant()) {
+ cerr << "Compilation of non-internal GlobalValue is disabled!\n";
+ abort();
+ }
+ // If the global hasn't been emitted to memory yet, allocate space and
+ // emit it into memory. It goes in the same array as the generated
+ // code, jump tables, etc.
+ const Type *GlobalType = GV->getType()->getElementType();
+ size_t S = getTargetData()->getTypeAllocSize(GlobalType);
+ size_t A = getTargetData()->getPreferredAlignment(GV);
+ if (GV->isThreadLocal()) {
+ MutexGuard locked(lock);
+ Ptr = TJI.allocateThreadLocalMemory(S);
+ } else if (TJI.allocateSeparateGVMemory()) {
+ if (A <= 8) {
+ Ptr = malloc(S);
+ } else {
+ // Allocate S+A bytes of memory, then use an aligned pointer within that
+ // space.
+ Ptr = malloc(S+A);
+ unsigned MisAligned = ((intptr_t)Ptr & (A-1));
+ Ptr = (char*)Ptr + (MisAligned ? (A-MisAligned) : 0);
+ }
+ } else {
+ Ptr = JCE->allocateSpace(S, A);
+ }
+ addGlobalMapping(GV, Ptr);
+ EmitGlobalVariable(GV);
+ }
+ return Ptr;
+}
+
+/// recompileAndRelinkFunction - This method is used to force a function
+/// which has already been compiled, to be compiled again, possibly
+/// after it has been modified. Then the entry to the old copy is overwritten
+/// with a branch to the new copy. If there was no old copy, this acts
+/// just like JIT::getPointerToFunction().
+///
+void *JIT::recompileAndRelinkFunction(Function *F) {
+ void *OldAddr = getPointerToGlobalIfAvailable(F);
+
+ // If it's not already compiled there is no reason to patch it up.
+ if (OldAddr == 0) { return getPointerToFunction(F); }
+
+ // Delete the old function mapping.
+ addGlobalMapping(F, 0);
+
+ // Recodegen the function
+ runJITOnFunction(F);
+
+ // Update state, forward the old function to the new function.
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+ TJI.replaceMachineCodeForFunction(OldAddr, Addr);
+ return Addr;
+}
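+
+// Usage sketch (illustrative; `TheJIT` is a hypothetical JIT* used only for
+// this example): after mutating an already compiled function F,
+//
+//   void *NewAddr = TheJIT->recompileAndRelinkFunction(F);
+//   // calls through the old entry point now branch to NewAddr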
+
+/// getMemoryForGV - This method abstracts memory allocation of global
+/// variable so that the JIT can allocate thread local variables depending
+/// on the target.
+///
+char* JIT::getMemoryForGV(const GlobalVariable* GV) {
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
+ if (GV->isThreadLocal()) {
+ MutexGuard locked(lock);
+ return TJI.allocateThreadLocalMemory(GVSize);
+ } else {
+ return new char[GVSize];
+ }
+}
+
+void JIT::addPendingFunction(Function *F) {
+ MutexGuard locked(lock);
+ jitstate->getPendingFunctions(locked).push_back(F);
+}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
new file mode 100644
index 0000000..3ccb2dd
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -0,0 +1,176 @@
+//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the top-level JIT data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef JIT_H
+#define JIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/PassManager.h"
+
+namespace llvm {
+
+class Function;
+class TargetMachine;
+class TargetJITInfo;
+class MachineCodeEmitter;
+class MachineCodeInfo;
+
+class JITState {
+private:
+ FunctionPassManager PM; // Passes to compile a function
+ ModuleProvider *MP; // ModuleProvider used to create the PM
+
+ /// PendingFunctions - Functions which have not been code generated yet, but
+ /// were called from a function being code generated.
+ std::vector<Function*> PendingFunctions;
+
+public:
+ explicit JITState(ModuleProvider *MP) : PM(MP), MP(MP) {}
+
+ FunctionPassManager &getPM(const MutexGuard &L) {
+ return PM;
+ }
+
+ ModuleProvider *getMP() const { return MP; }
+ std::vector<Function*> &getPendingFunctions(const MutexGuard &L) {
+ return PendingFunctions;
+ }
+};
+
+
+class JIT : public ExecutionEngine {
+ TargetMachine &TM; // The current target we are compiling to
+ TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
+ JITCodeEmitter *JCE; // JCE object
+
+ JITState *jitstate;
+
+ JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel);
+public:
+ ~JIT();
+
+ static void Register() {
+ JITCtor = create;
+ }
+
+ /// getJITInfo - Return the target JIT information structure.
+ ///
+ TargetJITInfo &getJITInfo() const { return TJI; }
+
+ /// create - Create and return a new JIT compiler if there is one available
+ /// for the current target. Otherwise, return null.
+ ///
+ static ExecutionEngine *create(ModuleProvider *MP, std::string *Err,
+ CodeGenOpt::Level OptLevel =
+ CodeGenOpt::Default) {
+ return createJIT(MP, Err, 0, OptLevel);
+ }
+
+ virtual void addModuleProvider(ModuleProvider *MP);
+
+ /// removeModuleProvider - Remove a ModuleProvider from the list of modules.
+ /// Releases the Module from the ModuleProvider, materializing it in the
+ /// process, and returns the materialized Module.
+ virtual Module *removeModuleProvider(ModuleProvider *MP,
+ std::string *ErrInfo = 0);
+
+ /// deleteModuleProvider - Remove a ModuleProvider from the list of modules,
+ /// and delete the ModuleProvider and its owned Module. Avoids materializing
+ /// the underlying module.
+ virtual void deleteModuleProvider(ModuleProvider *P,std::string *ErrInfo = 0);
+
+ /// runFunction - Start execution with the specified function and arguments.
+ ///
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+ // CompilationCallback - Invoked the first time that a call site is found,
+ // which causes lazy compilation of the target function.
+ //
+ static void CompilationCallback();
+
+ /// getPointerToFunction - This returns the address of the specified function,
+ /// compiling it if necessary.
+ ///
+ void *getPointerToFunction(Function *F);
+
+ /// getOrEmitGlobalVariable - Return the address of the specified global
+ /// variable, possibly emitting it to memory if needed. This is used by the
+ /// Emitter.
+ void *getOrEmitGlobalVariable(const GlobalVariable *GV);
+
+ /// getPointerToFunctionOrStub - If the specified function has been
+ /// code-gen'd, return a pointer to the function. If not, compile it, or use
+ /// a stub to implement lazy compilation if available.
+ ///
+ void *getPointerToFunctionOrStub(Function *F);
+
+ /// recompileAndRelinkFunction - This method is used to force a function
+ /// which has already been compiled, to be compiled again, possibly
+ /// after it has been modified. Then the entry to the old copy is overwritten
+ /// with a branch to the new copy. If there was no old copy, this acts
+ /// just like JIT::getPointerToFunction().
+ ///
+ void *recompileAndRelinkFunction(Function *F);
+
+ /// freeMachineCodeForFunction - deallocate memory used to code-generate this
+ /// Function.
+ ///
+ void freeMachineCodeForFunction(Function *F);
+
+ /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd
+ /// function was encountered. Add it to a pending list to be processed after
+ /// the current function.
+ ///
+ void addPendingFunction(Function *F);
+
+ /// getCodeEmitter - Return the code emitter this JIT is emitting into.
+ JITCodeEmitter *getCodeEmitter() const { return JCE; }
+
+ static ExecutionEngine *createJIT(ModuleProvider *MP, std::string *Err,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel);
+
+
+ // Run the JIT on F and return information about the generated code
+ void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
+
+private:
+ static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM);
+ void registerMachineCodeInfo(MachineCodeInfo *MCI);
+ void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
+ void updateFunctionStub(Function *F);
+ void updateDlsymStubTable();
+
+protected:
+
+ /// getMemoryForGV - Allocate memory for a global variable.
+ virtual char* getMemoryForGV(const GlobalVariable* GV);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
new file mode 100644
index 0000000..e101ef3
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -0,0 +1,1056 @@
+//===----- JITDwarfEmitter.cpp - Write dwarf tables into memory -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : Jit(theJit) {}
+
+
+unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
+ JITCodeEmitter& jce,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) {
+ const TargetMachine& TM = F.getTarget();
+ TD = TM.getTargetData();
+ needsIndirectEncoding = TM.getTargetAsmInfo()->getNeedsIndirectEncoding();
+ stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
+ RI = TM.getRegisterInfo();
+ JCE = &jce;
+
+ unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
+ EndFunction);
+
+ unsigned char* Result = 0;
+ unsigned char* EHFramePtr = 0;
+
+ const std::vector<Function *> Personalities = MMI->getPersonalities();
+ EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]);
+
+ Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr,
+ StartFunction, EndFunction, ExceptionTable);
+
+ return Result;
+}
+
+
+void
+JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
+ PointerSize : -PointerSize;
+ bool IsLocal = false;
+ unsigned BaseLabelID = 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ unsigned LabelID = Move.getLabelID();
+
+ if (LabelID) {
+ LabelID = MMI->MappedLabel(LabelID);
+
+ // Throw out move if the label is invalid.
+ if (!LabelID) continue;
+ }
+
+ intptr_t LabelPtr = 0;
+ if (LabelID) LabelPtr = JCE->getLabelAddress(LabelID);
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabelPtr && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+ JCE->emitByte(dwarf::DW_CFA_advance_loc4);
+ JCE->emitInt32(LabelPtr - BaseLabelPtr);
+
+ BaseLabelID = LabelID;
+ BaseLabelPtr = LabelPtr;
+ IsLocal = true;
+ }
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (!Src.isReg()) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa_offset);
+ } else {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa);
+ JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true));
+ }
+
+ int Offset = -Src.getOffset();
+
+ JCE->emitULEB128Bytes(Offset);
+ } else {
+ assert(0 && "Machine move no supported yet.");
+ }
+ } else if (Src.isReg() &&
+ Src.getReg() == MachineLocation::VirtualFP) {
+ if (Dst.isReg()) {
+ JCE->emitByte(dwarf::DW_CFA_def_cfa_register);
+ JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ assert(0 && "Machine move no supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ JCE->emitByte(dwarf::DW_CFA_offset_extended_sf);
+ JCE->emitULEB128Bytes(Reg);
+ JCE->emitSLEB128Bytes(Offset);
+ } else if (Reg < 64) {
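+        // DW_CFA_offset packs the register number into the low 6 bits of the
+        // opcode byte itself, so this compact form only fits registers 0-63.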
+ JCE->emitByte(dwarf::DW_CFA_offset + Reg);
+ JCE->emitULEB128Bytes(Offset);
+ } else {
+ JCE->emitByte(dwarf::DW_CFA_offset_extended);
+ JCE->emitULEB128Bytes(Reg);
+ JCE->emitULEB128Bytes(Offset);
+ }
+ }
+ }
+}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+
+/// PadLT - Order landing pads lexicographically by type id.
+static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+namespace {
+
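+/// KeyInfo - DenseMap traits for unsigned keys; -1U and -2U are reserved as
+/// the empty and tombstone sentinels and must never be used as real label IDs.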
+struct KeyInfo {
+ static inline unsigned getEmptyKey() { return -1U; }
+ static inline unsigned getTombstoneKey() { return -2U; }
+ static unsigned getHashValue(const unsigned &Key) { return Key; }
+ static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+ static bool isPod() { return true; }
+};
+
+/// ActionEntry - Structure describing an entry in the actions table.
+struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+};
+
+/// PadRange - Structure holding a try-range and the associated landing pad.
+struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+};
+
+typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+
+/// CallSiteEntry - Structure describing an entry in the call-site table.
+struct CallSiteEntry {
+ unsigned BeginLabel; // zero indicates the start of the function.
+ unsigned EndLabel; // zero indicates the end of the function.
+ unsigned PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+};
+
+}
+
+unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) const {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return 0;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Negative type ids index into FilterIds, positive type ids index into
+ // TypeInfos. The value written for a positive type id is just the type
+ // id itself. For a negative type id, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type id, because the FilterIds entries
+ // are written using a variable width encoding which outputs one byte per
+ // entry as long as the value written is not too large, but can differ.
+ // This kind of complication does not occur for positive type ids because
+ // type infos are output using a fixed width encoding.
+ // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
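+  // For example, FilterIds == {5, 300} yields FilterOffsets == {-1, -2}:
+  // each offset is the previous one minus the ULEB128 size of the previous
+  // entry (ULEB128(5) occupies a single byte).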
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+ for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+ E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= TargetAsmInfo::getULEB128Size(*I);
+ }
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LP = LandingPads[i];
+ const std::vector<int> &TypeIds = LP->TypeIds;
+ const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -= TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
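+      // E.g. a landing pad with TypeIds {1, 2} and nothing shared yields the
+      // records {1, 0} and {2, -3}; -3 is the self-relative offset from the
+      // second record's NextAction field back to the first record.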
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+ int TypeID = TypeIds[I];
+ assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ Actions.push_back(Action);
+
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to size.
+ SizeActions += SizeSiteActions;
+ }
+
+ // Compute the call-site table. Entries must be ordered by address.
+ SmallVector<CallSiteEntry, 64> CallSites;
+
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ unsigned BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ bool MayThrow = false;
+ unsigned LastLabel = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ MayThrow |= MI->getDesc().isCall();
+ continue;
+ }
+
+ unsigned BeginLabel = MI->getOperand(0).getImm();
+ assert(BeginLabel && "Invalid label!");
+
+ if (BeginLabel == LastLabel)
+ MayThrow = false;
+
+ RangeMapType::iterator L = PadMap.find(BeginLabel);
+
+ if (L == PadMap.end())
+ continue;
+
+ PadRange P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ CallSiteEntry Site = {BeginLabel, LastLabel,
+ LandingPad->LandingPadLabel, FirstActions[P.PadIndex]};
+
+ assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel &&
+ "Invalid landing pad!");
+
+ // Try to merge with the previous call-site.
+ if (CallSites.size()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ CallSites.push_back(Site);
+ }
+ }
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, 0, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ // Final tallies.
+ unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start.
+ sizeof(int32_t) + // Site length.
+ sizeof(int32_t)); // Landing pad.
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
+ SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action);
+
+ unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+
+ unsigned TypeOffset = sizeof(int8_t) + // Call site format
+ // Call-site table length
+ TargetAsmInfo::getULEB128Size(SizeSites) +
+ SizeSites + SizeActions + SizeTypes;
+
+ unsigned TotalSize = sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
+ TypeOffset;
+
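+  // Compute the padding needed to round TotalSize up to a multiple of four;
+  // e.g. TotalSize == 10 gives SizeAlign == 2.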
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
+ // Begin the exception table.
+ JCE->emitAlignment(4);
+ for (unsigned i = 0; i != SizeAlign; ++i) {
+ JCE->emitByte(0);
+ // Asm->EOL("Padding");
+ }
+
+ unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue();
+
+ // Emit the header.
+ JCE->emitByte(dwarf::DW_EH_PE_omit);
+ // Asm->EOL("LPStart format (DW_EH_PE_omit)");
+ JCE->emitByte(dwarf::DW_EH_PE_absptr);
+ // Asm->EOL("TType format (DW_EH_PE_absptr)");
+ JCE->emitULEB128Bytes(TypeOffset);
+ // Asm->EOL("TType base offset");
+ JCE->emitByte(dwarf::DW_EH_PE_udata4);
+ // Asm->EOL("Call site format (DW_EH_PE_udata4)");
+ JCE->emitULEB128Bytes(SizeSites);
+ // Asm->EOL("Call-site table length");
+
+ // Emit the landing pad site information.
+ for (unsigned i = 0; i < CallSites.size(); ++i) {
+ CallSiteEntry &S = CallSites[i];
+ intptr_t BeginLabelPtr = 0;
+ intptr_t EndLabelPtr = 0;
+
+ if (!S.BeginLabel) {
+ BeginLabelPtr = (intptr_t)StartFunction;
+ JCE->emitInt32(0);
+ } else {
+ BeginLabelPtr = JCE->getLabelAddress(S.BeginLabel);
+ JCE->emitInt32(BeginLabelPtr - (intptr_t)StartFunction);
+ }
+
+ // Asm->EOL("Region start");
+
+ if (!S.EndLabel) {
+ EndLabelPtr = (intptr_t)EndFunction;
+ JCE->emitInt32((intptr_t)EndFunction - BeginLabelPtr);
+ } else {
+ EndLabelPtr = JCE->getLabelAddress(S.EndLabel);
+ JCE->emitInt32(EndLabelPtr - BeginLabelPtr);
+ }
+ //Asm->EOL("Region length");
+
+ if (!S.PadLabel) {
+ JCE->emitInt32(0);
+ } else {
+ unsigned PadLabelPtr = JCE->getLabelAddress(S.PadLabel);
+ JCE->emitInt32(PadLabelPtr - (intptr_t)StartFunction);
+ }
+ // Asm->EOL("Landing pad");
+
+ JCE->emitULEB128Bytes(S.Action);
+ // Asm->EOL("Action");
+ }
+
+ // Emit the actions.
+ for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+ ActionEntry &Action = Actions[I];
+
+ JCE->emitSLEB128Bytes(Action.ValueForTypeID);
+ //Asm->EOL("TypeInfo index");
+ JCE->emitSLEB128Bytes(Action.NextAction);
+ //Asm->EOL("Next action");
+ }
+
+ // Emit the type ids.
+ for (unsigned M = TypeInfos.size(); M; --M) {
+ GlobalVariable *GV = TypeInfos[M - 1];
+
+ if (GV) {
+ if (TD->getPointerSize() == sizeof(int32_t)) {
+ JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+ } else {
+ JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+ }
+ } else {
+ if (TD->getPointerSize() == sizeof(int32_t))
+ JCE->emitInt32(0);
+ else
+ JCE->emitInt64(0);
+ }
+ // Asm->EOL("TypeInfo");
+ }
+
+ // Emit the filter typeids.
+ for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+ unsigned TypeID = FilterIds[j];
+ JCE->emitULEB128Bytes(TypeID);
+ //Asm->EOL("Filter TypeInfo index");
+ }
+
+ JCE->emitAlignment(4);
+
+ return DwarfExceptionTable;
+}
+
+unsigned char*
+JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
+ PointerSize : -PointerSize;
+
+ unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue();
+ // EH Common Frame header
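+  // Reserve the 4-byte length field; it is back-patched at the end of this
+  // function via emitInt32At.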
+ JCE->allocateSpace(4, 0);
+ unsigned char* FrameCommonBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+ JCE->emitInt32((int)0);
+ JCE->emitByte(dwarf::DW_CIE_VERSION);
+ JCE->emitString(Personality ? "zPLR" : "zR");
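+  // In the augmentation string, 'z' says augmentation data (with a leading
+  // size) is present, 'P' adds a personality routine, 'L' an LSDA encoding
+  // byte, and 'R' an FDE pointer encoding byte. The next three CIE fields are
+  // the code alignment factor (1), the data alignment factor (stackGrowth)
+  // and the return address register.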
+ JCE->emitULEB128Bytes(1);
+ JCE->emitSLEB128Bytes(stackGrowth);
+ JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+ if (Personality) {
+ // Augmentation Size: 3 small ULEBs of one byte each, and the personality
+    // function pointer, whose size is PointerSize.
+ JCE->emitULEB128Bytes(3 + PointerSize);
+
+    // Use a direct (absolute) encoding for the personality because we emit
+    // its raw function pointer. A PC-relative encoding would not be safe
+    // here, since the current PC value may be bigger than the personality
+    // function pointer.
+ if (PointerSize == 4) {
+ JCE->emitByte(dwarf::DW_EH_PE_sdata4);
+ JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality)));
+ } else {
+ JCE->emitByte(dwarf::DW_EH_PE_sdata8);
+ JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality)));
+ }
+
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+
+ } else {
+ JCE->emitULEB128Bytes(1);
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ }
+
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+ EmitFrameMoves(0, Moves);
+ JCE->emitAlignment(PointerSize);
+
+ JCE->emitInt32At((uintptr_t*)StartCommonPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ FrameCommonBeginPtr));
+
+ return StartCommonPtr;
+}
+
+
+unsigned char*
+JITDwarfEmitter::EmitEHFrame(const Function* Personality,
+ unsigned char* StartCommonPtr,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* ExceptionTable) const {
+ unsigned PointerSize = TD->getPointerSize();
+
+ // EH frame header.
+ unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
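+  // Reserve the 4-byte FDE length field, back-patched at the end via
+  // emitInt32At.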
+ JCE->allocateSpace(4, 0);
+ unsigned char* FrameBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+ // FDE CIE Offset
+ JCE->emitInt32(FrameBeginPtr - StartCommonPtr);
+ JCE->emitInt32(StartFunction - (unsigned char*)JCE->getCurrentPCValue());
+ JCE->emitInt32(EndFunction - StartFunction);
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (MMI->getPersonalityIndex()) {
+ JCE->emitULEB128Bytes(4);
+
+ if (!MMI->getLandingPads().empty()) {
+ JCE->emitInt32(ExceptionTable - (unsigned char*)JCE->getCurrentPCValue());
+ } else {
+ JCE->emitInt32((int)0);
+ }
+ } else {
+ JCE->emitULEB128Bytes(0);
+ }
+
+ // Indicate locations of function specific callee saved registers in
+ // frame.
+ EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves());
+
+ JCE->emitAlignment(PointerSize);
+
+ // Indicate the size of the table
+ JCE->emitInt32At((uintptr_t*)StartEHPtr,
+ (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+ StartEHPtr));
+
+ // Double zeroes for the unwind runtime
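+  // (a zero length field terminates the FDE list)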
+ if (PointerSize == 8) {
+ JCE->emitInt64(0);
+ JCE->emitInt64(0);
+ } else {
+ JCE->emitInt32(0);
+ JCE->emitInt32(0);
+ }
+
+
+ return StartEHPtr;
+}
+
+unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F,
+ JITCodeEmitter& jce,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) {
+ const TargetMachine& TM = F.getTarget();
+ TD = TM.getTargetData();
+ needsIndirectEncoding = TM.getTargetAsmInfo()->getNeedsIndirectEncoding();
+ stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
+ RI = TM.getRegisterInfo();
+ JCE = &jce;
+ unsigned FinalSize = 0;
+
+ FinalSize += GetExceptionTableSizeInBytes(&F);
+
+ const std::vector<Function *> Personalities = MMI->getPersonalities();
+ FinalSize +=
+ GetCommonEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()]);
+
+ FinalSize += GetEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()],
+ StartFunction);
+
+ return FinalSize;
+}
+
+/// RoundUpToAlign - Add the specified alignment to FinalSize and return
+/// the new value.
+static unsigned RoundUpToAlign(unsigned FinalSize, unsigned Alignment) {
+ if (Alignment == 0) Alignment = 1;
+ // Since we do not know where the buffer will be allocated, be pessimistic.
+ return FinalSize + Alignment;
+}
+
+unsigned
+JITDwarfEmitter::GetEHFrameSizeInBytes(const Function* Personality,
+ unsigned char* StartFunction) const {
+ unsigned PointerSize = TD->getPointerSize();
+ unsigned FinalSize = 0;
+ // EH frame header.
+ FinalSize += PointerSize;
+ // FDE CIE Offset
+ FinalSize += 3 * PointerSize;
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (MMI->getPersonalityIndex()) {
+ FinalSize += TargetAsmInfo::getULEB128Size(4);
+ FinalSize += PointerSize;
+ } else {
+ FinalSize += TargetAsmInfo::getULEB128Size(0);
+ }
+
+ // Indicate locations of function specific callee saved registers in
+ // frame.
+ FinalSize += GetFrameMovesSizeInBytes((intptr_t)StartFunction,
+ MMI->getFrameMoves());
+
+ FinalSize = RoundUpToAlign(FinalSize, 4);
+
+ // Double zeroes for the unwind runtime
+ FinalSize += 2 * PointerSize;
+
+ return FinalSize;
+}
+
+unsigned JITDwarfEmitter::GetCommonEHFrameSizeInBytes(const Function* Personality)
+ const {
+
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
+ PointerSize : -PointerSize;
+ unsigned FinalSize = 0;
+ // EH Common Frame header
+ FinalSize += PointerSize;
+ FinalSize += 4;
+ FinalSize += 1;
+ FinalSize += Personality ? 5 : 3; // "zPLR" or "zR"
+ FinalSize += TargetAsmInfo::getULEB128Size(1);
+ FinalSize += TargetAsmInfo::getSLEB128Size(stackGrowth);
+ FinalSize += 1;
+
+ if (Personality) {
+ FinalSize += TargetAsmInfo::getULEB128Size(7);
+
+ // Encoding
+ FinalSize+= 1;
+ //Personality
+ FinalSize += PointerSize;
+
+ FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+ FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+
+ } else {
+ FinalSize += TargetAsmInfo::getULEB128Size(1);
+ FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+ }
+
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+ FinalSize += GetFrameMovesSizeInBytes(0, Moves);
+ FinalSize = RoundUpToAlign(FinalSize, 4);
+ return FinalSize;
+}
+
+unsigned
+JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const {
+ unsigned PointerSize = TD->getPointerSize();
+ int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
+ PointerSize : -PointerSize;
+ bool IsLocal = BaseLabelPtr;
+ unsigned FinalSize = 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ unsigned LabelID = Move.getLabelID();
+
+ if (LabelID) {
+ LabelID = MMI->MappedLabel(LabelID);
+
+ // Throw out move if the label is invalid.
+ if (!LabelID) continue;
+ }
+
+ intptr_t LabelPtr = 0;
+ if (LabelID) LabelPtr = JCE->getLabelAddress(LabelID);
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabelPtr && LabelID && (BaseLabelPtr != LabelPtr || !IsLocal)) {
+ FinalSize++;
+ FinalSize += PointerSize;
+ BaseLabelPtr = LabelPtr;
+ IsLocal = true;
+ }
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (!Src.isReg()) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ ++FinalSize;
+ } else {
+ ++FinalSize;
+ unsigned RegNum = RI->getDwarfRegNum(Src.getReg(), true);
+ FinalSize += TargetAsmInfo::getULEB128Size(RegNum);
+ }
+
+ int Offset = -Src.getOffset();
+
+ FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+ } else {
+ assert(0 && "Machine move no supported yet.");
+ }
+ } else if (Src.isReg() &&
+ Src.getReg() == MachineLocation::VirtualFP) {
+ if (Dst.isReg()) {
+ ++FinalSize;
+ unsigned RegNum = RI->getDwarfRegNum(Dst.getReg(), true);
+ FinalSize += TargetAsmInfo::getULEB128Size(RegNum);
+ } else {
+ assert(0 && "Machine move no supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ ++FinalSize;
+ FinalSize += TargetAsmInfo::getULEB128Size(Reg);
+ FinalSize += TargetAsmInfo::getSLEB128Size(Offset);
+ } else if (Reg < 64) {
+ ++FinalSize;
+ FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+ } else {
+ ++FinalSize;
+ FinalSize += TargetAsmInfo::getULEB128Size(Reg);
+ FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+ }
+ }
+ }
+ return FinalSize;
+}
+
+unsigned
+JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
+ unsigned FinalSize = 0;
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return 0;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Negative type ids index into FilterIds, positive type ids index into
+ // TypeInfos. The value written for a positive type id is just the type
+ // id itself. For a negative type id, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type id, because the FilterIds entries
+ // are written using a variable width encoding which outputs one byte per
+ // entry as long as the value written is not too large, but can differ.
+ // This kind of complication does not occur for positive type ids because
+ // type infos are output using a fixed width encoding.
+ // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+ for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+ E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= TargetAsmInfo::getULEB128Size(*I);
+ }
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LP = LandingPads[i];
+ const std::vector<int> &TypeIds = LP->TypeIds;
+ const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -= TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+ int TypeID = TypeIds[I];
+ assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ Actions.push_back(Action);
+
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to size.
+ SizeActions += SizeSiteActions;
+ }
+
+ // Compute the call-site table. Entries must be ordered by address.
+ SmallVector<CallSiteEntry, 64> CallSites;
+
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ unsigned BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ bool MayThrow = false;
+ unsigned LastLabel = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ MayThrow |= MI->getDesc().isCall();
+ continue;
+ }
+
+ unsigned BeginLabel = MI->getOperand(0).getImm();
+ assert(BeginLabel && "Invalid label!");
+
+ if (BeginLabel == LastLabel)
+ MayThrow = false;
+
+ RangeMapType::iterator L = PadMap.find(BeginLabel);
+
+ if (L == PadMap.end())
+ continue;
+
+ PadRange P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ CallSiteEntry Site = {BeginLabel, LastLabel,
+ LandingPad->LandingPadLabel, FirstActions[P.PadIndex]};
+
+ assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel &&
+ "Invalid landing pad!");
+
+ // Try to merge with the previous call-site.
+ if (CallSites.size()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ CallSites.push_back(Site);
+ }
+ }
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (MayThrow) {
+ CallSiteEntry Site = {LastLabel, 0, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ // Final tallies.
+ unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start.
+ sizeof(int32_t) + // Site length.
+ sizeof(int32_t)); // Landing pad.
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
+ SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action);
+
+ unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+
+ unsigned TypeOffset = sizeof(int8_t) + // Call site format
+ // Call-site table length
+ TargetAsmInfo::getULEB128Size(SizeSites) +
+ SizeSites + SizeActions + SizeTypes;
+
+ unsigned TotalSize = sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
+ TypeOffset;
+
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
+ // Begin the exception table.
+ FinalSize = RoundUpToAlign(FinalSize, 4);
+ for (unsigned i = 0; i != SizeAlign; ++i) {
+ ++FinalSize;
+ }
+
+ unsigned PointerSize = TD->getPointerSize();
+
+ // Emit the header.
+ ++FinalSize;
+ // Asm->EOL("LPStart format (DW_EH_PE_omit)");
+ ++FinalSize;
+ // Asm->EOL("TType format (DW_EH_PE_absptr)");
+ ++FinalSize;
+ // Asm->EOL("TType base offset");
+ ++FinalSize;
+ // Asm->EOL("Call site format (DW_EH_PE_udata4)");
+ ++FinalSize;
+ // Asm->EOL("Call-site table length");
+
+ // Emit the landing pad site information.
+ for (unsigned i = 0; i < CallSites.size(); ++i) {
+ CallSiteEntry &S = CallSites[i];
+
+ // Asm->EOL("Region start");
+ FinalSize += PointerSize;
+
+ //Asm->EOL("Region length");
+ FinalSize += PointerSize;
+
+ // Asm->EOL("Landing pad");
+ FinalSize += PointerSize;
+
+ FinalSize += TargetAsmInfo::getULEB128Size(S.Action);
+ // Asm->EOL("Action");
+ }
+
+ // Emit the actions.
+ for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+ ActionEntry &Action = Actions[I];
+
+ //Asm->EOL("TypeInfo index");
+ FinalSize += TargetAsmInfo::getSLEB128Size(Action.ValueForTypeID);
+ //Asm->EOL("Next action");
+ FinalSize += TargetAsmInfo::getSLEB128Size(Action.NextAction);
+ }
+
+ // Emit the type ids.
+ for (unsigned M = TypeInfos.size(); M; --M) {
+ // Asm->EOL("TypeInfo");
+ FinalSize += PointerSize;
+ }
+
+ // Emit the filter typeids.
+ for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+ unsigned TypeID = FilterIds[j];
+ FinalSize += TargetAsmInfo::getULEB128Size(TypeID);
+ //Asm->EOL("Filter TypeInfo index");
+ }
+
+ FinalSize = RoundUpToAlign(FinalSize, 4);
+
+ return FinalSize;
+}
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
new file mode 100644
index 0000000..9120ed4
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -0,0 +1,87 @@
+//===------ JITDwarfEmitter.h - Write dwarf tables into memory ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+
+namespace llvm {
+
+class Function;
+class JITCodeEmitter;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineMove;
+class TargetData;
+class TargetMachine;
+class TargetRegisterInfo;
+
+class JITDwarfEmitter {
+ const TargetData* TD;
+ JITCodeEmitter* JCE;
+ const TargetRegisterInfo* RI;
+ MachineModuleInfo* MMI;
+ JIT& Jit;
+ bool needsIndirectEncoding;
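+  // Cached result of TargetFrameInfo::getStackGrowthDirection().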
+ bool stackGrowthDirection;
+
+ unsigned char* EmitExceptionTable(MachineFunction* MF,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction) const;
+
+ void EmitFrameMoves(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const;
+
+ unsigned char* EmitCommonEHFrame(const Function* Personality) const;
+
+ unsigned char* EmitEHFrame(const Function* Personality,
+ unsigned char* StartBufferPtr,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction,
+ unsigned char* ExceptionTable) const;
+
+ unsigned GetExceptionTableSizeInBytes(MachineFunction* MF) const;
+
+ unsigned
+ GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
+ const std::vector<MachineMove> &Moves) const;
+
+ unsigned GetCommonEHFrameSizeInBytes(const Function* Personality) const;
+
+ unsigned GetEHFrameSizeInBytes(const Function* Personality,
+ unsigned char* StartFunction) const;
+
+public:
+
+ JITDwarfEmitter(JIT& jit);
+
+ unsigned char* EmitDwarfTable(MachineFunction& F,
+ JITCodeEmitter& JCE,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction);
+
+
+ unsigned GetDwarfTableSizeInBytes(MachineFunction& F,
+ JITCodeEmitter& JCE,
+ unsigned char* StartFunction,
+ unsigned char* EndFunction);
+
+ void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ }
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
new file mode 100644
index 0000000..d3b0820
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -0,0 +1,1615 @@
+//===-- JITEmitter.cpp - Write machine code to executable memory ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineCodeEmitter object that is used by the JIT to
+// write machine code to memory and remember where relocatable values are.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "JIT.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/System/Disassembler.h"
+#include "llvm/System/Memory.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumBytes, "Number of bytes of machine code compiled");
+STATISTIC(NumRelos, "Number of relocations applied");
+static JIT *TheJIT = 0;
+
+
+//===----------------------------------------------------------------------===//
+// JIT lazy compilation code.
+//
+namespace {
+ class JITResolverState {
+ public:
+ typedef std::map<AssertingVH<Function>, void*> FunctionToStubMapTy;
+ typedef std::map<void*, Function*> StubToFunctionMapTy;
+ typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
+ private:
+ /// FunctionToStubMap - Keep track of the stub created for a particular
+    /// function so that we can reuse it if necessary.
+ FunctionToStubMapTy FunctionToStubMap;
+
+ /// StubToFunctionMap - Keep track of the function that each stub
+ /// corresponds to.
+ StubToFunctionMapTy StubToFunctionMap;
+
+ /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a
+    /// particular GlobalVariable so that we can reuse it if necessary.
+ GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
+
+ public:
+ FunctionToStubMapTy& getFunctionToStubMap(const MutexGuard& locked) {
+ assert(locked.holds(TheJIT->lock));
+ return FunctionToStubMap;
+ }
+
+ StubToFunctionMapTy& getStubToFunctionMap(const MutexGuard& locked) {
+ assert(locked.holds(TheJIT->lock));
+ return StubToFunctionMap;
+ }
+
+ GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) {
+ assert(locked.holds(TheJIT->lock));
+ return GlobalToIndirectSymMap;
+ }
+ };
+
+ /// JITResolver - Keep track of, and resolve, call sites for functions that
+ /// have not yet been compiled.
+ class JITResolver {
+ typedef JITResolverState::FunctionToStubMapTy FunctionToStubMapTy;
+ typedef JITResolverState::StubToFunctionMapTy StubToFunctionMapTy;
+ typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy;
+
+ /// LazyResolverFn - The target lazy resolver function that we actually
+ /// rewrite instructions to use.
+ TargetJITInfo::LazyResolverFn LazyResolverFn;
+
+ JITResolverState state;
+
+ /// ExternalFnToStubMap - This is the equivalent of FunctionToStubMap for
+ /// external functions.
+ std::map<void*, void*> ExternalFnToStubMap;
+
+ /// revGOTMap - map addresses to indexes in the GOT
+ std::map<void*, unsigned> revGOTMap;
+ unsigned nextGOTIndex;
+
+ static JITResolver *TheJITResolver;
+ public:
+ explicit JITResolver(JIT &jit) : nextGOTIndex(0) {
+ TheJIT = &jit;
+
+ LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
+ assert(TheJITResolver == 0 && "Multiple JIT resolvers?");
+ TheJITResolver = this;
+ }
+
+ ~JITResolver() {
+ TheJITResolver = 0;
+ }
+
+ /// getFunctionStubIfAvailable - This returns a pointer to a function stub
+ /// if it has already been created.
+ void *getFunctionStubIfAvailable(Function *F);
+
+ /// getFunctionStub - This returns a pointer to a function stub, creating
+    /// one on demand as needed.
+ void *getFunctionStub(Function *F);
+
+ /// getExternalFunctionStub - Return a stub for the function at the
+ /// specified address, created lazily on demand.
+ void *getExternalFunctionStub(void *FnAddr);
+
+ /// getGlobalValueIndirectSym - Return an indirect symbol containing the
+ /// specified GV address.
+ void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
+
+ /// AddCallbackAtLocation - If the target is capable of rewriting an
+ /// instruction without the use of a stub, record the location of the use so
+ /// we know which function is being used at the location.
+ void *AddCallbackAtLocation(Function *F, void *Location) {
+ MutexGuard locked(TheJIT->lock);
+ /// Get the target-specific JIT resolver function.
+ state.getStubToFunctionMap(locked)[Location] = F;
+ return (void*)(intptr_t)LazyResolverFn;
+ }
+
+ void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
+ SmallVectorImpl<void*> &Ptrs);
+
+ GlobalValue *invalidateStub(void *Stub);
+
+    /// getGOTIndexForAddr - Return a new or existing index in the GOT for
+ /// an address. This function only manages slots, it does not manage the
+ /// contents of the slots or the memory associated with the GOT.
+ unsigned getGOTIndexForAddr(void *addr);
+
+ /// JITCompilerFn - This function is called to resolve a stub to a compiled
+ /// address. If the LLVM Function corresponding to the stub has not yet
+ /// been compiled, this function compiles it first.
+ static void *JITCompilerFn(void *Stub);
+ };
+}
+
+JITResolver *JITResolver::TheJITResolver = 0;
+
+/// getFunctionStubIfAvailable - This returns a pointer to a function stub
+/// if it has already been created.
+void *JITResolver::getFunctionStubIfAvailable(Function *F) {
+ MutexGuard locked(TheJIT->lock);
+
+  // Return the stub for this function if one has already been created.
+ void *&Stub = state.getFunctionToStubMap(locked)[F];
+ return Stub;
+}
+
+/// getFunctionStub - This returns a pointer to a function stub, creating
+/// one on demand as needed.
+void *JITResolver::getFunctionStub(Function *F) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a stub for this function, recycle it.
+ void *&Stub = state.getFunctionToStubMap(locked)[F];
+ if (Stub) return Stub;
+
+ // Call the lazy resolver function unless we are JIT'ing non-lazily, in which
+ // case we must resolve the symbol now.
+ void *Actual = TheJIT->isLazyCompilationDisabled()
+ ? (void *)0 : (void *)(intptr_t)LazyResolverFn;
+
+ // If this is an external declaration, attempt to resolve the address now
+ // to place in the stub.
+ if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode()) {
+ Actual = TheJIT->getPointerToFunction(F);
+
+ // If we resolved the symbol to a null address (eg. a weak external)
+ // don't emit a stub. Return a null pointer to the application. If dlsym
+ // stubs are enabled, not being able to resolve the address is not
+ // meaningful.
+ if (!Actual && !TheJIT->areDlsymStubsEnabled()) return 0;
+ }
+
+ // Codegen a new stub, calling the lazy resolver or the actual address of the
+ // external function, if it was resolved.
+ Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual,
+ *TheJIT->getCodeEmitter());
+
+ if (Actual != (void*)(intptr_t)LazyResolverFn) {
+ // If we are getting the stub for an external function, we really want the
+ // address of the stub in the GlobalAddressMap for the JIT, not the address
+ // of the external function.
+ TheJIT->updateGlobalMapping(F, Stub);
+ }
+
+ DOUT << "JIT: Stub emitted at [" << Stub << "] for function '"
+ << F->getName() << "'\n";
+
+ // Finally, keep track of the stub-to-Function mapping so that the
+ // JITCompilerFn knows which function to compile!
+ state.getStubToFunctionMap(locked)[Stub] = F;
+
+ // If we are JIT'ing non-lazily but need to call a function that does not
+ // exist yet, add it to the JIT's work list so that we can fill in the stub
+ // address later.
+ if (!Actual && TheJIT->isLazyCompilationDisabled())
+ if (!F->isDeclaration() || F->hasNotBeenReadFromBitcode())
+ TheJIT->addPendingFunction(F);
+
+ return Stub;
+}
+
+/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified
+/// GV address.
+void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
+ MutexGuard locked(TheJIT->lock);
+
+ // If we already have a stub for this global variable, recycle it.
+ void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV];
+ if (IndirectSym) return IndirectSym;
+
+ // Otherwise, codegen a new indirect symbol.
+ IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
+ *TheJIT->getCodeEmitter());
+
+ DOUT << "JIT: Indirect symbol emitted at [" << IndirectSym << "] for GV '"
+ << GV->getName() << "'\n";
+
+ return IndirectSym;
+}
+
+/// getExternalFunctionStub - Return a stub for the function at the
+/// specified address, created lazily on demand.
+void *JITResolver::getExternalFunctionStub(void *FnAddr) {
+ // If we already have a stub for this function, recycle it.
+ void *&Stub = ExternalFnToStubMap[FnAddr];
+ if (Stub) return Stub;
+
+ Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr,
+ *TheJIT->getCodeEmitter());
+
+ DOUT << "JIT: Stub emitted at [" << Stub
+ << "] for external function at '" << FnAddr << "'\n";
+ return Stub;
+}
+
+unsigned JITResolver::getGOTIndexForAddr(void* addr) {
+ unsigned idx = revGOTMap[addr];
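+  // operator[] default-constructs a missing entry to 0, so 0 doubles as a
+  // "no entry yet" sentinel and valid GOT indices start at 1.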
+ if (!idx) {
+ idx = ++nextGOTIndex;
+ revGOTMap[addr] = idx;
+ DOUT << "JIT: Adding GOT entry " << idx << " for addr [" << addr << "]\n";
+ }
+ return idx;
+}
+
+void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
+ SmallVectorImpl<void*> &Ptrs) {
+ MutexGuard locked(TheJIT->lock);
+
+ FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked);
+ GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
+
+ for (FunctionToStubMapTy::iterator i = FM.begin(), e = FM.end(); i != e; ++i){
+ Function *F = i->first;
+ if (F->isDeclaration() && F->hasExternalLinkage()) {
+ GVs.push_back(i->first);
+ Ptrs.push_back(i->second);
+ }
+ }
+ for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end();
+ i != e; ++i) {
+ GVs.push_back(i->first);
+ Ptrs.push_back(i->second);
+ }
+}
+
+GlobalValue *JITResolver::invalidateStub(void *Stub) {
+ MutexGuard locked(TheJIT->lock);
+
+ FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked);
+ StubToFunctionMapTy &SM = state.getStubToFunctionMap(locked);
+ GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
+
+ // Look up the cheap way first, to see if it's a function stub we are
+ // invalidating. If so, remove it from both the forward and reverse maps.
+ if (SM.find(Stub) != SM.end()) {
+ Function *F = SM[Stub];
+ SM.erase(Stub);
+ FM.erase(F);
+ return F;
+ }
+
+ // Otherwise, it might be an indirect symbol stub. Find it and remove it.
+ for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end();
+ i != e; ++i) {
+ if (i->second != Stub)
+ continue;
+ GlobalValue *GV = i->first;
+ GM.erase(i);
+ return GV;
+ }
+
+ // Lastly, check to see if it's in the ExternalFnToStubMap.
+ for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(),
+ e = ExternalFnToStubMap.end(); i != e; ++i) {
+ if (i->second != Stub)
+ continue;
+ ExternalFnToStubMap.erase(i);
+ break;
+ }
+
+ return 0;
+}
+
+/// JITCompilerFn - This function is called when a lazy compilation stub has
+/// been entered. It looks up which function this stub corresponds to, compiles
+/// it if necessary, then returns the resultant function pointer.
+void *JITResolver::JITCompilerFn(void *Stub) {
+ JITResolver &JR = *TheJITResolver;
+
+ Function* F = 0;
+ void* ActualPtr = 0;
+
+ {
+ // Only lock for getting the Function. The call getPointerToFunction made
+ // in this function might trigger function materializing, which requires
+ // JIT lock to be unlocked.
+ MutexGuard locked(TheJIT->lock);
+
+    // The address given to us for the stub may not be exactly right; it might
+    // be a little bit after the stub. As such, use upper_bound to find it.
+ StubToFunctionMapTy::iterator I =
+ JR.state.getStubToFunctionMap(locked).upper_bound(Stub);
+ assert(I != JR.state.getStubToFunctionMap(locked).begin() &&
+ "This is not a known stub!");
+ F = (--I)->second;
+ ActualPtr = I->first;
+ }
+
+  // If we have already code-generated the function, just return the address.
+ void *Result = TheJIT->getPointerToGlobalIfAvailable(F);
+
+ if (!Result) {
+ // Otherwise we don't have it, do lazy compilation now.
+
+ // If lazy compilation is disabled, emit a useful error message and abort.
+ if (TheJIT->isLazyCompilationDisabled()) {
+ cerr << "LLVM JIT requested to do lazy compilation of function '"
+ << F->getName() << "' when lazy compiles are disabled!\n";
+ abort();
+ }
+
+ // We might like to remove the stub from the StubToFunction map.
+ // We can't do that! Multiple threads could be stuck, waiting to acquire the
+    // lock above. As soon as the first thread finishes compiling the function,
+ // the next one will be released, and needs to be able to find the function
+ // it needs to call.
+ //JR.state.getStubToFunctionMap(locked).erase(I);
+
+ DOUT << "JIT: Lazily resolving function '" << F->getName()
+ << "' In stub ptr = " << Stub << " actual ptr = "
+ << ActualPtr << "\n";
+
+ Result = TheJIT->getPointerToFunction(F);
+ }
+
+ // Reacquire the lock to erase the stub in the map.
+ MutexGuard locked(TheJIT->lock);
+
+ // We don't need to reuse this stub in the future, as F is now compiled.
+ JR.state.getFunctionToStubMap(locked).erase(F);
+
+ // FIXME: We could rewrite all references to this stub if we knew them.
+
+ // What we will do is set the compiled function address to map to the
+ // same GOT entry as the stub so that later clients may update the GOT
+ // if they see it still using the stub address.
+  // Note: this is done so the Resolver doesn't have to manage GOT memory.
+  // Do this without allocating map space if the target isn't using a GOT.
+ if(JR.revGOTMap.find(Stub) != JR.revGOTMap.end())
+ JR.revGOTMap[Result] = JR.revGOTMap[Stub];
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Function Index Support
+
+// On MacOS we generate an index of currently JIT'd functions so that
+// performance tools can determine a symbol name and accurate code range for a
+// PC value. Because performance tools are generally asynchronous, the code
+// below is written with the hope that it could be interrupted at any time and
+// have useful answers. However, we don't go crazy with atomic operations; we
+// just make a "reasonable effort".
+#ifdef __APPLE__
+#define ENABLE_JIT_SYMBOL_TABLE 0
+#endif
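+// Note that ENABLE_JIT_SYMBOL_TABLE is defined to 0 above, so the symbol
+// table support below is currently compiled out even on Darwin.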
+
+/// JitSymbolEntry - Each function that is JIT compiled results in one of these
+/// being added to an array of symbols. This indicates the name of the function
+/// as well as the address range it occupies. This allows the client to map
+/// from a PC value to the name of the function.
+struct JitSymbolEntry {
+ const char *FnName; // FnName - a strdup'd string.
+ void *FnStart;
+ intptr_t FnSize;
+};
+
+
+struct JitSymbolTable {
+ /// NextPtr - This forms a linked list of JitSymbolTable entries. This
+ /// pointer is not used right now, but might be used in the future. Consider
+ /// it reserved for future use.
+ JitSymbolTable *NextPtr;
+
+ /// Symbols - This is an array of JitSymbolEntry entries. Only the first
+ /// 'NumSymbols' symbols are valid.
+ JitSymbolEntry *Symbols;
+
+  /// NumSymbols - This indicates the number of entries in the Symbols array
+  /// that
+ /// are valid.
+ unsigned NumSymbols;
+
+ /// NumAllocated - This indicates the amount of space we have in the Symbols
+ /// array. This is a private field that should not be read by external tools.
+ unsigned NumAllocated;
+};
+
+#if ENABLE_JIT_SYMBOL_TABLE
+JitSymbolTable *__jitSymbolTable;
+#endif
+
+static void AddFunctionToSymbolTable(const char *FnName,
+ void *FnStart, intptr_t FnSize) {
+ assert(FnName != 0 && FnStart != 0 && "Bad symbol to add");
+ JitSymbolTable **SymTabPtrPtr = 0;
+#if !ENABLE_JIT_SYMBOL_TABLE
+ return;
+#else
+ SymTabPtrPtr = &__jitSymbolTable;
+#endif
+
+ // If this is the first entry in the symbol table, add the JitSymbolTable
+ // index.
+ if (*SymTabPtrPtr == 0) {
+ JitSymbolTable *New = new JitSymbolTable();
+ New->NextPtr = 0;
+ New->Symbols = 0;
+ New->NumSymbols = 0;
+ New->NumAllocated = 0;
+ *SymTabPtrPtr = New;
+ }
+
+ JitSymbolTable *SymTabPtr = *SymTabPtrPtr;
+
+  // If we don't have space in the table, reallocate it.
+  if (SymTabPtr->NumSymbols >= SymTabPtr->NumAllocated) {
+ unsigned NewSize = std::max(64U, SymTabPtr->NumAllocated*2);
+ JitSymbolEntry *NewSymbols = new JitSymbolEntry[NewSize];
+ JitSymbolEntry *OldSymbols = SymTabPtr->Symbols;
+
+ // Copy the old entries over.
+ memcpy(NewSymbols, OldSymbols, SymTabPtr->NumSymbols*sizeof(OldSymbols[0]));
+
+ // Swap the new symbols in, delete the old ones.
+ SymTabPtr->Symbols = NewSymbols;
+ SymTabPtr->NumAllocated = NewSize;
+ delete [] OldSymbols;
+ }
+
+  // We now have enough space; tack the new entry onto the end of the array.
+ JitSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols];
+ Entry.FnName = strdup(FnName);
+ Entry.FnStart = FnStart;
+ Entry.FnSize = FnSize;
+ ++SymTabPtr->NumSymbols;
+}
+
+static void RemoveFunctionFromSymbolTable(void *FnStart) {
+ assert(FnStart && "Invalid function pointer");
+ JitSymbolTable **SymTabPtrPtr = 0;
+#if !ENABLE_JIT_SYMBOL_TABLE
+ return;
+#else
+ SymTabPtrPtr = &__jitSymbolTable;
+#endif
+
+ JitSymbolTable *SymTabPtr = *SymTabPtrPtr;
+ JitSymbolEntry *Symbols = SymTabPtr->Symbols;
+
+ // Scan the table to find its index. The table is not sorted, so do a linear
+ // scan.
+ unsigned Index;
+ for (Index = 0; Symbols[Index].FnStart != FnStart; ++Index)
+ assert(Index != SymTabPtr->NumSymbols && "Didn't find function!");
+
+  // Once we have the index, remove this entry by overwriting it with the
+  // entry at the end of the array, which makes the last slot redundant.
+ const char *OldName = Symbols[Index].FnName;
+ Symbols[Index] = Symbols[SymTabPtr->NumSymbols-1];
+ free((void*)OldName);
+
+ // Drop the number of symbols in the table.
+ --SymTabPtr->NumSymbols;
+
+ // Finally, if we deleted the final symbol, deallocate the table itself.
+ if (SymTabPtr->NumSymbols != 0)
+ return;
+
+ *SymTabPtrPtr = 0;
+ delete [] Symbols;
+ delete SymTabPtr;
+}
+
+//===----------------------------------------------------------------------===//
+// JITEmitter code.
+//
+namespace {
+ /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
+ /// used to output functions to memory for execution.
+ class JITEmitter : public JITCodeEmitter {
+ JITMemoryManager *MemMgr;
+
+ // When outputting a function stub in the context of some other function, we
+ // save BufferBegin/BufferEnd/CurBufferPtr here.
+ uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+ /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ /// ConstantPool - The constant pool for the current function.
+ ///
+ MachineConstantPool *ConstantPool;
+
+ /// ConstantPoolBase - A pointer to the first entry in the constant pool.
+ ///
+ void *ConstantPoolBase;
+
+ /// ConstPoolAddresses - Addresses of individual constant pool entries.
+ ///
+ SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
+ /// JumpTable - The jump tables for the current function.
+ ///
+ MachineJumpTableInfo *JumpTable;
+
+ /// JumpTableBase - A pointer to the first entry in the jump table.
+ ///
+ void *JumpTableBase;
+
+ /// Resolver - This contains info about the currently resolved functions.
+ JITResolver Resolver;
+
+ /// DE - The dwarf emitter for the jit.
+ JITDwarfEmitter *DE;
+
+ /// LabelLocations - This vector is a mapping from Label ID's to their
+ /// address.
+ std::vector<uintptr_t> LabelLocations;
+
+    /// MMI - Machine module info for exception information
+ MachineModuleInfo* MMI;
+
+ // GVSet - a set to keep track of which globals have been seen
+ SmallPtrSet<const GlobalVariable*, 8> GVSet;
+
+ // CurFn - The llvm function being emitted. Only valid during
+ // finishFunction().
+ const Function *CurFn;
+
+ // CurFnStubUses - For a given Function, a vector of stubs that it
+ // references. This facilitates the JIT detecting that a stub is no
+ // longer used, so that it may be deallocated.
+ DenseMap<const Function *, SmallVector<void*, 1> > CurFnStubUses;
+
+ // StubFnRefs - For a given pointer to a stub, a set of Functions which
+ // reference the stub. When the count of a stub's references drops to zero,
+ // the stub is unused.
+ DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs;
+
+ // ExtFnStubs - A map of external function names to stubs which have entries
+ // in the JITResolver's ExternalFnToStubMap.
+ StringMap<void *> ExtFnStubs;
+
+ // MCI - A pointer to a MachineCodeInfo object to update with information.
+ MachineCodeInfo *MCI;
+
+ public:
+ JITEmitter(JIT &jit, JITMemoryManager *JMM) : Resolver(jit), CurFn(0), MCI(0) {
+ MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
+ if (jit.getJITInfo().needsGOT()) {
+ MemMgr->AllocateGOT();
+ DOUT << "JIT is managing a GOT\n";
+ }
+
+ if (ExceptionHandling) DE = new JITDwarfEmitter(jit);
+ }
+ ~JITEmitter() {
+ delete MemMgr;
+ if (ExceptionHandling) delete DE;
+ }
+
+ /// classof - Methods for support type inquiry through isa, cast, and
+ /// dyn_cast:
+ ///
+ static inline bool classof(const JITEmitter*) { return true; }
+ static inline bool classof(const MachineCodeEmitter*) { return true; }
+
+ JITResolver &getJITResolver() { return Resolver; }
+
+ virtual void startFunction(MachineFunction &F);
+ virtual bool finishFunction(MachineFunction &F);
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void initJumpTableInfo(MachineJumpTableInfo *MJTI);
+ void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
+
+ virtual void startGVStub(const GlobalValue* GV, unsigned StubSize,
+ unsigned Alignment = 1);
+ virtual void startGVStub(const GlobalValue* GV, void *Buffer,
+ unsigned StubSize);
+ virtual void* finishGVStub(const GlobalValue *GV);
+
+ /// allocateSpace - Reserves space in the current block if any, or
+    /// allocates a new one of the given size.
+ virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
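+        // Grow to twice the needed size to amortize future resizes.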
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+ DOUT << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+ << (void*) getCurrentPCValue() << "]\n";
+ }
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ /// deallocateMemForFunction - Deallocate all memory for the specified
+ /// function body.
+ void deallocateMemForFunction(Function *F);
+
+ /// AddStubToCurrentFunction - Mark the current function being JIT'd as
+ /// using the stub at the specified address. Allows
+ /// deallocateMemForFunction to also remove stubs no longer referenced.
+ void AddStubToCurrentFunction(void *Stub);
+
+ /// getExternalFnStubs - Accessor for the JIT to find stubs emitted for
+ /// MachineRelocations that reference external functions by name.
+ const StringMap<void*> &getExternalFnStubs() const { return ExtFnStubs; }
+
+ virtual void emitLabel(uint64_t LabelID) {
+ if (LabelLocations.size() <= LabelID)
+ LabelLocations.resize((LabelID+1)*2);
+ LabelLocations[LabelID] = getCurrentPCValue();
+ }
+
+ virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
+ assert(LabelLocations.size() > (unsigned)LabelID &&
+ LabelLocations[LabelID] && "Label not emitted!");
+ return LabelLocations[LabelID];
+ }
+
+ virtual void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ if (ExceptionHandling) DE->setModuleInfo(Info);
+ }
+
+ void setMemoryExecutable(void) {
+ MemMgr->setMemoryExecutable();
+ }
+
+ JITMemoryManager *getMemMgr(void) const { return MemMgr; }
+
+ void setMachineCodeInfo(MachineCodeInfo *mci) {
+ MCI = mci;
+ }
+
+ private:
+    void *getPointerToGlobal(GlobalValue *V, void *Reference,
+                             bool DoesntNeedStub);
+ void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
+ bool NoNeedStub);
+ unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
+ unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size);
+ unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size);
+ unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
+ };
+}
+
+void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
+ bool DoesntNeedStub) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return TheJIT->getOrEmitGlobalVariable(GV);
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return TheJIT->getPointerToGlobal(GA->resolveAliasedGlobal(false));
+
+ // If we have already compiled the function, return a pointer to its body.
+ Function *F = cast<Function>(V);
+ void *ResultPtr;
+ if (!DoesntNeedStub && !TheJIT->isLazyCompilationDisabled()) {
+ // Return the function stub if it's already created.
+ ResultPtr = Resolver.getFunctionStubIfAvailable(F);
+ if (ResultPtr)
+ AddStubToCurrentFunction(ResultPtr);
+ } else {
+ ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+ }
+ if (ResultPtr) return ResultPtr;
+
+ // If this is an external function pointer, we can force the JIT to
+ // 'compile' it, which really just adds it to the map. In dlsym mode,
+ // external functions are forced through a stub, regardless of reloc type.
+ if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode() &&
+ DoesntNeedStub && !TheJIT->areDlsymStubsEnabled())
+ return TheJIT->getPointerToFunction(F);
+
+  // Okay, the function has not been compiled yet. If the target callback
+  // mechanism is capable of rewriting the instruction directly, prefer to do
+  // that instead of emitting a stub. This uses the lazy resolver, so it is
+  // not legal if lazy compilation is disabled.
+ if (DoesntNeedStub && !TheJIT->isLazyCompilationDisabled())
+ return Resolver.AddCallbackAtLocation(F, Reference);
+
+ // Otherwise, we have to emit a stub.
+ void *StubAddr = Resolver.getFunctionStub(F);
+
+ // Add the stub to the current function's list of referenced stubs, so we can
+ // deallocate them if the current function is ever freed. It's possible to
+ // return null from getFunctionStub in the case of a weak extern that fails
+ // to resolve.
+ if (StubAddr)
+ AddStubToCurrentFunction(StubAddr);
+
+ return StubAddr;
+}
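+
+// For illustration, the decision ladder above resolves a reference to a
+// function F roughly as follows:
+//   already JIT'd             -> pointer to F's emitted body
+//   lazy JIT, stub exists     -> the existing stub
+//   external decl, no stub    -> the real address (unless dlsym stubs are on)
+//   lazy JIT, target rewrite  -> a callback patched at the call site
+//   otherwise                 -> a freshly emitted stub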
+
+void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
+ bool NoNeedStub) {
+ // Make sure GV is emitted first, and create a stub containing the fully
+ // resolved address.
+ void *GVAddress = getPointerToGlobal(V, Reference, true);
+ void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
+
+ // Add the stub to the current function's list of referenced stubs, so we can
+ // deallocate them if the current function is ever freed.
+ AddStubToCurrentFunction(StubAddr);
+
+ return StubAddr;
+}
+
+void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
+ if (!TheJIT->areDlsymStubsEnabled())
+ return;
+
+ assert(CurFn && "Stub added to current function, but current function is 0!");
+
+ SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn];
+ StubsUsed.push_back(StubAddr);
+
+ SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[StubAddr];
+ FnRefs.insert(CurFn);
+}
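+
+// For illustration, suppose (hypothetically) functions F1 and F2 both use
+// stub S1 and F2 also uses S2. The two maps above then hold:
+//   CurFnStubUses: { F1 -> [S1], F2 -> [S1, S2] }
+//   StubFnRefs:    { S1 -> {F1, F2}, S2 -> {F2} }
+// Freeing F1 only erases F1 from S1's set; S1 survives because F2 still
+// references it. Freeing F2 afterwards empties both sets, so both stubs can
+// be invalidated.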
+
+static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
+ const TargetData *TD) {
+ const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+ if (Constants.empty()) return 0;
+
+ unsigned Size = 0;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = Constants[i];
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ Size = (Size + AlignMask) & ~AlignMask;
+ const Type *Ty = CPE.getType();
+ Size += TD->getTypeAllocSize(Ty);
+ }
+ return Size;
+}
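+
+// For illustration of the mask arithmetic above: with a running Size of 5 and
+// an entry that needs 8-byte alignment, AlignMask == 7 and
+// Size = (5 + 7) & ~7 == 8; the entry's bytes are then added on top of that.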
+
+static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return 0;
+
+ unsigned NumEntries = 0;
+ for (unsigned i = 0, e = JT.size(); i != e; ++i)
+ NumEntries += JT[i].MBBs.size();
+
+ unsigned EntrySize = MJTI->getEntrySize();
+
+ return NumEntries * EntrySize;
+}
+
+static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
+ if (Alignment == 0) Alignment = 1;
+ // Since we do not know where the buffer will be allocated, be pessimistic.
+ return Size + Alignment;
+}
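+
+// For illustration: the final base address of the buffer is unknown here, so
+// the exact padding cannot be computed. Reserving a full Alignment's worth of
+// slack is always enough; at emission time emitAlignment() consumes only what
+// is actually needed.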
+
+/// addSizeOfGlobal - add the size of the global (plus any alignment padding)
+/// into the running total Size.
+
+unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
+ size_t GVAlign =
+ (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
+ DOUT << "JIT: Adding in size " << GVSize << " alignment " << GVAlign;
+ DEBUG(GV->dump());
+  // Assume the code section ends with the worst possible alignment, so the
+  // first variable needs maximal padding.
+ if (Size==0)
+ Size = 1;
+ Size = ((Size+GVAlign-1)/GVAlign)*GVAlign;
+ Size += GVSize;
+ return Size;
+}
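+
+// For illustration, with a running Size of 13, GVAlign == 8 and GVSize == 4:
+//   Size = ((13 + 7) / 8) * 8 == 16, then Size += 4 gives 20.
+// The Size==0 -> Size=1 special case forces the first variable to receive a
+// full GVAlign of padding, matching the worst-case alignment assumption.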
+
+/// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet
+/// but are referenced from the constant; put them in GVSet and add their
+/// size into the running total Size.
+
+unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
+ unsigned Size) {
+  // If it's undefined, there is nothing to add; return the running total.
+ if (isa<UndefValue>(C))
+ return Size;
+
+  // If the value is a ConstantExpr, look through it for global references.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ Constant *Op0 = CE->getOperand(0);
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ Size = addSizeOfGlobalsInConstantVal(Op0, Size);
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ Size = addSizeOfGlobalsInConstantVal(Op0, Size);
+ Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size);
+ break;
+ }
+ default: {
+ cerr << "ConstantExpr not handled: " << *CE << "\n";
+ abort();
+ }
+ }
+ }
+
+ if (C->getType()->getTypeID() == Type::PointerTyID)
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
+ if (GVSet.insert(GV))
+ Size = addSizeOfGlobal(GV, Size);
+
+ return Size;
+}
+
+/// addSizeOfGlobalsInInitializer - handle any globals that we haven't seen yet
+/// but are referenced from the given initializer.
+
+unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
+ unsigned Size) {
+ if (!isa<UndefValue>(Init) &&
+ !isa<ConstantVector>(Init) &&
+ !isa<ConstantAggregateZero>(Init) &&
+ !isa<ConstantArray>(Init) &&
+ !isa<ConstantStruct>(Init) &&
+ Init->getType()->isFirstClassType())
+ Size = addSizeOfGlobalsInConstantVal(Init, Size);
+ return Size;
+}
+
+/// GetSizeOfGlobalsInBytes - walk the code for the function, looking for
+/// globals; then walk the initializers of those globals looking for more.
+/// If their size has not been considered yet, add it into the running total
+/// Size.
+
+unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
+ unsigned Size = 0;
+ GVSet.clear();
+
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const TargetInstrDesc &Desc = I->getDesc();
+ const MachineInstr &MI = *I;
+ unsigned NumOps = Desc.getNumOperands();
+ for (unsigned CurOp = 0; CurOp < NumOps; CurOp++) {
+ const MachineOperand &MO = MI.getOperand(CurOp);
+ if (MO.isGlobal()) {
+ GlobalValue* V = MO.getGlobal();
+ const GlobalVariable *GV = dyn_cast<const GlobalVariable>(V);
+ if (!GV)
+ continue;
+          // If seen in a previous function, it will have an entry here.
+ if (TheJIT->getPointerToGlobalIfAvailable(GV))
+ continue;
+ // If seen earlier in this function, it will have an entry here.
+ // FIXME: it should be possible to combine these tables, by
+ // assuming the addresses of the new globals in this module
+ // start at 0 (or something) and adjusting them after codegen
+ // complete. Another possibility is to grab a marker bit in GV.
+ if (GVSet.insert(GV))
+ // A variable as yet unseen. Add in its size.
+ Size = addSizeOfGlobal(GV, Size);
+ }
+ }
+ }
+ }
+ DOUT << "JIT: About to look through initializers\n";
+ // Look for more globals that are referenced only from initializers.
+ // GVSet.end is computed each time because the set can grow as we go.
+ for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
+ I != GVSet.end(); I++) {
+ const GlobalVariable* GV = *I;
+ if (GV->hasInitializer())
+ Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size);
+ }
+
+ return Size;
+}
+
+void JITEmitter::startFunction(MachineFunction &F) {
+ DOUT << "JIT: Starting CodeGen of Function "
+ << F.getFunction()->getName() << "\n";
+
+ uintptr_t ActualSize = 0;
+ // Set the memory writable, if it's not already
+ MemMgr->setMemoryWritable();
+ if (MemMgr->NeedsExactSize()) {
+ DOUT << "JIT: ExactSize\n";
+ const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
+ MachineJumpTableInfo *MJTI = F.getJumpTableInfo();
+ MachineConstantPool *MCP = F.getConstantPool();
+
+    // Ensure the constant pool/jump table info is at least 16-byte aligned.
+ ActualSize = RoundUpToAlign(ActualSize, 16);
+
+ // Add the alignment of the constant pool
+ ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment());
+
+ // Add the constant pool size
+ ActualSize += GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
+
+    // Add the alignment of the jump table info
+ ActualSize = RoundUpToAlign(ActualSize, MJTI->getAlignment());
+
+ // Add the jump table size
+ ActualSize += GetJumpTableSizeInBytes(MJTI);
+
+ // Add the alignment for the function
+ ActualSize = RoundUpToAlign(ActualSize,
+ std::max(F.getFunction()->getAlignment(), 8U));
+
+ // Add the function size
+ ActualSize += TII->GetFunctionSizeInBytes(F);
+
+ DOUT << "JIT: ActualSize before globals " << ActualSize << "\n";
+ // Add the size of the globals that will be allocated after this function.
+ // These are all the ones referenced from this function that were not
+ // previously allocated.
+ ActualSize += GetSizeOfGlobalsInBytes(F);
+ DOUT << "JIT: ActualSize after globals " << ActualSize << "\n";
+ }
+
+ BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+
+  // Ensure the constant pool/jump table info is at least 16-byte aligned.
+ emitAlignment(16);
+
+ emitConstantPool(F.getConstantPool());
+ initJumpTableInfo(F.getJumpTableInfo());
+
+ // About to start emitting the machine code for the function.
+ emitAlignment(std::max(F.getFunction()->getAlignment(), 8U));
+ TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr);
+
+ MBBLocations.clear();
+}
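+
+// For illustration, when NeedsExactSize() is true the estimate above is a
+// conservative sum: 16 bytes of slack, constant pool alignment plus size,
+// jump table alignment plus size, function alignment, the instruction bytes,
+// and any not-yet-emitted globals the function references. Each
+// RoundUpToAlign adds a full alignment's worth, so the buffer can only be
+// over-sized, never under-sized.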
+
+bool JITEmitter::finishFunction(MachineFunction &F) {
+ if (CurBufferPtr == BufferEnd) {
+ // FIXME: Allocate more space, then try again.
+ cerr << "JIT: Ran out of space for generated machine code!\n";
+ abort();
+ }
+
+ emitJumpTableInfo(F.getJumpTableInfo());
+
+ // FnStart is the start of the text, not the start of the constant pool and
+ // other per-function data.
+ uint8_t *FnStart =
+ (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
+
+ // FnEnd is the end of the function's machine code.
+ uint8_t *FnEnd = CurBufferPtr;
+
+ if (!Relocations.empty()) {
+ CurFn = F.getFunction();
+ NumRelos += Relocations.size();
+
+ // Resolve the relocations to concrete pointers.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ void *ResultPtr = 0;
+ if (!MR.letTargetResolve()) {
+ if (MR.isExternalSymbol()) {
+ ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
+ false);
+ DOUT << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
+ << ResultPtr << "]\n";
+
+ // If the target REALLY wants a stub for this function, emit it now.
+ if (!MR.doesntNeedStub()) {
+ if (!TheJIT->areDlsymStubsEnabled()) {
+ ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
+ } else {
+ void *&Stub = ExtFnStubs[MR.getExternalSymbol()];
+ if (!Stub) {
+ Stub = Resolver.getExternalFunctionStub((void *)&Stub);
+ AddStubToCurrentFunction(Stub);
+ }
+ ResultPtr = Stub;
+ }
+ }
+ } else if (MR.isGlobalValue()) {
+ ResultPtr = getPointerToGlobal(MR.getGlobalValue(),
+ BufferBegin+MR.getMachineCodeOffset(),
+ MR.doesntNeedStub());
+ } else if (MR.isIndirectSymbol()) {
+ ResultPtr = getPointerToGVIndirectSym(MR.getGlobalValue(),
+ BufferBegin+MR.getMachineCodeOffset(),
+ MR.doesntNeedStub());
+ } else if (MR.isBasicBlock()) {
+ ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock());
+ } else if (MR.isConstantPoolIndex()) {
+ ResultPtr = (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ } else {
+ assert(MR.isJumpTableIndex());
+          ResultPtr = (void*)getJumpTableEntryAddress(MR.getJumpTableIndex());
+ }
+
+ MR.setResultPointer(ResultPtr);
+ }
+
+      // If we are managing the GOT and the relocation wants an index,
+      // give it one.
+ if (MR.isGOTRelative() && MemMgr->isManagingGOT()) {
+ unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr);
+ MR.setGOTIndex(idx);
+ if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) {
+ DOUT << "JIT: GOT was out of date for " << ResultPtr
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
+ << "\n";
+ ((void**)MemMgr->getGOTBase())[idx] = ResultPtr;
+ }
+ }
+ }
+
+ CurFn = 0;
+ TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0],
+ Relocations.size(), MemMgr->getGOTBase());
+ }
+
+ // Update the GOT entry for F to point to the new code.
+ if (MemMgr->isManagingGOT()) {
+ unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin);
+ if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) {
+ DOUT << "JIT: GOT was out of date for " << (void*)BufferBegin
+ << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] << "\n";
+ ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin;
+ }
+ }
+
+ // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for
+ // global variables that were referenced in the relocations.
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
+
+ if (CurBufferPtr == BufferEnd) {
+ // FIXME: Allocate more space, then try again.
+ cerr << "JIT: Ran out of space for generated machine code!\n";
+ abort();
+ }
+
+ BufferBegin = CurBufferPtr = 0;
+ NumBytes += FnEnd-FnStart;
+
+ // Invalidate the icache if necessary.
+ sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart);
+
+ // Add it to the JIT symbol table if the host wants it.
+ AddFunctionToSymbolTable(F.getFunction()->getNameStart(),
+ FnStart, FnEnd-FnStart);
+
+ DOUT << "JIT: Finished CodeGen of [" << (void*)FnStart
+ << "] Function: " << F.getFunction()->getName()
+ << ": " << (FnEnd-FnStart) << " bytes of text, "
+ << Relocations.size() << " relocations\n";
+
+ if (MCI) {
+ MCI->setAddress(FnStart);
+ MCI->setSize(FnEnd-FnStart);
+ }
+
+ Relocations.clear();
+ ConstPoolAddresses.clear();
+
+ // Mark code region readable and executable if it's not so already.
+ MemMgr->setMemoryExecutable();
+
+#ifndef NDEBUG
+ {
+ if (sys::hasDisassembler()) {
+ DOUT << "JIT: Disassembled code:\n";
+ DOUT << sys::disassembleBuffer(FnStart, FnEnd-FnStart, (uintptr_t)FnStart);
+ } else {
+ DOUT << "JIT: Binary code:\n";
+ DOUT << std::hex;
+ uint8_t* q = FnStart;
+ for (int i = 0; q < FnEnd; q += 4, ++i) {
+ if (i == 4)
+ i = 0;
+ if (i == 0)
+ DOUT << "JIT: " << std::setw(8) << std::setfill('0')
+ << (long)(q - FnStart) << ": ";
+ bool Done = false;
+ for (int j = 3; j >= 0; --j) {
+ if (q + j >= FnEnd)
+ Done = true;
+ else
+ DOUT << std::setw(2) << std::setfill('0') << (unsigned short)q[j];
+ }
+ if (Done)
+ break;
+ DOUT << ' ';
+ if (i == 3)
+ DOUT << '\n';
+ }
+ DOUT << std::dec;
+ DOUT<< '\n';
+ }
+ }
+#endif
+ if (ExceptionHandling) {
+ uintptr_t ActualSize = 0;
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ if (MemMgr->NeedsExactSize()) {
+ ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd);
+ }
+
+ BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+ uint8_t* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd);
+ MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
+ FrameRegister);
+ BufferBegin = SavedBufferBegin;
+ BufferEnd = SavedBufferEnd;
+ CurBufferPtr = SavedCurBufferPtr;
+
+ TheJIT->RegisterTable(FrameRegister);
+ }
+
+ if (MMI)
+ MMI->EndFunction();
+
+ return false;
+}
+
+/// deallocateMemForFunction - Deallocate all memory for the specified
+/// function body. Also drop any references the function has to stubs.
+void JITEmitter::deallocateMemForFunction(Function *F) {
+ MemMgr->deallocateMemForFunction(F);
+
+ // If the function did not reference any stubs, return.
+ if (CurFnStubUses.find(F) == CurFnStubUses.end())
+ return;
+
+ // For each referenced stub, erase the reference to this function, and then
+ // erase the list of referenced stubs.
+ SmallVectorImpl<void *> &StubList = CurFnStubUses[F];
+ for (unsigned i = 0, e = StubList.size(); i != e; ++i) {
+ void *Stub = StubList[i];
+
+ // If we already invalidated this stub for this function, continue.
+ if (StubFnRefs.count(Stub) == 0)
+ continue;
+
+ SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub];
+ FnRefs.erase(F);
+
+ // If this function was the last reference to the stub, invalidate the stub
+ // in the JITResolver. Were there a memory manager deallocateStub routine,
+ // we could call that at this point too.
+ if (FnRefs.empty()) {
+ DOUT << "\nJIT: Invalidated Stub at [" << Stub << "]\n";
+ StubFnRefs.erase(Stub);
+
+ // Invalidate the stub. If it is a GV stub, update the JIT's global
+ // mapping for that GV to zero, otherwise, search the string map of
+ // external function names to stubs and remove the entry for this stub.
+ GlobalValue *GV = Resolver.invalidateStub(Stub);
+ if (GV) {
+ TheJIT->updateGlobalMapping(GV, 0);
+ } else {
+ for (StringMapIterator<void*> i = ExtFnStubs.begin(),
+ e = ExtFnStubs.end(); i != e; ++i) {
+ if (i->second == Stub) {
+ ExtFnStubs.erase(i);
+ break;
+ }
+ }
+ }
+ }
+ }
+ CurFnStubUses.erase(F);
+}
+
+
+void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+ if (BufferBegin)
+ return JITCodeEmitter::allocateSpace(Size, Alignment);
+
+  // Create a new memory block if there is no active one. Care must be taken
+  // so that BufferBegin is invalidated when a block is trimmed.
+ BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment);
+ BufferEnd = BufferBegin+Size;
+ return CurBufferPtr;
+}
+
+void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ if (TheJIT->getJITInfo().hasCustomConstantPool())
+ return;
+
+ const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+ if (Constants.empty()) return;
+
+ unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
+ unsigned Align = MCP->getConstantPoolAlignment();
+ ConstantPoolBase = allocateSpace(Size, Align);
+ ConstantPool = MCP;
+
+ if (ConstantPoolBase == 0) return; // Buffer overflow.
+
+ DOUT << "JIT: Emitted constant pool at [" << ConstantPoolBase
+ << "] (size: " << Size << ", alignment: " << Align << ")\n";
+
+ // Initialize the memory for all of the constant pool entries.
+ unsigned Offset = 0;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = Constants[i];
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ Offset = (Offset + AlignMask) & ~AlignMask;
+
+ uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset;
+ ConstPoolAddresses.push_back(CAddr);
+ if (CPE.isMachineConstantPoolEntry()) {
+ // FIXME: add support to lower machine constant pool values into bytes!
+ cerr << "Initialize memory with machine specific constant pool entry"
+ << " has not been implemented!\n";
+ abort();
+ }
+ TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
+ DOUT << "JIT: CP" << i << " at [0x"
+ << std::hex << CAddr << std::dec << "]\n";
+
+ const Type *Ty = CPE.Val.ConstVal->getType();
+ Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty);
+ }
+}
+
+void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
+ if (TheJIT->getJITInfo().hasCustomJumpTables())
+ return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ unsigned NumEntries = 0;
+ for (unsigned i = 0, e = JT.size(); i != e; ++i)
+ NumEntries += JT[i].MBBs.size();
+
+ unsigned EntrySize = MJTI->getEntrySize();
+
+ // Just allocate space for all the jump tables now. We will fix up the actual
+ // MBB entries in the tables after we emit the code for each block, since then
+ // we will know the final locations of the MBBs in memory.
+ JumpTable = MJTI;
+ JumpTableBase = allocateSpace(NumEntries * EntrySize, MJTI->getAlignment());
+}
+
+void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
+ if (TheJIT->getJITInfo().hasCustomJumpTables())
+ return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty() || JumpTableBase == 0) return;
+
+ if (TargetMachine::getRelocationModel() == Reloc::PIC_) {
+ assert(MJTI->getEntrySize() == 4 && "Cross JIT'ing?");
+ // For each jump table, place the offset from the beginning of the table
+ // to the target address.
+ int *SlotPtr = (int*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the offset of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ uintptr_t Base = (uintptr_t)SlotPtr;
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
+ *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
+ }
+ }
+ } else {
+ assert(MJTI->getEntrySize() == sizeof(void*) && "Cross JIT'ing?");
+
+ // For each jump table, map each target in the jump table to the address of
+ // an emitted MachineBasicBlock.
+ intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the address of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
+ *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
+ }
+ }
+}
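+
+// For illustration of the two encodings above, for a slot whose target MBB
+// was emitted at address A in a table based at B:
+//   PIC:      *SlotPtr = getPICJumpTableEntry(A, B)  (typically a value
+//             derived from A - B, so entries fit in 4 bytes)
+//   non-PIC:  *SlotPtr = A                           (an absolute pointer)
+// which matches the EntrySize assertions of 4 and sizeof(void*).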
+
+void JITEmitter::startGVStub(const GlobalValue* GV, unsigned StubSize,
+ unsigned Alignment) {
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
+ BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer,
+ unsigned StubSize) {
+ SavedBufferBegin = BufferBegin;
+ SavedBufferEnd = BufferEnd;
+ SavedCurBufferPtr = CurBufferPtr;
+
+ BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
+ BufferEnd = BufferBegin+StubSize+1;
+}
+
+void *JITEmitter::finishGVStub(const GlobalValue* GV) {
+ NumBytes += getCurrentPCOffset();
+ std::swap(SavedBufferBegin, BufferBegin);
+ BufferEnd = SavedBufferEnd;
+ CurBufferPtr = SavedCurBufferPtr;
+ return SavedBufferBegin;
+}
+
+// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry
+// in the constant pool that was last emitted with the 'emitConstantPool'
+// method.
+//
+uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
+ assert(ConstantNum < ConstantPool->getConstants().size() &&
+ "Invalid ConstantPoolIndex!");
+ return ConstPoolAddresses[ConstantNum];
+}
+
+// getJumpTableEntryAddress - Return the address of the jump table with index
+// 'Index' in the jump table info that was last initialized with
+// 'initJumpTableInfo'.
+//
+uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
+ const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
+ assert(Index < JT.size() && "Invalid jump table index!");
+
+ unsigned Offset = 0;
+ unsigned EntrySize = JumpTable->getEntrySize();
+
+ for (unsigned i = 0; i < Index; ++i)
+ Offset += JT[i].MBBs.size();
+
+ Offset *= EntrySize;
+
+ return (uintptr_t)((char *)JumpTableBase + Offset);
+}
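+
+// For illustration: if the jump table info holds tables of 3, 2 and 4 entries
+// with EntrySize == 4, then getJumpTableEntryAddress(2) skips the 3 + 2 == 5
+// entries of the first two tables and returns JumpTableBase + 5*4.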
+
+//===----------------------------------------------------------------------===//
+// Public interface to this file
+//===----------------------------------------------------------------------===//
+
+JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM) {
+ return new JITEmitter(jit, JMM);
+}
+
+// getPointerToNamedFunction - This function is used as a global wrapper to
+// JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+// bugpoint is debugging the JIT. In that scenario, we are loading an .so and
+// need to resolve the addresses of mis-codegenerated functions at runtime,
+// and this is the hook that does it.
+extern "C" {
+ void *getPointerToNamedFunction(const char *Name) {
+ if (Function *F = TheJIT->FindFunctionNamed(Name))
+ return TheJIT->getPointerToFunction(F);
+ return TheJIT->getPointerToNamedFunction(Name);
+ }
+}
+
+// getPointerToFunctionOrStub - If the specified function has been
+// code-gen'd, return a pointer to the function. If not, compile it, or use
+// a stub to implement lazy compilation if available.
+//
+void *JIT::getPointerToFunctionOrStub(Function *F) {
+ // If we have already code generated the function, just return the address.
+ if (void *Addr = getPointerToGlobalIfAvailable(F))
+ return Addr;
+
+ // Get a stub if the target supports it.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ return JE->getJITResolver().getFunctionStub(F);
+}
+
+void JIT::registerMachineCodeInfo(MachineCodeInfo *mc) {
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+
+ JE->setMachineCodeInfo(mc);
+}
+
+void JIT::updateFunctionStub(Function *F) {
+ // Get the empty stub we generated earlier.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+ void *Stub = JE->getJITResolver().getFunctionStub(F);
+
+ // Tell the target jit info to rewrite the stub at the specified address,
+ // rather than creating a new one.
+ void *Addr = getPointerToGlobalIfAvailable(F);
+ getJITInfo().emitFunctionStubAtAddr(F, Addr, Stub, *getCodeEmitter());
+}
+
+/// updateDlsymStubTable - Emit the data necessary to relocate the stubs
+/// that were emitted during code generation.
+///
+void JIT::updateDlsymStubTable() {
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+
+ SmallVector<GlobalValue*, 8> GVs;
+ SmallVector<void*, 8> Ptrs;
+ const StringMap<void *> &ExtFns = JE->getExternalFnStubs();
+
+ JE->getJITResolver().getRelocatableGVs(GVs, Ptrs);
+
+ unsigned nStubs = GVs.size() + ExtFns.size();
+
+ // If there are no relocatable stubs, return.
+ if (nStubs == 0)
+ return;
+
+ // If there are no new relocatable stubs, return.
+ void *CurTable = JE->getMemMgr()->getDlsymTable();
+ if (CurTable && (*(unsigned *)CurTable == nStubs))
+ return;
+
+ // Calculate the size of the stub info
+ unsigned offset = 4 + 4 * nStubs + sizeof(intptr_t) * nStubs;
+
+ SmallVector<unsigned, 8> Offsets;
+ for (unsigned i = 0; i != GVs.size(); ++i) {
+ Offsets.push_back(offset);
+ offset += GVs[i]->getName().length() + 1;
+ }
+ for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
+ i != e; ++i) {
+ Offsets.push_back(offset);
+ offset += strlen(i->first()) + 1;
+ }
+
+ // Allocate space for the new "stub", which contains the dlsym table.
+ JE->startGVStub(0, offset, 4);
+
+ // Emit the number of records
+ JE->emitInt32(nStubs);
+
+ // Emit the string offsets
+ for (unsigned i = 0; i != nStubs; ++i)
+ JE->emitInt32(Offsets[i]);
+
+ // Emit the pointers. Verify that they are at least 2-byte aligned, and set
+ // the low bit to 0 == GV, 1 == Function, so that the client code doing the
+ // relocation can write the relocated pointer at the appropriate place in
+ // the stub.
+ for (unsigned i = 0; i != GVs.size(); ++i) {
+ intptr_t Ptr = (intptr_t)Ptrs[i];
+ assert((Ptr & 1) == 0 && "Stub pointers must be at least 2-byte aligned!");
+
+ if (isa<Function>(GVs[i]))
+ Ptr |= (intptr_t)1;
+
+ if (sizeof(Ptr) == 8)
+ JE->emitInt64(Ptr);
+ else
+ JE->emitInt32(Ptr);
+ }
+ for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
+ i != e; ++i) {
+ intptr_t Ptr = (intptr_t)i->second | 1;
+
+ if (sizeof(Ptr) == 8)
+ JE->emitInt64(Ptr);
+ else
+ JE->emitInt32(Ptr);
+ }
+
+ // Emit the strings.
+ for (unsigned i = 0; i != GVs.size(); ++i)
+ JE->emitString(GVs[i]->getName());
+ for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
+ i != e; ++i)
+ JE->emitString(i->first());
+
+ // Tell the JIT memory manager where it is. The JIT Memory Manager will
+ // deallocate space for the old one, if one existed.
+ JE->getMemMgr()->SetDlsymTable(JE->finishGVStub(0));
+}
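+
+// For illustration, the dlsym table emitted above is laid out as:
+//   [ uint32 nStubs                ]
+//   [ uint32 StrOffset x nStubs    ]  offsets relative to the table start
+//   [ intptr_t Ptr x nStubs        ]  low bit: 0 == GV, 1 == Function
+//   [ NUL-terminated names ...     ]
+// so a client can map every stub name to its tagged pointer with no other
+// metadata.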
+
+/// freeMachineCodeForFunction - Release machine code memory for the given
+/// Function.
+///
+void JIT::freeMachineCodeForFunction(Function *F) {
+
+ // Delete translation for this from the ExecutionEngine, so it will get
+ // retranslated next time it is used.
+ void *OldPtr = updateGlobalMapping(F, 0);
+
+ if (OldPtr)
+ RemoveFunctionFromSymbolTable(OldPtr);
+
+ // Free the actual memory for the function body and related stuff.
+ assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+ cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
+}
+
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
new file mode 100644
index 0000000..70ccdcc
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -0,0 +1,541 @@
+//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DefaultJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/GlobalValue.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/System/Memory.h"
+#include <map>
+#include <vector>
+#include <cassert>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+
+JITMemoryManager::~JITMemoryManager() {}
+
+//===----------------------------------------------------------------------===//
+// Memory Block Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// MemoryRangeHeader - For a range of memory, this is the header that we put
+  /// on the block of memory. It is carefully crafted to be one word of memory.
+  /// Allocated blocks have just this header; freed blocks have a
+  /// FreeRangeHeader, which starts with this one.
+ struct FreeRangeHeader;
+ struct MemoryRangeHeader {
+ /// ThisAllocated - This is true if this block is currently allocated. If
+ /// not, this can be converted to a FreeRangeHeader.
+ unsigned ThisAllocated : 1;
+
+ /// PrevAllocated - Keep track of whether the block immediately before us is
+ /// allocated. If not, the word immediately before this header is the size
+ /// of the previous block.
+ unsigned PrevAllocated : 1;
+
+ /// BlockSize - This is the size in bytes of this memory block,
+ /// including this header.
+ uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
+
+
+ /// getBlockAfter - Return the memory block immediately after this one.
+ ///
+ MemoryRangeHeader &getBlockAfter() const {
+ return *(MemoryRangeHeader*)((char*)this+BlockSize);
+ }
+
+ /// getFreeBlockBefore - If the block before this one is free, return it,
+ /// otherwise return null.
+ FreeRangeHeader *getFreeBlockBefore() const {
+ if (PrevAllocated) return 0;
+ intptr_t PrevSize = ((intptr_t *)this)[-1];
+ return (FreeRangeHeader*)((char*)this-PrevSize);
+ }
+
+ /// FreeBlock - Turn an allocated block into a free block, adjusting
+ /// bits in the object headers, and adding an end of region memory block.
+ FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList);
+
+ /// TrimAllocationToSize - If this allocated block is significantly larger
+ /// than NewSize, split it into two pieces (where the former is NewSize
+ /// bytes, including the header), and add the new block to the free list.
+ FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
+ uint64_t NewSize);
+ };
+
+ /// FreeRangeHeader - For a memory block that isn't already allocated, this
+ /// keeps track of the current block and has a pointer to the next free block.
+ /// Free blocks are kept on a circularly linked list.
+ struct FreeRangeHeader : public MemoryRangeHeader {
+ FreeRangeHeader *Prev;
+ FreeRangeHeader *Next;
+
+ /// getMinBlockSize - Get the minimum size for a memory block. Blocks
+ /// smaller than this size cannot be created.
+ static unsigned getMinBlockSize() {
+ return sizeof(FreeRangeHeader)+sizeof(intptr_t);
+ }
+
+ /// SetEndOfBlockSizeMarker - The word at the end of every free block is
+ /// known to be the size of the free block. Set it for this block.
+ void SetEndOfBlockSizeMarker() {
+ void *EndOfBlock = (char*)this + BlockSize;
+ ((intptr_t *)EndOfBlock)[-1] = BlockSize;
+ }
+
+ FreeRangeHeader *RemoveFromFreeList() {
+ assert(Next->Prev == this && Prev->Next == this && "Freelist broken!");
+ Next->Prev = Prev;
+ return Prev->Next = Next;
+ }
+
+ void AddToFreeList(FreeRangeHeader *FreeList) {
+ Next = FreeList;
+ Prev = FreeList->Prev;
+ Prev->Next = this;
+ Next->Prev = this;
+ }
+
+ /// GrowBlock - The block after this block just got deallocated. Merge it
+ /// into the current block.
+ void GrowBlock(uintptr_t NewSize);
+
+ /// AllocateBlock - Mark this entire block allocated, updating freelists
+ /// etc. This returns a pointer to the circular free-list.
+ FreeRangeHeader *AllocateBlock();
+ };
+}
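+
+// For illustration, an allocated block followed by a free block is laid out:
+//
+//   [ MemoryRangeHeader | data ................................. ]
+//   [ FreeRangeHeader (Prev/Next) | unused ......... | BlockSize ]
+//
+// The trailing BlockSize word (SetEndOfBlockSizeMarker) is what lets
+// getFreeBlockBefore() work: when PrevAllocated is 0, the word immediately
+// before a header is the size of the preceding free block.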
+
+
+/// AllocateBlock - Mark this entire block allocated, updating freelists
+/// etc. This returns a pointer to the circular free-list.
+FreeRangeHeader *FreeRangeHeader::AllocateBlock() {
+ assert(!ThisAllocated && !getBlockAfter().PrevAllocated &&
+ "Cannot allocate an allocated block!");
+ // Mark this block allocated.
+ ThisAllocated = 1;
+ getBlockAfter().PrevAllocated = 1;
+
+ // Remove it from the free list.
+ return RemoveFromFreeList();
+}
+
+/// FreeBlock - Turn an allocated block into a free block, adjusting
+/// bits in the object headers, and adding an end of region memory block.
+/// If possible, coalesce this block with neighboring blocks. Return the
+/// FreeRangeHeader to allocate from.
+FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
+ MemoryRangeHeader *FollowingBlock = &getBlockAfter();
+ assert(ThisAllocated && "This block is already allocated!");
+ assert(FollowingBlock->PrevAllocated && "Flags out of sync!");
+
+ FreeRangeHeader *FreeListToReturn = FreeList;
+
+ // If the block after this one is free, merge it into this block.
+ if (!FollowingBlock->ThisAllocated) {
+ FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock;
+ // "FreeList" always needs to be a valid free block. If we're about to
+ // coalesce with it, update our notion of what the free list is.
+ if (&FollowingFreeBlock == FreeList) {
+ FreeList = FollowingFreeBlock.Next;
+ FreeListToReturn = 0;
+ assert(&FollowingFreeBlock != FreeList && "No tombstone block?");
+ }
+ FollowingFreeBlock.RemoveFromFreeList();
+
+ // Include the following block into this one.
+ BlockSize += FollowingFreeBlock.BlockSize;
+ FollowingBlock = &FollowingFreeBlock.getBlockAfter();
+
+ // Tell the block after the block we are coalescing that this block is
+ // allocated.
+ FollowingBlock->PrevAllocated = 1;
+ }
+
+ assert(FollowingBlock->ThisAllocated && "Missed coalescing?");
+
+ if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) {
+ PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize);
+ return FreeListToReturn ? FreeListToReturn : PrevFreeBlock;
+ }
+
+ // Otherwise, mark this block free.
+ FreeRangeHeader &FreeBlock = *(FreeRangeHeader*)this;
+ FollowingBlock->PrevAllocated = 0;
+ FreeBlock.ThisAllocated = 0;
+
+ // Link this into the linked list of free blocks.
+ FreeBlock.AddToFreeList(FreeList);
+
+ // Add a marker at the end of the block, indicating the size of this free
+ // block.
+ FreeBlock.SetEndOfBlockSizeMarker();
+ return FreeListToReturn ? FreeListToReturn : &FreeBlock;
+}
+
+/// GrowBlock - The block after this block just got deallocated. Merge it
+/// into the current block.
+void FreeRangeHeader::GrowBlock(uintptr_t NewSize) {
+ assert(NewSize > BlockSize && "Not growing block?");
+ BlockSize = NewSize;
+ SetEndOfBlockSizeMarker();
+ getBlockAfter().PrevAllocated = 0;
+}
+
+/// TrimAllocationToSize - If this allocated block is significantly larger
+/// than NewSize, split it into two pieces (where the former is NewSize
+/// bytes, including the header), and add the new block to the free list.
+FreeRangeHeader *MemoryRangeHeader::
+TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
+ assert(ThisAllocated && getBlockAfter().PrevAllocated &&
+ "Cannot deallocate part of an allocated block!");
+
+ // Don't allow blocks to be trimmed below minimum required size
+ NewSize = std::max<uint64_t>(FreeRangeHeader::getMinBlockSize(), NewSize);
+
+ // Round up size for alignment of header.
+ unsigned HeaderAlign = __alignof(FreeRangeHeader);
+  NewSize = (NewSize + (HeaderAlign-1)) & ~(HeaderAlign-1);
+
+ // Size is now the size of the block we will remove from the start of the
+ // current block.
+ assert(NewSize <= BlockSize &&
+ "Allocating more space from this block than exists!");
+
+ // If splitting this block will cause the remainder to be too small, do not
+ // split the block.
+ if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize())
+ return FreeList;
+
+ // Otherwise, we splice the required number of bytes out of this block, form
+ // a new block immediately after it, then mark this block allocated.
+ MemoryRangeHeader &FormerNextBlock = getBlockAfter();
+
+ // Change the size of this block.
+ BlockSize = NewSize;
+
+ // Get the new block we just sliced out and turn it into a free block.
+ FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
+ NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
+ NewNextBlock.ThisAllocated = 0;
+ NewNextBlock.PrevAllocated = 1;
+ NewNextBlock.SetEndOfBlockSizeMarker();
+ FormerNextBlock.PrevAllocated = 0;
+ NewNextBlock.AddToFreeList(FreeList);
+ return &NewNextBlock;
+}
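+
+// For illustration: trimming a 256-byte block to a header-aligned NewSize of
+// 64 rewrites it as a 64-byte allocated block immediately followed by a new
+// 192-byte free block, which is linked into the free list. If the remainder
+// would be smaller than getMinBlockSize(), the block is left intact instead.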
+
+//===----------------------------------------------------------------------===//
+// Memory Block Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// DefaultJITMemoryManager - Manage memory for the JIT code generation.
+ /// This splits a large block of MAP_NORESERVE'd memory into two
+ /// sections, one for function stubs, one for the functions themselves. We
+ /// have to do this because we may need to emit a function stub while in the
+ /// middle of emitting a function, and we don't know how large the function we
+ /// are emitting is.
+ class VISIBILITY_HIDDEN DefaultJITMemoryManager : public JITMemoryManager {
+ std::vector<sys::MemoryBlock> Blocks; // Memory blocks allocated by the JIT
+ FreeRangeHeader *FreeMemoryList; // Circular list of free blocks.
+
+ // When emitting code into a memory block, this is the block.
+ MemoryRangeHeader *CurBlock;
+
+ uint8_t *CurStubPtr, *StubBase;
+ uint8_t *GOTBase; // Target Specific reserved memory
+ void *DlsymTable; // Stub external symbol information
+
+ // Centralize memory block allocation.
+ sys::MemoryBlock getNewMemoryBlock(unsigned size);
+
+ std::map<const Function*, MemoryRangeHeader*> FunctionBlocks;
+ std::map<const Function*, MemoryRangeHeader*> TableBlocks;
+ public:
+ DefaultJITMemoryManager();
+ ~DefaultJITMemoryManager();
+
+ void AllocateGOT();
+ void SetDlsymTable(void *);
+
+ uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment);
+
+ /// startFunctionBody - When a function starts, allocate a block of free
+ /// executable memory, returning a pointer to it and its actual size.
+ uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) {
+
+ FreeRangeHeader* candidateBlock = FreeMemoryList;
+ FreeRangeHeader* head = FreeMemoryList;
+ FreeRangeHeader* iter = head->Next;
+
+ uintptr_t largest = candidateBlock->BlockSize;
+
+ // Search for the largest free block
+ while (iter != head) {
+ if (iter->BlockSize > largest) {
+ largest = iter->BlockSize;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ // Select this candidate block for allocation
+ CurBlock = candidateBlock;
+
+ // Allocate the entire memory block.
+ FreeMemoryList = candidateBlock->AllocateBlock();
+ ActualSize = CurBlock->BlockSize-sizeof(MemoryRangeHeader);
+ return (uint8_t *)(CurBlock+1);
+ }
+
+ /// endFunctionBody - The function F is now allocated, and takes the memory
+ /// in the range [FunctionStart,FunctionEnd).
+ void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ assert(FunctionEnd > FunctionStart);
+ assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
+ "Mismatched function start/end!");
+
+ uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
+ FunctionBlocks[F] = CurBlock;
+
+ // Release the memory at the end of this block that isn't needed.
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList,
+                                                      BlockSize);
+ }
+
+ /// allocateSpace - Allocate a memory block of the given size.
+ uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+ CurBlock = FreeMemoryList;
+ FreeMemoryList = FreeMemoryList->AllocateBlock();
+
+      // Skip over the whole MemoryRangeHeader, not just its first byte.
+      uint8_t *result = (uint8_t *)(CurBlock + 1);
+
+ if (Alignment == 0) Alignment = 1;
+ result = (uint8_t*)(((intptr_t)result+Alignment-1) &
+ ~(intptr_t)(Alignment-1));
+
+ uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList,
+                                                      BlockSize);
+
+ return result;
+ }
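+
+    // For illustration (on a 64-bit host, so the header is 8 bytes): with
+    // CurBlock at 0x1000 and Alignment == 16, result starts at 0x1008 and is
+    // rounded up to 0x1010. BlockSize then covers header, padding and Size,
+    // so TrimAllocationToSize returns everything past what was handed out.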
+
+ /// startExceptionTable - Use startFunctionBody to allocate memory for the
+ /// function's exception table.
+ uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
+ return startFunctionBody(F, ActualSize);
+ }
+
+ /// endExceptionTable - The exception table of F is now allocated,
+ /// and takes the memory in the range [TableStart,TableEnd).
+ void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) {
+ assert(TableEnd > TableStart);
+ assert(TableStart == (uint8_t *)(CurBlock+1) &&
+ "Mismatched table start/end!");
+
+ uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
+ TableBlocks[F] = CurBlock;
+
+ // Release the memory at the end of this block that isn't needed.
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList,
+                                                      BlockSize);
+ }
+
+ uint8_t *getGOTBase() const {
+ return GOTBase;
+ }
+
+ void *getDlsymTable() const {
+ return DlsymTable;
+ }
+
+ /// deallocateMemForFunction - Deallocate all memory for the specified
+ /// function body.
+ void deallocateMemForFunction(const Function *F) {
+ std::map<const Function*, MemoryRangeHeader*>::iterator
+ I = FunctionBlocks.find(F);
+ if (I == FunctionBlocks.end()) return;
+
+ // Find the block that is allocated for this function.
+ MemoryRangeHeader *MemRange = I->second;
+ assert(MemRange->ThisAllocated && "Block isn't allocated!");
+
+ // Fill the buffer with garbage!
+#ifndef NDEBUG
+ memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+#endif
+
+ // Free the memory.
+ FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
+
+ // Finally, remove this entry from FunctionBlocks.
+ FunctionBlocks.erase(I);
+
+ I = TableBlocks.find(F);
+ if (I == TableBlocks.end()) return;
+
+ // Find the block that is allocated for this function.
+ MemRange = I->second;
+ assert(MemRange->ThisAllocated && "Block isn't allocated!");
+
+ // Fill the buffer with garbage!
+#ifndef NDEBUG
+ memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+#endif
+
+ // Free the memory.
+ FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
+
+ // Finally, remove this entry from TableBlocks.
+ TableBlocks.erase(I);
+ }
+
+ /// setMemoryWritable - When code generation is in progress,
+ /// the code pages may need permissions changed.
+ void setMemoryWritable(void)
+ {
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i)
+ sys::Memory::setWritable(Blocks[i]);
+ }
+ /// setMemoryExecutable - When code generation is done and we're ready to
+ /// start execution, the code pages may need permissions changed.
+ void setMemoryExecutable(void)
+ {
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i)
+ sys::Memory::setExecutable(Blocks[i]);
+ }
+ };
+}
+
+DefaultJITMemoryManager::DefaultJITMemoryManager() {
+ // Allocate a 16M block of memory for functions.
+#if defined(__APPLE__) && defined(__arm__)
+ sys::MemoryBlock MemBlock = getNewMemoryBlock(4 << 20);
+#else
+ sys::MemoryBlock MemBlock = getNewMemoryBlock(16 << 20);
+#endif
+
+ uint8_t *MemBase = static_cast<uint8_t*>(MemBlock.base());
+
+ // Allocate stubs backwards from the base, allocate functions forward
+ // from the base.
+ StubBase = MemBase;
+ CurStubPtr = MemBase + 512*1024; // Use 512k for stubs, working backwards.
+
+ // We set up the memory chunk with 4 mem regions, like this:
+ // [ START
+ // [ Free #0 ] -> Large space to allocate functions from.
+ // [ Allocated #1 ] -> Tiny space to separate regions.
+ // [ Free #2 ] -> Tiny space so there is always at least 1 free block.
+ // [ Allocated #3 ] -> Tiny space to prevent looking past end of block.
+ // END ]
+ //
+ // The last three blocks are never deallocated or touched.
+
+ // Add MemoryRangeHeader to the end of the memory region, indicating that
+ // the space after the block of memory is allocated. This is block #3.
+ MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
+ Mem3->ThisAllocated = 1;
+ Mem3->PrevAllocated = 0;
+ Mem3->BlockSize = 0;
+
+ /// Add a tiny free region so that the free list always has one entry.
+ FreeRangeHeader *Mem2 =
+ (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize());
+ Mem2->ThisAllocated = 0;
+ Mem2->PrevAllocated = 1;
+ Mem2->BlockSize = FreeRangeHeader::getMinBlockSize();
+ Mem2->SetEndOfBlockSizeMarker();
+ Mem2->Prev = Mem2; // Mem2 *is* the free list for now.
+ Mem2->Next = Mem2;
+
+ /// Add a tiny allocated region so that Mem2 is never coalesced away.
+ MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1;
+ Mem1->ThisAllocated = 1;
+ Mem1->PrevAllocated = 0;
+ Mem1->BlockSize = (char*)Mem2 - (char*)Mem1;
+
+ // Add a FreeRangeHeader to the start of the function body region, indicating
+ // that the space is free. Mark the previous block allocated so we never look
+ // at it.
+ FreeRangeHeader *Mem0 = (FreeRangeHeader*)CurStubPtr;
+ Mem0->ThisAllocated = 0;
+ Mem0->PrevAllocated = 1;
+ Mem0->BlockSize = (char*)Mem1-(char*)Mem0;
+ Mem0->SetEndOfBlockSizeMarker();
+ Mem0->AddToFreeList(Mem2);
+
+ // Start out with the freelist pointing to Mem0.
+ FreeMemoryList = Mem0;
+
+ GOTBase = NULL;
+ DlsymTable = NULL;
+}
+
+void DefaultJITMemoryManager::AllocateGOT() {
+ assert(GOTBase == 0 && "Cannot allocate the got multiple times");
+ GOTBase = new uint8_t[sizeof(void*) * 8192];
+ HasGOT = true;
+}
+
+void DefaultJITMemoryManager::SetDlsymTable(void *ptr) {
+ DlsymTable = ptr;
+}
+
+DefaultJITMemoryManager::~DefaultJITMemoryManager() {
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i)
+ sys::Memory::ReleaseRWX(Blocks[i]);
+
+ delete[] GOTBase;
+ Blocks.clear();
+}
+
+uint8_t *DefaultJITMemoryManager::allocateStub(const GlobalValue* F,
+ unsigned StubSize,
+ unsigned Alignment) {
+ CurStubPtr -= StubSize;
+ CurStubPtr = (uint8_t*)(((intptr_t)CurStubPtr) &
+ ~(intptr_t)(Alignment-1));
+ if (CurStubPtr < StubBase) {
+ // FIXME: allocate a new block
+ fprintf(stderr, "JIT ran out of memory for function stubs!\n");
+ abort();
+ }
+ return CurStubPtr;
+}
+
+sys::MemoryBlock DefaultJITMemoryManager::getNewMemoryBlock(unsigned size) {
+ // Allocate a new block close to the last one.
+ const sys::MemoryBlock *BOld = Blocks.empty() ? 0 : &Blocks.front();
+ std::string ErrMsg;
+ sys::MemoryBlock B = sys::Memory::AllocateRWX(size, BOld, &ErrMsg);
+ if (B.base() == 0) {
+ fprintf(stderr,
+ "Allocation failed when allocating new memory in the JIT\n%s\n",
+ ErrMsg.c_str());
+ abort();
+ }
+ Blocks.push_back(B);
+ return B;
+}
+
+
+JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
+ return new DefaultJITMemoryManager();
+}
diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile
new file mode 100644
index 0000000..e2c9c61
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/Makefile
@@ -0,0 +1,37 @@
+##===- lib/ExecutionEngine/JIT/Makefile --------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMJIT
+
+# Get the $(ARCH) setting
+include $(LEVEL)/Makefile.config
+
+# Enable the X86 JIT if compiling on X86
+ifeq ($(ARCH), x86)
+ ENABLE_X86_JIT = 1
+endif
+
+# This flag can also be used on the command line to force inclusion
+# of the X86 JIT on non-X86 hosts
+ifdef ENABLE_X86_JIT
+ CPPFLAGS += -DENABLE_X86_JIT
+endif
+
+# Enable the Sparc JIT if compiling on Sparc
+ifeq ($(ARCH), Sparc)
+ ENABLE_SPARC_JIT = 1
+endif
+
+# This flag can also be used on the command line to force inclusion
+# of the Sparc JIT on non-Sparc hosts
+ifdef ENABLE_SPARC_JIT
+ CPPFLAGS += -DENABLE_SPARC_JIT
+endif
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp
new file mode 100644
index 0000000..0f20819
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp
@@ -0,0 +1,83 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetMachineRegistry for the appropriate JIT to use, and
+// allows the user to specify a specific one on the commandline with -march=x.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/Support/RegistryParser.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+static cl::opt<const TargetMachineRegistry::entry*, false,
+ RegistryParser<TargetMachine> >
+MArch("march", cl::desc("Architecture to generate assembly for:"));
+
+static cl::opt<std::string>
+MCPU("mcpu",
+ cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"),
+ cl::init(""));
+
+static cl::list<std::string>
+MAttrs("mattr",
+ cl::CommaSeparated,
+ cl::desc("Target specific attributes (-mattr=help for details)"),
+ cl::value_desc("a1,+a2,-a3,..."));
+
+/// createJIT - Create and return a new JIT compiler if there is one
+/// available for the current target. Otherwise, return null.
+///
+ExecutionEngine *JIT::createJIT(ModuleProvider *MP, std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachineRegistry::entry *TheArch = MArch;
+ if (TheArch == 0) {
+ std::string Error;
+ TheArch = TargetMachineRegistry::getClosestTargetForJIT(Error);
+ if (TheArch == 0) {
+ if (ErrorStr)
+ *ErrorStr = Error;
+ return 0;
+ }
+ } else if (TheArch->JITMatchQualityFn() == 0) {
+ cerr << "WARNING: This target JIT is not designed for the host you are"
+ << " running. If bad things happen, please choose a different "
+ << "-march switch.\n";
+ }
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ if (!MCPU.empty() || !MAttrs.empty()) {
+ SubtargetFeatures Features;
+ Features.setCPU(MCPU);
+ for (unsigned i = 0; i != MAttrs.size(); ++i)
+ Features.AddFeature(MAttrs[i]);
+ FeaturesStr = Features.getString();
+ }
+
+ // Allocate a target...
+ TargetMachine *Target = TheArch->CtorFn(*MP->getModule(), FeaturesStr);
+ assert(Target && "Could not allocate target machine!");
+
+ // If the target supports JIT code generation, return a new JIT now.
+ if (TargetJITInfo *TJ = Target->getJITInfo())
+ return new JIT(MP, *Target, *TJ, JMM, OptLevel);
+
+ if (ErrorStr)
+ *ErrorStr = "target does not support JIT code generation";
+ return 0;
+}
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
new file mode 100644
index 0000000..e0e050e
--- /dev/null
+++ b/lib/ExecutionEngine/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/Makefile ------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../..
+LIBRARYNAME = LLVMExecutionEngine
+PARALLEL_DIRS = Interpreter JIT
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
new file mode 100644
index 0000000..0b6d2f4
--- /dev/null
+++ b/lib/Linker/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMLinker
+ LinkArchives.cpp
+ LinkItems.cpp
+ LinkModules.cpp
+ Linker.cpp
+ )
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
new file mode 100644
index 0000000..551cc8c
--- /dev/null
+++ b/lib/Linker/LinkArchives.cpp
@@ -0,0 +1,201 @@
+//===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines to handle linking together LLVM bitcode files,
+// and to handle annoying things like static libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Config/config.h"
+#include <memory>
+#include <set>
+using namespace llvm;
+
+/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
+/// exist in an LLVM module. This is a bit tricky because there may be two
+/// symbols with the same name but different LLVM types that will be resolved to
+/// each other but aren't currently (thus we need to treat them as resolved).
+///
+/// Inputs:
+/// M - The module in which to find undefined symbols.
+///
+/// Outputs:
+/// UndefinedSymbols - A set of C++ strings containing the name of all
+/// undefined symbols.
+///
+static void
+GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
+ std::set<std::string> DefinedSymbols;
+ UndefinedSymbols.clear();
+
+ // If the program doesn't define a main, try pulling one in from a .a file.
+ // This is needed for programs where the main function is defined in an
+ // archive, such as f2c'd programs.
+ Function *Main = M->getFunction("main");
+ if (Main == 0 || Main->isDeclaration())
+ UndefinedSymbols.insert("main");
+
+ for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
+ if (I->hasName()) {
+ if (I->isDeclaration())
+ UndefinedSymbols.insert(I->getName());
+ else if (!I->hasLocalLinkage()) {
+ assert(!I->hasDLLImportLinkage()
+ && "Found dllimported non-external symbol!");
+ DefinedSymbols.insert(I->getName());
+ }
+ }
+
+ for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ if (I->hasName()) {
+ if (I->isDeclaration())
+ UndefinedSymbols.insert(I->getName());
+ else if (!I->hasLocalLinkage()) {
+ assert(!I->hasDLLImportLinkage()
+ && "Found dllimported non-external symbol!");
+ DefinedSymbols.insert(I->getName());
+ }
+ }
+
+ for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ if (I->hasName())
+ DefinedSymbols.insert(I->getName());
+
+ // Prune out any defined symbols from the undefined symbols set...
+ for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
+ I != UndefinedSymbols.end(); )
+ if (DefinedSymbols.count(*I))
+ UndefinedSymbols.erase(I++); // This symbol really is defined!
+ else
+ ++I; // Keep this symbol in the undefined symbols list
+}
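+
+// Illustrative sketch (an editor's addition, guarded out): on a module that
+// defines 'main' but only declares 'printf', the set computed above would
+// contain "printf" and not "main".
+#if 0
+static void demoUndefinedSymbols(Module *M) {
+  std::set<std::string> Undefined;
+  GetAllUndefinedSymbols(M, Undefined);
+  for (std::set<std::string>::iterator I = Undefined.begin(),
+       E = Undefined.end(); I != E; ++I)
+    cerr << "undefined: " << *I << "\n";
+}
+#endif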
+
+/// LinkInArchive - opens an archive library and links in all objects which
+/// provide symbols that are currently undefined.
+///
+/// Inputs:
+/// Filename - The pathname of the archive.
+///
+/// Return Value:
+/// TRUE - An error occurred.
+/// FALSE - No errors.
+bool
+Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
+ // Make sure this is an archive file we're dealing with
+ if (!Filename.isArchive())
+ return error("File '" + Filename.toString() + "' is not an archive.");
+
+ // Open the archive file
+ verbose("Linking archive file '" + Filename.toString() + "'");
+
+ // Find all of the symbols currently undefined in the bitcode program.
+ // If all the symbols are defined, the program is complete, and there is
+ // no reason to link in any archive files.
+ std::set<std::string> UndefinedSymbols;
+ GetAllUndefinedSymbols(Composite, UndefinedSymbols);
+
+ if (UndefinedSymbols.empty()) {
+ verbose("No symbols undefined, skipping library '" +
+ Filename.toString() + "'");
+ return false; // No need to link anything in!
+ }
+
+ std::string ErrMsg;
+ std::auto_ptr<Archive> AutoArch(
+ Archive::OpenAndLoadSymbols(Filename, &ErrMsg));
+
+ Archive* arch = AutoArch.get();
+
+ if (!arch)
+ return error("Cannot read archive '" + Filename.toString() +
+ "': " + ErrMsg);
+ if (!arch->isBitcodeArchive()) {
+ is_native = true;
+ return false;
+ }
+ is_native = false;
+
+ // Save a set of symbols that are not defined by the archive. Since we're
+ // entering a loop, there's no point searching for these multiple times. This
+ // variable is used to "set_subtract" from the set of undefined symbols.
+ std::set<std::string> NotDefinedByArchive;
+
+ // Save the current set of undefined symbols, because we may have to make
+ // multiple passes over the archive:
+ std::set<std::string> CurrentlyUndefinedSymbols;
+
+ do {
+ CurrentlyUndefinedSymbols = UndefinedSymbols;
+
+ // Find the modules we need to link into the target module
+ std::set<ModuleProvider*> Modules;
+ if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
+ return error("Cannot find symbols in '" + Filename.toString() +
+ "': " + ErrMsg);
+
+ // If we didn't find any more modules to link this time, we are done
+ // searching this archive.
+ if (Modules.empty())
+ break;
+
+ // Any symbols remaining in UndefinedSymbols after
+ // findModulesDefiningSymbols are ones that the archive does not define. So
+ // we add them to the NotDefinedByArchive variable now.
+ NotDefinedByArchive.insert(UndefinedSymbols.begin(),
+ UndefinedSymbols.end());
+
+ // Loop over all the ModuleProviders that we got back from the archive
+ for (std::set<ModuleProvider*>::iterator I=Modules.begin(), E=Modules.end();
+ I != E; ++I) {
+
+ // Get the module we must link in.
+ std::string moduleErrorMsg;
+ std::auto_ptr<Module> AutoModule((*I)->releaseModule( &moduleErrorMsg ));
+ if (!moduleErrorMsg.empty())
+ return error("Could not load a module: " + moduleErrorMsg);
+
+ Module* aModule = AutoModule.get();
+
+ if (aModule != NULL) {
+ verbose(" Linking in module: " + aModule->getModuleIdentifier());
+
+ // Link it in
+ if (LinkInModule(aModule, &moduleErrorMsg)) {
+ return error("Cannot link in module '" +
+ aModule->getModuleIdentifier() + "': " + moduleErrorMsg);
+ }
+ }
+ }
+
+ // Get the undefined symbols from the aggregate module. This recomputes the
+ // symbols we still need after the new modules have been linked in.
+ GetAllUndefinedSymbols(Composite, UndefinedSymbols);
+
+ // At this point we have two sets of undefined symbols: UndefinedSymbols
+ // which holds the undefined symbols from all the modules, and
+ // NotDefinedByArchive which holds symbols we know the archive doesn't
+ // define. There's no point searching for symbols that we won't find in the
+ // archive so we subtract these sets.
+ set_subtract(UndefinedSymbols, NotDefinedByArchive);
+
+ // If there's no symbols left, no point in continuing to search the
+ // archive.
+ if (UndefinedSymbols.empty())
+ break;
+ } while (CurrentlyUndefinedSymbols != UndefinedSymbols);
+
+ return false;
+}
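+
+// Illustrative sketch (an editor's addition, guarded out): a driver-side view
+// of the routine above. The archive path is an assumption for the example;
+// the repeated passes over the archive happen inside LinkInArchive itself.
+#if 0
+static bool demoLinkArchive(Linker &TheLinker) {
+  bool isNative = false;
+  if (TheLinker.LinkInFile(sys::Path("libfoo.a"), isNative))
+    return true;      // error text is recorded inside the Linker
+  // isNative is set when the archive held native objects rather than bitcode;
+  // such archives must be handed to the system linker instead.
+  return isNative;
+}
+#endif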
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
new file mode 100644
index 0000000..7c888aa
--- /dev/null
+++ b/lib/Linker/LinkItems.cpp
@@ -0,0 +1,238 @@
+//===- lib/Linker/LinkItems.cpp - Link LLVM objects and libraries ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines to handle linking together LLVM bitcode files,
+// and to handle annoying things like static libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+
+using namespace llvm;
+
+// LinkItems - This function is the main entry point into linking. It takes a
+// list of LinkItems which indicates the order in which the files should be
+// linked and how each file should be treated (plain file or with library
+// search). The function only links bitcode and produces a result list of
+// items that are native objects.
+bool
+Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
+ // Clear the NativeItems just in case
+ NativeItems.clear();
+
+ // For each linkage item ...
+ for (ItemList::const_iterator I = Items.begin(), E = Items.end();
+ I != E; ++I) {
+ if (I->second) {
+ // Link in the library suggested.
+ bool is_native = false;
+ if (LinkInLibrary(I->first, is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(*I);
+ } else {
+ // Link in the file suggested
+ bool is_native = false;
+ if (LinkInFile(sys::Path(I->first), is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(*I);
+ }
+ }
+
+ // At this point we have processed all the link items provided to us. Since
+ // we now have an aggregated module, the dependent libraries in
+ // that module should also be aggregated with duplicates eliminated. This is
+ // now the time to process the dependent libraries to resolve any remaining
+ // symbols.
+ bool is_native;
+ for (Module::lib_iterator I = Composite->lib_begin(),
+ E = Composite->lib_end(); I != E; ++I) {
+ if (LinkInLibrary(*I, is_native))
+ return true;
+ if (is_native)
+ NativeItems.push_back(std::make_pair(*I, true));
+ }
+
+ return false;
+}
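+
+// Illustrative sketch (an editor's addition, guarded out): building an item
+// list for the entry point above. The second member of each pair selects
+// library search (true) versus a plain file (false); the names are
+// assumptions for the example.
+#if 0
+static bool demoLinkItems(Linker &TheLinker) {
+  Linker::ItemList Items, NativeItems;
+  Items.push_back(std::make_pair(std::string("main.bc"), false)); // plain file
+  Items.push_back(std::make_pair(std::string("m"), true));        // like -lm
+  return TheLinker.LinkInItems(Items, NativeItems);
+}
+#endif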
+
+
+/// LinkInLibrary - links one library into the HeadModule.
+///
+bool Linker::LinkInLibrary(const std::string& Lib, bool& is_native) {
+ is_native = false;
+ // Determine where this library lives.
+ sys::Path Pathname = FindLib(Lib);
+ if (Pathname.isEmpty())
+ return error("Cannot find library '" + Lib + "'");
+
+ // If it's an archive, try to link it in
+ std::string Magic;
+ Pathname.getMagicNumber(Magic, 64);
+ switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
+ default: assert(0 && "Bad file type identification");
+ case sys::Unknown_FileType:
+ return warning("Supposed library '" + Lib + "' isn't a library.");
+
+ case sys::Bitcode_FileType:
+ // LLVM ".so" file.
+ if (LinkInFile(Pathname, is_native))
+ return true;
+ break;
+
+ case sys::Archive_FileType:
+ if (LinkInArchive(Pathname, is_native))
+ return error("Cannot link archive '" + Pathname.toString() + "'");
+ break;
+
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::COFF_FileType:
+ is_native = true;
+ break;
+ }
+ return false;
+}
+
+/// LinkInLibraries - takes the specified library files and links them into the
+/// main bitcode object file.
+///
+/// Inputs:
+/// Libraries - The list of libraries to link into the module.
+///
+/// Return value:
+/// FALSE - No error.
+/// TRUE - Error.
+///
+bool Linker::LinkInLibraries(const std::vector<std::string> &Libraries) {
+
+ // Process the set of libraries we've been provided.
+ bool is_native = false;
+ for (unsigned i = 0; i < Libraries.size(); ++i)
+ if (LinkInLibrary(Libraries[i], is_native))
+ return true;
+
+ // At this point we have processed all the libraries provided to us. Since
+ // we now have an aggregated module, the dependent libraries in
+ // that module should also be aggregated with duplicates eliminated. This is
+ // now the time to process the dependent libraries to resolve any remaining
+ // symbols.
+ const Module::LibraryListType& DepLibs = Composite->getLibraries();
+ for (Module::LibraryListType::const_iterator I = DepLibs.begin(),
+ E = DepLibs.end(); I != E; ++I)
+ if (LinkInLibrary(*I, is_native))
+ return true;
+
+ return false;
+}
+
+/// LinkInFile - opens the given file, which may contain bitcode or an
+/// archive, and links its contents into the composite module.
+///
+/// Inputs:
+/// File - The pathname of the bitcode file.
+///
+/// Outputs:
+/// ErrorMessage - A C++ string detailing what error occurred, if any.
+///
+/// Return Value:
+/// TRUE - An error occurred.
+/// FALSE - No errors.
+///
+bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
+ is_native = false;
+
+ // Check for a file of name "-", which means "read standard input"
+ if (File.toString() == "-") {
+ std::auto_ptr<Module> M;
+ if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN()) {
+ M.reset(ParseBitcodeFile(Buffer, &Error));
+ delete Buffer;
+ if (M.get())
+ if (!LinkInModule(M.get(), &Error))
+ return false;
+ } else
+ Error = "standard input is empty";
+ return error("Cannot link stdin: " + Error);
+ }
+
+ // Make sure we can at least read the file
+ if (!File.canRead())
+ return error("Cannot find linker input '" + File.toString() + "'");
+
+ // If it's an archive, try to link it in
+ std::string Magic;
+ File.getMagicNumber(Magic, 64);
+ switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
+ default: assert(0 && "Bad file type identification");
+ case sys::Unknown_FileType:
+ return warning("Ignoring file '" + File.toString() +
+ "' because does not contain bitcode.");
+
+ case sys::Archive_FileType:
+ // A user may specify an ar archive without -l, perhaps because it
+ // is not installed as a library. Detect that and link the archive.
+ verbose("Linking archive file '" + File.toString() + "'");
+ if (LinkInArchive(File, is_native))
+ return true;
+ break;
+
+ case sys::Bitcode_FileType: {
+ verbose("Linking bitcode file '" + File.toString() + "'");
+ std::auto_ptr<Module> M(LoadObject(File));
+ if (M.get() == 0)
+ return error("Cannot load file '" + File.toString() + "': " + Error);
+ if (LinkInModule(M.get(), &Error))
+ return error("Cannot link file '" + File.toString() + "': " + Error);
+
+ verbose("Linked in file '" + File.toString() + "'");
+ break;
+ }
+
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::COFF_FileType:
+ is_native = true;
+ break;
+ }
+ return false;
+}
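+
+// Editor's note on the "-" special case above: it lets a driver read a module
+// from a pipe, e.g. (hypothetical invocation)
+//   llvm-as < prog.ll | some-driver -
+// where the driver ends up calling LinkInFile(sys::Path("-"), isNative).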
+
+/// LinkInFiles - takes a module and a list of files and links them all together.
+/// It locates the file either in the current directory, as its absolute
+/// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH.
+///
+/// Inputs:
+/// Files - A vector of sys::Path indicating the LLVM bitcode filenames
+/// to be linked. The names can refer to a mixture of pure LLVM
+/// bitcode files and archive (ar) formatted files.
+///
+/// Return value:
+/// FALSE - No errors.
+/// TRUE - Some error occurred.
+///
+bool Linker::LinkInFiles(const std::vector<sys::Path> &Files) {
+ bool is_native;
+ for (unsigned i = 0; i < Files.size(); ++i)
+ if (LinkInFile(Files[i], is_native))
+ return true;
+ return false;
+}
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
new file mode 100644
index 0000000..4a15d88
--- /dev/null
+++ b/lib/Linker/LinkModules.cpp
@@ -0,0 +1,1328 @@
+//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM module linker.
+//
+// Specifically, this:
+// * Merges global variables between the two modules
+// * Uninit + Uninit = Init, Init + Uninit = Init, Init + Init = Error if !=
+// * Merges functions between two modules
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Path.h"
+#include "llvm/ADT/DenseMap.h"
+#include <sstream>
+using namespace llvm;
+
+// Error - Simple wrapper function to conditionally assign to E and return true.
+// This just makes error return conditions a little bit simpler...
+static inline bool Error(std::string *E, const std::string &Message) {
+ if (E) *E = Message;
+ return true;
+}
+
+// Function: ResolveTypes()
+//
+// Description:
+// Attempt to link the two specified types together.
+//
+// Inputs:
+// DestTy - The type to which we wish to resolve.
+// SrcTy - The original type which we want to resolve.
+//
+// Return value:
+// true - There is an error and the types cannot yet be linked.
+// false - No errors.
+//
+static bool ResolveTypes(const Type *DestTy, const Type *SrcTy) {
+ if (DestTy == SrcTy) return false; // If already equal, noop
+ assert(DestTy && SrcTy && "Can't handle null types");
+
+ if (const OpaqueType *OT = dyn_cast<OpaqueType>(DestTy)) {
+ // Type _is_ in module, just opaque...
+ const_cast<OpaqueType*>(OT)->refineAbstractTypeTo(SrcTy);
+ } else if (const OpaqueType *OT = dyn_cast<OpaqueType>(SrcTy)) {
+ const_cast<OpaqueType*>(OT)->refineAbstractTypeTo(DestTy);
+ } else {
+ return true; // Cannot link types... not-equal and neither is opaque.
+ }
+ return false;
+}
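+
+// Illustrative sketch (an editor's addition, guarded out): ResolveTypes
+// succeeds when one side is opaque. Here the opaque type is refined to i32,
+// after which the holder names the concrete type.
+#if 0
+static void demoResolveTypes() {
+  PATypeHolder Opaque(OpaqueType::get());
+  const Type *Concrete = Type::Int32Ty;
+  bool Failed = ResolveTypes(Opaque.get(), Concrete); // refines the opaque side
+  assert(!Failed && Opaque.get() == Concrete);
+}
+#endif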
+
+/// LinkerTypeMap - This implements a map of types that is stable
+/// even if types are resolved/refined to other types. This is not a general
+/// purpose map, it is specific to the linker's use.
+namespace {
+class LinkerTypeMap : public AbstractTypeUser {
+ typedef DenseMap<const Type*, PATypeHolder> TheMapTy;
+ TheMapTy TheMap;
+
+ LinkerTypeMap(const LinkerTypeMap&); // DO NOT IMPLEMENT
+ void operator=(const LinkerTypeMap&); // DO NOT IMPLEMENT
+public:
+ LinkerTypeMap() {}
+ ~LinkerTypeMap() {
+ for (DenseMap<const Type*, PATypeHolder>::iterator I = TheMap.begin(),
+ E = TheMap.end(); I != E; ++I)
+ I->first->removeAbstractTypeUser(this);
+ }
+
+ /// lookup - Return the value for the specified type or null if it doesn't
+ /// exist.
+ const Type *lookup(const Type *Ty) const {
+ TheMapTy::const_iterator I = TheMap.find(Ty);
+ if (I != TheMap.end()) return I->second;
+ return 0;
+ }
+
+ /// erase - Remove the specified type, returning true if it was in the set.
+ bool erase(const Type *Ty) {
+ if (!TheMap.erase(Ty))
+ return false;
+ if (Ty->isAbstract())
+ Ty->removeAbstractTypeUser(this);
+ return true;
+ }
+
+ /// insert - This returns true if the pointer was new to the set, false if it
+ /// was already in the set.
+ bool insert(const Type *Src, const Type *Dst) {
+ if (!TheMap.insert(std::make_pair(Src, PATypeHolder(Dst))).second)
+ return false; // Already in map.
+ if (Src->isAbstract())
+ Src->addAbstractTypeUser(this);
+ return true;
+ }
+
+protected:
+ /// refineAbstractType - The callback method invoked when an abstract type is
+ /// resolved to another type. An object must override this method to update
+ /// its internal state to reference NewType instead of OldType.
+ ///
+ virtual void refineAbstractType(const DerivedType *OldTy,
+ const Type *NewTy) {
+ TheMapTy::iterator I = TheMap.find(OldTy);
+ const Type *DstTy = I->second;
+
+ TheMap.erase(I);
+ if (OldTy->isAbstract())
+ OldTy->removeAbstractTypeUser(this);
+
+ // Don't reinsert into the map if the key is concrete now.
+ if (NewTy->isAbstract())
+ insert(NewTy, DstTy);
+ }
+
+ /// The other case which AbstractTypeUsers must be aware of is when a type
+ /// makes the transition from being abstract (where it has clients on its
+ /// AbstractTypeUsers list) to concrete (where it does not). This method
+ /// notifies ATU's when this occurs for a type.
+ virtual void typeBecameConcrete(const DerivedType *AbsTy) {
+ TheMap.erase(AbsTy);
+ AbsTy->removeAbstractTypeUser(this);
+ }
+
+ // for debugging...
+ virtual void dump() const {
+ cerr << "AbstractTypeSet!\n";
+ }
+};
+}
+
+
+// RecursiveResolveTypes - This is just like ResolveTypes, except that it
+// recurses down into derived types, merging the used types if the parent types
+// are compatible.
+static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy,
+ LinkerTypeMap &Pointers) {
+ if (DstTy == SrcTy) return false; // If already equal, noop
+
+ // If we found our opaque type, resolve it now!
+ if (isa<OpaqueType>(DstTy) || isa<OpaqueType>(SrcTy))
+ return ResolveTypes(DstTy, SrcTy);
+
+ // Two types cannot be resolved together if they are of different primitive
+ // type. For example, we cannot resolve an int to a float.
+ if (DstTy->getTypeID() != SrcTy->getTypeID()) return true;
+
+ // If neither type is abstract, then they really are just different types.
+ if (!DstTy->isAbstract() && !SrcTy->isAbstract())
+ return true;
+
+ // Otherwise, resolve the used type used by this derived type...
+ switch (DstTy->getTypeID()) {
+ default:
+ return true;
+ case Type::FunctionTyID: {
+ const FunctionType *DstFT = cast<FunctionType>(DstTy);
+ const FunctionType *SrcFT = cast<FunctionType>(SrcTy);
+ if (DstFT->isVarArg() != SrcFT->isVarArg() ||
+ DstFT->getNumContainedTypes() != SrcFT->getNumContainedTypes())
+ return true;
+
+ // Use TypeHolders so recursive resolution won't break us.
+ PATypeHolder ST(SrcFT), DT(DstFT);
+ for (unsigned i = 0, e = DstFT->getNumContainedTypes(); i != e; ++i) {
+ const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i);
+ if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers))
+ return true;
+ }
+ return false;
+ }
+ case Type::StructTyID: {
+ const StructType *DstST = cast<StructType>(DstTy);
+ const StructType *SrcST = cast<StructType>(SrcTy);
+ if (DstST->getNumContainedTypes() != SrcST->getNumContainedTypes())
+ return true;
+
+ PATypeHolder ST(SrcST), DT(DstST);
+ for (unsigned i = 0, e = DstST->getNumContainedTypes(); i != e; ++i) {
+ const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i);
+ if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers))
+ return true;
+ }
+ return false;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType *DAT = cast<ArrayType>(DstTy);
+ const ArrayType *SAT = cast<ArrayType>(SrcTy);
+ if (DAT->getNumElements() != SAT->getNumElements()) return true;
+ return RecursiveResolveTypesI(DAT->getElementType(), SAT->getElementType(),
+ Pointers);
+ }
+ case Type::VectorTyID: {
+ const VectorType *DVT = cast<VectorType>(DstTy);
+ const VectorType *SVT = cast<VectorType>(SrcTy);
+ if (DVT->getNumElements() != SVT->getNumElements()) return true;
+ return RecursiveResolveTypesI(DVT->getElementType(), SVT->getElementType(),
+ Pointers);
+ }
+ case Type::PointerTyID: {
+ const PointerType *DstPT = cast<PointerType>(DstTy);
+ const PointerType *SrcPT = cast<PointerType>(SrcTy);
+
+ if (DstPT->getAddressSpace() != SrcPT->getAddressSpace())
+ return true;
+
+ // If this is a pointer type, check to see if we have already seen it. If
+ // so, we are in a recursive branch. Cut off the search now. We cannot use
+ // an associative container for this search, because the type pointers (keys
+ // in the container) change whenever types get resolved.
+ if (SrcPT->isAbstract())
+ if (const Type *ExistingDestTy = Pointers.lookup(SrcPT))
+ return ExistingDestTy != DstPT;
+
+ if (DstPT->isAbstract())
+ if (const Type *ExistingSrcTy = Pointers.lookup(DstPT))
+ return ExistingSrcTy != SrcPT;
+ // Otherwise, add the current pointers to the vector to stop recursion on
+ // this pair.
+ if (DstPT->isAbstract())
+ Pointers.insert(DstPT, SrcPT);
+ if (SrcPT->isAbstract())
+ Pointers.insert(SrcPT, DstPT);
+
+ return RecursiveResolveTypesI(DstPT->getElementType(),
+ SrcPT->getElementType(), Pointers);
+ }
+ }
+}
+
+static bool RecursiveResolveTypes(const Type *DestTy, const Type *SrcTy) {
+ LinkerTypeMap PointerTypes;
+ return RecursiveResolveTypesI(DestTy, SrcTy, PointerTypes);
+}
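+
+// Illustrative sketch (an editor's addition, guarded out): the subelement
+// case this handles. '{ i32* }' and '{ opaque* }' differ only in a pointee,
+// so recursive resolution refines the opaque pointee and the structs unify.
+#if 0
+static void demoRecursiveResolve() {
+  PATypeHolder Opq(OpaqueType::get());
+  std::vector<const Type*> SrcElts, DstElts;
+  SrcElts.push_back(PointerType::getUnqual(Opq.get()));     // { opaque* }
+  DstElts.push_back(PointerType::getUnqual(Type::Int32Ty)); // { i32* }
+  PATypeHolder Src(StructType::get(SrcElts));
+  PATypeHolder Dst(StructType::get(DstElts));
+  bool Failed = RecursiveResolveTypes(Dst.get(), Src.get());
+  assert(!Failed && Dst.get() == Src.get());
+}
+#endif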
+
+
+// LinkTypes - Go through the symbol table of the Src module and see if any
+// types are named in the src module that are not named in the Dst module.
+// Make sure there are no type name conflicts.
+static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) {
+ TypeSymbolTable *DestST = &Dest->getTypeSymbolTable();
+ const TypeSymbolTable *SrcST = &Src->getTypeSymbolTable();
+
+ // Look for a type plane for Types...
+ TypeSymbolTable::const_iterator TI = SrcST->begin();
+ TypeSymbolTable::const_iterator TE = SrcST->end();
+ if (TI == TE) return false; // No named types, do nothing.
+
+ // Some types cannot be resolved immediately because they depend on other
+ // types being resolved to each other first. This contains a list of types we
+ // are waiting to recheck.
+ std::vector<std::string> DelayedTypesToResolve;
+
+ for ( ; TI != TE; ++TI ) {
+ const std::string &Name = TI->first;
+ const Type *RHS = TI->second;
+
+ // Check to see if this type name is already in the dest module.
+ Type *Entry = DestST->lookup(Name);
+
+ // If the name is just in the source module, bring it over to the dest.
+ if (Entry == 0) {
+ if (!Name.empty())
+ DestST->insert(Name, const_cast<Type*>(RHS));
+ } else if (ResolveTypes(Entry, RHS)) {
+ // They look different, save the types till later to resolve.
+ DelayedTypesToResolve.push_back(Name);
+ }
+ }
+
+ // Iteratively resolve types while we can...
+ while (!DelayedTypesToResolve.empty()) {
+ // Loop over all of the types, attempting to resolve them if possible...
+ unsigned OldSize = DelayedTypesToResolve.size();
+
+ // Try direct resolution by name...
+ for (unsigned i = 0; i != DelayedTypesToResolve.size(); ++i) {
+ const std::string &Name = DelayedTypesToResolve[i];
+ Type *T1 = SrcST->lookup(Name);
+ Type *T2 = DestST->lookup(Name);
+ if (!ResolveTypes(T2, T1)) {
+ // We are making progress!
+ DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i);
+ --i;
+ }
+ }
+
+ // Did we not eliminate any types?
+ if (DelayedTypesToResolve.size() == OldSize) {
+ // Attempt to resolve subelements of types. This allows us to merge these
+ // two types: { int* } and { opaque* }
+ for (unsigned i = 0, e = DelayedTypesToResolve.size(); i != e; ++i) {
+ const std::string &Name = DelayedTypesToResolve[i];
+ if (!RecursiveResolveTypes(SrcST->lookup(Name), DestST->lookup(Name))) {
+ // We are making progress!
+ DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i);
+
+ // Go back to the main loop, perhaps we can resolve directly by name
+ // now...
+ break;
+ }
+ }
+
+ // If we STILL cannot resolve the types, then there is something wrong.
+ if (DelayedTypesToResolve.size() == OldSize) {
+ // Give up on the last delayed type so the loop makes progress.
+ DelayedTypesToResolve.pop_back();
+ }
+ }
+ }
+
+
+ return false;
+}
+
+#ifndef NDEBUG
+static void PrintMap(const std::map<const Value*, Value*> &M) {
+ for (std::map<const Value*, Value*>::const_iterator I = M.begin(), E =M.end();
+ I != E; ++I) {
+ cerr << " Fr: " << (void*)I->first << " ";
+ I->first->dump();
+ cerr << " To: " << (void*)I->second << " ";
+ I->second->dump();
+ cerr << "\n";
+ }
+}
+#endif
+
+
+// RemapOperand - Use ValueMap to convert constants from one module to another.
+static Value *RemapOperand(const Value *In,
+ std::map<const Value*, Value*> &ValueMap) {
+ std::map<const Value*,Value*>::const_iterator I = ValueMap.find(In);
+ if (I != ValueMap.end())
+ return I->second;
+
+ // Check to see if it's a constant that we are interested in transforming.
+ Value *Result = 0;
+ if (const Constant *CPV = dyn_cast<Constant>(In)) {
+ if ((!isa<DerivedType>(CPV->getType()) && !isa<ConstantExpr>(CPV)) ||
+ isa<ConstantInt>(CPV) || isa<ConstantAggregateZero>(CPV))
+ return const_cast<Constant*>(CPV); // Simple constants stay identical.
+
+ if (const ConstantArray *CPA = dyn_cast<ConstantArray>(CPV)) {
+ std::vector<Constant*> Operands(CPA->getNumOperands());
+ for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+ Operands[i] =cast<Constant>(RemapOperand(CPA->getOperand(i), ValueMap));
+ Result = ConstantArray::get(cast<ArrayType>(CPA->getType()), Operands);
+ } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(CPV)) {
+ std::vector<Constant*> Operands(CPS->getNumOperands());
+ for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+ Operands[i] =cast<Constant>(RemapOperand(CPS->getOperand(i), ValueMap));
+ Result = ConstantStruct::get(cast<StructType>(CPS->getType()), Operands);
+ } else if (isa<ConstantPointerNull>(CPV) || isa<UndefValue>(CPV)) {
+ Result = const_cast<Constant*>(CPV);
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CPV)) {
+ std::vector<Constant*> Operands(CP->getNumOperands());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ Operands[i] = cast<Constant>(RemapOperand(CP->getOperand(i), ValueMap));
+ Result = ConstantVector::get(Operands);
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
+ std::vector<Constant*> Ops;
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ Ops.push_back(cast<Constant>(RemapOperand(CE->getOperand(i),ValueMap)));
+ Result = CE->getWithOperands(Ops);
+ } else {
+ assert(!isa<GlobalValue>(CPV) && "Unmapped global?");
+ assert(0 && "Unknown type of derived type constant value!");
+ }
+ } else if (isa<InlineAsm>(In)) {
+ Result = const_cast<Value*>(In);
+ }
+
+ // Cache the mapping in our local map structure
+ if (Result) {
+ ValueMap[In] = Result;
+ return Result;
+ }
+
+#ifndef NDEBUG
+ cerr << "LinkModules ValueMap: \n";
+ PrintMap(ValueMap);
+
+ cerr << "Couldn't remap value: " << (void*)In << " " << *In << "\n";
+ assert(0 && "Couldn't remap value!");
+#endif
+ return 0;
+}
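+
+// Illustrative sketch (an editor's addition, guarded out): remapping a
+// constant expression. If ValueMap sends @SrcG to @DstG, the
+// getWithOperands() path above rebuilds the expression over @DstG.
+#if 0
+static void demoRemap(GlobalVariable *SrcG, GlobalVariable *DstG) {
+  std::map<const Value*, Value*> ValueMap;
+  ValueMap[SrcG] = DstG;
+  Constant *CE =
+    ConstantExpr::getBitCast(SrcG, PointerType::getUnqual(Type::Int8Ty));
+  Value *Mapped = RemapOperand(CE, ValueMap);
+  // Mapped is now 'bitcast (@DstG to i8*)'.
+}
+#endif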
+
+/// ForceRenaming - The LLVM SymbolTable class autorenames globals that conflict
+/// in the symbol table. This is good for all clients except for us. Go
+/// through the trouble to force this back.
+static void ForceRenaming(GlobalValue *GV, const std::string &Name) {
+ assert(GV->getName() != Name && "Can't force rename to self");
+ ValueSymbolTable &ST = GV->getParent()->getValueSymbolTable();
+
+ // If there is a conflict, rename the conflict.
+ if (GlobalValue *ConflictGV = cast_or_null<GlobalValue>(ST.lookup(Name))) {
+ assert(ConflictGV->hasLocalLinkage() &&
+ "Not conflicting with a static global, should link instead!");
+ GV->takeName(ConflictGV);
+ ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
+ assert(ConflictGV->getName() != Name && "ForceRenaming didn't work");
+ } else {
+ GV->setName(Name); // Force the name back
+ }
+}
+
+/// CopyGVAttributes - copy additional attributes (those not needed to construct
+/// a GlobalValue) from the SrcGV to the DestGV.
+static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
+ // Use the maximum alignment, rather than just copying the alignment of SrcGV.
+ unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
+ DestGV->copyAttributesFrom(SrcGV);
+ DestGV->setAlignment(Alignment);
+}
+
+/// GetLinkageResult - This analyzes the two global values and determines what
+/// the result will look like in the destination module. In particular, it
+/// computes the resultant linkage type, computes whether the global in the
+/// source should be copied over to the destination (replacing the existing
+/// one), and computes whether this linkage is an error or not. It also performs
+/// visibility checks: we cannot link together two symbols with different
+/// visibilities.
+static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT, bool &LinkFromSrc,
+ std::string *Err) {
+ assert((!Dest || !Src->hasLocalLinkage()) &&
+ "If Src has internal linkage, Dest shouldn't be set!");
+ if (!Dest) {
+ // Linking something to nothing.
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else if (Src->isDeclaration()) {
+ // If Src is external or if both Src & Dest are external.. Just link the
+ // external globals, we aren't adding anything.
+ if (Src->hasDLLImportLinkage()) {
+ // If one of the GVs has DLLImport linkage, the result should be dllimport'ed.
+ if (Dest->isDeclaration()) {
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ }
+ } else if (Dest->hasExternalWeakLinkage()) {
+ // If the Dest is weak, use the source linkage.
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ }
+ } else if (Dest->isDeclaration() && !Dest->hasDLLImportLinkage()) {
+ // If Dest is external but Src is not:
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else if (Src->hasAppendingLinkage() || Dest->hasAppendingLinkage()) {
+ if (Src->getLinkage() != Dest->getLinkage())
+ return Error(Err, "Linking globals named '" + Src->getName() +
+ "': can only link appending global with another appending global!");
+ LinkFromSrc = true; // Special cased.
+ LT = Src->getLinkage();
+ } else if (Src->isWeakForLinker()) {
+ // At this point we know that Dest has LinkOnce, External*, Weak, Common,
+ // or DLL* linkage.
+ if (Dest->hasExternalWeakLinkage() ||
+ Dest->hasAvailableExternallyLinkage() ||
+ (Dest->hasLinkOnceLinkage() &&
+ (Src->hasWeakLinkage() || Src->hasCommonLinkage()))) {
+ LinkFromSrc = true;
+ LT = Src->getLinkage();
+ } else {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ }
+ } else if (Dest->isWeakForLinker()) {
+ // At this point we know that Src has External* or DLL* linkage.
+ if (Src->hasExternalWeakLinkage()) {
+ LinkFromSrc = false;
+ LT = Dest->getLinkage();
+ } else {
+ LinkFromSrc = true;
+ LT = GlobalValue::ExternalLinkage;
+ }
+ } else {
+ assert((Dest->hasExternalLinkage() ||
+ Dest->hasDLLImportLinkage() ||
+ Dest->hasDLLExportLinkage() ||
+ Dest->hasExternalWeakLinkage()) &&
+ (Src->hasExternalLinkage() ||
+ Src->hasDLLImportLinkage() ||
+ Src->hasDLLExportLinkage() ||
+ Src->hasExternalWeakLinkage()) &&
+ "Unexpected linkage type!");
+ return Error(Err, "Linking globals named '" + Src->getName() +
+ "': symbol multiply defined!");
+ }
+
+ // Check visibility
+ if (Dest && Src->getVisibility() != Dest->getVisibility())
+ if (!Src->isDeclaration() && !Dest->isDeclaration())
+ return Error(Err, "Linking globals named '" + Src->getName() +
+ "': symbols have different visibilities!");
+ return false;
+}
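+
+// Illustrative sketch (an editor's addition, guarded out): the common
+// "definition wins over declaration" case decided above, assuming the source
+// definition has plain external linkage.
+#if 0
+static void demoLinkageResult(GlobalValue *DeclInDest, GlobalValue *DefInSrc) {
+  GlobalValue::LinkageTypes LT;
+  bool LinkFromSrc;
+  std::string Err;
+  // Dest is only a declaration, so the source definition is copied over and
+  // its linkage kept.
+  if (!GetLinkageResult(DeclInDest, DefInSrc, LT, LinkFromSrc, &Err))
+    assert(LinkFromSrc && LT == DefInSrc->getLinkage());
+}
+#endif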
+
+// LinkGlobals - Loop through the global variables in the src module and merge
+// them into the dest module.
+static bool LinkGlobals(Module *Dest, const Module *Src,
+ std::map<const Value*, Value*> &ValueMap,
+ std::multimap<std::string, GlobalVariable *> &AppendingVars,
+ std::string *Err) {
+ ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
+
+ // Loop over all of the globals in the src module, mapping them over as we go
+ for (Module::const_global_iterator I = Src->global_begin(),
+ E = Src->global_end(); I != E; ++I) {
+ const GlobalVariable *SGV = I;
+ GlobalValue *DGV = 0;
+
+ // Check to see if we may have to link the global against a global, alias or
+ // function.
+ if (SGV->hasName() && !SGV->hasLocalLinkage())
+ DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SGV->getNameStart(),
+ SGV->getNameEnd()));
+
+ // If we found a global with the same name in the dest module, but it has
+ // internal linkage, we are really not doing any linkage here.
+ if (DGV && DGV->hasLocalLinkage())
+ DGV = 0;
+
+ // If types don't agree due to opaque types, try to resolve them.
+ if (DGV && DGV->getType() != SGV->getType())
+ RecursiveResolveTypes(SGV->getType(), DGV->getType());
+
+ assert((SGV->hasInitializer() || SGV->hasExternalWeakLinkage() ||
+ SGV->hasExternalLinkage() || SGV->hasDLLImportLinkage()) &&
+ "Global must either be external or have an initializer!");
+
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (GetLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc, Err))
+ return true;
+
+ if (DGV == 0) {
+ // No linking to be performed, simply create an identical version of the
+ // symbol over in the dest module... the initializer will be filled in
+ // later by LinkGlobalInits.
+ GlobalVariable *NewDGV =
+ new GlobalVariable(SGV->getType()->getElementType(),
+ SGV->isConstant(), SGV->getLinkage(), /*init*/0,
+ SGV->getName(), Dest, false,
+ SGV->getType()->getAddressSpace());
+ // Propagate alignment, visibility and section info.
+ CopyGVAttributes(NewDGV, SGV);
+
+ // If the LLVM runtime renamed the global, but it is an externally visible
+ // symbol, DGV must be an existing global with internal linkage. Rename
+ // it.
+ if (!NewDGV->hasLocalLinkage() && NewDGV->getName() != SGV->getName())
+ ForceRenaming(NewDGV, SGV->getName());
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = NewDGV;
+
+ // Keep track that this is an appending variable.
+ if (SGV->hasAppendingLinkage())
+ AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV));
+ continue;
+ }
+
+ // If the visibilities of the symbols disagree and the destination is a
+ // prototype, take the visibility of its input.
+ if (DGV->isDeclaration())
+ DGV->setVisibility(SGV->getVisibility());
+
+ if (DGV->hasAppendingLinkage()) {
+ // No linking is performed yet. Just insert a new copy of the global, and
+ // keep track of the fact that it is an appending variable in the
+ // AppendingVars map. The name is cleared out so that no linkage is
+ // performed.
+ GlobalVariable *NewDGV =
+ new GlobalVariable(SGV->getType()->getElementType(),
+ SGV->isConstant(), SGV->getLinkage(), /*init*/0,
+ "", Dest, false,
+ SGV->getType()->getAddressSpace());
+
+ // Set the alignment so CopyGVAttributes can merge it with SGV's alignment.
+ NewDGV->setAlignment(DGV->getAlignment());
+ // Propagate alignment, section and visibility info.
+ CopyGVAttributes(NewDGV, SGV);
+
+ // Make sure to remember this mapping...
+ ValueMap[SGV] = NewDGV;
+
+ // Keep track that this is an appending variable...
+ AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV));
+ continue;
+ }
+
+ if (LinkFromSrc) {
+ if (isa<GlobalAlias>(DGV))
+ return Error(Err, "Global-Alias Collision on '" + SGV->getName() +
+ "': symbol multiple defined");
+
+ // If the types don't match, and if we are to link from the source, nuke
+ // DGV and create a new one of the appropriate type. Note that the thing
+ // we are replacing may be a function (if a prototype, weak, etc) or a
+ // global variable.
+ GlobalVariable *NewDGV =
+ new GlobalVariable(SGV->getType()->getElementType(), SGV->isConstant(),
+ NewLinkage, /*init*/0, DGV->getName(), Dest, false,
+ SGV->getType()->getAddressSpace());
+
+ // Propagate alignment, section, and visibility info.
+ CopyGVAttributes(NewDGV, SGV);
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
+
+ // DGV will conflict with NewDGV because they both had the same
+ // name. We must erase this now so ForceRenaming doesn't assert
+ // because DGV might not have internal linkage.
+ if (GlobalVariable *Var = dyn_cast<GlobalVariable>(DGV))
+ Var->eraseFromParent();
+ else
+ cast<Function>(DGV)->eraseFromParent();
+ DGV = NewDGV;
+
+ // If the symbol table renamed the global, but it is an externally visible
+ // symbol, DGV must be an existing global with internal linkage. Rename.
+ if (NewDGV->getName() != SGV->getName() && !NewDGV->hasLocalLinkage())
+ ForceRenaming(NewDGV, SGV->getName());
+
+ // Inherit const as appropriate.
+ NewDGV->setConstant(SGV->isConstant());
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = NewDGV;
+ continue;
+ }
+
+ // Not "link from source", keep the one in the DestModule and remap the
+ // input onto it.
+
+ // Special case for const propagation.
+ if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
+ if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
+ DGVar->setConstant(true);
+
+ // SGV is a global, but DGV is an alias.
+ if (isa<GlobalAlias>(DGV)) {
+ // The only valid mappings are:
+ // - SGV is external declaration, which is effectively a no-op.
+ // - SGV is weak, when we just need to throw SGV out.
+ if (!SGV->isDeclaration() && !SGV->isWeakForLinker())
+ return Error(Err, "Global-Alias Collision on '" + SGV->getName() +
+ "': symbol multiple defined");
+ }
+
+ // Set calculated linkage
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping...
+ ValueMap[SGV] = ConstantExpr::getBitCast(DGV, SGV->getType());
+ }
+ return false;
+}
+
+static GlobalValue::LinkageTypes
+CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
+ GlobalValue::LinkageTypes SL = SGV->getLinkage();
+ GlobalValue::LinkageTypes DL = DGV->getLinkage();
+ if (SL == GlobalValue::ExternalLinkage || DL == GlobalValue::ExternalLinkage)
+ return GlobalValue::ExternalLinkage;
+ else if (SL == GlobalValue::WeakAnyLinkage ||
+ DL == GlobalValue::WeakAnyLinkage)
+ return GlobalValue::WeakAnyLinkage;
+ else if (SL == GlobalValue::WeakODRLinkage ||
+ DL == GlobalValue::WeakODRLinkage)
+ return GlobalValue::WeakODRLinkage;
+ else if (SL == GlobalValue::InternalLinkage &&
+ DL == GlobalValue::InternalLinkage)
+ return GlobalValue::InternalLinkage;
+ else {
+ assert (SL == GlobalValue::PrivateLinkage &&
+ DL == GlobalValue::PrivateLinkage && "Unexpected linkage type");
+ return GlobalValue::PrivateLinkage;
+ }
+}
+
+// LinkAlias - Loop through the aliases in the src module and link them into
+// the dest module. We're assuming that all functions/global variables were
+// already linked in.
+static bool LinkAlias(Module *Dest, const Module *Src,
+ std::map<const Value*, Value*> &ValueMap,
+ std::string *Err) {
+ // Loop over all aliases in the src module
+ for (Module::const_alias_iterator I = Src->alias_begin(),
+ E = Src->alias_end(); I != E; ++I) {
+ const GlobalAlias *SGA = I;
+ const GlobalValue *SAliasee = SGA->getAliasedGlobal();
+ GlobalAlias *NewGA = NULL;
+
+ // Globals were already linked, thus we can just query ValueMap for the
+ // variant of SAliasee in Dest.
+ std::map<const Value*,Value*>::const_iterator VMI = ValueMap.find(SAliasee);
+ assert(VMI != ValueMap.end() && "Aliasee not linked");
+ GlobalValue* DAliasee = cast<GlobalValue>(VMI->second);
+ GlobalValue* DGV = NULL;
+
+ // Try to find something 'similar' to SGA in destination module.
+ if (!DGV && !SGA->hasLocalLinkage()) {
+ DGV = Dest->getNamedAlias(SGA->getName());
+
+ // If types don't agree due to opaque types, try to resolve them.
+ if (DGV && DGV->getType() != SGA->getType())
+ RecursiveResolveTypes(SGA->getType(), DGV->getType());
+ }
+
+ if (!DGV && !SGA->hasLocalLinkage()) {
+ DGV = Dest->getGlobalVariable(SGA->getName());
+
+ // If types don't agree due to opaque types, try to resolve them.
+ if (DGV && DGV->getType() != SGA->getType())
+ RecursiveResolveTypes(SGA->getType(), DGV->getType());
+ }
+
+ if (!DGV && !SGA->hasLocalLinkage()) {
+ DGV = Dest->getFunction(SGA->getName());
+
+ // If types don't agree due to opaque types, try to resolve them.
+ if (DGV && DGV->getType() != SGA->getType())
+ RecursiveResolveTypes(SGA->getType(), DGV->getType());
+ }
+
+ // No linking to be performed on internal stuff.
+ if (DGV && DGV->hasLocalLinkage())
+ DGV = NULL;
+
+ if (GlobalAlias *DGA = dyn_cast_or_null<GlobalAlias>(DGV)) {
+ // Types are known to be the same, check whether the aliasees are equal. As
+ // globals are already linked we just need to query ValueMap to find the
+ // mapping.
+ if (DAliasee == DGA->getAliasedGlobal()) {
+ // This is just two copies of the same alias. Propagate linkage, if
+ // necessary.
+ DGA->setLinkage(CalculateAliasLinkage(SGA, DGA));
+
+ NewGA = DGA;
+ // Proceed to 'common' steps
+ } else
+ return Error(Err, "Alias Collision on '" + SGA->getName()+
+ "': aliases have different aliasees");
+ } else if (GlobalVariable *DGVar = dyn_cast_or_null<GlobalVariable>(DGV)) {
+ // The only allowed way is to link an alias with an external declaration or
+ // weak symbol...
+ if (DGVar->isDeclaration() || DGVar->isWeakForLinker()) {
+ // But only if aliasee is global too...
+ if (!isa<GlobalVariable>(DAliasee))
+ return Error(Err, "Global-Alias Collision on '" + SGA->getName() +
+ "': aliasee is not global variable");
+
+ NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
+ SGA->getName(), DAliasee, Dest);
+ CopyGVAttributes(NewGA, SGA);
+
+ // Any uses of DGV need to change to NewGA, with cast, if needed.
+ if (SGA->getType() != DGVar->getType())
+ DGVar->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA,
+ DGVar->getType()));
+ else
+ DGVar->replaceAllUsesWith(NewGA);
+
+ // DGVar will conflict with NewGA because they both had the same
+ // name. We must erase this now so ForceRenaming doesn't assert
+ // because DGV might not have internal linkage.
+ DGVar->eraseFromParent();
+
+ // Proceed to 'common' steps
+ } else
+ return Error(Err, "Global-Alias Collision on '" + SGA->getName() +
+ "': symbol multiple defined");
+ } else if (Function *DF = dyn_cast_or_null<Function>(DGV)) {
+ // The only allowed way is to link an alias with an external declaration or
+ // weak symbol...
+ if (DF->isDeclaration() || DF->isWeakForLinker()) {
+ // But only if aliasee is function too...
+ if (!isa<Function>(DAliasee))
+ return Error(Err, "Function-Alias Collision on '" + SGA->getName() +
+ "': aliasee is not function");
+
+ NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
+ SGA->getName(), DAliasee, Dest);
+ CopyGVAttributes(NewGA, SGA);
+
+ // Any uses of DF need to change to NewGA, with cast, if needed.
+ if (SGA->getType() != DF->getType())
+ DF->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA,
+ DF->getType()));
+ else
+ DF->replaceAllUsesWith(NewGA);
+
+ // DF will conflict with NewGA because they both had the same
+ // name. We must erase this now so ForceRenaming doesn't assert
+ // because DF might not have internal linkage.
+ DF->eraseFromParent();
+
+ // Proceed to 'common' steps
+ } else
+ return Error(Err, "Function-Alias Collision on '" + SGA->getName() +
+ "': symbol multiple defined");
+ } else {
+ // No linking to be performed, simply create an identical version of the
+ // alias over in the dest module...
+
+ NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
+ SGA->getName(), DAliasee, Dest);
+ CopyGVAttributes(NewGA, SGA);
+
+ // Proceed to 'common' steps
+ }
+
+ assert(NewGA && "No alias was created in destination module!");
+
+ // If the symbol table renamed the alias, but it is an externally visible
+ // symbol, DGA must be a global value with internal linkage. Rename it.
+ if (NewGA->getName() != SGA->getName() &&
+ !NewGA->hasLocalLinkage())
+ ForceRenaming(NewGA, SGA->getName());
+
+ // Remember this mapping so uses in the source module get remapped
+ // later by RemapOperand.
+ ValueMap[SGA] = NewGA;
+ }
+
+ return false;
+}
+
+
+// LinkGlobalInits - Update the initializers in the Dest module now that all
+// globals that may be referenced are in Dest.
+static bool LinkGlobalInits(Module *Dest, const Module *Src,
+ std::map<const Value*, Value*> &ValueMap,
+ std::string *Err) {
+ // Loop over all of the globals in the src module, mapping them over as we go
+ for (Module::const_global_iterator I = Src->global_begin(),
+ E = Src->global_end(); I != E; ++I) {
+ const GlobalVariable *SGV = I;
+
+ if (SGV->hasInitializer()) { // Only process initialized GV's
+ // Figure out what the initializer looks like in the dest module...
+ Constant *SInit =
+ cast<Constant>(RemapOperand(SGV->getInitializer(), ValueMap));
+ // Grab destination global variable or alias.
+ GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
+
+ // If dest is a global variable, check that the initializers match.
+ if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV)) {
+ if (DGVar->hasInitializer()) {
+ if (SGV->hasExternalLinkage()) {
+ if (DGVar->getInitializer() != SInit)
+ return Error(Err, "Global Variable Collision on '" +
+ SGV->getName() +
+ "': global variables have different initializers");
+ } else if (DGVar->isWeakForLinker()) {
+ // Nothing is required, mapped values will take the new global
+ // automatically.
+ } else if (SGV->isWeakForLinker()) {
+ // Nothing is required, mapped values will take the new global
+ // automatically.
+ } else if (DGVar->hasAppendingLinkage()) {
+ assert(0 && "Appending linkage unimplemented!");
+ } else {
+ assert(0 && "Unknown linkage!");
+ }
+ } else {
+ // Copy the initializer over now...
+ DGVar->setInitializer(SInit);
+ }
+ } else {
+ // Destination is alias, the only valid situation is when source is
+ // weak. Also note that we already checked linkage in LinkGlobals(),
+ // thus we assert here.
+ // FIXME: Should we weaken this assumption, 'dereference' alias and
+ // check for initializer of aliasee?
+ assert(SGV->isWeakForLinker());
+ }
+ }
+ }
+ return false;
+}
+
+// LinkFunctionProtos - Link the functions together between the two modules,
+// without doing function bodies... this just adds external function prototypes
+// to the Dest module...
+//
+static bool LinkFunctionProtos(Module *Dest, const Module *Src,
+ std::map<const Value*, Value*> &ValueMap,
+ std::string *Err) {
+ ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
+
+ // Loop over all of the functions in the src module, mapping them over
+ for (Module::const_iterator I = Src->begin(), E = Src->end(); I != E; ++I) {
+ const Function *SF = I; // SrcFunction
+ GlobalValue *DGV = 0;
+
+ // Check to see if we may have to link the function against a global, alias or
+ // function.
+ if (SF->hasName() && !SF->hasLocalLinkage())
+ DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SF->getNameStart(),
+ SF->getNameEnd()));
+
+ // If we found a global with the same name in the dest module, but it has
+ // internal linkage, we are really not doing any linkage here.
+ if (DGV && DGV->hasLocalLinkage())
+ DGV = 0;
+
+ // If types don't agree due to opaque types, try to resolve them.
+ if (DGV && DGV->getType() != SF->getType())
+ RecursiveResolveTypes(SF->getType(), DGV->getType());
+
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (GetLinkageResult(DGV, SF, NewLinkage, LinkFromSrc, Err))
+ return true;
+
+ // If there is no linkage to be performed, just bring over SF without
+ // modifying it.
+ if (DGV == 0) {
+ // Function does not already exist, simply insert a function signature
+ // identical to SF into the dest module.
+ Function *NewDF = Function::Create(SF->getFunctionType(),
+ SF->getLinkage(),
+ SF->getName(), Dest);
+ CopyGVAttributes(NewDF, SF);
+
+ // If the LLVM runtime renamed the function, but it is an externally
+ // visible symbol, DF must be an existing function with internal linkage.
+ // Rename it.
+ if (!NewDF->hasLocalLinkage() && NewDF->getName() != SF->getName())
+ ForceRenaming(NewDF, SF->getName());
+
+ // ... and remember this mapping...
+ ValueMap[SF] = NewDF;
+ continue;
+ }
+
+ // If the visibilities of the symbols disagree and the destination is a
+ // prototype, take the visibility of its input.
+ if (DGV->isDeclaration())
+ DGV->setVisibility(SF->getVisibility());
+
+ if (LinkFromSrc) {
+ if (isa<GlobalAlias>(DGV))
+ return Error(Err, "Function-Alias Collision on '" + SF->getName() +
+ "': symbol multiple defined");
+
+ // We have a definition of the same name but different type in the
+ // source module. Copy the prototype to the destination and replace
+ // uses of the destination's prototype with the new prototype.
+ Function *NewDF = Function::Create(SF->getFunctionType(), NewLinkage,
+ SF->getName(), Dest);
+ CopyGVAttributes(NewDF, SF);
+
+ // Any uses of DF need to change to NewDF, with cast
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
+
+ // DF will conflict with NewDF because they both had the same name. We must
+ // erase this now so ForceRenaming doesn't assert because DF might
+ // not have internal linkage.
+ if (GlobalVariable *Var = dyn_cast<GlobalVariable>(DGV))
+ Var->eraseFromParent();
+ else
+ cast<Function>(DGV)->eraseFromParent();
+
+ // If the symbol table renamed the function, but it is an externally
+ // visible symbol, DF must be an existing function with internal
+ // linkage. Rename it.
+ if (NewDF->getName() != SF->getName() && !NewDF->hasLocalLinkage())
+ ForceRenaming(NewDF, SF->getName());
+
+ // Remember this mapping so uses in the source module get remapped
+ // later by RemapOperand.
+ ValueMap[SF] = NewDF;
+ continue;
+ }
+
+ // Not "link from source", keep the one in the DestModule and remap the
+ // input onto it.
+
+ if (isa<GlobalAlias>(DGV)) {
+ // The only valid mappings are:
+ // - SF is external declaration, which is effectively a no-op.
+ // - SF is weak, when we just need to throw SF out.
+ if (!SF->isDeclaration() && !SF->isWeakForLinker())
+ return Error(Err, "Function-Alias Collision on '" + SF->getName() +
+ "': symbol multiple defined");
+ }
+
+ // Set calculated linkage
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SF] = ConstantExpr::getBitCast(DGV, SF->getType());
+ }
+ return false;
+}
+
+// LinkFunctionBody - Copy the source function over into the dest function and
+// fix up references to values. At this point we know that Dest is an external
+// function, and that Src is not.
+static bool LinkFunctionBody(Function *Dest, Function *Src,
+ std::map<const Value*, Value*> &ValueMap,
+ std::string *Err) {
+ assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration());
+
+ // Go through and convert function arguments over, remembering the mapping.
+ Function::arg_iterator DI = Dest->arg_begin();
+ for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+ I != E; ++I, ++DI) {
+ DI->setName(I->getName()); // Copy the name information over...
+
+ // Add a mapping to our local map
+ ValueMap[I] = DI;
+ }
+
+ // Splice the body of the source function into the dest function.
+ Dest->getBasicBlockList().splice(Dest->end(), Src->getBasicBlockList());
+
+ // At this point, all of the instructions and values of the function are now
+ // copied over. The only problem is that they are still referencing values in
+ // the Source function as operands. Loop through all of the operands of the
+ // functions and patch them up to point to the local versions...
+ //
+ for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI))
+ *OI = RemapOperand(*OI, ValueMap);
+
+ // There is no need to map the arguments anymore.
+ for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
+ I != E; ++I)
+ ValueMap.erase(I);
+
+ return false;
+}
+
+
+// LinkFunctionBodies - Link in the function bodies that are defined in the
+// source module into the DestModule. This consists basically of copying the
+// function over and fixing up references to values.
+static bool LinkFunctionBodies(Module *Dest, Module *Src,
+ std::map<const Value*, Value*> &ValueMap,
+ std::string *Err) {
+
+ // Loop over all of the functions in the src module, mapping them over as we
+ // go
+ for (Module::iterator SF = Src->begin(), E = Src->end(); SF != E; ++SF) {
+ if (!SF->isDeclaration()) { // No body if function is external
+ Function *DF = dyn_cast<Function>(ValueMap[SF]); // Destination function
+
+ // DF not external, SF external?
+ if (DF && DF->isDeclaration())
+ // Only provide the function body if there isn't one already.
+ if (LinkFunctionBody(DF, SF, ValueMap, Err))
+ return true;
+ }
+ }
+ return false;
+}
+
+// LinkAppendingVars - If there were any appending global variables, link them
+// together now. Return true on error.
+static bool LinkAppendingVars(Module *M,
+ std::multimap<std::string, GlobalVariable *> &AppendingVars,
+ std::string *ErrorMsg) {
+ if (AppendingVars.empty()) return false; // Nothing to do.
+
+ // Loop over the multimap of appending vars, processing any variables with the
+ // same name, forming a new appending global variable with both of the
+ // initializers merged together, then rewriting references to the old
+ // variables and deleting them.
+ std::vector<Constant*> Inits;
+ while (AppendingVars.size() > 1) {
+ // Get the first two elements in the map...
+ std::multimap<std::string,
+ GlobalVariable*>::iterator Second = AppendingVars.begin(), First=Second++;
+
+ // If the first two elements are for different names, there is no pair...
+ // Otherwise there is a pair, so link them together...
+ if (First->first == Second->first) {
+ GlobalVariable *G1 = First->second, *G2 = Second->second;
+ const ArrayType *T1 = cast<ArrayType>(G1->getType()->getElementType());
+ const ArrayType *T2 = cast<ArrayType>(G2->getType()->getElementType());
+
+ // Check to see that the two arrays agree on type...
+ if (T1->getElementType() != T2->getElementType())
+ return Error(ErrorMsg,
+ "Appending variables with different element types need to be linked!");
+ if (G1->isConstant() != G2->isConstant())
+ return Error(ErrorMsg,
+ "Appending variables linked with different const'ness!");
+
+ if (G1->getAlignment() != G2->getAlignment())
+ return Error(ErrorMsg,
+ "Appending variables with different alignment need to be linked!");
+
+ if (G1->getVisibility() != G2->getVisibility())
+ return Error(ErrorMsg,
+ "Appending variables with different visibility need to be linked!");
+
+ if (G1->getSection() != G2->getSection())
+ return Error(ErrorMsg,
+ "Appending variables with different section name need to be linked!");
+
+ unsigned NewSize = T1->getNumElements() + T2->getNumElements();
+ ArrayType *NewType = ArrayType::get(T1->getElementType(), NewSize);
+
+ G1->setName(""); // Clear G1's name in case of a conflict!
+
+ // Create the new global variable...
+ GlobalVariable *NG =
+ new GlobalVariable(NewType, G1->isConstant(), G1->getLinkage(),
+ /*init*/0, First->first, M, G1->isThreadLocal(),
+ G1->getType()->getAddressSpace());
+
+ // Propagate alignment, visibility and section info.
+ CopyGVAttributes(NG, G1);
+
+ // Merge the initializer...
+ Inits.reserve(NewSize);
+ if (ConstantArray *I = dyn_cast<ConstantArray>(G1->getInitializer())) {
+ for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
+ Inits.push_back(I->getOperand(i));
+ } else {
+ assert(isa<ConstantAggregateZero>(G1->getInitializer()));
+ Constant *CV = Constant::getNullValue(T1->getElementType());
+ for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
+ Inits.push_back(CV);
+ }
+ if (ConstantArray *I = dyn_cast<ConstantArray>(G2->getInitializer())) {
+ for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
+ Inits.push_back(I->getOperand(i));
+ } else {
+ assert(isa<ConstantAggregateZero>(G2->getInitializer()));
+ Constant *CV = Constant::getNullValue(T2->getElementType());
+ for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
+ Inits.push_back(CV);
+ }
+ NG->setInitializer(ConstantArray::get(NewType, Inits));
+ Inits.clear();
+
+ // Replace any uses of the two global variables with uses of the new
+ // global...
+
+ // FIXME: This should rewrite simple/straight-forward uses such as
+ // getelementptr instructions to not use the Cast!
+ G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG, G1->getType()));
+ G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG, G2->getType()));
+
+ // Remove the two globals from the module now...
+ M->getGlobalList().erase(G1);
+ M->getGlobalList().erase(G2);
+
+ // Put the new global into the AppendingVars map so that we can handle
+ // linking of more than two vars...
+ Second->second = NG;
+ }
+ AppendingVars.erase(First);
+ }
+
+ return false;
+}
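+
+// Illustrative sketch (hypothetical Elem type standing in for one
+// llvm::Constant* element): the merge above simply concatenates the two
+// initializer lists, expanding zero initializers element-by-element.
+#if 0
+#include <vector>
+typedef int Elem;
+static std::vector<Elem> mergeAppending(const std::vector<Elem> &I1,
+ const std::vector<Elem> &I2) {
+ std::vector<Elem> Merged;
+ Merged.reserve(I1.size() + I2.size());
+ Merged.insert(Merged.end(), I1.begin(), I1.end());
+ Merged.insert(Merged.end(), I2.begin(), I2.end());
+ return Merged; // New [N1+N2 x T] initializer.
+}
+#endif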
+
+static bool ResolveAliases(Module *Dest) {
+ for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end();
+ I != E; ++I)
+ if (const GlobalValue *GV = I->resolveAliasedGlobal())
+ if (GV != I && !GV->isDeclaration())
+ I->replaceAllUsesWith(const_cast<GlobalValue*>(GV));
+
+ return false;
+}
+
+// LinkModules - This function links two modules together, with the resulting
+// left module modified to be the composite of the two input modules. If an
+// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+// the problem. Upon failure, the Dest module could be in a modified state, and
+// shouldn't be relied on to be consistent.
+bool
+Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
+ assert(Dest != 0 && "Invalid Destination module");
+ assert(Src != 0 && "Invalid Source Module");
+
+ if (Dest->getDataLayout().empty()) {
+ if (!Src->getDataLayout().empty()) {
+ Dest->setDataLayout(Src->getDataLayout());
+ } else {
+ std::string DataLayout;
+
+ if (Dest->getEndianness() == Module::AnyEndianness) {
+ if (Src->getEndianness() == Module::BigEndian)
+ DataLayout.append("E");
+ else if (Src->getEndianness() == Module::LittleEndian)
+ DataLayout.append("e");
+ }
+
+ if (Dest->getPointerSize() == Module::AnyPointerSize) {
+ if (Src->getPointerSize() == Module::Pointer64)
+ DataLayout.append(DataLayout.length() == 0 ? "p:64:64" : "-p:64:64");
+ else if (Src->getPointerSize() == Module::Pointer32)
+ DataLayout.append(DataLayout.length() == 0 ? "p:32:32" : "-p:32:32");
+ }
+ Dest->setDataLayout(DataLayout);
+ }
+ }
+
+ // Copy the target triple from the source to dest if the dest's is empty.
+ if (Dest->getTargetTriple().empty() && !Src->getTargetTriple().empty())
+ Dest->setTargetTriple(Src->getTargetTriple());
+
+ if (!Src->getDataLayout().empty() && !Dest->getDataLayout().empty() &&
+ Src->getDataLayout() != Dest->getDataLayout())
+ cerr << "WARNING: Linking two modules of different data layouts!\n";
+ if (!Src->getTargetTriple().empty() &&
+ Dest->getTargetTriple() != Src->getTargetTriple())
+ cerr << "WARNING: Linking two modules of different target triples!\n";
+
+ // Append the module inline asm string.
+ if (!Src->getModuleInlineAsm().empty()) {
+ if (Dest->getModuleInlineAsm().empty())
+ Dest->setModuleInlineAsm(Src->getModuleInlineAsm());
+ else
+ Dest->setModuleInlineAsm(Dest->getModuleInlineAsm()+"\n"+
+ Src->getModuleInlineAsm());
+ }
+
+ // Update the destination module's dependent libraries list with the libraries
+ // from the source module. There's no opportunity for duplicates here as the
+ // Module ensures that duplicate insertions are discarded.
+ for (Module::lib_iterator SI = Src->lib_begin(), SE = Src->lib_end();
+ SI != SE; ++SI)
+ Dest->addLibrary(*SI);
+
+ // LinkTypes - Go through the symbol table of the Src module and see if any
+ // types are named in the src module that are not named in the Dst module.
+ // Make sure there are no type name conflicts.
+ if (LinkTypes(Dest, Src, ErrorMsg))
+ return true;
+
+ // ValueMap - Mapping of values from what they used to be in Src, to what they
+ // are now in Dest.
+ std::map<const Value*, Value*> ValueMap;
+
+ // AppendingVars - Keep track of global variables in the destination module
+ // with appending linkage. After the module is linked together, they are
+ // appended and the module is rewritten.
+ std::multimap<std::string, GlobalVariable *> AppendingVars;
+ for (Module::global_iterator I = Dest->global_begin(), E = Dest->global_end();
+ I != E; ++I) {
+ // Add all of the appending globals already in the Dest module to
+ // AppendingVars.
+ if (I->hasAppendingLinkage())
+ AppendingVars.insert(std::make_pair(I->getName(), I));
+ }
+
+ // Insert all of the globals in src into the Dest module... without linking
+ // initializers (which could refer to functions not yet mapped over).
+ if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
+ return true;
+
+ // Link the functions together between the two modules, without doing function
+ // bodies... this just adds external function prototypes to the Dest
+ // module... We do this so that when we begin processing function bodies,
+ // all of the global values that may be referenced are available in our
+ // ValueMap.
+ if (LinkFunctionProtos(Dest, Src, ValueMap, ErrorMsg))
+ return true;
+
+ // If there were any aliases, link them now. We need to do this now because
+ // all of the aliases that may be referenced must be available in the
+ // ValueMap.
+ if (LinkAlias(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+ // Update the initializers in the Dest module now that all globals that may
+ // be referenced are in Dest.
+ if (LinkGlobalInits(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+ // Link in the function bodies that are defined in the source module into the
+ // DestModule. This consists basically of copying the function over and
+ // fixing up references to values.
+ if (LinkFunctionBodies(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+ // If there were any appending global variables, link them together now.
+ if (LinkAppendingVars(Dest, AppendingVars, ErrorMsg)) return true;
+
+ // Resolve all uses of aliases with aliasees
+ if (ResolveAliases(Dest)) return true;
+
+ // If the source library's module id is in the dependent library list of the
+ // destination module, remove it since that module is now linked in.
+ sys::Path modId;
+ modId.set(Src->getModuleIdentifier());
+ if (!modId.isEmpty())
+ Dest->removeLibrary(modId.getBasename());
+
+ return false;
+}
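+
+// Minimal caller's-eye sketch of the entry point above; how Dest and Src
+// are obtained is elided/hypothetical.
+#if 0
+std::string Err;
+if (Linker::LinkModules(Dest, Src, &Err))
+ cerr << "link failed: " << Err << "\n"; // Dest may now be inconsistent.
+#endif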
+
+// vim: sw=2
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
new file mode 100644
index 0000000..d673772
--- /dev/null
+++ b/lib/Linker/Linker.cpp
@@ -0,0 +1,178 @@
+//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic Linker functionality that all usages will need.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+Linker::Linker(const std::string& progname, const std::string& modname,
+ unsigned flags)
+ : Composite(0)
+ , LibPaths()
+ , Flags(flags)
+ , Error()
+ , ProgramName(progname)
+{
+ Composite = new Module(modname);
+}
+
+Linker::Linker(const std::string& progname, Module* aModule, unsigned flags)
+ : Composite(aModule)
+ , LibPaths()
+ , Flags(flags)
+ , Error()
+ , ProgramName(progname)
+{
+}
+
+Linker::~Linker() {
+ delete Composite;
+}
+
+bool
+Linker::error(const std::string& message) {
+ Error = message;
+ if (!(Flags&QuietErrors))
+ cerr << ProgramName << ": error: " << message << "\n";
+ return true;
+}
+
+bool
+Linker::warning(const std::string& message) {
+ Error = message;
+ if (!(Flags&QuietWarnings))
+ cerr << ProgramName << ": warning: " << message << "\n";
+ return false;
+}
+
+void
+Linker::verbose(const std::string& message) {
+ if (Flags&Verbose)
+ cerr << " " << message << "\n";
+}
+
+void
+Linker::addPath(const sys::Path& path) {
+ LibPaths.push_back(path);
+}
+
+void
+Linker::addPaths(const std::vector<std::string>& paths) {
+ for (unsigned i = 0; i != paths.size(); ++i) {
+ sys::Path aPath;
+ aPath.set(paths[i]);
+ LibPaths.push_back(aPath);
+ }
+}
+
+void
+Linker::addSystemPaths() {
+ sys::Path::GetBitcodeLibraryPaths(LibPaths);
+ LibPaths.insert(LibPaths.begin(),sys::Path("./"));
+}
+
+Module*
+Linker::releaseModule() {
+ Module* result = Composite;
+ LibPaths.clear();
+ Error.clear();
+ Composite = 0;
+ Flags = 0;
+ return result;
+}
+
+// LoadObject - Read in and parse the bitcode file named by FN and return the
+// module it contains (wrapped in an auto_ptr), or auto_ptr<Module>() and set
+// Error if an error occurs.
+std::auto_ptr<Module>
+Linker::LoadObject(const sys::Path &FN) {
+ std::string ParseErrorMessage;
+ Module *Result = 0;
+
+ const std::string &FNS = FN.toString();
+ std::auto_ptr<MemoryBuffer> Buffer(MemoryBuffer::getFileOrSTDIN(FNS.c_str()));
+ if (Buffer.get())
+ Result = ParseBitcodeFile(Buffer.get(), &ParseErrorMessage);
+ else
+ ParseErrorMessage = "Error reading file '" + FNS + "'";
+
+ if (Result)
+ return std::auto_ptr<Module>(Result);
+ Error = "Bitcode file '" + FN.toString() + "' could not be loaded";
+ if (ParseErrorMessage.size())
+ Error += ": " + ParseErrorMessage;
+ return std::auto_ptr<Module>();
+}
+
+// IsLibrary - Determine if "Name" is a library in "Directory". Return
+// a non-empty sys::Path if it's found, an empty one otherwise.
+static inline sys::Path IsLibrary(const std::string& Name,
+ const sys::Path& Directory) {
+
+ sys::Path FullPath(Directory);
+
+ // Try the libX.a form
+ FullPath.appendComponent("lib" + Name);
+ FullPath.appendSuffix("a");
+ if (FullPath.isArchive())
+ return FullPath;
+
+ // Try the libX.bca form
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix("bca");
+ if (FullPath.isArchive())
+ return FullPath;
+
+ // Try the libX.so (or .dylib) form
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix(&(LTDL_SHLIB_EXT[1]));
+ if (FullPath.isDynamicLibrary()) // Native shared library?
+ return FullPath;
+ if (FullPath.isBitcodeFile()) // .so file containing bitcode?
+ return FullPath;
+
+ // Not found... fall through.
+
+ // Indicate that the library was not found in the directory.
+ FullPath.clear();
+ return FullPath;
+}
+
+/// FindLib - Try to convert Filename into the name of a file that we can open,
+/// if it does not already name a file we can open, by first trying to open
+/// Filename, then libFilename.[suffix] for each of a set of several common
+/// library suffixes, in each of the directories in LibPaths. Returns an empty
+/// Path if no matching file can be found.
+///
+sys::Path
+Linker::FindLib(const std::string &Filename) {
+ // Determine if the pathname can be found as it stands.
+ sys::Path FilePath(Filename);
+ if (FilePath.canRead() &&
+ (FilePath.isArchive() || FilePath.isDynamicLibrary()))
+ return FilePath;
+
+ // Iterate over the directories in Paths to see if we can find the library
+ // there.
+ for (unsigned Index = 0; Index != LibPaths.size(); ++Index) {
+ sys::Path Directory(LibPaths[Index]);
+ sys::Path FullPath = IsLibrary(Filename,Directory);
+ if (!FullPath.isEmpty())
+ return FullPath;
+ }
+ return sys::Path();
+}
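+
+// Illustrative use of the search logic above (hypothetical program and
+// library names).
+#if 0
+Linker TheLinker("myprog", "composite", 0);
+TheLinker.addSystemPaths(); // "./" plus the bitcode library paths.
+sys::Path Lib = TheLinker.FindLib("m"); // Probes libm.a, libm.bca, libm.so.
+if (Lib.isEmpty())
+ cerr << "library not found\n";
+#endif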
diff --git a/lib/Linker/Makefile b/lib/Linker/Makefile
new file mode 100644
index 0000000..19e646b
--- /dev/null
+++ b/lib/Linker/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Linker/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMLinker
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 0000000..8dd67d9
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Makefile ----------------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ..
+
+PARALLEL_DIRS = VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
+ Target ExecutionEngine Debugger Linker CompilerDriver
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
new file mode 100644
index 0000000..3b03c54
--- /dev/null
+++ b/lib/Support/APFloat.cpp
@@ -0,0 +1,2950 @@
+//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision floating
+// point values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstring>
+
+using namespace llvm;
+
+#define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
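+/* With the four fltCategory values encoded as 0..3, this maps each
+ (lhs, rhs) category pair to a distinct value in 0..15 -- e.g.
+ convolve(fcZero, fcInfinity) != convolve(fcInfinity, fcZero) -- so the
+ special-case handlers below can switch over category pairs directly. */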
+
+/* Assumed in hexadecimal significand parsing, and conversion to
+ hexadecimal strings. */
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
+
+namespace llvm {
+
+ /* Represents floating point arithmetic semantics. */
+ struct fltSemantics {
+ /* The largest E such that 2^E is representable; this matches the
+ definition of IEEE 754. */
+ exponent_t maxExponent;
+
+ /* The smallest E such that 2^E is a normalized number; this
+ matches the definition of IEEE 754. */
+ exponent_t minExponent;
+
+ /* Number of bits in the significand. This includes the integer
+ bit. */
+ unsigned int precision;
+
+ /* True if arithmetic is supported. */
+ unsigned int arithmeticOK;
+ };
+
+ const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
+ const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
+ const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
+ const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
+ const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
+
+ // The PowerPC format consists of two doubles. It does not map cleanly
+ // onto the usual format above. For now only storage of constants of
+ // this type is supported, no arithmetic.
+ const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
+
+ /* A tight upper bound on number of parts required to hold the value
+ pow(5, power) is
+
+ power * 815 / (351 * integerPartWidth) + 1
+
+ However, whilst the result may require only this many parts,
+ because we are multiplying two values to get it, the
+ multiplication may require an extra part with the excess part
+ being zero (consider the trivial case of 1 * 1, tcFullMultiply
+ requires two parts to hold the single-part result). So we add an
+ extra one to guarantee enough space whilst multiplying. */
+ const unsigned int maxExponent = 16383;
+ const unsigned int maxPrecision = 113;
+ const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
+ const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
+ / (351 * integerPartWidth));
+}
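+
+/* Worked instance of the bound above: with 64-bit integerParts,
+ maxPowerOfFiveExponent = 16383 + 113 - 1 = 16495, so
+ maxPowerOfFiveParts = 2 + (16495 * 815) / (351 * 64)
+ = 2 + 13443425 / 22464 = 2 + 598 = 600 parts. */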
+
+/* A bunch of private, handy routines. */
+
+static inline unsigned int
+partCountForBits(unsigned int bits)
+{
+ return ((bits) + integerPartWidth - 1) / integerPartWidth;
+}
+
+/* Returns 0U-9U. Return values >= 10U are not digits. */
+static inline unsigned int
+decDigitValue(unsigned int c)
+{
+ return c - '0';
+}
+
+static unsigned int
+hexDigitValue(unsigned int c)
+{
+ unsigned int r;
+
+ r = c - '0';
+ if(r <= 9)
+ return r;
+
+ r = c - 'A';
+ if(r <= 5)
+ return r + 10;
+
+ r = c - 'a';
+ if(r <= 5)
+ return r + 10;
+
+ return -1U;
+}
+
+static inline void
+assertArithmeticOK(const llvm::fltSemantics &semantics) {
+ assert(semantics.arithmeticOK
+ && "Compile-time arithmetic does not support these semantics");
+}
+
+/* Return the value of a decimal exponent of the form
+ [+-]ddddddd.
+
+ If the exponent overflows, returns a large exponent with the
+ appropriate sign. */
+static int
+readExponent(const char *p)
+{
+ bool isNegative;
+ unsigned int absExponent;
+ const unsigned int overlargeExponent = 24000; /* FIXME. */
+
+ isNegative = (*p == '-');
+ if (*p == '-' || *p == '+')
+ p++;
+
+ absExponent = decDigitValue(*p++);
+ assert (absExponent < 10U);
+
+ for (;;) {
+ unsigned int value;
+
+ value = decDigitValue(*p);
+ if (value >= 10U)
+ break;
+
+ p++;
+ value += absExponent * 10;
+ if (absExponent >= overlargeExponent) {
+ absExponent = overlargeExponent;
+ break;
+ }
+ absExponent = value;
+ }
+
+ if (isNegative)
+ return -(int) absExponent;
+ else
+ return (int) absExponent;
+}
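+
+/* Worked examples for the routine above: readExponent("+12") == 12 and
+ readExponent("-3") == -3, while a huge input such as "99999999" is
+ clamped to 24000 rather than overflowing. */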
+
+/* This is ugly and needs cleaning up, but I don't immediately see
+ how whilst remaining safe. */
+static int
+totalExponent(const char *p, int exponentAdjustment)
+{
+ int unsignedExponent;
+ bool negative, overflow;
+ int exponent;
+
+ /* Move past the exponent letter and sign to the digits. */
+ p++;
+ negative = *p == '-';
+ if(*p == '-' || *p == '+')
+ p++;
+
+ unsignedExponent = 0;
+ overflow = false;
+ for(;;) {
+ unsigned int value;
+
+ value = decDigitValue(*p);
+ if(value >= 10U)
+ break;
+
+ p++;
+ unsignedExponent = unsignedExponent * 10 + value;
+ if(unsignedExponent > 65535)
+ overflow = true;
+ }
+
+ if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
+ overflow = true;
+
+ if(!overflow) {
+ exponent = unsignedExponent;
+ if(negative)
+ exponent = -exponent;
+ exponent += exponentAdjustment;
+ if(exponent > 65535 || exponent < -65536)
+ overflow = true;
+ }
+
+ if(overflow)
+ exponent = negative ? -65536: 65535;
+
+ return exponent;
+}
+
+static const char *
+skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
+{
+ *dot = 0;
+ while(*p == '0')
+ p++;
+
+ if(*p == '.') {
+ *dot = p++;
+ while(*p == '0')
+ p++;
+ }
+
+ return p;
+}
+
+/* Given a normal decimal floating point number of the form
+
+ dddd.dddd[eE][+-]ddd
+
+ where the decimal point and exponent are optional, fill out the
+ structure D. Exponent is appropriate if the significand is
+ treated as an integer, and normalizedExponent if the significand
+ is taken to have the decimal point after a single leading
+ non-zero digit.
+
+ If the value is zero, D->firstSigDigit points to a non-digit, and
+ the return exponent is zero.
+*/
+struct decimalInfo {
+ const char *firstSigDigit;
+ const char *lastSigDigit;
+ int exponent;
+ int normalizedExponent;
+};
+
+static void
+interpretDecimal(const char *p, decimalInfo *D)
+{
+ const char *dot;
+
+ p = skipLeadingZeroesAndAnyDot (p, &dot);
+
+ D->firstSigDigit = p;
+ D->exponent = 0;
+ D->normalizedExponent = 0;
+
+ for (;;) {
+ if (*p == '.') {
+ assert(dot == 0);
+ dot = p++;
+ }
+ if (decDigitValue(*p) >= 10U)
+ break;
+ p++;
+ }
+
+ /* If the number is all zeroes, accept any exponent. */
+ if (p != D->firstSigDigit) {
+ if (*p == 'e' || *p == 'E')
+ D->exponent = readExponent(p + 1);
+
+ /* Implied decimal point? */
+ if (!dot)
+ dot = p;
+
+ /* Drop insignificant trailing zeroes. */
+ do
+ do
+ p--;
+ while (*p == '0');
+ while (*p == '.');
+
+ /* Adjust the exponents for any decimal point. */
+ D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
+ D->normalizedExponent = (D->exponent +
+ static_cast<exponent_t>((p - D->firstSigDigit)
+ - (dot > D->firstSigDigit && dot < p)));
+ }
+
+ D->lastSigDigit = p;
+}
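+
+/* Worked example for the routine above: for "0.0625e2" the significant
+ digits are "625" and the written exponent is 2; the decimal point lies
+ four digits left of lastSigDigit, so D->exponent = 2 - 4 = -2
+ (625e-2 == 6.25) and D->normalizedExponent = -2 + 2 = 0 (6.25e0). */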
+
+/* Return the trailing fraction of a hexadecimal number.
+ DIGITVALUE is the first hex digit of the fraction, P points to
+ the next digit. */
+static lostFraction
+trailingHexadecimalFraction(const char *p, unsigned int digitValue)
+{
+ unsigned int hexDigit;
+
+ /* If the first trailing digit isn't 0 or 8 we can work out the
+ fraction immediately. */
+ if(digitValue > 8)
+ return lfMoreThanHalf;
+ else if(digitValue < 8 && digitValue > 0)
+ return lfLessThanHalf;
+
+ /* Otherwise we need to find the first non-zero digit. */
+ while(*p == '0')
+ p++;
+
+ hexDigit = hexDigitValue(*p);
+
+ /* If we ran off the end it is exactly zero or one-half, otherwise
+ a little more. */
+ if(hexDigit == -1U)
+ return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
+ else
+ return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
+}
+
+/* Return the fraction lost were a bignum truncated losing the least
+ significant BITS bits. */
+static lostFraction
+lostFractionThroughTruncation(const integerPart *parts,
+ unsigned int partCount,
+ unsigned int bits)
+{
+ unsigned int lsb;
+
+ lsb = APInt::tcLSB(parts, partCount);
+
+ /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
+ if(bits <= lsb)
+ return lfExactlyZero;
+ if(bits == lsb + 1)
+ return lfExactlyHalf;
+ if(bits <= partCount * integerPartWidth
+ && APInt::tcExtractBit(parts, bits - 1))
+ return lfMoreThanHalf;
+
+ return lfLessThanHalf;
+}
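+
+/* Worked example: for a one-part bignum holding 4 (binary 100, lsb == 2),
+ truncating 2 bits loses 00 -> lfExactlyZero; truncating 3 bits loses
+ 100 -> lfExactlyHalf (bits == lsb + 1); truncating 4 bits loses 0100,
+ a quarter -> lfLessThanHalf. */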
+
+/* Shift DST right BITS bits noting lost fraction. */
+static lostFraction
+shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
+{
+ lostFraction lost_fraction;
+
+ lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
+
+ APInt::tcShiftRight(dst, parts, bits);
+
+ return lost_fraction;
+}
+
+/* Combine the effect of two lost fractions. */
+static lostFraction
+combineLostFractions(lostFraction moreSignificant,
+ lostFraction lessSignificant)
+{
+ if(lessSignificant != lfExactlyZero) {
+ if(moreSignificant == lfExactlyZero)
+ moreSignificant = lfLessThanHalf;
+ else if(moreSignificant == lfExactlyHalf)
+ moreSignificant = lfMoreThanHalf;
+ }
+
+ return moreSignificant;
+}
+
+/* The error from the true value, in half-ulps, on multiplying two
+ floating point numbers, which differ from the value they
+ approximate by at most HUE1 and HUE2 half-ulps, is strictly less
+ than the returned value.
+
+ See "How to Read Floating Point Numbers Accurately" by William D
+ Clinger. */
+static unsigned int
+HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
+{
+ assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
+
+ if (HUerr1 + HUerr2 == 0)
+ return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
+ else
+ return inexactMultiply + 2 * (HUerr1 + HUerr2);
+}
+
+/* The number of ulps from the boundary (zero, or half if ISNEAREST)
+ when the least significant BITS are truncated. BITS cannot be
+ zero. */
+static integerPart
+ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
+{
+ unsigned int count, partBits;
+ integerPart part, boundary;
+
+ assert (bits != 0);
+
+ bits--;
+ count = bits / integerPartWidth;
+ partBits = bits % integerPartWidth + 1;
+
+ part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
+
+ if (isNearest)
+ boundary = (integerPart) 1 << (partBits - 1);
+ else
+ boundary = 0;
+
+ if (count == 0) {
+ if (part - boundary <= boundary - part)
+ return part - boundary;
+ else
+ return boundary - part;
+ }
+
+ if (part == boundary) {
+ while (--count)
+ if (parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return parts[0];
+ } else if (part == boundary - 1) {
+ while (--count)
+ if (~parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return -parts[0];
+ }
+
+ return ~(integerPart) 0; /* A lot. */
+}
+
+/* Place pow(5, power) in DST, and return the number of parts used.
+ DST must be at least one part larger than the size of the answer. */
+static unsigned int
+powerOf5(integerPart *dst, unsigned int power)
+{
+ static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
+ 15625, 78125 };
+ integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
+ pow5s[0] = 78125 * 5;
+
+ unsigned int partsCount[16] = { 1 };
+ integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
+ unsigned int result;
+ assert(power <= maxExponent);
+
+ p1 = dst;
+ p2 = scratch;
+
+ *p1 = firstEightPowers[power & 7];
+ power >>= 3;
+
+ result = 1;
+ pow5 = pow5s;
+
+ for (unsigned int n = 0; power; power >>= 1, n++) {
+ unsigned int pc;
+
+ pc = partsCount[n];
+
+ /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
+ if (pc == 0) {
+ pc = partsCount[n - 1];
+ APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
+ pc *= 2;
+ if (pow5[pc - 1] == 0)
+ pc--;
+ partsCount[n] = pc;
+ }
+
+ if (power & 1) {
+ integerPart *tmp;
+
+ APInt::tcFullMultiply(p2, p1, pow5, result, pc);
+ result += pc;
+ if (p2[result - 1] == 0)
+ result--;
+
+ /* After the swap below the result is in p1 with RESULT
+ parts and p2 is scratch space. */
+ tmp = p1, p1 = p2, p2 = tmp;
+ }
+
+ pow5 += pc;
+ }
+
+ if (p1 != dst)
+ APInt::tcAssign(dst, p1, result);
+
+ return result;
+}
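+
+/* Worked trace for power == 12: the low three bits select
+ firstEightPowers[4] == 625 == pow(5,4); the remaining set bit multiplies
+ in pow5s[0] == 390625 == pow(5,8), so dst ends up holding
+ pow(5,12) == 244140625 via binary exponentiation. */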
+
+/* Zero at the end to avoid modular arithmetic when adding one; used
+ when rounding up during hexadecimal output. */
+static const char hexDigitsLower[] = "0123456789abcdef0";
+static const char hexDigitsUpper[] = "0123456789ABCDEF0";
+static const char infinityL[] = "infinity";
+static const char infinityU[] = "INFINITY";
+static const char NaNL[] = "nan";
+static const char NaNU[] = "NAN";
+
+/* Write out an integerPart in hexadecimal, starting with the most
+ significant nibble. Write out exactly COUNT hex digits, return
+ COUNT. */
+static unsigned int
+partAsHex (char *dst, integerPart part, unsigned int count,
+ const char *hexDigitChars)
+{
+ unsigned int result = count;
+
+ assert (count != 0 && count <= integerPartWidth / 4);
+
+ part >>= (integerPartWidth - 4 * count);
+ while (count--) {
+ dst[count] = hexDigitChars[part & 0xf];
+ part >>= 4;
+ }
+
+ return result;
+}
+
+/* Write out an unsigned decimal integer. */
+static char *
+writeUnsignedDecimal (char *dst, unsigned int n)
+{
+ char buff[40], *p;
+
+ p = buff;
+ do
+ *p++ = '0' + n % 10;
+ while (n /= 10);
+
+ do
+ *dst++ = *--p;
+ while (p != buff);
+
+ return dst;
+}
+
+/* Write out a signed decimal integer. */
+static char *
+writeSignedDecimal (char *dst, int value)
+{
+ if (value < 0) {
+ *dst++ = '-';
+ dst = writeUnsignedDecimal(dst, -(unsigned) value);
+ } else
+ dst = writeUnsignedDecimal(dst, value);
+
+ return dst;
+}
+
+/* Constructors. */
+void
+APFloat::initialize(const fltSemantics *ourSemantics)
+{
+ unsigned int count;
+
+ semantics = ourSemantics;
+ count = partCount();
+ if(count > 1)
+ significand.parts = new integerPart[count];
+}
+
+void
+APFloat::freeSignificand()
+{
+ if(partCount() > 1)
+ delete [] significand.parts;
+}
+
+void
+APFloat::assign(const APFloat &rhs)
+{
+ assert(semantics == rhs.semantics);
+
+ sign = rhs.sign;
+ category = rhs.category;
+ exponent = rhs.exponent;
+ sign2 = rhs.sign2;
+ exponent2 = rhs.exponent2;
+ if(category == fcNormal || category == fcNaN)
+ copySignificand(rhs);
+}
+
+void
+APFloat::copySignificand(const APFloat &rhs)
+{
+ assert(category == fcNormal || category == fcNaN);
+ assert(rhs.partCount() >= partCount());
+
+ APInt::tcAssign(significandParts(), rhs.significandParts(),
+ partCount());
+}
+
+/* Make this number a NaN, with an arbitrary but deterministic value
+ for the significand. If double or longer, this is a signalling NaN,
+ which may not be ideal. If float, this is QNaN(0). */
+void
+APFloat::makeNaN(unsigned type)
+{
+ category = fcNaN;
+ // FIXME: Add double and long double support for QNaN(0).
+ if (semantics->precision == 24 && semantics->maxExponent == 127) {
+ type |= 0x7fc00000U;
+ type &= ~0x80000000U;
+ } else
+ type = ~0U;
+ APInt::tcSet(significandParts(), type, partCount());
+}
+
+APFloat &
+APFloat::operator=(const APFloat &rhs)
+{
+ if(this != &rhs) {
+ if(semantics != rhs.semantics) {
+ freeSignificand();
+ initialize(rhs.semantics);
+ }
+ assign(rhs);
+ }
+
+ return *this;
+}
+
+bool
+APFloat::bitwiseIsEqual(const APFloat &rhs) const {
+ if (this == &rhs)
+ return true;
+ if (semantics != rhs.semantics ||
+ category != rhs.category ||
+ sign != rhs.sign)
+ return false;
+ if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
+ sign2 != rhs.sign2)
+ return false;
+ if (category==fcZero || category==fcInfinity)
+ return true;
+ else if (category==fcNormal && exponent!=rhs.exponent)
+ return false;
+ else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
+ exponent2!=rhs.exponent2)
+ return false;
+ else {
+ int i= partCount();
+ const integerPart* p=significandParts();
+ const integerPart* q=rhs.significandParts();
+ for (; i>0; i--, p++, q++) {
+ if (*p != *q)
+ return false;
+ }
+ return true;
+ }
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
+{
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ sign = 0;
+ zeroSignificand();
+ exponent = ourSemantics.precision - 1;
+ significandParts()[0] = value;
+ normalize(rmNearestTiesToEven, lfExactlyZero);
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics,
+ fltCategory ourCategory, bool negative, unsigned type)
+{
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ category = ourCategory;
+ sign = negative;
+ if (category == fcNormal)
+ category = fcZero;
+ else if (ourCategory == fcNaN)
+ makeNaN(type);
+}
+
+APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
+{
+ assertArithmeticOK(ourSemantics);
+ initialize(&ourSemantics);
+ convertFromString(text, rmNearestTiesToEven);
+}
+
+APFloat::APFloat(const APFloat &rhs)
+{
+ initialize(rhs.semantics);
+ assign(rhs);
+}
+
+APFloat::~APFloat()
+{
+ freeSignificand();
+}
+
+// Profile - This method 'profiles' an APFloat for use with FoldingSet.
+void APFloat::Profile(FoldingSetNodeID& ID) const {
+ ID.Add(bitcastToAPInt());
+}
+
+unsigned int
+APFloat::partCount() const
+{
+ return partCountForBits(semantics->precision + 1);
+}
+
+unsigned int
+APFloat::semanticsPrecision(const fltSemantics &semantics)
+{
+ return semantics.precision;
+}
+
+const integerPart *
+APFloat::significandParts() const
+{
+ return const_cast<APFloat *>(this)->significandParts();
+}
+
+integerPart *
+APFloat::significandParts()
+{
+ assert(category == fcNormal || category == fcNaN);
+
+ if(partCount() > 1)
+ return significand.parts;
+ else
+ return &significand.part;
+}
+
+void
+APFloat::zeroSignificand()
+{
+ category = fcNormal;
+ APInt::tcSet(significandParts(), 0, partCount());
+}
+
+/* Increment an fcNormal floating point number's significand. */
+void
+APFloat::incrementSignificand()
+{
+ integerPart carry;
+
+ carry = APInt::tcIncrement(significandParts(), partCount());
+
+ /* Our callers should never cause us to overflow. */
+ assert(carry == 0);
+}
+
+/* Add the significand of the RHS. Returns the carry flag. */
+integerPart
+APFloat::addSignificand(const APFloat &rhs)
+{
+ integerPart *parts;
+
+ parts = significandParts();
+
+ assert(semantics == rhs.semantics);
+ assert(exponent == rhs.exponent);
+
+ return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
+}
+
+/* Subtract the significand of the RHS with a borrow flag. Returns
+ the borrow flag. */
+integerPart
+APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
+{
+ integerPart *parts;
+
+ parts = significandParts();
+
+ assert(semantics == rhs.semantics);
+ assert(exponent == rhs.exponent);
+
+ return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
+ partCount());
+}
+
+/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
+ on to the full-precision result of the multiplication. Returns the
+ lost fraction. */
+lostFraction
+APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
+{
+ unsigned int omsb; // One, not zero, based MSB.
+ unsigned int partsCount, newPartsCount, precision;
+ integerPart *lhsSignificand;
+ integerPart scratch[4];
+ integerPart *fullSignificand;
+ lostFraction lost_fraction;
+ bool ignored;
+
+ assert(semantics == rhs.semantics);
+
+ precision = semantics->precision;
+ newPartsCount = partCountForBits(precision * 2);
+
+ if(newPartsCount > 4)
+ fullSignificand = new integerPart[newPartsCount];
+ else
+ fullSignificand = scratch;
+
+ lhsSignificand = significandParts();
+ partsCount = partCount();
+
+ APInt::tcFullMultiply(fullSignificand, lhsSignificand,
+ rhs.significandParts(), partsCount, partsCount);
+
+ lost_fraction = lfExactlyZero;
+ omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+ exponent += rhs.exponent;
+
+ if(addend) {
+ Significand savedSignificand = significand;
+ const fltSemantics *savedSemantics = semantics;
+ fltSemantics extendedSemantics;
+ opStatus status;
+ unsigned int extendedPrecision;
+
+ /* Normalize our MSB. */
+ extendedPrecision = precision + precision - 1;
+ if(omsb != extendedPrecision)
+ {
+ APInt::tcShiftLeft(fullSignificand, newPartsCount,
+ extendedPrecision - omsb);
+ exponent -= extendedPrecision - omsb;
+ }
+
+ /* Create new semantics. */
+ extendedSemantics = *semantics;
+ extendedSemantics.precision = extendedPrecision;
+
+ if(newPartsCount == 1)
+ significand.part = fullSignificand[0];
+ else
+ significand.parts = fullSignificand;
+ semantics = &extendedSemantics;
+
+ APFloat extendedAddend(*addend);
+ status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
+ assert(status == opOK);
+ lost_fraction = addOrSubtractSignificand(extendedAddend, false);
+
+ /* Restore our state. */
+ if(newPartsCount == 1)
+ fullSignificand[0] = significand.part;
+ significand = savedSignificand;
+ semantics = savedSemantics;
+
+ omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+ }
+
+ exponent -= (precision - 1);
+
+ if(omsb > precision) {
+ unsigned int bits, significantParts;
+ lostFraction lf;
+
+ bits = omsb - precision;
+ significantParts = partCountForBits(omsb);
+ lf = shiftRight(fullSignificand, significantParts, bits);
+ lost_fraction = combineLostFractions(lf, lost_fraction);
+ exponent += bits;
+ }
+
+ APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
+
+ if(newPartsCount > 4)
+ delete [] fullSignificand;
+
+ return lost_fraction;
+}
+
+/* Divide our significand by the significand of the RHS, leaving the
+ quotient in place. Returns the lost fraction. */
+lostFraction
+APFloat::divideSignificand(const APFloat &rhs)
+{
+ unsigned int bit, i, partsCount;
+ const integerPart *rhsSignificand;
+ integerPart *lhsSignificand, *dividend, *divisor;
+ integerPart scratch[4];
+ lostFraction lost_fraction;
+
+ assert(semantics == rhs.semantics);
+
+ lhsSignificand = significandParts();
+ rhsSignificand = rhs.significandParts();
+ partsCount = partCount();
+
+ if(partsCount > 2)
+ dividend = new integerPart[partsCount * 2];
+ else
+ dividend = scratch;
+
+ divisor = dividend + partsCount;
+
+ /* Copy the dividend and divisor as they will be modified in-place. */
+ for(i = 0; i < partsCount; i++) {
+ dividend[i] = lhsSignificand[i];
+ divisor[i] = rhsSignificand[i];
+ lhsSignificand[i] = 0;
+ }
+
+ exponent -= rhs.exponent;
+
+ unsigned int precision = semantics->precision;
+
+ /* Normalize the divisor. */
+ bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
+ if(bit) {
+ exponent += bit;
+ APInt::tcShiftLeft(divisor, partsCount, bit);
+ }
+
+ /* Normalize the dividend. */
+ bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
+ if(bit) {
+ exponent -= bit;
+ APInt::tcShiftLeft(dividend, partsCount, bit);
+ }
+
+ /* Ensure the dividend >= divisor initially for the loop below.
+ Incidentally, this means that the division loop below is
+ guaranteed to set the integer bit to one. */
+ if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
+ exponent--;
+ APInt::tcShiftLeft(dividend, partsCount, 1);
+ assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
+ }
+
+ /* Long division. */
+ for(bit = precision; bit; bit -= 1) {
+ if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
+ APInt::tcSubtract(dividend, divisor, 0, partsCount);
+ APInt::tcSetBit(lhsSignificand, bit - 1);
+ }
+
+ APInt::tcShiftLeft(dividend, partsCount, 1);
+ }
+
+ /* Figure out the lost fraction. */
+ int cmp = APInt::tcCompare(dividend, divisor, partsCount);
+
+ if(cmp > 0)
+ lost_fraction = lfMoreThanHalf;
+ else if(cmp == 0)
+ lost_fraction = lfExactlyHalf;
+ else if(APInt::tcIsZero(dividend, partsCount))
+ lost_fraction = lfExactlyZero;
+ else
+ lost_fraction = lfLessThanHalf;
+
+ if(partsCount > 2)
+ delete [] dividend;
+
+ return lost_fraction;
+}
+
+unsigned int
+APFloat::significandMSB() const
+{
+ return APInt::tcMSB(significandParts(), partCount());
+}
+
+unsigned int
+APFloat::significandLSB() const
+{
+ return APInt::tcLSB(significandParts(), partCount());
+}
+
+/* Note that a zero result is NOT normalized to fcZero. */
+lostFraction
+APFloat::shiftSignificandRight(unsigned int bits)
+{
+ /* Our exponent should not overflow. */
+ assert((exponent_t) (exponent + bits) >= exponent);
+
+ exponent += bits;
+
+ return shiftRight(significandParts(), partCount(), bits);
+}
+
+/* Shift the significand left BITS bits, subtract BITS from its exponent. */
+void
+APFloat::shiftSignificandLeft(unsigned int bits)
+{
+ assert(bits < semantics->precision);
+
+ if(bits) {
+ unsigned int partsCount = partCount();
+
+ APInt::tcShiftLeft(significandParts(), partsCount, bits);
+ exponent -= bits;
+
+ assert(!APInt::tcIsZero(significandParts(), partsCount));
+ }
+}
+
+APFloat::cmpResult
+APFloat::compareAbsoluteValue(const APFloat &rhs) const
+{
+ int compare;
+
+ assert(semantics == rhs.semantics);
+ assert(category == fcNormal);
+ assert(rhs.category == fcNormal);
+
+ compare = exponent - rhs.exponent;
+
+ /* If exponents are equal, do an unsigned bignum comparison of the
+ significands. */
+ if(compare == 0)
+ compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
+ partCount());
+
+ if(compare > 0)
+ return cmpGreaterThan;
+ else if(compare < 0)
+ return cmpLessThan;
+ else
+ return cmpEqual;
+}
+
+/* Handle overflow. Sign is preserved. We either become infinity or
+ the largest finite number. */
+APFloat::opStatus
+APFloat::handleOverflow(roundingMode rounding_mode)
+{
+ /* Infinity? */
+ if(rounding_mode == rmNearestTiesToEven
+ || rounding_mode == rmNearestTiesToAway
+ || (rounding_mode == rmTowardPositive && !sign)
+ || (rounding_mode == rmTowardNegative && sign))
+ {
+ category = fcInfinity;
+ return (opStatus) (opOverflow | opInexact);
+ }
+
+ /* Otherwise we become the largest finite number. */
+ category = fcNormal;
+ exponent = semantics->maxExponent;
+ APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
+ semantics->precision);
+
+ return opInexact;
+}
+
+/* Returns TRUE if, when truncating the current number, with BIT the
+ new LSB, with the given lost fraction and rounding mode, the result
+ would need to be rounded away from zero (i.e., by increasing the
+ significand). This routine must work for fcZero of both signs, and
+ fcNormal numbers. */
+bool
+APFloat::roundAwayFromZero(roundingMode rounding_mode,
+ lostFraction lost_fraction,
+ unsigned int bit) const
+{
+ /* NaNs and infinities should not have lost fractions. */
+ assert(category == fcNormal || category == fcZero);
+
+ /* Current callers never pass this so we don't handle it. */
+ assert(lost_fraction != lfExactlyZero);
+
+ switch (rounding_mode) {
+ default:
+ assert(0);
+
+ case rmNearestTiesToAway:
+ return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
+
+ case rmNearestTiesToEven:
+ if(lost_fraction == lfMoreThanHalf)
+ return true;
+
+ /* Our zeroes don't have a significand to test. */
+ if(lost_fraction == lfExactlyHalf && category != fcZero)
+ return APInt::tcExtractBit(significandParts(), bit);
+
+ return false;
+
+ case rmTowardZero:
+ return false;
+
+ case rmTowardPositive:
+ return sign == false;
+
+ case rmTowardNegative:
+ return sign == true;
+ }
+}
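+
+/* Example: under rmNearestTiesToEven with lfExactlyHalf, 2.5 rounds to 2
+ (the new LSB is already even, so the routine above returns false) while
+ 3.5 rounds to 4 (LSB odd, returns true); lfMoreThanHalf always rounds
+ away from zero. */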
+
+APFloat::opStatus
+APFloat::normalize(roundingMode rounding_mode,
+ lostFraction lost_fraction)
+{
+ unsigned int omsb; /* One, not zero, based MSB. */
+ int exponentChange;
+
+ if(category != fcNormal)
+ return opOK;
+
+ /* Before rounding normalize the exponent of fcNormal numbers. */
+ omsb = significandMSB() + 1;
+
+ if(omsb) {
+ /* OMSB is numbered from 1. We want to place it in the integer
+ bit numbered PRECISION if possible, with a compensating change in
+ the exponent. */
+ exponentChange = omsb - semantics->precision;
+
+ /* If the resulting exponent is too high, overflow according to
+ the rounding mode. */
+ if(exponent + exponentChange > semantics->maxExponent)
+ return handleOverflow(rounding_mode);
+
+ /* Subnormal numbers have exponent minExponent, and their MSB
+ is forced based on that. */
+ if(exponent + exponentChange < semantics->minExponent)
+ exponentChange = semantics->minExponent - exponent;
+
+ /* Shifting left is easy as we don't lose precision. */
+ if(exponentChange < 0) {
+ assert(lost_fraction == lfExactlyZero);
+
+ shiftSignificandLeft(-exponentChange);
+
+ return opOK;
+ }
+
+ if(exponentChange > 0) {
+ lostFraction lf;
+
+ /* Shift right and capture any new lost fraction. */
+ lf = shiftSignificandRight(exponentChange);
+
+ lost_fraction = combineLostFractions(lf, lost_fraction);
+
+ /* Keep OMSB up-to-date. */
+ if(omsb > (unsigned) exponentChange)
+ omsb -= exponentChange;
+ else
+ omsb = 0;
+ }
+ }
+
+ /* Now round the number according to rounding_mode given the lost
+ fraction. */
+
+ /* As specified in IEEE 754, since we do not trap we do not report
+ underflow for exact results. */
+ if(lost_fraction == lfExactlyZero) {
+ /* Canonicalize zeroes. */
+ if(omsb == 0)
+ category = fcZero;
+
+ return opOK;
+ }
+
+ /* Increment the significand if we're rounding away from zero. */
+ if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
+ if(omsb == 0)
+ exponent = semantics->minExponent;
+
+ incrementSignificand();
+ omsb = significandMSB() + 1;
+
+ /* Did the significand increment overflow? */
+ if(omsb == (unsigned) semantics->precision + 1) {
+ /* Renormalize by incrementing the exponent and shifting our
+ significand right one. However if we already have the
+ maximum exponent we overflow to infinity. */
+ if(exponent == semantics->maxExponent) {
+ category = fcInfinity;
+
+ return (opStatus) (opOverflow | opInexact);
+ }
+
+ shiftSignificandRight(1);
+
+ return opInexact;
+ }
+ }
+
+ /* The normal case - we were and are not denormal, and any
+ significand increment above didn't overflow. */
+ if(omsb == semantics->precision)
+ return opInexact;
+
+ /* We have a non-zero denormal. */
+ assert(omsb < semantics->precision);
+
+ /* Canonicalize zeroes. */
+ if(omsb == 0)
+ category = fcZero;
+
+ /* The fcZero case is a denormal that underflowed to zero. */
+ return (opStatus) (opUnderflow | opInexact);
+}
+
+APFloat::opStatus
+APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ assert(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcNormal, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcZero):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcZero, fcInfinity):
+ category = fcInfinity;
+ sign = rhs.sign ^ subtract;
+ return opOK;
+
+ case convolve(fcZero, fcNormal):
+ assign(rhs);
+ sign = rhs.sign ^ subtract;
+ return opOK;
+
+ case convolve(fcZero, fcZero):
+ /* Sign depends on rounding mode; handled by caller. */
+ return opOK;
+
+ case convolve(fcInfinity, fcInfinity):
+ /* Differently signed infinities can only be validly
+ subtracted. */
+ if(((sign ^ rhs.sign)!=0) != subtract) {
+ makeNaN();
+ return opInvalidOp;
+ }
+
+ return opOK;
+
+ case convolve(fcNormal, fcNormal):
+ return opDivByZero;
+ }
+}
+
+/* Add or subtract two normal numbers. */
+lostFraction
+APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
+{
+ integerPart carry;
+ lostFraction lost_fraction;
+ int bits;
+
+ /* Determine if the operation on the absolute values is effectively
+ an addition or subtraction. */
+ subtract ^= (sign ^ rhs.sign) ? true : false;
+
+ /* Are we bigger exponent-wise than the RHS? */
+ bits = exponent - rhs.exponent;
+
+ /* Subtraction is more subtle than one might naively expect. */
+ if(subtract) {
+ APFloat temp_rhs(rhs);
+ bool reverse;
+
+ if (bits == 0) {
+ reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
+ lost_fraction = lfExactlyZero;
+ } else if (bits > 0) {
+ lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
+ shiftSignificandLeft(1);
+ reverse = false;
+ } else {
+ lost_fraction = shiftSignificandRight(-bits - 1);
+ temp_rhs.shiftSignificandLeft(1);
+ reverse = true;
+ }
+
+ if (reverse) {
+ carry = temp_rhs.subtractSignificand
+ (*this, lost_fraction != lfExactlyZero);
+ copySignificand(temp_rhs);
+ sign = !sign;
+ } else {
+ carry = subtractSignificand
+ (temp_rhs, lost_fraction != lfExactlyZero);
+ }
+
+ /* Invert the lost fraction - it was on the RHS and
+ subtracted. */
+ if(lost_fraction == lfLessThanHalf)
+ lost_fraction = lfMoreThanHalf;
+ else if(lost_fraction == lfMoreThanHalf)
+ lost_fraction = lfLessThanHalf;
+
+ /* The code above is intended to ensure that no borrow is
+ necessary. */
+ assert(!carry);
+ } else {
+ if(bits > 0) {
+ APFloat temp_rhs(rhs);
+
+ lost_fraction = temp_rhs.shiftSignificandRight(bits);
+ carry = addSignificand(temp_rhs);
+ } else {
+ lost_fraction = shiftSignificandRight(-bits);
+ carry = addSignificand(rhs);
+ }
+
+ /* We have a guard bit; generating a carry cannot happen. */
+ assert(!carry);
+ }
+
+ return lost_fraction;
+}
+
+APFloat::opStatus
+APFloat::multiplySpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ assert(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcInfinity):
+ category = fcInfinity;
+ return opOK;
+
+ case convolve(fcZero, fcNormal):
+ case convolve(fcNormal, fcZero):
+ case convolve(fcZero, fcZero):
+ category = fcZero;
+ return opOK;
+
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcInfinity, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+APFloat::opStatus
+APFloat::divideSpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ assert(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcInfinity):
+ category = fcZero;
+ return opOK;
+
+ case convolve(fcNormal, fcZero):
+ category = fcInfinity;
+ return opDivByZero;
+
+ case convolve(fcInfinity, fcInfinity):
+ case convolve(fcZero, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+APFloat::opStatus
+APFloat::modSpecials(const APFloat &rhs)
+{
+ switch (convolve(category, rhs.category)) {
+ default:
+ assert(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ case convolve(fcNormal, fcInfinity):
+ return opOK;
+
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ category = fcNaN;
+ copySignificand(rhs);
+ return opOK;
+
+ case convolve(fcNormal, fcZero):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcInfinity):
+ case convolve(fcZero, fcZero):
+ makeNaN();
+ return opInvalidOp;
+
+ case convolve(fcNormal, fcNormal):
+ return opOK;
+ }
+}
+
+/* Change sign. */
+void
+APFloat::changeSign()
+{
+ /* Look mummy, this one's easy. */
+ sign = !sign;
+}
+
+void
+APFloat::clearSign()
+{
+ /* So is this one. */
+ sign = 0;
+}
+
+void
+APFloat::copySign(const APFloat &rhs)
+{
+ /* And this one. */
+ sign = rhs.sign;
+}
+
+/* Normalized addition or subtraction. */
+APFloat::opStatus
+APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
+ bool subtract)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+
+ fs = addOrSubtractSpecials(rhs, subtract);
+
+ /* This return code means it was not a simple case. */
+ if(fs == opDivByZero) {
+ lostFraction lost_fraction;
+
+ lost_fraction = addOrSubtractSignificand(rhs, subtract);
+ fs = normalize(rounding_mode, lost_fraction);
+
+ /* Can only be zero if we lost no fraction. */
+ assert(category != fcZero || lost_fraction == lfExactlyZero);
+ }
+
+ /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ positive zero unless rounding to minus infinity, except that
+ adding two like-signed zeroes gives that zero. */
+ if(category == fcZero) {
+ if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
+ sign = (rounding_mode == rmTowardNegative);
+ }
+
+ return fs;
+}
+
+/* Normalized addition. */
+APFloat::opStatus
+APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
+{
+ return addOrSubtract(rhs, rounding_mode, false);
+}
+
+/* Normalized subtraction. */
+APFloat::opStatus
+APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
+{
+ return addOrSubtract(rhs, rounding_mode, true);
+}
+
+/* Normalized multiply. */
+APFloat::opStatus
+APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ sign ^= rhs.sign;
+ fs = multiplySpecials(rhs);
+
+ if(category == fcNormal) {
+ lostFraction lost_fraction = multiplySignificand(rhs, 0);
+ fs = normalize(rounding_mode, lost_fraction);
+ if(lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+ }
+
+ return fs;
+}
+
+/* Normalized divide. */
+APFloat::opStatus
+APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ sign ^= rhs.sign;
+ fs = divideSpecials(rhs);
+
+ if(category == fcNormal) {
+ lostFraction lost_fraction = divideSignificand(rhs);
+ fs = normalize(rounding_mode, lost_fraction);
+ if(lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+ }
+
+ return fs;
+}
+
+/* Normalized remainder. This is not currently correct in all cases. */
+APFloat::opStatus
+APFloat::remainder(const APFloat &rhs)
+{
+ opStatus fs;
+ APFloat V = *this;
+ unsigned int origSign = sign;
+
+ assertArithmeticOK(*semantics);
+ fs = V.divide(rhs, rmNearestTiesToEven);
+ if (fs == opDivByZero)
+ return fs;
+
+ int parts = partCount();
+ integerPart *x = new integerPart[parts];
+ bool ignored;
+ fs = V.convertToInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven, &ignored);
+ if (fs==opInvalidOp) {
+ delete[] x; // Don't leak the conversion buffer on early exit.
+ return fs;
+ }
+
+ fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven);
+ assert(fs==opOK); // should always work
+
+ fs = V.multiply(rhs, rmNearestTiesToEven);
+ assert(fs==opOK || fs==opInexact); // should not overflow or underflow
+
+ fs = subtract(V, rmNearestTiesToEven);
+ assert(fs==opOK || fs==opInexact); // likewise
+
+ if (isZero())
+ sign = origSign; // IEEE754 requires this
+ delete[] x;
+ return fs;
+}
+
+/* Normalized llvm frem (C fmod).
+ This is not currently correct in all cases. */
+APFloat::opStatus
+APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
+{
+ opStatus fs;
+ assertArithmeticOK(*semantics);
+ fs = modSpecials(rhs);
+
+ if (category == fcNormal && rhs.category == fcNormal) {
+ APFloat V = *this;
+ unsigned int origSign = sign;
+
+ fs = V.divide(rhs, rmNearestTiesToEven);
+ if (fs == opDivByZero)
+ return fs;
+
+ int parts = partCount();
+ integerPart *x = new integerPart[parts];
+ bool ignored;
+ fs = V.convertToInteger(x, parts * integerPartWidth, true,
+ rmTowardZero, &ignored);
+ if (fs==opInvalidOp) {
+ delete[] x; // Don't leak the conversion buffer on early exit.
+ return fs;
+ }
+
+ fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
+ rmNearestTiesToEven);
+ assert(fs==opOK); // should always work
+
+ fs = V.multiply(rhs, rounding_mode);
+ assert(fs==opOK || fs==opInexact); // should not overflow or underflow
+
+ fs = subtract(V, rounding_mode);
+ assert(fs==opOK || fs==opInexact); // likewise
+
+ if (isZero())
+ sign = origSign; // IEEE754 requires this
+ delete[] x;
+ }
+ return fs;
+}
+
+/* Normalized fused-multiply-add. */
+APFloat::opStatus
+APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
+ const APFloat &addend,
+ roundingMode rounding_mode)
+{
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+
+ /* Post-multiplication sign, before addition. */
+ sign ^= multiplicand.sign;
+
+ /* If and only if all arguments are normal do we need to do an
+ extended-precision calculation. */
+ if(category == fcNormal
+ && multiplicand.category == fcNormal
+ && addend.category == fcNormal) {
+ lostFraction lost_fraction;
+
+ lost_fraction = multiplySignificand(multiplicand, &addend);
+ fs = normalize(rounding_mode, lost_fraction);
+ if(lost_fraction != lfExactlyZero)
+ fs = (opStatus) (fs | opInexact);
+
+ /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
+ positive zero unless rounding to minus infinity, except that
+ adding two like-signed zeroes gives that zero. */
+ if(category == fcZero && sign != addend.sign)
+ sign = (rounding_mode == rmTowardNegative);
+ } else {
+ fs = multiplySpecials(multiplicand);
+
+ /* FS can only be opOK or opInvalidOp. There is no more work
+ to do in the latter case. The IEEE-754R standard says it is
+ implementation-defined in this case whether, if ADDEND is a
+ quiet NaN, we raise invalid op; this implementation does so.
+
+ If we need to do the addition we can do so with normal
+ precision. */
+ if(fs == opOK)
+ fs = addOrSubtract(addend, rounding_mode, false);
+ }
+
+ return fs;
+}
+
+/* Comparison requires normalized numbers. */
+APFloat::cmpResult
+APFloat::compare(const APFloat &rhs) const
+{
+ cmpResult result;
+
+ assertArithmeticOK(*semantics);
+ assert(semantics == rhs.semantics);
+
+ switch (convolve(category, rhs.category)) {
+ default:
+ assert(0);
+
+ case convolve(fcNaN, fcZero):
+ case convolve(fcNaN, fcNormal):
+ case convolve(fcNaN, fcInfinity):
+ case convolve(fcNaN, fcNaN):
+ case convolve(fcZero, fcNaN):
+ case convolve(fcNormal, fcNaN):
+ case convolve(fcInfinity, fcNaN):
+ return cmpUnordered;
+
+ case convolve(fcInfinity, fcNormal):
+ case convolve(fcInfinity, fcZero):
+ case convolve(fcNormal, fcZero):
+ if(sign)
+ return cmpLessThan;
+ else
+ return cmpGreaterThan;
+
+ case convolve(fcNormal, fcInfinity):
+ case convolve(fcZero, fcInfinity):
+ case convolve(fcZero, fcNormal):
+ if(rhs.sign)
+ return cmpGreaterThan;
+ else
+ return cmpLessThan;
+
+ case convolve(fcInfinity, fcInfinity):
+ if(sign == rhs.sign)
+ return cmpEqual;
+ else if(sign)
+ return cmpLessThan;
+ else
+ return cmpGreaterThan;
+
+ case convolve(fcZero, fcZero):
+ return cmpEqual;
+
+ case convolve(fcNormal, fcNormal):
+ break;
+ }
+
+ /* Two normal numbers. Do they have the same sign? */
+ if(sign != rhs.sign) {
+ if(sign)
+ result = cmpLessThan;
+ else
+ result = cmpGreaterThan;
+ } else {
+ /* Compare absolute values; invert result if negative. */
+ result = compareAbsoluteValue(rhs);
+
+ if(sign) {
+ if(result == cmpLessThan)
+ result = cmpGreaterThan;
+ else if(result == cmpGreaterThan)
+ result = cmpLessThan;
+ }
+ }
+
+ return result;
+}
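+
+/* Illustrative sketch of the semantics above: any NaN operand yields
+   cmpUnordered, and zeroes compare equal regardless of sign:
+
+     APFloat x(1.0), y(2.0);
+     assert(x.compare(y) == APFloat::cmpLessThan);
+     assert(APFloat(-0.0).compare(APFloat(0.0)) == APFloat::cmpEqual);
+*/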
+
+/// APFloat::convert - convert a value of one floating point type to another.
+/// The return value corresponds to the IEEE754 exceptions. *losesInfo
+/// records whether the transformation lost information, i.e. whether
+/// converting the result back to the original type will not produce the
+/// original value (this is almost the same as return value==opOK, but there
+/// are edge cases where this is not so).
+
+APFloat::opStatus
+APFloat::convert(const fltSemantics &toSemantics,
+ roundingMode rounding_mode, bool *losesInfo)
+{
+ lostFraction lostFraction;
+ unsigned int newPartCount, oldPartCount;
+ opStatus fs;
+
+ assertArithmeticOK(*semantics);
+ assertArithmeticOK(toSemantics);
+ lostFraction = lfExactlyZero;
+ newPartCount = partCountForBits(toSemantics.precision + 1);
+ oldPartCount = partCount();
+
+ /* Handle storage complications. If our new form is wider,
+ re-allocate our bit pattern into wider storage. If it is
+ narrower, we ignore the excess parts, but if narrowing to a
+ single part we need to free the old storage.
+ Be careful not to reference significandParts for zeroes
+ and infinities, since it aborts. */
+ if (newPartCount > oldPartCount) {
+ integerPart *newParts;
+ newParts = new integerPart[newPartCount];
+ APInt::tcSet(newParts, 0, newPartCount);
+ if (category==fcNormal || category==fcNaN)
+ APInt::tcAssign(newParts, significandParts(), oldPartCount);
+ freeSignificand();
+ significand.parts = newParts;
+ } else if (newPartCount < oldPartCount) {
+ /* Capture any lost fraction through truncation of parts so we get
+ correct rounding whilst normalizing. */
+ if (category==fcNormal)
+ lostFraction = lostFractionThroughTruncation
+ (significandParts(), oldPartCount, toSemantics.precision);
+ if (newPartCount == 1) {
+ integerPart newPart = 0;
+ if (category==fcNormal || category==fcNaN)
+ newPart = significandParts()[0];
+ freeSignificand();
+ significand.part = newPart;
+ }
+ }
+
+ if(category == fcNormal) {
+ /* Re-interpret our bit-pattern. */
+ exponent += toSemantics.precision - semantics->precision;
+ semantics = &toSemantics;
+ fs = normalize(rounding_mode, lostFraction);
+ *losesInfo = (fs != opOK);
+ } else if (category == fcNaN) {
+ int shift = toSemantics.precision - semantics->precision;
+ // Do this now so significandParts gets the right answer
+ const fltSemantics *oldSemantics = semantics;
+ semantics = &toSemantics;
+ *losesInfo = false;
+ // No normalization here, just truncate
+ if (shift>0)
+ APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+ else if (shift < 0) {
+ unsigned ushift = -shift;
+ // Figure out if we are losing information. This happens
+      // if we are shifting out something other than 0s, or if the x87 long
+ // double input did not have its integer bit set (pseudo-NaN), or if the
+ // x87 long double input did not have its QNan bit set (because the x87
+ // hardware sets this bit when converting a lower-precision NaN to
+ // x87 long double).
+ if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
+ *losesInfo = true;
+ if (oldSemantics == &APFloat::x87DoubleExtended &&
+ (!(*significandParts() & 0x8000000000000000ULL) ||
+ !(*significandParts() & 0x4000000000000000ULL)))
+ *losesInfo = true;
+ APInt::tcShiftRight(significandParts(), newPartCount, ushift);
+ }
+ // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
+ // does not give you back the same bits. This is dubious, and we
+ // don't currently do it. You're really supposed to get
+ // an invalid operation signal at runtime, but nobody does that.
+ fs = opOK;
+ } else {
+ semantics = &toSemantics;
+ fs = opOK;
+ *losesInfo = false;
+ }
+
+ return fs;
+}
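+
+/* Illustrative sketch: narrowing a double to single precision.  The
+   nearest float to 1.1 differs from the nearest double, so information
+   is lost:
+
+     APFloat d(1.1);
+     bool losesInfo;
+     APFloat::opStatus st = d.convert(APFloat::IEEEsingle,
+                                      APFloat::rmNearestTiesToEven,
+                                      &losesInfo);
+     // st == APFloat::opInexact and losesInfo == true.
+*/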
+
+/* Convert a floating point number to an integer according to the
+ rounding mode. If the rounded integer value is out of range this
+ returns an invalid operation exception and the contents of the
+ destination parts are unspecified. If the rounded value is in
+ range but the floating point number is not the exact integer, the C
+ standard doesn't require an inexact exception to be raised. IEEE
+ 854 does require it so we do that.
+
+ Note that for conversions to integer type the C standard requires
+ round-to-zero to always be used. */
+APFloat::opStatus
+APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode,
+ bool *isExact) const
+{
+ lostFraction lost_fraction;
+ const integerPart *src;
+ unsigned int dstPartsCount, truncatedBits;
+
+ assertArithmeticOK(*semantics);
+
+ *isExact = false;
+
+ /* Handle the three special cases first. */
+ if(category == fcInfinity || category == fcNaN)
+ return opInvalidOp;
+
+ dstPartsCount = partCountForBits(width);
+
+ if(category == fcZero) {
+ APInt::tcSet(parts, 0, dstPartsCount);
+ // Negative zero can't be represented as an int.
+ *isExact = !sign;
+ return opOK;
+ }
+
+ src = significandParts();
+
+ /* Step 1: place our absolute value, with any fraction truncated, in
+ the destination. */
+ if (exponent < 0) {
+ /* Our absolute value is less than one; truncate everything. */
+ APInt::tcSet(parts, 0, dstPartsCount);
+ /* For exponent -1 the integer bit represents .5, look at that.
+ For smaller exponents leftmost truncated bit is 0. */
+ truncatedBits = semantics->precision -1U - exponent;
+ } else {
+ /* We want the most significant (exponent + 1) bits; the rest are
+ truncated. */
+ unsigned int bits = exponent + 1U;
+
+ /* Hopelessly large in magnitude? */
+ if (bits > width)
+ return opInvalidOp;
+
+ if (bits < semantics->precision) {
+ /* We truncate (semantics->precision - bits) bits. */
+ truncatedBits = semantics->precision - bits;
+ APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
+ } else {
+ /* We want at least as many bits as are available. */
+ APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
+ APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
+ truncatedBits = 0;
+ }
+ }
+
+ /* Step 2: work out any lost fraction, and increment the absolute
+ value if we would round away from zero. */
+ if (truncatedBits) {
+ lost_fraction = lostFractionThroughTruncation(src, partCount(),
+ truncatedBits);
+ if (lost_fraction != lfExactlyZero
+ && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
+ if (APInt::tcIncrement(parts, dstPartsCount))
+ return opInvalidOp; /* Overflow. */
+ }
+ } else {
+ lost_fraction = lfExactlyZero;
+ }
+
+ /* Step 3: check if we fit in the destination. */
+ unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
+
+ if (sign) {
+ if (!isSigned) {
+ /* Negative numbers cannot be represented as unsigned. */
+ if (omsb != 0)
+ return opInvalidOp;
+ } else {
+ /* It takes omsb bits to represent the unsigned integer value.
+ We lose a bit for the sign, but care is needed as the
+ maximally negative integer is a special case. */
+ if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
+ return opInvalidOp;
+
+ /* This case can happen because of rounding. */
+ if (omsb > width)
+ return opInvalidOp;
+ }
+
+ APInt::tcNegate (parts, dstPartsCount);
+ } else {
+ if (omsb >= width + !isSigned)
+ return opInvalidOp;
+ }
+
+ if (lost_fraction == lfExactlyZero) {
+ *isExact = true;
+ return opOK;
+ } else
+ return opInexact;
+}
+
+/* Same as convertToSignExtendedInteger, except we provide
+ deterministic values in case of an invalid operation exception,
+ namely zero for NaNs and the minimal or maximal value respectively
+ for underflow or overflow.
+ The *isExact output tells whether the result is exact, in the sense
+ that converting it back to the original floating point type produces
+ the original value. This is almost equivalent to result==opOK,
+ except for negative zeroes.
+*/
+APFloat::opStatus
+APFloat::convertToInteger(integerPart *parts, unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode, bool *isExact) const
+{
+ opStatus fs;
+
+ fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
+ isExact);
+
+ if (fs == opInvalidOp) {
+ unsigned int bits, dstPartsCount;
+
+ dstPartsCount = partCountForBits(width);
+
+ if (category == fcNaN)
+ bits = 0;
+ else if (sign)
+ bits = isSigned;
+ else
+ bits = width - isSigned;
+
+ APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
+ if (sign && isSigned)
+ APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
+ }
+
+ return fs;
+}
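+
+/* Illustrative sketch: converting 2.5 to a 32-bit signed integer with
+   round-toward-zero truncates to 2 and reports the dropped fraction:
+
+     APFloat f(2.5);
+     integerPart part;
+     bool isExact;
+     APFloat::opStatus st = f.convertToInteger(&part, 32, true,
+                                               APFloat::rmTowardZero,
+                                               &isExact);
+     // st == APFloat::opInexact, isExact == false, part == 2.
+*/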
+
+/* Convert an unsigned integer SRC to a floating point number,
+ rounding according to ROUNDING_MODE. The sign of the floating
+ point number is not modified. */
+APFloat::opStatus
+APFloat::convertFromUnsignedParts(const integerPart *src,
+ unsigned int srcCount,
+ roundingMode rounding_mode)
+{
+ unsigned int omsb, precision, dstCount;
+ integerPart *dst;
+ lostFraction lost_fraction;
+
+ assertArithmeticOK(*semantics);
+ category = fcNormal;
+ omsb = APInt::tcMSB(src, srcCount) + 1;
+ dst = significandParts();
+ dstCount = partCount();
+ precision = semantics->precision;
+
+  /* We want the most significant PRECISION bits of SRC. There may not
+ be that many; extract what we can. */
+ if (precision <= omsb) {
+ exponent = omsb - 1;
+ lost_fraction = lostFractionThroughTruncation(src, srcCount,
+ omsb - precision);
+ APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
+ } else {
+ exponent = precision - 1;
+ lost_fraction = lfExactlyZero;
+ APInt::tcExtract(dst, dstCount, src, omsb, 0);
+ }
+
+ return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::convertFromAPInt(const APInt &Val,
+ bool isSigned,
+ roundingMode rounding_mode)
+{
+ unsigned int partCount = Val.getNumWords();
+ APInt api = Val;
+
+ sign = false;
+ if (isSigned && api.isNegative()) {
+ sign = true;
+ api = -api;
+ }
+
+ return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+/* Convert a two's complement integer SRC to a floating point number,
+ rounding according to ROUNDING_MODE. ISSIGNED is true if the
+ integer is signed, in which case it must be sign-extended. */
+APFloat::opStatus
+APFloat::convertFromSignExtendedInteger(const integerPart *src,
+ unsigned int srcCount,
+ bool isSigned,
+ roundingMode rounding_mode)
+{
+ opStatus status;
+
+ assertArithmeticOK(*semantics);
+ if (isSigned
+ && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
+ integerPart *copy;
+
+ /* If we're signed and negative negate a copy. */
+ sign = true;
+ copy = new integerPart[srcCount];
+ APInt::tcAssign(copy, src, srcCount);
+ APInt::tcNegate(copy, srcCount);
+ status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
+ delete [] copy;
+ } else {
+ sign = false;
+ status = convertFromUnsignedParts(src, srcCount, rounding_mode);
+ }
+
+ return status;
+}
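+
+/* Illustrative sketch: a one-part two's complement -5 takes the
+   negate-a-copy path above.  Assuming IEEEdouble semantics:
+
+     integerPart src = (integerPart) -5;   // sign-extended
+     APFloat f(APFloat::IEEEdouble, APFloat::fcZero, false);
+     f.convertFromSignExtendedInteger(&src, 1, true,
+                                      APFloat::rmNearestTiesToEven);
+     // f.convertToDouble() == -5.0
+*/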
+
+/* FIXME: should this just take a const APInt reference? */
+APFloat::opStatus
+APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
+ unsigned int width, bool isSigned,
+ roundingMode rounding_mode)
+{
+ unsigned int partCount = partCountForBits(width);
+ APInt api = APInt(width, partCount, parts);
+
+ sign = false;
+ if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
+ sign = true;
+ api = -api;
+ }
+
+ return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+APFloat::opStatus
+APFloat::convertFromHexadecimalString(const char *p,
+ roundingMode rounding_mode)
+{
+ lostFraction lost_fraction;
+ integerPart *significand;
+ unsigned int bitPos, partsCount;
+ const char *dot, *firstSignificantDigit;
+
+ zeroSignificand();
+ exponent = 0;
+ category = fcNormal;
+
+ significand = significandParts();
+ partsCount = partCount();
+ bitPos = partsCount * integerPartWidth;
+
+ /* Skip leading zeroes and any (hexa)decimal point. */
+ p = skipLeadingZeroesAndAnyDot(p, &dot);
+ firstSignificantDigit = p;
+
+ for(;;) {
+ integerPart hex_value;
+
+ if(*p == '.') {
+ assert(dot == 0);
+ dot = p++;
+ }
+
+ hex_value = hexDigitValue(*p);
+ if(hex_value == -1U) {
+ lost_fraction = lfExactlyZero;
+ break;
+ }
+
+ p++;
+
+ /* Store the number whilst 4-bit nibbles remain. */
+ if(bitPos) {
+ bitPos -= 4;
+ hex_value <<= bitPos % integerPartWidth;
+ significand[bitPos / integerPartWidth] |= hex_value;
+ } else {
+ lost_fraction = trailingHexadecimalFraction(p, hex_value);
+ while(hexDigitValue(*p) != -1U)
+ p++;
+ break;
+ }
+ }
+
+ /* Hex floats require an exponent but not a hexadecimal point. */
+ assert(*p == 'p' || *p == 'P');
+
+ /* Ignore the exponent if we are zero. */
+ if(p != firstSignificantDigit) {
+ int expAdjustment;
+
+ /* Implicit hexadecimal point? */
+ if(!dot)
+ dot = p;
+
+ /* Calculate the exponent adjustment implicit in the number of
+ significant digits. */
+ expAdjustment = static_cast<int>(dot - firstSignificantDigit);
+ if(expAdjustment < 0)
+ expAdjustment++;
+ expAdjustment = expAdjustment * 4 - 1;
+
+ /* Adjust for writing the significand starting at the most
+ significant nibble. */
+ expAdjustment += semantics->precision;
+ expAdjustment -= partsCount * integerPartWidth;
+
+ /* Adjust for the given exponent. */
+ exponent = totalExponent(p, expAdjustment);
+ }
+
+ return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+ unsigned sigPartCount, int exp,
+ roundingMode rounding_mode)
+{
+ unsigned int parts, pow5PartCount;
+ fltSemantics calcSemantics = { 32767, -32767, 0, true };
+ integerPart pow5Parts[maxPowerOfFiveParts];
+ bool isNearest;
+
+ isNearest = (rounding_mode == rmNearestTiesToEven
+ || rounding_mode == rmNearestTiesToAway);
+
+ parts = partCountForBits(semantics->precision + 11);
+
+ /* Calculate pow(5, abs(exp)). */
+ pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
+
+ for (;; parts *= 2) {
+ opStatus sigStatus, powStatus;
+ unsigned int excessPrecision, truncatedBits;
+
+ calcSemantics.precision = parts * integerPartWidth - 1;
+ excessPrecision = calcSemantics.precision - semantics->precision;
+ truncatedBits = excessPrecision;
+
+ APFloat decSig(calcSemantics, fcZero, sign);
+ APFloat pow5(calcSemantics, fcZero, false);
+
+ sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
+ rmNearestTiesToEven);
+ powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
+ rmNearestTiesToEven);
+ /* Add exp, as 10^n = 5^n * 2^n. */
+ decSig.exponent += exp;
+
+ lostFraction calcLostFraction;
+ integerPart HUerr, HUdistance;
+ unsigned int powHUerr;
+
+ if (exp >= 0) {
+ /* multiplySignificand leaves the precision-th bit set to 1. */
+ calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+ powHUerr = powStatus != opOK;
+ } else {
+ calcLostFraction = decSig.divideSignificand(pow5);
+ /* Denormal numbers have less precision. */
+ if (decSig.exponent < semantics->minExponent) {
+ excessPrecision += (semantics->minExponent - decSig.exponent);
+ truncatedBits = excessPrecision;
+ if (excessPrecision > calcSemantics.precision)
+ excessPrecision = calcSemantics.precision;
+ }
+ /* Extra half-ulp lost in reciprocal of exponent. */
+ powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
+ }
+
+ /* Both multiplySignificand and divideSignificand return the
+ result with the integer bit set. */
+ assert (APInt::tcExtractBit
+ (decSig.significandParts(), calcSemantics.precision - 1) == 1);
+
+ HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
+ powHUerr);
+ HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
+ excessPrecision, isNearest);
+
+ /* Are we guaranteed to round correctly if we truncate? */
+ if (HUdistance >= HUerr) {
+ APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
+ calcSemantics.precision - excessPrecision,
+ excessPrecision);
+      /* Take the exponent of decSig.  If we tcExtract-ed fewer bits
+ above we must adjust our exponent to compensate for the
+ implicit right shift. */
+ exponent = (decSig.exponent + semantics->precision
+ - (calcSemantics.precision - excessPrecision));
+ calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
+ decSig.partCount(),
+ truncatedBits);
+ return normalize(rounding_mode, calcLostFraction);
+ }
+ }
+}
+
+APFloat::opStatus
+APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
+{
+ decimalInfo D;
+ opStatus fs;
+
+ /* Scan the text. */
+ interpretDecimal(p, &D);
+
+ /* Handle the quick cases. First the case of no significant digits,
+ i.e. zero, and then exponents that are obviously too large or too
+ small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
+ definitely overflows if
+
+ (exp - 1) * L >= maxExponent
+
+ and definitely underflows to zero where
+
+ (exp + 1) * L <= minExponent - precision
+
+ With integer arithmetic the tightest bounds for L are
+
+ 93/28 < L < 196/59 [ numerator <= 256 ]
+ 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
+ */
+
+ if (decDigitValue(*D.firstSigDigit) >= 10U) {
+ category = fcZero;
+ fs = opOK;
+ } else if ((D.normalizedExponent + 1) * 28738
+ <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
+ /* Underflow to zero and round. */
+ zeroSignificand();
+ fs = normalize(rounding_mode, lfLessThanHalf);
+ } else if ((D.normalizedExponent - 1) * 42039
+ >= 12655 * semantics->maxExponent) {
+ /* Overflow and round. */
+ fs = handleOverflow(rounding_mode);
+ } else {
+ integerPart *decSignificand;
+ unsigned int partCount;
+
+    /* A tight upper bound on the number of bits required to hold an
+ N-digit decimal integer is N * 196 / 59. Allocate enough space
+ to hold the full significand, and an extra part required by
+ tcMultiplyPart. */
+ partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
+ partCount = partCountForBits(1 + 196 * partCount / 59);
+ decSignificand = new integerPart[partCount + 1];
+ partCount = 0;
+
+ /* Convert to binary efficiently - we do almost all multiplication
+       in an integerPart. When this would overflow we do a single
+ bignum multiplication, and then revert again to multiplication
+ in an integerPart. */
+ do {
+ integerPart decValue, val, multiplier;
+
+ val = 0;
+ multiplier = 1;
+
+ do {
+ if (*p == '.')
+ p++;
+
+ decValue = decDigitValue(*p++);
+ multiplier *= 10;
+ val = val * 10 + decValue;
+ /* The maximum number that can be multiplied by ten with any
+ digit added without overflowing an integerPart. */
+ } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
+
+ /* Multiply out the current part. */
+ APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
+ partCount, partCount + 1, false);
+
+ /* If we used another part (likely but not guaranteed), increase
+ the count. */
+ if (decSignificand[partCount])
+ partCount++;
+ } while (p <= D.lastSigDigit);
+
+ category = fcNormal;
+ fs = roundSignificandWithExponent(decSignificand, partCount,
+ D.exponent, rounding_mode);
+
+ delete [] decSignificand;
+ }
+
+ return fs;
+}
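+
+/* A worked instance of the bounds above (assuming IEEEdouble, where
+   maxExponent is 1023): a decimal exponent exp certainly overflows once
+   (exp - 1) * 42039 >= 12655 * 1023, i.e. exp >= 309, consistent with
+   DBL_MAX being a little under 1.8e308.  Anything below the bound falls
+   through to the full conversion path. */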
+
+APFloat::opStatus
+APFloat::convertFromString(const char *p, roundingMode rounding_mode)
+{
+ assertArithmeticOK(*semantics);
+
+ /* Handle a leading minus sign. */
+ if(*p == '-')
+ sign = 1, p++;
+ else
+ sign = 0;
+
+ if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
+ return convertFromHexadecimalString(p + 2, rounding_mode);
+
+ return convertFromDecimalString(p, rounding_mode);
+}
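+
+/* Illustrative sketch: both decimal and hexadecimal forms route through
+   convertFromString.  Assuming IEEEdouble semantics:
+
+     APFloat f(APFloat::IEEEdouble, APFloat::fcZero, false);
+     f.convertFromString("1.25e2", APFloat::rmNearestTiesToEven);  // 125.0
+     f.convertFromString("0x1.8p1", APFloat::rmNearestTiesToEven); // 3.0
+*/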
+
+/* Write out a hexadecimal representation of the floating point value
+ to DST, which must be of sufficient size, in the C99 form
+ [-]0xh.hhhhp[+-]d. Return the number of characters written,
+ excluding the terminating NUL.
+
+ If UPPERCASE, the output is in upper case, otherwise in lower case.
+
+ HEXDIGITS digits appear altogether, rounding the value if
+ necessary. If HEXDIGITS is 0, the minimal precision to display the
+ number precisely is used instead. If nothing would appear after
+ the decimal point it is suppressed.
+
+ The decimal exponent is always printed and has at least one digit.
+ Zero values display an exponent of zero. Infinities and NaNs
+ appear as "infinity" or "nan" respectively.
+
+ The above rules are as specified by C99. There is ambiguity about
+ what the leading hexadecimal digit should be. This implementation
+ uses whatever is necessary so that the exponent is displayed as
+ stored. This implies the exponent will fall within the IEEE format
+ range, and the leading hexadecimal digit will be 0 (for denormals),
+ 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
+ any other digits zero).
+*/
+unsigned int
+APFloat::convertToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase, roundingMode rounding_mode) const
+{
+ char *p;
+
+ assertArithmeticOK(*semantics);
+
+ p = dst;
+ if (sign)
+ *dst++ = '-';
+
+ switch (category) {
+ case fcInfinity:
+ memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
+ dst += sizeof infinityL - 1;
+ break;
+
+ case fcNaN:
+ memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
+ dst += sizeof NaNU - 1;
+ break;
+
+ case fcZero:
+ *dst++ = '0';
+ *dst++ = upperCase ? 'X': 'x';
+ *dst++ = '0';
+ if (hexDigits > 1) {
+ *dst++ = '.';
+ memset (dst, '0', hexDigits - 1);
+ dst += hexDigits - 1;
+ }
+ *dst++ = upperCase ? 'P': 'p';
+ *dst++ = '0';
+ break;
+
+ case fcNormal:
+ dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
+ break;
+ }
+
+ *dst = 0;
+
+ return static_cast<unsigned int>(dst - p);
+}
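+
+/* Illustrative sketch: with hexDigits == 0 the minimal exact form is
+   produced, e.g. for 1.0 under IEEEdouble:
+
+     char buf[32];
+     APFloat one(1.0);
+     one.convertToHexString(buf, 0, false, APFloat::rmNearestTiesToEven);
+     // buf now holds "0x1p0" (cf. C99 printf %a, which prints "0x1p+0").
+*/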
+
+/* Does the hard work of outputting the correctly rounded hexadecimal
+ form of a normal floating point number with the specified number of
+ hexadecimal digits. If HEXDIGITS is zero the minimum number of
+ digits necessary to print the value precisely is output. */
+char *
+APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase,
+ roundingMode rounding_mode) const
+{
+ unsigned int count, valueBits, shift, partsCount, outputDigits;
+ const char *hexDigitChars;
+ const integerPart *significand;
+ char *p;
+ bool roundUp;
+
+ *dst++ = '0';
+ *dst++ = upperCase ? 'X': 'x';
+
+ roundUp = false;
+ hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
+
+ significand = significandParts();
+ partsCount = partCount();
+
+ /* +3 because the first digit only uses the single integer bit, so
+ we have 3 virtual zero most-significant-bits. */
+ valueBits = semantics->precision + 3;
+ shift = integerPartWidth - valueBits % integerPartWidth;
+
+ /* The natural number of digits required ignoring trailing
+ insignificant zeroes. */
+ outputDigits = (valueBits - significandLSB () + 3) / 4;
+
+ /* hexDigits of zero means use the required number for the
+ precision. Otherwise, see if we are truncating. If we are,
+ find out if we need to round away from zero. */
+ if (hexDigits) {
+ if (hexDigits < outputDigits) {
+ /* We are dropping non-zero bits, so need to check how to round.
+ "bits" is the number of dropped bits. */
+ unsigned int bits;
+ lostFraction fraction;
+
+ bits = valueBits - hexDigits * 4;
+ fraction = lostFractionThroughTruncation (significand, partsCount, bits);
+ roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
+ }
+ outputDigits = hexDigits;
+ }
+
+ /* Write the digits consecutively, and start writing in the location
+ of the hexadecimal point. We move the most significant digit
+ left and add the hexadecimal point later. */
+ p = ++dst;
+
+ count = (valueBits + integerPartWidth - 1) / integerPartWidth;
+
+ while (outputDigits && count) {
+ integerPart part;
+
+ /* Put the most significant integerPartWidth bits in "part". */
+ if (--count == partsCount)
+ part = 0; /* An imaginary higher zero part. */
+ else
+ part = significand[count] << shift;
+
+ if (count && shift)
+ part |= significand[count - 1] >> (integerPartWidth - shift);
+
+ /* Convert as much of "part" to hexdigits as we can. */
+ unsigned int curDigits = integerPartWidth / 4;
+
+ if (curDigits > outputDigits)
+ curDigits = outputDigits;
+ dst += partAsHex (dst, part, curDigits, hexDigitChars);
+ outputDigits -= curDigits;
+ }
+
+ if (roundUp) {
+ char *q = dst;
+
+ /* Note that hexDigitChars has a trailing '0'. */
+ do {
+ q--;
+ *q = hexDigitChars[hexDigitValue (*q) + 1];
+ } while (*q == '0');
+ assert (q >= p);
+ } else {
+ /* Add trailing zeroes. */
+ memset (dst, '0', outputDigits);
+ dst += outputDigits;
+ }
+
+ /* Move the most significant digit to before the point, and if there
+ is something after the decimal point add it. This must come
+ after rounding above. */
+ p[-1] = p[0];
+  if (dst - 1 == p)
+ dst--;
+ else
+ p[0] = '.';
+
+ /* Finally output the exponent. */
+ *dst++ = upperCase ? 'P': 'p';
+
+ return writeSignedDecimal (dst, exponent);
+}
+
+// For good performance it is desirable for different APFloats
+// to produce different integers.
+uint32_t
+APFloat::getHashValue() const
+{
+ if (category==fcZero) return sign<<8 | semantics->precision ;
+ else if (category==fcInfinity) return sign<<9 | semantics->precision;
+ else if (category==fcNaN) return 1<<10 | semantics->precision;
+ else {
+ uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
+ const integerPart* p = significandParts();
+ for (int i=partCount(); i>0; i--, p++)
+ hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
+ return hash;
+ }
+}
+
+// Conversion from APFloat to/from host float/double. It may eventually be
+// possible to eliminate these and have everybody deal with APFloats, but that
+// will take a while. This approach will not easily extend to long double.
+// Current implementation requires integerPartWidth==64, which is correct at
+// the moment but could be made more general.
+
+// Denormals have exponent minExponent in APFloat, but minExponent-1 in
+// the actual IEEE representations. We compensate for that here.
+
+APInt
+APFloat::convertF80LongDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
+ assert (partCount()==2);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = 0x8000000000000000ULL;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 15) |
+ (myexponent & 0x7fffLL);
+ return APInt(80, 2, words);
+}
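+
+/* Illustrative check of the packing above: 1.0 has biased exponent
+   0x3fff and an explicit integer bit, so words[0] becomes
+   0x8000000000000000ULL and words[1] becomes 0x3fff. */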
+
+APInt
+APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
+ assert (partCount()==2);
+
+ uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
+
+ if (category==fcNormal) {
+ myexponent = exponent + 1023; //bias
+ myexponent2 = exponent2 + 1023;
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
+ myexponent = 0; // denormal
+ if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
+ myexponent2 = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ myexponent2 = 0;
+ mysignificand2 = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7ff;
+ myexponent2 = 0;
+ mysignificand = 0;
+ mysignificand2 = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7ff;
+ mysignificand = significandParts()[0];
+ myexponent2 = exponent2;
+ mysignificand2 = significandParts()[1];
+ }
+
+ uint64_t words[2];
+ words[0] = ((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7ff) << 52) |
+ (mysignificand & 0xfffffffffffffLL);
+ words[1] = ((uint64_t)(sign2 & 1) << 63) |
+ ((myexponent2 & 0x7ff) << 52) |
+ (mysignificand2 & 0xfffffffffffffLL);
+ return APInt(128, 2, words);
+}
+
+APInt
+APFloat::convertDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
+ assert (partCount()==1);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+1023; //bias
+ mysignificand = *significandParts();
+ if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7ff;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0x7ff;
+ mysignificand = *significandParts();
+ }
+
+ return APInt(64, ((((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7ff) << 52) |
+ (mysignificand & 0xfffffffffffffLL))));
+}
+
+APInt
+APFloat::convertFloatAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
+ assert (partCount()==1);
+
+ uint32_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+127; //bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x800000))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0xff;
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0xff;
+ mysignificand = (uint32_t)*significandParts();
+ }
+
+ return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
+ (mysignificand & 0x7fffff)));
+}
+
+// This function creates an APInt that is just a bit map of the floating
+// point constant as it would appear in memory. It is not a conversion,
+// and treating the result as a normal integer is unlikely to be useful.
+
+APInt
+APFloat::bitcastToAPInt() const
+{
+ if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
+ return convertFloatAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
+ return convertDoubleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
+ return convertPPCDoubleDoubleAPFloatToAPInt();
+
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
+ "unknown format!");
+ return convertF80LongDoubleAPFloatToAPInt();
+}
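+
+/* Illustrative sketch: the result is the raw memory image of the value,
+   so for the single-precision 1.0f:
+
+     APFloat f(1.0f);
+     APInt bits = f.bitcastToAPInt();
+     // bits.getBitWidth() == 32 and bits.getRawData()[0] == 0x3f800000.
+*/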
+
+float
+APFloat::convertToFloat() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
+ APInt api = bitcastToAPInt();
+ return api.bitsToFloat();
+}
+
+double
+APFloat::convertToDouble() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
+ APInt api = bitcastToAPInt();
+ return api.bitsToDouble();
+}
+
+/// Integer bit is explicit in this format. Intel hardware (387 and later)
+/// does not support these bit patterns:
+/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
+/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
+/// exponent = 0, integer bit 1 ("pseudodenormal")
+/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
+/// At the moment, the first two are treated as NaNs, the second two as Normal.
+void
+APFloat::initFromF80LongDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==80);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 & 0x7fff);
+ uint64_t mysignificand = i1;
+
+ initialize(&APFloat::x87DoubleExtended);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>15);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ }
+}
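+
+/* Illustrative check: the 80-bit pattern with exponent 0x3fff, integer
+   bit clear and significand 0x4000000000000000ULL is a 387 "unnormal";
+   the code above files it under fcNormal rather than rejecting it. */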
+
+void
+APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i1 >> 52) & 0x7ff;
+ uint64_t mysignificand = i1 & 0xfffffffffffffLL;
+ uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
+ uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
+
+ initialize(&APFloat::PPCDoubleDouble);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i1>>63);
+ sign2 = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ // exponent2 and significand2 are required to be 0; we don't check
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ // exponent2 and significand2 are required to be 0; we don't check
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless. So is the whole second word, but keep it
+ // for determinism.
+ category = fcNaN;
+ exponent2 = myexponent2;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ // Note there is no category2; the second word is treated as if it is
+ // fcNormal, although it might be something else considered by itself.
+ exponent = myexponent - 1023;
+ exponent2 = myexponent2 - 1023;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ significandParts()[0] |= 0x10000000000000LL; // integer bit
+ if (myexponent2==0)
+ exponent2 = -1022;
+ else
+ significandParts()[1] |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==64);
+ uint64_t i = *api.getRawData();
+ uint64_t myexponent = (i >> 52) & 0x7ff;
+ uint64_t mysignificand = i & 0xfffffffffffffLL;
+
+ initialize(&APFloat::IEEEdouble);
+ assert(partCount()==1);
+
+ sign = static_cast<unsigned int>(i>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 1023;
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ *significandParts() |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromFloatAPInt(const APInt & api)
+{
+ assert(api.getBitWidth()==32);
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 23) & 0xff;
+ uint32_t mysignificand = i & 0x7fffff;
+
+ initialize(&APFloat::IEEEsingle);
+ assert(partCount()==1);
+
+ sign = i >> 31;
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0xff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0xff && mysignificand!=0) {
+ // sign, exponent, significand meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 127; //bias
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -126;
+ else
+ *significandParts() |= 0x800000; // integer bit
+ }
+}
+
+/// Treat api as containing the bits of a floating point number. Currently
+/// we infer the floating point type from the size of the APInt. The
+/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
+/// when the size is anything else).
+void
+APFloat::initFromAPInt(const APInt& api, bool isIEEE)
+{
+ if (api.getBitWidth() == 32)
+ return initFromFloatAPInt(api);
+ else if (api.getBitWidth()==64)
+ return initFromDoubleAPInt(api);
+ else if (api.getBitWidth()==80)
+ return initFromF80LongDoubleAPInt(api);
+ else if (api.getBitWidth()==128 && !isIEEE)
+ return initFromPPCDoubleDoubleAPInt(api);
+ else
+ assert(0);
+}
+
+APFloat::APFloat(const APInt& api, bool isIEEE)
+{
+ initFromAPInt(api, isIEEE);
+}
+
+APFloat::APFloat(float f)
+{
+ APInt api = APInt(32, 0);
+ initFromAPInt(api.floatToBits(f));
+}
+
+APFloat::APFloat(double d)
+{
+ APInt api = APInt(64, 0);
+ initFromAPInt(api.doubleToBits(d));
+}
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
new file mode 100644
index 0000000..73bf774
--- /dev/null
+++ b/lib/Support/APInt.cpp
@@ -0,0 +1,2816 @@
+//===-- APInt.cpp - Implement APInt class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision integer
+// constant values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "apint"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cmath>
+#include <limits>
+#include <cstring>
+#include <cstdlib>
+using namespace llvm;
+
+/// A utility function for allocating memory, checking for allocation failures,
+/// and ensuring the contents are zeroed.
+inline static uint64_t* getClearedMemory(unsigned numWords) {
+ uint64_t * result = new uint64_t[numWords];
+ assert(result && "APInt memory allocation fails!");
+ memset(result, 0, numWords * sizeof(uint64_t));
+ return result;
+}
+
+/// A utility function for allocating memory and checking for allocation
+/// failure. The content is not zeroed.
+inline static uint64_t* getMemory(unsigned numWords) {
+ uint64_t * result = new uint64_t[numWords];
+ assert(result && "APInt memory allocation fails!");
+ return result;
+}
+
+void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
+ pVal = getClearedMemory(getNumWords());
+ pVal[0] = val;
+ if (isSigned && int64_t(val) < 0)
+ for (unsigned i = 1; i < getNumWords(); ++i)
+ pVal[i] = -1ULL;
+}
+
+void APInt::initSlowCase(const APInt& that) {
+ pVal = getMemory(getNumWords());
+ memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
+}
+
+
+APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
+ : BitWidth(numBits), VAL(0) {
+ assert(BitWidth && "bitwidth too small");
+ assert(bigVal && "Null pointer detected!");
+ if (isSingleWord())
+ VAL = bigVal[0];
+ else {
+ // Get memory, cleared to 0
+ pVal = getClearedMemory(getNumWords());
+ // Calculate the number of words to copy
+ unsigned words = std::min<unsigned>(numWords, getNumWords());
+ // Copy the words from bigVal to pVal
+ memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
+ }
+ // Make sure unused high bits are cleared
+ clearUnusedBits();
+}
+
+APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen,
+ uint8_t radix)
+ : BitWidth(numbits), VAL(0) {
+ assert(BitWidth && "bitwidth too small");
+ fromString(numbits, StrStart, slen, radix);
+}
+
+APInt& APInt::AssignSlowCase(const APInt& RHS) {
+ // Don't do anything for X = X
+ if (this == &RHS)
+ return *this;
+
+ if (BitWidth == RHS.getBitWidth()) {
+ // assume same bit-width single-word case is already handled
+ assert(!isSingleWord());
+ memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
+ return *this;
+ }
+
+ if (isSingleWord()) {
+ // assume case where both are single words is already handled
+ assert(!RHS.isSingleWord());
+ VAL = 0;
+ pVal = getMemory(RHS.getNumWords());
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ } else if (getNumWords() == RHS.getNumWords())
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ else if (RHS.isSingleWord()) {
+ delete [] pVal;
+ VAL = RHS.VAL;
+ } else {
+ delete [] pVal;
+ pVal = getMemory(RHS.getNumWords());
+ memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+ }
+ BitWidth = RHS.BitWidth;
+ return clearUnusedBits();
+}
+
+APInt& APInt::operator=(uint64_t RHS) {
+ if (isSingleWord())
+ VAL = RHS;
+ else {
+ pVal[0] = RHS;
+ memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+ }
+ return clearUnusedBits();
+}
+
+/// Profile - This method 'profiles' an APInt for use with FoldingSet.
+void APInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger(BitWidth);
+
+ if (isSingleWord()) {
+ ID.AddInteger(VAL);
+ return;
+ }
+
+ unsigned NumWords = getNumWords();
+ for (unsigned i = 0; i < NumWords; ++i)
+ ID.AddInteger(pVal[i]);
+}
+
+/// add_1 - This function adds a single "digit" integer, y, to the multiple
+/// "digit" integer array, x[]. x[] is modified to reflect the addition and
+/// 1 is returned if there is a carry out, otherwise 0 is returned.
+/// @returns the carry of the addition.
+static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ dest[i] = y + x[i];
+ if (dest[i] < y)
+ y = 1; // Carry one to next digit.
+ else {
+ y = 0; // No need to carry so exit early
+ break;
+ }
+ }
+ return y;
+}
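+
+/* Illustrative sketch of the carry propagation: adding 1 to the
+   two-word value { ~0ULL, 0 } ripples into the second word:
+
+     uint64_t x[2] = { ~0ULL, 0 };
+     bool carry = add_1(x, x, 2, 1);
+     // x == { 0, 1 } and carry == false (nothing out of the top word).
+*/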
+
+/// @brief Prefix increment operator. Increments the APInt by one.
+APInt& APInt::operator++() {
+ if (isSingleWord())
+ ++VAL;
+ else
+ add_1(pVal, pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// the multi-digit integer array, x[], propagating the borrowed 1 value until
+/// no further borrowing is needed or it runs out of "digits" in x. The result
+/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
+/// In other words, if y > x then this function returns 1, otherwise 0.
+/// @returns the borrow out of the subtraction
+static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t X = x[i];
+ x[i] -= y;
+ if (y > X)
+ y = 1; // We have to "borrow 1" from next "digit"
+ else {
+ y = 0; // No need to borrow
+ break; // Remaining digits are unchanged so exit early
+ }
+ }
+ return bool(y);
+}
+
+/// @brief Prefix decrement operator. Decrements the APInt by one.
+APInt& APInt::operator--() {
+ if (isSingleWord())
+ --VAL;
+ else
+ sub_1(pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// add - This function adds the integer array x to the integer array y and
+/// places the result in dest.
+/// @returns the carry out from the addition
+/// @brief General addition of 64-bit integer arrays
+static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool carry = false;
+ for (unsigned i = 0; i< len; ++i) {
+ uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
+ dest[i] = x[i] + y[i] + carry;
+ carry = dest[i] < limit || (carry && dest[i] == limit);
+ }
+ return carry;
+}
+
+/// Adds the RHS APInt to this APInt.
+/// @returns this, after addition of RHS.
+/// @brief Addition assignment operator.
+APInt& APInt::operator+=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL += RHS.VAL;
+ else {
+ add(pVal, pVal, RHS.pVal, getNumWords());
+ }
+ return clearUnusedBits();
+}
+
+/// Subtracts the integer array y from the integer array x
+/// @returns the borrow out.
+/// @brief Generalized subtraction of 64-bit integer arrays.
+static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool borrow = false;
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
+ borrow = y[i] > x_tmp || (borrow && x[i] == 0);
+ dest[i] = x_tmp - y[i];
+ }
+ return borrow;
+}
+
+/// Subtracts the RHS APInt from this APInt
+/// @returns this, after subtraction
+/// @brief Subtraction assignment operator.
+APInt& APInt::operator-=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL -= RHS.VAL;
+ else
+ sub(pVal, pVal, RHS.pVal, getNumWords());
+ return clearUnusedBits();
+}
+
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
+/// into dest.
+/// @returns the carry out of the multiplication.
+/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
+static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ // Split y into high 32-bit part (hy) and low 32-bit part (ly)
+ uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
+ uint64_t carry = 0;
+
+ // For each digit of x.
+ for (unsigned i = 0; i < len; ++i) {
+ // Split x into high and low words
+ uint64_t lx = x[i] & 0xffffffffULL;
+ uint64_t hx = x[i] >> 32;
+ // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ dest[i] = carry + lx * ly;
+ // Determine if the add above introduces carry.
+ hasCarry = (dest[i] < carry) ? 1 : 0;
+ carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
+ // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
+ // (2^32 - 1) + 2^32 = 2^64.
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+ (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
+ }
+ return carry;
+}
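+
+/* The decomposition used above, written out: with B = 2^32, each 64-bit
+   digit splits as x[i] = hx*B + lx and y = hy*B + ly, so
+
+     x[i] * y = hx*hy*B^2 + (hx*ly + lx*hy)*B + lx*ly
+
+   and the partial products are accumulated 32 bits at a time while the
+   carries flow into the next 64-bit digit. */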
+
+/// Multiplies integer array x by integer array y and stores the result into
+/// the integer array dest. Note that dest's size must be >= xlen + ylen.
+/// @brief Generalized multiplication of integer arrays.
+static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
+ unsigned ylen) {
+ dest[xlen] = mul_1(dest, x, xlen, y[0]);
+ for (unsigned i = 1; i < ylen; ++i) {
+ uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
+ uint64_t carry = 0, lx = 0, hx = 0;
+ for (unsigned j = 0; j < xlen; ++j) {
+ lx = x[j] & 0xffffffffULL;
+ hx = x[j] >> 32;
+      // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ uint64_t resul = carry + lx * ly;
+ hasCarry = (resul < carry) ? 1 : 0;
+ carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ resul = (carry << 32) | (resul & 0xffffffffULL);
+ dest[i+j] += resul;
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
+ (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
+ ((lx * hy) >> 32) + hx * hy;
+ }
+ dest[i+xlen] = carry;
+ }
+}
+
+APInt& APInt::operator*=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL *= RHS.VAL;
+ clearUnusedBits();
+ return *this;
+ }
+
+ // Get some bit facts about LHS and check for zero
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
+ if (!lhsWords)
+ // 0 * X ===> 0
+ return *this;
+
+ // Get some bit facts about RHS and check for zero
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
+ if (!rhsWords) {
+ // X * 0 ===> 0
+ clear();
+ return *this;
+ }
+
+ // Allocate space for the result
+ unsigned destWords = rhsWords + lhsWords;
+ uint64_t *dest = getMemory(destWords);
+
+ // Perform the long multiply
+ mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
+
+ // Copy result back into *this
+ clear();
+ unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
+ memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
+
+ // delete dest array and return
+ delete[] dest;
+ return *this;
+}
+
+APInt& APInt::operator&=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL &= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] &= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator|=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL |= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] |= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator^=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL ^= RHS.VAL;
+ this->clearUnusedBits();
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] ^= RHS.pVal[i];
+ return clearUnusedBits();
+}
+
+APInt APInt::AndSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t* val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] & RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::OrSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] | RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::XorSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] ^ RHS.pVal[i];
+
+  // Clear the unused high bits in case they got set.
+ return APInt(val, getBitWidth()).clearUnusedBits();
+}
+
+bool APInt::operator !() const {
+ if (isSingleWord())
+ return !VAL;
+
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ if (pVal[i])
+ return false;
+ return true;
+}
+
+APInt APInt::operator*(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL * RHS.VAL);
+ APInt Result(*this);
+ Result *= RHS;
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator+(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL + RHS.VAL);
+ APInt Result(BitWidth, 0);
+ add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator-(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL - RHS.VAL);
+ APInt Result(BitWidth, 0);
+ sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+bool APInt::operator[](unsigned bitPosition) const {
+ return (maskBit(bitPosition) &
+ (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
+}
+
+bool APInt::EqualSlowCase(const APInt& RHS) const {
+ // Get some facts about the number of bits used in the two operands.
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If the number of bits isn't the same, they aren't equal
+ if (n1 != n2)
+ return false;
+
+ // If the number of bits fits in a word, we only need to compare the low word.
+ if (n1 <= APINT_BITS_PER_WORD)
+ return pVal[0] == RHS.pVal[0];
+
+ // Otherwise, compare everything
+ for (int i = whichWord(n1 - 1); i >= 0; --i)
+ if (pVal[i] != RHS.pVal[i])
+ return false;
+ return true;
+}
+
+bool APInt::EqualSlowCase(uint64_t Val) const {
+ unsigned n = getActiveBits();
+ if (n <= APINT_BITS_PER_WORD)
+ return pVal[0] == Val;
+ else
+ return false;
+}
+
+bool APInt::ult(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord())
+ return VAL < RHS.VAL;
+
+ // Get active bit length of both operands
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If magnitude of LHS is less than RHS, return true.
+ if (n1 < n2)
+ return true;
+
+  // If magnitude of RHS is greater than LHS, return false.
+ if (n2 < n1)
+ return false;
+
+  // If they both fit in a word, just compare the low-order word
+ if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
+ return pVal[0] < RHS.pVal[0];
+
+ // Otherwise, compare all words
+ unsigned topWord = whichWord(std::max(n1,n2)-1);
+ for (int i = topWord; i >= 0; --i) {
+ if (pVal[i] > RHS.pVal[i])
+ return false;
+ if (pVal[i] < RHS.pVal[i])
+ return true;
+ }
+ return false;
+}
+
+bool APInt::slt(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord()) {
+ int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
+ int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
+ return lhsSext < rhsSext;
+ }
+
+ APInt lhs(*this);
+ APInt rhs(RHS);
+ bool lhsNeg = isNegative();
+ bool rhsNeg = rhs.isNegative();
+ if (lhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ lhs.flip();
+ lhs++;
+ }
+ if (rhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ rhs.flip();
+ rhs++;
+ }
+
+ // Now we have unsigned values to compare so do the comparison if necessary
+ // based on the negativeness of the values.
+ if (lhsNeg)
+ if (rhsNeg)
+ return lhs.ugt(rhs);
+ else
+ return true;
+ else if (rhsNeg)
+ return false;
+ else
+ return lhs.ult(rhs);
+}
+
+APInt& APInt::set(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL |= maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
+ return *this;
+}
+
+/// Set the given bit to 0 whose position is given as "bitPosition".
+/// @brief Set a given bit to 0.
+APInt& APInt::clear(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL &= ~maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
+ return *this;
+}
+
+/// @brief Toggle every bit to its opposite value.
+
+/// Toggle a given bit to its opposite value whose position is given
+/// as "bitPosition".
+/// @brief Toggles a given bit to its opposite value.
+APInt& APInt::flip(unsigned bitPosition) {
+ assert(bitPosition < BitWidth && "Out of the bit-width range!");
+ if ((*this)[bitPosition]) clear(bitPosition);
+ else set(bitPosition);
+ return *this;
+}
+
+unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) {
+ assert(str != 0 && "Invalid value string");
+ assert(slen > 0 && "Invalid string length");
+
+  // Each computation below needs to know if it's negative
+ unsigned isNegative = str[0] == '-';
+ if (isNegative) {
+ slen--;
+ str++;
+ }
+  // For power-of-two radixes, the number of bits required is accurately
+  // and easily computed
+ if (radix == 2)
+ return slen + isNegative;
+ if (radix == 8)
+ return slen * 3 + isNegative;
+ if (radix == 16)
+ return slen * 4 + isNegative;
+
+ // Otherwise it must be radix == 10, the hard case
+ assert(radix == 10 && "Invalid radix");
+
+ // This is grossly inefficient but accurate. We could probably do something
+ // with a computation of roughly slen*64/20 and then adjust by the value of
+ // the first few digits. But, I'm not sure how accurate that could be.
+
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large. This avoids the assertion in the constructor.
+ unsigned sufficient = slen*64/18;
+
+ // Convert to the actual binary value.
+ APInt tmp(sufficient, str, slen, radix);
+
+ // Compute how many bits are required.
+ return isNegative + tmp.logBase2() + 1;
+}
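+
+/* Illustrative sketch: for power-of-two radices the estimate comes
+   straight from the digit count, while radix 10 measures the converted
+   value:
+
+     APInt::getBitsNeeded("ff", 2, 16);   // 8 (2 hex digits * 4 bits)
+     APInt::getBitsNeeded("255", 3, 10);  // 8 (logBase2(255) == 7, + 1)
+*/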
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+// When targeting x86, both GCC and LLVM seem to recognize this as a
+// rotate instruction.
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+#define mix(a,b,c) \
+ { \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+ }
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+#define final(a,b,c) \
+ { \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+ }
+
+// hashword() was adapted from http://www.burtleburtle.net, by Bob
+// Jenkins. k is a pointer to an array of uint32_t values; length is
+// the length of the key, in 32-bit chunks. This version only handles
+// keys that are a multiple of 32 bits in size.
+static inline uint32_t hashword(const uint64_t *k64, size_t length)
+{
+ const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch (length) { /* all the case statements fall through */
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+// hashword8() was adapted from http://www.burtleburtle.net, by Bob
+// Jenkins. This computes a 32-bit hash from one 64-bit word. When
+// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
+// function into about 35 instructions when inlined.
+static inline uint32_t hashword8(const uint64_t k64)
+{
+ uint32_t a,b,c;
+ a = b = c = 0xdeadbeef + 4;
+ b += k64 >> 32;
+ a += k64 & 0xffffffff;
+ final(a,b,c);
+ return c;
+}
+#undef final
+#undef mix
+#undef rot
+
+uint64_t APInt::getHashValue() const {
+ uint64_t hash;
+ if (isSingleWord())
+ hash = hashword8(VAL);
+ else
+ hash = hashword(pVal, getNumWords()*2);
+ return hash;
+}
+
+/// HiBits - This function returns the high "numBits" bits of this APInt.
+APInt APInt::getHiBits(unsigned numBits) const {
+ return APIntOps::lshr(*this, BitWidth - numBits);
+}
+
+/// LoBits - This function returns the low "numBits" bits of this APInt.
+APInt APInt::getLoBits(unsigned numBits) const {
+ return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
+ BitWidth - numBits);
+}
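+
+// A hand-worked sketch of the two accessors above, on an 8-bit value:
+//   APInt(8, 0xAB).getHiBits(4);  // 0x0A, the high nibble shifted down
+//   APInt(8, 0xAB).getLoBits(4);  // 0x0B, the low nibble in place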
+
+bool APInt::isPowerOf2() const {
+ return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
+}
+
+unsigned APInt::countLeadingZerosSlowCase() const {
+ unsigned Count = 0;
+ for (unsigned i = getNumWords(); i > 0u; --i) {
+ if (pVal[i-1] == 0)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += CountLeadingZeros_64(pVal[i-1]);
+ break;
+ }
+ }
+ unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
+ if (remainder)
+ Count -= APINT_BITS_PER_WORD - remainder;
+ return std::min(Count, BitWidth);
+}
+
+static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
+ unsigned Count = 0;
+ if (skip)
+ V <<= skip;
+ while (V && (V & (1ULL << 63))) {
+ Count++;
+ V <<= 1;
+ }
+ return Count;
+}
+
+unsigned APInt::countLeadingOnes() const {
+ if (isSingleWord())
+ return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
+
+ unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
+ unsigned shift;
+ if (!highWordBits) {
+ highWordBits = APINT_BITS_PER_WORD;
+ shift = 0;
+ } else {
+ shift = APINT_BITS_PER_WORD - highWordBits;
+ }
+ int i = getNumWords() - 1;
+ unsigned Count = countLeadingOnes_64(pVal[i], shift);
+ if (Count == highWordBits) {
+ for (i--; i >= 0; --i) {
+ if (pVal[i] == -1ULL)
+ Count += APINT_BITS_PER_WORD;
+ else {
+ Count += countLeadingOnes_64(pVal[i], 0);
+ break;
+ }
+ }
+ }
+ return Count;
+}
+
+unsigned APInt::countTrailingZeros() const {
+ if (isSingleWord())
+ return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && pVal[i] == 0; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += CountTrailingZeros_64(pVal[i]);
+ return std::min(Count, BitWidth);
+}
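+
+// A usage sketch of the counting routines, with a hand-worked 16-bit value:
+//   APInt v(16, 0xFF00);
+//   v.countLeadingOnes();    // 8
+//   v.countTrailingZeros();  // 8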
+
+unsigned APInt::countTrailingOnesSlowCase() const {
+ unsigned Count = 0;
+ unsigned i = 0;
+ for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
+ Count += APINT_BITS_PER_WORD;
+ if (i < getNumWords())
+ Count += CountTrailingOnes_64(pVal[i]);
+ return std::min(Count, BitWidth);
+}
+
+unsigned APInt::countPopulationSlowCase() const {
+ unsigned Count = 0;
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ Count += CountPopulation_64(pVal[i]);
+ return Count;
+}
+
+APInt APInt::byteSwap() const {
+ assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
+ if (BitWidth == 16)
+ return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
+ else if (BitWidth == 32)
+ return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
+ else if (BitWidth == 48) {
+ unsigned Tmp1 = unsigned(VAL >> 16);
+ Tmp1 = ByteSwap_32(Tmp1);
+ uint16_t Tmp2 = uint16_t(VAL);
+ Tmp2 = ByteSwap_16(Tmp2);
+ return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
+ } else if (BitWidth == 64)
+ return APInt(BitWidth, ByteSwap_64(VAL));
+ else {
+ APInt Result(BitWidth, 0);
+ char *pByte = (char*)Result.pVal;
+ for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
+ char Tmp = pByte[i];
+ pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
+ pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
+ }
+ return Result;
+ }
+}
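+
+// A hand-worked sketch of the 32-bit case above:
+//   APInt(32, 0x12345678).byteSwap();  // 0x78563412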
+
+APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
+ const APInt& API2) {
+ APInt A = API1, B = API2;
+ while (!!B) {
+ APInt T = B;
+ B = APIntOps::urem(A, B);
+ A = T;
+ }
+ return A;
+}
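+
+// A quick sketch of the Euclidean loop above:
+//   GreatestCommonDivisor(APInt(32, 12), APInt(32, 18));  // 6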
+
+APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.D = Double;
+
+ // Get the sign bit from the highest order bit
+ bool isNeg = T.I >> 63;
+
+  // Get the 11-bit exponent and adjust for the 1023 exponent bias
+ int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
+
+  // If the exponent is negative, the value is < 1 so just return 0.
+ if (exp < 0)
+ return APInt(width, 0u);
+
+ // Extract the mantissa by clearing the top 12 bits (sign + exponent).
+ uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
+
+ // If the exponent doesn't shift all bits out of the mantissa
+ if (exp < 52)
+ return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
+ APInt(width, mantissa >> (52 - exp));
+
+  // If the client didn't provide enough bits for us to shift the mantissa
+  // into, then the result is undefined; just return 0.
+ if (width <= exp - 52)
+ return APInt(width, 0);
+
+ // Otherwise, we have to shift the mantissa bits up to the right location
+ APInt Tmp(width, mantissa);
+ Tmp = Tmp.shl((unsigned)exp - 52);
+ return isNeg ? -Tmp : Tmp;
+}
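+
+// Note that the mantissa shift above discards the fractional bits, so the
+// conversion truncates toward zero; a hand-worked sketch:
+//   RoundDoubleToAPInt(3.9, 32);   // 3
+//   RoundDoubleToAPInt(-3.9, 32);  // -3, i.e. 0xFFFFFFFD in 32 bits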
+
+/// RoundToDouble - This function converts this APInt to a double.
+/// The layout of a double is as follows (IEEE Standard 754):
+///   --------------------------------------
+///  | Sign    Exponent    Fraction    Bias |
+///  |--------------------------------------|
+///  | 1[63]   11[62-52]   52[51-00]   1023 |
+///   --------------------------------------
+double APInt::roundToDouble(bool isSigned) const {
+
+  // Handle the simple case where the value is contained in one uint64_t.
+  if (isSingleWord()) {
+    if (isSigned) {
+      int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
+      return double(sext);
+    } else
+      return double(VAL);
+  }
+
+  // A multi-word value whose active bits fit in one word is necessarily
+  // nonnegative (its sign bit is a cleared high bit), so it converts the
+  // same way whether signed or not. Note that reading VAL here would be
+  // wrong; pVal is the active member for multi-word values.
+  if (getActiveBits() <= APINT_BITS_PER_WORD)
+    return double(pVal[0]);
+
+ // Determine if the value is negative.
+ bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
+
+ // Construct the absolute value if we're negative.
+ APInt Tmp(isNeg ? -(*this) : (*this));
+
+ // Figure out how many bits we're using.
+ unsigned n = Tmp.getActiveBits();
+
+ // The exponent (without bias normalization) is just the number of bits
+ // we are using. Note that the sign bit is gone since we constructed the
+ // absolute value.
+ uint64_t exp = n;
+
+ // Return infinity for exponent overflow
+ if (exp > 1023) {
+ if (!isSigned || !isNeg)
+ return std::numeric_limits<double>::infinity();
+ else
+ return -std::numeric_limits<double>::infinity();
+ }
+ exp += 1023; // Increment for 1023 bias
+
+ // Number of bits in mantissa is 52. To obtain the mantissa value, we must
+ // extract the high 52 bits from the correct words in pVal.
+ uint64_t mantissa;
+ unsigned hiWord = whichWord(n-1);
+ if (hiWord == 0) {
+ mantissa = Tmp.pVal[0];
+ if (n > 52)
+ mantissa >>= n - 52; // shift down, we want the top 52 bits.
+ } else {
+ assert(hiWord > 0 && "huh?");
+ uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
+ uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
+ mantissa = hibits | lobits;
+ }
+
+ // The leading bit of mantissa is implicit, so get rid of it.
+ uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.I = sign | (exp << 52) | mantissa;
+ return T.D;
+}
+
+// Truncate to new width.
+APInt &APInt::trunc(unsigned width) {
+ assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width && "Can't truncate to 0 bits");
+ unsigned wordsBefore = getNumWords();
+ BitWidth = width;
+ unsigned wordsAfter = getNumWords();
+ if (wordsBefore != wordsAfter) {
+ if (wordsAfter == 1) {
+ uint64_t *tmp = pVal;
+ VAL = pVal[0];
+ delete [] tmp;
+ } else {
+ uint64_t *newVal = getClearedMemory(wordsAfter);
+ for (unsigned i = 0; i < wordsAfter; ++i)
+ newVal[i] = pVal[i];
+ delete [] pVal;
+ pVal = newVal;
+ }
+ }
+ return clearUnusedBits();
+}
+
+// Sign extend to a new width.
+APInt &APInt::sext(unsigned width) {
+ assert(width > BitWidth && "Invalid APInt SignExtend request");
+ // If the sign bit isn't set, this is the same as zext.
+ if (!isNegative()) {
+ zext(width);
+ return *this;
+ }
+
+ // The sign bit is set. First, get some facts
+ unsigned wordsBefore = getNumWords();
+ unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
+ BitWidth = width;
+ unsigned wordsAfter = getNumWords();
+
+ // Mask the high order word appropriately
+ if (wordsBefore == wordsAfter) {
+ unsigned newWordBits = width % APINT_BITS_PER_WORD;
+ // The extension is contained to the wordsBefore-1th word.
+ uint64_t mask = ~0ULL;
+ if (newWordBits)
+ mask >>= APINT_BITS_PER_WORD - newWordBits;
+ mask <<= wordBits;
+ if (wordsBefore == 1)
+ VAL |= mask;
+ else
+ pVal[wordsBefore-1] |= mask;
+ return clearUnusedBits();
+ }
+
+ uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
+ uint64_t *newVal = getMemory(wordsAfter);
+ if (wordsBefore == 1)
+ newVal[0] = VAL | mask;
+ else {
+ for (unsigned i = 0; i < wordsBefore; ++i)
+ newVal[i] = pVal[i];
+ newVal[wordsBefore-1] |= mask;
+ }
+ for (unsigned i = wordsBefore; i < wordsAfter; i++)
+ newVal[i] = -1ULL;
+ if (wordsBefore != 1)
+ delete [] pVal;
+ pVal = newVal;
+ return clearUnusedBits();
+}
+
+// Zero extend to a new width.
+APInt &APInt::zext(unsigned width) {
+ assert(width > BitWidth && "Invalid APInt ZeroExtend request");
+ unsigned wordsBefore = getNumWords();
+ BitWidth = width;
+ unsigned wordsAfter = getNumWords();
+ if (wordsBefore != wordsAfter) {
+ uint64_t *newVal = getClearedMemory(wordsAfter);
+ if (wordsBefore == 1)
+ newVal[0] = VAL;
+ else
+ for (unsigned i = 0; i < wordsBefore; ++i)
+ newVal[i] = pVal[i];
+ if (wordsBefore != 1)
+ delete [] pVal;
+ pVal = newVal;
+ }
+ return *this;
+}
+
+APInt &APInt::zextOrTrunc(unsigned width) {
+ if (BitWidth < width)
+ return zext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
+
+APInt &APInt::sextOrTrunc(unsigned width) {
+ if (BitWidth < width)
+ return sext(width);
+ if (BitWidth > width)
+ return trunc(width);
+ return *this;
+}
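+
+// A usage sketch of the resizing operations above, hand-worked on small
+// values:
+//   APInt(8, 0xF0).sextOrTrunc(16);    // 0xFFF0: widens, replicating the sign
+//   APInt(8, 0xF0).zextOrTrunc(16);    // 0x00F0: widens with zeros
+//   APInt(16, 0x1234).zextOrTrunc(8);  // 0x34: narrows, keeping the low bits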
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+APInt APInt::ashr(const APInt &shiftAmt) const {
+ return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Arithmetic right-shift this APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+APInt APInt::ashr(unsigned shiftAmt) const {
+ assert(shiftAmt <= BitWidth && "Invalid shift amount");
+ // Handle a degenerate case
+ if (shiftAmt == 0)
+ return *this;
+
+ // Handle single word shifts with built-in ashr
+ if (isSingleWord()) {
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0); // undefined
+ else {
+ unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
+ return APInt(BitWidth,
+ (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
+ }
+ }
+
+ // If all the bits were shifted out, the result is, technically, undefined.
+ // We return -1 if it was negative, 0 otherwise. We check this early to avoid
+ // issues in the algorithm below.
+ if (shiftAmt == BitWidth) {
+ if (isNegative())
+ return APInt(BitWidth, -1ULL, true);
+ else
+ return APInt(BitWidth, 0);
+ }
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // Compute some values needed by the following shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
+ unsigned breakWord = getNumWords() - 1 - offset; // last word affected
+ unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
+ if (bitsInWord == 0)
+ bitsInWord = APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ // Move the words containing significant bits
+ for (unsigned i = 0; i <= breakWord; ++i)
+ val[i] = pVal[i+offset]; // move whole word
+
+ // Adjust the top significant word for sign bit fill, if negative
+ if (isNegative())
+ if (bitsInWord < APINT_BITS_PER_WORD)
+ val[breakWord] |= ~0ULL << bitsInWord; // set high bits
+ } else {
+ // Shift the low order words
+ for (unsigned i = 0; i < breakWord; ++i) {
+ // This combines the shifted corresponding word with the low bits from
+ // the next word (shifted into this word's high bits).
+ val[i] = (pVal[i+offset] >> wordShift) |
+ (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
+ }
+
+ // Shift the break word. In this case there are no bits from the next word
+ // to include in this word.
+ val[breakWord] = pVal[breakWord+offset] >> wordShift;
+
+    // Deal with sign extension in the break word, and possibly the word
+    // before it.
+ if (isNegative()) {
+ if (wordShift > bitsInWord) {
+ if (breakWord > 0)
+ val[breakWord-1] |=
+ ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
+ val[breakWord] |= ~0ULL;
+ } else
+ val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
+ }
+ }
+
+ // Remaining words are 0 or -1, just assign them.
+ uint64_t fillValue = (isNegative() ? -1ULL : 0);
+ for (unsigned i = breakWord+1; i < getNumWords(); ++i)
+ val[i] = fillValue;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
+
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+APInt APInt::lshr(const APInt &shiftAmt) const {
+ return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+APInt APInt::lshr(unsigned shiftAmt) const {
+ if (isSingleWord()) {
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+ else
+ return APInt(BitWidth, this->VAL >> shiftAmt);
+ }
+
+ // If all the bits were shifted out, the result is 0. This avoids issues
+ // with shifting by the size of the integer type, which produces undefined
+ // results. We define these "undefined results" to always be 0.
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+
+ // If none of the bits are shifted out, the result is *this. This avoids
+ // issues with shifting by the size of the integer type, which produces
+ // undefined results in the code below. This is also an optimization.
+ if (shiftAmt == 0)
+ return *this;
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // If we are shifting less than a word, compute the shift with a simple carry
+ if (shiftAmt < APINT_BITS_PER_WORD) {
+ uint64_t carry = 0;
+ for (int i = getNumWords()-1; i >= 0; --i) {
+ val[i] = (pVal[i] >> shiftAmt) | carry;
+ carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
+ }
+ return APInt(val, BitWidth).clearUnusedBits();
+ }
+
+ // Compute some values needed by the remaining shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ for (unsigned i = 0; i < getNumWords() - offset; ++i)
+ val[i] = pVal[i+offset];
+ for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
+ val[i] = 0;
+ return APInt(val,BitWidth).clearUnusedBits();
+ }
+
+ // Shift the low order words
+ unsigned breakWord = getNumWords() - offset -1;
+ for (unsigned i = 0; i < breakWord; ++i)
+ val[i] = (pVal[i+offset] >> wordShift) |
+ (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
+ // Shift the break word.
+ val[breakWord] = pVal[breakWord+offset] >> wordShift;
+
+ // Remaining words are 0
+ for (unsigned i = breakWord+1; i < getNumWords(); ++i)
+ val[i] = 0;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
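+
+// A sketch contrasting the two right shifts, hand-worked on an 8-bit value:
+//   APInt v(8, 0x80);
+//   v.ashr(4);  // 0xF8: the sign bit is replicated
+//   v.lshr(4);  // 0x08: zeros are shifted in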
+
+/// Left-shift this APInt by shiftAmt.
+/// @brief Left-shift function.
+APInt APInt::shl(const APInt &shiftAmt) const {
+ // It's undefined behavior in C to shift by BitWidth or greater.
+ return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::shlSlowCase(unsigned shiftAmt) const {
+ // If all the bits were shifted out, the result is 0. This avoids issues
+ // with shifting by the size of the integer type, which produces undefined
+ // results. We define these "undefined results" to always be 0.
+ if (shiftAmt == BitWidth)
+ return APInt(BitWidth, 0);
+
+ // If none of the bits are shifted out, the result is *this. This avoids a
+ // lshr by the words size in the loop below which can produce incorrect
+ // results. It also avoids the expensive computation below for a common case.
+ if (shiftAmt == 0)
+ return *this;
+
+ // Create some space for the result.
+ uint64_t * val = new uint64_t[getNumWords()];
+
+ // If we are shifting less than a word, do it the easy way
+ if (shiftAmt < APINT_BITS_PER_WORD) {
+ uint64_t carry = 0;
+ for (unsigned i = 0; i < getNumWords(); i++) {
+ val[i] = pVal[i] << shiftAmt | carry;
+ carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
+ }
+ return APInt(val, BitWidth).clearUnusedBits();
+ }
+
+ // Compute some values needed by the remaining shift algorithms
+ unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
+ unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+
+ // If we are shifting whole words, just move whole words
+ if (wordShift == 0) {
+ for (unsigned i = 0; i < offset; i++)
+ val[i] = 0;
+ for (unsigned i = offset; i < getNumWords(); i++)
+ val[i] = pVal[i-offset];
+ return APInt(val,BitWidth).clearUnusedBits();
+ }
+
+ // Copy whole words from this to Result.
+ unsigned i = getNumWords() - 1;
+ for (; i > offset; --i)
+ val[i] = pVal[i-offset] << wordShift |
+ pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
+ val[offset] = pVal[0] << wordShift;
+ for (i = 0; i < offset; ++i)
+ val[i] = 0;
+ return APInt(val, BitWidth).clearUnusedBits();
+}
+
+APInt APInt::rotl(const APInt &rotateAmt) const {
+ return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::rotl(unsigned rotateAmt) const {
+ if (rotateAmt == 0)
+ return *this;
+ // Don't get too fancy, just use existing shift/or facilities
+ APInt hi(*this);
+ APInt lo(*this);
+  // shl and lshr return the shifted value rather than shifting in place,
+  // so the results must be assigned back.
+  hi = hi.shl(rotateAmt);
+  lo = lo.lshr(BitWidth - rotateAmt);
+ return hi | lo;
+}
+
+APInt APInt::rotr(const APInt &rotateAmt) const {
+ return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
+}
+
+APInt APInt::rotr(unsigned rotateAmt) const {
+ if (rotateAmt == 0)
+ return *this;
+ // Don't get too fancy, just use existing shift/or facilities
+ APInt hi(*this);
+ APInt lo(*this);
+  // As in rotl, shl and lshr return their result, so assign it back.
+  lo = lo.lshr(rotateAmt);
+  hi = hi.shl(BitWidth - rotateAmt);
+ return hi | lo;
+}
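+
+// A hand-worked sketch of both rotates on an 8-bit value:
+//   APInt(8, 0x81).rotl(1);  // 0x03
+//   APInt(8, 0x81).rotr(1);  // 0xC0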
+
+// Square Root - this method computes and returns the square root of "this".
+// Three mechanisms are used for computation. For small values (<= 5 bits),
+// a table lookup is done. This gets some performance for common cases. For
+// values using less than 52 bits, the value is converted to double and then
+// the libc sqrt function is called. The result is rounded and then converted
+// back to a uint64_t which is then used to construct the result. Finally,
+// the Babylonian method for computing square roots is used.
+APInt APInt::sqrt() const {
+
+ // Determine the magnitude of the value.
+ unsigned magnitude = getActiveBits();
+
+ // Use a fast table for some small values. This also gets rid of some
+ // rounding errors in libc sqrt for small values.
+ if (magnitude <= 5) {
+ static const uint8_t results[32] = {
+ /* 0 */ 0,
+ /* 1- 2 */ 1, 1,
+ /* 3- 6 */ 2, 2, 2, 2,
+ /* 7-12 */ 3, 3, 3, 3, 3, 3,
+ /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
+ /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ /* 31 */ 6
+ };
+ return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
+ }
+
+ // If the magnitude of the value fits in less than 52 bits (the precision of
+ // an IEEE double precision floating point value), then we can use the
+ // libc sqrt function which will probably use a hardware sqrt computation.
+ // This should be faster than the algorithm below.
+ if (magnitude < 52) {
+#ifdef _MSC_VER
+ // Amazingly, VC++ doesn't have round().
+ return APInt(BitWidth,
+ uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
+#else
+ return APInt(BitWidth,
+ uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+#endif
+ }
+
+ // Okay, all the short cuts are exhausted. We must compute it. The following
+ // is a classical Babylonian method for computing the square root. This code
+  // was adapted to APInt from a wikipedia article on such computations.
+ // See http://www.wikipedia.org/ and go to the page named
+ // Calculate_an_integer_square_root.
+ unsigned nbits = BitWidth, i = 4;
+ APInt testy(BitWidth, 16);
+ APInt x_old(BitWidth, 1);
+ APInt x_new(BitWidth, 0);
+ APInt two(BitWidth, 2);
+
+ // Select a good starting value using binary logarithms.
+ for (;; i += 2, testy = testy.shl(2))
+ if (i >= nbits || this->ule(testy)) {
+ x_old = x_old.shl(i / 2);
+ break;
+ }
+
+ // Use the Babylonian method to arrive at the integer square root:
+ for (;;) {
+ x_new = (this->udiv(x_old) + x_old).udiv(two);
+ if (x_old.ule(x_new))
+ break;
+ x_old = x_new;
+ }
+
+ // Make sure we return the closest approximation
+ // NOTE: The rounding calculation below is correct. It will produce an
+ // off-by-one discrepancy with results from pari/gp. That discrepancy has been
+ // determined to be a rounding issue with pari/gp as it begins to use a
+ // floating point representation after 192 bits. There are no discrepancies
+ // between this algorithm and pari/gp for bit widths < 192 bits.
+ APInt square(x_old * x_old);
+ APInt nextSquare((x_old + 1) * (x_old +1));
+ if (this->ult(square))
+ return x_old;
+ else if (this->ule(nextSquare)) {
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
+ else
+ return x_old + 1;
+ } else
+ assert(0 && "Error in APInt::sqrt computation");
+ return x_old + 1;
+}
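+
+// A usage sketch touching two of the three paths above:
+//   APInt(32, 15).sqrt();       // 4 via the table (rounded up from 3.87...)
+//   APInt(64, 1000000).sqrt();  // 1000 via libc sqrt (value fits in 52 bits)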
+
+/// Computes the multiplicative inverse of this APInt for a given modulo. The
+/// iterative extended Euclidean algorithm is used to solve for this value,
+/// however we simplify it to speed up calculating only the inverse, and take
+/// advantage of div+rem calculations. We also use some tricks to avoid copying
+/// (potentially large) APInts around.
+APInt APInt::multiplicativeInverse(const APInt& modulo) const {
+ assert(ult(modulo) && "This APInt must be smaller than the modulo");
+
+ // Using the properties listed at the following web page (accessed 06/21/08):
+ // http://www.numbertheory.org/php/euclid.html
+ // (especially the properties numbered 3, 4 and 9) it can be proved that
+ // BitWidth bits suffice for all the computations in the algorithm implemented
+ // below. More precisely, this number of bits suffice if the multiplicative
+ // inverse exists, but may not suffice for the general extended Euclidean
+ // algorithm.
+
+ APInt r[2] = { modulo, *this };
+ APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
+ APInt q(BitWidth, 0);
+
+ unsigned i;
+ for (i = 0; r[i^1] != 0; i ^= 1) {
+ // An overview of the math without the confusing bit-flipping:
+ // q = r[i-2] / r[i-1]
+ // r[i] = r[i-2] % r[i-1]
+ // t[i] = t[i-2] - t[i-1] * q
+ udivrem(r[i], r[i^1], q, r[i]);
+ t[i] -= t[i^1] * q;
+ }
+
+ // If this APInt and the modulo are not coprime, there is no multiplicative
+ // inverse, so return 0. We check this by looking at the next-to-last
+ // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
+ // algorithm.
+ if (r[i] != 1)
+ return APInt(BitWidth, 0);
+
+ // The next-to-last t is the multiplicative inverse. However, we are
+  // interested in a positive inverse. Calculate a positive one from a negative
+ // one if necessary. A simple addition of the modulo suffices because
+ // abs(t[i]) is known to be less than *this/2 (see the link above).
+ return t[i].isNegative() ? t[i] + modulo : t[i];
+}
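+
+// A hand-worked sketch: APInt(8, 3).multiplicativeInverse(APInt(8, 7))
+// yields 5, since 3 * 5 == 15 == 2 * 7 + 1. A non-coprime pair such as
+// 4 and 8 yields 0.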
+
+/// Calculate the magic numbers required to implement a signed integer division
+/// by a constant as a sequence of multiplies, adds and shifts. Requires that
+/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
+/// Warren, Jr., chapter 10.
+APInt::ms APInt::magic() const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt ad, anc, delta, q1, r1, q2, r2, t;
+ APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
+ struct ms mag;
+
+ ad = d.abs();
+ t = signedMin + (d.lshr(d.getBitWidth() - 1));
+ anc = t - 1 - t.urem(ad); // absolute value of nc
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
+ r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
+ q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
+ r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
+ do {
+ p = p + 1;
+ q1 = q1<<1; // update q1 = 2p/abs(nc)
+ r1 = r1<<1; // update r1 = rem(2p/abs(nc))
+ if (r1.uge(anc)) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = q2<<1; // update q2 = 2p/abs(d)
+ r2 = r2<<1; // update r2 = rem(2p/abs(d))
+ if (r2.uge(ad)) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1.ule(delta) || (q1 == delta && r1 == 0));
+
+ mag.m = q2 + 1;
+ if (d.isNegative()) mag.m = -mag.m; // resulting magic number
+ mag.s = p - d.getBitWidth(); // resulting shift
+ return mag;
+}
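+
+// For example, for a signed 32-bit divide by 7 this produces the familiar
+// constants from the Hacker's Delight tables: m == 0x92492493 and s == 2.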
+
+/// Calculate the magic numbers required to implement an unsigned integer
+/// division by a constant as a sequence of multiplies, adds and shifts.
+/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
+/// S. Warren, Jr., chapter 10.
+APInt::mu APInt::magicu() const {
+ const APInt& d = *this;
+ unsigned p;
+ APInt nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
+ APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+ APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
+
+ nc = allOnes - (-d).urem(d);
+ p = d.getBitWidth() - 1; // initialize p
+ q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
+ r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
+ q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
+ r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
+ do {
+ p = p + 1;
+ if (r1.uge(nc - r1)) {
+ q1 = q1 + q1 + 1; // update q1
+ r1 = r1 + r1 - nc; // update r1
+ }
+ else {
+ q1 = q1+q1; // update q1
+ r1 = r1+r1; // update r1
+ }
+ if ((r2 + 1).uge(d - r2)) {
+ if (q2.uge(signedMax)) magu.a = 1;
+ q2 = q2+q2 + 1; // update q2
+ r2 = r2+r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2.uge(signedMin)) magu.a = 1;
+ q2 = q2+q2; // update q2
+ r2 = r2+r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ } while (p < d.getBitWidth()*2 &&
+ (q1.ult(delta) || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - d.getBitWidth(); // resulting shift
+ return magu;
+}
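+
+// For example, for an unsigned 32-bit divide by 7 this produces the
+// constants tabulated in Hacker's Delight: m == 0x24924925, a == 1, s == 3.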
+
+/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
+/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
+/// variables here have the same names as in the algorithm. Comments explain
+/// the algorithm and any deviation from it.
+static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
+ unsigned m, unsigned n) {
+ assert(u && "Must provide dividend");
+ assert(v && "Must provide divisor");
+ assert(q && "Must provide quotient");
+  assert(u != v && u != q && v != q && "Must use different memory");
+ assert(n>1 && "n must be > 1");
+
+  // Knuth uses the value b as the base of the number system. In our case b
+  // is 2^32, so we set it with a 64-bit shift.
+ uint64_t b = uint64_t(1) << 32;
+
+#if 0
+ DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG(cerr << "KnuthDiv: original:");
+ DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
+ DEBUG(cerr << " by");
+ DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
+ DEBUG(cerr << '\n');
+#endif
+ // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
+ // u and v by d. Note that we have taken Knuth's advice here to use a power
+ // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
+ // 2 allows us to shift instead of multiply and it is easy to determine the
+ // shift amount from the leading zeros. We are basically normalizing the u
+ // and v so that its high bits are shifted to the top of v's range without
+ // overflow. Note that this can require an extra word in u so that u must
+ // be of length m+n+1.
+ unsigned shift = CountLeadingZeros_32(v[n-1]);
+ unsigned v_carry = 0;
+ unsigned u_carry = 0;
+ if (shift) {
+ for (unsigned i = 0; i < m+n; ++i) {
+ unsigned u_tmp = u[i] >> (32 - shift);
+ u[i] = (u[i] << shift) | u_carry;
+ u_carry = u_tmp;
+ }
+ for (unsigned i = 0; i < n; ++i) {
+ unsigned v_tmp = v[i] >> (32 - shift);
+ v[i] = (v[i] << shift) | v_carry;
+ v_carry = v_tmp;
+ }
+ }
+ u[m+n] = u_carry;
+#if 0
+ DEBUG(cerr << "KnuthDiv: normal:");
+ DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
+ DEBUG(cerr << " by");
+ DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
+ DEBUG(cerr << '\n');
+#endif
+
+ // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
+ int j = m;
+ do {
+ DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n');
+ // D3. [Calculate q'.].
+ // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
+ // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
+ // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
+  // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
+ // on v[n-2] determines at high speed most of the cases in which the trial
+ // value qp is one too large, and it eliminates all cases where qp is two
+ // too large.
+ uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
+ DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n');
+ uint64_t qp = dividend / v[n-1];
+ uint64_t rp = dividend % v[n-1];
+ if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
+ qp--;
+ rp += v[n-1];
+ if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
+ qp--;
+ }
+ DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+
+ // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
+ // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
+ // consists of a simple multiplication by a one-place number, combined with
+ // a subtraction.
+ bool isNeg = false;
+ for (unsigned i = 0; i < n; ++i) {
+ uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
+ uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
+ bool borrow = subtrahend > u_tmp;
+ DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp
+ << ", subtrahend == " << subtrahend
+ << ", borrow = " << borrow << '\n');
+
+ uint64_t result = u_tmp - subtrahend;
+ unsigned k = j + i;
+ u[k++] = (unsigned)(result & (b-1)); // subtract low word
+ u[k++] = (unsigned)(result >> 32); // subtract high word
+ while (borrow && k <= m+n) { // deal with borrow to the left
+ borrow = u[k] == 0;
+ u[k]--;
+ k++;
+ }
+ isNeg |= borrow;
+ DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
+ u[j+i+1] << '\n');
+ }
+ DEBUG(cerr << "KnuthDiv: after subtraction:");
+ DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
+ DEBUG(cerr << '\n');
+ // The digits (u[j+n]...u[j]) should be kept positive; if the result of
+ // this step is actually negative, (u[j+n]...u[j]) should be left as the
+ // true value plus b**(n+1), namely as the b's complement of
+ // the true value, and a "borrow" to the left should be remembered.
+ //
+ if (isNeg) {
+ bool carry = true; // true because b's complement is "complement + 1"
+ for (unsigned i = 0; i <= m+n; ++i) {
+ u[i] = ~u[i] + carry; // b's complement
+ carry = carry && u[i] == 0;
+ }
+ }
+ DEBUG(cerr << "KnuthDiv: after complement:");
+ DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
+ DEBUG(cerr << '\n');
+
+ // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
+ // negative, go to step D6; otherwise go on to step D7.
+ q[j] = (unsigned)qp;
+ if (isNeg) {
+ // D6. [Add back]. The probability that this step is necessary is very
+ // small, on the order of only 2/b. Make sure that test data accounts for
+ // this possibility. Decrease q[j] by 1
+ q[j]--;
+ // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
+ // A carry will occur to the left of u[j+n], and it should be ignored
+ // since it cancels with the borrow that occurred in D4.
+ bool carry = false;
+ for (unsigned i = 0; i < n; i++) {
+ unsigned limit = std::min(u[j+i],v[i]);
+ u[j+i] += v[i] + carry;
+ carry = u[j+i] < limit || (carry && u[j+i] == limit);
+ }
+ u[j+n] += carry;
+ }
+ DEBUG(cerr << "KnuthDiv: after correction:");
+ DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]);
+ DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');
+
+ // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
+ } while (--j >= 0);
+
+ DEBUG(cerr << "KnuthDiv: quotient:");
+ DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]);
+ DEBUG(cerr << '\n');
+
+ // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
+ // remainder may be obtained by dividing u[...] by d. If r is non-null we
+ // compute the remainder (urem uses this).
+ if (r) {
+    // The value d is expressed by the "shift" value above since we avoided
+    // multiplication by d by using a shift left. So, all we have to do is
+    // shift right here.
+ if (shift) {
+ unsigned carry = 0;
+ DEBUG(cerr << "KnuthDiv: remainder:");
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = (u[i] >> shift) | carry;
+ carry = u[i] << (32 - shift);
+ DEBUG(cerr << " " << r[i]);
+ }
+ } else {
+ for (int i = n-1; i >= 0; i--) {
+ r[i] = u[i];
+ DEBUG(cerr << " " << r[i]);
+ }
+ }
+ DEBUG(cerr << '\n');
+ }
+#if 0
+ DEBUG(cerr << std::setbase(10) << '\n');
+#endif
+}
+
+void APInt::divide(const APInt LHS, unsigned lhsWords,
+ const APInt &RHS, unsigned rhsWords,
+ APInt *Quotient, APInt *Remainder)
+{
+ assert(lhsWords >= rhsWords && "Fractional result");
+
+  // First, compose the values into an array of 32-bit words instead of
+  // 64-bit words. This is a necessity of both the "short division" algorithm
+  // and the Knuth "classical algorithm" which requires there to be native
+  // operations for +, -, and * on an m bit value with an m*2 bit result. We
+  // can't use 64-bit operands here because we don't have native results of
+  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
+  // work on big-endian machines.
+ uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
+ unsigned n = rhsWords * 2;
+ unsigned m = (lhsWords * 2) - n;
+
+ // Allocate space for the temporary values we need either on the stack, if
+ // it will fit, or on the heap if it won't.
+ unsigned SPACE[128];
+ unsigned *U = 0;
+ unsigned *V = 0;
+ unsigned *Q = 0;
+ unsigned *R = 0;
+ if ((Remainder?4:3)*n+2*m+1 <= 128) {
+ U = &SPACE[0];
+ V = &SPACE[m+n+1];
+ Q = &SPACE[(m+n+1) + n];
+ if (Remainder)
+ R = &SPACE[(m+n+1) + n + (m+n)];
+ } else {
+ U = new unsigned[m + n + 1];
+ V = new unsigned[n];
+ Q = new unsigned[m+n];
+ if (Remainder)
+ R = new unsigned[n];
+ }
+
+ // Initialize the dividend
+ memset(U, 0, (m+n+1)*sizeof(unsigned));
+ for (unsigned i = 0; i < lhsWords; ++i) {
+ uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
+ U[i * 2] = (unsigned)(tmp & mask);
+ U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
+ }
+ U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
+
+ // Initialize the divisor
+ memset(V, 0, (n)*sizeof(unsigned));
+ for (unsigned i = 0; i < rhsWords; ++i) {
+ uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
+ V[i * 2] = (unsigned)(tmp & mask);
+ V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
+ }
+
+ // initialize the quotient and remainder
+ memset(Q, 0, (m+n) * sizeof(unsigned));
+ if (Remainder)
+ memset(R, 0, n * sizeof(unsigned));
+
+ // Now, adjust m and n for the Knuth division. n is the number of words in
+ // the divisor. m is the number of words by which the dividend exceeds the
+ // divisor (i.e. m+n is the length of the dividend). These sizes must not
+ // contain any zero words or the Knuth algorithm fails.
+ for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
+ n--;
+ m++;
+ }
+ for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
+ m--;
+
+ // If we're left with only a single word for the divisor, Knuth doesn't work
+ // so we implement the short division algorithm here. This is much simpler
+ // and faster because we are certain that we can divide a 64-bit quantity
+ // by a 32-bit quantity at hardware speed and short division is simply a
+ // series of such operations. This is just like doing short division but we
+ // are using base 2^32 instead of base 10.
+ assert(n != 0 && "Divide by zero?");
+ if (n == 1) {
+ unsigned divisor = V[0];
+ unsigned remainder = 0;
+ for (int i = m+n-1; i >= 0; i--) {
+ uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
+ if (partial_dividend == 0) {
+ Q[i] = 0;
+ remainder = 0;
+ } else if (partial_dividend < divisor) {
+ Q[i] = 0;
+ remainder = (unsigned)partial_dividend;
+ } else if (partial_dividend == divisor) {
+ Q[i] = 1;
+ remainder = 0;
+ } else {
+ Q[i] = (unsigned)(partial_dividend / divisor);
+ remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
+ }
+ }
+ if (R)
+ R[0] = remainder;
+ } else {
+ // Now we're ready to invoke the Knuth classical divide algorithm. In this
+ // case n > 1.
+ KnuthDiv(U, V, Q, R, m, n);
+ }
+
+ // If the caller wants the quotient
+ if (Quotient) {
+ // Set up the Quotient value's memory.
+ if (Quotient->BitWidth != LHS.BitWidth) {
+ if (Quotient->isSingleWord())
+ Quotient->VAL = 0;
+ else
+ delete [] Quotient->pVal;
+ Quotient->BitWidth = LHS.BitWidth;
+ if (!Quotient->isSingleWord())
+ Quotient->pVal = getClearedMemory(Quotient->getNumWords());
+ } else
+ Quotient->clear();
+
+ // The quotient is in Q. Reconstitute the quotient into Quotient's low
+ // order words.
+ if (lhsWords == 1) {
+ uint64_t tmp =
+ uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
+ if (Quotient->isSingleWord())
+ Quotient->VAL = tmp;
+ else
+ Quotient->pVal[0] = tmp;
+ } else {
+ assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
+ for (unsigned i = 0; i < lhsWords; ++i)
+ Quotient->pVal[i] =
+ uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+ }
+ }
+
+ // If the caller wants the remainder
+ if (Remainder) {
+ // Set up the Remainder value's memory.
+ if (Remainder->BitWidth != RHS.BitWidth) {
+ if (Remainder->isSingleWord())
+ Remainder->VAL = 0;
+ else
+ delete [] Remainder->pVal;
+ Remainder->BitWidth = RHS.BitWidth;
+ if (!Remainder->isSingleWord())
+ Remainder->pVal = getClearedMemory(Remainder->getNumWords());
+ } else
+ Remainder->clear();
+
+ // The remainder is in R. Reconstitute the remainder into Remainder's low
+ // order words.
+ if (rhsWords == 1) {
+ uint64_t tmp =
+ uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
+ if (Remainder->isSingleWord())
+ Remainder->VAL = tmp;
+ else
+ Remainder->pVal[0] = tmp;
+ } else {
+ assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
+ for (unsigned i = 0; i < rhsWords; ++i)
+ Remainder->pVal[i] =
+ uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+ }
+ }
+
+ // Clean up the memory we allocated.
+ if (U != &SPACE[0]) {
+ delete [] U;
+ delete [] V;
+ delete [] Q;
+ delete [] R;
+ }
+}
+
+APInt APInt::udiv(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+
+ // First, deal with the easy case
+ if (isSingleWord()) {
+ assert(RHS.VAL != 0 && "Divide by zero?");
+ return APInt(BitWidth, VAL / RHS.VAL);
+ }
+
+ // Get some facts about the LHS and RHS number of bits and words
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+ assert(rhsWords && "Divided by zero???");
+ unsigned lhsBits = this->getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
+
+ // Deal with some degenerate cases
+ if (!lhsWords)
+ // 0 / X ===> 0
+ return APInt(BitWidth, 0);
+ else if (lhsWords < rhsWords || this->ult(RHS)) {
+ // X / Y ===> 0, iff X < Y
+ return APInt(BitWidth, 0);
+ } else if (*this == RHS) {
+ // X / X ===> 1
+ return APInt(BitWidth, 1);
+ } else if (lhsWords == 1 && rhsWords == 1) {
+ // All high words are zero, just use native divide
+ return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
+ }
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Quotient(1,0); // to hold result.
+ divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
+ return Quotient;
+}
+
+APInt APInt::urem(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ assert(RHS.VAL != 0 && "Remainder by zero?");
+ return APInt(BitWidth, VAL % RHS.VAL);
+ }
+
+ // Get some facts about the LHS
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
+
+ // Get some facts about the RHS
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+ assert(rhsWords && "Performing remainder operation by zero ???");
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ // 0 % Y ===> 0
+ return APInt(BitWidth, 0);
+ } else if (lhsWords < rhsWords || this->ult(RHS)) {
+ // X % Y ===> X, iff X < Y
+ return *this;
+ } else if (*this == RHS) {
+    // X % X ===> 0
+ return APInt(BitWidth, 0);
+ } else if (lhsWords == 1) {
+ // All high words are zero, just use native remainder
+ return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
+ }
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Remainder(1,0);
+ divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
+ return Remainder;
+}
+
+void APInt::udivrem(const APInt &LHS, const APInt &RHS,
+ APInt &Quotient, APInt &Remainder) {
+ // Get some size facts about the dividend and divisor
+ unsigned lhsBits = LHS.getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ Quotient = 0; // 0 / Y ===> 0
+ Remainder = 0; // 0 % Y ===> 0
+ return;
+ }
+
+ if (lhsWords < rhsWords || LHS.ult(RHS)) {
+ Quotient = 0; // X / Y ===> 0, iff X < Y
+ Remainder = LHS; // X % Y ===> X, iff X < Y
+ return;
+ }
+
+ if (LHS == RHS) {
+ Quotient = 1; // X / X ===> 1
+ Remainder = 0; // X % X ===> 0;
+ return;
+ }
+
+ if (lhsWords == 1 && rhsWords == 1) {
+ // There is only one word to consider so use the native versions.
+ uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
+ uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
+ Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
+ Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
+ return;
+ }
+
+ // Okay, lets do it the long way
+ divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
+}
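+
+// A usage sketch of the combined form (hand-worked, single-word case):
+//   APInt Q(64, 0), R(64, 0);
+//   APInt::udivrem(APInt(64, 100), APInt(64, 7), Q, R);  // Q == 14, R == 2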
+
+void APInt::fromString(unsigned numbits, const char *str, unsigned slen,
+ uint8_t radix) {
+ // Check our assumptions here
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+ assert(str && "String is null?");
+ bool isNeg = str[0] == '-';
+ if (isNeg)
+ str++, slen--;
+ assert((slen <= numbits || radix != 2) && "Insufficient bit width");
+ assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
+ assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
+ assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width");
+
+ // Allocate memory
+ if (!isSingleWord())
+ pVal = getClearedMemory(getNumWords());
+
+ // Figure out if we can shift instead of multiply
+ unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
+
+ // Set up an APInt for the digit to add outside the loop so we don't
+ // constantly construct/destruct it.
+ APInt apdigit(getBitWidth(), 0);
+ APInt apradix(getBitWidth(), radix);
+
+ // Enter digit traversal loop
+ for (unsigned i = 0; i < slen; i++) {
+ // Get a digit
+ unsigned digit = 0;
+ char cdigit = str[i];
+ if (radix == 16) {
+ if (!isxdigit(cdigit))
+ assert(0 && "Invalid hex digit in string");
+ if (isdigit(cdigit))
+ digit = cdigit - '0';
+ else if (cdigit >= 'a')
+ digit = cdigit - 'a' + 10;
+ else if (cdigit >= 'A')
+ digit = cdigit - 'A' + 10;
+ else
+ assert(0 && "huh? we shouldn't get here");
+ } else if (isdigit(cdigit)) {
+ digit = cdigit - '0';
+ assert((radix == 10 ||
+ (radix == 8 && digit != 8 && digit != 9) ||
+ (radix == 2 && (digit == 0 || digit == 1))) &&
+ "Invalid digit in string for given radix");
+ } else {
+ assert(0 && "Invalid character in digit string");
+ }
+
+ // Shift or multiply the value by the radix
+ if (slen > 1) {
+ if (shift)
+ *this <<= shift;
+ else
+ *this *= apradix;
+ }
+
+ // Add in the digit we just interpreted
+ if (apdigit.isSingleWord())
+ apdigit.VAL = digit;
+ else
+ apdigit.pVal[0] = digit;
+ *this += apdigit;
+ }
+  // If it's negative, put it in two's complement form
+ if (isNeg) {
+ (*this)--;
+ this->flip();
+ }
+}
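+
+// A hand-worked sketch, via the constructor that calls fromString:
+//   APInt(16, "-42", 3, 10);  // 0xFFD6, i.e. -42 in 16-bit two's complement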
+
+void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
+ bool Signed) const {
+ assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ // First, check for a zero value and just short circuit the logic below.
+ if (*this == 0) {
+ Str.push_back('0');
+ return;
+ }
+
+ static const char Digits[] = "0123456789ABCDEF";
+
+ if (isSingleWord()) {
+ char Buffer[65];
+ char *BufPtr = Buffer+65;
+
+ uint64_t N;
+ if (Signed) {
+ int64_t I = getSExtValue();
+ if (I < 0) {
+ Str.push_back('-');
+ I = -I;
+ }
+ N = I;
+ } else {
+ N = getZExtValue();
+ }
+
+ while (N) {
+ *--BufPtr = Digits[N % Radix];
+ N /= Radix;
+ }
+ Str.append(BufPtr, Buffer+65);
+ return;
+ }
+
+ APInt Tmp(*this);
+
+ if (Signed && isNegative()) {
+ // They want to print the signed version and it is a negative value
+ // Flip the bits and add one to turn it into the equivalent positive
+ // value and put a '-' in the result.
+ Tmp.flip();
+ Tmp++;
+ Str.push_back('-');
+ }
+
+ // We insert the digits backward, then reverse them to get the right order.
+ unsigned StartDig = Str.size();
+
+  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
+  // because each digit corresponds to a fixed number of bits (1, 3 and 4
+  // respectively). We just shift until the value is zero.
+ if (Radix != 10) {
+ // Just shift tmp right for each digit width until it becomes zero
+ unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
+ unsigned MaskAmt = Radix - 1;
+
+ while (Tmp != 0) {
+ unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
+ Str.push_back(Digits[Digit]);
+ Tmp = Tmp.lshr(ShiftAmt);
+ }
+ } else {
+ APInt divisor(4, 10);
+ while (Tmp != 0) {
+ APInt APdigit(1, 0);
+ APInt tmp2(Tmp.getBitWidth(), 0);
+ divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
+ &APdigit);
+ unsigned Digit = (unsigned)APdigit.getZExtValue();
+ assert(Digit < Radix && "divide failed");
+ Str.push_back(Digits[Digit]);
+ Tmp = tmp2;
+ }
+ }
+
+ // Reverse the digits before returning.
+ std::reverse(Str.begin()+StartDig, Str.end());
+}
+
+/// toString - This returns the APInt as a std::string. Note that this is an
+/// inefficient method. It is better to pass in a SmallVector/SmallString
+/// to the methods above.
+std::string APInt::toString(unsigned Radix, bool Signed) const {
+ SmallString<40> S;
+ toString(S, Radix, Signed);
+ return S.c_str();
+}
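+
+// A usage sketch of the conversions above, hand-worked on a 16-bit value:
+//   APInt(16, 0xFFD6).toString(10, true);   // "-42"
+//   APInt(16, 0xFFD6).toString(16, false);  // "FFD6"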
+
+
+void APInt::dump() const {
+ SmallString<40> S, U;
+ this->toStringUnsigned(U);
+ this->toStringSigned(S);
+ fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str());
+}
+
+void APInt::print(raw_ostream &OS, bool isSigned) const {
+ SmallString<40> S;
+ this->toString(S, 10, isSigned);
+ OS << S.c_str();
+}
+
+// This implements a variety of operations on a representation of
+// arbitrary precision, two's-complement, bignum integer values.
+
+/* Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
+ and unrestricting assumption. */
+#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
+COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
+
+/* Some handy functions local to this file. */
+namespace {
+
+ /* Returns the integer part with the least significant BITS set.
+ BITS cannot be zero. */
+ static inline integerPart
+ lowBitMask(unsigned int bits)
+ {
+ assert (bits != 0 && bits <= integerPartWidth);
+
+ return ~(integerPart) 0 >> (integerPartWidth - bits);
+ }
+
+ /* Returns the value of the lower half of PART. */
+ static inline integerPart
+ lowHalf(integerPart part)
+ {
+ return part & lowBitMask(integerPartWidth / 2);
+ }
+
+ /* Returns the value of the upper half of PART. */
+ static inline integerPart
+ highHalf(integerPart part)
+ {
+ return part >> (integerPartWidth / 2);
+ }
+
+ /* Returns the bit number of the most significant set bit of a part.
+ If the input number has no bits set -1U is returned. */
+ static unsigned int
+ partMSB(integerPart value)
+ {
+ unsigned int n, msb;
+
+ if (value == 0)
+ return -1U;
+
+ n = integerPartWidth / 2;
+
+ msb = 0;
+ do {
+ if (value >> n) {
+ value >>= n;
+ msb += n;
+ }
+
+ n >>= 1;
+ } while (n);
+
+ return msb;
+ }
+
+ /* Returns the bit number of the least significant set bit of a
+ part. If the input number has no bits set -1U is returned. */
+ static unsigned int
+ partLSB(integerPart value)
+ {
+ unsigned int n, lsb;
+
+ if (value == 0)
+ return -1U;
+
+ lsb = integerPartWidth - 1;
+ n = integerPartWidth / 2;
+
+ do {
+ if (value << n) {
+ value <<= n;
+ lsb -= n;
+ }
+
+ n >>= 1;
+ } while (n);
+
+ return lsb;
+ }
+}
+
+/* Sets the least significant part of a bignum to the input value, and
+ zeroes out higher parts. */
+void
+APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
+{
+ unsigned int i;
+
+ assert (parts > 0);
+
+ dst[0] = part;
+ for(i = 1; i < parts; i++)
+ dst[i] = 0;
+}
+
+/* Assign one bignum to another. */
+void
+APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
+{
+ unsigned int i;
+
+ for(i = 0; i < parts; i++)
+ dst[i] = src[i];
+}
+
+/* Returns true if a bignum is zero, false otherwise. */
+bool
+APInt::tcIsZero(const integerPart *src, unsigned int parts)
+{
+ unsigned int i;
+
+ for(i = 0; i < parts; i++)
+ if (src[i])
+ return false;
+
+ return true;
+}
+
+/* Extract the given bit of a bignum; returns 0 or 1. */
+int
+APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
+{
+ return(parts[bit / integerPartWidth]
+ & ((integerPart) 1 << bit % integerPartWidth)) != 0;
+}
+
+/* Set the given bit of a bignum. */
+void
+APInt::tcSetBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
+}
+
+/* Returns the bit number of the least significant set bit of a
+ number. If the input number has no bits set -1U is returned. */
+unsigned int
+APInt::tcLSB(const integerPart *parts, unsigned int n)
+{
+ unsigned int i, lsb;
+
+ for(i = 0; i < n; i++) {
+ if (parts[i] != 0) {
+ lsb = partLSB(parts[i]);
+
+ return lsb + i * integerPartWidth;
+ }
+ }
+
+ return -1U;
+}
+
+/* Returns the bit number of the most significant set bit of a number.
+ If the input number has no bits set -1U is returned. */
+unsigned int
+APInt::tcMSB(const integerPart *parts, unsigned int n)
+{
+ unsigned int msb;
+
+ do {
+ --n;
+
+ if (parts[n] != 0) {
+ msb = partMSB(parts[n]);
+
+ return msb + n * integerPartWidth;
+ }
+ } while (n);
+
+ return -1U;
+}
+
+/* Copy the bit vector of width srcBITS from SRC, starting at bit
+ srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
+ the least significant bit of DST. All high bits above srcBITS in
+ DST are zero-filled. */
+void
+APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
+ unsigned int srcBits, unsigned int srcLSB)
+{
+ unsigned int firstSrcPart, dstParts, shift, n;
+
+ dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
+ assert (dstParts <= dstCount);
+
+ firstSrcPart = srcLSB / integerPartWidth;
+ tcAssign (dst, src + firstSrcPart, dstParts);
+
+ shift = srcLSB % integerPartWidth;
+ tcShiftRight (dst, dstParts, shift);
+
+ /* We now have (dstParts * integerPartWidth - shift) bits from SRC
+     in DST. If this is less than srcBits, append the rest, else
+ clear the high bits. */
+ n = dstParts * integerPartWidth - shift;
+ if (n < srcBits) {
+ integerPart mask = lowBitMask (srcBits - n);
+ dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
+ << n % integerPartWidth);
+ } else if (n > srcBits) {
+ if (srcBits % integerPartWidth)
+ dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
+ }
+
+ /* Clear high parts. */
+ while (dstParts < dstCount)
+ dst[dstParts++] = 0;
+}
+
+/* DST += RHS + C where C is zero or one. Returns the carry flag. */
+integerPart
+APInt::tcAdd(integerPart *dst, const integerPart *rhs,
+ integerPart c, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(c <= 1);
+
+ for(i = 0; i < parts; i++) {
+ integerPart l;
+
+ l = dst[i];
+ if (c) {
+ dst[i] += rhs[i] + 1;
+ c = (dst[i] <= l);
+ } else {
+ dst[i] += rhs[i];
+ c = (dst[i] < l);
+ }
+ }
+
+ return c;
+}
+
+/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
+integerPart
+APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
+ integerPart c, unsigned int parts)
+{
+ unsigned int i;
+
+ assert(c <= 1);
+
+ for(i = 0; i < parts; i++) {
+ integerPart l;
+
+ l = dst[i];
+ if (c) {
+ dst[i] -= rhs[i] + 1;
+ c = (dst[i] >= l);
+ } else {
+ dst[i] -= rhs[i];
+ c = (dst[i] > l);
+ }
+ }
+
+ return c;
+}
+
+/* Negate a bignum in-place. */
+void
+APInt::tcNegate(integerPart *dst, unsigned int parts)
+{
+ tcComplement(dst, parts);
+ tcIncrement(dst, parts);
+}
+
+/* DST += SRC * MULTIPLIER + CARRY if add is true
+ DST = SRC * MULTIPLIER + CARRY if add is false
+
+ Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
+ they must start at the same point, i.e. DST == SRC.
+
+ If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
+ returned. Otherwise DST is filled with the least significant
+ DSTPARTS parts of the result, and if all of the omitted higher
+ parts were zero return zero, otherwise overflow occurred and
+ return one. */
+int
+APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
+ integerPart multiplier, integerPart carry,
+ unsigned int srcParts, unsigned int dstParts,
+ bool add)
+{
+ unsigned int i, n;
+
+  /* DST and SRC must not partially overlap; otherwise our writes of DST
+     would kill our later reads of SRC. */
+ assert(dst <= src || dst >= src + srcParts);
+ assert(dstParts <= srcParts + 1);
+
+ /* N loops; minimum of dstParts and srcParts. */
+ n = dstParts < srcParts ? dstParts: srcParts;
+
+ for(i = 0; i < n; i++) {
+ integerPart low, mid, high, srcPart;
+
+ /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
+
+ This cannot overflow, because
+
+ (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
+
+ which is less than n^2. */
+
+ srcPart = src[i];
+
+ if (multiplier == 0 || srcPart == 0) {
+ low = carry;
+ high = 0;
+ } else {
+ low = lowHalf(srcPart) * lowHalf(multiplier);
+ high = highHalf(srcPart) * highHalf(multiplier);
+
+ mid = lowHalf(srcPart) * highHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= integerPartWidth / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ mid = highHalf(srcPart) * lowHalf(multiplier);
+ high += highHalf(mid);
+ mid <<= integerPartWidth / 2;
+ if (low + mid < low)
+ high++;
+ low += mid;
+
+ /* Now add carry. */
+ if (low + carry < low)
+ high++;
+ low += carry;
+ }
+
+ if (add) {
+ /* And now DST[i], and store the new low part there. */
+ if (low + dst[i] < low)
+ high++;
+ dst[i] += low;
+ } else
+ dst[i] = low;
+
+ carry = high;
+ }
+
+ if (i < dstParts) {
+ /* Full multiplication, there is no overflow. */
+ assert(i + 1 == dstParts);
+ dst[i] = carry;
+ return 0;
+ } else {
+ /* We overflowed if there is carry. */
+ if (carry)
+ return 1;
+
+ /* We would overflow if any significant unwritten parts would be
+ non-zero. This is true if any remaining src parts are non-zero
+ and the multiplier is non-zero. */
+ if (multiplier)
+ for(; i < srcParts; i++)
+ if (src[i])
+ return 1;
+
+ /* We fitted in the narrow destination. */
+ return 0;
+ }
+}
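+
+/* Illustrative sketch, excluded from the build: the no-overflow bound from
+   the comment above, checked exhaustively with 8-bit "parts" standing in
+   for integerPart.  a*b + c + d never exceeds the double-width accumulator:
+   255*255 + 255 + 255 == 65535 exactly.  */
+#if 0
+static void tcMultiplyPartBoundCheck() {
+  for (unsigned a = 0; a != 256; a++)
+    for (unsigned b = 0; b != 256; b++)
+      assert(a * b + 255 + 255 <= 0xFFFF);
+}
+#endif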
+
+/* DST = LHS * RHS, where DST has the same width as the operands and
+ is filled with the least significant parts of the result. Returns
+ one if overflow occurred, otherwise zero. DST must be disjoint
+ from both operands. */
+int
+APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
+ const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+ int overflow;
+
+ assert(dst != lhs && dst != rhs);
+
+ overflow = 0;
+ tcSet(dst, 0, parts);
+
+ for(i = 0; i < parts; i++)
+ overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
+ parts - i, true);
+
+ return overflow;
+}
+
+/* DST = LHS * RHS, where DST has width the sum of the widths of the
+ operands. No overflow occurs. DST must be disjoint from both
+ operands. Returns the number of parts required to hold the
+ result. */
+unsigned int
+APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
+ const integerPart *rhs, unsigned int lhsParts,
+ unsigned int rhsParts)
+{
+  /* Put the narrower number on the LHS for fewer loops below. */
+ if (lhsParts > rhsParts) {
+ return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
+ } else {
+ unsigned int n;
+
+ assert(dst != lhs && dst != rhs);
+
+ tcSet(dst, 0, rhsParts);
+
+ for(n = 0; n < lhsParts; n++)
+ tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
+
+ n = lhsParts + rhsParts;
+
+ return n - (dst[n - 1] == 0);
+ }
+}
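+
+/* Illustrative sketch, excluded from the build: a 1-by-1 part full multiply
+   yields a 2-part product.  (2^W - 1)^2 = 2^2W - 2^(W+1) + 1, so the low
+   part is 1 and the high part is all ones except bit 0.  */
+#if 0
+static void tcFullMultiplyExample() {
+  integerPart lhs = ~(integerPart) 0;
+  integerPart rhs = ~(integerPart) 0;
+  integerPart dst[2];
+  unsigned n = APInt::tcFullMultiply(dst, &lhs, &rhs, 1, 1);
+  assert(n == 2 && dst[0] == 1 && dst[1] == ~(integerPart) 1);
+}
+#endif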
+
+/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
+ Otherwise set LHS to LHS / RHS with the fractional part discarded,
+ set REMAINDER to the remainder, return zero. i.e.
+
+ OLD_LHS = RHS * LHS + REMAINDER
+
+ SCRATCH is a bignum of the same size as the operands and result for
+ use by the routine; its contents need not be initialized and are
+ destroyed. LHS, REMAINDER and SCRATCH must be distinct.
+*/
+int
+APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
+ integerPart *remainder, integerPart *srhs,
+ unsigned int parts)
+{
+ unsigned int n, shiftCount;
+ integerPart mask;
+
+ assert(lhs != remainder && lhs != srhs && remainder != srhs);
+
+ shiftCount = tcMSB(rhs, parts) + 1;
+ if (shiftCount == 0)
+ return true;
+
+ shiftCount = parts * integerPartWidth - shiftCount;
+ n = shiftCount / integerPartWidth;
+ mask = (integerPart) 1 << (shiftCount % integerPartWidth);
+
+ tcAssign(srhs, rhs, parts);
+ tcShiftLeft(srhs, parts, shiftCount);
+ tcAssign(remainder, lhs, parts);
+ tcSet(lhs, 0, parts);
+
+  /* Loop, subtracting SRHS if REMAINDER is at least as large, and
+     recording the corresponding bit of the quotient in LHS. */
+ for(;;) {
+ int compare;
+
+ compare = tcCompare(remainder, srhs, parts);
+ if (compare >= 0) {
+ tcSubtract(remainder, srhs, 0, parts);
+ lhs[n] |= mask;
+ }
+
+ if (shiftCount == 0)
+ break;
+ shiftCount--;
+ tcShiftRight(srhs, parts, 1);
+ if ((mask >>= 1) == 0)
+ mask = (integerPart) 1 << (integerPartWidth - 1), n--;
+ }
+
+ return false;
+}
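+
+/* Illustrative sketch, excluded from the build: single-part long division.
+   The quotient replaces LHS and the remainder lands in REMAINDER, so
+   100 == 7 * 14 + 2 holds afterwards.  */
+#if 0
+static void tcDivideExample() {
+  integerPart lhs = 100, rhs = 7, remainder, scratch;
+  int rc = APInt::tcDivide(&lhs, &rhs, &remainder, &scratch, 1);
+  assert(rc == 0 && lhs == 14 && remainder == 2);
+}
+#endif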
+
+/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero.
+ There are no restrictions on COUNT. */
+void
+APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
+{
+ if (count) {
+ unsigned int jump, shift;
+
+    /* Jump is the inter-part jump; shift is the intra-part shift. */
+ jump = count / integerPartWidth;
+ shift = count % integerPartWidth;
+
+ while (parts > jump) {
+ integerPart part;
+
+ parts--;
+
+ /* dst[i] comes from the two parts src[i - jump] and, if we have
+ an intra-part shift, src[i - jump - 1]. */
+ part = dst[parts - jump];
+ if (shift) {
+ part <<= shift;
+ if (parts >= jump + 1)
+ part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
+ }
+
+ dst[parts] = part;
+ }
+
+ while (parts > 0)
+ dst[--parts] = 0;
+ }
+}
+
+/* Shift a bignum right COUNT bits in-place. Shifted in bits are
+ zero. There are no restrictions on COUNT. */
+void
+APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
+{
+ if (count) {
+ unsigned int i, jump, shift;
+
+    /* Jump is the inter-part jump; shift is the intra-part shift. */
+ jump = count / integerPartWidth;
+ shift = count % integerPartWidth;
+
+ /* Perform the shift. This leaves the most significant COUNT bits
+ of the result at zero. */
+ for(i = 0; i < parts; i++) {
+ integerPart part;
+
+ if (i + jump >= parts) {
+ part = 0;
+ } else {
+ part = dst[i + jump];
+ if (shift) {
+ part >>= shift;
+ if (i + jump + 1 < parts)
+ part |= dst[i + jump + 1] << (integerPartWidth - shift);
+ }
+ }
+
+ dst[i] = part;
+ }
+ }
+}
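+
+/* Illustrative sketch, excluded from the build: a shift by exactly one
+   part width is a pure inter-part move (jump == 1, shift == 0), and the
+   two directions are inverses while no set bits fall off the end.  */
+#if 0
+static void tcShiftExample() {
+  integerPart v[2] = { 0xABCD, 0 };
+  APInt::tcShiftLeft(v, 2, integerPartWidth);
+  assert(v[0] == 0 && v[1] == 0xABCD);
+  APInt::tcShiftRight(v, 2, integerPartWidth);
+  assert(v[0] == 0xABCD && v[1] == 0);
+}
+#endif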
+
+/* Bitwise and of two bignums. */
+void
+APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for(i = 0; i < parts; i++)
+ dst[i] &= rhs[i];
+}
+
+/* Bitwise inclusive or of two bignums. */
+void
+APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for(i = 0; i < parts; i++)
+ dst[i] |= rhs[i];
+}
+
+/* Bitwise exclusive or of two bignums. */
+void
+APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
+{
+ unsigned int i;
+
+ for(i = 0; i < parts; i++)
+ dst[i] ^= rhs[i];
+}
+
+/* Complement a bignum in-place. */
+void
+APInt::tcComplement(integerPart *dst, unsigned int parts)
+{
+ unsigned int i;
+
+ for(i = 0; i < parts; i++)
+ dst[i] = ~dst[i];
+}
+
+/* Comparison (unsigned) of two bignums. */
+int
+APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
+ unsigned int parts)
+{
+ while (parts) {
+ parts--;
+ if (lhs[parts] == rhs[parts])
+ continue;
+
+ if (lhs[parts] > rhs[parts])
+ return 1;
+ else
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Increment a bignum in-place, return the carry flag. */
+integerPart
+APInt::tcIncrement(integerPart *dst, unsigned int parts)
+{
+ unsigned int i;
+
+ for(i = 0; i < parts; i++)
+ if (++dst[i] != 0)
+ break;
+
+ return i == parts;
+}
+
+/* Set the least significant BITS bits of a bignum, clear the
+ rest. */
+void
+APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
+ unsigned int bits)
+{
+ unsigned int i;
+
+ i = 0;
+ while (bits > integerPartWidth) {
+ dst[i++] = ~(integerPart) 0;
+ bits -= integerPartWidth;
+ }
+
+ if (bits)
+ dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
+
+ while (i < parts)
+ dst[i++] = 0;
+}
diff --git a/lib/Support/APSInt.cpp b/lib/Support/APSInt.cpp
new file mode 100644
index 0000000..73acafa
--- /dev/null
+++ b/lib/Support/APSInt.cpp
@@ -0,0 +1,23 @@
+//===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the APSInt class, which is a simple class that
+// represents an arbitrary-sized integer that knows its signedness.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+
+using namespace llvm;
+
+void APSInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger((unsigned) (IsUnsigned ? 1 : 0));
+ APInt::Profile(ID);
+}
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
new file mode 100644
index 0000000..db0d8f3
--- /dev/null
+++ b/lib/Support/Allocator.cpp
@@ -0,0 +1,141 @@
+//===--- Allocator.cpp - Simple memory allocation abstraction -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BumpPtrAllocator interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Recycler.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Streams.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MemRegion class implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// MemRegion - This is one chunk of the BumpPtrAllocator.
+class MemRegion {
+ unsigned RegionSize;
+ MemRegion *Next;
+ char *NextPtr;
+public:
+ void Init(unsigned size, unsigned Alignment, MemRegion *next) {
+ RegionSize = size;
+ Next = next;
+ NextPtr = (char*)(this+1);
+
+ // Align NextPtr.
+ NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
+ ~(intptr_t)(Alignment-1));
+ }
+
+ const MemRegion *getNext() const { return Next; }
+ unsigned getNumBytesAllocated() const {
+ return NextPtr-(const char*)this;
+ }
+
+  /// Allocate - Allocate and return a pointer to at least the specified
+  /// number of bytes.
+  ///
+ void *Allocate(size_t AllocSize, size_t Alignment, MemRegion **RegPtr) {
+
+ char* Result = (char*) (((uintptr_t) (NextPtr+Alignment-1))
+ & ~((uintptr_t) Alignment-1));
+
+ // Speculate the new value of NextPtr.
+ char* NextPtrTmp = Result + AllocSize;
+
+ // If we are still within the current region, return Result.
+ if (unsigned (NextPtrTmp - (char*) this) <= RegionSize) {
+ NextPtr = NextPtrTmp;
+ return Result;
+ }
+
+ // Otherwise, we have to allocate a new chunk. Create one twice as big as
+ // this one.
+ MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2);
+ NewRegion->Init(RegionSize*2, Alignment, this);
+
+ // Update the current "first region" pointer to point to the new region.
+ *RegPtr = NewRegion;
+
+ // Try allocating from it now.
+ return NewRegion->Allocate(AllocSize, Alignment, RegPtr);
+ }
+
+ /// Deallocate - Recursively release all memory for this and its next regions
+ /// to the system.
+ void Deallocate() {
+ MemRegion *next = Next;
+ free(this);
+ if (next)
+ next->Deallocate();
+ }
+
+ /// DeallocateAllButLast - Recursively release all memory for this and its
+  /// next regions to the system, stopping at the last region in the list.
+ /// Returns the pointer to the last region.
+ MemRegion *DeallocateAllButLast() {
+ MemRegion *next = Next;
+ if (!next)
+ return this;
+ free(this);
+ return next->DeallocateAllButLast();
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// BumpPtrAllocator class implementation
+//===----------------------------------------------------------------------===//
+
+BumpPtrAllocator::BumpPtrAllocator() {
+ TheMemory = malloc(4096);
+ ((MemRegion*)TheMemory)->Init(4096, 1, 0);
+}
+
+BumpPtrAllocator::~BumpPtrAllocator() {
+ ((MemRegion*)TheMemory)->Deallocate();
+}
+
+void BumpPtrAllocator::Reset() {
+ MemRegion *MRP = (MemRegion*)TheMemory;
+ MRP = MRP->DeallocateAllButLast();
+ MRP->Init(4096, 1, 0);
+ TheMemory = MRP;
+}
+
+void *BumpPtrAllocator::Allocate(size_t Size, size_t Align) {
+ MemRegion *MRP = (MemRegion*)TheMemory;
+ void *Ptr = MRP->Allocate(Size, Align, &MRP);
+ TheMemory = MRP;
+ return Ptr;
+}
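+
+// Illustrative sketch, excluded from the build: typical bump-pointer usage.
+// Objects are carved out of the current region with no per-object
+// bookkeeping; nothing is freed individually, and Reset() reclaims
+// everything except the initial 4096-byte region in one shot.
+#if 0
+static void bumpPtrExample() {
+  BumpPtrAllocator Alloc;
+  for (unsigned i = 0; i != 1000; ++i) {
+    int *P = (int*)Alloc.Allocate(sizeof(int), 8 /* alignment */);
+    *P = (int)i;                  // storage stays valid until Reset()
+  }
+  Alloc.Reset();                  // all 1000 ints released at once
+}
+#endif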
+
+void BumpPtrAllocator::PrintStats() const {
+ unsigned BytesUsed = 0;
+ unsigned NumRegions = 0;
+ const MemRegion *R = (MemRegion*)TheMemory;
+ for (; R; R = R->getNext(), ++NumRegions)
+ BytesUsed += R->getNumBytesAllocated();
+
+ cerr << "\nNumber of memory regions: " << NumRegions << "\n";
+ cerr << "Bytes allocated: " << BytesUsed << "\n";
+}
+
+void llvm::PrintRecyclerStats(size_t Size,
+ size_t Align,
+ size_t FreeListSize) {
+ cerr << "Recycler element size: " << Size << '\n';
+ cerr << "Recycler element alignment: " << Align << '\n';
+ cerr << "Number of elements free for recycling: " << FreeListSize << '\n';
+}
diff --git a/lib/Support/Annotation.cpp b/lib/Support/Annotation.cpp
new file mode 100644
index 0000000..9764b5e
--- /dev/null
+++ b/lib/Support/Annotation.cpp
@@ -0,0 +1,115 @@
+//===-- Annotation.cpp - Implement the Annotation Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AnnotationManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Annotation.h"
+#include "llvm/Support/ManagedStatic.h"
+#include <map>
+#include <cstring>
+using namespace llvm;
+
+Annotation::~Annotation() {} // Designed to be subclassed
+
+Annotable::~Annotable() { // Virtual because it's designed to be subclassed...
+ Annotation *A = AnnotationList;
+ while (A) {
+ Annotation *Next = A->getNext();
+ delete A;
+ A = Next;
+ }
+}
+
+namespace {
+ class StrCmp {
+ public:
+ bool operator()(const char *a, const char *b) const {
+ return strcmp(a, b) < 0;
+ }
+ };
+}
+
+typedef std::map<const char*, unsigned, StrCmp> IDMapType;
+static unsigned IDCounter = 0; // Unique ID counter
+
+// Static member to ensure initialization on demand.
+static ManagedStatic<IDMapType> IDMap;
+
+// On demand annotation creation support...
+typedef Annotation *(*AnnFactory)(AnnotationID, const Annotable *, void *);
+typedef std::map<unsigned, std::pair<AnnFactory,void*> > FactMapType;
+
+static FactMapType *TheFactMap = 0;
+static FactMapType &getFactMap() {
+ if (TheFactMap == 0)
+ TheFactMap = new FactMapType();
+ return *TheFactMap;
+}
+
+static void eraseFromFactMap(unsigned ID) {
+ assert(TheFactMap && "No entries found!");
+ TheFactMap->erase(ID);
+ if (TheFactMap->empty()) { // Delete when empty
+ delete TheFactMap;
+ TheFactMap = 0;
+ }
+}
+
+AnnotationID AnnotationManager::getID(const char *Name) { // Name -> ID
+ IDMapType::iterator I = IDMap->find(Name);
+ if (I == IDMap->end()) {
+ (*IDMap)[Name] = IDCounter++; // Add a new element
+ return AnnotationID(IDCounter-1);
+ }
+ return AnnotationID(I->second);
+}
+
+// getID - Name -> ID + registration of a factory function for demand driven
+// annotation support.
+AnnotationID AnnotationManager::getID(const char *Name, Factory Fact,
+ void *Data) {
+ AnnotationID Result(getID(Name));
+ registerAnnotationFactory(Result, Fact, Data);
+ return Result;
+}
+
+// getName - This function is especially slow, but that's okay because it should
+// only be used for debugging.
+//
+const char *AnnotationManager::getName(AnnotationID ID) { // ID -> Name
+ IDMapType &TheMap = *IDMap;
+ for (IDMapType::iterator I = TheMap.begin(); ; ++I) {
+ assert(I != TheMap.end() && "Annotation ID is unknown!");
+ if (I->second == ID.ID) return I->first;
+ }
+}
+
+// registerAnnotationFactory - This method is used to register a callback
+// function used to create an annotation on demand if it is needed by the
+// Annotable::findOrCreateAnnotation method.
+//
+void AnnotationManager::registerAnnotationFactory(AnnotationID ID, AnnFactory F,
+ void *ExtraData) {
+ if (F)
+ getFactMap()[ID.ID] = std::make_pair(F, ExtraData);
+ else
+ eraseFromFactMap(ID.ID);
+}
+
+// createAnnotation - Create an annotation of the specified ID for the
+// specified object, using a register annotation creation function.
+//
+Annotation *AnnotationManager::createAnnotation(AnnotationID ID,
+ const Annotable *Obj) {
+ FactMapType::iterator I = getFactMap().find(ID.ID);
+ if (I == getFactMap().end()) return 0;
+ return I->second.first(ID, Obj, I->second.second);
+}
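+
+// Illustrative sketch, excluded from the build: wiring up a hypothetical
+// annotation class (MyAnnotation and "my-ann" are invented for this
+// example) so instances can be created on demand by ID.
+#if 0
+namespace {
+  struct MyAnnotation : public Annotation {
+    explicit MyAnnotation(AnnotationID ID) : Annotation(ID) {}
+  };
+
+  Annotation *makeMyAnnotation(AnnotationID ID, const Annotable *, void *) {
+    return new MyAnnotation(ID);
+  }
+}
+
+static void annotationExample() {
+  // getID interns the name and registers the factory in one call.
+  AnnotationID ID = AnnotationManager::getID("my-ann", makeMyAnnotation, 0);
+  assert(strcmp(AnnotationManager::getName(ID), "my-ann") == 0);
+}
+#endif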
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
new file mode 100644
index 0000000..7c8ce70
--- /dev/null
+++ b/lib/Support/CMakeLists.txt
@@ -0,0 +1,31 @@
+add_llvm_library(LLVMSupport
+ APFloat.cpp
+ APInt.cpp
+ APSInt.cpp
+ Allocator.cpp
+ Annotation.cpp
+ CommandLine.cpp
+ ConstantRange.cpp
+ Debug.cpp
+ Dwarf.cpp
+ FileUtilities.cpp
+ FoldingSet.cpp
+ GraphWriter.cpp
+ IsInf.cpp
+ IsNAN.cpp
+ ManagedStatic.cpp
+ MemoryBuffer.cpp
+ PluginLoader.cpp
+ PrettyStackTrace.cpp
+ SlowOperationInformer.cpp
+ SmallPtrSet.cpp
+ Statistic.cpp
+ Streams.cpp
+ StringExtras.cpp
+ StringMap.cpp
+ StringPool.cpp
+ SystemUtils.cpp
+ Timer.cpp
+ Triple.cpp
+ raw_ostream.cpp
+ )
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
new file mode 100644
index 0000000..4922560
--- /dev/null
+++ b/lib/Support/CommandLine.cpp
@@ -0,0 +1,1184 @@
+//===-- CommandLine.cpp - Command line parser implementation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements a command line argument processor that is useful when
+// creating a tool. It provides a simple, minimalistic interface that is easily
+// extensible and supports nonlocal (library) command line options.
+//
+// Note that rather than trying to figure out what this code does, you could try
+// reading the library documentation located in docs/CommandLine.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Path.h"
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <ostream>
+#include <set>
+#include <cstdlib>
+#include <cerrno>
+#include <cstring>
+#include <climits>
+using namespace llvm;
+using namespace cl;
+
+//===----------------------------------------------------------------------===//
+// Template instantiations and anchors.
+//
+TEMPLATE_INSTANTIATION(class basic_parser<bool>);
+TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>);
+TEMPLATE_INSTANTIATION(class basic_parser<int>);
+TEMPLATE_INSTANTIATION(class basic_parser<unsigned>);
+TEMPLATE_INSTANTIATION(class basic_parser<double>);
+TEMPLATE_INSTANTIATION(class basic_parser<float>);
+TEMPLATE_INSTANTIATION(class basic_parser<std::string>);
+TEMPLATE_INSTANTIATION(class basic_parser<char>);
+
+TEMPLATE_INSTANTIATION(class opt<unsigned>);
+TEMPLATE_INSTANTIATION(class opt<int>);
+TEMPLATE_INSTANTIATION(class opt<std::string>);
+TEMPLATE_INSTANTIATION(class opt<char>);
+TEMPLATE_INSTANTIATION(class opt<bool>);
+
+void Option::anchor() {}
+void basic_parser_impl::anchor() {}
+void parser<bool>::anchor() {}
+void parser<boolOrDefault>::anchor() {}
+void parser<int>::anchor() {}
+void parser<unsigned>::anchor() {}
+void parser<double>::anchor() {}
+void parser<float>::anchor() {}
+void parser<std::string>::anchor() {}
+void parser<char>::anchor() {}
+
+//===----------------------------------------------------------------------===//
+
+// Globals for name and overview of program. Program name is not a string to
+// avoid static ctor/dtor issues.
+static char ProgramName[80] = "<premain>";
+static const char *ProgramOverview = 0;
+
+// This collects additional help to be printed.
+static ManagedStatic<std::vector<const char*> > MoreHelp;
+
+extrahelp::extrahelp(const char *Help)
+ : morehelp(Help) {
+ MoreHelp->push_back(Help);
+}
+
+static bool OptionListChanged = false;
+
+// MarkOptionsChanged - Internal helper function.
+void cl::MarkOptionsChanged() {
+ OptionListChanged = true;
+}
+
+/// RegisteredOptionList - This is the list of the command line options that
+/// have statically constructed themselves.
+static Option *RegisteredOptionList = 0;
+
+void Option::addArgument() {
+ assert(NextRegistered == 0 && "argument multiply registered!");
+
+ NextRegistered = RegisteredOptionList;
+ RegisteredOptionList = this;
+ MarkOptionsChanged();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Basic, shared command line option processing machinery.
+//
+
+/// GetOptionInfo - Scan the list of registered options, turning them into data
+/// structures that are easier to handle.
+static void GetOptionInfo(std::vector<Option*> &PositionalOpts,
+ std::vector<Option*> &SinkOpts,
+ std::map<std::string, Option*> &OptionsMap) {
+ std::vector<const char*> OptionNames;
+ Option *CAOpt = 0; // The ConsumeAfter option if it exists.
+ for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
+ // If this option wants to handle multiple option names, get the full set.
+ // This handles enum options like "-O1 -O2" etc.
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+
+ // Handle named options.
+ for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ // Add argument to the argument map!
+ if (!OptionsMap.insert(std::pair<std::string,Option*>(OptionNames[i],
+ O)).second) {
+ cerr << ProgramName << ": CommandLine Error: Argument '"
+ << OptionNames[i] << "' defined more than once!\n";
+ }
+ }
+
+ OptionNames.clear();
+
+ // Remember information about positional options.
+ if (O->getFormattingFlag() == cl::Positional)
+ PositionalOpts.push_back(O);
+ else if (O->getMiscFlags() & cl::Sink) // Remember sink options
+ SinkOpts.push_back(O);
+ else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+ if (CAOpt)
+ O->error("Cannot specify more than one option with cl::ConsumeAfter!");
+ CAOpt = O;
+ }
+ }
+
+ if (CAOpt)
+ PositionalOpts.push_back(CAOpt);
+
+  // Make sure that they are in order of registration, not backwards.
+ std::reverse(PositionalOpts.begin(), PositionalOpts.end());
+}
+
+
+/// LookupOption - Look up the option specified by the argument string on the
+/// command line. If there is a value specified (after an equal sign) return
+/// that as well.
+static Option *LookupOption(const char *&Arg, const char *&Value,
+ std::map<std::string, Option*> &OptionsMap) {
+ while (*Arg == '-') ++Arg; // Eat leading dashes
+
+ const char *ArgEnd = Arg;
+ while (*ArgEnd && *ArgEnd != '=')
+ ++ArgEnd; // Scan till end of argument name.
+
+ if (*ArgEnd == '=') // If we have an equals sign...
+ Value = ArgEnd+1; // Get the value, not the equals
+
+
+ if (*Arg == 0) return 0;
+
+ // Look up the option.
+ std::map<std::string, Option*>::iterator I =
+ OptionsMap.find(std::string(Arg, ArgEnd));
+ return I != OptionsMap.end() ? I->second : 0;
+}
+
+static inline bool ProvideOption(Option *Handler, const char *ArgName,
+ const char *Value, int argc, char **argv,
+ int &i) {
+ // Is this a multi-argument option?
+ unsigned NumAdditionalVals = Handler->getNumAdditionalVals();
+
+ // Enforce value requirements
+ switch (Handler->getValueExpectedFlag()) {
+ case ValueRequired:
+ if (Value == 0) { // No value specified?
+ if (i+1 < argc) { // Steal the next argument, like for '-o filename'
+ Value = argv[++i];
+ } else {
+ return Handler->error(" requires a value!");
+ }
+ }
+ break;
+ case ValueDisallowed:
+ if (NumAdditionalVals > 0)
+ return Handler->error(": multi-valued option specified"
+ " with ValueDisallowed modifier!");
+
+ if (Value)
+ return Handler->error(" does not allow a value! '" +
+ std::string(Value) + "' specified.");
+ break;
+ case ValueOptional:
+ break;
+ default:
+ cerr << ProgramName
+ << ": Bad ValueMask flag! CommandLine usage error:"
+ << Handler->getValueExpectedFlag() << "\n";
+ abort();
+ break;
+ }
+
+ // If this isn't a multi-arg option, just run the handler.
+ if (NumAdditionalVals == 0) {
+ return Handler->addOccurrence(i, ArgName, Value ? Value : "");
+ }
+  // If it is, run the handler several times.
+ else {
+ bool MultiArg = false;
+
+ if (Value) {
+ if (Handler->addOccurrence(i, ArgName, Value, MultiArg))
+ return true;
+ --NumAdditionalVals;
+ MultiArg = true;
+ }
+
+ while (NumAdditionalVals > 0) {
+
+ if (i+1 < argc) {
+ Value = argv[++i];
+ } else {
+ return Handler->error(": not enough values!");
+ }
+ if (Handler->addOccurrence(i, ArgName, Value, MultiArg))
+ return true;
+ MultiArg = true;
+ --NumAdditionalVals;
+ }
+ return false;
+ }
+}
+
+static bool ProvidePositionalOption(Option *Handler, const std::string &Arg,
+ int i) {
+ int Dummy = i;
+ return ProvideOption(Handler, Handler->ArgStr, Arg.c_str(), 0, 0, Dummy);
+}
+
+
+// Option predicates...
+static inline bool isGrouping(const Option *O) {
+ return O->getFormattingFlag() == cl::Grouping;
+}
+static inline bool isPrefixedOrGrouping(const Option *O) {
+ return isGrouping(O) || O->getFormattingFlag() == cl::Prefix;
+}
+
+// getOptionPred - Check to see if there are any options that satisfy the
+// specified predicate with names that are the prefixes in Name. This is
+// checked by progressively stripping characters off of the name, checking to
+// see if there are options that satisfy the predicate. If we find one, return
+// it, otherwise return null.
+//
+static Option *getOptionPred(std::string Name, size_t &Length,
+ bool (*Pred)(const Option*),
+ std::map<std::string, Option*> &OptionsMap) {
+
+ std::map<std::string, Option*>::iterator OMI = OptionsMap.find(Name);
+ if (OMI != OptionsMap.end() && Pred(OMI->second)) {
+ Length = Name.length();
+ return OMI->second;
+ }
+
+ if (Name.size() == 1) return 0;
+ do {
+ Name.erase(Name.end()-1, Name.end()); // Chop off the last character...
+ OMI = OptionsMap.find(Name);
+
+ // Loop while we haven't found an option and Name still has at least two
+ // characters in it (so that the next iteration will not be the empty
+    // string).
+ } while ((OMI == OptionsMap.end() || !Pred(OMI->second)) && Name.size() > 1);
+
+ if (OMI != OptionsMap.end() && Pred(OMI->second)) {
+ Length = Name.length();
+ return OMI->second; // Found one!
+ }
+ return 0; // No option found!
+}
+
+static bool RequiresValue(const Option *O) {
+ return O->getNumOccurrencesFlag() == cl::Required ||
+ O->getNumOccurrencesFlag() == cl::OneOrMore;
+}
+
+static bool EatsUnboundedNumberOfValues(const Option *O) {
+ return O->getNumOccurrencesFlag() == cl::ZeroOrMore ||
+ O->getNumOccurrencesFlag() == cl::OneOrMore;
+}
+
+/// ParseCStringVector - Break INPUT up wherever one or more
+/// whitespace characters are found, and store the resulting tokens in
+/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated
+/// using strdup (), so it is the caller's responsibility to free ()
+/// them later.
+///
+static void ParseCStringVector(std::vector<char *> &output,
+ const char *input) {
+ // Characters which will be treated as token separators:
+ static const char *const delims = " \v\f\t\r\n";
+
+ std::string work (input);
+ // Skip past any delims at head of input string.
+ size_t pos = work.find_first_not_of (delims);
+ // If the string consists entirely of delims, then exit early.
+ if (pos == std::string::npos) return;
+ // Otherwise, jump forward to beginning of first word.
+ work = work.substr (pos);
+ // Find position of first delimiter.
+ pos = work.find_first_of (delims);
+
+ while (!work.empty() && pos != std::string::npos) {
+ // Everything from 0 to POS is the next word to copy.
+ output.push_back (strdup (work.substr (0,pos).c_str ()));
+ // Is there another word in the string?
+ size_t nextpos = work.find_first_not_of (delims, pos + 1);
+ if (nextpos != std::string::npos) {
+ // Yes? Then remove delims from beginning ...
+ work = work.substr (work.find_first_not_of (delims, pos + 1));
+ // and find the end of the word.
+ pos = work.find_first_of (delims);
+ } else {
+ // No? (Remainder of string is delims.) End the loop.
+ work = "";
+ pos = std::string::npos;
+ }
+ }
+
+ // If `input' ended with non-delim char, then we'll get here with
+ // the last word of `input' in `work'; copy it now.
+ if (!work.empty ()) {
+ output.push_back (strdup (work.c_str ()));
+ }
+}
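+
+/// Illustrative sketch, excluded from the build: the tokenizer splits on
+/// any run of whitespace, and every token is strdup()ed, so the caller
+/// must free each one.
+#if 0
+static void parseCStringVectorExample() {
+  std::vector<char *> toks;
+  ParseCStringVector(toks, "  -O2\t-o out.bc  ");
+  assert(toks.size() == 3 && strcmp(toks[1], "-o") == 0);
+  for (size_t i = 0, e = toks.size(); i != e; ++i)
+    free(toks[i]);
+}
+#endif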
+
+/// ParseEnvironmentOptions - An alternative entry point to the
+/// CommandLine library, which allows you to read the program's name
+/// from the caller (as PROGNAME) and its command-line arguments from
+/// an environment variable (whose name is given in ENVVAR).
+///
+void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
+ const char *Overview, bool ReadResponseFiles) {
+ // Check args.
+ assert(progName && "Program name not specified");
+ assert(envVar && "Environment variable name missing");
+
+ // Get the environment variable they want us to parse options out of.
+ const char *envValue = getenv(envVar);
+ if (!envValue)
+ return;
+
+ // Get program's "name", which we wouldn't know without the caller
+ // telling us.
+ std::vector<char*> newArgv;
+ newArgv.push_back(strdup(progName));
+
+ // Parse the value of the environment variable into a "command line"
+ // and hand it off to ParseCommandLineOptions().
+ ParseCStringVector(newArgv, envValue);
+ int newArgc = static_cast<int>(newArgv.size());
+ ParseCommandLineOptions(newArgc, &newArgv[0], Overview, ReadResponseFiles);
+
+ // Free all the strdup()ed strings.
+ for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
+ i != e; ++i)
+ free (*i);
+}
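+
+// Illustrative sketch, excluded from the build: a tool that takes its
+// options from a hypothetical MYTOOL_OPTIONS environment variable, e.g.
+// MYTOOL_OPTIONS="-stats -time-passes".
+#if 0
+static void environmentOptionsExample() {
+  cl::ParseEnvironmentOptions("mytool", "MYTOOL_OPTIONS",
+                              "my tool overview\n");
+}
+#endif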
+
+
+/// ExpandResponseFiles - Copy the contents of argv into newArgv,
+/// substituting the contents of the response files for the arguments
+/// of type @file.
+static void ExpandResponseFiles(int argc, char** argv,
+ std::vector<char*>& newArgv) {
+ for (int i = 1; i != argc; ++i) {
+ char* arg = argv[i];
+
+ if (arg[0] == '@') {
+
+ sys::PathWithStatus respFile(++arg);
+
+ // Check that the response file is not empty (mmap'ing empty
+ // files can be problematic).
+ const sys::FileStatus *FileStat = respFile.getFileStatus();
+ if (FileStat && FileStat->getSize() != 0) {
+
+ // Mmap the response file into memory.
+ OwningPtr<MemoryBuffer>
+ respFilePtr(MemoryBuffer::getFile(respFile.c_str()));
+
+ // If we could open the file, parse its contents, otherwise
+ // pass the @file option verbatim.
+
+ // TODO: we should also support recursive loading of response files,
+ // since this is how gcc behaves. (From their man page: "The file may
+ // itself contain additional @file options; any such options will be
+ // processed recursively.")
+
+ if (respFilePtr != 0) {
+ ParseCStringVector(newArgv, respFilePtr->getBufferStart());
+ continue;
+ }
+ }
+ }
+ newArgv.push_back(strdup(arg));
+ }
+}
+
+void cl::ParseCommandLineOptions(int argc, char **argv,
+ const char *Overview, bool ReadResponseFiles) {
+ // Process all registered options.
+ std::vector<Option*> PositionalOpts;
+ std::vector<Option*> SinkOpts;
+ std::map<std::string, Option*> Opts;
+ GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+
+ assert((!Opts.empty() || !PositionalOpts.empty()) &&
+ "No options specified!");
+
+ // Expand response files.
+ std::vector<char*> newArgv;
+ if (ReadResponseFiles) {
+ newArgv.push_back(strdup(argv[0]));
+ ExpandResponseFiles(argc, argv, newArgv);
+ argv = &newArgv[0];
+ argc = static_cast<int>(newArgv.size());
+ }
+
+ // Copy the program name into ProgName, making sure not to overflow it.
+ std::string ProgName = sys::Path(argv[0]).getLast();
+ if (ProgName.size() > 79) ProgName.resize(79);
+ strcpy(ProgramName, ProgName.c_str());
+
+ ProgramOverview = Overview;
+ bool ErrorParsing = false;
+
+ // Check out the positional arguments to collect information about them.
+ unsigned NumPositionalRequired = 0;
+
+ // Determine whether or not there are an unlimited number of positionals
+ bool HasUnlimitedPositionals = false;
+
+ Option *ConsumeAfterOpt = 0;
+ if (!PositionalOpts.empty()) {
+ if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+ assert(PositionalOpts.size() > 1 &&
+ "Cannot specify cl::ConsumeAfter without a positional argument!");
+ ConsumeAfterOpt = PositionalOpts[0];
+ }
+
+ // Calculate how many positional values are _required_.
+ bool UnboundedFound = false;
+ for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size();
+ i != e; ++i) {
+ Option *Opt = PositionalOpts[i];
+ if (RequiresValue(Opt))
+ ++NumPositionalRequired;
+ else if (ConsumeAfterOpt) {
+ // ConsumeAfter cannot be combined with "optional" positional options
+ // unless there is only one positional argument...
+ if (PositionalOpts.size() > 2)
+ ErrorParsing |=
+ Opt->error(" error - this positional option will never be matched, "
+ "because it does not Require a value, and a "
+ "cl::ConsumeAfter option is active!");
+ } else if (UnboundedFound && !Opt->ArgStr[0]) {
+ // This option does not "require" a value... Make sure this option is
+ // not specified after an option that eats all extra arguments, or this
+ // one will never get any!
+ //
+ ErrorParsing |= Opt->error(" error - option can never match, because "
+ "another positional argument will match an "
+ "unbounded number of values, and this option"
+ " does not require a value!");
+ }
+ UnboundedFound |= EatsUnboundedNumberOfValues(Opt);
+ }
+ HasUnlimitedPositionals = UnboundedFound || ConsumeAfterOpt;
+ }
+
+ // PositionalVals - A vector of "positional" arguments we accumulate into
+ // the process at the end...
+ //
+ std::vector<std::pair<std::string,unsigned> > PositionalVals;
+
+  // If the program has named positional arguments, and such a name has been
+  // encountered, keep track of which positional argument was named. Otherwise
+  // put the positional args into the PositionalVals list...
+ Option *ActivePositionalArg = 0;
+
+ // Loop over all of the arguments... processing them.
+ bool DashDashFound = false; // Have we read '--'?
+ for (int i = 1; i < argc; ++i) {
+ Option *Handler = 0;
+ const char *Value = 0;
+ const char *ArgName = "";
+
+ // If the option list changed, this means that some command line
+ // option has just been registered or deregistered. This can occur in
+ // response to things like -load, etc. If this happens, rescan the options.
+ if (OptionListChanged) {
+ PositionalOpts.clear();
+ SinkOpts.clear();
+ Opts.clear();
+ GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+ OptionListChanged = false;
+ }
+
+ // Check to see if this is a positional argument. This argument is
+ // considered to be positional if it doesn't start with '-', if it is "-"
+ // itself, or if we have seen "--" already.
+ //
+ if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) {
+ // Positional argument!
+ if (ActivePositionalArg) {
+ ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ continue; // We are done!
+ } else if (!PositionalOpts.empty()) {
+ PositionalVals.push_back(std::make_pair(argv[i],i));
+
+        // All of the positional arguments have been fulfilled, give the rest to
+ // the consume after option... if it's specified...
+ //
+ if (PositionalVals.size() >= NumPositionalRequired &&
+ ConsumeAfterOpt != 0) {
+ for (++i; i < argc; ++i)
+ PositionalVals.push_back(std::make_pair(argv[i],i));
+ break; // Handle outside of the argument processing loop...
+ }
+
+ // Delay processing positional arguments until the end...
+ continue;
+ }
+ } else if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0 &&
+ !DashDashFound) {
+ DashDashFound = true; // This is the mythical "--"?
+ continue; // Don't try to process it as an argument itself.
+ } else if (ActivePositionalArg &&
+ (ActivePositionalArg->getMiscFlags() & PositionalEatsArgs)) {
+ // If there is a positional argument eating options, check to see if this
+ // option is another positional argument. If so, treat it as an argument,
+ // otherwise feed it to the eating positional.
+ ArgName = argv[i]+1;
+ Handler = LookupOption(ArgName, Value, Opts);
+ if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
+ ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ continue; // We are done!
+ }
+
+ } else { // We start with a '-', must be an argument...
+ ArgName = argv[i]+1;
+ Handler = LookupOption(ArgName, Value, Opts);
+
+ // Check to see if this "option" is really a prefixed or grouped argument.
+ if (Handler == 0) {
+ std::string RealName(ArgName);
+ if (RealName.size() > 1) {
+ size_t Length = 0;
+ Option *PGOpt = getOptionPred(RealName, Length, isPrefixedOrGrouping,
+ Opts);
+
+ // If the option is a prefixed option, then the value is simply the
+ // rest of the name... so fall through to later processing, by
+ // setting up the argument name flags and value fields.
+ //
+ if (PGOpt && PGOpt->getFormattingFlag() == cl::Prefix) {
+ Value = ArgName+Length;
+ assert(Opts.find(std::string(ArgName, Value)) != Opts.end() &&
+ Opts.find(std::string(ArgName, Value))->second == PGOpt);
+ Handler = PGOpt;
+ } else if (PGOpt) {
+ // This must be a grouped option... handle them now.
+ assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+
+ do {
+ // Move current arg name out of RealName into RealArgName...
+ std::string RealArgName(RealName.begin(),
+ RealName.begin() + Length);
+ RealName.erase(RealName.begin(), RealName.begin() + Length);
+
+ // Because ValueRequired is an invalid flag for grouped arguments,
+ // we don't need to pass argc/argv in...
+ //
+ assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
+ "Option can not be cl::Grouping AND cl::ValueRequired!");
+ int Dummy;
+ ErrorParsing |= ProvideOption(PGOpt, RealArgName.c_str(),
+ 0, 0, 0, Dummy);
+
+ // Get the next grouping option...
+ PGOpt = getOptionPred(RealName, Length, isGrouping, Opts);
+ } while (PGOpt && Length != RealName.size());
+
+ Handler = PGOpt; // Ate all of the options.
+ }
+ }
+ }
+ }
+
+ if (Handler == 0) {
+ if (SinkOpts.empty()) {
+ cerr << ProgramName << ": Unknown command line argument '"
+ << argv[i] << "'. Try: '" << argv[0] << " --help'\n";
+ ErrorParsing = true;
+ } else {
+ for (std::vector<Option*>::iterator I = SinkOpts.begin(),
+ E = SinkOpts.end(); I != E ; ++I)
+ (*I)->addOccurrence(i, "", argv[i]);
+ }
+ continue;
+ }
+
+ // Check to see if this option accepts a comma separated list of values. If
+ // it does, we have to split up the value into multiple values...
+ if (Value && Handler->getMiscFlags() & CommaSeparated) {
+ std::string Val(Value);
+ std::string::size_type Pos = Val.find(',');
+
+ while (Pos != std::string::npos) {
+ // Process the portion before the comma...
+ ErrorParsing |= ProvideOption(Handler, ArgName,
+ std::string(Val.begin(),
+ Val.begin()+Pos).c_str(),
+ argc, argv, i);
+ // Erase the portion before the comma, AND the comma...
+ Val.erase(Val.begin(), Val.begin()+Pos+1);
+ Value += Pos+1; // Increment the original value pointer as well...
+
+ // Check for another comma...
+ Pos = Val.find(',');
+ }
+ }
+
+ // If this is a named positional argument, just remember that it is the
+ // active one...
+ if (Handler->getFormattingFlag() == cl::Positional)
+ ActivePositionalArg = Handler;
+ else
+ ErrorParsing |= ProvideOption(Handler, ArgName, Value, argc, argv, i);
+ }
+
+ // Check and handle positional arguments now...
+ if (NumPositionalRequired > PositionalVals.size()) {
+ cerr << ProgramName
+ << ": Not enough positional command line arguments specified!\n"
+ << "Must specify at least " << NumPositionalRequired
+ << " positional arguments: See: " << argv[0] << " --help\n";
+
+ ErrorParsing = true;
+ } else if (!HasUnlimitedPositionals
+ && PositionalVals.size() > PositionalOpts.size()) {
+ cerr << ProgramName
+ << ": Too many positional arguments specified!\n"
+ << "Can specify at most " << PositionalOpts.size()
+ << " positional arguments: See: " << argv[0] << " --help\n";
+ ErrorParsing = true;
+
+ } else if (ConsumeAfterOpt == 0) {
+ // Positional args have already been handled if ConsumeAfter is specified...
+ unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size());
+ for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) {
+ if (RequiresValue(PositionalOpts[i])) {
+ ProvidePositionalOption(PositionalOpts[i], PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ --NumPositionalRequired; // We fulfilled our duty...
+ }
+
+ // If we _can_ give this option more arguments, do so now, as long as we
+ // do not give it values that others need. 'Done' controls whether the
+ // option even _WANTS_ any more.
+ //
+ bool Done = PositionalOpts[i]->getNumOccurrencesFlag() == cl::Required;
+ while (NumVals-ValNo > NumPositionalRequired && !Done) {
+ switch (PositionalOpts[i]->getNumOccurrencesFlag()) {
+ case cl::Optional:
+ Done = true; // Optional arguments want _at most_ one value
+ // FALL THROUGH
+ case cl::ZeroOrMore: // Zero or more will take all they can get...
+ case cl::OneOrMore: // One or more will take all they can get...
+ ProvidePositionalOption(PositionalOpts[i],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ break;
+ default:
+ assert(0 && "Internal error, unexpected NumOccurrences flag in "
+ "positional argument processing!");
+ }
+ }
+ }
+ } else {
+ assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size());
+ unsigned ValNo = 0;
+ for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j)
+ if (RequiresValue(PositionalOpts[j])) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[j],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ }
+
+ // Handle the case where there is just one positional option, and it's
+ // optional. In this case, we want to give JUST THE FIRST option to the
+ // positional option and keep the rest for the consume after. The above
+ // loop would have assigned no values to positional options in this case.
+ //
+ if (PositionalOpts.size() == 2 && ValNo == 0 && !PositionalVals.empty()) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[1],
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ ValNo++;
+ }
+
+    // Hand all of the rest of the arguments over to the
+    // cl::ConsumeAfter command line option...
+ for (; ValNo != PositionalVals.size(); ++ValNo)
+ ErrorParsing |= ProvidePositionalOption(ConsumeAfterOpt,
+ PositionalVals[ValNo].first,
+ PositionalVals[ValNo].second);
+ }
+
+ // Loop over args and make sure all required args are specified!
+ for (std::map<std::string, Option*>::iterator I = Opts.begin(),
+ E = Opts.end(); I != E; ++I) {
+ switch (I->second->getNumOccurrencesFlag()) {
+ case Required:
+ case OneOrMore:
+ if (I->second->getNumOccurrences() == 0) {
+ I->second->error(" must be specified at least once!");
+ ErrorParsing = true;
+ }
+ // Fall through
+ default:
+ break;
+ }
+ }
+
+ // Free all of the memory allocated to the map. Command line options may only
+ // be processed once!
+ Opts.clear();
+ PositionalOpts.clear();
+ MoreHelp->clear();
+
+ // Free the memory allocated by ExpandResponseFiles.
+ if (ReadResponseFiles) {
+ // Free all the strdup()ed strings.
+ for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
+ i != e; ++i)
+ free (*i);
+ }
+
+ // If we had an error processing our arguments, don't let the program execute
+ if (ErrorParsing) exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+// Option Base class implementation
+//
+
+bool Option::error(std::string Message, const char *ArgName) {
+ if (ArgName == 0) ArgName = ArgStr;
+ if (ArgName[0] == 0)
+ cerr << HelpStr; // Be nice for positional arguments
+ else
+ cerr << ProgramName << ": for the -" << ArgName;
+
+ cerr << " option: " << Message << "\n";
+ return true;
+}
+
+bool Option::addOccurrence(unsigned pos, const char *ArgName,
+ const std::string &Value,
+ bool MultiArg) {
+ if (!MultiArg)
+ NumOccurrences++; // Increment the number of times we have been seen
+
+ switch (getNumOccurrencesFlag()) {
+ case Optional:
+ if (NumOccurrences > 1)
+ return error(": may only occur zero or one times!", ArgName);
+ break;
+ case Required:
+ if (NumOccurrences > 1)
+ return error(": must occur exactly one time!", ArgName);
+ // Fall through
+ case OneOrMore:
+ case ZeroOrMore:
+ case ConsumeAfter: break;
+ default: return error(": bad num occurrences flag value!");
+ }
+
+ return handleOccurrence(pos, ArgName, Value);
+}
+
+
+// getValueStr - Get the value description string, using "DefaultMsg" if nothing
+// has been specified yet.
+//
+static const char *getValueStr(const Option &O, const char *DefaultMsg) {
+ if (O.ValueStr[0] == 0) return DefaultMsg;
+ return O.ValueStr;
+}
+
+//===----------------------------------------------------------------------===//
+// cl::alias class implementation
+//
+
+// Return the width of the option tag for printing...
+size_t alias::getOptionWidth() const {
+ return std::strlen(ArgStr)+6;
+}
+
+// Print out the option for the alias.
+void alias::printOptionInfo(size_t GlobalWidth) const {
+ size_t L = std::strlen(ArgStr);
+ cout << " -" << ArgStr << std::string(GlobalWidth-L-6, ' ') << " - "
+ << HelpStr << "\n";
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Parser Implementation code...
+//
+
+// basic_parser implementation
+//
+
+// Return the width of the option tag for printing...
+size_t basic_parser_impl::getOptionWidth(const Option &O) const {
+ size_t Len = std::strlen(O.ArgStr);
+ if (const char *ValName = getValueName())
+ Len += std::strlen(getValueStr(O, ValName))+3;
+
+ return Len + 6;
+}
+
+// printOptionInfo - Print out information about this option. The
+// to-be-maintained width is specified.
+//
+void basic_parser_impl::printOptionInfo(const Option &O,
+ size_t GlobalWidth) const {
+ cout << " -" << O.ArgStr;
+
+ if (const char *ValName = getValueName())
+ cout << "=<" << getValueStr(O, ValName) << ">";
+
+ cout << std::string(GlobalWidth-getOptionWidth(O), ' ') << " - "
+ << O.HelpStr << "\n";
+}
+
+
+
+
+// parser<bool> implementation
+//
+bool parser<bool>::parse(Option &O, const char *ArgName,
+ const std::string &Arg, bool &Value) {
+ if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
+ Arg == "1") {
+ Value = true;
+ } else if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
+ Value = false;
+ } else {
+ return O.error(": '" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
+ }
+ return false;
+}
+
+// parser<boolOrDefault> implementation
+//
+bool parser<boolOrDefault>::parse(Option &O, const char *ArgName,
+ const std::string &Arg, boolOrDefault &Value) {
+ if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" ||
+ Arg == "1") {
+ Value = BOU_TRUE;
+ } else if (Arg == "false" || Arg == "FALSE"
+ || Arg == "False" || Arg == "0") {
+ Value = BOU_FALSE;
+ } else {
+ return O.error(": '" + Arg +
+ "' is invalid value for boolean argument! Try 0 or 1");
+ }
+ return false;
+}
+
+// parser<int> implementation
+//
+bool parser<int>::parse(Option &O, const char *ArgName,
+ const std::string &Arg, int &Value) {
+ char *End;
+ Value = (int)strtol(Arg.c_str(), &End, 0);
+ if (*End != 0)
+ return O.error(": '" + Arg + "' value invalid for integer argument!");
+ return false;
+}
+
+// parser<unsigned> implementation
+//
+bool parser<unsigned>::parse(Option &O, const char *ArgName,
+ const std::string &Arg, unsigned &Value) {
+ char *End;
+ errno = 0;
+ unsigned long V = strtoul(Arg.c_str(), &End, 0);
+ Value = (unsigned)V;
+ if (((V == ULONG_MAX) && (errno == ERANGE))
+ || (*End != 0)
+ || (Value != V))
+ return O.error(": '" + Arg + "' value invalid for uint argument!");
+ return false;
+}
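+
+// Illustrative sketch, excluded from the build: the strtoul() idiom above
+// in isolation.  strtoul saturates to ULONG_MAX and sets errno to ERANGE
+// on overflow, *End != 0 catches trailing junk, and the round-trip
+// comparison catches values that fit unsigned long but not unsigned.
+#if 0
+static bool parseUnsignedStrict(const char *S, unsigned &Out) {
+  char *End;
+  errno = 0;
+  unsigned long V = strtoul(S, &End, 0);
+  Out = (unsigned)V;
+  if ((V == ULONG_MAX && errno == ERANGE) || *End != 0 || Out != V)
+    return false;
+  return true;
+}
+#endif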
+
+// parser<double>/parser<float> implementation
+//
+static bool parseDouble(Option &O, const std::string &Arg, double &Value) {
+ const char *ArgStart = Arg.c_str();
+ char *End;
+ Value = strtod(ArgStart, &End);
+ if (*End != 0)
+ return O.error(": '" +Arg+ "' value invalid for floating point argument!");
+ return false;
+}
+
+bool parser<double>::parse(Option &O, const char *AN,
+ const std::string &Arg, double &Val) {
+ return parseDouble(O, Arg, Val);
+}
+
+bool parser<float>::parse(Option &O, const char *AN,
+ const std::string &Arg, float &Val) {
+ double dVal;
+ if (parseDouble(O, Arg, dVal))
+ return true;
+ Val = (float)dVal;
+ return false;
+}
+
+
+
+// generic_parser_base implementation
+//
+
+// findOption - Return the option number corresponding to the specified
+// argument string. If the option is not found, getNumOptions() is returned.
+//
+unsigned generic_parser_base::findOption(const char *Name) {
+ unsigned i = 0, e = getNumOptions();
+ std::string N(Name);
+
+ while (i != e)
+ if (getOption(i) == N)
+ return i;
+ else
+ ++i;
+ return e;
+}
+
+
+// Return the width of the option tag for printing...
+size_t generic_parser_base::getOptionWidth(const Option &O) const {
+ if (O.hasArgStr()) {
+ size_t Size = std::strlen(O.ArgStr)+6;
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
+ Size = std::max(Size, std::strlen(getOption(i))+8);
+ return Size;
+ } else {
+ size_t BaseSize = 0;
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
+ BaseSize = std::max(BaseSize, std::strlen(getOption(i))+8);
+ return BaseSize;
+ }
+}
+
+// printOptionInfo - Print out information about this option. The
+// to-be-maintained width is specified.
+//
+void generic_parser_base::printOptionInfo(const Option &O,
+ size_t GlobalWidth) const {
+ if (O.hasArgStr()) {
+ size_t L = std::strlen(O.ArgStr);
+ cout << " -" << O.ArgStr << std::string(GlobalWidth-L-6, ' ')
+ << " - " << O.HelpStr << "\n";
+
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8;
+ cout << " =" << getOption(i) << std::string(NumSpaces, ' ')
+ << " - " << getDescription(i) << "\n";
+ }
+ } else {
+ if (O.HelpStr[0])
+ cout << " " << O.HelpStr << "\n";
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ size_t L = std::strlen(getOption(i));
+ cout << " -" << getOption(i) << std::string(GlobalWidth-L-8, ' ')
+ << " - " << getDescription(i) << "\n";
+ }
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// --help and --help-hidden option implementation
+//
+
+namespace {
+
+class HelpPrinter {
+ size_t MaxArgLen;
+ const Option *EmptyArg;
+ const bool ShowHidden;
+
+ // isHidden/isReallyHidden - Predicates to be used to filter down arg lists.
+ inline static bool isHidden(std::pair<std::string, Option *> &OptPair) {
+ return OptPair.second->getOptionHiddenFlag() >= Hidden;
+ }
+ inline static bool isReallyHidden(std::pair<std::string, Option *> &OptPair) {
+ return OptPair.second->getOptionHiddenFlag() == ReallyHidden;
+ }
+
+public:
+ explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {
+ EmptyArg = 0;
+ }
+
+ void operator=(bool Value) {
+ if (Value == false) return;
+
+ // Get all the options.
+ std::vector<Option*> PositionalOpts;
+ std::vector<Option*> SinkOpts;
+ std::map<std::string, Option*> OptMap;
+ GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
+
+ // Copy Options into a vector so we can sort them as we like...
+ std::vector<std::pair<std::string, Option*> > Opts;
+ copy(OptMap.begin(), OptMap.end(), std::back_inserter(Opts));
+
+ // Eliminate Hidden or ReallyHidden arguments, depending on ShowHidden
+ Opts.erase(std::remove_if(Opts.begin(), Opts.end(),
+ std::ptr_fun(ShowHidden ? isReallyHidden : isHidden)),
+ Opts.end());
+
+ // Eliminate duplicate entries in table (from enum flags options, f.e.)
+ { // Give OptionSet a scope
+ std::set<Option*> OptionSet;
+ for (unsigned i = 0; i != Opts.size(); ++i)
+ if (OptionSet.count(Opts[i].second) == 0)
+ OptionSet.insert(Opts[i].second); // Add new entry to set
+ else
+ Opts.erase(Opts.begin()+i--); // Erase duplicate
+ }
+
+ if (ProgramOverview)
+ cout << "OVERVIEW: " << ProgramOverview << "\n";
+
+ cout << "USAGE: " << ProgramName << " [options]";
+
+ // Print out the positional options.
+ Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists...
+ if (!PositionalOpts.empty() &&
+ PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter)
+ CAOpt = PositionalOpts[0];
+
+ for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) {
+ if (PositionalOpts[i]->ArgStr[0])
+ cout << " --" << PositionalOpts[i]->ArgStr;
+ cout << " " << PositionalOpts[i]->HelpStr;
+ }
+
+ // Print the consume after option info if it exists...
+ if (CAOpt) cout << " " << CAOpt->HelpStr;
+
+ cout << "\n\n";
+
+ // Compute the maximum argument length...
+ MaxArgLen = 0;
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
+
+ cout << "OPTIONS:\n";
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ Opts[i].second->printOptionInfo(MaxArgLen);
+
+ // Print any extra help the user has declared.
+ for (std::vector<const char *>::iterator I = MoreHelp->begin(),
+ E = MoreHelp->end(); I != E; ++I)
+ cout << *I;
+ MoreHelp->clear();
+
+ // Halt the program since help information was printed
+ exit(1);
+ }
+};
+} // End anonymous namespace
+
+// Define the two HelpPrinter instances that are used to print out help, or
+// help-hidden...
+//
+static HelpPrinter NormalPrinter(false);
+static HelpPrinter HiddenPrinter(true);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HOp("help", cl::desc("Display available options (--help-hidden for more)"),
+ cl::location(NormalPrinter), cl::ValueDisallowed);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HHOp("help-hidden", cl::desc("Display all available options"),
+ cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
+static void (*OverrideVersionPrinter)() = 0;
+
+namespace {
+class VersionPrinter {
+public:
+ void print() {
+ cout << "Low Level Virtual Machine (http://llvm.org/):\n";
+ cout << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
+#ifdef LLVM_VERSION_INFO
+ cout << LLVM_VERSION_INFO;
+#endif
+ cout << "\n ";
+#ifndef __OPTIMIZE__
+ cout << "DEBUG build";
+#else
+ cout << "Optimized build";
+#endif
+#ifndef NDEBUG
+ cout << " with assertions";
+#endif
+ cout << ".\n";
+    cout << " Built " << __DATE__ << " (" << __TIME__ << ").\n";
+ }
+ void operator=(bool OptionWasSpecified) {
+ if (OptionWasSpecified) {
+ if (OverrideVersionPrinter == 0) {
+ print();
+ exit(1);
+ } else {
+ (*OverrideVersionPrinter)();
+ exit(1);
+ }
+ }
+ }
+};
+} // End anonymous namespace
+
+
+// Define the --version option that prints out the LLVM version for the tool
+static VersionPrinter VersionPrinterInstance;
+
+static cl::opt<VersionPrinter, true, parser<bool> >
+VersOp("version", cl::desc("Display the version of this program"),
+ cl::location(VersionPrinterInstance), cl::ValueDisallowed);
+
+// Utility function for printing the help message.
+void cl::PrintHelpMessage() {
+ // This looks weird, but it actually prints the help message. The
+ // NormalPrinter variable is a HelpPrinter and the help gets printed when
+ // its operator= is invoked. That's because the "normal" usages of the
+  // its operator= is invoked. That's because the "normal" usage of the
+ // --help option was given or not. Since we're circumventing that we have
+ // to make it look like --help was given, so we assign true.
+ NormalPrinter = true;
+}
+
+/// Utility function for printing version number.
+void cl::PrintVersionMessage() {
+ VersionPrinterInstance.print();
+}
+
+void cl::SetVersionPrinter(void (*func)()) {
+ OverrideVersionPrinter = func;
+}
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
new file mode 100644
index 0000000..cb8c4b0
--- /dev/null
+++ b/lib/Support/ConstantRange.cpp
@@ -0,0 +1,472 @@
+//===-- ConstantRange.cpp - ConstantRange implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Represent a range of possible values that may occur when the program is run
+// for an integral value. This keeps track of a lower and upper bound for the
+// constant, which MAY wrap around the end of the numeric range. To do this, it
+// keeps track of a [lower, upper) bound, which specifies an interval just like
+// STL iterators. When used with boolean values, the following are important
+// ranges (other integral ranges use min/max values for special range values):
+//
+// [F, F) = {} = Empty set
+// [T, F) = {T}
+// [F, T) = {F}
+// [T, T) = {F, T} = Full set
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// Initialize a full (the default) or empty set for the specified type.
+///
+ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) :
+ Lower(BitWidth, 0), Upper(BitWidth, 0) {
+ if (Full)
+ Lower = Upper = APInt::getMaxValue(BitWidth);
+ else
+ Lower = Upper = APInt::getMinValue(BitWidth);
+}
+
+/// Initialize a range to hold the single specified value.
+///
+ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) { }
+
+ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
+ Lower(L), Upper(U) {
+ assert(L.getBitWidth() == U.getBitWidth() &&
+ "ConstantRange with unequal bit widths");
+ assert((L != U || (L.isMaxValue() || L.isMinValue())) &&
+ "Lower == Upper, but they aren't min or max value!");
+}
+
+/// isFullSet - Return true if this set contains all of the elements possible
+/// for this data-type
+bool ConstantRange::isFullSet() const {
+ return Lower == Upper && Lower.isMaxValue();
+}
+
+/// isEmptySet - Return true if this set contains no members.
+///
+bool ConstantRange::isEmptySet() const {
+ return Lower == Upper && Lower.isMinValue();
+}
+
+/// isWrappedSet - Return true if this set wraps around the top of the range,
+/// for example: [100, 8)
+///
+bool ConstantRange::isWrappedSet() const {
+ return Lower.ugt(Upper);
+}
+
+/// getSetSize - Return the number of elements in this set.
+///
+APInt ConstantRange::getSetSize() const {
+ if (isEmptySet())
+ return APInt(getBitWidth(), 0);
+ if (getBitWidth() == 1) {
+ if (Lower != Upper) // One of T or F in the set...
+ return APInt(2, 1);
+ return APInt(2, 2); // Must be full set...
+ }
+
+ // Simply subtract the bounds...
+ return Upper - Lower;
+}
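+
+// Worked example (illustrative): for the wrapped 8-bit range [100, 8) the
+// subtraction below is modulo 2^8, so the size is 8 - 100 = 164: the 156
+// values 100..255 plus the 8 values 0..7.
+//
+//   ConstantRange CR(APInt(8, 100), APInt(8, 8)); // wrapped range [100, 8)
+//   CR.getSetSize();                              // APInt(8, 164)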
+
+/// getUnsignedMax - Return the largest unsigned value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getUnsignedMax() const {
+ if (isFullSet() || isWrappedSet())
+ return APInt::getMaxValue(getBitWidth());
+ else
+ return getUpper() - 1;
+}
+
+/// getUnsignedMin - Return the smallest unsigned value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getUnsignedMin() const {
+ if (isFullSet() || (isWrappedSet() && getUpper() != 0))
+ return APInt::getMinValue(getBitWidth());
+ else
+ return getLower();
+}
+
+/// getSignedMax - Return the largest signed value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getSignedMax() const {
+ APInt SignedMax(APInt::getSignedMaxValue(getBitWidth()));
+ if (!isWrappedSet()) {
+ if (getLower().sle(getUpper() - 1))
+ return getUpper() - 1;
+ else
+ return SignedMax;
+ } else {
+ if ((getUpper() - 1).slt(getLower())) {
+ if (getLower() != SignedMax)
+ return SignedMax;
+ else
+ return getUpper() - 1;
+ } else {
+ return getUpper() - 1;
+ }
+ }
+}
+
+/// getSignedMin - Return the smallest signed value contained in the
+/// ConstantRange.
+///
+APInt ConstantRange::getSignedMin() const {
+ APInt SignedMin(APInt::getSignedMinValue(getBitWidth()));
+ if (!isWrappedSet()) {
+ if (getLower().sle(getUpper() - 1))
+ return getLower();
+ else
+ return SignedMin;
+ } else {
+ if ((getUpper() - 1).slt(getLower())) {
+ if (getUpper() != SignedMin)
+ return SignedMin;
+ else
+ return getLower();
+ } else {
+ return getLower();
+ }
+ }
+}
+
+/// contains - Return true if the specified value is in the set.
+///
+bool ConstantRange::contains(const APInt &V) const {
+ if (Lower == Upper)
+ return isFullSet();
+
+ if (!isWrappedSet())
+ return Lower.ule(V) && V.ult(Upper);
+ else
+ return Lower.ule(V) || V.ult(Upper);
+}
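+
+// Worked example (illustrative): for the wrapped 8-bit range [100, 8) the
+// disjunction above tests membership against either arm of the wrap.
+//
+//   ConstantRange CR(APInt(8, 100), APInt(8, 8));
+//   CR.contains(APInt(8, 250)); // true:  100 <= 250
+//   CR.contains(APInt(8, 5));   // true:  5 < 8
+//   CR.contains(APInt(8, 50));  // false: neither bound test holds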
+
+/// subtract - Subtract the specified constant from the endpoints of this
+/// constant range.
+ConstantRange ConstantRange::subtract(const APInt &Val) const {
+ assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width");
+ // If the set is empty or full, don't modify the endpoints.
+ if (Lower == Upper)
+ return *this;
+ return ConstantRange(Lower - Val, Upper - Val);
+}
+
+
+// intersect1Wrapped - This helper function is used to intersect two ranges when
+// it is known that LHS is wrapped and RHS isn't.
+//
+ConstantRange
+ConstantRange::intersect1Wrapped(const ConstantRange &LHS,
+ const ConstantRange &RHS) {
+ assert(LHS.isWrappedSet() && !RHS.isWrappedSet());
+
+ // Check to see if we overlap on the Left side of RHS...
+ //
+ if (RHS.Lower.ult(LHS.Upper)) {
+ // We do overlap on the left side of RHS, see if we overlap on the right of
+ // RHS...
+ if (RHS.Upper.ugt(LHS.Lower)) {
+ // Ok, the result overlaps on both the left and right sides. See if the
+ // resultant interval will be smaller if we wrap or not...
+ //
+ if (LHS.getSetSize().ult(RHS.getSetSize()))
+ return LHS;
+ else
+ return RHS;
+
+ } else {
+ // No overlap on the right, just on the left.
+ return ConstantRange(RHS.Lower, LHS.Upper);
+ }
+ } else {
+ // We don't overlap on the left side of RHS, see if we overlap on the right
+ // of RHS...
+ if (RHS.Upper.ugt(LHS.Lower)) {
+ // Simple overlap...
+ return ConstantRange(LHS.Lower, RHS.Upper);
+ } else {
+ // No overlap...
+ return ConstantRange(LHS.getBitWidth(), false);
+ }
+ }
+}
+
+/// intersectWith - Return the range that results from the intersection of this
+/// range with another range.
+///
+ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+ // Handle common special cases
+ if (isEmptySet() || CR.isFullSet())
+ return *this;
+ if (isFullSet() || CR.isEmptySet())
+ return CR;
+
+ if (!isWrappedSet()) {
+ if (!CR.isWrappedSet()) {
+ using namespace APIntOps;
+ APInt L = umax(Lower, CR.Lower);
+ APInt U = umin(Upper, CR.Upper);
+
+ if (L.ult(U)) // If range isn't empty...
+ return ConstantRange(L, U);
+ else
+ return ConstantRange(getBitWidth(), false);// Otherwise, empty set
+ } else
+ return intersect1Wrapped(CR, *this);
+ } else { // We know "this" is wrapped...
+ if (!CR.isWrappedSet())
+ return intersect1Wrapped(*this, CR);
+ else {
+ // Both ranges are wrapped...
+ using namespace APIntOps;
+ APInt L = umax(Lower, CR.Lower);
+ APInt U = umin(Upper, CR.Upper);
+ return ConstantRange(L, U);
+ }
+ }
+ return *this;
+}
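+
+// Worked example (illustrative): intersecting two unwrapped 8-bit ranges
+// takes the max of the lower bounds and the min of the upper bounds, per
+// the unwrapped/unwrapped arm above.
+//
+//   ConstantRange A(APInt(8, 4), APInt(8, 10));
+//   ConstantRange B(APInt(8, 7), APInt(8, 13));
+//   A.intersectWith(B); // [7, 10)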
+
+/// maximalIntersectWith - Return the range that results from the intersection
+/// of this range with another range. The resultant range is guaranteed to
+/// include all elements contained in both input ranges, and to have the
+/// smallest possible set size that does so. Because there may be two
+/// intersections with the same set size, A.maximalIntersectWith(B) might not
+/// be equal to B.maximalIntersectWith(A).
+ConstantRange ConstantRange::maximalIntersectWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+
+ // Handle common cases.
+ if ( isEmptySet() || CR.isFullSet()) return *this;
+ if (CR.isEmptySet() || isFullSet()) return CR;
+
+ if (!isWrappedSet() && CR.isWrappedSet())
+ return CR.maximalIntersectWith(*this);
+
+ if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (Lower.ult(CR.Lower)) {
+ if (Upper.ule(CR.Lower))
+ return ConstantRange(getBitWidth(), false);
+
+ if (Upper.ult(CR.Upper))
+ return ConstantRange(CR.Lower, Upper);
+
+ return CR;
+ } else {
+ if (Upper.ult(CR.Upper))
+ return *this;
+
+ if (Lower.ult(CR.Upper))
+ return ConstantRange(Lower, CR.Upper);
+
+ return ConstantRange(getBitWidth(), false);
+ }
+ }
+
+ if (isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Lower.ult(Upper)) {
+ if (CR.Upper.ult(Upper))
+ return CR;
+
+ if (CR.Upper.ult(Lower))
+ return ConstantRange(CR.Lower, Upper);
+
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+ } else if (CR.Lower.ult(Lower)) {
+ if (CR.Upper.ule(Lower))
+ return ConstantRange(getBitWidth(), false);
+
+ return ConstantRange(Lower, CR.Upper);
+ }
+ return CR;
+ }
+
+ if (CR.Upper.ult(Upper)) {
+ if (CR.Lower.ult(Upper)) {
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+ }
+
+ if (CR.Lower.ult(Lower))
+ return ConstantRange(Lower, CR.Upper);
+
+ return CR;
+ } else if (CR.Upper.ult(Lower)) {
+ if (CR.Lower.ult(Lower))
+ return *this;
+
+ return ConstantRange(CR.Lower, Upper);
+ }
+ if (getSetSize().ult(CR.getSetSize()))
+ return *this;
+ else
+ return CR;
+}
+
+
+/// unionWith - Return the range that results from the union of this range with
+/// another range. The resultant range is guaranteed to include the elements of
+/// both sets, but may contain more. For example, [3, 9) union [12,15) is
+/// [3, 15), which includes 9, 10, and 11, which were not included in either
+/// set before.
+///
+ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
+ assert(getBitWidth() == CR.getBitWidth() &&
+ "ConstantRange types don't agree!");
+
+ if ( isFullSet() || CR.isEmptySet()) return *this;
+ if (CR.isFullSet() || isEmptySet()) return CR;
+
+ if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this);
+
+ APInt L = Lower, U = Upper;
+
+ if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (CR.Lower.ult(L))
+ L = CR.Lower;
+
+ if (CR.Upper.ugt(U))
+ U = CR.Upper;
+ }
+
+ if (isWrappedSet() && !CR.isWrappedSet()) {
+ if ((CR.Lower.ult(Upper) && CR.Upper.ult(Upper)) ||
+ (CR.Lower.ugt(Lower) && CR.Upper.ugt(Lower))) {
+ return *this;
+ }
+
+ if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper)) {
+ return ConstantRange(getBitWidth());
+ }
+
+ if (CR.Lower.ule(Upper) && CR.Upper.ule(Lower)) {
+ APInt d1 = CR.Upper - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2)) {
+ U = CR.Upper;
+ } else {
+ L = CR.Upper;
+ }
+ }
+
+ if (Upper.ult(CR.Lower) && CR.Upper.ult(Lower)) {
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
+ if (d1.ult(d2)) {
+ U = CR.Lower + 1;
+ } else {
+ L = CR.Upper - 1;
+ }
+ }
+
+ if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper)) {
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Lower;
+
+ if (d1.ult(d2)) {
+ U = CR.Lower + 1;
+ } else {
+ L = CR.Lower;
+ }
+ }
+ }
+
+ if (isWrappedSet() && CR.isWrappedSet()) {
+ if (Lower.ult(CR.Upper) || CR.Lower.ult(Upper))
+ return ConstantRange(getBitWidth());
+
+ if (CR.Upper.ugt(U)) {
+ U = CR.Upper;
+ }
+
+ if (CR.Lower.ult(L)) {
+ L = CR.Lower;
+ }
+
+ if (L == U) return ConstantRange(getBitWidth());
+ }
+
+ return ConstantRange(L, U);
+}
+
+/// zeroExtend - Return a new range in the specified integer type, which must
+/// be strictly larger than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// zero extended.
+ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize < DstTySize && "Not a value extension");
+ if (isFullSet())
+    // Change a source full set into [0, 1 << SrcTySize)
+ return ConstantRange(APInt(DstTySize,0), APInt(DstTySize,1).shl(SrcTySize));
+
+ APInt L = Lower; L.zext(DstTySize);
+ APInt U = Upper; U.zext(DstTySize);
+ return ConstantRange(L, U);
+}
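+
+// Worked example (illustrative): zero extending i8 ranges to i16.
+//
+//   ConstantRange(APInt(8, 200), APInt(8, 250)).zeroExtend(16); // [200, 250)
+//   ConstantRange(8).zeroExtend(16); // full i8 set becomes [0, 256)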
+
+/// signExtend - Return a new range in the specified integer type, which must
+/// be strictly larger than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// sign extended.
+ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize < DstTySize && "Not a value extension");
+ if (isFullSet()) {
+    return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1),
+                         APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1);
+ }
+
+ APInt L = Lower; L.sext(DstTySize);
+ APInt U = Upper; U.sext(DstTySize);
+ return ConstantRange(L, U);
+}
+
+/// truncate - Return a new range in the specified integer type, which must be
+/// strictly smaller than the current type. The returned range will
+/// correspond to the possible range of values as if the source range had been
+/// truncated to the specified type.
+ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ assert(SrcTySize > DstTySize && "Not a value truncation");
+ APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize));
+ if (isFullSet() || getSetSize().ugt(Size))
+ return ConstantRange(DstTySize);
+
+ APInt L = Lower; L.trunc(DstTySize);
+ APInt U = Upper; U.trunc(DstTySize);
+ return ConstantRange(L, U);
+}
+
+/// print - Print out the bounds to a stream...
+///
+void ConstantRange::print(raw_ostream &OS) const {
+ OS << "[" << Lower << "," << Upper << ")";
+}
+
+/// dump - Allow printing from a debugger easily...
+///
+void ConstantRange::dump() const {
+ print(errs());
+}
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
new file mode 100644
index 0000000..a09cddf
--- /dev/null
+++ b/lib/Support/Debug.cpp
@@ -0,0 +1,77 @@
+//===-- Debug.cpp - An easy way to add debug output to your code ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a handy way of adding debugging information to your
+// code, without it being enabled all of the time, and without having to add
+// command line options to enable it.
+//
+// In particular, just wrap your code with the DEBUG() macro, and it will be
+// enabled automatically if you specify '-debug' on the command-line.
+// Alternatively, you can also use the SET_DEBUG_TYPE("foo") macro to specify
+// that your debug code belongs to class "foo". Then, on the command line, you
+// can specify '-debug-only=foo' to enable JUST the debug information for the
+// foo class.
+//
+// When compiling in release mode, the -debug-* options and all code in DEBUG()
+// statements disappear, so they do not affect the runtime of the code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option
+
+namespace {
+#ifndef NDEBUG
+ // -debug - Command line option to enable the DEBUG statements in the passes.
+ // This flag may only be enabled in debug builds.
+ static cl::opt<bool, true>
+ Debug("debug", cl::desc("Enable debug output"), cl::Hidden,
+ cl::location(DebugFlag));
+
+ static std::string CurrentDebugType;
+ static struct DebugOnlyOpt {
+ void operator=(const std::string &Val) const {
+ DebugFlag |= !Val.empty();
+ CurrentDebugType = Val;
+ }
+ } DebugOnlyOptLoc;
+
+ static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
+ DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
+ cl::Hidden, cl::value_desc("debug string"),
+ cl::location(DebugOnlyOptLoc), cl::ValueRequired);
+#endif
+}
+
+// isCurrentDebugType - Return true if the specified string is the debug type
+// specified on the command line, or if none was specified on the command line
+// with the -debug-only=X option.
+//
+bool llvm::isCurrentDebugType(const char *DebugType) {
+#ifndef NDEBUG
+ return CurrentDebugType.empty() || DebugType == CurrentDebugType;
+#else
+ return false;
+#endif
+}
+
+// getErrorOutputStream - Returns the error output stream (std::cerr). This
+// places the std::c* I/O streams into one .cpp file and relieves the whole
+// program from having to have hundreds of static c'tor/d'tors for them.
+//
+OStream &llvm::getErrorOutputStream(const char *DebugType) {
+ static OStream cnoout(0);
+ if (DebugFlag && isCurrentDebugType(DebugType))
+ return cerr;
+ else
+ return cnoout;
+}
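+
+// Usage sketch (illustrative; DEBUG and DOUT are the wrappers from
+// llvm/Support/Debug.h, and BB is a hypothetical variable):
+//
+//   DEBUG(DOUT << "visiting " << BB->getName() << "\n");
+//
+// With assertions enabled the message appears only under -debug (or a
+// matching -debug-only=<type>); in optimized builds the whole statement
+// compiles away.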
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
new file mode 100644
index 0000000..fa99035
--- /dev/null
+++ b/lib/Support/Dwarf.cpp
@@ -0,0 +1,589 @@
+//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for generic dwarf information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Dwarf.h"
+
+#include <cassert>
+
+namespace llvm {
+
+namespace dwarf {
+
+/// TagString - Return the string for the specified tag.
+///
+const char *TagString(unsigned Tag) {
+ switch (Tag) {
+ case DW_TAG_array_type: return "DW_TAG_array_type";
+ case DW_TAG_class_type: return "DW_TAG_class_type";
+ case DW_TAG_entry_point: return "DW_TAG_entry_point";
+ case DW_TAG_enumeration_type: return "DW_TAG_enumeration_type";
+ case DW_TAG_formal_parameter: return "DW_TAG_formal_parameter";
+ case DW_TAG_imported_declaration: return "DW_TAG_imported_declaration";
+ case DW_TAG_label: return "DW_TAG_label";
+ case DW_TAG_lexical_block: return "DW_TAG_lexical_block";
+ case DW_TAG_member: return "DW_TAG_member";
+ case DW_TAG_pointer_type: return "DW_TAG_pointer_type";
+ case DW_TAG_reference_type: return "DW_TAG_reference_type";
+ case DW_TAG_compile_unit: return "DW_TAG_compile_unit";
+ case DW_TAG_string_type: return "DW_TAG_string_type";
+ case DW_TAG_structure_type: return "DW_TAG_structure_type";
+ case DW_TAG_subroutine_type: return "DW_TAG_subroutine_type";
+ case DW_TAG_typedef: return "DW_TAG_typedef";
+ case DW_TAG_union_type: return "DW_TAG_union_type";
+ case DW_TAG_unspecified_parameters: return "DW_TAG_unspecified_parameters";
+ case DW_TAG_variant: return "DW_TAG_variant";
+ case DW_TAG_common_block: return "DW_TAG_common_block";
+ case DW_TAG_common_inclusion: return "DW_TAG_common_inclusion";
+ case DW_TAG_inheritance: return "DW_TAG_inheritance";
+ case DW_TAG_inlined_subroutine: return "DW_TAG_inlined_subroutine";
+ case DW_TAG_module: return "DW_TAG_module";
+ case DW_TAG_ptr_to_member_type: return "DW_TAG_ptr_to_member_type";
+ case DW_TAG_set_type: return "DW_TAG_set_type";
+ case DW_TAG_subrange_type: return "DW_TAG_subrange_type";
+ case DW_TAG_with_stmt: return "DW_TAG_with_stmt";
+ case DW_TAG_access_declaration: return "DW_TAG_access_declaration";
+ case DW_TAG_base_type: return "DW_TAG_base_type";
+ case DW_TAG_catch_block: return "DW_TAG_catch_block";
+ case DW_TAG_const_type: return "DW_TAG_const_type";
+ case DW_TAG_constant: return "DW_TAG_constant";
+ case DW_TAG_enumerator: return "DW_TAG_enumerator";
+ case DW_TAG_file_type: return "DW_TAG_file_type";
+ case DW_TAG_friend: return "DW_TAG_friend";
+ case DW_TAG_namelist: return "DW_TAG_namelist";
+ case DW_TAG_namelist_item: return "DW_TAG_namelist_item";
+ case DW_TAG_packed_type: return "DW_TAG_packed_type";
+ case DW_TAG_subprogram: return "DW_TAG_subprogram";
+ case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter";
+ case DW_TAG_template_value_parameter: return "DW_TAG_template_value_parameter";
+ case DW_TAG_thrown_type: return "DW_TAG_thrown_type";
+ case DW_TAG_try_block: return "DW_TAG_try_block";
+ case DW_TAG_variant_part: return "DW_TAG_variant_part";
+ case DW_TAG_variable: return "DW_TAG_variable";
+ case DW_TAG_volatile_type: return "DW_TAG_volatile_type";
+ case DW_TAG_dwarf_procedure: return "DW_TAG_dwarf_procedure";
+ case DW_TAG_restrict_type: return "DW_TAG_restrict_type";
+ case DW_TAG_interface_type: return "DW_TAG_interface_type";
+ case DW_TAG_namespace: return "DW_TAG_namespace";
+ case DW_TAG_imported_module: return "DW_TAG_imported_module";
+ case DW_TAG_unspecified_type: return "DW_TAG_unspecified_type";
+ case DW_TAG_partial_unit: return "DW_TAG_partial_unit";
+ case DW_TAG_imported_unit: return "DW_TAG_imported_unit";
+ case DW_TAG_condition: return "DW_TAG_condition";
+ case DW_TAG_shared_type: return "DW_TAG_shared_type";
+ case DW_TAG_lo_user: return "DW_TAG_lo_user";
+ case DW_TAG_hi_user: return "DW_TAG_hi_user";
+ }
+ assert(0 && "Unknown Dwarf Tag");
+ return "";
+}
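+
+// Usage sketch (illustrative): every *String helper in this file follows
+// this same pattern, mapping an enumerated DWARF encoding to its name.
+//
+//   const char *S = dwarf::TagString(dwarf::DW_TAG_member); // "DW_TAG_member"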
+
+/// ChildrenString - Return the string for the specified children flag.
+///
+const char *ChildrenString(unsigned Children) {
+ switch (Children) {
+ case DW_CHILDREN_no: return "CHILDREN_no";
+ case DW_CHILDREN_yes: return "CHILDREN_yes";
+ }
+ assert(0 && "Unknown Dwarf ChildrenFlag");
+ return "";
+}
+
+/// AttributeString - Return the string for the specified attribute.
+///
+const char *AttributeString(unsigned Attribute) {
+ switch (Attribute) {
+ case DW_AT_sibling: return "DW_AT_sibling";
+ case DW_AT_location: return "DW_AT_location";
+ case DW_AT_name: return "DW_AT_name";
+ case DW_AT_ordering: return "DW_AT_ordering";
+ case DW_AT_byte_size: return "DW_AT_byte_size";
+ case DW_AT_bit_offset: return "DW_AT_bit_offset";
+ case DW_AT_bit_size: return "DW_AT_bit_size";
+ case DW_AT_stmt_list: return "DW_AT_stmt_list";
+ case DW_AT_low_pc: return "DW_AT_low_pc";
+ case DW_AT_high_pc: return "DW_AT_high_pc";
+ case DW_AT_language: return "DW_AT_language";
+ case DW_AT_discr: return "DW_AT_discr";
+ case DW_AT_discr_value: return "DW_AT_discr_value";
+ case DW_AT_visibility: return "DW_AT_visibility";
+ case DW_AT_import: return "DW_AT_import";
+ case DW_AT_string_length: return "DW_AT_string_length";
+ case DW_AT_common_reference: return "DW_AT_common_reference";
+ case DW_AT_comp_dir: return "DW_AT_comp_dir";
+ case DW_AT_const_value: return "DW_AT_const_value";
+ case DW_AT_containing_type: return "DW_AT_containing_type";
+ case DW_AT_default_value: return "DW_AT_default_value";
+ case DW_AT_inline: return "DW_AT_inline";
+ case DW_AT_is_optional: return "DW_AT_is_optional";
+ case DW_AT_lower_bound: return "DW_AT_lower_bound";
+ case DW_AT_producer: return "DW_AT_producer";
+ case DW_AT_prototyped: return "DW_AT_prototyped";
+ case DW_AT_return_addr: return "DW_AT_return_addr";
+ case DW_AT_start_scope: return "DW_AT_start_scope";
+ case DW_AT_bit_stride: return "DW_AT_bit_stride";
+ case DW_AT_upper_bound: return "DW_AT_upper_bound";
+ case DW_AT_abstract_origin: return "DW_AT_abstract_origin";
+ case DW_AT_accessibility: return "DW_AT_accessibility";
+ case DW_AT_address_class: return "DW_AT_address_class";
+ case DW_AT_artificial: return "DW_AT_artificial";
+ case DW_AT_base_types: return "DW_AT_base_types";
+ case DW_AT_calling_convention: return "DW_AT_calling_convention";
+ case DW_AT_count: return "DW_AT_count";
+ case DW_AT_data_member_location: return "DW_AT_data_member_location";
+ case DW_AT_decl_column: return "DW_AT_decl_column";
+ case DW_AT_decl_file: return "DW_AT_decl_file";
+ case DW_AT_decl_line: return "DW_AT_decl_line";
+ case DW_AT_declaration: return "DW_AT_declaration";
+ case DW_AT_discr_list: return "DW_AT_discr_list";
+ case DW_AT_encoding: return "DW_AT_encoding";
+ case DW_AT_external: return "DW_AT_external";
+ case DW_AT_frame_base: return "DW_AT_frame_base";
+ case DW_AT_friend: return "DW_AT_friend";
+ case DW_AT_identifier_case: return "DW_AT_identifier_case";
+ case DW_AT_macro_info: return "DW_AT_macro_info";
+ case DW_AT_namelist_item: return "DW_AT_namelist_item";
+ case DW_AT_priority: return "DW_AT_priority";
+ case DW_AT_segment: return "DW_AT_segment";
+ case DW_AT_specification: return "DW_AT_specification";
+ case DW_AT_static_link: return "DW_AT_static_link";
+ case DW_AT_type: return "DW_AT_type";
+ case DW_AT_use_location: return "DW_AT_use_location";
+ case DW_AT_variable_parameter: return "DW_AT_variable_parameter";
+ case DW_AT_virtuality: return "DW_AT_virtuality";
+ case DW_AT_vtable_elem_location: return "DW_AT_vtable_elem_location";
+ case DW_AT_allocated: return "DW_AT_allocated";
+ case DW_AT_associated: return "DW_AT_associated";
+ case DW_AT_data_location: return "DW_AT_data_location";
+ case DW_AT_byte_stride: return "DW_AT_byte_stride";
+ case DW_AT_entry_pc: return "DW_AT_entry_pc";
+ case DW_AT_use_UTF8: return "DW_AT_use_UTF8";
+ case DW_AT_extension: return "DW_AT_extension";
+ case DW_AT_ranges: return "DW_AT_ranges";
+ case DW_AT_trampoline: return "DW_AT_trampoline";
+ case DW_AT_call_column: return "DW_AT_call_column";
+ case DW_AT_call_file: return "DW_AT_call_file";
+ case DW_AT_call_line: return "DW_AT_call_line";
+ case DW_AT_description: return "DW_AT_description";
+ case DW_AT_binary_scale: return "DW_AT_binary_scale";
+ case DW_AT_decimal_scale: return "DW_AT_decimal_scale";
+ case DW_AT_small: return "DW_AT_small";
+ case DW_AT_decimal_sign: return "DW_AT_decimal_sign";
+ case DW_AT_digit_count: return "DW_AT_digit_count";
+ case DW_AT_picture_string: return "DW_AT_picture_string";
+ case DW_AT_mutable: return "DW_AT_mutable";
+ case DW_AT_threads_scaled: return "DW_AT_threads_scaled";
+ case DW_AT_explicit: return "DW_AT_explicit";
+ case DW_AT_object_pointer: return "DW_AT_object_pointer";
+ case DW_AT_endianity: return "DW_AT_endianity";
+ case DW_AT_elemental: return "DW_AT_elemental";
+ case DW_AT_pure: return "DW_AT_pure";
+ case DW_AT_recursive: return "DW_AT_recursive";
+ case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name";
+ case DW_AT_sf_names: return "DW_AT_sf_names";
+ case DW_AT_src_info: return "DW_AT_src_info";
+ case DW_AT_mac_info: return "DW_AT_mac_info";
+ case DW_AT_src_coords: return "DW_AT_src_coords";
+ case DW_AT_body_begin: return "DW_AT_body_begin";
+ case DW_AT_body_end: return "DW_AT_body_end";
+ case DW_AT_GNU_vector: return "DW_AT_GNU_vector";
+ case DW_AT_lo_user: return "DW_AT_lo_user";
+ case DW_AT_hi_user: return "DW_AT_hi_user";
+ case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized";
+ case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags";
+ case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa";
+ case DW_AT_APPLE_block: return "DW_AT_APPLE_block";
+ case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers";
+ case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class";
+ }
+ assert(0 && "Unknown Dwarf Attribute");
+ return "";
+}
+
+/// FormEncodingString - Return the string for the specified form encoding.
+///
+const char *FormEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_FORM_addr: return "FORM_addr";
+ case DW_FORM_block2: return "FORM_block2";
+ case DW_FORM_block4: return "FORM_block4";
+ case DW_FORM_data2: return "FORM_data2";
+ case DW_FORM_data4: return "FORM_data4";
+ case DW_FORM_data8: return "FORM_data8";
+ case DW_FORM_string: return "FORM_string";
+ case DW_FORM_block: return "FORM_block";
+ case DW_FORM_block1: return "FORM_block1";
+ case DW_FORM_data1: return "FORM_data1";
+ case DW_FORM_flag: return "FORM_flag";
+ case DW_FORM_sdata: return "FORM_sdata";
+ case DW_FORM_strp: return "FORM_strp";
+ case DW_FORM_udata: return "FORM_udata";
+ case DW_FORM_ref_addr: return "FORM_ref_addr";
+ case DW_FORM_ref1: return "FORM_ref1";
+ case DW_FORM_ref2: return "FORM_ref2";
+ case DW_FORM_ref4: return "FORM_ref4";
+ case DW_FORM_ref8: return "FORM_ref8";
+ case DW_FORM_ref_udata: return "FORM_ref_udata";
+ case DW_FORM_indirect: return "FORM_indirect";
+ }
+ assert(0 && "Unknown Dwarf Form Encoding");
+ return "";
+}
+
+/// OperationEncodingString - Return the string for the specified operation
+/// encoding.
+const char *OperationEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_OP_addr: return "OP_addr";
+ case DW_OP_deref: return "OP_deref";
+ case DW_OP_const1u: return "OP_const1u";
+ case DW_OP_const1s: return "OP_const1s";
+ case DW_OP_const2u: return "OP_const2u";
+ case DW_OP_const2s: return "OP_const2s";
+ case DW_OP_const4u: return "OP_const4u";
+ case DW_OP_const4s: return "OP_const4s";
+ case DW_OP_const8u: return "OP_const8u";
+ case DW_OP_const8s: return "OP_const8s";
+ case DW_OP_constu: return "OP_constu";
+ case DW_OP_consts: return "OP_consts";
+ case DW_OP_dup: return "OP_dup";
+ case DW_OP_drop: return "OP_drop";
+ case DW_OP_over: return "OP_over";
+ case DW_OP_pick: return "OP_pick";
+ case DW_OP_swap: return "OP_swap";
+ case DW_OP_rot: return "OP_rot";
+ case DW_OP_xderef: return "OP_xderef";
+ case DW_OP_abs: return "OP_abs";
+ case DW_OP_and: return "OP_and";
+ case DW_OP_div: return "OP_div";
+ case DW_OP_minus: return "OP_minus";
+ case DW_OP_mod: return "OP_mod";
+ case DW_OP_mul: return "OP_mul";
+ case DW_OP_neg: return "OP_neg";
+ case DW_OP_not: return "OP_not";
+ case DW_OP_or: return "OP_or";
+ case DW_OP_plus: return "OP_plus";
+ case DW_OP_plus_uconst: return "OP_plus_uconst";
+ case DW_OP_shl: return "OP_shl";
+ case DW_OP_shr: return "OP_shr";
+ case DW_OP_shra: return "OP_shra";
+ case DW_OP_xor: return "OP_xor";
+ case DW_OP_skip: return "OP_skip";
+ case DW_OP_bra: return "OP_bra";
+ case DW_OP_eq: return "OP_eq";
+ case DW_OP_ge: return "OP_ge";
+ case DW_OP_gt: return "OP_gt";
+ case DW_OP_le: return "OP_le";
+ case DW_OP_lt: return "OP_lt";
+ case DW_OP_ne: return "OP_ne";
+ case DW_OP_lit0: return "OP_lit0";
+ case DW_OP_lit1: return "OP_lit1";
+ case DW_OP_lit31: return "OP_lit31";
+ case DW_OP_reg0: return "OP_reg0";
+ case DW_OP_reg1: return "OP_reg1";
+ case DW_OP_reg31: return "OP_reg31";
+ case DW_OP_breg0: return "OP_breg0";
+ case DW_OP_breg1: return "OP_breg1";
+ case DW_OP_breg31: return "OP_breg31";
+ case DW_OP_regx: return "OP_regx";
+ case DW_OP_fbreg: return "OP_fbreg";
+ case DW_OP_bregx: return "OP_bregx";
+ case DW_OP_piece: return "OP_piece";
+ case DW_OP_deref_size: return "OP_deref_size";
+ case DW_OP_xderef_size: return "OP_xderef_size";
+ case DW_OP_nop: return "OP_nop";
+ case DW_OP_push_object_address: return "OP_push_object_address";
+ case DW_OP_call2: return "OP_call2";
+ case DW_OP_call4: return "OP_call4";
+ case DW_OP_call_ref: return "OP_call_ref";
+ case DW_OP_form_tls_address: return "OP_form_tls_address";
+ case DW_OP_call_frame_cfa: return "OP_call_frame_cfa";
+ case DW_OP_lo_user: return "OP_lo_user";
+ case DW_OP_hi_user: return "OP_hi_user";
+ }
+ assert(0 && "Unknown Dwarf Operation Encoding");
+ return "";
+}
+
+/// AttributeEncodingString - Return the string for the specified attribute
+/// encoding.
+const char *AttributeEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_ATE_address: return "ATE_address";
+ case DW_ATE_boolean: return "ATE_boolean";
+ case DW_ATE_complex_float: return "ATE_complex_float";
+ case DW_ATE_float: return "ATE_float";
+ case DW_ATE_signed: return "ATE_signed";
+ case DW_ATE_signed_char: return "ATE_signed_char";
+ case DW_ATE_unsigned: return "ATE_unsigned";
+ case DW_ATE_unsigned_char: return "ATE_unsigned_char";
+ case DW_ATE_imaginary_float: return "ATE_imaginary_float";
+ case DW_ATE_packed_decimal: return "ATE_packed_decimal";
+ case DW_ATE_numeric_string: return "ATE_numeric_string";
+ case DW_ATE_edited: return "ATE_edited";
+ case DW_ATE_signed_fixed: return "ATE_signed_fixed";
+ case DW_ATE_unsigned_fixed: return "ATE_unsigned_fixed";
+ case DW_ATE_decimal_float: return "ATE_decimal_float";
+ case DW_ATE_lo_user: return "ATE_lo_user";
+ case DW_ATE_hi_user: return "ATE_hi_user";
+ }
+ assert(0 && "Unknown Dwarf Attribute Encoding");
+ return "";
+}
+
+/// DecimalSignString - Return the string for the specified decimal sign
+/// attribute.
+const char *DecimalSignString(unsigned Sign) {
+ switch (Sign) {
+ case DW_DS_unsigned: return "DS_unsigned";
+ case DW_DS_leading_overpunch: return "DS_leading_overpunch";
+ case DW_DS_trailing_overpunch: return "DS_trailing_overpunch";
+ case DW_DS_leading_separate: return "DS_leading_separate";
+ case DW_DS_trailing_separate: return "DS_trailing_separate";
+ }
+ assert(0 && "Unknown Dwarf Decimal Sign Attribute");
+ return "";
+}
+
+/// EndianityString - Return the string for the specified endianity.
+///
+const char *EndianityString(unsigned Endian) {
+ switch (Endian) {
+ case DW_END_default: return "END_default";
+ case DW_END_big: return "END_big";
+ case DW_END_little: return "END_little";
+ case DW_END_lo_user: return "END_lo_user";
+ case DW_END_hi_user: return "END_hi_user";
+ }
+ assert(0 && "Unknown Dwarf Endianity");
+ return "";
+}
+
+/// AccessibilityString - Return the string for the specified accessibility.
+///
+const char *AccessibilityString(unsigned Access) {
+ switch (Access) {
+ // Accessibility codes
+ case DW_ACCESS_public: return "ACCESS_public";
+ case DW_ACCESS_protected: return "ACCESS_protected";
+ case DW_ACCESS_private: return "ACCESS_private";
+ }
+ assert(0 && "Unknown Dwarf Accessibility");
+ return "";
+}
+
+/// VisibilityString - Return the string for the specified visibility.
+///
+const char *VisibilityString(unsigned Visibility) {
+ switch (Visibility) {
+ case DW_VIS_local: return "VIS_local";
+ case DW_VIS_exported: return "VIS_exported";
+ case DW_VIS_qualified: return "VIS_qualified";
+ }
+ assert(0 && "Unknown Dwarf Visibility");
+ return "";
+}
+
+/// VirtualityString - Return the string for the specified virtuality.
+///
+const char *VirtualityString(unsigned Virtuality) {
+ switch (Virtuality) {
+ case DW_VIRTUALITY_none: return "VIRTUALITY_none";
+ case DW_VIRTUALITY_virtual: return "VIRTUALITY_virtual";
+ case DW_VIRTUALITY_pure_virtual: return "VIRTUALITY_pure_virtual";
+ }
+ assert(0 && "Unknown Dwarf Virtuality");
+ return "";
+}
+
+/// LanguageString - Return the string for the specified language.
+///
+const char *LanguageString(unsigned Language) {
+ switch (Language) {
+ case DW_LANG_C89: return "LANG_C89";
+ case DW_LANG_C: return "LANG_C";
+ case DW_LANG_Ada83: return "LANG_Ada83";
+ case DW_LANG_C_plus_plus: return "LANG_C_plus_plus";
+ case DW_LANG_Cobol74: return "LANG_Cobol74";
+ case DW_LANG_Cobol85: return "LANG_Cobol85";
+ case DW_LANG_Fortran77: return "LANG_Fortran77";
+ case DW_LANG_Fortran90: return "LANG_Fortran90";
+ case DW_LANG_Pascal83: return "LANG_Pascal83";
+ case DW_LANG_Modula2: return "LANG_Modula2";
+ case DW_LANG_Java: return "LANG_Java";
+ case DW_LANG_C99: return "LANG_C99";
+ case DW_LANG_Ada95: return "LANG_Ada95";
+ case DW_LANG_Fortran95: return "LANG_Fortran95";
+ case DW_LANG_PLI: return "LANG_PLI";
+ case DW_LANG_ObjC: return "LANG_ObjC";
+ case DW_LANG_ObjC_plus_plus: return "LANG_ObjC_plus_plus";
+ case DW_LANG_UPC: return "LANG_UPC";
+ case DW_LANG_D: return "LANG_D";
+ case DW_LANG_lo_user: return "LANG_lo_user";
+ case DW_LANG_hi_user: return "LANG_hi_user";
+ }
+ assert(0 && "Unknown Dwarf Language");
+ return "";
+}
+
+/// CaseString - Return the string for the specified identifier case.
+///
+const char *CaseString(unsigned Case) {
+ switch (Case) {
+ case DW_ID_case_sensitive: return "ID_case_sensitive";
+ case DW_ID_up_case: return "ID_up_case";
+ case DW_ID_down_case: return "ID_down_case";
+ case DW_ID_case_insensitive: return "ID_case_insensitive";
+ }
+ assert(0 && "Unknown Dwarf Identifier Case");
+ return "";
+}
+
+/// ConventionString - Return the string for the specified calling convention.
+///
+const char *ConventionString(unsigned Convention) {
+ switch (Convention) {
+ case DW_CC_normal: return "CC_normal";
+ case DW_CC_program: return "CC_program";
+ case DW_CC_nocall: return "CC_nocall";
+ case DW_CC_lo_user: return "CC_lo_user";
+ case DW_CC_hi_user: return "CC_hi_user";
+ }
+ assert(0 && "Unknown Dwarf Calling Convention");
+ return "";
+}
+
+/// InlineCodeString - Return the string for the specified inline code.
+///
+const char *InlineCodeString(unsigned Code) {
+ switch (Code) {
+ case DW_INL_not_inlined: return "INL_not_inlined";
+ case DW_INL_inlined: return "INL_inlined";
+ case DW_INL_declared_not_inlined: return "INL_declared_not_inlined";
+ case DW_INL_declared_inlined: return "INL_declared_inlined";
+ }
+ assert(0 && "Unknown Dwarf Inline Code");
+ return "";
+}
+
+/// ArrayOrderString - Return the string for the specified array order.
+///
+const char *ArrayOrderString(unsigned Order) {
+ switch (Order) {
+ case DW_ORD_row_major: return "ORD_row_major";
+ case DW_ORD_col_major: return "ORD_col_major";
+ }
+ assert(0 && "Unknown Dwarf Array Order");
+ return "";
+}
+
+/// DiscriminantString - Return the string for the specified discriminant
+/// descriptor.
+const char *DiscriminantString(unsigned Discriminant) {
+ switch (Discriminant) {
+ case DW_DSC_label: return "DSC_label";
+ case DW_DSC_range: return "DSC_range";
+ }
+ assert(0 && "Unknown Dwarf Discriminant Descriptor");
+ return "";
+}
+
+/// LNStandardString - Return the string for the specified line number standard.
+///
+const char *LNStandardString(unsigned Standard) {
+ switch (Standard) {
+ case DW_LNS_copy: return "LNS_copy";
+ case DW_LNS_advance_pc: return "LNS_advance_pc";
+ case DW_LNS_advance_line: return "LNS_advance_line";
+ case DW_LNS_set_file: return "LNS_set_file";
+ case DW_LNS_set_column: return "LNS_set_column";
+ case DW_LNS_negate_stmt: return "LNS_negate_stmt";
+ case DW_LNS_set_basic_block: return "LNS_set_basic_block";
+ case DW_LNS_const_add_pc: return "LNS_const_add_pc";
+ case DW_LNS_fixed_advance_pc: return "LNS_fixed_advance_pc";
+ case DW_LNS_set_prologue_end: return "LNS_set_prologue_end";
+ case DW_LNS_set_epilogue_begin: return "LNS_set_epilogue_begin";
+ case DW_LNS_set_isa: return "LNS_set_isa";
+ }
+ assert(0 && "Unknown Dwarf Line Number Standard");
+ return "";
+}
+
+/// LNExtendedString - Return the string for the specified line number extended
+/// opcode encodings.
+const char *LNExtendedString(unsigned Encoding) {
+ switch (Encoding) {
+ // Line Number Extended Opcode Encodings
+ case DW_LNE_end_sequence: return "LNE_end_sequence";
+ case DW_LNE_set_address: return "LNE_set_address";
+ case DW_LNE_define_file: return "LNE_define_file";
+ case DW_LNE_lo_user: return "LNE_lo_user";
+ case DW_LNE_hi_user: return "LNE_hi_user";
+ }
+ assert(0 && "Unknown Dwarf Line Number Extended Opcode Encoding");
+ return "";
+}
+
+/// MacinfoString - Return the string for the specified macinfo type encodings.
+///
+const char *MacinfoString(unsigned Encoding) {
+ switch (Encoding) {
+ // Macinfo Type Encodings
+ case DW_MACINFO_define: return "MACINFO_define";
+ case DW_MACINFO_undef: return "MACINFO_undef";
+ case DW_MACINFO_start_file: return "MACINFO_start_file";
+ case DW_MACINFO_end_file: return "MACINFO_end_file";
+ case DW_MACINFO_vendor_ext: return "MACINFO_vendor_ext";
+ }
+ assert(0 && "Unknown Dwarf Macinfo Type Encodings");
+ return "";
+}
+
+/// CallFrameString - Return the string for the specified call frame instruction
+/// encodings.
+const char *CallFrameString(unsigned Encoding) {
+ switch (Encoding) {
+ case DW_CFA_advance_loc: return "CFA_advance_loc";
+ case DW_CFA_offset: return "CFA_offset";
+ case DW_CFA_restore: return "CFA_restore";
+ case DW_CFA_set_loc: return "CFA_set_loc";
+ case DW_CFA_advance_loc1: return "CFA_advance_loc1";
+ case DW_CFA_advance_loc2: return "CFA_advance_loc2";
+ case DW_CFA_advance_loc4: return "CFA_advance_loc4";
+ case DW_CFA_offset_extended: return "CFA_offset_extended";
+ case DW_CFA_restore_extended: return "CFA_restore_extended";
+ case DW_CFA_undefined: return "CFA_undefined";
+ case DW_CFA_same_value: return "CFA_same_value";
+ case DW_CFA_register: return "CFA_register";
+ case DW_CFA_remember_state: return "CFA_remember_state";
+ case DW_CFA_restore_state: return "CFA_restore_state";
+ case DW_CFA_def_cfa: return "CFA_def_cfa";
+ case DW_CFA_def_cfa_register: return "CFA_def_cfa_register";
+ case DW_CFA_def_cfa_offset: return "CFA_def_cfa_offset";
+ case DW_CFA_def_cfa_expression: return "CFA_def_cfa_expression";
+ case DW_CFA_expression: return "CFA_expression";
+ case DW_CFA_offset_extended_sf: return "CFA_offset_extended_sf";
+ case DW_CFA_def_cfa_sf: return "CFA_def_cfa_sf";
+ case DW_CFA_def_cfa_offset_sf: return "CFA_def_cfa_offset_sf";
+ case DW_CFA_val_offset: return "CFA_val_offset";
+ case DW_CFA_val_offset_sf: return "CFA_val_offset_sf";
+ case DW_CFA_val_expression: return "CFA_val_expression";
+ case DW_CFA_lo_user: return "CFA_lo_user";
+ case DW_CFA_hi_user: return "CFA_hi_user";
+ }
+ assert(0 && "Unknown Dwarf Call Frame Instruction Encodings");
+ return "";
+}
+
+} // End of namespace dwarf.
+
+} // End of namespace llvm.
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
new file mode 100644
index 0000000..21080b6
--- /dev/null
+++ b/lib/Support/FileUtilities.cpp
@@ -0,0 +1,263 @@
+//===- Support/FileUtilities.cpp - File System Utilities ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a family of utility functions which are useful for doing
+// various things with files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/System/Path.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cstdlib>
+#include <cstring>
+#include <cctype>
+using namespace llvm;
+
+static bool isSignedChar(char C) {
+ return (C == '+' || C == '-');
+}
+
+static bool isExponentChar(char C) {
+ switch (C) {
+ case 'D': // Strange exponential notation.
+ case 'd': // Strange exponential notation.
+ case 'e':
+ case 'E': return true;
+ default: return false;
+ }
+}
+
+static bool isNumberChar(char C) {
+ switch (C) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '.': return true;
+ default: return isSignedChar(C) || isExponentChar(C);
+ }
+}
+
+static const char *BackupNumber(const char *Pos, const char *FirstChar) {
+ // If we didn't stop in the middle of a number, don't backup.
+ if (!isNumberChar(*Pos)) return Pos;
+
+ // Otherwise, return to the start of the number.
+ while (Pos > FirstChar && isNumberChar(Pos[-1])) {
+ --Pos;
+ if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1]))
+ break;
+ }
+ return Pos;
+}
+
+/// EndOfNumber - Return the first character that is not part of the specified
+/// number. This assumes that the buffer is null terminated, so it won't fall
+/// off the end.
+static const char *EndOfNumber(const char *Pos) {
+ while (isNumberChar(*Pos))
+ ++Pos;
+ return Pos;
+}
+
+/// CompareNumbers - compare two numbers, returning true if they are different.
+static bool CompareNumbers(const char *&F1P, const char *&F2P,
+ const char *F1End, const char *F2End,
+ double AbsTolerance, double RelTolerance,
+ std::string *ErrorMsg) {
+ const char *F1NumEnd, *F2NumEnd;
+ double V1 = 0.0, V2 = 0.0;
+
+ // If one of the positions is at a space and the other isn't, chomp up 'til
+ // the end of the space.
+ while (isspace(*F1P) && F1P != F1End)
+ ++F1P;
+ while (isspace(*F2P) && F2P != F2End)
+ ++F2P;
+
+ // If we stop on numbers, compare their difference.
+ if (!isNumberChar(*F1P) || !isNumberChar(*F2P)) {
+ // The diff failed.
+ F1NumEnd = F1P;
+ F2NumEnd = F2P;
+ } else {
+ // Note that some ugliness is built into this to permit support for numbers
+ // that use "D" or "d" as their exponential marker, e.g. "1.234D45". This
+ // occurs in 200.sixtrack in spec2k.
+ V1 = strtod(F1P, const_cast<char**>(&F1NumEnd));
+ V2 = strtod(F2P, const_cast<char**>(&F2NumEnd));
+
+ if (*F1NumEnd == 'D' || *F1NumEnd == 'd') {
+ // Copy string into tmp buffer to replace the 'D' with an 'e'.
+ SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1);
+ // Strange exponential notation!
+ StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e';
+
+ V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd));
+ F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]);
+ }
+
+ if (*F2NumEnd == 'D' || *F2NumEnd == 'd') {
+ // Copy string into tmp buffer to replace the 'D' with an 'e'.
+ SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1);
+ // Strange exponential notation!
+ StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e';
+
+ V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd));
+ F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]);
+ }
+ }
+
+ if (F1NumEnd == F1P || F2NumEnd == F2P) {
+ if (ErrorMsg) {
+ *ErrorMsg = "FP Comparison failed, not a numeric difference between '";
+ *ErrorMsg += F1P[0];
+ *ErrorMsg += "' and '";
+ *ErrorMsg += F2P[0];
+ *ErrorMsg += "'";
+ }
+ return true;
+ }
+
+ // Check to see if these are inside the absolute tolerance
+ if (AbsTolerance < std::abs(V1-V2)) {
+ // Nope, check the relative tolerance...
+ double Diff;
+ if (V2)
+ Diff = std::abs(V1/V2 - 1.0);
+ else if (V1)
+ Diff = std::abs(V2/V1 - 1.0);
+ else
+ Diff = 0; // Both zero.
+ if (Diff > RelTolerance) {
+ if (ErrorMsg) {
+ *ErrorMsg = "Compared: " + ftostr(V1) + " and " + ftostr(V2) + "\n";
+ *ErrorMsg += "abs. diff = " + ftostr(std::abs(V1-V2)) +
+ " rel.diff = " + ftostr(Diff) + "\n";
+ *ErrorMsg += "Out of tolerance: rel/abs: " + ftostr(RelTolerance) +
+ "/" + ftostr(AbsTolerance);
+ }
+ return true;
+ }
+ }
+
+ // Otherwise, advance our read pointers to the end of the numbers.
+ F1P = F1NumEnd; F2P = F2NumEnd;
+ return false;
+}
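+
+// Worked example (illustrative): comparing "1.234D45" against "1.234e45".
+// strtod stops at the 'D', so the code above copies the token into a small
+// buffer, rewrites the 'D' to an 'e', and reparses; both sides then yield
+// 1.234e45 and compare equal.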
+
+/// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the
+/// files match, 1 if they are different, and 2 if there is a file error. This
+/// function differs from DiffFiles in that you can specify an absolute and
+/// relative FP error that is allowed to exist. If you specify a string to fill
+/// in for the error option, it will set the string to an error message if an
+/// error occurs, allowing the caller to distinguish between a failed diff and a
+/// file system error.
+///
+int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
+ const sys::PathWithStatus &FileB,
+ double AbsTol, double RelTol,
+ std::string *Error) {
+ const sys::FileStatus *FileAStat = FileA.getFileStatus(false, Error);
+ if (!FileAStat)
+ return 2;
+ const sys::FileStatus *FileBStat = FileB.getFileStatus(false, Error);
+ if (!FileBStat)
+ return 2;
+
+ // Check for zero length files because some systems croak when you try to
+ // mmap an empty file.
+ size_t A_size = FileAStat->getSize();
+ size_t B_size = FileBStat->getSize();
+
+ // If they are both zero sized then they're the same
+ if (A_size == 0 && B_size == 0)
+ return 0;
+
+ // If only one of them is zero sized then they can't be the same
+  if (A_size == 0 || B_size == 0) {
+ if (Error)
+ *Error = "Files differ: one is zero-sized, the other isn't";
+ return 1;
+ }
+
+  // Now it's safe to mmap the files into memory because both files
+ // have a non-zero size.
+ OwningPtr<MemoryBuffer> F1(MemoryBuffer::getFile(FileA.c_str(), Error));
+ OwningPtr<MemoryBuffer> F2(MemoryBuffer::getFile(FileB.c_str(), Error));
+ if (F1 == 0 || F2 == 0)
+ return 2;
+
+ // Okay, now that we opened the files, scan them for the first difference.
+ const char *File1Start = F1->getBufferStart();
+ const char *File2Start = F2->getBufferStart();
+ const char *File1End = F1->getBufferEnd();
+ const char *File2End = F2->getBufferEnd();
+ const char *F1P = File1Start;
+ const char *F2P = File2Start;
+
+ if (A_size == B_size) {
+ // Are the buffers identical? Common case: Handle this efficiently.
+ if (std::memcmp(File1Start, File2Start, A_size) == 0)
+ return 0;
+
+ if (AbsTol == 0 && RelTol == 0) {
+ if (Error)
+ *Error = "Files differ without tolerance allowance";
+ return 1; // Files different!
+ }
+ }
+
+ bool CompareFailed = false;
+ while (1) {
+ // Scan for the end of file or next difference.
+ while (F1P < File1End && F2P < File2End && *F1P == *F2P)
+ ++F1P, ++F2P;
+
+ if (F1P >= File1End || F2P >= File2End) break;
+
+ // Okay, we must have found a difference. Backup to the start of the
+ // current number each stream is at so that we can compare from the
+ // beginning.
+ F1P = BackupNumber(F1P, File1Start);
+ F2P = BackupNumber(F2P, File2Start);
+
+ // Now that we are at the start of the numbers, compare them, exiting if
+ // they don't match.
+ if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) {
+ CompareFailed = true;
+ break;
+ }
+ }
+
+ // Okay, we reached the end of file. If both files are at the end, we
+ // succeeded.
+ bool F1AtEnd = F1P >= File1End;
+ bool F2AtEnd = F2P >= File2End;
+ if (!CompareFailed && (!F1AtEnd || !F2AtEnd)) {
+ // Else, we might have run off the end due to a number: backup and retry.
+ if (F1AtEnd && isNumberChar(F1P[-1])) --F1P;
+ if (F2AtEnd && isNumberChar(F2P[-1])) --F2P;
+ F1P = BackupNumber(F1P, File1Start);
+ F2P = BackupNumber(F2P, File2Start);
+
+ // Now that we are at the start of the numbers, compare them, exiting if
+ // they don't match.
+ if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error))
+ CompareFailed = true;
+
+ // If we found the end, we succeeded.
+ if (F1P < File1End || F2P < File2End)
+ CompareFailed = true;
+ }
+
+ return CompareFailed;
+}
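+
+// Usage sketch (illustrative; the file names are hypothetical):
+//
+//   std::string Err;
+//   int Res = DiffFilesWithTolerance(sys::PathWithStatus("expected.out"),
+//                                    sys::PathWithStatus("actual.out"),
+//                                    /*AbsTol=*/0.0, /*RelTol=*/1e-6, &Err);
+//   // Res == 0: files match; 1: they differ (Err says why); 2: file error.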
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
new file mode 100644
index 0000000..41c730e
--- /dev/null
+++ b/lib/Support/FoldingSet.cpp
@@ -0,0 +1,378 @@
+//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a hash set that can be used to remove duplication of
+// nodes in a graph. This code was originally created by Chris Lattner for use
+// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeID Implementation
+
+/// Add* - Add various data types to Bit data.
+///
+void FoldingSetNodeID::AddPointer(const void *Ptr) {
+ // Note: this adds pointers to the hash using sizes and endianness that
+ // depend on the host. It doesn't matter however, because hashing on
+  // pointer values is inherently unstable. Nothing should depend on the
+ // ordering of nodes in the folding set.
+ intptr_t PtrI = (intptr_t)Ptr;
+ Bits.push_back(unsigned(PtrI));
+ if (sizeof(intptr_t) > sizeof(unsigned))
+ Bits.push_back(unsigned(uint64_t(PtrI) >> 32));
+}
+void FoldingSetNodeID::AddInteger(signed I) {
+ Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(unsigned I) {
+ Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(long I) {
+ AddInteger((unsigned long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long I) {
+ if (sizeof(long) == sizeof(int))
+ AddInteger(unsigned(I));
+ else if (sizeof(long) == sizeof(long long)) {
+ AddInteger((unsigned long long)I);
+ } else {
+ assert(0 && "unexpected sizeof(long)");
+ }
+}
+void FoldingSetNodeID::AddInteger(long long I) {
+ AddInteger((unsigned long long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long long I) {
+ AddInteger(unsigned(I));
+ if ((uint64_t)(int)I != I)
+ Bits.push_back(unsigned(I >> 32));
+}
+
+void FoldingSetNodeID::AddString(const char *String, const char *End) {
+ unsigned Size = static_cast<unsigned>(End - String);
+ Bits.push_back(Size);
+ if (!Size) return;
+
+ unsigned Units = Size / 4;
+ unsigned Pos = 0;
+ const unsigned *Base = (const unsigned *)String;
+
+ // If the string is aligned do a bulk transfer.
+ if (!((intptr_t)Base & 3)) {
+ Bits.append(Base, Base + Units);
+ Pos = (Units + 1) * 4;
+ } else {
+ // Otherwise do it the hard way.
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 4] << 24) |
+ ((unsigned char)String[Pos - 3] << 16) |
+ ((unsigned char)String[Pos - 2] << 8) |
+ (unsigned char)String[Pos - 1];
+ Bits.push_back(V);
+ }
+ }
+
+ // With the leftover bits.
+ unsigned V = 0;
+ // Pos will have overshot size by 4 - #bytes left over.
+ switch (Pos - Size) {
+ case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
+ case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
+ case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
+ default: return; // Nothing left.
+ }
+
+ Bits.push_back(V);
+}
+
+void FoldingSetNodeID::AddString(const char *String) {
+ AddString(String, String + strlen(String));
+}
+
+void FoldingSetNodeID::AddString(const std::string &String) {
+ AddString(&*String.begin(), &*String.end());
+}
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
+/// lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeID::ComputeHash() const {
+ // This is adapted from SuperFastHash by Paul Hsieh.
+ unsigned Hash = static_cast<unsigned>(Bits.size());
+ for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) {
+ unsigned Data = *BP;
+ Hash += Data & 0xFFFF;
+ unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
+ Hash = (Hash << 16) ^ Tmp;
+ Hash += Hash >> 11;
+ }
+
+ // Force "avalanching" of final 127 bits.
+ Hash ^= Hash << 3;
+ Hash += Hash >> 5;
+ Hash ^= Hash << 4;
+ Hash += Hash >> 17;
+ Hash ^= Hash << 25;
+ Hash += Hash >> 6;
+ return Hash;
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
+ if (Bits.size() != RHS.Bits.size()) return false;
+ return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+/// Helper functions for FoldingSetImpl.
+
+/// GetNextPtr - In order to save space, each bucket is a
+/// singly-linked-list. In order to make deletion more efficient, we make
+/// the list circular, so we can delete a node without computing its hash.
+/// The problem with this is that the start of the hash buckets are not
+/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
+/// use GetBucketPtr when this happens.
+static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
+ // The low bit is set if this is the pointer back to the bucket.
+ if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
+ return 0;
+
+ return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
+}
+
+
+/// GetBucketPtr - Provides a casting of a bucket pointer for isNode
+/// testing.
+static void **GetBucketPtr(void *NextInBucketPtr) {
+ intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
+ assert((Ptr & 1) && "Not a bucket pointer");
+ return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
+}
+
+/// GetBucketFor - Hash the specified node ID and return the hash bucket for
+/// the specified ID.
+static void **GetBucketFor(const FoldingSetNodeID &ID,
+ void **Buckets, unsigned NumBuckets) {
+ // NumBuckets is always a power of 2.
+ unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1);
+ return Buckets + BucketNum;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetImpl Implementation
+
+FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
+ assert(5 < Log2InitSize && Log2InitSize < 32 &&
+ "Initial hash table size out of range");
+ NumBuckets = 1 << Log2InitSize;
+ Buckets = new void*[NumBuckets+1];
+ clear();
+}
+FoldingSetImpl::~FoldingSetImpl() {
+ delete [] Buckets;
+}
+void FoldingSetImpl::clear() {
+ // Set all but the last bucket to null pointers.
+ memset(Buckets, 0, NumBuckets*sizeof(void*));
+
+ // Set the very last bucket to be a non-null "pointer".
+ Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+
+ // Reset the node count to zero.
+ NumNodes = 0;
+}
+
+/// GrowHashTable - Double the size of the hash table and rehash everything.
+///
+void FoldingSetImpl::GrowHashTable() {
+ void **OldBuckets = Buckets;
+ unsigned OldNumBuckets = NumBuckets;
+ NumBuckets <<= 1;
+
+ // Clear out new buckets.
+ Buckets = new void*[NumBuckets+1];
+ clear();
+
+ // Walk the old buckets, rehashing nodes into their new place.
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != OldNumBuckets; ++i) {
+ void *Probe = OldBuckets[i];
+ if (!Probe) continue;
+ while (Node *NodeInBucket = GetNextPtr(Probe)) {
+ // Figure out the next link, remove NodeInBucket from the old link.
+ Probe = NodeInBucket->getNextInBucket();
+ NodeInBucket->SetNextInBucket(0);
+
+ // Insert the node into the new bucket, after recomputing the hash.
+ GetNodeProfile(ID, NodeInBucket);
+ InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets));
+ ID.clear();
+ }
+ }
+
+ delete[] OldBuckets;
+}
+
+/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
+/// return it. If not, return the insertion token that will make insertion
+/// faster.
+FoldingSetImpl::Node *
+FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+                                    void *&InsertPos) {
+
+ void **Bucket = GetBucketFor(ID, Buckets, NumBuckets);
+ void *Probe = *Bucket;
+
+ InsertPos = 0;
+
+ FoldingSetNodeID OtherID;
+ while (Node *NodeInBucket = GetNextPtr(Probe)) {
+ GetNodeProfile(OtherID, NodeInBucket);
+ if (OtherID == ID)
+ return NodeInBucket;
+
+ Probe = NodeInBucket->getNextInBucket();
+ OtherID.clear();
+ }
+
+ // Didn't find the node, return null with the bucket as the InsertPos.
+ InsertPos = Bucket;
+ return 0;
+}
+
+/// InsertNode - Insert the specified node into the folding set, knowing that it
+/// is not already in the map. InsertPos must be obtained from
+/// FindNodeOrInsertPos.
+void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
+ assert(N->getNextInBucket() == 0 && "Node already inserted!");
+ // Do we need to grow the hashtable?
+ if (NumNodes+1 > NumBuckets*2) {
+ GrowHashTable();
+ FoldingSetNodeID ID;
+ GetNodeProfile(ID, N);
+ InsertPos = GetBucketFor(ID, Buckets, NumBuckets);
+ }
+
+ ++NumNodes;
+
+ // The insert position is actually a bucket pointer.
+ void **Bucket = static_cast<void**>(InsertPos);
+
+ void *Next = *Bucket;
+
+ // If this is the first insertion into this bucket, its next pointer will be
+ // null. Pretend as if it pointed to itself, setting the low bit to indicate
+ // that it is a pointer to the bucket.
+ if (Next == 0)
+ Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
+
+ // Set the node's next pointer, and make the bucket point to the node.
+ N->SetNextInBucket(Next);
+ *Bucket = N;
+}
+
+/// RemoveNode - Remove a node from the folding set, returning true if one was
+/// removed or false if the node was not in the folding set.
+bool FoldingSetImpl::RemoveNode(Node *N) {
+ // Because each bucket is a circular list, we don't need to compute N's hash
+ // to remove it.
+ void *Ptr = N->getNextInBucket();
+ if (Ptr == 0) return false; // Not in folding set.
+
+ --NumNodes;
+ N->SetNextInBucket(0);
+
+ // Remember what N originally pointed to, either a bucket or another node.
+ void *NodeNextPtr = Ptr;
+
+ // Chase around the list until we find the node (or bucket) which points to N.
+ while (true) {
+ if (Node *NodeInBucket = GetNextPtr(Ptr)) {
+ // Advance pointer.
+ Ptr = NodeInBucket->getNextInBucket();
+
+ // We found a node that points to N, change it to point to N's next node,
+ // removing N from the list.
+ if (Ptr == N) {
+ NodeInBucket->SetNextInBucket(NodeNextPtr);
+ return true;
+ }
+ } else {
+ void **Bucket = GetBucketPtr(Ptr);
+ Ptr = *Bucket;
+
+ // If we found that the bucket points to N, update the bucket to point to
+ // whatever is next.
+ if (Ptr == N) {
+ *Bucket = NodeNextPtr;
+ return true;
+ }
+ }
+ }
+}
+
+/// GetOrInsertNode - If there is an existing simple Node exactly
+/// equal to the specified node, return it. Otherwise, insert 'N' and
+/// return it instead.
+FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
+ FoldingSetNodeID ID;
+ GetNodeProfile(ID, N);
+ void *IP;
+ if (Node *E = FindNodeOrInsertPos(ID, IP))
+ return E;
+ InsertNode(N, IP);
+ return N;
+}
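+// The templated FoldingSet<T> wrapper is normally driven with the
+// profile/find/insert pattern sketched here (MyNode, Set, and the operands
+// are illustrative names, not part of this file):
+//
+//   FoldingSetNodeID ID;
+//   MyNode::Profile(ID, Op0, Op1);     // must match MyNode's Profile method
+//   void *IP;
+//   if (MyNode *Existing = Set.FindNodeOrInsertPos(ID, IP))
+//     return Existing;                 // a structurally equal node exists
+//   MyNode *New = new MyNode(Op0, Op1);
+//   Set.InsertNode(New, IP);           // IP recalls the bucket probed above
+//   return New;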
+
+//===----------------------------------------------------------------------===//
+// FoldingSetIteratorImpl Implementation
+
+FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
+ // Skip to the first non-null non-self-cycle bucket.
+ while (*Bucket != reinterpret_cast<void*>(-1) &&
+ (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
+ ++Bucket;
+
+ NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+}
+
+void FoldingSetIteratorImpl::advance() {
+ // If there is another link within this bucket, go to it.
+ void *Probe = NodePtr->getNextInBucket();
+
+ if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
+ NodePtr = NextNodeInBucket;
+ else {
+ // Otherwise, this is the last link in this bucket.
+ void **Bucket = GetBucketPtr(Probe);
+
+ // Skip to the next non-null non-self-cycle bucket.
+ do {
+ ++Bucket;
+ } while (*Bucket != reinterpret_cast<void*>(-1) &&
+ (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
+
+ NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetBucketIteratorImpl Implementation
+
+FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
+ Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
+}
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
new file mode 100644
index 0000000..c359dfb
--- /dev/null
+++ b/lib/Support/GraphWriter.cpp
@@ -0,0 +1,89 @@
+//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements misc. GraphWriter support routines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Path.h"
+#include "llvm/System/Program.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+void llvm::DisplayGraph(const sys::Path &Filename) {
+ std::string ErrMsg;
+#if HAVE_GRAPHVIZ
+ sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
+
+ std::vector<const char*> args;
+ args.push_back(Graphviz.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(0);
+
+ cerr << "Running 'Graphviz' program... " << std::flush;
+ if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
+ cerr << "Error viewing graph: " << ErrMsg << "\n";
+ }
+#elif (HAVE_GV && HAVE_DOT)
+ sys::Path PSFilename = Filename;
+ PSFilename.appendSuffix("ps");
+
+ sys::Path dot(LLVM_PATH_DOT);
+
+ std::vector<const char*> args;
+ args.push_back(dot.c_str());
+ args.push_back("-Tps");
+ args.push_back("-Nfontname=Courier");
+ args.push_back("-Gsize=7.5,10");
+ args.push_back(Filename.c_str());
+ args.push_back("-o");
+ args.push_back(PSFilename.c_str());
+ args.push_back(0);
+
+ cerr << "Running 'dot' program... " << std::flush;
+ if (sys::Program::ExecuteAndWait(dot, &args[0],0,0,0,0,&ErrMsg)) {
+ cerr << "Error viewing graph: '" << ErrMsg << "\n";
+ } else {
+ cerr << " done. \n";
+
+ sys::Path gv(LLVM_PATH_GV);
+ args.clear();
+ args.push_back(gv.c_str());
+ args.push_back(PSFilename.c_str());
+ args.push_back("-spartan");
+ args.push_back(0);
+
+ ErrMsg.clear();
+ if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) {
+ cerr << "Error viewing graph: " << ErrMsg << "\n";
+ }
+ }
+ PSFilename.eraseFromDisk();
+#elif HAVE_DOTTY
+ sys::Path dotty(LLVM_PATH_DOTTY);
+
+ std::vector<const char*> args;
+ args.push_back(dotty.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(0);
+
+ cerr << "Running 'dotty' program... " << std::flush;
+ if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
+ cerr << "Error viewing graph: " << ErrMsg << "\n";
+ } else {
+#ifdef __MINGW32__ // Dotty spawns another app and doesn't wait until it returns
+ return;
+#endif
+ }
+#endif
+
+ Filename.eraseFromDisk();
+}
diff --git a/lib/Support/IsInf.cpp b/lib/Support/IsInf.cpp
new file mode 100644
index 0000000..d6da0c9
--- /dev/null
+++ b/lib/Support/IsInf.cpp
@@ -0,0 +1,49 @@
+//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform-independent wrapper around C99 isinf()
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISINF_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISINF_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISINF_IN_CMATH
+# include <cmath>
+using std::isinf;
+#elif HAVE_FINITE_IN_IEEEFP_H
+// A handy workaround I found at http://www.unixguide.net/sun/faq ...
+// apparently this has been a problem with Solaris for years.
+# include <ieeefp.h>
+static int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isinf(X) (!_finite(X))
+#elif defined(_AIX) && defined(__GNUC__)
+// GCC's fixincludes seems to be removing the isinf() declaration from the
+// system header /usr/include/math.h
+# include <math.h>
+static int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(__hpux)
+// HP-UX is "special"
+#include <math.h>
+static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); }
+#else
+# error "Don't know how to get isinf()"
+#endif
+
+namespace llvm {
+
+int IsInf(float f) { return isinf(f); }
+int IsInf(double d) { return isinf(d); }
+
+} // end namespace llvm
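+// A minimal sanity check for the wrappers above (illustrative; assumes
+// IEEE-754 arithmetic so that 1.0/0.0 produces an infinity):
+//
+//   double Inf = 1.0 / 0.0;
+//   assert(llvm::IsInf(Inf) && !llvm::IsInf(0.0));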
diff --git a/lib/Support/IsNAN.cpp b/lib/Support/IsNAN.cpp
new file mode 100644
index 0000000..bdfdfbf
--- /dev/null
+++ b/lib/Support/IsNAN.cpp
@@ -0,0 +1,33 @@
+//===-- IsNAN.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform-independent wrapper around C99 isnan().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+
+#if HAVE_ISNAN_IN_MATH_H
+# include <math.h>
+#elif HAVE_ISNAN_IN_CMATH
+# include <cmath>
+#elif HAVE_STD_ISNAN_IN_CMATH
+# include <cmath>
+using std::isnan;
+#elif defined(_MSC_VER)
+#include <float.h>
+#define isnan _isnan
+#else
+# error "Don't know how to get isnan()"
+#endif
+
+namespace llvm {
+ int IsNAN(float f) { return isnan(f); }
+ int IsNAN(double d) { return isnan(d); }
+} // end namespace llvm
diff --git a/lib/Support/Makefile b/lib/Support/Makefile
new file mode 100644
index 0000000..48c21f4
--- /dev/null
+++ b/lib/Support/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Support/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMSupport
+BUILD_ARCHIVE = 1
+
+## FIXME: This only requires RTTI because tblgen uses it. Fix that.
+REQUIRES_RTTI = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
new file mode 100644
index 0000000..6de6575
--- /dev/null
+++ b/lib/Support/ManagedStatic.cpp
@@ -0,0 +1,91 @@
+//===-- ManagedStatic.cpp - Static Global wrapper -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ManagedStatic class and llvm_shutdown().
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Config/config.h"
+#include "llvm/System/Atomic.h"
+#include "llvm/System/Mutex.h"
+#include <cassert>
+using namespace llvm;
+
+static const ManagedStaticBase *StaticList = 0;
+
+static sys::Mutex* ManagedStaticMutex = 0;
+
+void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
+ void (*Deleter)(void*)) const {
+ if (ManagedStaticMutex) {
+ ManagedStaticMutex->acquire();
+
+ if (Ptr == 0) {
+ void* tmp = Creator ? Creator() : 0;
+
+ sys::MemoryFence();
+ Ptr = tmp;
+ DeleterFn = Deleter;
+
+ // Add to list of managed statics.
+ Next = StaticList;
+ StaticList = this;
+ }
+
+ ManagedStaticMutex->release();
+ } else {
+ assert(Ptr == 0 && DeleterFn == 0 && Next == 0 &&
+ "Partially initialized ManagedStatic!?");
+ Ptr = Creator ? Creator() : 0;
+ DeleterFn = Deleter;
+
+ // Add to list of managed statics.
+ Next = StaticList;
+ StaticList = this;
+ }
+}
+
+void ManagedStaticBase::destroy() const {
+ assert(DeleterFn && "ManagedStatic not initialized correctly!");
+ assert(StaticList == this &&
+ "Not destroyed in reverse order of construction?");
+ // Unlink from list.
+ StaticList = Next;
+ Next = 0;
+
+ // Destroy memory.
+ DeleterFn(Ptr);
+
+ // Cleanup.
+ Ptr = 0;
+ DeleterFn = 0;
+}
+
+bool llvm::llvm_start_multithreaded() {
+#if LLVM_MULTITHREADED
+ assert(ManagedStaticMutex == 0 && "Multithreaded LLVM already initialized!");
+ ManagedStaticMutex = new sys::Mutex(true);
+ return true;
+#else
+ return false;
+#endif
+}
+
+/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
+void llvm::llvm_shutdown() {
+ while (StaticList)
+ StaticList->destroy();
+
+ if (ManagedStaticMutex) {
+ delete ManagedStaticMutex;
+ ManagedStaticMutex = 0;
+ }
+}
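+// A ManagedStatic is typically declared at file scope and torn down once at
+// process exit (Registry is an illustrative name):
+//
+//   static ManagedStatic<std::vector<std::string> > Registry;
+//   ...
+//   Registry->push_back("entry");  // first use runs the Creator lazily
+//   ...
+//   llvm_shutdown();               // destroys all ManagedStatics, LIFO order
+//
+// (See lib/Support/PluginLoader.cpp for a real client of this pattern.)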
+
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
new file mode 100644
index 0000000..e35c626
--- /dev/null
+++ b/lib/Support/MemoryBuffer.cpp
@@ -0,0 +1,279 @@
+//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MemoryBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/System/Path.h"
+#include "llvm/System/Process.h"
+#include "llvm/System/Program.h"
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <sys/types.h>
+#include <sys/stat.h>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#include <sys/uio.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer implementation itself.
+//===----------------------------------------------------------------------===//
+
+MemoryBuffer::~MemoryBuffer() {
+ if (MustDeleteBuffer)
+ free((void*)BufferStart);
+}
+
+/// initCopyOf - Initialize this source buffer with a copy of the specified
+/// memory range. We make the copy so that we can null terminate it
+/// successfully.
+void MemoryBuffer::initCopyOf(const char *BufStart, const char *BufEnd) {
+ size_t Size = BufEnd-BufStart;
+ BufferStart = (char *)malloc((Size+1) * sizeof(char));
+ BufferEnd = BufferStart+Size;
+ memcpy(const_cast<char*>(BufferStart), BufStart, Size);
+ *const_cast<char*>(BufferEnd) = 0; // Null terminate buffer.
+ MustDeleteBuffer = true;
+}
+
+/// init - Initialize this MemoryBuffer as a reference to externally allocated
+/// memory, memory that we know is already null terminated.
+void MemoryBuffer::init(const char *BufStart, const char *BufEnd) {
+ assert(BufEnd[0] == 0 && "Buffer is not null terminated!");
+ BufferStart = BufStart;
+ BufferEnd = BufEnd;
+ MustDeleteBuffer = false;
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBufferMem implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class MemoryBufferMem : public MemoryBuffer {
+ std::string FileID;
+public:
+ MemoryBufferMem(const char *Start, const char *End, const char *FID,
+ bool Copy = false)
+ : FileID(FID) {
+ if (!Copy)
+ init(Start, End);
+ else
+ initCopyOf(Start, End);
+ }
+
+ virtual const char *getBufferIdentifier() const {
+ return FileID.c_str();
+ }
+};
+}
+
+/// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note
+/// that EndPtr[0] must be a null byte and be accessible!
+MemoryBuffer *MemoryBuffer::getMemBuffer(const char *StartPtr,
+ const char *EndPtr,
+ const char *BufferName) {
+ return new MemoryBufferMem(StartPtr, EndPtr, BufferName);
+}
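+// Example: wrapping externally owned, already null-terminated memory. The
+// bytes are referenced, not copied (Text and the buffer name are
+// illustrative):
+//
+//   static const char Text[] = "hello";
+//   MemoryBuffer *MB = MemoryBuffer::getMemBuffer(Text, Text+5, "<test>");
+//   // Text[5] is the literal's NUL, satisfying the EndPtr[0] requirement.
+//   delete MB;  // does not free Text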
+
+/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
+/// copying the contents and taking ownership of it. This has no requirements
+/// on EndPtr[0].
+MemoryBuffer *MemoryBuffer::getMemBufferCopy(const char *StartPtr,
+ const char *EndPtr,
+ const char *BufferName) {
+ return new MemoryBufferMem(StartPtr, EndPtr, BufferName, true);
+}
+
+/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
+/// whose contents are left uninitialized (only the terminating null byte is
+/// written). The caller is expected to fill in the memory; it is owned by
+/// the MemoryBuffer object.
+MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
+ const char *BufferName) {
+ char *Buf = (char *)malloc((Size+1) * sizeof(char));
+ if (!Buf) return 0;
+ Buf[Size] = 0;
+ MemoryBufferMem *SB = new MemoryBufferMem(Buf, Buf+Size, BufferName);
+ // The memory for this buffer is owned by the MemoryBuffer.
+ SB->MustDeleteBuffer = true;
+ return SB;
+}
+
+/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
+/// is completely initialized to zeros. Note that the caller should
+/// initialize the memory allocated by this method. The memory is owned by
+/// the MemoryBuffer object.
+MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size,
+ const char *BufferName) {
+ MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
+ if (!SB) return 0;
+ memset(const_cast<char*>(SB->getBufferStart()), 0, Size+1);
+ return SB;
+}
+
+
+/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
+/// if the Filename is "-". If an error occurs, this returns null and fills
+/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
+/// returns an empty buffer.
+MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename,
+ std::string *ErrStr,
+ int64_t FileSize) {
+ if (Filename[0] != '-' || Filename[1] != 0)
+ return getFile(Filename, ErrStr, FileSize);
+ MemoryBuffer *M = getSTDIN();
+ if (M) return M;
+
+ // If stdin was empty, M is null. Cons up an empty memory buffer now.
+ const char *EmptyStr = "";
+ return MemoryBuffer::getMemBuffer(EmptyStr, EmptyStr, "<stdin>");
+}
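+// A typical caller funnels both regular files and "-" through this entry
+// point (InputFilename and ParseInput are illustrative):
+//
+//   std::string Err;
+//   MemoryBuffer *MB = MemoryBuffer::getFileOrSTDIN(InputFilename, &Err);
+//   if (!MB) {
+//     cerr << "error: " << Err << "\n";
+//     return 1;
+//   }
+//   ParseInput(MB->getBufferStart(), MB->getBufferEnd());
+//   delete MB;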
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getFile implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// MemoryBufferMMapFile - This represents a file that was mapped in with the
+/// sys::Path::MapInFilePages method. When destroyed, it calls the
+/// sys::Path::UnMapFilePages method.
+class MemoryBufferMMapFile : public MemoryBuffer {
+ std::string Filename;
+public:
+ MemoryBufferMMapFile(const char *filename, const char *Pages, uint64_t Size)
+ : Filename(filename) {
+ init(Pages, Pages+Size);
+ }
+
+ virtual const char *getBufferIdentifier() const {
+ return Filename.c_str();
+ }
+
+ ~MemoryBufferMMapFile() {
+ sys::Path::UnMapFilePages(getBufferStart(), getBufferSize());
+ }
+};
+}
+
+MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
+ int64_t FileSize) {
+ int OpenFlags = 0;
+#ifdef O_BINARY
+ OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
+#endif
+ int FD = ::open(Filename, O_RDONLY|OpenFlags);
+ if (FD == -1) {
+ if (ErrStr) *ErrStr = "could not open file";
+ return 0;
+ }
+
+ // If we don't know the file size, use fstat to find out. fstat on an open
+ // file descriptor is cheaper than stat on a random path.
+ if (FileSize == -1) {
+ struct stat FileInfo;
+ // TODO: This should use fstat64 when available.
+ if (fstat(FD, &FileInfo) == -1) {
+ if (ErrStr) *ErrStr = "could not get file length";
+ ::close(FD);
+ return 0;
+ }
+ FileSize = FileInfo.st_size;
+ }
+
+
+ // If the file is large, try to use mmap to read it in. We don't use mmap
+ // for small files, because this can severely fragment our address space. Also
+ // don't try to map files that are exactly a multiple of the system page size,
+ // as the file would not have the required null terminator.
+ if (FileSize >= 4096*4 &&
+ (FileSize & (sys::Process::GetPageSize()-1)) != 0) {
+ if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
+ // Close the file descriptor, now that the whole file is in memory.
+ ::close(FD);
+ return new MemoryBufferMMapFile(Filename, Pages, FileSize);
+ }
+ }
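+ // Worked example of the check above (assuming 4096-byte pages): a
+ // 100000-byte file gives 100000 & 4095 == 1696 != 0, so the byte just past
+ // the mapping falls on a mapped page and can hold the null terminator; a
+ // 131072-byte file is an exact multiple of the page size, so it falls
+ // through to the read path below, which allocates FileSize+1 bytes.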
+
+ MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(FileSize, Filename);
+ if (!Buf) {
+ // Failed to create a buffer.
+ if (ErrStr) *ErrStr = "could not allocate buffer";
+ ::close(FD);
+ return 0;
+ }
+
+ OwningPtr<MemoryBuffer> SB(Buf);
+ char *BufPtr = const_cast<char*>(SB->getBufferStart());
+
+ size_t BytesLeft = FileSize;
+ while (BytesLeft) {
+ ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+ if (NumRead != -1) {
+ BytesLeft -= NumRead;
+ BufPtr += NumRead;
+ } else if (errno == EINTR) {
+ // try again
+ } else {
+ // error reading.
+ close(FD);
+ if (ErrStr) *ErrStr = "error reading file data";
+ return 0;
+ }
+ }
+ close(FD);
+
+ return SB.take();
+}
+
+//===----------------------------------------------------------------------===//
+// MemoryBuffer::getSTDIN implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class STDINBufferFile : public MemoryBuffer {
+public:
+ virtual const char *getBufferIdentifier() const {
+ return "<stdin>";
+ }
+};
+}
+
+MemoryBuffer *MemoryBuffer::getSTDIN() {
+ char Buffer[4096*4];
+
+ std::vector<char> FileData;
+
+ // Read in all of the data from stdin; we cannot mmap stdin.
+ sys::Program::ChangeStdinToBinary();
+ size_t ReadBytes;
+ do {
+ ReadBytes = fread(Buffer, sizeof(char), sizeof(Buffer), stdin);
+ FileData.insert(FileData.end(), Buffer, Buffer+ReadBytes);
+ } while (ReadBytes == sizeof(Buffer));
+
+ FileData.push_back(0); // &FileData[Size] is invalid. So is &*FileData.end().
+ size_t Size = FileData.size();
+ if (Size <= 1)
+ return 0;
+ MemoryBuffer *B = new STDINBufferFile();
+ B->initCopyOf(&FileData[0], &FileData[Size-1]);
+ return B;
+}
diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp
new file mode 100644
index 0000000..5acf1d1
--- /dev/null
+++ b/lib/Support/PluginLoader.cpp
@@ -0,0 +1,43 @@
+//===-- PluginLoader.cpp - Implement -load command line option ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -load <plugin> command line option handler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DONT_GET_PLUGIN_LOADER_OPTION
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/DynamicLibrary.h"
+#include <ostream>
+#include <vector>
+using namespace llvm;
+
+static ManagedStatic<std::vector<std::string> > Plugins;
+
+void PluginLoader::operator=(const std::string &Filename) {
+ std::string Error;
+ if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) {
+ cerr << "Error opening '" << Filename << "': " << Error
+ << "\n -load request ignored.\n";
+ } else {
+ Plugins->push_back(Filename);
+ }
+}
+
+unsigned PluginLoader::getNumPlugins() {
+ return Plugins.isConstructed() ? Plugins->size() : 0;
+}
+
+std::string &PluginLoader::getPlugin(unsigned num) {
+ assert(Plugins.isConstructed() && num < Plugins->size() &&
+ "Asking for an out of bounds plugin");
+ return (*Plugins)[num];
+}
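+// The cl::opt machinery assigns each -load argument to a PluginLoader
+// instance via the operator= above, so a tool invocation like the
+// illustrative
+//
+//   opt -load ./MyPass.so -mypass input.bc
+//
+// permanently dlopens the shared object and records its name for later
+// retrieval through getNumPlugins() and getPlugin().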
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
new file mode 100644
index 0000000..c111c5e
--- /dev/null
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -0,0 +1,108 @@
+//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+// FIXME: This should be thread local when llvm supports threads.
+static const PrettyStackTraceEntry *PrettyStackTraceHead = 0;
+
+static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
+ unsigned NextID = 0;
+ if (Entry->getNextEntry())
+ NextID = PrintStack(Entry->getNextEntry(), OS);
+ OS << NextID << ".\t";
+ Entry->print(OS);
+
+ return NextID+1;
+}
+
+/// PrintCurStackTrace - Print the current stack trace to the specified stream.
+static void PrintCurStackTrace(raw_ostream &OS) {
+ // Don't print an empty trace.
+ if (PrettyStackTraceHead == 0) return;
+
+ // If there are pretty stack frames registered, walk and emit them.
+ OS << "Stack dump:\n";
+
+ PrintStack(PrettyStackTraceHead, OS);
+ OS.flush();
+}
+
+// Integrate with crash reporter.
+#ifdef __APPLE__
+extern "C" const char *__crashreporter_info__;
+const char *__crashreporter_info__ = 0;
+#endif
+
+
+/// CrashHandler - This callback is run if a fatal signal is delivered to the
+/// process, it prints the pretty stack trace.
+static void CrashHandler(void *Cookie) {
+#ifndef __APPLE__
+ // On non-apple systems, just emit the crash stack trace to stderr.
+ PrintCurStackTrace(errs());
+#else
+ // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also
+ // put it into __crashreporter_info__.
+ SmallString<2048> TmpStr;
+ {
+ raw_svector_ostream Stream(TmpStr);
+ PrintCurStackTrace(Stream);
+ }
+
+ if (!TmpStr.empty()) {
+ __crashreporter_info__ = strdup(TmpStr.c_str());
+ errs() << __crashreporter_info__;
+ }
+
+#endif
+}
+
+static bool RegisterCrashPrinter() {
+ sys::AddSignalHandler(CrashHandler, 0);
+ return false;
+}
+
+PrettyStackTraceEntry::PrettyStackTraceEntry() {
+ // The first time this is called, we register the crash printer.
+ static bool HandlerRegistered = RegisterCrashPrinter();
+ (void)HandlerRegistered; // Referenced only to silence unused-variable warnings.
+
+ // Link ourselves.
+ NextEntry = PrettyStackTraceHead;
+ PrettyStackTraceHead = this;
+}
+
+PrettyStackTraceEntry::~PrettyStackTraceEntry() {
+ assert(PrettyStackTraceHead == this &&
+ "Pretty stack trace entry destruction is out of order");
+ PrettyStackTraceHead = getNextEntry();
+}
+
+void PrettyStackTraceString::print(raw_ostream &OS) const {
+ OS << Str << "\n";
+}
+
+void PrettyStackTraceProgram::print(raw_ostream &OS) const {
+ OS << "Program arguments: ";
+ // Print the argument list.
+ for (unsigned i = 0, e = ArgC; i != e; ++i)
+ OS << ArgV[i] << ' ';
+ OS << '\n';
+}
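+// Entries are stacked RAII-style; a tool usually installs one for the whole
+// program and more around interesting scopes (names illustrative):
+//
+//   int main(int argc, char **argv) {
+//     llvm::PrettyStackTraceProgram X(argc, argv);
+//     ...
+//     llvm::PrettyStackTraceString Phase("parsing the input file");
+//     ...
+//   }
+//
+// On a fatal signal, CrashHandler prints "Stack dump:" followed by one
+// numbered line per entry still alive, outermost first.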
+
diff --git a/lib/Support/SlowOperationInformer.cpp b/lib/Support/SlowOperationInformer.cpp
new file mode 100644
index 0000000..d5ffff9
--- /dev/null
+++ b/lib/Support/SlowOperationInformer.cpp
@@ -0,0 +1,66 @@
+//===-- SlowOperationInformer.cpp - Keep the user informed ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SlowOperationInformer class for the LLVM debugger.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SlowOperationInformer.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Alarm.h"
+#include <sstream>
+#include <cassert>
+using namespace llvm;
+
+SlowOperationInformer::SlowOperationInformer(const std::string &Name)
+ : OperationName(Name), LastPrintAmount(0) {
+ sys::SetupAlarm(1);
+}
+
+SlowOperationInformer::~SlowOperationInformer() {
+ sys::TerminateAlarm();
+ if (LastPrintAmount) {
+ // If we have printed something, make _sure_ we print the 100% amount, and
+ // also print a newline.
+ cout << std::string(LastPrintAmount, '\b') << "Progress "
+ << OperationName << ": 100% \n";
+ }
+}
+
+/// progress - Clients should periodically call this method when they are in
+/// an exception-safe state. The Amount variable should indicate how far
+/// along the operation is, given in 1/10ths of a percent (in other words,
+/// Amount should range from 0 to 1000).
+bool SlowOperationInformer::progress(unsigned Amount) {
+ int status = sys::AlarmStatus();
+ if (status == -1) {
+ cout << "\n";
+ LastPrintAmount = 0;
+ return true;
+ }
+
+ // If we haven't spent enough time in this operation to warrant displaying the
+ // progress bar, don't do so yet.
+ if (status == 0)
+ return false;
+
+ // Delete whatever we printed last time.
+ std::string ToPrint = std::string(LastPrintAmount, '\b');
+
+ std::ostringstream OS;
+ OS << "Progress " << OperationName << ": " << Amount/10;
+ if (unsigned Rem = Amount % 10)
+ OS << "." << Rem << "%";
+ else
+ OS << "% ";
+
+ LastPrintAmount = OS.str().size();
+ cout << ToPrint+OS.str() << std::flush;
+ return false;
+}
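+// A client performing N steps reports progress in tenths of a percent and
+// aborts if the user interrupts (NumItems and the loop body are
+// illustrative):
+//
+//   SlowOperationInformer Informer("bitcode loading");
+//   for (unsigned i = 0; i != NumItems; ++i) {
+//     if (Informer.progress(i * 1000 / NumItems))
+//       return true;  // interrupted; caller unwinds the operation
+//     ... do one unit of work ...
+//   }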
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
new file mode 100644
index 0000000..68938fa
--- /dev/null
+++ b/lib/Support/SmallPtrSet.cpp
@@ -0,0 +1,223 @@
+//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallPtrSet class. See SmallPtrSet.h for an
+// overview of the algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+void SmallPtrSetImpl::shrink_and_clear() {
+ assert(!isSmall() && "Can't shrink a small set!");
+ free(CurArray);
+
+ // Reduce the number of buckets.
+ CurArraySize = NumElements > 16 ? 1 << (Log2_32_Ceil(NumElements) + 1) : 32;
+ NumElements = NumTombstones = 0;
+
+ // Install the new array. Clear all the buckets to empty.
+ CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ memset(CurArray, -1, CurArraySize*sizeof(void*));
+
+ // The end pointer, always valid, is set to a valid element to help the
+ // iterator.
+ CurArray[CurArraySize] = 0;
+}
+
+bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
+ if (isSmall()) {
+ // Check to see if it is already in the set.
+ for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+ APtr != E; ++APtr)
+ if (*APtr == Ptr)
+ return false;
+
+ // Nope, there isn't. If we stay small, just 'pushback' now.
+ if (NumElements < CurArraySize-1) {
+ SmallArray[NumElements++] = Ptr;
+ return true;
+ }
+ // Otherwise, hit the big set case, which will call grow.
+ }
+
+ // Grow if more than 3/4 of the buckets are in use, or if fewer than 1/8
+ // would remain free after accounting for tombstones.
+ if (NumElements*4 >= CurArraySize*3 ||
+ CurArraySize-(NumElements+NumTombstones) < CurArraySize/8)
+ Grow();
+
+ // Okay, we know we have space. Find a hash bucket.
+ const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr));
+ if (*Bucket == Ptr) return false; // Already inserted, good.
+
+ // Otherwise, insert it!
+ if (*Bucket == getTombstoneMarker())
+ --NumTombstones;
+ *Bucket = Ptr;
+ ++NumElements; // Track density.
+ return true;
+}
+
+bool SmallPtrSetImpl::erase_imp(const void * Ptr) {
+ if (isSmall()) {
+ // Check to see if it is in the set.
+ for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+ APtr != E; ++APtr)
+ if (*APtr == Ptr) {
+ // If it is in the set, replace this element.
+ *APtr = E[-1];
+ E[-1] = getEmptyMarker();
+ --NumElements;
+ return true;
+ }
+
+ return false;
+ }
+
+ // Okay, we know we have space. Find a hash bucket.
+ void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
+ if (*Bucket != Ptr) return false; // Not in the set?
+
+ // Set this as a tombstone.
+ *Bucket = getTombstoneMarker();
+ --NumElements;
+ ++NumTombstones;
+ return true;
+}
+
+const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const {
+ unsigned Bucket = Hash(Ptr);
+ unsigned ArraySize = CurArraySize;
+ unsigned ProbeAmt = 1;
+ const void *const *Array = CurArray;
+ const void *const *Tombstone = 0;
+ while (1) {
+ // Found Ptr's bucket?
+ if (Array[Bucket] == Ptr)
+ return Array+Bucket;
+
+ // If we found an empty bucket, the pointer doesn't exist in the set.
+ // Return a tombstone if we've seen one so far, or the empty bucket if
+ // not.
+ if (Array[Bucket] == getEmptyMarker())
+ return Tombstone ? Tombstone : Array+Bucket;
+
+ // If this is a tombstone, remember it. If Ptr ends up not in the set, we
+ // prefer to return the tombstone rather than an empty bucket that would
+ // take more probing to reach.
+ if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
+ Tombstone = Array+Bucket; // Remember the first tombstone found.
+
+ // It's a hash collision or a tombstone. Reprobe.
+ Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
+ }
+}
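+// Since ProbeAmt grows by one after every collision, the offsets from the
+// home bucket are the triangular numbers 1, 3, 6, 10, ... For a power-of-two
+// ArraySize this sequence visits every bucket before repeating, so the loop
+// above terminates whenever an empty or matching slot exists.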
+
+/// Grow - Allocate a larger backing store for the buckets and move it over.
+///
+void SmallPtrSetImpl::Grow() {
+ // Allocate at twice as many buckets, but at least 128.
+ unsigned OldSize = CurArraySize;
+ unsigned NewSize = OldSize < 64 ? 128 : OldSize*2;
+
+ const void **OldBuckets = CurArray;
+ bool WasSmall = isSmall();
+
+ // Install the new array. Clear all the buckets to empty.
+ CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ CurArraySize = NewSize;
+ memset(CurArray, -1, NewSize*sizeof(void*));
+
+ // The end pointer, always valid, is set to a valid element to help the
+ // iterator.
+ CurArray[NewSize] = 0;
+
+ // Copy over all the elements.
+ if (WasSmall) {
+ // Small sets store their elements in order.
+ for (const void **BucketPtr = OldBuckets, **E = OldBuckets+NumElements;
+ BucketPtr != E; ++BucketPtr) {
+ const void *Elt = *BucketPtr;
+ *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+ }
+ } else {
+ // Copy over all valid entries.
+ for (const void **BucketPtr = OldBuckets, **E = OldBuckets+OldSize;
+ BucketPtr != E; ++BucketPtr) {
+ // Copy over the element if it is valid.
+ const void *Elt = *BucketPtr;
+ if (Elt != getTombstoneMarker() && Elt != getEmptyMarker())
+ *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+ }
+
+ free(OldBuckets);
+ NumTombstones = 0;
+ }
+}
+
+SmallPtrSetImpl::SmallPtrSetImpl(const SmallPtrSetImpl& that) {
+ // If we're becoming small, prepare to insert into our stack space
+ if (that.isSmall()) {
+ CurArray = &SmallArray[0];
+ // Otherwise, allocate new heap space to hold the copy
+ } else {
+ CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ }
+
+ // Copy over the new array size
+ CurArraySize = that.CurArraySize;
+
+ // Copy over the contents from the other set
+ memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1));
+
+ NumElements = that.NumElements;
+ NumTombstones = that.NumTombstones;
+}
+
+/// CopyFrom - implement operator= from a smallptrset that has the same pointer
+/// type, but may have a different small size.
+void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
+ if (isSmall() && RHS.isSmall())
+ assert(CurArraySize == RHS.CurArraySize &&
+ "Cannot assign sets with different small sizes");
+
+ // If we're becoming small, prepare to insert into our stack space
+ if (RHS.isSmall()) {
+ if (!isSmall())
+ free(CurArray);
+ CurArray = &SmallArray[0];
+ // Otherwise, allocate new heap space (unless we were the same size)
+ } else if (CurArraySize != RHS.CurArraySize) {
+ if (isSmall())
+ CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1));
+ else
+ CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1));
+ assert(CurArray && "Failed to allocate memory?");
+ }
+
+ // Copy over the new array size
+ CurArraySize = RHS.CurArraySize;
+
+ // Copy over the contents from the other set
+ memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1));
+
+ NumElements = RHS.NumElements;
+ NumTombstones = RHS.NumTombstones;
+}
+
+SmallPtrSetImpl::~SmallPtrSetImpl() {
+ if (!isSmall())
+ free(CurArray);
+}
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
new file mode 100644
index 0000000..13acc1b
--- /dev/null
+++ b/lib/Support/Statistic.cpp
@@ -0,0 +1,126 @@
+//===-- Statistic.cpp - Easy way to expose stats information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the 'Statistic' class, which is designed to be an easy
+// way to expose various success metrics from passes. These statistics are
+// printed at the end of a run, when the -stats command line option is enabled
+// on the command line.
+//
+// This is useful for reporting information like the number of instructions
+// simplified, optimized or removed by various transformations, like this:
+//
+// static Statistic NumInstEliminated("GCSE", "Number of instructions killed");
+//
+// Later, in the code: ++NumInstEliminated;
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <ostream>
+#include <cstring>
+using namespace llvm;
+
+// GetLibSupportInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); }
+
+/// -stats - Command line option to cause transformations to emit stats about
+/// what they did.
+///
+static cl::opt<bool>
+Enabled("stats", cl::desc("Enable statistics output from program"));
+
+
+namespace {
+/// StatisticInfo - This class is used in a ManagedStatic so that it is created
+/// on demand (when the first statistic is bumped) and destroyed only when
+/// llvm_shutdown is called. We print statistics from the destructor.
+class StatisticInfo {
+ std::vector<const Statistic*> Stats;
+public:
+ ~StatisticInfo();
+
+ void addStatistic(const Statistic *S) {
+ Stats.push_back(S);
+ }
+};
+}
+
+static ManagedStatic<StatisticInfo> StatInfo;
+
+
+/// RegisterStatistic - The first time a statistic is bumped, this method is
+/// called.
+void Statistic::RegisterStatistic() {
+ // If stats are enabled, inform StatInfo that this statistic should be
+ // printed.
+ if (Enabled)
+ StatInfo->addStatistic(this);
+ // Remember we have been registered.
+ Initialized = true;
+}
+
+namespace {
+
+struct NameCompare {
+ bool operator()(const Statistic *LHS, const Statistic *RHS) const {
+ int Cmp = std::strcmp(LHS->getName(), RHS->getName());
+ if (Cmp != 0) return Cmp < 0;
+
+ // Secondary key is the description.
+ return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
+ }
+};
+
+}
+
+// Print information when destroyed, iff command line option is specified.
+StatisticInfo::~StatisticInfo() {
+ // Statistics not enabled?
+ if (Stats.empty()) return;
+
+ // Get the stream to write to.
+ std::ostream &OutStream = *GetLibSupportInfoOutputFile();
+
+ // Figure out how long the biggest Value and Name fields are.
+ unsigned MaxNameLen = 0, MaxValLen = 0;
+ for (size_t i = 0, e = Stats.size(); i != e; ++i) {
+ MaxValLen = std::max(MaxValLen,
+ (unsigned)utostr(Stats[i]->getValue()).size());
+ MaxNameLen = std::max(MaxNameLen,
+ (unsigned)std::strlen(Stats[i]->getName()));
+ }
+
+ // Sort the fields by name.
+ std::stable_sort(Stats.begin(), Stats.end(), NameCompare());
+
+ // Print out the statistics header...
+ OutStream << "===" << std::string(73, '-') << "===\n"
+ << " ... Statistics Collected ...\n"
+ << "===" << std::string(73, '-') << "===\n\n";
+
+ // Print all of the statistics.
+ for (size_t i = 0, e = Stats.size(); i != e; ++i) {
+ std::string CountStr = utostr(Stats[i]->getValue());
+ OutStream << std::string(MaxValLen-CountStr.size(), ' ')
+ << CountStr << " " << Stats[i]->getName()
+ << std::string(MaxNameLen-std::strlen(Stats[i]->getName()), ' ')
+ << " - " << Stats[i]->getDesc() << "\n";
+
+ }
+
+ OutStream << std::endl; // Flush the output stream...
+
+ if (&OutStream != cerr.stream() && &OutStream != cout.stream())
+ delete &OutStream; // Close the file.
+}
diff --git a/lib/Support/Streams.cpp b/lib/Support/Streams.cpp
new file mode 100644
index 0000000..cf6cfeb
--- /dev/null
+++ b/lib/Support/Streams.cpp
@@ -0,0 +1,30 @@
+//===-- Streams.cpp - Wrappers for iostreams ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper for the std::cout and std::cerr I/O streams.
+// It prevents the need to include <iostream> to each file just to get I/O.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Streams.h"
+#include <iostream>
+using namespace llvm;
+
+OStream llvm::cout(std::cout);
+OStream llvm::cerr(std::cerr);
+IStream llvm::cin(std::cin);
+
+namespace llvm {
+
+/// FlushStream - Function called by BaseStream to flush an ostream.
+void FlushStream(std::ostream &S) {
+ S << std::flush;
+}
+
+} // end namespace llvm
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
new file mode 100644
index 0000000..1618086
--- /dev/null
+++ b/lib/Support/StringExtras.cpp
@@ -0,0 +1,114 @@
+//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringExtras.h header
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringExtras.h"
+#include <cstring>
+using namespace llvm;
+
+/// getToken - This function extracts one token from source, ignoring any
+/// leading characters that appear in the Delimiters string, and ending the
+/// token at any of the characters that appear in the Delimiters string. If
+/// there are no tokens in the source string, an empty string is returned.
+/// The Source source string is updated in place to remove the returned string
+/// and any delimiter prefix from it.
+std::string llvm::getToken(std::string &Source, const char *Delimiters) {
+ size_t NumDelimiters = std::strlen(Delimiters);
+
+ // Figure out where the token starts.
+ std::string::size_type Start =
+ Source.find_first_not_of(Delimiters, 0, NumDelimiters);
+ if (Start == std::string::npos) Start = Source.size();
+
+ // Find the next occurrence of the delimiter.
+ std::string::size_type End =
+ Source.find_first_of(Delimiters, Start, NumDelimiters);
+ if (End == std::string::npos) End = Source.size();
+
+ // Create the return token.
+ std::string Result = std::string(Source.begin()+Start, Source.begin()+End);
+
+ // Erase the token that we read in.
+ Source.erase(Source.begin(), Source.begin()+End);
+
+ return Result;
+}
+
+/// SplitString - Split up the specified string according to the specified
+/// delimiters, appending the result fragments to the output list.
+void llvm::SplitString(const std::string &Source,
+ std::vector<std::string> &OutFragments,
+ const char *Delimiters) {
+ std::string S = Source;
+
+ std::string S2 = getToken(S, Delimiters);
+ while (!S2.empty()) {
+ OutFragments.push_back(S2);
+ S2 = getToken(S, Delimiters);
+ }
+}
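+// For example (illustrative values):
+//
+//   std::string S = "  foo, bar,baz";
+//   std::vector<std::string> Parts;
+//   SplitString(S, Parts, " ,");
+//   // Parts now holds "foo", "bar", "baz".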
+
+
+
+/// UnescapeString - Modify the argument string, turning two-character escape
+/// sequences into the characters they denote. This handles:
+/// @verbatim
+/// \a \b \e \f \n \r \t \v \' \" and \\
+/// @endverbatim
+/// Any other character following a backslash is left untouched.
+void llvm::UnescapeString(std::string &Str) {
+ for (unsigned i = 0; i != Str.size(); ++i) {
+ if (Str[i] == '\\' && i != Str.size()-1) {
+ switch (Str[i+1]) {
+ default: continue; // Don't execute the code after the switch.
+ case 'a': Str[i] = '\a'; break;
+ case 'b': Str[i] = '\b'; break;
+ case 'e': Str[i] = 27; break;
+ case 'f': Str[i] = '\f'; break;
+ case 'n': Str[i] = '\n'; break;
+ case 'r': Str[i] = '\r'; break;
+ case 't': Str[i] = '\t'; break;
+ case 'v': Str[i] = '\v'; break;
+ case '"': Str[i] = '\"'; break;
+ case '\'': Str[i] = '\''; break;
+ case '\\': Str[i] = '\\'; break;
+ }
+ // Nuke the second character.
+ Str.erase(Str.begin()+i+1);
+ }
+ }
+}
+
+/// EscapeString - Modify the argument string, escaping '\\', '\t', '"', '\n',
+/// and any character that does not satisfy std::isprint (the latter as a
+/// 3-digit octal escape).
+void llvm::EscapeString(std::string &Str) {
+ for (unsigned i = 0; i != Str.size(); ++i) {
+ if (Str[i] == '\\') {
+ ++i;
+ Str.insert(Str.begin()+i, '\\');
+ } else if (Str[i] == '\t') {
+ Str[i++] = '\\';
+ Str.insert(Str.begin()+i, 't');
+ } else if (Str[i] == '"') {
+ Str.insert(Str.begin()+i++, '\\');
+ } else if (Str[i] == '\n') {
+ Str[i++] = '\\';
+ Str.insert(Str.begin()+i, 'n');
+ } else if (!std::isprint(static_cast<unsigned char>(Str[i]))) {
+ // Always expand to a 3-digit octal escape.
+ unsigned Char = Str[i];
+ Str[i++] = '\\';
+ Str.insert(Str.begin()+i++, '0'+((Char/64) & 7));
+ Str.insert(Str.begin()+i++, '0'+((Char/8) & 7));
+ Str.insert(Str.begin()+i , '0'+( Char & 7));
+ }
+ }
+}
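+// The two routines invert each other for the escapes both understand
+// (illustrative):
+//
+//   std::string S = "a\tb\n";
+//   EscapeString(S);    // S == "a\\tb\\n"
+//   UnescapeString(S);  // S == "a\tb\n" again
+//
+// Note the asymmetry for non-printable bytes: EscapeString emits 3-digit
+// octal escapes, which UnescapeString above does not decode.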
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
new file mode 100644
index 0000000..0c61732
--- /dev/null
+++ b/lib/Support/StringMap.cpp
@@ -0,0 +1,234 @@
+//===--- StringMap.cpp - String Hash table map implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMap class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+#include <cassert>
+using namespace llvm;
+
+StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
+ ItemSize = itemSize;
+
+ // If a size is specified, initialize the table with that many buckets.
+ if (InitSize) {
+ init(InitSize);
+ return;
+ }
+
+ // Otherwise, initialize it with zero buckets to avoid the allocation.
+ TheTable = 0;
+ NumBuckets = 0;
+ NumItems = 0;
+ NumTombstones = 0;
+}
+
+void StringMapImpl::init(unsigned InitSize) {
+ assert((InitSize & (InitSize-1)) == 0 &&
+ "Init Size must be a power of 2 or zero!");
+ NumBuckets = InitSize ? InitSize : 16;
+ NumItems = 0;
+ NumTombstones = 0;
+
+ TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
+
+ // Allocate one extra bucket, set it to look filled so the iterators stop at
+ // end.
+ TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
+}
+
+
+/// HashString - Compute a hash code for the specified string.
+///
+static unsigned HashString(const char *Start, const char *End) {
+ // Bernstein hash function.
+ unsigned int Result = 0;
+ // TODO: investigate whether a modified bernstein hash function performs
+ // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+ // X*33+c -> X*33^c
+ while (Start != End)
+ Result = Result * 33 + *Start++;
+ Result = Result + (Result >> 5);
+ return Result;
+}
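+// Worked example over the two-byte string "hi":
+//
+//   Result = 0*33   + 'h' (104) = 104
+//   Result = 104*33 + 'i' (105) = 3537
+//
+// followed by the final fold: 3537 + (3537 >> 5) = 3537 + 110 = 3647.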
+
+/// LookupBucketFor - Look up the bucket that the specified string should end
+/// up in. If it already exists as a key in the map, the Item pointer for the
+/// specified bucket will be non-null. Otherwise, it will be null. In either
+/// case, the FullHashValue field of the bucket will be set to the hash value
+/// of the string.
+unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
+ const char *NameEnd) {
+ unsigned HTSize = NumBuckets;
+ if (HTSize == 0) { // Hash table unallocated so far?
+ init(16);
+ HTSize = NumBuckets;
+ }
+ unsigned FullHashValue = HashString(NameStart, NameEnd);
+ unsigned BucketNo = FullHashValue & (HTSize-1);
+
+ unsigned ProbeAmt = 1;
+ int FirstTombstone = -1;
+ while (1) {
+ ItemBucket &Bucket = TheTable[BucketNo];
+ StringMapEntryBase *BucketItem = Bucket.Item;
+ // If we found an empty bucket, this key isn't in the table yet, return it.
+ if (BucketItem == 0) {
+ // If we found a tombstone, we want to reuse the tombstone instead of an
+ // empty bucket. This reduces probing.
+ if (FirstTombstone != -1) {
+ TheTable[FirstTombstone].FullHashValue = FullHashValue;
+ return FirstTombstone;
+ }
+
+ Bucket.FullHashValue = FullHashValue;
+ return BucketNo;
+ }
+
+ if (BucketItem == getTombstoneVal()) {
+ // Skip over tombstones. However, remember the first one we see.
+ if (FirstTombstone == -1) FirstTombstone = BucketNo;
+ } else if (Bucket.FullHashValue == FullHashValue) {
+ // If the full hash value matches, check deeply for a match. The common
+ // case here is that we are only looking at the buckets (for item info
+ // being non-null and for the full hash value) not at the items. This
+ // is important for cache locality.
+
+ // Do the comparison like this because NameStart isn't necessarily
+ // null-terminated!
+ char *ItemStr = (char*)BucketItem+ItemSize;
+ unsigned ItemStrLen = BucketItem->getKeyLength();
+ if (unsigned(NameEnd-NameStart) == ItemStrLen &&
+ memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
+ // We found a match!
+ return BucketNo;
+ }
+ }
+
+ // Okay, we didn't find the item. Probe to the next bucket.
+ BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+ // Use quadratic probing; it has fewer clumping artifacts than linear
+ // probing and has good cache behavior in the common case.
+ ++ProbeAmt;
+ }
+}
+
+
+/// FindKey - Look up the bucket that contains the specified key. If it exists
+/// in the map, return the bucket number of the key. Otherwise return -1.
+/// This does not modify the map.
+int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
+ unsigned HTSize = NumBuckets;
+ if (HTSize == 0) return -1; // Really empty table?
+ unsigned FullHashValue = HashString(KeyStart, KeyEnd);
+ unsigned BucketNo = FullHashValue & (HTSize-1);
+
+ unsigned ProbeAmt = 1;
+ while (1) {
+ ItemBucket &Bucket = TheTable[BucketNo];
+ StringMapEntryBase *BucketItem = Bucket.Item;
+ // If we found an empty bucket, this key isn't in the table yet, return.
+ if (BucketItem == 0)
+ return -1;
+
+ if (BucketItem == getTombstoneVal()) {
+ // Ignore tombstones.
+ } else if (Bucket.FullHashValue == FullHashValue) {
+ // If the full hash value matches, check deeply for a match. The common
+ // case here is that we are only looking at the buckets (for item info
+ // being non-null and for the full hash value) not at the items. This
+ // is important for cache locality.
+
+ // Do the comparison like this because KeyStart isn't necessarily
+ // null-terminated!
+ char *ItemStr = (char*)BucketItem+ItemSize;
+ unsigned ItemStrLen = BucketItem->getKeyLength();
+ if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
+ memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
+ // We found a match!
+ return BucketNo;
+ }
+ }
+
+ // Okay, we didn't find the item. Probe to the next bucket.
+ BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
+
+ // Use quadratic probing; it has fewer clumping artifacts than linear
+ // probing and has good cache behavior in the common case.
+ ++ProbeAmt;
+ }
+}
+
+/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
+/// delete it. This aborts if the value isn't in the table.
+void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
+ const char *VStr = (char*)V + ItemSize;
+ StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
+ (void)V2; // Referenced only by the assert below; avoids an NDEBUG warning.
+ assert(V == V2 && "Didn't find key?");
+}
+
+/// RemoveKey - Remove the StringMapEntry for the specified key from the
+/// table, returning it. If the key is not in the table, this returns null.
+StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
+ const char *KeyEnd) {
+ int Bucket = FindKey(KeyStart, KeyEnd);
+ if (Bucket == -1) return 0;
+
+ StringMapEntryBase *Result = TheTable[Bucket].Item;
+ TheTable[Bucket].Item = getTombstoneVal();
+ --NumItems;
+ ++NumTombstones;
+ return Result;
+}
+
+
+
+/// RehashTable - Grow the table, redistributing values into the buckets with
+/// the appropriate mod-of-hashtable-size.
+void StringMapImpl::RehashTable() {
+ unsigned NewSize = NumBuckets*2;
+ // Allocate one extra bucket which will always be non-empty. This allows the
+ // iterators to stop at end.
+ ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
+ NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
+
+ // Rehash all the items into their new buckets. Luckily :) we already have
+ // the hash values available, so we don't have to rehash any strings.
+ for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
+ if (IB->Item && IB->Item != getTombstoneVal()) {
+ // Fast case, bucket available.
+ unsigned FullHash = IB->FullHashValue;
+ unsigned NewBucket = FullHash & (NewSize-1);
+ if (NewTableArray[NewBucket].Item == 0) {
+ NewTableArray[NewBucket].Item = IB->Item;
+ NewTableArray[NewBucket].FullHashValue = FullHash;
+ continue;
+ }
+
+ // Otherwise probe for a spot.
+ unsigned ProbeSize = 1;
+ do {
+ NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
+ } while (NewTableArray[NewBucket].Item);
+
+ // Finally found a slot. Fill it in.
+ NewTableArray[NewBucket].Item = IB->Item;
+ NewTableArray[NewBucket].FullHashValue = FullHash;
+ }
+ }
+
+ free(TheTable);
+
+ TheTable = NewTableArray;
+ NumBuckets = NewSize;
+}
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
new file mode 100644
index 0000000..b9c1fd0
--- /dev/null
+++ b/lib/Support/StringPool.cpp
@@ -0,0 +1,35 @@
+//===-- StringPool.cpp - Interned string pool -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringPool class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StringPool.h"
+#include "llvm/Support/Streams.h"
+
+using namespace llvm;
+
+StringPool::StringPool() {}
+
+StringPool::~StringPool() {
+ assert(InternTable.empty() && "PooledStringPtr leaked!");
+}
+
+PooledStringPtr StringPool::intern(const char *Begin, const char *End) {
+ table_t::iterator I = InternTable.find(Begin, End);
+ if (I != InternTable.end())
+ return PooledStringPtr(&*I);
+
+ entry_t *S = entry_t::Create(Begin, End);
+ S->getValue().Pool = this;
+ InternTable.insert(S);
+
+ return PooledStringPtr(S);
+}
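+// Interned strings are shared and reference counted (illustrative):
+//
+//   StringPool Pool;
+//   const char *Str = "foo";
+//   PooledStringPtr A = Pool.intern(Str, Str+3);
+//   PooledStringPtr B = Pool.intern(Str, Str+3);
+//   // A and B refer to the same table entry; it is freed when the last
+//   // PooledStringPtr goes away, and ~StringPool asserts none have leaked.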
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
new file mode 100644
index 0000000..80d6e4c
--- /dev/null
+++ b/lib/Support/SystemUtils.cpp
@@ -0,0 +1,58 @@
+//===- SystemUtils.cpp - Utilities for low-level system tasks -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used to do a variety of low-level, often
+// system-specific, tasks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/System/Process.h"
+#include "llvm/System/Program.h"
+#include <ostream>
+using namespace llvm;
+
+bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check,
+ bool print_warning) {
+ if (stream_to_check == cout.stream() &&
+ sys::Process::StandardOutIsDisplayed()) {
+ if (print_warning) {
+ cerr << "WARNING: You're attempting to print out a bitcode file.\n"
+ << "This is inadvisable as it may cause display problems. If\n"
+ << "you REALLY want to taste LLVM bitcode first-hand, you\n"
+ << "can force output with the `-f' option.\n\n";
+ }
+ return true;
+ }
+ return false;
+}
+
+/// FindExecutable - Find a named executable, giving the argv[0] of program
+/// being executed. This allows us to find another LLVM tool if it is built
+/// into the same directory, but that directory is neither the current
+/// directory, nor in the PATH. If the executable cannot be found, return an
+/// empty string.
+///
+#undef FindExecutable // needed on windows :(
+sys::Path llvm::FindExecutable(const std::string &ExeName,
+ const std::string &ProgramPath) {
+ // First check the directory that the calling program is in. We can do this
+ // if ProgramPath contains at least one / character, indicating that it is a
+ // relative path to the program itself.
+ sys::Path Result(ProgramPath);
+ Result.eraseComponent();
+ if (!Result.isEmpty()) {
+ Result.appendComponent(ExeName);
+ if (Result.canExecute())
+ return Result;
+ }
+
+ return sys::Program::FindProgramByName(ExeName);
+}
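A usage sketch for FindExecutable; the tool name is illustrative, and the sys::Path accessors follow the declarations used elsewhere in this import:

    #include "llvm/Support/SystemUtils.h"
    #include <iostream>

    int main(int, char **argv) {
      // Look for llvm-as next to this binary first, then fall back to PATH.
      llvm::sys::Path P = llvm::FindExecutable("llvm-as", argv[0]);
      if (P.isEmpty())
        std::cerr << "llvm-as not found\n";
      else
        std::cout << P.toString() << "\n";
      return 0;
    }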
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
new file mode 100644
index 0000000..3c8879b
--- /dev/null
+++ b/lib/Support/Timer.cpp
@@ -0,0 +1,387 @@
+//===-- Timer.cpp - Interval Timing Support -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interval Timing implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Process.h"
+#include <algorithm>
+#include <fstream>
+#include <functional>
+#include <map>
+using namespace llvm;
+
+// GetLibSupportInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); }
+
+// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
+// of constructor/destructor ordering being unspecified by C++. Basically the
+// problem is that a Statistic object gets destroyed, which ends up calling
+// 'GetLibSupportInfoOutputFile()' (below), which calls this function.
+// LibSupportInfoOutputFilename used to be a global variable, but sometimes it
+// would get destroyed before the Statistic, causing havoc to ensue. We "fix"
+// this by creating the string the first time it is needed and never destroying
+// it.
+static ManagedStatic<std::string> LibSupportInfoOutputFilename;
+static std::string &getLibSupportInfoOutputFilename() {
+ return *LibSupportInfoOutputFilename;
+}
+
+namespace {
+ static cl::opt<bool>
+ TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
+ "tracking (this may be slow)"),
+ cl::Hidden);
+
+ static cl::opt<std::string, true>
+ InfoOutputFilename("info-output-file", cl::value_desc("filename"),
+ cl::desc("File to append -stats and -timer output to"),
+ cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
+}
+
+static TimerGroup *DefaultTimerGroup = 0;
+static TimerGroup *getDefaultTimerGroup() {
+ if (DefaultTimerGroup) return DefaultTimerGroup;
+ return DefaultTimerGroup = new TimerGroup("Miscellaneous Ungrouped Timers");
+}
+
+Timer::Timer(const std::string &N)
+ : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N),
+ Started(false), TG(getDefaultTimerGroup()) {
+ TG->addTimer();
+}
+
+Timer::Timer(const std::string &N, TimerGroup &tg)
+ : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N),
+ Started(false), TG(&tg) {
+ TG->addTimer();
+}
+
+Timer::Timer(const Timer &T) {
+ TG = T.TG;
+ if (TG) TG->addTimer();
+ operator=(T);
+}
+
+
+// Copy ctor, initialize with no TG member.
+Timer::Timer(bool, const Timer &T) {
+ TG = T.TG; // Avoid assertion in operator=
+ operator=(T); // Copy contents
+ TG = 0;
+}
+
+
+Timer::~Timer() {
+ if (TG) {
+ if (Started) {
+ Started = false;
+ TG->addTimerToPrint(*this);
+ }
+ TG->removeTimer();
+ }
+}
+
+static inline size_t getMemUsage() {
+ if (TrackSpace)
+ return sys::Process::GetMallocUsage();
+ return 0;
+}
+
+struct TimeRecord {
+ double Elapsed, UserTime, SystemTime;
+ ssize_t MemUsed;
+};
+
+static TimeRecord getTimeRecord(bool Start) {
+ TimeRecord Result;
+
+ sys::TimeValue now(0,0);
+ sys::TimeValue user(0,0);
+ sys::TimeValue sys(0,0);
+
+ ssize_t MemUsed = 0;
+ if (Start) {
+ MemUsed = getMemUsage();
+ sys::Process::GetTimeUsage(now,user,sys);
+ } else {
+ sys::Process::GetTimeUsage(now,user,sys);
+ MemUsed = getMemUsage();
+ }
+
+ Result.Elapsed = now.seconds() + now.microseconds() / 1000000.0;
+ Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
+ Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0;
+ Result.MemUsed = MemUsed;
+
+ return Result;
+}
+
+static ManagedStatic<std::vector<Timer*> > ActiveTimers;
+
+void Timer::startTimer() {
+ Started = true;
+ ActiveTimers->push_back(this);
+ TimeRecord TR = getTimeRecord(true);
+ Elapsed -= TR.Elapsed;
+ UserTime -= TR.UserTime;
+ SystemTime -= TR.SystemTime;
+ MemUsed -= TR.MemUsed;
+ PeakMemBase = TR.MemUsed;
+}
+
+void Timer::stopTimer() {
+ TimeRecord TR = getTimeRecord(false);
+ Elapsed += TR.Elapsed;
+ UserTime += TR.UserTime;
+ SystemTime += TR.SystemTime;
+ MemUsed += TR.MemUsed;
+
+ if (ActiveTimers->back() == this) {
+ ActiveTimers->pop_back();
+ } else {
+ std::vector<Timer*>::iterator I =
+ std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
+ assert(I != ActiveTimers->end() && "stop but no startTimer?");
+ ActiveTimers->erase(I);
+ }
+}
+
+void Timer::sum(const Timer &T) {
+ Elapsed += T.Elapsed;
+ UserTime += T.UserTime;
+ SystemTime += T.SystemTime;
+ MemUsed += T.MemUsed;
+ PeakMem += T.PeakMem;
+}
+
+/// addPeakMemoryMeasurement - This method should be called whenever memory
+/// usage needs to be checked. It adds a peak memory measurement to the
+/// currently active timers, which will be printed when the timer group prints
+///
+void Timer::addPeakMemoryMeasurement() {
+ size_t MemUsed = getMemUsage();
+
+ for (std::vector<Timer*>::iterator I = ActiveTimers->begin(),
+ E = ActiveTimers->end(); I != E; ++I)
+ (*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase);
+}
+
+//===----------------------------------------------------------------------===//
+// NamedRegionTimer Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+typedef std::map<std::string, Timer> Name2Timer;
+typedef std::map<std::string, std::pair<TimerGroup, Name2Timer> > Name2Pair;
+
+}
+
+static ManagedStatic<Name2Timer> NamedTimers;
+
+static ManagedStatic<Name2Pair> NamedGroupedTimers;
+
+static Timer &getNamedRegionTimer(const std::string &Name) {
+ Name2Timer::iterator I = NamedTimers->find(Name);
+ if (I != NamedTimers->end())
+ return I->second;
+
+ return NamedTimers->insert(I, std::make_pair(Name, Timer(Name)))->second;
+}
+
+static Timer &getNamedRegionTimer(const std::string &Name,
+ const std::string &GroupName) {
+
+ Name2Pair::iterator I = NamedGroupedTimers->find(GroupName);
+ if (I == NamedGroupedTimers->end()) {
+ TimerGroup TG(GroupName);
+ std::pair<TimerGroup, Name2Timer> Pair(TG, Name2Timer());
+ I = NamedGroupedTimers->insert(I, std::make_pair(GroupName, Pair));
+ }
+
+ Name2Timer::iterator J = I->second.second.find(Name);
+ if (J == I->second.second.end())
+ J = I->second.second.insert(J,
+ std::make_pair(Name,
+ Timer(Name,
+ I->second.first)));
+
+ return J->second;
+}
+
+NamedRegionTimer::NamedRegionTimer(const std::string &Name)
+ : TimeRegion(getNamedRegionTimer(Name)) {}
+
+NamedRegionTimer::NamedRegionTimer(const std::string &Name,
+ const std::string &GroupName)
+ : TimeRegion(getNamedRegionTimer(Name, GroupName)) {}
+
+//===----------------------------------------------------------------------===//
+// TimerGroup Implementation
+//===----------------------------------------------------------------------===//
+
+// printAlignedFP - Simulate the printf "%A.Bf" format, where A is the
+// TotalWidth size, and B is the AfterDec size.
+//
+static void printAlignedFP(double Val, unsigned AfterDec, unsigned TotalWidth,
+ std::ostream &OS) {
+ assert(TotalWidth >= AfterDec+1 && "Bad FP Format!");
+ OS.width(TotalWidth-AfterDec-1);
+ char OldFill = OS.fill();
+ OS.fill(' ');
+ OS << (int)Val; // Integer part;
+ OS << ".";
+ OS.width(AfterDec);
+ OS.fill('0');
+ unsigned ResultFieldSize = 1;
+ while (AfterDec--) ResultFieldSize *= 10;
+ OS << (int)(Val*ResultFieldSize) % ResultFieldSize;
+ OS.fill(OldFill);
+}
+
+static void printVal(double Val, double Total, std::ostream &OS) {
+ if (Total < 1e-7) // Avoid dividing by zero...
+ OS << " ----- ";
+ else {
+ OS << " ";
+ printAlignedFP(Val, 4, 7, OS);
+ OS << " (";
+ printAlignedFP(Val*100/Total, 1, 5, OS);
+ OS << "%)";
+ }
+}
+
+void Timer::print(const Timer &Total, std::ostream &OS) {
+ if (Total.UserTime)
+ printVal(UserTime, Total.UserTime, OS);
+ if (Total.SystemTime)
+ printVal(SystemTime, Total.SystemTime, OS);
+ if (Total.getProcessTime())
+ printVal(getProcessTime(), Total.getProcessTime(), OS);
+ printVal(Elapsed, Total.Elapsed, OS);
+
+ OS << " ";
+
+ if (Total.MemUsed) {
+ OS.width(9);
+ OS << MemUsed << " ";
+ }
+ if (Total.PeakMem) {
+ if (PeakMem) {
+ OS.width(9);
+ OS << PeakMem << " ";
+ } else
+ OS << " ";
+ }
+ OS << Name << "\n";
+
+ Started = false; // Once printed, don't print again
+}
+
+// GetLibSupportInfoOutputFile - Return a file stream to print our output on...
+std::ostream *
+llvm::GetLibSupportInfoOutputFile() {
+ std::string &LibSupportInfoOutputFilename = getLibSupportInfoOutputFilename();
+ if (LibSupportInfoOutputFilename.empty())
+ return cerr.stream();
+ if (LibSupportInfoOutputFilename == "-")
+ return cout.stream();
+
+ std::ostream *Result = new std::ofstream(LibSupportInfoOutputFilename.c_str(),
+ std::ios::app);
+ if (!Result->good()) {
+ cerr << "Error opening info-output-file '"
+ << LibSupportInfoOutputFilename << " for appending!\n";
+ delete Result;
+ return cerr.stream();
+ }
+ return Result;
+}
+
+
+void TimerGroup::removeTimer() {
+ if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report...
+ // Sort the timers in descending order by amount of time taken...
+ std::sort(TimersToPrint.begin(), TimersToPrint.end(),
+ std::greater<Timer>());
+
+ // Figure out how many spaces to indent TimerGroup name...
+ unsigned Padding = (80-Name.length())/2;
+ if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
+
+ std::ostream *OutStream = GetLibSupportInfoOutputFile();
+
+ ++NumTimers;
+ { // Scope to contain Total timer... don't allow total timer to drop us to
+ // zero timers...
+ Timer Total("TOTAL");
+
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
+ Total.sum(TimersToPrint[i]);
+
+ // Print out timing header...
+ *OutStream << "===" << std::string(73, '-') << "===\n"
+ << std::string(Padding, ' ') << Name << "\n"
+ << "===" << std::string(73, '-')
+ << "===\n";
+
+ // If this is not a collection of ungrouped timers, print the total time.
+ // Ungrouped timers don't really make sense to add up. We still print the
+ // TOTAL line to make the percentages make sense.
+ if (this != DefaultTimerGroup) {
+ *OutStream << " Total Execution Time: ";
+
+ printAlignedFP(Total.getProcessTime(), 4, 5, *OutStream);
+ *OutStream << " seconds (";
+ printAlignedFP(Total.getWallTime(), 4, 5, *OutStream);
+ *OutStream << " wall clock)\n";
+ }
+ *OutStream << "\n";
+
+ if (Total.UserTime)
+ *OutStream << " ---User Time---";
+ if (Total.SystemTime)
+ *OutStream << " --System Time--";
+ if (Total.getProcessTime())
+ *OutStream << " --User+System--";
+ *OutStream << " ---Wall Time---";
+ if (Total.getMemUsed())
+ *OutStream << " ---Mem---";
+ if (Total.getPeakMem())
+ *OutStream << " -PeakMem-";
+ *OutStream << " --- Name ---\n";
+
+ // Loop through all of the timing data, printing it out...
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
+ TimersToPrint[i].print(Total, *OutStream);
+
+ Total.print(Total, *OutStream);
+ *OutStream << std::endl; // Flush output
+ }
+ --NumTimers;
+
+ TimersToPrint.clear();
+
+ if (OutStream != cerr.stream() && OutStream != cout.stream())
+ delete OutStream; // Close the file...
+ }
+
+ // Delete default timer group!
+ if (NumTimers == 0 && this == DefaultTimerGroup) {
+ delete DefaultTimerGroup;
+ DefaultTimerGroup = 0;
+ }
+}
+
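A short sketch of the intended use of the named-timer API above; the region and group names are illustrative only:

    #include "llvm/Support/Timer.h"

    void runSelectionPass() {
      // RAII: the timer starts here and stops when T goes out of scope.
      // Re-entering with the same names accumulates into one Timer, and
      // the whole "codegen" group is printed when its last timer dies.
      llvm::NamedRegionTimer T("instruction selection", "codegen");
      // ... timed work ...
    }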
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
new file mode 100644
index 0000000..e8cf69d
--- /dev/null
+++ b/lib/Support/Triple.cpp
@@ -0,0 +1,187 @@
+//===--- Triple.cpp - Target triple helper class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//
+
+const char *Triple::getArchTypeName(ArchType Kind) {
+ switch (Kind) {
+ case InvalidArch: return "<invalid>";
+ case UnknownArch: return "unknown";
+
+ case x86: return "i386";
+ case x86_64: return "x86_64";
+ case ppc: return "powerpc";
+ case ppc64: return "powerpc64";
+ }
+
+ return "<invalid>";
+}
+
+const char *Triple::getVendorTypeName(VendorType Kind) {
+ switch (Kind) {
+ case UnknownVendor: return "unknown";
+
+ case Apple: return "apple";
+ case PC: return "PC";
+ }
+
+ return "<invalid>";
+}
+
+const char *Triple::getOSTypeName(OSType Kind) {
+ switch (Kind) {
+ case UnknownOS: return "unknown";
+
+ case Darwin: return "darwin";
+ case DragonFly: return "dragonfly";
+ case FreeBSD: return "freebsd";
+ case Linux: return "linux";
+ }
+
+ return "<invalid>";
+}
+
+//
+
+void Triple::Parse() const {
+ assert(!isInitialized() && "Invalid parse call.");
+
+ std::string ArchName = getArchName();
+ if (ArchName.size() == 4 && ArchName[0] == 'i' &&
+ ArchName[2] == '8' && ArchName[3] == '6')
+ Arch = x86;
+ else if (ArchName == "amd64" || ArchName == "x86_64")
+ Arch = x86_64;
+ else if (ArchName == "powerpc")
+ Arch = ppc;
+ else if (ArchName == "powerpc64")
+ Arch = ppc64;
+ else
+ Arch = UnknownArch;
+
+ std::string VendorName = getVendorName();
+ if (VendorName == "apple")
+ Vendor = Apple;
+ else if (VendorName == "pc")
+ Vendor = PC;
+ else
+ Vendor = UnknownVendor;
+
+ std::string OSName = getOSName();
+ if (memcmp(&OSName[0], "darwin", 6) == 0)
+ OS = Darwin;
+ else if (memcmp(&OSName[0], "dragonfly", 9) == 0)
+ OS = DragonFly;
+ else if (memcmp(&OSName[0], "freebsd", 7) == 0)
+ OS = FreeBSD;
+ else if (memcmp(&OSName[0], "linux", 5) == 0)
+ OS = Linux;
+ else
+ OS = UnknownOS;
+
+ assert(isInitialized() && "Failed to initialize!");
+}
+
+static std::string extract(const std::string &A,
+ std::string::size_type begin,
+ std::string::size_type end) {
+ if (begin == std::string::npos)
+ return "";
+ if (end == std::string::npos)
+ return A.substr(begin);
+ return A.substr(begin, end - begin);
+}
+
+static std::string extract1(const std::string &A,
+ std::string::size_type begin,
+ std::string::size_type end) {
+ if (begin == std::string::npos || begin == end)
+ return "";
+ return extract(A, begin + 1, end);
+}
+
+std::string Triple::getArchName() const {
+ std::string Tmp = Data;
+ return extract(Tmp, 0, Tmp.find('-'));
+}
+
+std::string Triple::getVendorName() const {
+ std::string Tmp = Data;
+ Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+ return extract(Tmp, 0, Tmp.find('-'));
+}
+
+std::string Triple::getOSName() const {
+ std::string Tmp = Data;
+ Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+ Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+ return extract(Tmp, 0, Tmp.find('-'));
+}
+
+std::string Triple::getEnvironmentName() const {
+ std::string Tmp = Data;
+ Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+ Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+ Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+ return extract(Tmp, 0, std::string::npos);
+}
+
+std::string Triple::getOSAndEnvironmentName() const {
+ std::string Tmp = Data;
+ Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+ Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos);
+ return extract(Tmp, 0, std::string::npos);
+}
+
+void Triple::setTriple(const std::string &Str) {
+ Data = Str;
+ Arch = InvalidArch;
+}
+
+void Triple::setArch(ArchType Kind) {
+ setArchName(getArchTypeName(Kind));
+}
+
+void Triple::setVendor(VendorType Kind) {
+ setVendorName(getVendorTypeName(Kind));
+}
+
+void Triple::setOS(OSType Kind) {
+ setOSName(getOSTypeName(Kind));
+}
+
+void Triple::setArchName(const std::string &Str) {
+ setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName());
+}
+
+void Triple::setVendorName(const std::string &Str) {
+ setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
+}
+
+void Triple::setOSName(const std::string &Str) {
+ if (hasEnvironment())
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
+ "-" + getEnvironmentName());
+ else
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
+
+void Triple::setEnvironmentName(const std::string &Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
+ "-" + Str);
+}
+
+void Triple::setOSAndEnvironmentName(const std::string &Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
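A usage sketch for the lazy parser above, assuming the string constructor and accessors declared in llvm/ADT/Triple.h at this revision:

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    void example() {
      llvm::Triple T("x86_64-apple-darwin9");
      // Parse() runs lazily on the first component query.
      assert(T.getArch() == llvm::Triple::x86_64);
      assert(T.getOS() == llvm::Triple::Darwin);

      // Setters splice one component and rebuild the triple string.
      T.setOSName("freebsd7");
      assert(T.getOSName() == "freebsd7");
    }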
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
new file mode 100644
index 0000000..6ac37bc
--- /dev/null
+++ b/lib/Support/raw_ostream.cpp
@@ -0,0 +1,376 @@
+//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support for bulk buffered stream output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/System/Program.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+#include <cstring>
+#include <ostream>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(HAVE_FCNTL_H)
+# include <fcntl.h>
+#endif
+
+#if defined(_MSC_VER)
+#include <io.h>
+#include <fcntl.h>
+#ifndef STDIN_FILENO
+# define STDIN_FILENO 0
+#endif
+#ifndef STDOUT_FILENO
+# define STDOUT_FILENO 1
+#endif
+#ifndef STDERR_FILENO
+# define STDERR_FILENO 2
+#endif
+#endif
+
+using namespace llvm;
+
+
+// An out of line virtual method to provide a home for the class vtable.
+void raw_ostream::handle() {}
+
+raw_ostream &raw_ostream::operator<<(unsigned long N) {
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long N) {
+ if (N < 0) {
+ *this << '-';
+ N = -N;
+ }
+
+ return this->operator<<(static_cast<unsigned long>(N));
+}
+
+raw_ostream &raw_ostream::operator<<(unsigned long long N) {
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+raw_ostream &raw_ostream::operator<<(long long N) {
+ if (N < 0) {
+ *this << '-';
+ N = -N;
+ }
+
+ return this->operator<<(static_cast<unsigned long long>(N));
+}
+
+raw_ostream &raw_ostream::operator<<(const void *P) {
+ uintptr_t N = (uintptr_t) P;
+ *this << '0' << 'x';
+
+ // Zero is a special case.
+ if (N == 0)
+ return *this << '0';
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
+ while (N) {
+ unsigned x = N % 16;
+ *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10);
+ N /= 16;
+ }
+
+ return write(CurPtr, EndPtr-CurPtr);
+}
+
+void raw_ostream::flush_nonempty() {
+ assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
+ write_impl(OutBufStart, OutBufCur - OutBufStart);
+ OutBufCur = OutBufStart;
+}
+
+raw_ostream &raw_ostream::write(unsigned char C) {
+ // Group exceptional cases into a single branch.
+ if (OutBufCur >= OutBufEnd) {
+ if (Unbuffered) {
+ write_impl(reinterpret_cast<char*>(&C), 1);
+ return *this;
+ }
+
+ if (!OutBufStart)
+ SetBufferSize();
+ else
+ flush_nonempty();
+ }
+
+ *OutBufCur++ = C;
+ return *this;
+}
+
+raw_ostream &raw_ostream::write(const char *Ptr, unsigned Size) {
+ // Group exceptional cases into a single branch.
+ if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) {
+ if (Unbuffered) {
+ write_impl(Ptr, Size);
+ return *this;
+ }
+
+ if (!OutBufStart)
+ SetBufferSize();
+ else
+ flush_nonempty();
+ }
+
+ // Handle short strings specially, memcpy isn't very good at very short
+ // strings.
+ switch (Size) {
+ case 4: OutBufCur[3] = Ptr[3]; // FALL THROUGH
+ case 3: OutBufCur[2] = Ptr[2]; // FALL THROUGH
+ case 2: OutBufCur[1] = Ptr[1]; // FALL THROUGH
+ case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH
+ case 0: break;
+ default:
+ // Normally the string to emit is shorter than the buffer.
+ if (Size <= unsigned(OutBufEnd-OutBufStart)) {
+ memcpy(OutBufCur, Ptr, Size);
+ break;
+ }
+
+ // Otherwise we are emitting a string larger than our buffer. We
+ // know we already flushed, so just write it out directly.
+ write_impl(Ptr, Size);
+ Size = 0;
+ break;
+ }
+ OutBufCur += Size;
+
+ return *this;
+}
+
+// Formatted output.
+raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
+ // If we have more than a few bytes left in our output buffer, try
+ // formatting directly onto its end.
+ //
+ // FIXME: This test is a bit silly, since if we don't have enough
+ // space in the buffer we will have to flush the formatted output
+ // anyway. We should just flush upfront in such cases, and use the
+ // whole buffer as our scratch pad. Note, however, that this case is
+ // also necessary for correctness on unbuffered streams.
+ unsigned NextBufferSize = 127;
+ if (OutBufEnd-OutBufCur > 3) {
+ unsigned BufferBytesLeft = OutBufEnd-OutBufCur;
+ unsigned BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
+
+ // Common case is that we have plenty of space.
+ if (BytesUsed < BufferBytesLeft) {
+ OutBufCur += BytesUsed;
+ return *this;
+ }
+
+ // Otherwise, we overflowed and the return value tells us the size to try
+ // again with.
+ NextBufferSize = BytesUsed;
+ }
+
+ // If we got here, we didn't have enough space in the output buffer for the
+ // string. Try printing into a SmallVector that is resized to have enough
+ // space. Iterate until we win.
+ SmallVector<char, 128> V;
+
+ while (1) {
+ V.resize(NextBufferSize);
+
+ // Try formatting into the SmallVector.
+ unsigned BytesUsed = Fmt.print(&V[0], NextBufferSize);
+
+ // If BytesUsed fit into the vector, we win.
+ if (BytesUsed <= NextBufferSize)
+ return write(&V[0], BytesUsed);
+
+ // Otherwise, try again with a new size.
+ assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?");
+ NextBufferSize = BytesUsed;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Formatted Output
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void format_object_base::home() {
+}
+
+//===----------------------------------------------------------------------===//
+// raw_fd_ostream
+//===----------------------------------------------------------------------===//
+
+/// raw_fd_ostream - Open the specified file for writing. If an error
+/// occurs, information about the error is put into ErrorInfo, and the
+/// stream should be immediately destroyed; the string will be empty
+/// if no error occurred.
+raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary,
+ std::string &ErrorInfo) : pos(0) {
+ ErrorInfo.clear();
+
+ // Handle "-" as stdout.
+ if (Filename[0] == '-' && Filename[1] == 0) {
+ FD = STDOUT_FILENO;
+ // If user requested binary then put stdout into binary mode if
+ // possible.
+ if (Binary)
+ sys::Program::ChangeStdoutToBinary();
+ ShouldClose = false;
+ return;
+ }
+
+ int Flags = O_WRONLY|O_CREAT|O_TRUNC;
+#ifdef O_BINARY
+ if (Binary)
+ Flags |= O_BINARY;
+#endif
+ FD = open(Filename, Flags, 0644);
+ if (FD < 0) {
+ ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
+ ShouldClose = false;
+ } else {
+ ShouldClose = true;
+ }
+}
+
+raw_fd_ostream::~raw_fd_ostream() {
+ if (FD >= 0) {
+ flush();
+ if (ShouldClose)
+ ::close(FD);
+ }
+}
+
+void raw_fd_ostream::write_impl(const char *Ptr, unsigned Size) {
+ assert (FD >= 0 && "File already closed.");
+ pos += Size;
+ ::write(FD, Ptr, Size);
+}
+
+void raw_fd_ostream::close() {
+ assert (ShouldClose);
+ ShouldClose = false;
+ flush();
+ ::close(FD);
+ FD = -1;
+}
+
+uint64_t raw_fd_ostream::seek(uint64_t off) {
+ flush();
+ pos = lseek(FD, off, SEEK_SET);
+ return pos;
+}
+
+//===----------------------------------------------------------------------===//
+// raw_stdout/err_ostream
+//===----------------------------------------------------------------------===//
+
+raw_stdout_ostream::raw_stdout_ostream():raw_fd_ostream(STDOUT_FILENO, false) {}
+raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false,
+ true) {}
+
+// An out of line virtual method to provide a home for the class vtable.
+void raw_stdout_ostream::handle() {}
+void raw_stderr_ostream::handle() {}
+
+/// outs() - This returns a reference to a raw_ostream for standard output.
+/// Use it like: outs() << "foo" << "bar";
+raw_ostream &llvm::outs() {
+ static raw_stdout_ostream S;
+ return S;
+}
+
+/// errs() - This returns a reference to a raw_ostream for standard error.
+/// Use it like: errs() << "foo" << "bar";
+raw_ostream &llvm::errs() {
+ static raw_stderr_ostream S;
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+// raw_os_ostream
+//===----------------------------------------------------------------------===//
+
+raw_os_ostream::~raw_os_ostream() {
+ flush();
+}
+
+void raw_os_ostream::write_impl(const char *Ptr, unsigned Size) {
+ OS.write(Ptr, Size);
+}
+
+uint64_t raw_os_ostream::current_pos() { return OS.tellp(); }
+
+uint64_t raw_os_ostream::tell() {
+ return (uint64_t)OS.tellp() + GetNumBytesInBuffer();
+}
+
+//===----------------------------------------------------------------------===//
+// raw_string_ostream
+//===----------------------------------------------------------------------===//
+
+raw_string_ostream::~raw_string_ostream() {
+ flush();
+}
+
+void raw_string_ostream::write_impl(const char *Ptr, unsigned Size) {
+ OS.append(Ptr, Size);
+}
+
+//===----------------------------------------------------------------------===//
+// raw_svector_ostream
+//===----------------------------------------------------------------------===//
+
+raw_svector_ostream::~raw_svector_ostream() {
+ flush();
+}
+
+void raw_svector_ostream::write_impl(const char *Ptr, unsigned Size) {
+ OS.append(Ptr, Ptr + Size);
+}
+
+uint64_t raw_svector_ostream::current_pos() { return OS.size(); }
+
+uint64_t raw_svector_ostream::tell() {
+ return OS.size() + GetNumBytesInBuffer();
+}
diff --git a/lib/System/Alarm.cpp b/lib/System/Alarm.cpp
new file mode 100644
index 0000000..0014ca7
--- /dev/null
+++ b/lib/System/Alarm.cpp
@@ -0,0 +1,33 @@
+//===- Alarm.cpp - Alarm Generation Support ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alarm functionality
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Alarm.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Alarm.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/Alarm.inc"
+#endif
diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp
new file mode 100644
index 0000000..cefd0bb
--- /dev/null
+++ b/lib/System/Atomic.cpp
@@ -0,0 +1,53 @@
+//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements atomic operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Atomic.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <windows.h>
+#undef MemoryFence
+#endif
+
+void sys::MemoryFence() {
+#if LLVM_MULTITHREADED==0
+ return;
+#else
+# if defined(__GNUC__)
+ __sync_synchronize();
+# elif defined(_MSC_VER)
+ MemoryBarrier();
+# else
+# error No memory fence implementation for your platform!
+# endif
+#endif
+}
+
+sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
+ sys::cas_flag new_value,
+ sys::cas_flag old_value) {
+#if LLVM_MULTITHREADED==0
+ sys::cas_flag result = *ptr;
+ if (result == old_value)
+ *ptr = new_value;
+ return result;
+#elif defined(__GNUC__)
+ return __sync_val_compare_and_swap(ptr, old_value, new_value);
+#elif defined(_MSC_VER)
+ return InterlockedCompareExchange(ptr, new_value, old_value);
+#else
+# error No compare-and-swap implementation for your platform!
+#endif
+}
\ No newline at end of file
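A sketch of the contract above: CompareAndSwap returns the value it observed at *ptr, and the store happens only if that value equals old_value.

    #include "llvm/System/Atomic.h"
    #include <cassert>

    void example() {
      volatile llvm::sys::cas_flag Flag = 0;
      // Succeeds: *ptr was 0, so it becomes 1 and 0 is returned.
      llvm::sys::cas_flag Seen = llvm::sys::CompareAndSwap(&Flag, 1, 0);
      assert(Seen == 0 && Flag == 1);
      // Fails: *ptr is now 1, not the expected 0; no store, returns 1.
      Seen = llvm::sys::CompareAndSwap(&Flag, 2, 0);
      assert(Seen == 1 && Flag == 1);
    }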
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
new file mode 100644
index 0000000..5415dd6
--- /dev/null
+++ b/lib/System/CMakeLists.txt
@@ -0,0 +1,19 @@
+add_llvm_library(LLVMSystem
+ Alarm.cpp
+ Atomic.cpp
+ Disassembler.cpp
+ DynamicLibrary.cpp
+ Host.cpp
+ IncludeFile.cpp
+ Memory.cpp
+ Mutex.cpp
+ Path.cpp
+ Process.cpp
+ Program.cpp
+ Signals.cpp
+ TimeValue.cpp
+ )
+
+if( BUILD_SHARED_LIBS AND NOT WIN32 )
+ target_link_libraries(LLVMSystem dl)
+endif()
diff --git a/lib/System/Disassembler.cpp b/lib/System/Disassembler.cpp
new file mode 100644
index 0000000..378fe26
--- /dev/null
+++ b/lib/System/Disassembler.cpp
@@ -0,0 +1,79 @@
+//===- lib/System/Disassembler.cpp ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/System/Disassembler.h"
+
+#include <cassert>
+#include <iomanip>
+#include <string>
+#include <sstream>
+
+#if USE_UDIS86
+#include <udis86.h>
+#endif
+
+using namespace llvm;
+
+bool llvm::sys::hasDisassembler(void)
+{
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+ // We have option to enable udis86 library.
+# if USE_UDIS86
+ return true;
+# else
+ return false;
+# endif
+#else
+ return false;
+#endif
+}
+
+std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
+ uint64_t pc) {
+ std::stringstream res;
+
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+ unsigned bits;
+# if defined(__i386__)
+ bits = 32;
+# else
+ bits = 64;
+# endif
+
+# if USE_UDIS86
+ ud_t ud_obj;
+
+ ud_init(&ud_obj);
+ ud_set_input_buffer(&ud_obj, start, length);
+ ud_set_mode(&ud_obj, bits);
+ ud_set_pc(&ud_obj, pc);
+ ud_set_syntax(&ud_obj, UD_SYN_ATT);
+
+ res << std::setbase(16)
+ << std::setw(bits/4);
+
+ while (ud_disassemble(&ud_obj)) {
+ res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
+ }
+# else
+ res << "No disassembler available. See configure help for options.\n";
+# endif
+
+#else
+ res << "No disassembler available. See configure help for options.\n";
+#endif
+
+ return res.str();
+}
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
new file mode 100644
index 0000000..3bf172c
--- /dev/null
+++ b/lib/System/DynamicLibrary.cpp
@@ -0,0 +1,165 @@
+//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system DynamicLibrary concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+#include <cstdio>
+#include <cstring>
+#include <map>
+
+// Collection of symbol name/value pairs to be searched prior to any libraries.
+std::map<std::string, void *> &g_symbols() {
+ static std::map<std::string, void *> symbols;
+ return symbols;
+}
+
+void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
+ void *symbolValue) {
+ g_symbols()[symbolName] = symbolValue;
+}
+
+// It is not possible to use ltdl.c on VC++ builds as the terms of its LGPL
+// license and special exception would cause all of LLVM to be placed under
+// the LGPL. This is because the exception applies only when libtool is
+// used, and obviously libtool is not used with Visual Studio. An entirely
+// separate implementation is provided in win32/DynamicLibrary.cpp.
+
+#ifdef LLVM_ON_WIN32
+
+#include "Win32/DynamicLibrary.inc"
+
+#else
+
+//#include "ltdl.h"
+#include <dlfcn.h>
+#include <cassert>
+using namespace llvm;
+using namespace llvm::sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+//static std::vector<lt_dlhandle> OpenedHandles;
+static std::vector<void *> OpenedHandles;
+
+DynamicLibrary::DynamicLibrary() {}
+
+DynamicLibrary::~DynamicLibrary() {
+ while(!OpenedHandles.empty()) {
+ void *H = OpenedHandles.back(); OpenedHandles.pop_back();
+ dlclose(H);
+ }
+}
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+ std::string *ErrMsg) {
+ void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
+ if (H == 0) {
+ if (ErrMsg)
+ *ErrMsg = dlerror();
+ return true;
+ }
+ OpenedHandles.push_back(H);
+ return false;
+}
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+ // check_ltdl_initialization();
+
+ // First check symbols added via AddSymbol().
+ std::map<std::string, void *>::iterator I = g_symbols().find(symbolName);
+ if (I != g_symbols().end())
+ return I->second;
+
+ // Now search the libraries.
+ for (std::vector<void *>::iterator I = OpenedHandles.begin(),
+ E = OpenedHandles.end(); I != E; ++I) {
+ //lt_ptr ptr = lt_dlsym(*I, symbolName);
+ void *ptr = dlsym(*I, symbolName);
+ if (ptr)
+ return ptr;
+ }
+
+#define EXPLICIT_SYMBOL(SYM) \
+ extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
+
+ // If this is darwin, it has some funky issues, try to solve them here. Some
+ // important symbols are marked 'private external' which doesn't allow
+ // SearchForAddressOfSymbol to find them. As such, we special case them here,
+ // there is only a small handful of them.
+
+#ifdef __APPLE__
+ {
+ EXPLICIT_SYMBOL(__ashldi3);
+ EXPLICIT_SYMBOL(__ashrdi3);
+ EXPLICIT_SYMBOL(__cmpdi2);
+ EXPLICIT_SYMBOL(__divdi3);
+ EXPLICIT_SYMBOL(__eprintf);
+ EXPLICIT_SYMBOL(__fixdfdi);
+ EXPLICIT_SYMBOL(__fixsfdi);
+ EXPLICIT_SYMBOL(__fixunsdfdi);
+ EXPLICIT_SYMBOL(__fixunssfdi);
+ EXPLICIT_SYMBOL(__floatdidf);
+ EXPLICIT_SYMBOL(__floatdisf);
+ EXPLICIT_SYMBOL(__lshrdi3);
+ EXPLICIT_SYMBOL(__moddi3);
+ EXPLICIT_SYMBOL(__udivdi3);
+ EXPLICIT_SYMBOL(__umoddi3);
+ }
+#endif
+
+#ifdef __CYGWIN__
+ {
+ EXPLICIT_SYMBOL(_alloca);
+ EXPLICIT_SYMBOL(__main);
+ }
+#endif
+
+#undef EXPLICIT_SYMBOL
+
+// This macro returns the address of a well-known, explicit symbol
+#define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return &SYM
+
+// On linux we have a weird situation. The stderr/out/in symbols are both
+// macros and global variables because of standards requirements. So, we
+// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
+#if defined(__linux__)
+ {
+ EXPLICIT_SYMBOL(stderr);
+ EXPLICIT_SYMBOL(stdout);
+ EXPLICIT_SYMBOL(stdin);
+ }
+#else
+ // For everything else, we want to check to make sure the symbol isn't defined
+ // as a macro before using EXPLICIT_SYMBOL.
+ {
+#ifndef stdin
+ EXPLICIT_SYMBOL(stdin);
+#endif
+#ifndef stdout
+ EXPLICIT_SYMBOL(stdout);
+#endif
+#ifndef stderr
+ EXPLICIT_SYMBOL(stderr);
+#endif
+ }
+#endif
+#undef EXPLICIT_SYMBOL
+
+ return 0;
+}
+
+#endif // LLVM_ON_WIN32
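A usage sketch for the resolver above; the library path and hook name are illustrative, not part of the import:

    #include "llvm/System/DynamicLibrary.h"
    #include <cstdio>
    #include <string>

    static int MyHook(int X) { return X + 1; }

    void example() {
      using llvm::sys::DynamicLibrary;
      // AddSymbol entries are consulted before any opened library.
      DynamicLibrary::AddSymbol("MyHook", (void *)&MyHook);

      std::string Err;
      if (DynamicLibrary::LoadLibraryPermanently("libm.so", &Err))
        std::fprintf(stderr, "load failed: %s\n", Err.c_str());

      // Finds &MyHook via the AddSymbol table, not dlsym.
      void *P = DynamicLibrary::SearchForAddressOfSymbol("MyHook");
      (void)P;
    }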
diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp
new file mode 100644
index 0000000..fd2d952
--- /dev/null
+++ b/lib/System/Host.cpp
@@ -0,0 +1,24 @@
+//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Host concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Host.h"
+#include "llvm/Config/config.h"
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Host.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/Host.inc"
+#endif
+
diff --git a/lib/System/IncludeFile.cpp b/lib/System/IncludeFile.cpp
new file mode 100644
index 0000000..8258d40
--- /dev/null
+++ b/lib/System/IncludeFile.cpp
@@ -0,0 +1,20 @@
+//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IncludeFile constructor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/IncludeFile.h"
+
+using namespace llvm;
+
+// This constructor is used to ensure linking of other modules. See the
+// llvm/System/IncludeFile.h header for details.
+IncludeFile::IncludeFile(const void*) {}
diff --git a/lib/System/LICENSE.TXT b/lib/System/LICENSE.TXT
new file mode 100644
index 0000000..f569da2
--- /dev/null
+++ b/lib/System/LICENSE.TXT
@@ -0,0 +1,6 @@
+LLVM System Interface Library
+-------------------------------------------------------------------------------
+The LLVM System Interface Library is licensed under the Illinois Open Source
+License and has the following additional copyright:
+
+Copyright (C) 2004 eXtensible Systems, Inc.
diff --git a/lib/System/Makefile b/lib/System/Makefile
new file mode 100644
index 0000000..49704c3
--- /dev/null
+++ b/lib/System/Makefile
@@ -0,0 +1,19 @@
+##===- lib/System/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMSystem
+BUILD_ARCHIVE = 1
+
+EXTRA_DIST = Unix Win32 README.txt
+
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts))
+CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts))
diff --git a/lib/System/Memory.cpp b/lib/System/Memory.cpp
new file mode 100644
index 0000000..375c73c
--- /dev/null
+++ b/lib/System/Memory.cpp
@@ -0,0 +1,62 @@
+//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for allocating memory and dealing
+// with memory-mapped files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Memory.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Memory.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/Memory.inc"
+#endif
+
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+
+/// InvalidateInstructionCache - Before the JIT can run a block of code
+/// that has been emitted it must invalidate the instruction cache on some
+/// platforms.
+void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
+ size_t Len) {
+
+// icache invalidation for PPC and ARM.
+#if defined(__APPLE__)
+#if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
+ sys_icache_invalidate(Addr, Len);
+#endif
+#else
+#if (defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
+ const size_t LineSize = 32;
+
+ const intptr_t Mask = ~(LineSize - 1);
+ const intptr_t StartLine = ((intptr_t) Addr) & Mask;
+ const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("dcbf 0, %0" : : "r"(Line));
+ asm volatile("sync");
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("icbi 0, %0" : : "r"(Line));
+ asm volatile("isync");
+#endif
+#endif // end apple
+}
diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp
new file mode 100644
index 0000000..d95c25b
--- /dev/null
+++ b/lib/System/Mutex.cpp
@@ -0,0 +1,160 @@
+//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/System/Mutex.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+Mutex::Mutex( bool recursive) { }
+Mutex::~Mutex() { }
+bool Mutex::acquire() { return true; }
+bool Mutex::release() { return true; }
+bool Mutex::tryacquire() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_mutex_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_mutex_init does have an address, then mutex support is enabled.
+// Note: all LLVM tools will link against -lpthread if it's available since it
+// is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_mutex_init is not
+// declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a Mutex using pthread calls
+Mutex::Mutex( bool recursive)
+ : data_(0)
+{
+ if (pthread_enabled)
+ {
+ // Declare the pthread_mutex data structures
+ pthread_mutex_t* mutex =
+ static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+ pthread_mutexattr_t attr;
+
+ // Initialize the mutex attributes
+ int errorcode = pthread_mutexattr_init(&attr);
+ assert(errorcode == 0);
+
+ // Initialize the mutex as a recursive mutex, if requested, or normal
+ // otherwise.
+ int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
+ errorcode = pthread_mutexattr_settype(&attr, kind);
+ assert(errorcode == 0);
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+ // Make it a process local mutex
+ errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+#endif
+
+ // Initialize the mutex
+ errorcode = pthread_mutex_init(mutex, &attr);
+ assert(errorcode == 0);
+
+ // Destroy the attributes
+ errorcode = pthread_mutexattr_destroy(&attr);
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = mutex;
+ }
+}
+
+// Destruct a Mutex
+Mutex::~Mutex()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+ pthread_mutex_destroy(mutex);
+ free(mutex);
+ }
+}
+
+bool
+Mutex::acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_lock(mutex);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+bool
+Mutex::release()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_unlock(mutex);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+bool
+Mutex::tryacquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_trylock(mutex);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/Mutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Win32/Mutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
+#endif
+#endif
+
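A minimal sketch of the locking API implemented above; acquire()/release() return true on success (and are always-true no-ops when threading is disabled):

    #include "llvm/System/Mutex.h"

    static llvm::sys::Mutex CounterLock(/*recursive=*/true);
    static int Counter;

    void increment() {
      CounterLock.acquire();
      ++Counter;                 // critical section
      CounterLock.release();
    }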
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
new file mode 100644
index 0000000..72bd7ad
--- /dev/null
+++ b/lib/System/Path.cpp
@@ -0,0 +1,287 @@
+//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Path concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Path.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+#include <cstring>
+#include <ostream>
+using namespace llvm;
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+bool Path::operator==(const Path &that) const {
+ return path == that.path;
+}
+
+bool Path::operator!=(const Path &that) const {
+ return path != that.path;
+}
+
+bool Path::operator<(const Path& that) const {
+ return path < that.path;
+}
+
+std::ostream& llvm::operator<<(std::ostream &strm, const sys::Path &aPath) {
+ strm << aPath.toString();
+ return strm;
+}
+
+Path
+Path::GetLLVMConfigDir() {
+ Path result;
+#ifdef LLVM_ETCDIR
+ if (result.set(LLVM_ETCDIR))
+ return result;
+#endif
+ return GetLLVMDefaultConfigDir();
+}
+
+LLVMFileType
+sys::IdentifyFileType(const char *magic, unsigned length) {
+ assert(magic && "Invalid magic number string");
+ assert(length >=4 && "Invalid magic number length");
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wrapper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return Bitcode_FileType;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return Bitcode_FileType;
+ break;
+ case '!':
+ if (length >= 8)
+ if (memcmp(magic,"!<arch>\n",8) == 0)
+ return Archive_FileType;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ if (length >= 18 && magic[17] == 0)
+ switch (magic[16]) {
+ default: break;
+ case 1: return ELF_Relocatable_FileType;
+ case 2: return ELF_Executable_FileType;
+ case 3: return ELF_SharedObject_FileType;
+ case 4: return ELF_Core_FileType;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (length >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return Mach_O_DynamicallyLinkedSharedLib_FileType;
+ }
+ break;
+
+ case 0xFE:
+ case 0xCE: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) && magic[3] == char(0xCE)) {
+ /* Native endian */
+ if (length >= 16) type = magic[14] << 8 | magic[15];
+ } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
+ magic[2] == char(0xED) && magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (length >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return Mach_O_Object_FileType;
+ case 2: return Mach_O_Executable_FileType;
+ case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
+ case 4: return Mach_O_Core_FileType;
+ case 5: return Mach_O_PreloadExectuable_FileType;
+ case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType;
+ case 7: return Mach_O_DynamicLinker_FileType;
+ case 8: return Mach_O_Bundle_FileType;
+ case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType;
+ case 10: break; // FIXME: MH_DSYM companion file with only debug.
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MIPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return COFF_FileType;
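+ // Note: control falls through to the 0x02 second-byte check below.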
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return COFF_FileType;
+ break;
+
+ default:
+ break;
+ }
+ return Unknown_FileType;
+}
+
+bool
+Path::isArchive() const {
+ if (canRead())
+ return hasMagicNumber("!<arch>\012");
+ return false;
+}
+
+bool
+Path::isDynamicLibrary() const {
+ if (canRead()) {
+ std::string Magic;
+ if (getMagicNumber(Magic, 64))
+ switch (IdentifyFileType(Magic.c_str(),
+ static_cast<unsigned>(Magic.length()))) {
+ default: return false;
+ case Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case ELF_SharedObject_FileType:
+ case COFF_FileType: return true;
+ }
+ }
+ return false;
+}
+
+Path
+Path::FindLibrary(std::string& name) {
+ std::vector<sys::Path> LibPaths;
+ GetSystemLibraryPaths(LibPaths);
+ for (unsigned i = 0; i < LibPaths.size(); ++i) {
+ sys::Path FullPath(LibPaths[i]);
+ FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT);
+ if (FullPath.isDynamicLibrary())
+ return FullPath;
+ FullPath.eraseSuffix();
+ FullPath.appendSuffix("a");
+ if (FullPath.isArchive())
+ return FullPath;
+ }
+ return sys::Path();
+}
+
+std::string Path::GetDLLSuffix() {
+ return LTDL_SHLIB_EXT;
+}
+
+bool
+Path::isBitcodeFile() const {
+ std::string actualMagic;
+ if (!getMagicNumber(actualMagic, 4))
+ return false;
+ LLVMFileType FT =
+ IdentifyFileType(actualMagic.c_str(),
+ static_cast<unsigned>(actualMagic.length()));
+ return FT == Bitcode_FileType;
+}
+
+bool Path::hasMagicNumber(const std::string &Magic) const {
+ std::string actualMagic;
+ if (getMagicNumber(actualMagic, static_cast<unsigned>(Magic.size())))
+ return Magic == actualMagic;
+ return false;
+}
+
+void Path::makeAbsolute() {
+ if (isAbsolute())
+ return;
+
+ Path CWD = Path::GetCurrentDirectory();
+ assert(CWD.isAbsolute() && "GetCurrentDirectory returned relative path!");
+
+ CWD.appendComponent(path);
+
+ path = CWD.toString();
+}
+
+static void getPathList(const char*path, std::vector<Path>& Paths) {
+ const char* at = path;
+ const char* delim = strchr(at, PathSeparator);
+ Path tmpPath;
+ while (delim != 0) {
+ std::string tmp(at, size_t(delim-at));
+ if (tmpPath.set(tmp))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ at = delim + 1;
+ delim = strchr(at, PathSeparator);
+ }
+
+ if (*at != 0)
+ if (tmpPath.set(std::string(at)))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+}
+
+static std::string getDirnameCharSep(const std::string& path, char Sep) {
+
+ if (path.empty())
+ return ".";
+
+ // If the path is all slashes, return a single slash.
+ // Otherwise, remove all trailing slashes.
+
+ signed pos = static_cast<signed>(path.size()) - 1;
+
+ while (pos >= 0 && path[pos] == Sep)
+ --pos;
+
+ if (pos < 0)
+ return path[0] == Sep ? std::string(1, Sep) : std::string(".");
+
+ // Any slashes left?
+ signed i = 0;
+
+ while (i < pos && path[i] != Sep)
+ ++i;
+
+ if (i == pos) // No slashes? Return "."
+ return ".";
+
+ // There is at least one slash left. Remove all trailing non-slashes.
+ while (pos >= 0 && path[pos] != Sep)
+ --pos;
+
+ // Remove any trailing slashes.
+ while (pos >= 0 && path[pos] == Sep)
+ --pos;
+
+ if (pos < 0)
+ return path[0] == Sep ? std::string(1, Sep) : std::string(".");
+
+ return path.substr(0, pos+1);
+}
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/Path.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Win32/Path.inc"
+#endif
+
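A usage sketch tying the helpers above together, assuming Path's string constructor; the file names are illustrative:

    #include "llvm/System/Path.h"
    #include <cassert>

    void example() {
      // IdentifyFileType inspects only the magic bytes it is handed and
      // asserts length >= 4 (see above).
      const char Magic[] = { 'B', 'C', (char)0xC0, (char)0xDE };
      assert(llvm::sys::IdentifyFileType(Magic, 4) ==
             llvm::sys::Bitcode_FileType);

      llvm::sys::Path P("docs/notes.txt");
      P.makeAbsolute();              // prefix the current directory
      P.eraseComponent();            // drop "notes.txt"
      P.appendComponent("todo.txt"); // .../docs/todo.txt
    }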
diff --git a/lib/System/Process.cpp b/lib/System/Process.cpp
new file mode 100644
index 0000000..e93b2af
--- /dev/null
+++ b/lib/System/Process.cpp
@@ -0,0 +1,33 @@
+//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Process concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Process.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Process.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/Process.inc"
+#endif
diff --git a/lib/System/Program.cpp b/lib/System/Program.cpp
new file mode 100644
index 0000000..eb289d8
--- /dev/null
+++ b/lib/System/Program.cpp
@@ -0,0 +1,33 @@
+//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system Program concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Program.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Program.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/Program.inc"
+#endif
diff --git a/lib/System/README.txt b/lib/System/README.txt
new file mode 100644
index 0000000..eacb200
--- /dev/null
+++ b/lib/System/README.txt
@@ -0,0 +1,43 @@
+Design Of lib/System
+====================
+
+The software in this directory is designed to completely shield LLVM from any
+and all operating system specific functionality. It is not intended to be a
+complete operating system wrapper (such as ACE), but only to provide the
+functionality necessary to support LLVM.
+
+The software located here, of necessity, has very specific and stringent design
+rules. Violation of these rules means that cracks in the shield could form and
+the primary goal of the library would be defeated. By consistently using this
+library, LLVM becomes more easily ported to new platforms, since the only thing
+requiring porting is this library.
+
+Complete documentation for the library can be found in the file:
+ llvm/docs/SystemLibrary.html
+or at this URL:
+ http://llvm.org/docs/SystemLibrary.html
+
+While we recommend that you read the more detailed documentation, for the
+impatient, here's a high-level summary of the library's requirements.
+
+ 1. No system header files are to be exposed through the interface.
+ 2. Std C++ and Std C header files are okay to be exposed through the interface.
+ 3. No exposed system-specific functions.
+ 4. No exposed system-specific data.
+ 5. Data in lib/System classes must use only simple C++ intrinsic types.
+ 6. Errors are handled by returning "true" and setting an optional std::string
+ (error convention sketched below).
+ 7. Library must not throw any exceptions, period.
+ 8. Interface functions must not have throw() specifications.
+ 9. No duplicate function implementations are permitted within an operating
+ system class.
+
+To accomplish these requirements, the library has numerous design criteria that
+must be satisfied. Here's a high-level summary of the library's design criteria:
+
+ 1. No unused functionality (only what LLVM needs)
+ 2. High-Level Interfaces
+ 3. Use Opaque Classes
+ 4. Common Implementations
+ 5. Multiple Implementations
+ 6. Minimize Memory Allocation
+ 7. No Virtual Methods
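
To make rule 6 of the requirements concrete, here is a minimal sketch of the
error-reporting convention, assuming a hypothetical doSomething() helper (the
name and file path are illustrative, not part of the library):

    #include <cstdio>
    #include <string>

    // Returns true on failure; on failure, describes the error in *ErrMsg
    // when the caller supplied one. No exceptions are thrown (rule 7).
    static bool doSomething(std::string *ErrMsg) {
      std::FILE *F = std::fopen("/nonexistent", "r");
      if (!F) {
        if (ErrMsg)
          *ErrMsg = "can't open /nonexistent";
        return true;   // failure
      }
      std::fclose(F);
      return false;    // success
    }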
diff --git a/lib/System/Signals.cpp b/lib/System/Signals.cpp
new file mode 100644
index 0000000..d345b0a
--- /dev/null
+++ b/lib/System/Signals.cpp
@@ -0,0 +1,34 @@
+//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Signals.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Signals.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/Signals.inc"
+#endif
diff --git a/lib/System/TimeValue.cpp b/lib/System/TimeValue.cpp
new file mode 100644
index 0000000..cf4984c
--- /dev/null
+++ b/lib/System/TimeValue.cpp
@@ -0,0 +1,58 @@
+//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system TimeValue concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/TimeValue.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 );
+const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 );
+const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 );
+const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 );
+const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 );
+
+void
+TimeValue::normalize( void ) {
+ if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
+ do {
+ seconds_++;
+ nanos_ -= NANOSECONDS_PER_SECOND;
+ } while ( nanos_ >= NANOSECONDS_PER_SECOND );
+ } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) {
+ do {
+ seconds_--;
+ nanos_ += NANOSECONDS_PER_SECOND;
+ } while (nanos_ <= -NANOSECONDS_PER_SECOND);
+ }
+
+ if (seconds_ >= 1 && nanos_ < 0) {
+ seconds_--;
+ nanos_ += NANOSECONDS_PER_SECOND;
+ } else if (seconds_ < 0 && nanos_ > 0) {
+ seconds_++;
+ nanos_ -= NANOSECONDS_PER_SECOND;
+ }
+}
+
+}
+
+/// Include the platform specific portion of TimeValue class
+#ifdef LLVM_ON_UNIX
+#include "Unix/TimeValue.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Win32/TimeValue.inc"
+#endif
+
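As a worked example of normalize() above, here is a stand-alone restatement
under the assumption that NANOSECONDS_PER_SECOND == 1000000000 (the real
constant lives in llvm/System/TimeValue.h):

    #include <cassert>
    #include <stdint.h>

    static const int64_t NS_PER_SEC = 1000000000;

    // Same fixups as TimeValue::normalize(): shed whole seconds, then make
    // the signs of the two fields agree.
    static void normalize(int64_t &secs, int64_t &nanos) {
      while (nanos >= NS_PER_SEC)  { ++secs; nanos -= NS_PER_SEC; }
      while (nanos <= -NS_PER_SEC) { --secs; nanos += NS_PER_SEC; }
      if (secs >= 1 && nanos < 0)     { --secs; nanos += NS_PER_SEC; }
      else if (secs < 0 && nanos > 0) { ++secs; nanos -= NS_PER_SEC; }
    }

    int main() {
      int64_t s = 1, n = -200000000;
      normalize(s, n);
      assert(s == 0 && n == 800000000);   // borrowed one second
      s = 0; n = 2500000000LL;
      normalize(s, n);
      assert(s == 2 && n == 500000000);   // carried two whole seconds
      return 0;
    }
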
diff --git a/lib/System/Unix/Alarm.inc b/lib/System/Unix/Alarm.inc
new file mode 100644
index 0000000..28ff1b8
--- /dev/null
+++ b/lib/System/Unix/Alarm.inc
@@ -0,0 +1,72 @@
+//===-- Alarm.inc - Implement Unix Alarm Support ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Alarm support.
+//
+//===----------------------------------------------------------------------===//
+
+#include <signal.h>
+#include <unistd.h>
+#include <cassert>
+using namespace llvm;
+
+/// AlarmCancelled - This flag is set by the SIGINT signal handler if the
+/// user presses CTRL-C.
+static volatile bool AlarmCancelled = false;
+
+/// AlarmTriggered - This flag is set by the SIGALRM signal handler if the
+/// alarm was triggered.
+static volatile bool AlarmTriggered = false;
+
+/// NestedSOI - Sanity check. Alarms cannot be nested or run in parallel.
+/// This flag ensures that they never are.
+static bool NestedSOI = false;
+
+static RETSIGTYPE SigIntHandler(int Sig) {
+ AlarmCancelled = true;
+ signal(SIGINT, SigIntHandler);
+}
+
+static RETSIGTYPE SigAlarmHandler(int Sig) {
+ AlarmTriggered = true;
+}
+
+static void (*OldSigIntHandler) (int);
+
+void sys::SetupAlarm(unsigned seconds) {
+ assert(!NestedSOI && "sys::SetupAlarm calls cannot be nested!");
+ NestedSOI = true;
+ AlarmCancelled = false;
+ AlarmTriggered = false;
+ ::signal(SIGALRM, SigAlarmHandler);
+ OldSigIntHandler = ::signal(SIGINT, SigIntHandler);
+ ::alarm(seconds);
+}
+
+void sys::TerminateAlarm() {
+ assert(NestedSOI && "sys::TerminateAlarm called without sys::SetupAlarm!");
+ ::alarm(0);
+ ::signal(SIGALRM, SIG_DFL);
+ ::signal(SIGINT, OldSigIntHandler);
+ AlarmCancelled = false;
+ AlarmTriggered = false;
+ NestedSOI = false;
+}
+
+int sys::AlarmStatus() {
+ if (AlarmCancelled)
+ return -1;
+ if (AlarmTriggered)
+ return 1;
+ return 0;
+}
+
+void Sleep(unsigned n) {
+ ::sleep(n);
+}
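
A hypothetical use of the alarm API implemented above, polling AlarmStatus()
from a long-running loop (SetupAlarm and friends are assumed to be declared in
llvm/System/Alarm.h):

    #include "llvm/System/Alarm.h"
    using namespace llvm;

    // Run units of work for at most 30 seconds, or until the user hits
    // CTRL-C. AlarmStatus() returns 0 while neither has happened.
    void runWithTimeout() {
      sys::SetupAlarm(30);
      while (sys::AlarmStatus() == 0) {
        // ... do one bounded unit of work ...
      }
      // Status 1 means the alarm fired; -1 means the user pressed CTRL-C.
      sys::TerminateAlarm();
    }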
diff --git a/lib/System/Unix/Host.inc b/lib/System/Unix/Host.inc
new file mode 100644
index 0000000..fb319fd
--- /dev/null
+++ b/lib/System/Unix/Host.inc
@@ -0,0 +1,58 @@
+//===- llvm/System/Unix/Host.inc --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Host support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include <llvm/Config/config.h>
+#include "Unix.h"
+#include <sys/utsname.h>
+#include <string>
+
+using namespace llvm;
+
+static std::string getOSVersion() {
+ struct utsname info;
+
+ if (uname(&info))
+ return "";
+
+ return info.release;
+}
+
+std::string sys::getHostTriple() {
+ // FIXME: Derive more directly instead of relying on the autoconf
+ // generated variable.
+
+ std::string Triple = LLVM_HOSTTRIPLE;
+
+ // Force i<N>86 to i386.
+ if (Triple[0] == 'i' && isdigit(Triple[1]) &&
+ Triple[2] == '8' && Triple[3] == '6')
+ Triple[1] = '3';
+
+ // On darwin, we want to update the version to match that of the
+ // host.
+ std::string::size_type DarwinDashIdx = Triple.find("-darwin");
+ if (DarwinDashIdx != std::string::npos) {
+ Triple.resize(DarwinDashIdx + strlen("-darwin"));
+
+ // Only add the major part of the os version.
+ std::string Version = getOSVersion();
+ Triple += Version.substr(0, Version.find('.'));
+ }
+
+ return Triple;
+}
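
Tracing getHostTriple() above: an autoconf triple such as i686-pc-linux-gnu
comes back as i386-pc-linux-gnu, and on Darwin a LLVM_HOSTTRIPLE like
x86_64-apple-darwin9.6.0 is cut at "-darwin" and re-suffixed with the major
release reported by uname, giving x86_64-apple-darwin9. A stand-alone
restatement of the i<N>86 rewrite (with an added length guard that the
macro-supplied triple makes unnecessary above):

    #include <cctype>
    #include <string>

    // Rewrite i486/i586/i686/... to the canonical i386.
    static void canonicalizeX86(std::string &Triple) {
      if (Triple.size() >= 4 && Triple[0] == 'i' && isdigit(Triple[1]) &&
          Triple[2] == '8' && Triple[3] == '6')
        Triple[1] = '3';
    }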
diff --git a/lib/System/Unix/Memory.inc b/lib/System/Unix/Memory.inc
new file mode 100644
index 0000000..b7a7013
--- /dev/null
+++ b/lib/System/Unix/Memory.inc
@@ -0,0 +1,150 @@
+//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some functions for various memory management utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/System/Process.h"
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach/mach.h>
+#endif
+
+/// AllocateRWX - Allocate a slab of memory with read/write/execute
+/// permissions. This is typically used for JIT applications where we want
+/// to emit code to the memory then jump to it. Getting this type of memory
+/// is very OS specific.
+///
+llvm::sys::MemoryBlock
+llvm::sys::Memory::AllocateRWX(unsigned NumBytes, const MemoryBlock* NearBlock,
+ std::string *ErrMsg) {
+ if (NumBytes == 0) return MemoryBlock();
+
+ unsigned pageSize = Process::GetPageSize();
+ unsigned NumPages = (NumBytes+pageSize-1)/pageSize;
+
+ int fd = -1;
+#ifdef NEED_DEV_ZERO_FOR_MMAP
+ static int zero_fd = open("/dev/zero", O_RDWR);
+ if (zero_fd == -1) {
+ MakeErrMsg(ErrMsg, "Can't open /dev/zero device");
+ return MemoryBlock();
+ }
+ fd = zero_fd;
+#endif
+
+ int flags = MAP_PRIVATE |
+#ifdef HAVE_MMAP_ANONYMOUS
+ MAP_ANONYMOUS
+#else
+ MAP_ANON
+#endif
+ ;
+
+ void* start = NearBlock ? (unsigned char*)NearBlock->base() +
+ NearBlock->size() : 0;
+
+#if defined(__APPLE__) && defined(__arm__)
+ void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC,
+ flags, fd, 0);
+#else
+ void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
+ flags, fd, 0);
+#endif
+ if (pa == MAP_FAILED) {
+ if (NearBlock) //Try again without a near hint
+ return AllocateRWX(NumBytes, 0);
+
+ MakeErrMsg(ErrMsg, "Can't allocate RWX Memory");
+ return MemoryBlock();
+ }
+
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+ (vm_size_t)(pageSize*NumPages), 0,
+ VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ if (KERN_SUCCESS != kr) {
+ MakeErrMsg(ErrMsg, "vm_protect max RX failed");
+ return sys::MemoryBlock();
+ }
+
+ kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+ (vm_size_t)(pageSize*NumPages), 0,
+ VM_PROT_READ | VM_PROT_WRITE);
+ if (KERN_SUCCESS != kr) {
+ MakeErrMsg(ErrMsg, "vm_protect RW failed");
+ return sys::MemoryBlock();
+ }
+#endif
+
+ MemoryBlock result;
+ result.Address = pa;
+ result.Size = NumPages*pageSize;
+
+ return result;
+}
+
+bool llvm::sys::Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+ if (M.Address == 0 || M.Size == 0) return false;
+ if (0 != ::munmap(M.Address, M.Size))
+ return MakeErrMsg(ErrMsg, "Can't release RWX Memory");
+ return false;
+}
+
+bool llvm::sys::Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+ if (M.Address == 0 || M.Size == 0) return false;
+ sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+ (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+ if (M.Address == 0 || M.Size == 0) return false;
+ sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+ (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ return KERN_SUCCESS == kr;
+#else
+ return false;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeWritable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+ (vm_size_t)Size, 0,
+ VM_PROT_READ | VM_PROT_WRITE);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeExecutable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+ kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+ (vm_size_t)Size, 0,
+ VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+ return KERN_SUCCESS == kr;
+#else
+ return true;
+#endif
+}
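
A hypothetical JIT-style client of the API above: allocate an RWX block, copy
pre-encoded machine code into it, call it, and release it. Error handling is
kept minimal, and on ARM/Darwin the setWritable/setExecutable dance shown
above would also be required around the copy:

    #include "llvm/System/Memory.h"
    #include <cstring>
    using namespace llvm;

    typedef int (*EntryFn)();

    // Returns the emitted code's result, or -1 if the allocation failed.
    int runBlob(const unsigned char *Code, unsigned Size) {
      std::string Err;
      sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size, 0, &Err);
      if (MB.base() == 0)
        return -1;                        // Err holds the reason
      std::memcpy(MB.base(), Code, Size); // emit the code
      int Result = ((EntryFn)MB.base())();
      sys::Memory::ReleaseRWX(MB, &Err);
      return Result;
    }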
diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc
new file mode 100644
index 0000000..4a015a6
--- /dev/null
+++ b/lib/System/Unix/Mutex.inc
@@ -0,0 +1,49 @@
+//===- llvm/System/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm
+{
+using namespace sys;
+
+Mutex::Mutex( bool recursive)
+{
+}
+
+Mutex::~Mutex()
+{
+}
+
+bool
+Mutex::acquire()
+{
+ return true;
+}
+
+bool
+Mutex::release()
+{
+ return true;
+}
+
+bool
+Mutex::tryacquire( void )
+{
+ return true;
+}
+
+}
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
new file mode 100644
index 0000000..d5edee1
--- /dev/null
+++ b/lib/System/Unix/Path.inc
@@ -0,0 +1,876 @@
+//===- llvm/System/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/alloca.h"
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_UTIME_H
+#include <utime.h>
+#endif
+#if HAVE_TIME_H
+#include <time.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+// Put in a hack for Cygwin which falsely reports that the mkdtemp function
+// is available when it is not.
+#ifdef __CYGWIN__
+# undef HAVE_MKDTEMP
+#endif
+
+namespace {
+inline bool lastIsSlash(const std::string& path) {
+ return !path.empty() && path[path.length() - 1] == '/';
+}
+
+}
+
+namespace llvm {
+using namespace sys;
+
+extern const char sys::PathSeparator = ':';
+
+Path::Path(const std::string& p)
+ : path(p) {}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+ : path(StrStart, StrLen) {}
+
+Path&
+Path::operator=(const std::string &that) {
+ path = that;
+ return *this;
+}
+
+bool
+Path::isValid() const {
+ // Check some obvious things
+ if (path.empty())
+ return false;
+ else if (path.length() >= MAXPATHLEN)
+ return false;
+
+ // Check that the characters are ascii chars
+ size_t len = path.length();
+ unsigned i = 0;
+ while (i < len && isascii(path[i]))
+ ++i;
+ return i >= len;
+}
+
+bool
+Path::isAbsolute() const {
+ if (path.empty())
+ return false;
+ return path[0] == '/';
+}
+Path
+Path::GetRootDirectory() {
+ Path result;
+ result.set("/");
+ return result;
+}
+
+Path
+Path::GetTemporaryDirectory(std::string *ErrMsg) {
+#if defined(HAVE_MKDTEMP)
+ // The best way is with mkdtemp, but that's not available on many systems;
+ // Linux and FreeBSD have it. Others probably won't.
+ char pathname[MAXPATHLEN];
+ strcpy(pathname,"/tmp/llvm_XXXXXX");
+ if (0 == mkdtemp(pathname)) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ Path result;
+ result.set(pathname);
+ assert(result.isValid() && "mkdtemp didn't create a valid pathname!");
+ return result;
+#elif defined(HAVE_MKSTEMP)
+ // If no mkdtemp is available, mkstemp can be used to create a temporary file
+ // which is then removed and created as a directory. We prefer this over
+ // mktemp because of mktemp's inherent security and threading risks. We still
+ // have a slight race condition from the time the temporary file is created to
+ // the time it is re-created as a directory.
+ char pathname[MAXPATHLEN];
+ strcpy(pathname, "/tmp/llvm_XXXXXX");
+ int fd = 0;
+ if (-1 == (fd = mkstemp(pathname))) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ ::close(fd);
+ ::unlink(pathname); // start race condition, ignore errors
+ if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ Path result;
+ result.set(pathname);
+ assert(result.isValid() && "mkstemp didn't create a valid pathname!");
+ return result;
+#elif defined(HAVE_MKTEMP)
+ // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have
+ // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable
+ // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing
+ // the XXXXXX with the pid of the process and a letter. That leads to only
+ // twenty-six temporary files that can be generated.
+ char pathname[MAXPATHLEN];
+ strcpy(pathname, "/tmp/llvm_XXXXXX");
+ char *TmpName = ::mktemp(pathname);
+ if (TmpName == 0) {
+ MakeErrMsg(ErrMsg,
+ std::string(TmpName) + ": can't create unique directory name");
+ return Path();
+ }
+ if (-1 == ::mkdir(TmpName, S_IRWXU)) {
+ MakeErrMsg(ErrMsg,
+ std::string(TmpName) + ": can't create temporary directory");
+ return Path();
+ }
+ Path result;
+ result.set(TmpName);
+ assert(result.isValid() && "mktemp didn't create a valid pathname!");
+ return result;
+#else
+ // This is the worst-case implementation. tempnam(3) leaks memory unless it's
+ // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
+ // issues. The mktemp(3) function doesn't have enough variability in the
+ // temporary name generated. So, we provide our own implementation that
+ // increments an integer starting from the current time. This
+ // should be sufficiently unique that we don't have many collisions between
+ // processes. Generally LLVM processes don't run very long and don't use very
+ // many temporary files so this shouldn't be a big issue for LLVM.
+ static time_t num = ::time(0);
+ char pathname[MAXPATHLEN];
+ do {
+ num++;
+ sprintf(pathname, "/tmp/llvm_%010u", unsigned(num));
+ } while ( 0 == access(pathname, F_OK ) );
+ if (-1 == ::mkdir(pathname, S_IRWXU)) {
+ MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create temporary directory");
+ return Path();
+ }
+ Path result;
+ result.set(pathname);
+ assert(result.isValid() && "temp directory name isn't a valid pathname!");
+ return result;
+#endif
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+#ifdef LTDL_SHLIBPATH_VAR
+ char* env_var = getenv(LTDL_SHLIBPATH_VAR);
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#endif
+ // FIXME: Should this look at LD_LIBRARY_PATH too?
+ Paths.push_back(sys::Path("/usr/local/lib/"));
+ Paths.push_back(sys::Path("/usr/X11R6/lib/"));
+ Paths.push_back(sys::Path("/usr/lib/"));
+ Paths.push_back(sys::Path("/lib/"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+ char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#ifdef LLVM_LIBDIR
+ {
+ Path tmpPath;
+ if (tmpPath.set(LLVM_LIBDIR))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ }
+#endif
+ GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+ return Path("/etc/llvm/");
+}
+
+Path
+Path::GetUserHomeDirectory() {
+ const char* home = getenv("HOME");
+ if (home) {
+ Path result;
+ if (result.set(home))
+ return result;
+ }
+ return GetRootDirectory();
+}
+
+Path
+Path::GetCurrentDirectory() {
+ char pathname[MAXPATHLEN];
+ if (!getcwd(pathname,MAXPATHLEN)) {
+ assert (false && "Could not query current working directory.");
+ return Path("");
+ }
+
+ return Path(pathname);
+}
+
+#ifdef __FreeBSD__
+static int
+test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
+ const char *dir, const char *bin)
+{
+ struct stat sb;
+
+ snprintf(buf, PATH_MAX, "%s//%s", dir, bin);
+ if (realpath(buf, ret) == NULL)
+ return (1);
+ if (stat(buf, &sb) != 0)
+ return (1);
+
+ return (0);
+}
+
+static char *
+getprogpath(char ret[PATH_MAX], const char *bin)
+{
+ char *pv, *s, *t, buf[PATH_MAX];
+
+ /* First approach: absolute path. */
+ if (bin[0] == '/') {
+ if (test_dir(buf, ret, "/", bin) == 0)
+ return (ret);
+ return (NULL);
+ }
+
+ /* Second approach: relative path. */
+ if (strchr(bin, '/') != NULL) {
+ if (getcwd(buf, PATH_MAX) == NULL)
+ return (NULL);
+ if (test_dir(buf, ret, buf, bin) == 0)
+ return (ret);
+ return (NULL);
+ }
+
+ /* Third approach: $PATH */
+ if ((pv = getenv("PATH")) == NULL)
+ return (NULL);
+ s = pv = strdup(pv);
+ if (pv == NULL)
+ return (NULL);
+ while ((t = strsep(&s, ":")) != NULL) {
+ if (test_dir(buf, ret, t, bin) == 0) {
+ free(pv);
+ return (ret);
+ }
+ }
+ free(pv);
+ return (NULL);
+}
+#endif
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+#if defined(__FreeBSD__)
+ char exe_path[PATH_MAX];
+
+ if (getprogpath(exe_path, argv0) != NULL)
+ return Path(std::string(exe_path));
+#elif defined(__linux__) || defined(__CYGWIN__)
+ char exe_path[MAXPATHLEN];
+ ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
+ if (len > 0 && len < MAXPATHLEN - 1) {
+ exe_path[len] = '\0';
+ return Path(std::string(exe_path));
+ }
+#elif defined(HAVE_DLFCN_H)
+ // Use dladdr to get executable path if available.
+ Dl_info DLInfo;
+ int err = dladdr(MainAddr, &DLInfo);
+ if (err == 0)
+ return Path();
+
+ // If the filename is a symlink, we need to resolve and return the location of
+ // the actual executable.
+ char link_path[MAXPATHLEN];
+ return Path(std::string(realpath(DLInfo.dli_fname, link_path)));
+#endif
+ return Path();
+}
+
+
+std::string Path::getDirname() const {
+ return getDirnameCharSep(path, '/');
+}
+
+std::string
+Path::getBasename() const {
+ // Find the last slash
+ std::string::size_type slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ std::string::size_type dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return path.substr(slash);
+ else
+ return path.substr(slash, dot - slash);
+}
+
+std::string
+Path::getSuffix() const {
+ // Find the last slash
+ std::string::size_type slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ std::string::size_type dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return std::string();
+ else
+ return path.substr(dot + 1);
+}
+
+bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
+ assert(len < 1024 && "Request for magic string too long");
+ char* buf = (char*) alloca(1 + len);
+ int fd = ::open(path.c_str(), O_RDONLY);
+ if (fd < 0)
+ return false;
+ ssize_t bytes_read = ::read(fd, buf, len);
+ ::close(fd);
+ if (ssize_t(len) != bytes_read) {
+ Magic.clear();
+ return false;
+ }
+ Magic.assign(buf,len);
+ return true;
+}
+
+bool
+Path::exists() const {
+ return 0 == access(path.c_str(), F_OK );
+}
+
+bool
+Path::isDirectory() const {
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+ return buf.st_mode & S_IFDIR ? true : false;
+}
+
+bool
+Path::canRead() const {
+ return 0 == access(path.c_str(), F_OK | R_OK );
+}
+
+bool
+Path::canWrite() const {
+ return 0 == access(path.c_str(), F_OK | W_OK );
+}
+
+bool
+Path::canExecute() const {
+ if (0 != access(path.c_str(), R_OK | X_OK ))
+ return false;
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+ if (!S_ISREG(buf.st_mode))
+ return false;
+ return true;
+}
+
+std::string
+Path::getLast() const {
+ // Find the last slash
+ size_t pos = path.rfind('/');
+
+ // Handle the corner cases
+ if (pos == std::string::npos)
+ return path;
+
+ // If the last character is a slash
+ if (pos == path.length()-1) {
+ // Find the second to last slash
+ size_t pos2 = path.rfind('/', pos-1);
+ if (pos2 == std::string::npos)
+ return path.substr(0,pos);
+ else
+ return path.substr(pos2+1,pos-pos2-1);
+ }
+ // Return everything after the last slash
+ return path.substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+ if (!fsIsValid || update) {
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf)) {
+ MakeErrMsg(ErrStr, path + ": can't get status of file");
+ return 0;
+ }
+ status.fileSize = buf.st_size;
+ status.modTime.fromEpochTime(buf.st_mtime);
+ status.mode = buf.st_mode;
+ status.user = buf.st_uid;
+ status.group = buf.st_gid;
+ status.uniqueID = uint64_t(buf.st_ino);
+ status.isDir = S_ISDIR(buf.st_mode);
+ status.isFile = S_ISREG(buf.st_mode);
+ fsIsValid = true;
+ }
+ return &status;
+}
+
+static bool AddPermissionBits(const Path &File, int bits) {
+ // Get the umask value from the operating system. We want to use it
+ // when changing the file's permissions. Since calling umask() sets
+ // the umask and returns its old value, we must call it a second
+ // time to reset it to the user's preference.
+ int mask = umask(0777); // The arg. to umask is arbitrary.
+ umask(mask); // Restore the umask.
+
+ // Get the file's current mode.
+ struct stat buf;
+ if (0 != stat(File.toString().c_str(), &buf))
+ return false;
+ // Change the file to have whichever permissions bits from 'bits'
+ // that the umask would not disable.
+ if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1)
+ return false;
+ return true;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0444))
+ return MakeErrMsg(ErrMsg, path + ": can't make file readable");
+ return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0222))
+ return MakeErrMsg(ErrMsg, path + ": can't make file writable");
+ return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+ if (!AddPermissionBits(*this, 0111))
+ return MakeErrMsg(ErrMsg, path + ": can't make file executable");
+ return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+ DIR* direntries = ::opendir(path.c_str());
+ if (direntries == 0)
+ return MakeErrMsg(ErrMsg, path + ": can't open directory");
+
+ std::string dirPath = path;
+ if (!lastIsSlash(dirPath))
+ dirPath += '/';
+
+ result.clear();
+ struct dirent* de = ::readdir(direntries);
+ for ( ; de != 0; de = ::readdir(direntries)) {
+ if (de->d_name[0] != '.') {
+ Path aPath(dirPath + (const char*)de->d_name);
+ struct stat st;
+ if (0 != lstat(aPath.path.c_str(), &st)) {
+ if (S_ISLNK(st.st_mode))
+ continue; // dangling symlink -- ignore
+ return MakeErrMsg(ErrMsg,
+ aPath.path + ": can't determine file object type");
+ }
+ result.insert(aPath);
+ }
+ }
+
+ closedir(direntries);
+ return false;
+}
+
+bool
+Path::set(const std::string& a_path) {
+ if (a_path.empty())
+ return false;
+ std::string save(path);
+ path = a_path;
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::appendComponent(const std::string& name) {
+ if (name.empty())
+ return false;
+ std::string save(path);
+ if (!lastIsSlash(path))
+ path += '/';
+ path += name;
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseComponent() {
+ size_t slashpos = path.rfind('/',path.size());
+ if (slashpos == 0 || slashpos == std::string::npos) {
+ path.erase();
+ return true;
+ }
+ if (slashpos == path.size() - 1)
+ slashpos = path.rfind('/',slashpos-1);
+ if (slashpos == std::string::npos) {
+ path.erase();
+ return true;
+ }
+ path.erase(slashpos);
+ return true;
+}
+
+bool
+Path::appendSuffix(const std::string& suffix) {
+ std::string save(path);
+ path.append(".");
+ path.append(suffix);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseSuffix() {
+ std::string save = path;
+ size_t dotpos = path.rfind('.',path.size());
+ size_t slashpos = path.rfind('/',path.size());
+ if (dotpos != std::string::npos) {
+ if (slashpos == std::string::npos || dotpos > slashpos+1) {
+ path.erase(dotpos, path.size()-dotpos);
+ return true;
+ }
+ }
+ if (!isValid())
+ path = save;
+ return false;
+}
+
+static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
+
+ if (access(beg, F_OK | R_OK | W_OK) == 0)
+ return false;
+
+ if (create_parents) {
+
+ char* c = end;
+
+ for (; c != beg; --c)
+ if (*c == '/') {
+
+ // Recurse to handling the parent directory.
+ *c = '\0';
+ bool x = createDirectoryHelper(beg, c, create_parents);
+ *c = '/';
+
+ // Return if we encountered an error.
+ if (x)
+ return true;
+
+ break;
+ }
+ }
+
+ return mkdir(beg, S_IRWXU | S_IRWXG) != 0;
+}
+
+bool
+Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) {
+ // Get a writeable copy of the path name
+ char pathname[MAXPATHLEN];
+ path.copy(pathname,MAXPATHLEN);
+
+ // Null-terminate the last component
+ size_t lastchar = path.length() - 1 ;
+
+ if (pathname[lastchar] != '/')
+ ++lastchar;
+
+ pathname[lastchar] = 0;
+
+ if (createDirectoryHelper(pathname, pathname+lastchar, create_parents))
+ return MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": can't create directory");
+
+ return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+ // Create the file
+ int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ return MakeErrMsg(ErrMsg, path + ": can't create file");
+ ::close(fd);
+ return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+ // Make this into a unique file name
+ if (makeUnique( reuse_current, ErrMsg ))
+ return true;
+
+ // create the file
+ int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
+ if (fd < 0)
+ return MakeErrMsg(ErrMsg, path + ": can't create temporary file");
+ ::close(fd);
+ return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+ // Get the status so we can determine if it's a file or directory.
+ struct stat buf;
+ if (0 != stat(path.c_str(), &buf)) {
+ MakeErrMsg(ErrStr, path + ": can't get status of file");
+ return true;
+ }
+
+ // Note: this check catches strange situations. In all cases, LLVM should
+ // only be involved in the creation and deletion of regular files. This
+ // check ensures that what we're trying to erase is a regular file. It
+ // effectively prevents LLVM from erasing things like /dev/null, any block
+ // special file, or other things that aren't "regular" files.
+ if (S_ISREG(buf.st_mode)) {
+ if (unlink(path.c_str()) != 0)
+ return MakeErrMsg(ErrStr, path + ": can't destroy file");
+ return false;
+ }
+
+ if (!S_ISDIR(buf.st_mode)) {
+ if (ErrStr) *ErrStr = "not a file or directory";
+ return true;
+ }
+
+ if (remove_contents) {
+ // Recursively descend the directory to remove its contents.
+ std::string cmd = "/bin/rm -rf " + path;
+ if (system(cmd.c_str()) != 0) {
+ MakeErrMsg(ErrStr, path + ": failed to recursively remove directory.");
+ return true;
+ }
+ return false;
+ }
+
+ // Otherwise, try to just remove the one directory.
+ char pathname[MAXPATHLEN];
+ path.copy(pathname, MAXPATHLEN);
+ size_t lastchar = path.length() - 1;
+ if (pathname[lastchar] == '/')
+ pathname[lastchar] = 0;
+ else
+ pathname[lastchar+1] = 0;
+
+ if (rmdir(pathname) != 0)
+ return MakeErrMsg(ErrStr,
+ std::string(pathname) + ": can't erase directory");
+ return false;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+ if (0 != ::rename(path.c_str(), newName.c_str()))
+ return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" +
+ newName.toString() + "'");
+ return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const {
+ struct utimbuf utb;
+ utb.actime = si.modTime.toPosixTime();
+ utb.modtime = utb.actime;
+ if (0 != ::utime(path.c_str(),&utb))
+ return MakeErrMsg(ErrStr, path + ": can't set file modification time");
+ if (0 != ::chmod(path.c_str(),si.mode))
+ return MakeErrMsg(ErrStr, path + ": can't set mode");
+ return false;
+}
+
+bool
+sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
+ int inFile = -1;
+ int outFile = -1;
+ inFile = ::open(Src.c_str(), O_RDONLY);
+ if (inFile == -1)
+ return MakeErrMsg(ErrMsg, Src.toString() +
+ ": can't open source file to copy");
+
+ outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666);
+ if (outFile == -1) {
+ ::close(inFile);
+ return MakeErrMsg(ErrMsg, Dest.toString() +
+ ": can't create destination file for copy");
+ }
+
+ char Buffer[16*1024];
+ while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) {
+ if (Amt == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ ::close(inFile);
+ ::close(outFile);
+ return MakeErrMsg(ErrMsg, Src.toString()+": can't read source file");
+ }
+ } else {
+ char *BufPtr = Buffer;
+ while (Amt) {
+ ssize_t AmtWritten = ::write(outFile, BufPtr, Amt);
+ if (AmtWritten == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ ::close(inFile);
+ ::close(outFile);
+ return MakeErrMsg(ErrMsg, Dest.toString() +
+ ": can't write destination file");
+ }
+ } else {
+ Amt -= AmtWritten;
+ BufPtr += AmtWritten;
+ }
+ }
+ }
+ }
+ ::close(inFile);
+ ::close(outFile);
+ return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+ if (reuse_current && !exists())
+ return false; // File doesn't exist already, just use it!
+
+ // Append an XXXXXX pattern to the end of the file for use with mkstemp,
+ // mktemp or our own implementation.
+ char *FNBuffer = (char*) alloca(path.size()+8);
+ path.copy(FNBuffer,path.size());
+ if (isDirectory())
+ strcpy(FNBuffer+path.size(), "/XXXXXX");
+ else
+ strcpy(FNBuffer+path.size(), "-XXXXXX");
+
+#if defined(HAVE_MKSTEMP)
+ int TempFD;
+ if ((TempFD = mkstemp(FNBuffer)) == -1)
+ return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+ // We don't need to hold the temp file descriptor... we will trust that no one
+ // will overwrite/delete the file before we can open it again.
+ close(TempFD);
+
+ // Save the name
+ path = FNBuffer;
+#elif defined(HAVE_MKTEMP)
+ // If we don't have mkstemp, use the old and obsolete mktemp function.
+ if (mktemp(FNBuffer) == 0)
+ return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+ // Save the name
+ path = FNBuffer;
+#else
+ // Okay, looks like we have to do it all by our lonesome.
+ static unsigned FCounter = 0;
+ unsigned offset = path.size() + 1;
+ while ( FCounter < 999999 && exists()) {
+ sprintf(FNBuffer+offset,"%06u",++FCounter);
+ path = FNBuffer;
+ }
+ if (FCounter > 999999)
+ return MakeErrMsg(ErrMsg,
+ path + ": can't make unique filename: too many files");
+#endif
+ return false;
+}
+
+const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+ int Flags = MAP_PRIVATE;
+#ifdef MAP_FILE
+ Flags |= MAP_FILE;
+#endif
+ void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, 0);
+ if (BasePtr == MAP_FAILED)
+ return 0;
+ return (const char*)BasePtr;
+}
+
+void Path::UnMapFilePages(const char *BasePtr, uint64_t FileSize) {
+ ::munmap((void*)BasePtr, FileSize);
+}
+
+} // end llvm namespace
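
Some traced expectations for the name-manipulation helpers above (illustrative
only, derived by hand from the code):

    // For sys::Path P("/tmp/archive.tar.gz"):
    //   P.getLast()     == "archive.tar.gz"
    //   P.getBasename() == "archive.tar"   // up to the last dot
    //   P.getSuffix()   == "gz"            // after the last dot
    //   P.getDirname()  == "/tmp"
    //
    // For sys::Path P("/tmp/dir/"):
    //   P.getLast()     == "dir"           // trailing slash is skipped
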
diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc
new file mode 100644
index 0000000..74b9bb8
--- /dev/null
+++ b/lib/System/Unix/Process.inc
@@ -0,0 +1,237 @@
+//===- Unix/Process.cpp - Unix Process Implementation ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#ifdef HAVE_MALLOC_MALLOC_H
+#include <malloc/malloc.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+# include <sys/ioctl.h>
+#endif
+#ifdef HAVE_TERMIOS_H
+# include <termios.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+using namespace sys;
+
+unsigned
+Process::GetPageSize()
+{
+#if defined(__CYGWIN__)
+ // On Cygwin, getpagesize() returns 64k but the page size for the purposes of
+ // memory protection and mmap() is 4k.
+ // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
+ static const int page_size = 0x1000;
+#elif defined(HAVE_GETPAGESIZE)
+ static const int page_size = ::getpagesize();
+#elif defined(HAVE_SYSCONF)
+ static long page_size = ::sysconf(_SC_PAGE_SIZE);
+#else
+#warning Cannot get the page size on this machine
+#endif
+ return static_cast<unsigned>(page_size);
+}
+
+size_t Process::GetMallocUsage() {
+#if defined(HAVE_MALLINFO)
+ struct mallinfo mi;
+ mi = ::mallinfo();
+ return mi.uordblks;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+ malloc_statistics_t Stats;
+ malloc_zone_statistics(malloc_default_zone(), &Stats);
+ return Stats.size_in_use; // darwin
+#elif defined(HAVE_SBRK)
+ // Note this is only an approximation and more closely resembles
+ // the value returned by mallinfo in the arena field.
+ static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0));
+ char *EndOfMemory = (char*)sbrk(0);
+ if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
+ return EndOfMemory - StartOfMemory;
+ else
+ return 0;
+#else
+#warning Cannot get malloc info on this platform
+ return 0;
+#endif
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+#if defined(HAVE_MALLINFO)
+ struct mallinfo mi = ::mallinfo();
+ return mi.uordblks + mi.hblkhd;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+ malloc_statistics_t Stats;
+ malloc_zone_statistics(malloc_default_zone(), &Stats);
+ return Stats.size_allocated; // darwin
+#elif defined(HAVE_GETRUSAGE)
+ struct rusage usage;
+ ::getrusage(RUSAGE_SELF, &usage);
+ return usage.ru_maxrss;
+#else
+#warning Cannot get total memory size on this platform
+ return 0;
+#endif
+}
+
+void
+Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
+ TimeValue& sys_time)
+{
+ elapsed = TimeValue::now();
+#if defined(HAVE_GETRUSAGE)
+ struct rusage usage;
+ ::getrusage(RUSAGE_SELF, &usage);
+ user_time = TimeValue(
+ static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
+ TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+ sys_time = TimeValue(
+ static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
+ TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+#else
+#warning Cannot get usage times on this platform
+ user_time.seconds(0);
+ user_time.microseconds(0);
+ sys_time.seconds(0);
+ sys_time.microseconds(0);
+#endif
+}
+
+int Process::GetCurrentUserId() {
+ return getuid();
+}
+
+int Process::GetCurrentGroupId() {
+ return getgid();
+}
+
+#ifdef HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this function
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+#if HAVE_SETRLIMIT
+ struct rlimit rlim;
+ rlim.rlim_cur = rlim.rlim_max = 0;
+ setrlimit(RLIMIT_CORE, &rlim);
+#endif
+
+#ifdef HAVE_MACH_MACH_H
+ // Disable crash reporting on Mac OS X 10.0-10.4
+
+ // get information about the original set of exception ports for the task
+ mach_msg_type_number_t Count = 0;
+ exception_mask_t OriginalMasks[EXC_TYPES_COUNT];
+ exception_port_t OriginalPorts[EXC_TYPES_COUNT];
+ exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
+ thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
+ kern_return_t err =
+ task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
+ &Count, OriginalPorts, OriginalBehaviors,
+ OriginalFlavors);
+ if (err == KERN_SUCCESS) {
+ // replace each with MACH_PORT_NULL.
+ for (unsigned i = 0; i != Count; ++i)
+ task_set_exception_ports(mach_task_self(), OriginalMasks[i],
+ MACH_PORT_NULL, OriginalBehaviors[i],
+ OriginalFlavors[i]);
+ }
+
+ // Disable crash reporting on Mac OS X 10.5
+ signal(SIGABRT, _exit);
+ signal(SIGILL, _exit);
+ signal(SIGFPE, _exit);
+ signal(SIGSEGV, _exit);
+ signal(SIGBUS, _exit);
+#endif
+}
+
+bool Process::StandardInIsUserInput() {
+#if HAVE_ISATTY
+ return isatty(0);
+#endif
+ // If we don't have isatty, just return false.
+ return false;
+}
+
+bool Process::StandardOutIsDisplayed() {
+#if HAVE_ISATTY
+ return isatty(1);
+#endif
+ // If we don't have isatty, just return false.
+ return false;
+}
+
+bool Process::StandardErrIsDisplayed() {
+#if HAVE_ISATTY
+ return isatty(2);
+#endif
+ // If we don't have isatty, just return false.
+ return false;
+}
+
+static unsigned getColumns(int FileID) {
+ // If COLUMNS is defined in the environment, wrap to that many columns.
+ if (const char *ColumnsStr = std::getenv("COLUMNS")) {
+ int Columns = std::atoi(ColumnsStr);
+ if (Columns > 0)
+ return Columns;
+ }
+
+ unsigned Columns = 0;
+
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
+ // Try to determine the width of the terminal.
+ struct winsize ws;
+ if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
+ Columns = ws.ws_col;
+#endif
+
+ return Columns;
+}
+
+unsigned Process::StandardOutColumns() {
+ if (!StandardOutIsDisplayed())
+ return 0;
+
+ return getColumns(1);
+}
+
+unsigned Process::StandardErrColumns() {
+ if (!StandardErrIsDisplayed())
+ return 0;
+
+ return getColumns(2);
+}
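
A small hypothetical consumer of the terminal-width queries above; a zero
return means the width could not be determined, so callers typically fall
back to a fixed width:

    #include "llvm/System/Process.h"
    using namespace llvm;

    // Width to wrap diagnostics at: the real terminal width when stdout is
    // a terminal, otherwise a conventional 80 columns.
    unsigned wrapWidth() {
      unsigned Cols = sys::Process::StandardOutColumns();
      return Cols ? Cols : 80;
    }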
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
new file mode 100644
index 0000000..cdc6fee
--- /dev/null
+++ b/lib/System/Unix/Program.inc
@@ -0,0 +1,287 @@
+//===- llvm/System/Unix/Program.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include <llvm/Config/config.h>
+#include "Unix.h"
+#include <iostream>
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+namespace llvm {
+using namespace sys;
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+ // Check some degenerate cases
+ if (progName.length() == 0) // no program
+ return Path();
+ Path temp;
+ if (!temp.set(progName)) // invalid name
+ return Path();
+ // FIXME: have to check for absolute filename - we cannot assume anything
+ // about "." being in $PATH
+ if (temp.canExecute()) // already executable as is
+ return temp;
+
+ // At this point, the file name is valid and it's not executable.
+
+ // Get the path. If it's empty, we can't do anything to find it.
+ const char *PathStr = getenv("PATH");
+ if (PathStr == 0)
+ return Path();
+
+ // Now we have a colon separated list of directories to search; try them.
+ size_t PathLen = strlen(PathStr);
+ while (PathLen) {
+ // Find the first colon...
+ const char *Colon = std::find(PathStr, PathStr+PathLen, ':');
+
+ // Check to see if this first directory contains the executable...
+ Path FilePath;
+ if (FilePath.set(std::string(PathStr,Colon))) {
+ FilePath.appendComponent(progName);
+ if (FilePath.canExecute())
+ return FilePath; // Found the executable!
+ }
+
+ // Nope, it wasn't in this directory; check the next path in the list!
+ PathLen -= Colon-PathStr;
+ PathStr = Colon;
+
+ // Advance past duplicate colons
+ while (*PathStr == ':') {
+ PathStr++;
+ PathLen--;
+ }
+ }
+ return Path();
+}
+
+static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
+ if (Path == 0)
+ // Noop
+ return false;
+ std::string File;
+ if (Path->isEmpty())
+ // Redirect empty paths to /dev/null
+ File = "/dev/null";
+ else
+ File = Path->toString();
+
+ // Open the file
+ int InFD = open(File.c_str(), FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
+ if (InFD == -1) {
+ MakeErrMsg(ErrMsg, "Cannot open file '" + File + "' for "
+ + (FD == 0 ? "input" : "output"));
+ return true;
+ }
+
+ // Install it as the requested FD
+ if (-1 == dup2(InFD, FD)) {
+ MakeErrMsg(ErrMsg, "Cannot dup2");
+ return true;
+ }
+ close(InFD); // Close the original FD
+ return false;
+}
+
+static bool Timeout = false;
+static void TimeOutHandler(int Sig) {
+ Timeout = true;
+}
+
+static void SetMemoryLimits (unsigned size)
+{
+#if HAVE_SYS_RESOURCE_H
+ struct rlimit r;
+ __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576;
+
+ // Heap size
+ getrlimit (RLIMIT_DATA, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_DATA, &r);
+#ifdef RLIMIT_RSS
+ // Resident set size.
+ getrlimit (RLIMIT_RSS, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_RSS, &r);
+#endif
+#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it.
+ // Virtual memory.
+ getrlimit (RLIMIT_AS, &r);
+ r.rlim_cur = limit;
+ setrlimit (RLIMIT_AS, &r);
+#endif
+#endif
+}
+
+int
+Program::ExecuteAndWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned secondsToWait,
+ unsigned memoryLimit,
+ std::string* ErrMsg)
+{
+ if (!path.canExecute()) {
+ if (ErrMsg)
+ *ErrMsg = path.toString() + " is not executable";
+ return -1;
+ }
+
+#ifdef HAVE_SYS_WAIT_H
+ // Create a child process.
+ int child = fork();
+ switch (child) {
+ // An error occurred: return to the caller.
+ case -1:
+ MakeErrMsg(ErrMsg, "Couldn't fork");
+ return -1;
+
+ // Child process: Execute the program.
+ case 0: {
+ // Redirect file descriptors...
+ if (redirects) {
+ // Redirect stdin
+ if (RedirectIO(redirects[0], 0, ErrMsg)) { return -1; }
+ // Redirect stdout
+ if (RedirectIO(redirects[1], 1, ErrMsg)) { return -1; }
+ if (redirects[1] && redirects[2] &&
+ *(redirects[1]) == *(redirects[2])) {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the FD already open for stdout.
+ if (-1 == dup2(1,2)) {
+ MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout");
+ return -1;
+ }
+ } else {
+ // Just redirect stderr
+ if (RedirectIO(redirects[2], 2, ErrMsg)) { return -1; }
+ }
+ }
+
+ // Set memory limits
+ if (memoryLimit!=0) {
+ SetMemoryLimits(memoryLimit);
+ }
+
+ // Execute!
+ if (envp != 0)
+ execve (path.c_str(), (char**)args, (char**)envp);
+ else
+ execv (path.c_str(), (char**)args);
+ // If the execve() failed, we should exit and let the parent pick up
+ // our non-zero exit status.
+ exit (errno);
+ }
+
+ // Parent process: Break out of the switch to do our processing.
+ default:
+ break;
+ }
+
+ // Make sure stderr and stdout have been flushed
+ std::cerr << std::flush;
+ std::cout << std::flush;
+ fsync(1);
+ fsync(2);
+
+ struct sigaction Act, Old;
+
+ // Install a timeout handler.
+ if (secondsToWait) {
+ Timeout = false;
+ Act.sa_sigaction = 0;
+ Act.sa_handler = TimeOutHandler;
+ sigemptyset(&Act.sa_mask);
+ Act.sa_flags = 0;
+ sigaction(SIGALRM, &Act, &Old);
+ alarm(secondsToWait);
+ }
+
+ // Parent process: Wait for the child process to terminate.
+ int status;
+ while (wait(&status) != child)
+ if (secondsToWait && errno == EINTR) {
+ // Kill the child.
+ kill(child, SIGKILL);
+
+ // Turn off the alarm and restore the signal handler
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+
+ // Wait for child to die
+ if (wait(&status) != child)
+ MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
+ else
+ MakeErrMsg(ErrMsg, "Child timed out", 0);
+
+ return -1; // Timeout detected
+ } else if (errno != EINTR) {
+ MakeErrMsg(ErrMsg, "Error waiting for child process");
+ return -1;
+ }
+
+ // We exited normally without timeout, so turn off the timer.
+ if (secondsToWait) {
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+ }
+
+ // Return the proper exit status. 0 means success, >0 is the program's exit
+ // status, <0 means it died from a signal, and bit 0x01000000 is set if it
+ // dumped core.
+ int result = 0;
+ if (WIFEXITED(status))
+ result = WEXITSTATUS(status);
+ else if (WIFSIGNALED(status))
+ result = 0 - WTERMSIG(status);
+#ifdef WCOREDUMP
+ else if (WCOREDUMP(status))
+ result |= 0x01000000;
+#endif
+ return result;
+#else
+ return -99;
+#endif
+
+}
+
+bool Program::ChangeStdinToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return false;
+}
+
+bool Program::ChangeStdoutToBinary(){
+ // Do nothing, as Unix doesn't differentiate between text and binary.
+ return false;
+}
+
+}
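
A hypothetical caller of the API implemented above: run /bin/ls with stdout
and stderr collected into one file and a ten-second timeout (the file names
are illustrative):

    #include "llvm/System/Program.h"
    #include "llvm/System/Path.h"
    #include <string>
    using namespace llvm;

    int runLs(std::string *ErrMsg) {
      sys::Path Ls("/bin/ls");
      const char *Args[] = { "ls", "-l", 0 };      // argv-style, NULL-ended
      sys::Path Out("ls.out");
      // {stdin, stdout, stderr}; a null entry leaves that stream alone.
      // Passing the same Path for stdout and stderr triggers the dup2(1,2)
      // case above instead of opening the file twice.
      const sys::Path *Redirects[] = { 0, &Out, &Out };
      return sys::Program::ExecuteAndWait(Ls, Args, 0 /*inherit environ*/,
                                          Redirects, 10 /*secondsToWait*/,
                                          0 /*no memory limit*/, ErrMsg);
    }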
diff --git a/lib/System/Unix/README.txt b/lib/System/Unix/README.txt
new file mode 100644
index 0000000..b3bace4
--- /dev/null
+++ b/lib/System/Unix/README.txt
@@ -0,0 +1,16 @@
+llvm/lib/System/Unix README
+===========================
+
+This directory provides implementations of the lib/System classes that
+are common to two or more variants of UNIX. For example, the directory
+structure underneath this directory could look like this:
+
+Unix - only code that is truly generic to all UNIX platforms
+ Posix - code that is specific to Posix variants of UNIX
+ SUS - code that is specific to the Single Unix Specification
+ SysV - code that is specific to System V variants of UNIX
+
+As a rule, only those directories actually needing to be created should be
+created. Also, further subdirectories could be created to reflect versions of
+the various standards. For example, under SUS there could be v1, v2, and v3
+subdirectories to reflect the three major versions of SUS.
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
new file mode 100644
index 0000000..e385e0c
--- /dev/null
+++ b/lib/System/Unix/Signals.inc
@@ -0,0 +1,230 @@
+//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/ADT/STLExtras.h"
+#include <vector>
+#include <algorithm>
+#if HAVE_EXECINFO_H
+# include <execinfo.h> // For backtrace().
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_DLFCN_H && __GNUG__
+#include <dlfcn.h>
+#include <cxxabi.h>
+#endif
+using namespace llvm;
+
+static RETSIGTYPE SignalHandler(int Sig); // defined below.
+
+/// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<sys::Path> *FilesToRemove = 0;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+
+// IntSigs - Signals that may interrupt the program at any time.
+static const int IntSigs[] = {
+ SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+};
+static const int *const IntSigsEnd =
+ IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
+
+// KillSigs - Signals that are synchronous with the program that will cause it
+// to die.
+static const int KillSigs[] = {
+ SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ
+#ifdef SIGEMT
+ , SIGEMT
+#endif
+};
+static const int *const KillSigsEnd =
+ KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]);
+
+static unsigned NumRegisteredSignals = 0;
+static struct {
+ struct sigaction SA;
+ int SigNo;
+} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+
+
+static void RegisterHandler(int Signal) {
+ assert(NumRegisteredSignals <
+ sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+ "Out of space for signal handlers!");
+
+ struct sigaction NewHandler;
+
+ NewHandler.sa_handler = SignalHandler;
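+  // SA_NODEFER leaves the signal unblocked while the handler runs, and
+  // SA_RESETHAND restores the default disposition on entry to the handler,
+  // so a crash inside the handler kills the process instead of recursing.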
+ NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
+ sigemptyset(&NewHandler.sa_mask);
+
+ // Install the new handler, save the old one in RegisteredSignalInfo.
+ sigaction(Signal, &NewHandler,
+ &RegisteredSignalInfo[NumRegisteredSignals].SA);
+ RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal;
+ ++NumRegisteredSignals;
+}
+
+static void RegisterHandlers() {
+ // If the handlers are already registered, we're done.
+ if (NumRegisteredSignals != 0) return;
+
+ std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
+ std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
+}
+
+static void UnregisterHandlers() {
+ // Restore all of the signal handlers to how they were before we showed up.
+ for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
+ sigaction(RegisteredSignalInfo[i].SigNo,
+ &RegisteredSignalInfo[i].SA, 0);
+ NumRegisteredSignals = 0;
+}
+
+
+
+// SignalHandler - The signal handler that runs.
+static RETSIGTYPE SignalHandler(int Sig) {
+  // Restore the signal behavior to default, so that the program actually
+  // crashes when we return and the signal reissues.  This also ensures that
+  // if we crash in our signal handler, the program terminates immediately
+  // instead of recursing in the signal handler.
+ UnregisterHandlers();
+
+ // Unmask all potentially blocked kill signals.
+ sigset_t SigMask;
+ sigfillset(&SigMask);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ if (FilesToRemove != 0)
+ while (!FilesToRemove->empty()) {
+ FilesToRemove->back().eraseFromDisk(true);
+ FilesToRemove->pop_back();
+ }
+
+ if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
+ if (InterruptFunction) {
+ void (*IF)() = InterruptFunction;
+ InterruptFunction = 0;
+ IF(); // run the interrupt function.
+ return;
+ }
+ raise(Sig); // Execute the default handler.
+ return;
+ }
+
+ // Otherwise if it is a fault (like SEGV) run any handler.
+ if (CallBacksToRun)
+ for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+ (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+}
+
+
+
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
+ InterruptFunction = IF;
+ RegisterHandlers();
+}
+
+// RemoveFileOnSignal - Register a file to be removed when a fatal signal is
+// delivered. Always returns false (success).
+bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
+ std::string* ErrMsg) {
+ if (FilesToRemove == 0)
+ FilesToRemove = new std::vector<sys::Path>();
+
+ FilesToRemove->push_back(Filename);
+
+ RegisterHandlers();
+ return false;
+}
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+ if (CallBacksToRun == 0)
+ CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+ CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+ RegisterHandlers();
+}
+
+
+// PrintStackTrace - In the case of a program crash or fault, print out a stack
+// trace so that the user has an indication of why and where we died.
+//
+// On glibc systems we have the 'backtrace' function, which works nicely, but
+// doesn't demangle symbols.
+static void PrintStackTrace(void *) {
+#ifdef HAVE_BACKTRACE
+ static void* StackTrace[256];
+ // Use backtrace() to output a backtrace on Linux systems with glibc.
+ int depth = backtrace(StackTrace,
+ static_cast<int>(array_lengthof(StackTrace)));
+#if HAVE_DLFCN_H && __GNUG__
+ int width = 0;
+ for (int i = 0; i < depth; ++i) {
+ Dl_info dlinfo;
+ dladdr(StackTrace[i], &dlinfo);
+ const char* name = strrchr(dlinfo.dli_fname, '/');
+
+ int nwidth;
+ if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
+ else nwidth = strlen(name) - 1;
+
+ if (nwidth > width) width = nwidth;
+ }
+
+ for (int i = 0; i < depth; ++i) {
+ Dl_info dlinfo;
+ dladdr(StackTrace[i], &dlinfo);
+
+ fprintf(stderr, "%-3d", i);
+
+ const char* name = strrchr(dlinfo.dli_fname, '/');
+ if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
+ else fprintf(stderr, " %-*s", width, name+1);
+
+ fprintf(stderr, " %#0*lx",
+ (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+
+ if (dlinfo.dli_sname != NULL) {
+ int res;
+ fputc(' ', stderr);
+ char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+ if (d == NULL) fputs(dlinfo.dli_sname, stderr);
+ else fputs(d, stderr);
+ free(d);
+
+ fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr);
+ }
+ fputc('\n', stderr);
+ }
+#else
+ backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
+#endif
+#endif
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void llvm::sys::PrintStackTraceOnErrorSignal() {
+ AddSignalHandler(PrintStackTrace, 0);
+}
+
diff --git a/lib/System/Unix/TimeValue.inc b/lib/System/Unix/TimeValue.inc
new file mode 100644
index 0000000..8dd30b9
--- /dev/null
+++ b/lib/System/Unix/TimeValue.inc
@@ -0,0 +1,56 @@
+//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+
+namespace llvm {
+ using namespace sys;
+
+std::string TimeValue::toString() const {
+ char buffer[32];
+
+ time_t ourTime = time_t(this->toEpochTime());
+#ifdef __hpux
+// note that the following line needs -D_REENTRANT on HP-UX to be picked up
+ asctime_r(localtime(&ourTime), buffer);
+#else
+ ::asctime_r(::localtime(&ourTime), buffer);
+#endif
+
+ std::string result(buffer);
+ return result.substr(0,24);
+}
+
+TimeValue TimeValue::now() {
+ struct timeval the_time;
+ timerclear(&the_time);
+ if (0 != ::gettimeofday(&the_time,0)) {
+ // This is *really* unlikely to occur because the only gettimeofday
+ // errors concern the timezone parameter which we're passing in as 0.
+ // In the unlikely case it does happen, just return MinTime, no error
+ // message needed.
+ return MinTime;
+ }
+
+ return TimeValue(
+ static_cast<TimeValue::SecondsType>( the_time.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
+ NANOSECONDS_PER_MICROSECOND ) );
+}
+
+}
diff --git a/lib/System/Unix/Unix.h b/lib/System/Unix/Unix.h
new file mode 100644
index 0000000..452226f
--- /dev/null
+++ b/lib/System/Unix/Unix.h
@@ -0,0 +1,104 @@
+//===- llvm/System/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Unix implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_UNIX_UNIX_H
+#define LLVM_SYSTEM_UNIX_UNIX_H
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on all UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <string>
+#include <algorithm>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef HAVE_ASSERT_H
+#include <assert.h>
+#endif
+
+#ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+
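+// Provide fallback definitions of the POSIX wait(2) status macros for
+// systems whose headers do not supply them.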
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+/// This function builds an error message into \p ErrMsg using the \p prefix
+/// string and the Unix error number given by \p errnum. If \p errnum is -1
+/// (the default), the value of errno is used.
+/// @brief Make an error message
+///
+/// If the error number can be converted to a string, it will be
+/// separated from prefix by ": ".
+static inline bool MakeErrMsg(
+ std::string* ErrMsg, const std::string& prefix, int errnum = -1) {
+ if (!ErrMsg)
+ return true;
+ char buffer[MAXPATHLEN];
+ buffer[0] = 0;
+ if (errnum == -1)
+ errnum = errno;
+#ifdef HAVE_STRERROR_R
+ // strerror_r is thread-safe.
+ if (errnum)
+ strerror_r(errnum,buffer,MAXPATHLEN-1);
+#elif HAVE_STRERROR
+ // Copy the thread un-safe result of strerror into
+ // the buffer as fast as possible to minimize impact
+ // of collision of strerror in multiple threads.
+ if (errnum)
+ strncpy(buffer,strerror(errnum),MAXPATHLEN-1);
+ buffer[MAXPATHLEN-1] = 0;
+#else
+ // Strange that this system doesn't even have strerror
+ // but, oh well, just use a generic message
+ sprintf(buffer, "Error #%d", errnum);
+#endif
+ *ErrMsg = prefix + ": " + buffer;
+ return true;
+}
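+// Illustrative (hypothetical) call site, showing the usual pattern in these
+// files: on failure, format the message and return true in one step.
+//
+//   if (::chdir(path.c_str()) != 0)
+//     return MakeErrMsg(ErrMsg, path + ": can't change directory");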
+
+#endif
diff --git a/lib/System/Win32/Alarm.inc b/lib/System/Win32/Alarm.inc
new file mode 100644
index 0000000..e0d00a0
--- /dev/null
+++ b/lib/System/Win32/Alarm.inc
@@ -0,0 +1,43 @@
+//===-- Alarm.inc - Implement Win32 Alarm Support ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 Alarm support.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cassert>
+using namespace llvm;
+
+/// NestedSOI - Sanity check. Alarms cannot be nested or run in parallel;
+/// this flag ensures that they never are.
+static bool NestedSOI = false;
+
+void sys::SetupAlarm(unsigned seconds) {
+ assert(!NestedSOI && "sys::SetupAlarm calls cannot be nested!");
+ NestedSOI = true;
+ // FIXME: Implement for Win32
+}
+
+void sys::TerminateAlarm() {
+ assert(NestedSOI && "sys::TerminateAlarm called without sys::SetupAlarm!");
+ // FIXME: Implement for Win32
+ NestedSOI = false;
+}
+
+int sys::AlarmStatus() {
+ // FIXME: Implement for Win32
+ return 0;
+}
+
+// Don't pull in all of the Windows headers.
+extern "C" void __stdcall Sleep(unsigned long);
+
+void sys::Sleep(unsigned n) {
+ ::Sleep(n*1000);
+}
diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc
new file mode 100644
index 0000000..1ddf6ce
--- /dev/null
+++ b/lib/System/Win32/DynamicLibrary.inc
@@ -0,0 +1,219 @@
+//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of DynamicLibrary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+
+#ifdef _MSC_VER
+ #include <ntverp.h>
+#endif
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBIMAGEHLP != 1)
+ #error "libimagehlp.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<HMODULE> OpenedHandles;
+
+#ifdef _WIN64
+ typedef DWORD64 ModuleBaseType;
+#else
+ typedef ULONG ModuleBaseType;
+#endif
+
+extern "C" {
+// Use old callback if:
+// - Not using Visual Studio
+// - Visual Studio 2005 or earlier but only if we are not using the Windows SDK
+// or Windows SDK version is older than 6.0
+// Use new callback if:
+// - Newer Visual Studio (comes with newer SDK).
+// - Visual Studio 2005 with Windows SDK 6.0+
+#if !defined(_MSC_VER) || _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000)
+ static BOOL CALLBACK ELM_Callback(PSTR ModuleName,
+ ModuleBaseType ModuleBase,
+ ULONG ModuleSize,
+ PVOID UserContext)
+#else
+ static BOOL CALLBACK ELM_Callback(PCSTR ModuleName,
+ ModuleBaseType ModuleBase,
+ ULONG ModuleSize,
+ PVOID UserContext)
+#endif
+ {
+ // Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded
+ // into the process.
+ if (stricmp(ModuleName, "msvci70") != 0 &&
+ stricmp(ModuleName, "msvcirt") != 0 &&
+ stricmp(ModuleName, "msvcp50") != 0 &&
+ stricmp(ModuleName, "msvcp60") != 0 &&
+ stricmp(ModuleName, "msvcp70") != 0 &&
+ stricmp(ModuleName, "msvcr70") != 0 &&
+#ifndef __MINGW32__
+ // Mingw32 uses msvcrt.dll by default. Don't ignore it.
+          // Otherwise, the user should know what they are doing.
+ stricmp(ModuleName, "msvcrt") != 0 &&
+#endif
+ stricmp(ModuleName, "msvcrt20") != 0 &&
+ stricmp(ModuleName, "msvcrt40") != 0) {
+ OpenedHandles.push_back((HMODULE)ModuleBase);
+ }
+ return TRUE;
+ }
+}
+
+DynamicLibrary::DynamicLibrary() : handle(0) {
+ handle = GetModuleHandle(NULL);
+ OpenedHandles.push_back((HMODULE)handle);
+}
+
+DynamicLibrary::~DynamicLibrary() {
+  if (handle == 0)
+    return;
+
+  // GetModuleHandle() does not increment the ref count, so we must not free
+  // the handle to the executable.
+  if (handle != GetModuleHandle(NULL))
+    FreeLibrary((HMODULE)handle);
+
+  for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
+       E = OpenedHandles.end(); I != E; ++I) {
+    if (*I == handle) {
+      // Note: don't use the swap/pop_back trick here. Order is important.
+      OpenedHandles.erase(I);
+      break; // erase() invalidated the iterators; there is at most one match.
+    }
+  }
+
+  handle = 0;
+}
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
+ std::string *ErrMsg) {
+ if (filename) {
+ HMODULE a_handle = LoadLibrary(filename);
+
+ if (a_handle == 0)
+ return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : ");
+
+ OpenedHandles.push_back(a_handle);
+ } else {
+ // When no file is specified, enumerate all DLLs and EXEs in the
+ // process.
+ EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+ }
+
+ // Because we don't remember the handle, we will never free it; hence,
+ // it is loaded permanently.
+ return false;
+}
+
+// Stack probing routines live in statically linked support libraries (e.g.
+// libgcc), so they cannot be found by searching loaded modules on Windows.
+// Provide an explicit hook for them.
+#if defined(__MINGW32__) || defined (_MSC_VER)
+ #define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return (void*)&SYM
+ #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \
+ if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO
+ #define EXPLICIT_SYMBOL_DEF(SYM) \
+ extern "C" { extern void *SYM; }
+
+ #if defined(__MINGW32__)
+ EXPLICIT_SYMBOL_DEF(_alloca);
+ EXPLICIT_SYMBOL_DEF(__main);
+ EXPLICIT_SYMBOL_DEF(__ashldi3);
+ EXPLICIT_SYMBOL_DEF(__ashrdi3);
+ EXPLICIT_SYMBOL_DEF(__cmpdi2);
+ EXPLICIT_SYMBOL_DEF(__divdi3);
+ EXPLICIT_SYMBOL_DEF(__fixdfdi);
+ EXPLICIT_SYMBOL_DEF(__fixsfdi);
+ EXPLICIT_SYMBOL_DEF(__fixunsdfdi);
+ EXPLICIT_SYMBOL_DEF(__fixunssfdi);
+ EXPLICIT_SYMBOL_DEF(__floatdidf);
+ EXPLICIT_SYMBOL_DEF(__floatdisf);
+ EXPLICIT_SYMBOL_DEF(__lshrdi3);
+ EXPLICIT_SYMBOL_DEF(__moddi3);
+ EXPLICIT_SYMBOL_DEF(__udivdi3);
+ EXPLICIT_SYMBOL_DEF(__umoddi3);
+ #elif defined(_MSC_VER)
+ EXPLICIT_SYMBOL_DEF(_alloca_probe);
+ #endif
+#endif
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+ // First check symbols added via AddSymbol().
+ std::map<std::string, void *>::iterator I = g_symbols().find(symbolName);
+ if (I != g_symbols().end())
+ return I->second;
+
+ // Now search the libraries.
+ for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
+ E = OpenedHandles.end(); I != E; ++I) {
+ FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
+ if (ptr)
+ return (void *) ptr;
+ }
+
+#if defined(__MINGW32__)
+ {
+ EXPLICIT_SYMBOL(_alloca);
+ EXPLICIT_SYMBOL(__main);
+ EXPLICIT_SYMBOL(__ashldi3);
+ EXPLICIT_SYMBOL(__ashrdi3);
+ EXPLICIT_SYMBOL(__cmpdi2);
+ EXPLICIT_SYMBOL(__divdi3);
+ EXPLICIT_SYMBOL(__fixdfdi);
+ EXPLICIT_SYMBOL(__fixsfdi);
+ EXPLICIT_SYMBOL(__fixunsdfdi);
+ EXPLICIT_SYMBOL(__fixunssfdi);
+ EXPLICIT_SYMBOL(__floatdidf);
+ EXPLICIT_SYMBOL(__floatdisf);
+ EXPLICIT_SYMBOL(__lshrdi3);
+ EXPLICIT_SYMBOL(__moddi3);
+ EXPLICIT_SYMBOL(__udivdi3);
+ EXPLICIT_SYMBOL(__umoddi3);
+
+ EXPLICIT_SYMBOL2(alloca, _alloca);
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+#undef EXPLICIT_SYMBOL_DEF
+ }
+#elif defined(_MSC_VER)
+ {
+ EXPLICIT_SYMBOL2(alloca, _alloca_probe);
+ EXPLICIT_SYMBOL2(_alloca, _alloca_probe);
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+#undef EXPLICIT_SYMBOL_DEF
+ }
+#endif
+
+ return 0;
+}
+
+}
+
diff --git a/lib/System/Win32/Host.inc b/lib/System/Win32/Host.inc
new file mode 100644
index 0000000..18f00f8
--- /dev/null
+++ b/lib/System/Win32/Host.inc
@@ -0,0 +1,23 @@
+//===- llvm/System/Win32/Host.inc -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 Host support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <cstdio>
+#include <string>
+
+using namespace llvm;
+
+std::string sys::getHostTriple() {
+ // FIXME: Adapt to running version.
+ return LLVM_HOSTTRIPLE;
+}
diff --git a/lib/System/Win32/Memory.inc b/lib/System/Win32/Memory.inc
new file mode 100644
index 0000000..5e5cf7a
--- /dev/null
+++ b/lib/System/Win32/Memory.inc
@@ -0,0 +1,72 @@
+//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of various Memory
+// management utilities
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include "llvm/System/Process.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+MemoryBlock Memory::AllocateRWX(unsigned NumBytes,
+ const MemoryBlock *NearBlock,
+ std::string *ErrMsg) {
+ if (NumBytes == 0) return MemoryBlock();
+
+ static const long pageSize = Process::GetPageSize();
+ unsigned NumPages = (NumBytes+pageSize-1)/pageSize;
+
+ //FIXME: support NearBlock if ever needed on Win64.
+
+ void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT,
+ PAGE_EXECUTE_READWRITE);
+ if (pa == NULL) {
+ MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: ");
+ return MemoryBlock();
+ }
+
+ MemoryBlock result;
+ result.Address = pa;
+ result.Size = NumPages*pageSize;
+ return result;
+}
+
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+ if (M.Address == 0 || M.Size == 0) return false;
+ if (!VirtualFree(M.Address, 0, MEM_RELEASE))
+ return MakeErrMsg(ErrMsg, "Can't release RWX Memory: ");
+ return false;
+}
+
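+// Memory returned by AllocateRWX is already mapped PAGE_EXECUTE_READWRITE,
+// so the permission setters below have no work to do.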
+bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
+ return true;
+}
+
+bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
+ return false;
+}
+
+bool Memory::setRangeWritable(const void *Addr, size_t Size) {
+ return true;
+}
+
+bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
+ return false;
+}
+
+}
diff --git a/lib/System/Win32/Mutex.inc b/lib/System/Win32/Mutex.inc
new file mode 100644
index 0000000..7c1723b
--- /dev/null
+++ b/lib/System/Win32/Mutex.inc
@@ -0,0 +1,58 @@
+//===- llvm/System/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include "llvm/System/Mutex.h"
+
+namespace llvm {
+using namespace sys;
+
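+// A Win32 CRITICAL_SECTION is always recursive, so the 'recursive' flag is
+// accepted but ignored.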
+Mutex::Mutex(bool /*recursive*/)
+{
+ data_ = new CRITICAL_SECTION;
+ InitializeCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+Mutex::~Mutex()
+{
+ DeleteCriticalSection((LPCRITICAL_SECTION)data_);
+ delete (LPCRITICAL_SECTION)data_;
+ data_ = 0;
+}
+
+bool
+Mutex::acquire()
+{
+ EnterCriticalSection((LPCRITICAL_SECTION)data_);
+ return true;
+}
+
+bool
+Mutex::release()
+{
+ LeaveCriticalSection((LPCRITICAL_SECTION)data_);
+ return true;
+}
+
+bool
+Mutex::tryacquire()
+{
+ return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+}
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
new file mode 100644
index 0000000..fbf8f66
--- /dev/null
+++ b/lib/System/Win32/Path.inc
@@ -0,0 +1,825 @@
+//===- llvm/System/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Modified by Henrik Bach to comply with at least MinGW.
+// Ported to Win32 by Jeff Cohen.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <malloc.h>
+#include <cstdio>
+
+// We need to undo a macro defined in Windows.h, otherwise we won't compile:
+#undef CopyFile
+#undef GetCurrentDirectory
+
+// Windows happily accepts either forward or backward slashes, though any path
+// returned by a Win32 API will have backward slashes. As LLVM code basically
+// assumes forward slashes are used, backward slashes are converted where they
+// can be introduced into a path.
+//
+// Another invariant is that a path ends with a slash if and only if the path
+// is a root directory. Any other use of a trailing slash is stripped. Unlike
+// in Unix, Windows has a rather complicated notion of a root path and this
+// invariant helps simplify the code.
+
+static void FlipBackSlashes(std::string& s) {
+ for (size_t i = 0; i < s.size(); i++)
+ if (s[i] == '\\')
+ s[i] = '/';
+}
+
+namespace llvm {
+namespace sys {
+const char PathSeparator = ';';
+
+Path::Path(const std::string& p)
+ : path(p) {
+ FlipBackSlashes(path);
+}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+ : path(StrStart, StrLen) {
+ FlipBackSlashes(path);
+}
+
+Path&
+Path::operator=(const std::string &that) {
+ path = that;
+ FlipBackSlashes(path);
+ return *this;
+}
+
+bool
+Path::isValid() const {
+ if (path.empty())
+ return false;
+
+ // If there is a colon, it must be the second character, preceded by a letter
+ // and followed by something.
+ size_t len = path.size();
+ size_t pos = path.rfind(':',len);
+ size_t rootslash = 0;
+ if (pos != std::string::npos) {
+ if (pos != 1 || !isalpha(path[0]) || len < 3)
+ return false;
+ rootslash = 2;
+ }
+
+ // Look for a UNC path, and if found adjust our notion of the root slash.
+ if (len > 3 && path[0] == '/' && path[1] == '/') {
+ rootslash = path.find('/', 2);
+ if (rootslash == std::string::npos)
+ rootslash = 0;
+ }
+
+ // Check for illegal characters.
+ if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012"
+ "\013\014\015\016\017\020\021\022\023\024\025\026"
+ "\027\030\031\032\033\034\035\036\037")
+ != std::string::npos)
+ return false;
+
+ // Remove trailing slash, unless it's a root slash.
+ if (len > rootslash+1 && path[len-1] == '/')
+ path.erase(--len);
+
+ // Check each component for legality.
+ for (pos = 0; pos < len; ++pos) {
+ // A component may not end in a space.
+ if (path[pos] == ' ') {
+ if (path[pos+1] == '/' || path[pos+1] == '\0')
+ return false;
+ }
+
+ // A component may not end in a period.
+ if (path[pos] == '.') {
+ if (path[pos+1] == '/' || path[pos+1] == '\0') {
+ // Unless it is the pseudo-directory "."...
+ if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
+ return true;
+ // or "..".
+ if (pos > 0 && path[pos-1] == '.') {
+ if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':')
+ return true;
+ }
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool
+Path::isAbsolute() const {
+ switch (path.length()) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return path[0] == '/';
+ default:
+ return path[0] == '/' || (path[1] == ':' && path[2] == '/');
+ }
+}
+
+static Path *TempDirectory = NULL;
+
+Path
+Path::GetTemporaryDirectory(std::string* ErrMsg) {
+ if (TempDirectory)
+ return *TempDirectory;
+
+ char pathname[MAX_PATH];
+ if (!GetTempPath(MAX_PATH, pathname)) {
+ if (ErrMsg)
+ *ErrMsg = "Can't determine temporary directory";
+ return Path();
+ }
+
+ Path result;
+ result.set(pathname);
+
+  // Append a subdirectory based on our process id so multiple LLVMs don't
+  // step on each other's toes.
+#ifdef __MINGW32__
+ // Mingw's Win32 header files are broken.
+ sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId()));
+#else
+ sprintf(pathname, "LLVM_%u", GetCurrentProcessId());
+#endif
+ result.appendComponent(pathname);
+
+ // If there's a directory left over from a previous LLVM execution that
+ // happened to have the same process id, get rid of it.
+ result.eraseFromDisk(true);
+
+ // And finally (re-)create the empty directory.
+ result.createDirectoryOnDisk(false);
+ TempDirectory = new Path(result);
+ return *TempDirectory;
+}
+
+// FIXME: the following set of functions doesn't map to Windows very well.
+Path
+Path::GetRootDirectory() {
+ Path result;
+ result.set("C:/");
+ return result;
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+ Paths.push_back(sys::Path("C:/WINDOWS/SYSTEM32"));
+ Paths.push_back(sys::Path("C:/WINDOWS"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+ char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+ if (env_var != 0) {
+ getPathList(env_var,Paths);
+ }
+#ifdef LLVM_LIBDIR
+ {
+ Path tmpPath;
+ if (tmpPath.set(LLVM_LIBDIR))
+ if (tmpPath.canRead())
+ Paths.push_back(tmpPath);
+ }
+#endif
+ GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+ // TODO: this isn't going to fly on Windows
+ return Path("/etc/llvm");
+}
+
+Path
+Path::GetUserHomeDirectory() {
+ // TODO: Typical Windows setup doesn't define HOME.
+ const char* home = getenv("HOME");
+ if (home) {
+ Path result;
+ if (result.set(home))
+ return result;
+ }
+ return GetRootDirectory();
+}
+
+Path
+Path::GetCurrentDirectory() {
+ char pathname[MAX_PATH];
+ ::GetCurrentDirectoryA(MAX_PATH,pathname);
+ return Path(pathname);
+}
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+ return Path();
+}
+
+
+// FIXME: the above set of functions doesn't map to Windows very well.
+
+
+bool
+Path::isRootDirectory() const {
+ size_t len = path.size();
+ return len > 0 && path[len-1] == '/';
+}
+
+std::string Path::getDirname() const {
+ return getDirnameCharSep(path, '/');
+}
+
+std::string
+Path::getBasename() const {
+ // Find the last slash
+ size_t slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ size_t dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return path.substr(slash);
+ else
+ return path.substr(slash, dot - slash);
+}
+
+std::string
+Path::getSuffix() const {
+ // Find the last slash
+ size_t slash = path.rfind('/');
+ if (slash == std::string::npos)
+ slash = 0;
+ else
+ slash++;
+
+ size_t dot = path.rfind('.');
+ if (dot == std::string::npos || dot < slash)
+ return std::string();
+ else
+ return path.substr(dot + 1);
+}
+
+bool
+Path::exists() const {
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isDirectory() const {
+ DWORD attr = GetFileAttributes(path.c_str());
+ return (attr != INVALID_FILE_ATTRIBUTES) &&
+ (attr & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+bool
+Path::canRead() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::canWrite() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY);
+}
+
+bool
+Path::canExecute() const {
+ // FIXME: take security attributes into account.
+ DWORD attr = GetFileAttributes(path.c_str());
+ return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+std::string
+Path::getLast() const {
+ // Find the last slash
+ size_t pos = path.rfind('/');
+
+ // Handle the corner cases
+ if (pos == std::string::npos)
+ return path;
+
+ // If the last character is a slash, we have a root directory
+ if (pos == path.length()-1)
+ return path;
+
+ // Return everything after the last slash
+ return path.substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+ if (!fsIsValid || update) {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+ MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) +
+ ": Can't get status: ");
+ return 0;
+ }
+
+ status.fileSize = fi.nFileSizeHigh;
+ status.fileSize <<= sizeof(fi.nFileSizeHigh)*8;
+ status.fileSize += fi.nFileSizeLow;
+
+ status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777;
+ status.user = 9999; // Not applicable to Windows, so...
+ status.group = 9999; // Not applicable to Windows, so...
+
+ // FIXME: this is only unique if the file is accessed by the same file path.
+ // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode
+ // numbers, but the concept doesn't exist in Windows.
+ status.uniqueID = 0;
+ for (unsigned i = 0; i < path.length(); ++i)
+ status.uniqueID += path[i];
+
+ __int64 ft = *reinterpret_cast<__int64*>(&fi.ftLastWriteTime);
+ status.modTime.fromWin32Time(ft);
+
+ status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+ fsIsValid = true;
+ }
+ return &status;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+ // All files are readable on Windows (ignoring security attributes).
+ return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+ DWORD attr = GetFileAttributes(path.c_str());
+
+ // If it doesn't exist, we're done.
+ if (attr == INVALID_FILE_ATTRIBUTES)
+ return false;
+
+ if (attr & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) {
+ MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: ");
+ return true;
+ }
+ }
+ return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+ // All files are executable on Windows (ignoring security attributes).
+ return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+ MakeErrMsg(ErrMsg, path + ": can't get status of file");
+ return true;
+ }
+
+ if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ if (ErrMsg)
+ *ErrMsg = path + ": not a directory";
+ return true;
+ }
+
+ result.clear();
+ WIN32_FIND_DATA fd;
+ std::string searchpath = path;
+ if (path.size() == 0 || searchpath[path.size()-1] == '/')
+ searchpath += "*";
+ else
+ searchpath += "/*";
+
+ HANDLE h = FindFirstFile(searchpath.c_str(), &fd);
+ if (h == INVALID_HANDLE_VALUE) {
+ if (GetLastError() == ERROR_FILE_NOT_FOUND)
+ return true; // not really an error, now is it?
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+ return true;
+ }
+
+ do {
+ if (fd.cFileName[0] == '.')
+ continue;
+ Path aPath(path);
+ aPath.appendComponent(&fd.cFileName[0]);
+ result.insert(aPath);
+ } while (FindNextFile(h, &fd));
+
+ DWORD err = GetLastError();
+ FindClose(h);
+ if (err != ERROR_NO_MORE_FILES) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+ return true;
+ }
+ return false;
+}
+
+bool
+Path::set(const std::string& a_path) {
+ if (a_path.empty())
+ return false;
+ std::string save(path);
+ path = a_path;
+ FlipBackSlashes(path);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::appendComponent(const std::string& name) {
+ if (name.empty())
+ return false;
+ std::string save(path);
+ if (!path.empty()) {
+ size_t last = path.size() - 1;
+ if (path[last] != '/')
+ path += '/';
+ }
+ path += name;
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseComponent() {
+ size_t slashpos = path.rfind('/',path.size());
+ if (slashpos == path.size() - 1 || slashpos == std::string::npos)
+ return false;
+ std::string save(path);
+ path.erase(slashpos);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::appendSuffix(const std::string& suffix) {
+ std::string save(path);
+ path.append(".");
+ path.append(suffix);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+}
+
+bool
+Path::eraseSuffix() {
+ size_t dotpos = path.rfind('.',path.size());
+ size_t slashpos = path.rfind('/',path.size());
+ if (dotpos != std::string::npos) {
+ if (slashpos == std::string::npos || dotpos > slashpos+1) {
+ std::string save(path);
+ path.erase(dotpos, path.size()-dotpos);
+ if (!isValid()) {
+ path = save;
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char* msg) {
+ if (ErrMsg)
+ *ErrMsg = std::string(pathname) + ": " + std::string(msg);
+ return true;
+}
+
+bool
+Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
+ // Get a writeable copy of the path name
+ size_t len = path.length();
+ char *pathname = reinterpret_cast<char *>(_alloca(len+2));
+ path.copy(pathname, len);
+ pathname[len] = 0;
+
+ // Make sure it ends with a slash.
+ if (len == 0 || pathname[len - 1] != '/') {
+ pathname[len] = '/';
+ pathname[++len] = 0;
+ }
+
+ // Determine starting point for initial / search.
+ char *next = pathname;
+ if (pathname[0] == '/' && pathname[1] == '/') {
+ // Skip host name.
+ next = strchr(pathname+2, '/');
+ if (next == NULL)
+ return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+ // Skip share name.
+ next = strchr(next+1, '/');
+ if (next == NULL)
+ return PathMsg(ErrMsg, pathname,"badly formed remote directory");
+
+ next++;
+ if (*next == 0)
+ return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+ } else {
+ if (pathname[1] == ':')
+ next += 2; // skip drive letter
+ if (*next == '/')
+ next++; // skip root directory
+ }
+
+ // If we're supposed to create intermediate directories
+ if (create_parents) {
+ // Loop through the directory components until we're done
+ while (*next) {
+ next = strchr(next, '/');
+ *next = 0;
+ if (!CreateDirectory(pathname, NULL))
+ return MakeErrMsg(ErrMsg,
+ std::string(pathname) + ": Can't create directory: ");
+ *next++ = '/';
+ }
+ } else {
+ // Drop trailing slash.
+ pathname[len-1] = 0;
+ if (!CreateDirectory(pathname, NULL)) {
+ return MakeErrMsg(ErrMsg, std::string(pathname) + ": Can't create directory: ");
+ }
+ }
+ return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+ // Create the file
+ HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return MakeErrMsg(ErrMsg, path + ": Can't create file: ");
+
+ CloseHandle(h);
+ return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+ WIN32_FILE_ATTRIBUTE_DATA fi;
+ if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
+ return true;
+
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+ // If it doesn't exist, we're done.
+ if (!exists())
+ return false;
+
+ char *pathname = reinterpret_cast<char *>(_alloca(path.length()+3));
+    int lastchar = path.length() - 1;
+ path.copy(pathname, lastchar+1);
+
+ // Make path end with '/*'.
+ if (pathname[lastchar] != '/')
+ pathname[++lastchar] = '/';
+ pathname[lastchar+1] = '*';
+ pathname[lastchar+2] = 0;
+
+ if (remove_contents) {
+ WIN32_FIND_DATA fd;
+ HANDLE h = FindFirstFile(pathname, &fd);
+
+ // It's a bad idea to alter the contents of a directory while enumerating
+ // its contents. So build a list of its contents first, then destroy them.
+
+ if (h != INVALID_HANDLE_VALUE) {
+ std::vector<Path> list;
+
+ do {
+ if (strcmp(fd.cFileName, ".") == 0)
+ continue;
+ if (strcmp(fd.cFileName, "..") == 0)
+ continue;
+
+ Path aPath(path);
+ aPath.appendComponent(&fd.cFileName[0]);
+ list.push_back(aPath);
+ } while (FindNextFile(h, &fd));
+
+ DWORD err = GetLastError();
+ FindClose(h);
+ if (err != ERROR_NO_MORE_FILES) {
+ SetLastError(err);
+ return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+ }
+
+ for (std::vector<Path>::iterator I = list.begin(); I != list.end();
+ ++I) {
+ Path &aPath = *I;
+ aPath.eraseFromDisk(true);
+ }
+ } else {
+ if (GetLastError() != ERROR_FILE_NOT_FOUND)
+ return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+ }
+ }
+
+ pathname[lastchar] = 0;
+ if (!RemoveDirectory(pathname))
+ return MakeErrMsg(ErrStr,
+ std::string(pathname) + ": Can't destroy directory: ");
+ return false;
+ } else {
+ // Read-only files cannot be deleted on Windows. Must remove the read-only
+ // attribute first.
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(),
+ fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+ }
+
+ if (!DeleteFile(path.c_str()))
+ return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+ return false;
+ }
+}
+
+bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
+ assert(len < 1024 && "Request for magic string too long");
+ char* buf = (char*) alloca(1 + len);
+
+ HANDLE h = CreateFile(path.c_str(),
+ GENERIC_READ,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return false;
+
+ DWORD nRead = 0;
+ BOOL ret = ReadFile(h, buf, len, &nRead, NULL);
+ CloseHandle(h);
+
+ if (!ret || nRead != len)
+ return false;
+
+ buf[len] = '\0';
+ Magic = buf;
+ return true;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+ if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING))
+ return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path
+ + "': ");
+ return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
+ // FIXME: should work on directories also.
+ if (!si.isFile) {
+ return true;
+ }
+
+ HANDLE h = CreateFile(path.c_str(),
+ FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return true;
+
+ BY_HANDLE_FILE_INFORMATION bhfi;
+ if (!GetFileInformationByHandle(h, &bhfi)) {
+ DWORD err = GetLastError();
+ CloseHandle(h);
+ SetLastError(err);
+ return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: ");
+ }
+
+ FILETIME ft;
+ (uint64_t&)ft = si.modTime.toWin32Time();
+ BOOL ret = SetFileTime(h, NULL, &ft, &ft);
+ DWORD err = GetLastError();
+ CloseHandle(h);
+ if (!ret) {
+ SetLastError(err);
+ return MakeErrMsg(ErrMsg, path + ": SetFileTime: ");
+ }
+
+ // Best we can do with Unix permission bits is to interpret the owner
+ // writable bit.
+ if (si.mode & 0200) {
+ if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+ if (!SetFileAttributes(path.c_str(),
+ bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+ }
+ } else {
+ if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) {
+ if (!SetFileAttributes(path.c_str(),
+ bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY))
+ return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+ }
+ }
+
+ return false;
+}
+
+bool
+CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) {
+ // Can't use CopyFile macro defined in Windows.h because it would mess up the
+ // above line. We use the expansion it would have in a non-UNICODE build.
+ if (!::CopyFileA(Src.c_str(), Dest.c_str(), false))
+ return MakeErrMsg(ErrMsg, "Can't copy '" + Src.toString() +
+ "' to '" + Dest.toString() + "': ");
+ return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+ if (reuse_current && !exists())
+ return false; // File doesn't exist already, just use it!
+
+ // Reserve space for -XXXXXX at the end.
+ char *FNBuffer = (char*) alloca(path.size()+8);
+ unsigned offset = path.size();
+ path.copy(FNBuffer, offset);
+
+ // Find a numeric suffix that isn't used by an existing file. Assume there
+ // won't be more than 1 million files with the same prefix. Probably a safe
+ // bet.
+ static unsigned FCounter = 0;
+ do {
+ sprintf(FNBuffer+offset, "-%06u", FCounter);
+ if (++FCounter > 999999)
+ FCounter = 0;
+ path = FNBuffer;
+ } while (exists());
+ return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+ // Make this into a unique file name
+ makeUnique(reuse_current, ErrMsg);
+
+ // Now go and create it
+ HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE)
+ return MakeErrMsg(ErrMsg, path + ": can't create file");
+
+ CloseHandle(h);
+ return false;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+ return 0;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+void Path::UnMapFilePages(const char *Base, uint64_t FileSize) {
+ assert(0 && "NOT IMPLEMENTED");
+}
+
+}
+}
diff --git a/lib/System/Win32/Process.inc b/lib/System/Win32/Process.inc
new file mode 100644
index 0000000..e1d7a92
--- /dev/null
+++ b/lib/System/Win32/Process.inc
@@ -0,0 +1,150 @@
+//===- Win32/Process.cpp - Win32 Process Implementation --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <psapi.h>
+#include <malloc.h>
+#include <io.h>
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBPSAPI != 1)
+ #error "libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef __MINGW32__
+// This definition should be removed once MinGW 1.0+ defines this value itself.
+# define _HEAPOK (-2)
+#endif
+
+namespace llvm {
+using namespace sys;
+
+// This function retrieves the page size using GetSystemInfo and is present
+// solely so it can be called once in Process::GetPageSize to initialize the
+// static variable PageSize.
+inline unsigned GetPageSizeOnce() {
+ // NOTE: A 32-bit application running under WOW64 is supposed to use
+ // GetNativeSystemInfo. However, this interface is not present prior
+ // to Windows XP so to use it requires dynamic linking. It is not clear
+ // how this affects the reported page size, if at all. One could argue
+ // that LLVM ought to run as 64-bits on a 64-bit system, anyway.
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return static_cast<unsigned>(info.dwPageSize);
+}
+
+unsigned
+Process::GetPageSize() {
+ static const unsigned PageSize = GetPageSizeOnce();
+ return PageSize;
+}
+
+size_t
+Process::GetMallocUsage()
+{
+ _HEAPINFO hinfo;
+ hinfo._pentry = NULL;
+
+ size_t size = 0;
+
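+  // Walk every block in the CRT heap, summing the size of each entry.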
+ while (_heapwalk(&hinfo) == _HEAPOK)
+ size += hinfo._size;
+
+ return size;
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+ PROCESS_MEMORY_COUNTERS pmc;
+ GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
+ return pmc.PagefileUsage;
+}
+
+void
+Process::GetTimeUsage(
+ TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time)
+{
+ elapsed = TimeValue::now();
+
+ uint64_t ProcCreate, ProcExit, KernelTime, UserTime;
+ GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate,
+ (FILETIME*)&ProcExit, (FILETIME*)&KernelTime,
+ (FILETIME*)&UserTime);
+
+ // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond)
+ user_time.seconds( UserTime / 10000000 );
+ user_time.nanoseconds( unsigned(UserTime % 10000000) * 100 );
+ sys_time.seconds( KernelTime / 10000000 );
+ sys_time.nanoseconds( unsigned(KernelTime % 10000000) * 100 );
+}
+
+int Process::GetCurrentUserId()
+{
+ return 65536;
+}
+
+int Process::GetCurrentGroupId()
+{
+ return 65536;
+}
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this function does
+// what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+ // Windows doesn't do core files, but it does do modal pop-up message
+ // boxes. As this method is used by bugpoint, preventing these pop-ups
+ // is the moral equivalent of suppressing core files.
+ SetErrorMode(SEM_FAILCRITICALERRORS |
+ SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
+}
+
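+// A handle attached to a console reports FILE_TYPE_CHAR; files and pipes do
+// not, which is how the three predicates below detect interactive streams.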
+bool Process::StandardInIsUserInput() {
+ return GetFileType((HANDLE)_get_osfhandle(0)) == FILE_TYPE_CHAR;
+}
+
+bool Process::StandardOutIsDisplayed() {
+ return GetFileType((HANDLE)_get_osfhandle(1)) == FILE_TYPE_CHAR;
+}
+
+bool Process::StandardErrIsDisplayed() {
+ return GetFileType((HANDLE)_get_osfhandle(2)) == FILE_TYPE_CHAR;
+}
+
+unsigned Process::StandardOutColumns() {
+ unsigned Columns = 0;
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+ Columns = csbi.dwSize.X;
+ return Columns;
+}
+
+unsigned Process::StandardErrColumns() {
+ unsigned Columns = 0;
+ CONSOLE_SCREEN_BUFFER_INFO csbi;
+ if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi))
+ Columns = csbi.dwSize.X;
+ return Columns;
+}
+
+}
diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc
new file mode 100644
index 0000000..49086b8
--- /dev/null
+++ b/lib/System/Win32/Program.inc
@@ -0,0 +1,316 @@
+//===- Win32/Program.cpp - Win32 Program Implementation --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <cstdio>
+#include <malloc.h>
+#include <io.h>
+#include <fcntl.h>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+using namespace sys;
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+ // Check some degenerate cases
+ if (progName.length() == 0) // no program
+ return Path();
+ Path temp;
+ if (!temp.set(progName)) // invalid name
+ return Path();
+ if (temp.canExecute()) // already executable as is
+ return temp;
+
+  // At this point, the file name is valid and it's not executable.
+ // Let Windows search for it.
+ char buffer[MAX_PATH];
+ char *dummy = NULL;
+ DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
+ buffer, &dummy);
+
+ // See if it wasn't found.
+ if (len == 0)
+ return Path();
+
+ // See if we got the entire path.
+ if (len < MAX_PATH)
+ return Path(buffer);
+
+ // Buffer was too small; grow and retry.
+ while (true) {
+ char *b = reinterpret_cast<char *>(_alloca(len+1));
+ DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy);
+
+ // It is unlikely the search failed, but it's always possible some file
+ // was added or removed since the last search, so be paranoid...
+ if (len2 == 0)
+ return Path();
+ else if (len2 <= len)
+ return Path(b);
+
+ len = len2;
+ }
+}
+
+static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
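+  // A null path means "inherit": duplicate the current standard handle for
+  // the given fd. Otherwise fd 0 is opened for reading and fds 1 and 2 for
+  // writing, with an empty path mapped to the NUL device.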
+ HANDLE h;
+ if (path == 0) {
+ DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
+ GetCurrentProcess(), &h,
+ 0, TRUE, DUPLICATE_SAME_ACCESS);
+ return h;
+ }
+
+ const char *fname;
+ if (path->isEmpty())
+ fname = "NUL";
+ else
+ fname = path->toString().c_str();
+
+ SECURITY_ATTRIBUTES sa;
+ sa.nLength = sizeof(sa);
+ sa.lpSecurityDescriptor = 0;
+ sa.bInheritHandle = TRUE;
+
+ h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
+ &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+ if (h == INVALID_HANDLE_VALUE) {
+ MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
+ (fd ? "input: " : "output: "));
+ }
+
+ return h;
+}
+
+#ifdef __MINGW32__
+  // For some unknown reason, mingw32's w32api doesn't have this declaration.
+ extern "C"
+ BOOL WINAPI SetInformationJobObject(HANDLE hJob,
+ JOBOBJECTINFOCLASS JobObjectInfoClass,
+ LPVOID lpJobObjectInfo,
+ DWORD cbJobObjectInfoLength);
+#endif
+
+int
+Program::ExecuteAndWait(const Path& path,
+ const char** args,
+ const char** envp,
+ const Path** redirects,
+ unsigned secondsToWait,
+ unsigned memoryLimit,
+ std::string* ErrMsg) {
+ if (!path.canExecute()) {
+ if (ErrMsg)
+ *ErrMsg = "program not executable";
+ return -1;
+ }
+
+ // Windows wants a command line, not an array of args, to pass to the new
+ // process. We have to concatenate them all, while quoting the args that
+ // have embedded spaces.
+
+ // First, determine the length of the command line.
+ unsigned len = 0;
+ for (unsigned i = 0; args[i]; i++) {
+ len += strlen(args[i]) + 1;
+ if (strchr(args[i], ' '))
+ len += 2;
+ }
+
+ // Now build the command line.
+ char *command = reinterpret_cast<char *>(_alloca(len+1));
+ char *p = command;
+
+ for (unsigned i = 0; args[i]; i++) {
+ const char *arg = args[i];
+ size_t len = strlen(arg);
+ bool needsQuoting = strchr(arg, ' ') != 0;
+ if (needsQuoting)
+ *p++ = '"';
+ memcpy(p, arg, len);
+ p += len;
+ if (needsQuoting)
+ *p++ = '"';
+ *p++ = ' ';
+ }
+
+ *p = 0;
+
+ // The pointer to the environment block for the new process.
+ char *envblock = 0;
+
+ if (envp) {
+ // An environment block consists of a null-terminated block of
+ // null-terminated strings. Convert the array of environment variables to
+ // an environment block by concatenating them.
+
+ // First, determine the length of the environment block.
+ len = 0;
+ for (unsigned i = 0; envp[i]; i++)
+ len += strlen(envp[i]) + 1;
+
+ // Now build the environment block.
+ envblock = reinterpret_cast<char *>(_alloca(len+1));
+ p = envblock;
+
+ for (unsigned i = 0; envp[i]; i++) {
+ const char *ev = envp[i];
+ size_t len = strlen(ev) + 1;
+ memcpy(p, ev, len);
+ p += len;
+ }
+
+ *p = 0;
+ }
+
+ // Create a child process.
+ STARTUPINFO si;
+ memset(&si, 0, sizeof(si));
+ si.cb = sizeof(si);
+ si.hStdInput = INVALID_HANDLE_VALUE;
+ si.hStdOutput = INVALID_HANDLE_VALUE;
+ si.hStdError = INVALID_HANDLE_VALUE;
+
+ if (redirects) {
+ si.dwFlags = STARTF_USESTDHANDLES;
+
+ si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg);
+ if (si.hStdInput == INVALID_HANDLE_VALUE) {
+ MakeErrMsg(ErrMsg, "can't redirect stdin");
+ return -1;
+ }
+ si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg);
+ if (si.hStdOutput == INVALID_HANDLE_VALUE) {
+ CloseHandle(si.hStdInput);
+ MakeErrMsg(ErrMsg, "can't redirect stdout");
+ return -1;
+ }
+ if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
+ // If stdout and stderr should go to the same place, redirect stderr
+ // to the handle already open for stdout.
+ DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
+ GetCurrentProcess(), &si.hStdError,
+ 0, TRUE, DUPLICATE_SAME_ACCESS);
+ } else {
+ // Just redirect stderr
+ si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
+ if (si.hStdError == INVALID_HANDLE_VALUE) {
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ MakeErrMsg(ErrMsg, "can't redirect stderr");
+ return -1;
+ }
+ }
+ }
+
+ PROCESS_INFORMATION pi;
+ memset(&pi, 0, sizeof(pi));
+
+ fflush(stdout);
+ fflush(stderr);
+ BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0,
+ envblock, NULL, &si, &pi);
+ DWORD err = GetLastError();
+
+ // Regardless of whether the process got created or not, we are done with
+ // the handles we created for it to inherit.
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ CloseHandle(si.hStdError);
+
+ // Now return an error if the process didn't get created.
+ if (!rc)
+ {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
+ path.toString() + "'");
+ return -1;
+ }
+
+ // Make sure these get closed no matter what.
+ AutoHandle hProcess(pi.hProcess);
+ AutoHandle hThread(pi.hThread);
+
+ // Assign the process to a job if a memory limit is defined.
+ AutoHandle hJob(0);
+ if (memoryLimit != 0) {
+ hJob = CreateJobObject(0, 0);
+ bool success = false;
+ if (hJob != 0) {
+ JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
+ memset(&jeli, 0, sizeof(jeli));
+ jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
+ jeli.ProcessMemoryLimit = uintptr_t(memoryLimit) * 1048576;
+ if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation,
+ &jeli, sizeof(jeli))) {
+ if (AssignProcessToJobObject(hJob, pi.hProcess))
+ success = true;
+ }
+ }
+ if (!success) {
+ SetLastError(GetLastError());
+ MakeErrMsg(ErrMsg, std::string("Unable to set memory limit"));
+ TerminateProcess(pi.hProcess, 1);
+ WaitForSingleObject(pi.hProcess, INFINITE);
+ return -1;
+ }
+ }
+
+ // Wait for it to terminate.
+ DWORD millisecondsToWait = INFINITE;
+ if (secondsToWait > 0)
+ millisecondsToWait = secondsToWait * 1000;
+
+ if (WaitForSingleObject(pi.hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
+ if (!TerminateProcess(pi.hProcess, 1)) {
+ MakeErrMsg(ErrMsg, std::string("Failed to terminate timed-out program '")
+ + path.toString() + "'");
+ return -1;
+ }
+ WaitForSingleObject(pi.hProcess, INFINITE);
+ }
+
+ // Get its exit status.
+ DWORD status;
+ rc = GetExitCodeProcess(pi.hProcess, &status);
+ err = GetLastError();
+
+ if (!rc) {
+ SetLastError(err);
+ MakeErrMsg(ErrMsg, std::string("Failed getting status for program '") +
+ path.toString() + "'");
+ return -1;
+ }
+
+ return status;
+}
+
+bool Program::ChangeStdinToBinary(){
+ int result = _setmode( _fileno(stdin), _O_BINARY );
+ return result == -1;
+}
+
+bool Program::ChangeStdoutToBinary(){
+ int result = _setmode( _fileno(stdout), _O_BINARY );
+ return result == -1;
+}
+
+}
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
new file mode 100644
index 0000000..3a8f77e
--- /dev/null
+++ b/lib/System/Win32/Signals.inc
@@ -0,0 +1,270 @@
+//===- Win32/Signals.inc - Win32 Signals Implementation ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Signals class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <stdio.h>
+#include <vector>
+#include <algorithm>
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+#include <psapi.h>
+
+#ifdef __MINGW32__
+ #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
+ #error "libimagehlp.a & libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+// Forward declare.
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
+
+// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<llvm::sys::Path> *FilesToRemove = NULL;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+static bool RegisteredUnhandledExceptionFilter = false;
+static bool CleanupExecuted = false;
+static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
+
+// Windows creates a new thread to execute the console handler when an event
+// (such as CTRL/C) occurs. This causes concurrency issues with the globals
+// above, which this critical section addresses.
+static CRITICAL_SECTION CriticalSection;
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+
+static void RegisterHandler() {
+ if (RegisteredUnhandledExceptionFilter) {
+ EnterCriticalSection(&CriticalSection);
+ return;
+ }
+
+ // Now's the time to create the critical section. This is the first time
+ // through here, and there's only one thread.
+ InitializeCriticalSection(&CriticalSection);
+
+ // Enter it immediately. Now if someone hits CTRL/C, the console handler
+ // can't proceed until the globals are updated.
+ EnterCriticalSection(&CriticalSection);
+
+ RegisteredUnhandledExceptionFilter = true;
+ OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
+ SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+
+ // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
+ // else multi-threading problems will ensue.
+}
+
+// RemoveFileOnSignal - The public API
+bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
+ RegisterHandler();
+
+ if (CleanupExecuted) {
+ if (ErrMsg)
+ *ErrMsg = "Process terminating -- cannot register for removal";
+ return true;
+ }
+
+ if (FilesToRemove == NULL)
+ FilesToRemove = new std::vector<sys::Path>;
+
+ FilesToRemove->push_back(Filename);
+
+ LeaveCriticalSection(&CriticalSection);
+ return false;
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void sys::PrintStackTraceOnErrorSignal() {
+ RegisterHandler();
+ LeaveCriticalSection(&CriticalSection);
+}
+
+
+void sys::SetInterruptFunction(void (*IF)()) {
+ RegisterHandler();
+ InterruptFunction = IF;
+ LeaveCriticalSection(&CriticalSection);
+}
+
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+ if (CallBacksToRun == 0)
+ CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+ CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+ RegisterHandler();
+}
+}
+
+static void Cleanup() {
+ EnterCriticalSection(&CriticalSection);
+
+  // Prevent other threads from registering new files and directories for
+ // removal, should we be executing because of the console handler callback.
+ CleanupExecuted = true;
+
+ // FIXME: open files cannot be deleted.
+
+ if (FilesToRemove != NULL)
+ while (!FilesToRemove->empty()) {
+ try {
+ FilesToRemove->back().eraseFromDisk();
+ } catch (...) {
+ }
+ FilesToRemove->pop_back();
+ }
+
+ if (CallBacksToRun)
+ for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+ (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+
+ LeaveCriticalSection(&CriticalSection);
+}
+
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
+ try {
+ Cleanup();
+
+#ifdef _WIN64
+  // TODO: provide an x64-friendly version of the following
+#else
+
+ // Initialize the STACKFRAME structure.
+ STACKFRAME StackFrame;
+ memset(&StackFrame, 0, sizeof(StackFrame));
+
+ StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+
+ HANDLE hProcess = GetCurrentProcess();
+ HANDLE hThread = GetCurrentThread();
+
+ // Initialize the symbol handler.
+ SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
+ SymInitialize(hProcess, NULL, TRUE);
+
+ while (true) {
+ if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
+ ep->ContextRecord, NULL, SymFunctionTableAccess,
+ SymGetModuleBase, NULL)) {
+ break;
+ }
+
+ if (StackFrame.AddrFrame.Offset == 0)
+ break;
+
+ // Print the PC in hexadecimal.
+ DWORD PC = StackFrame.AddrPC.Offset;
+ fprintf(stderr, "%08lX", PC);
+
+ // Print the parameters. Assume there are four.
+ fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", StackFrame.Params[0],
+ StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
+
+ // Verify the PC belongs to a module in this process.
+ if (!SymGetModuleBase(hProcess, PC)) {
+ fputs(" <unknown module>\n", stderr);
+ continue;
+ }
+
+ // Print the symbol name.
+ char buffer[512];
+ IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
+ memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
+ symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
+ symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
+
+ DWORD dwDisp;
+ if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
+ fputc('\n', stderr);
+ continue;
+ }
+
+ buffer[511] = 0;
+ if (dwDisp > 0)
+ fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp);
+ else
+ fprintf(stderr, ", %s", symbol->Name);
+
+ // Print the source file and line number information.
+ IMAGEHLP_LINE line;
+ memset(&line, 0, sizeof(line));
+ line.SizeOfStruct = sizeof(line);
+ if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
+ fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
+ if (dwDisp > 0)
+ fprintf(stderr, "+%04lu byte(s)", dwDisp);
+ }
+
+ fputc('\n', stderr);
+ }
+
+#endif
+
+ } catch (...) {
+ assert(0 && "Crashed in LLVMUnhandledExceptionFilter");
+ }
+
+ // Allow dialog box to pop up allowing choice to start debugger.
+ if (OldFilter)
+ return (*OldFilter)(ep);
+ else
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
+ // We are running in our very own thread, courtesy of Windows.
+ EnterCriticalSection(&CriticalSection);
+ Cleanup();
+
+  // If an interrupt function has been set, go and run it; otherwise the
+  // process dies.
+ void (*IF)() = InterruptFunction;
+ InterruptFunction = 0; // Don't run it on another CTRL-C.
+
+ if (IF) {
+ // Note: if the interrupt function throws an exception, there is nothing
+ // to catch it in this thread so it will kill the process.
+ IF(); // Run it now.
+ LeaveCriticalSection(&CriticalSection);
+ return TRUE; // Don't kill the process.
+ }
+
+ // Allow normal processing to take place; i.e., the process dies.
+ LeaveCriticalSection(&CriticalSection);
+ return FALSE;
+}
+
diff --git a/lib/System/Win32/TimeValue.inc b/lib/System/Win32/TimeValue.inc
new file mode 100644
index 0000000..0ca87d4
--- /dev/null
+++ b/lib/System/Win32/TimeValue.inc
@@ -0,0 +1,51 @@
+//===- Win32/TimeValue.inc - Win32 TimeValue Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 implementation of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include <time.h>
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code.
+//===----------------------------------------------------------------------===//
+
+TimeValue TimeValue::now() {
+ uint64_t ft;
+ GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft));
+
+ TimeValue t(0, 0);
+ t.fromWin32Time(ft);
+ return t;
+}
+
+std::string TimeValue::toString() const {
+#ifdef __MINGW32__
+ // This ban may be lifted by either:
+  // (i) a future MinGW version other than 1.0 inheriting the __time64_t type, or
+  // (ii) a configure test for either the time_t or __time64_t type.
+ time_t ourTime = time_t(this->toEpochTime());
+ struct tm *lt = ::localtime(&ourTime);
+#else
+ __time64_t ourTime = this->toEpochTime();
+ struct tm *lt = ::_localtime64(&ourTime);
+#endif
+
+ char buffer[25];
+ strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt);
+ return std::string(buffer);
+}
+
+
+}
diff --git a/lib/System/Win32/Win32.h b/lib/System/Win32/Win32.h
new file mode 100644
index 0000000..8f505b1
--- /dev/null
+++ b/lib/System/Win32/Win32.h
@@ -0,0 +1,57 @@
+//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Win32 implementations.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+// Require at least Windows 2000 API.
+#define _WIN32_WINNT 0x0500
+
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "windows.h"
+#include <cassert>
+#include <string>
+
+inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
+ if (!ErrMsg)
+ return true;
+ char *buffer = NULL;
+ FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+ *ErrMsg = prefix + buffer;
+ LocalFree(buffer);
+ return true;
+}
+
+class AutoHandle {
+ HANDLE handle;
+
+public:
+ AutoHandle(HANDLE h) : handle(h) {}
+
+ ~AutoHandle() {
+ if (handle)
+ CloseHandle(handle);
+ }
+
+ operator HANDLE() {
+ return handle;
+ }
+
+ AutoHandle &operator=(HANDLE h) {
+ handle = h;
+ return *this;
+ }
+};
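+
+// Editor's sketch (not in the original source): AutoHandle is a minimal RAII
+// guard; a typical use mirrors the Program code above:
+//
+//   PROCESS_INFORMATION pi = ...;
+//   AutoHandle hProcess(pi.hProcess); // CloseHandle runs on scope exit.
+//
+// Note that operator= does not close a previously held handle, so it should
+// only be used to fill an AutoHandle that currently holds 0, as Program.inc
+// does with hJob.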
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
new file mode 100644
index 0000000..ac7de91
--- /dev/null
+++ b/lib/Target/ARM/ARM.h
@@ -0,0 +1,121 @@
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMTargetMachine;
+class FunctionPass;
+class MachineCodeEmitter;
+class JITCodeEmitter;
+class raw_ostream;
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes {
+ EQ,
+ NE,
+ HS,
+ LO,
+ MI,
+ PL,
+ VS,
+ VC,
+ HI,
+ LS,
+ GE,
+ LT,
+ GT,
+ LE,
+ AL
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC){
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+}
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+}
+
+FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMCodePrinterPass(raw_ostream &O,
+ ARMTargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+                                       MachineCodeEmitter &MCE);
+FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+                                          JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMConstantIslandPass();
+
+} // end namespace llvm;
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+
+#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
new file mode 100644
index 0000000..4ac6857
--- /dev/null
+++ b/lib/Target/ARM/ARM.td
@@ -0,0 +1,136 @@
+//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
+ "ARM v7A">;
+def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
+ "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
+ "Enable VFP3 instructions">;
+def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
+ "Enable NEON instructions">;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
+ "Enable Thumb2 instructions">;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+// V4 Processors.
+def : Proc<"generic", []>;
+def : Proc<"arm8", []>;
+def : Proc<"arm810", []>;
+def : Proc<"strongarm", []>;
+def : Proc<"strongarm110", []>;
+def : Proc<"strongarm1100", []>;
+def : Proc<"strongarm1110", []>;
+
+// V4T Processors.
+def : Proc<"arm7tdmi", [ArchV4T]>;
+def : Proc<"arm7tdmi-s", [ArchV4T]>;
+def : Proc<"arm710t", [ArchV4T]>;
+def : Proc<"arm720t", [ArchV4T]>;
+def : Proc<"arm9", [ArchV4T]>;
+def : Proc<"arm9tdmi", [ArchV4T]>;
+def : Proc<"arm920", [ArchV4T]>;
+def : Proc<"arm920t", [ArchV4T]>;
+def : Proc<"arm922t", [ArchV4T]>;
+def : Proc<"arm940t", [ArchV4T]>;
+def : Proc<"ep9312", [ArchV4T]>;
+
+// V5T Processors.
+def : Proc<"arm10tdmi", [ArchV5T]>;
+def : Proc<"arm1020t", [ArchV5T]>;
+
+// V5TE Processors.
+def : Proc<"arm9e", [ArchV5TE]>;
+def : Proc<"arm926ej-s", [ArchV5TE]>;
+def : Proc<"arm946e-s", [ArchV5TE]>;
+def : Proc<"arm966e-s", [ArchV5TE]>;
+def : Proc<"arm968e-s", [ArchV5TE]>;
+def : Proc<"arm10e", [ArchV5TE]>;
+def : Proc<"arm1020e", [ArchV5TE]>;
+def : Proc<"arm1022e", [ArchV5TE]>;
+def : Proc<"xscale", [ArchV5TE]>;
+def : Proc<"iwmmxt", [ArchV5TE]>;
+
+// V6 Processors.
+def : Proc<"arm1136j-s", [ArchV6]>;
+def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"arm1176jz-s", [ArchV6]>;
+def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"mpcorenovfp", [ArchV6]>;
+def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
+
+def : Proc<"arm1156t2-s", [ArchV6, FeatureThumb2]>;
+def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>;
+
+def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo {
+  // Define how we want to lay out our target-specific information field.
+ let TSFlagsFields = ["AddrModeBits",
+ "SizeFlag",
+ "IndexModeBits",
+ "isUnaryDataProc",
+ "Form"];
+ let TSFlagsShifts = [0,
+ 4,
+ 7,
+ 9,
+ 10];
+}
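+
+// Editor's note (worked layout, not in the original source): with the shifts
+// above, the TSFlags word is packed as
+//   bits 0-3  AddrModeBits
+//   bits 4-6  SizeFlag
+//   bits 7-8  IndexModeBits
+//   bit  9    isUnaryDataProc
+//   bits 10+  Form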
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+}
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
new file mode 100644
index 0000000..6d9b9ee
--- /dev/null
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -0,0 +1,394 @@
+//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ add = '+', sub = '-'
+ };
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline ShiftOpc getShiftOpcForNode(SDValue N) {
+ switch (N.getOpcode()) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return isLD ? "fd" : "ea";
+ case ARM_AM::ib: return isLD ? "ed" : "fa";
+ case ARM_AM::da: return isLD ? "fa" : "ed";
+ case ARM_AM::db: return isLD ? "ea" : "fd";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+  // This is stored as three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
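+
+  // Editor's worked example (not in the original source): encoding an
+  // immediate shift of "lsl #5" (ShiftOpc lsl == 2):
+  //   getSORegOpc(lsl, 5) == 2 | (5 << 3) == 42
+  //   getSORegShOp(42)    == (ShiftOpc)(42 & 7) == lsl
+  //   getSORegOffset(42)  == 42 >> 3 == 5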
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
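+
+  // Editor's worked example (not in the original source): decoding the
+  // 12-bit encoding 0xC0A:
+  //   getSOImmValImm(0xC0A) == 0x0A           (the 8-bit immediate)
+  //   getSOImmValRot(0xC0A) == 0xC * 2 == 24  (the rotate-right amount)
+  // so the materialized value is rotr32(0x0A, 24) == 0x00000A00.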
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should skip the first run of ones, then
+ // retry the hunt.
+ if (Imm & 1) {
+ unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
+ if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF
+        // Restart the search for a high-order bit after the initial run of
+        // ones.
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
+
+ // Rotate amount must be even.
+ unsigned RotAmt2 = TZ2 & ~1;
+
+ // If this fits, use it.
+ if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+  /// into a shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
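+
+  // Editor's worked example (not in the original source): for Arg == 0xA00,
+  // getSOImmValRotate returns 24 (TZ == 9, RotAmt == 8, and
+  // rotr32(0xA00, 8) == 0x0A fits in 8 bits), so
+  //   getSOImmVal(0xA00) == rotl32(0xA00, 24) | ((24 >> 1) << 8)
+  //                      == 0x0A | 0xC00 == 0xC0A
+  // which round-trips with the decode example above.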
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
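+
+  // Editor's worked example (not in the original source): V == 0x00FF00FF is
+  // not a single shifter_op immediate, but it is a two-part value:
+  //   getSOImmTwoPartFirst(0x00FF00FF)  == 0x000000FF
+  //   getSOImmTwoPartSecond(0x00FF00FF) == 0x00FF0000
+  // so it can be materialized as a mov of the first chunk followed by an orr
+  // of the second (see emitMOVi2piecesInstruction in ARMCodeEmitter.cpp).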
+
+  /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+  /// by a left shift. Returns the shift amount to use.
+  static inline unsigned getThumbImmValShift(unsigned Imm) {
+    // 8-bit (or less) immediates are trivially immediate operands with a shift
+    // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+  /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+  /// by left shifting an 8-bit immediate.
+  static inline bool isThumbImmShiftedVal(unsigned V) {
+    // If V can be obtained by left shifting an 8-bit immediate, masking off
+    // that shifted immediate leaves nothing behind.
+    V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+  /// isThumbImmShiftedVal, return the non-shifted value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
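+
+  // Editor's worked example (not in the original source): V == 0x2A00 is
+  // 0x15 << 9, so
+  //   getThumbImmValShift(0x2A00)      == 9
+  //   isThumbImmShiftedVal(0x2A00)     == true
+  //   getThumbImmNonShiftedVal(0x2A00) == 0x15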
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13);
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)(AM2Opc >> 13);
+ }
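+
+  // Editor's worked example (not in the original source): the "reg - imm12"
+  // form with an offset of 100:
+  //   getAM2Opc(sub, 100, no_shift) == 100 | (1 << 12) == 0x1064
+  //   getAM2Offset(0x1064)   == 100
+  //   getAM2Op(0x1064)       == sub
+  //   getAM2ShiftOpc(0x1064) == no_shift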
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
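+
+  // Editor's worked example (not in the original source): the "reg - imm8"
+  // form with an offset of 12:
+  //   getAM3Opc(sub, 12)  == (1 << 8) | 12 == 0x10C
+  //   getAM3Offset(0x10C) == 12
+  //   getAM3Op(0x10C)     == sub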
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ //
+  // If the 4th bit (writeback) is set, then the base register is updated after
+ // the memory transfer.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
+ return (int)SubMode | ((int)WB << 3);
+ }
+
+ static inline bool getAM4WBFlag(unsigned Mode) {
+ return (Mode >> 3) & 1;
+ }
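+
+  // Editor's worked example (not in the original source): the "db" sub-mode
+  // with writeback (AMSubMode db == 4):
+  //   getAM4ModeImm(db, true) == 4 | (1 << 3) == 0xC
+  //   getAM4SubMode(0xC)      == db
+  //   getAM4WBFlag(0xC)       == true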
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+ //
+ // This can also be used for FP load/store multiple ops. The third field encodes
+ // writeback mode in bit 8, the number of registers (or 2 times the number of
+  // registers for DPR ops) in bits 0-7. In addition, bits 9-11 encode one of the
+ // following two sub-modes:
+ //
+ // IA - Increment after
+ // DB - Decrement before
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
+ /// FSTM instructions.
+ static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
+ unsigned char Offset) {
+ assert((SubMode == ia || SubMode == db) &&
+ "Illegal addressing mode 5 sub-mode!");
+ return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
+ }
+ static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
+ return (AMSubMode)((AM5Opc >> 9) & 0x7);
+ }
+ static inline bool getAM5WBFlag(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1);
+ }
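+
+  // Editor's worked example (not in the original source): an FLDM-style "ia"
+  // sub-mode with writeback and a register count of 4:
+  //   getAM5Opc(ia, true, 4) == (1 << 9) | (1 << 8) | 4 == 0x304
+  //   getAM5SubMode(0x304)   == ia
+  //   getAM5WBFlag(0x304)    == true
+  //   getAM5Offset(0x304)    == 4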
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
new file mode 100644
index 0000000..3b38375
--- /dev/null
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -0,0 +1,64 @@
+//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains enumerations and support routines for ARM build attributes
+// as defined in ARM ABI addenda document (ABI release 2.07).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TARGET_ARMBUILDATTRS_H__
+#define __TARGET_ARMBUILDATTRS_H__
+
+namespace ARMBuildAttrs {
+ enum {
+ File = 1,
+ Section = 2,
+ Symbol = 3,
+ CPU_raw_name = 4,
+ CPU_name = 5,
+ CPU_arch = 6,
+ CPU_arch_profile = 7,
+ ARM_ISA_use = 8,
+ THUMB_ISA_use = 9,
+ VFP_arch = 10,
+ WMMX_arch = 11,
+ Advanced_SIMD_arch = 12,
+ PCS_config = 13,
+ ABI_PCS_R9_use = 14,
+ ABI_PCS_RW_data = 15,
+ ABI_PCS_RO_data = 16,
+ ABI_PCS_GOT_use = 17,
+ ABI_PCS_wchar_t = 18,
+ ABI_FP_rounding = 19,
+ ABI_FP_denormal = 20,
+ ABI_FP_exceptions = 21,
+ ABI_FP_user_exceptions = 22,
+ ABI_FP_number_model = 23,
+ ABI_align8_needed = 24,
+ ABI_align8_preserved = 25,
+ ABI_enum_size = 26,
+ ABI_HardFP_use = 27,
+ ABI_VFP_args = 28,
+ ABI_WMMX_args = 29,
+ ABI_optimization_goals = 30,
+ ABI_FP_optimization_goals = 31,
+ compatibility = 32,
+ CPU_unaligned_access = 34,
+ VFP_HP_extension = 36,
+ ABI_FP_16bit_format = 38,
+ nodefaults = 64,
+ also_compatible_with = 65,
+ T2EE_use = 66,
+ conformance = 67,
+ Virtualization_use = 68,
+ MPextension_use = 70
+ };
+}
+
+#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
new file mode 100644
index 0000000..6cd786e
--- /dev/null
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,87 @@
+//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>;
+
+/// CCIfAlign - Match if the original alignment of the arg is Align.
+class CCIfAlign<string Align, CCAction A>:
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // f64 is passed in pairs of GPRs, possibly split onto the stack
+ CCIfType<[f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_AAPCS : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // i64/f64 is passed in even pairs of GPRs
+ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+ // (and the same is true for f64 if VFP is not enabled)
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+ CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+ "ArgFlags.getOrigAlign() != 8",
+ CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_ARM : CallingConv<[
+ CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetCC_ARM : CallingConv<[
+ CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 0000000..44fac12
--- /dev/null
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1411 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMInstrInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+ class ARMCodeEmitter {
+ public:
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+ };
+
+ template<class CodeEmitter>
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
+ public ARMCodeEmitter {
+ ARMJITInfo *JTI;
+ const ARMInstrInfo *II;
+ const TargetData *TD;
+ TargetMachine &TM;
+ CodeEmitter &MCE;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+
+ public:
+ static char ID;
+ explicit Emitter(TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(&ID), JTI(0), II(0), TD(0), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ Emitter(TargetMachine &tm, CodeEmitter &mce,
+ const ARMInstrInfo &ii, const TargetData &td)
+ : MachineFunctionPass(&ID), JTI(0), II(&ii), TD(&td), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Binary);
+
+ void emitDWordLE(uint64_t Binary);
+
+ void emitConstPoolInstruction(const MachineInstr &MI);
+
+ void emitMOVi2piecesInstruction(const MachineInstr &MI);
+
+ void emitLEApcrelJTInstruction(const MachineInstr &MI);
+
+ void emitPseudoMoveInstruction(const MachineInstr &MI);
+
+ void addPCLabel(unsigned LabelID);
+
+ void emitPseudoInstruction(const MachineInstr &MI);
+
+ unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+ const TargetInstrDesc &TID,
+ const MachineOperand &MO,
+ unsigned OpIdx);
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm);
+
+ unsigned getAddrModeSBit(const MachineInstr &MI,
+ const TargetInstrDesc &TID) const;
+
+ void emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMulFrmInstruction(const MachineInstr &MI);
+
+ void emitExtendInstruction(const MachineInstr &MI);
+
+ void emitMiscArithInstruction(const MachineInstr &MI);
+
+ void emitBranchInstruction(const MachineInstr &MI);
+
+ void emitInlineJumpTable(unsigned JTIndex);
+
+ void emitMiscBranchInstruction(const MachineInstr &MI);
+
+ void emitVFPArithInstruction(const MachineInstr &MI);
+
+ void emitVFPConversionInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMiscInstruction(const MachineInstr &MI);
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO);
+ unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+    unsigned getShiftOp(unsigned Imm) const;
+
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ bool NeedStub, intptr_t ACPV = 0);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc);
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc);
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+ intptr_t JTBase = 0);
+ };
+ template <class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code
+/// to the specified MCE object.
+
+namespace llvm {
+
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+} // end namespace llvm
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+  assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+          MF.getTarget().getRelocationModel() == Reloc::Static) &&
+         "JIT relocation model must be set to static or default!");
+ II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo();
+ TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData();
+ JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ JTI->Initialize(MF, IsPIC);
+
+ do {
+ DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n";
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const {
+ switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) {
+ if (MO.isReg())
+ return ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+ else if (MO.isCPI()) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ // For VFP load, the immediate offset is multiplied by 4.
+ unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+ ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+ emitConstPoolAddress(MO.getIndex(), Reloc);
+ } else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+ else {
+ cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+ abort();
+ }
+ return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ bool NeedStub, intptr_t ACPV) {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ GV, ACPV, NeedStub));
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
+ unsigned Reloc) {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI,
+ unsigned Reloc) {
+ // Tell JIT emitter we'll resolve the address.
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTIndex,
+ unsigned Reloc) {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc, intptr_t JTBase) {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB, JTBase));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitWordLE(unsigned Binary) {
+#ifndef NDEBUG
+ DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
+ << Binary << std::dec << "\n";
+#endif
+ MCE.emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDWordLE(uint64_t Binary) {
+#ifndef NDEBUG
+ DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
+ << (unsigned)Binary << std::dec << "\n";
+ DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
+ << (unsigned)(Binary >> 32) << std::dec << "\n";
+#endif
+ MCE.emitDWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) {
+ DOUT << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI;
+
+ NumEmitted++; // Keep track of the # of mi's emitted
+ switch (MI.getDesc().TSFlags & ARMII::FormMask) {
+ default: {
+ assert(0 && "Unhandled instruction encoding format!");
+ break;
+ }
+ case ARMII::Pseudo:
+ emitPseudoInstruction(MI);
+ break;
+ case ARMII::DPFrm:
+ case ARMII::DPSoRegFrm:
+ emitDataProcessingInstruction(MI);
+ break;
+ case ARMII::LdFrm:
+ case ARMII::StFrm:
+ emitLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdMiscFrm:
+ case ARMII::StMiscFrm:
+ emitMiscLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdStMulFrm:
+ emitLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::MulFrm:
+ emitMulFrmInstruction(MI);
+ break;
+ case ARMII::ExtFrm:
+ emitExtendInstruction(MI);
+ break;
+ case ARMII::ArithMiscFrm:
+ emitMiscArithInstruction(MI);
+ break;
+ case ARMII::BrFrm:
+ emitBranchInstruction(MI);
+ break;
+ case ARMII::BrMiscFrm:
+ emitMiscBranchInstruction(MI);
+ break;
+ // VFP instructions.
+ case ARMII::VFPUnaryFrm:
+ case ARMII::VFPBinaryFrm:
+ emitVFPArithInstruction(MI);
+ break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ emitVFPConversionInstruction(MI);
+ break;
+ case ARMII::VFPLdStFrm:
+ emitVFPLoadStoreInstruction(MI);
+ break;
+ case ARMII::VFPLdStMulFrm:
+ emitVFPLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::VFPMiscFrm:
+ emitMiscInstruction(MI);
+ break;
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) {
+ unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
+ unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
+ const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
+
+ // Remember the CONSTPOOL_ENTRY address for later relocation.
+ JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
+
+ // Emit constpool island entry. In most cases, the actual values will be
+ // resolved and relocated after code emission.
+ if (MCPE.isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ DOUT << " ** ARM constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n';
+
+ GlobalValue *GV = ACPV->getGV();
+ if (GV) {
+ assert(!ACPV->isStub() && "Don't know how to deal this yet!");
+ if (ACPV->isNonLazyPointer())
+ MCE.addRelocation(MachineRelocation::getIndirectSymbol(
+ MCE.getCurrentPCOffset(), ARM::reloc_arm_machine_cp_entry, GV,
+ (intptr_t)ACPV, false));
+ else
+ emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+ ACPV->isStub() || isa<Function>(GV), (intptr_t)ACPV);
+ } else {
+ assert(!ACPV->isNonLazyPointer() && "Don't know how to deal this yet!");
+ emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
+ }
+ emitWordLE(0);
+ } else {
+ Constant *CV = MCPE.Val.ConstVal;
+
+#ifndef NDEBUG
+ DOUT << " ** Constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " ";
+ if (const Function *F = dyn_cast<Function>(CV))
+ DOUT << F->getName();
+ else
+ DOUT << *CV;
+ DOUT << '\n';
+#endif
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV));
+ emitWordLE(0);
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint32_t Val = *(uint32_t*)CI->getValue().getRawData();
+ emitWordLE(Val);
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (CFP->getType() == Type::FloatTy)
+ emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else if (CFP->getType() == Type::DoubleTy)
+ emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else {
+ assert(0 && "Unable to handle this constantpool entry!");
+ abort();
+ }
+ } else {
+ assert(0 && "Unable to handle this constantpool entry!");
+ abort();
+ }
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+ assert(MO1.isImm() && "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
+
+ // Emit the 'mov' instruction.
+ unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V1));
+ emitWordLE(Binary);
+
+ // Now the 'orr' instruction.
+ Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V2));
+ emitWordLE(Binary);
+}
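+
+// Editor's worked example (not in the original source): for the two-part
+// immediate 0x00FF00FF (see isSOImmTwoPartVal), the pair emitted above is
+//   mov rd, #0x000000FF
+//   orr rd, rd, #0x00FF0000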
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) {
+ // It's basically add r, pc, (LJTI - $+8)
+
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Emit the 'add' instruction.
+ unsigned Binary = 0x4 << 21; // add: Insts{24-31} = 0b0100
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode Rn which is PC.
+ Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+
+ // Encode the displacement.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>.
+ Binary |= 1 << ARMII::I_BitShift;
+ emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
+ Binary |= 1 << ARMII::S_BitShift;
+
+ // Encode register def if there is one.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode the shift operation.
+ switch (Opcode) {
+ default: break;
+ case ARM::MOVrx:
+ // rrx
+ Binary |= 0x6 << 4;
+ break;
+ case ARM::MOVsrl_flag:
+ // lsr #1
+ Binary |= (0x2 << 4) | (1 << 7);
+ break;
+ case ARM::MOVsra_flag:
+ // asr #1
+ Binary |= (0x4 << 4) | (1 << 7);
+ break;
+ }
+
+ // Encode register Rm.
+ Binary |= getMachineOpValue(MI, 1);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::addPCLabel(unsigned LabelID) {
+ DOUT << " ** LPC" << LabelID << " @ "
+ << (void*)MCE.getCurrentPCValue() << '\n';
+ JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+ switch (Opcode) {
+ default:
+ abort(); // FIXME:
+ case TargetInstrInfo::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ assert(0 && "JIT does not support inline asm!\n");
+ abort();
+ }
+ break;
+ }
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getImm());
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::DECLARE:
+ case ARM::DWARF_LOC:
+ // Do nothing.
+ break;
+ case ARM::CONSTPOOL_ENTRY:
+ emitConstPoolInstruction(MI);
+ break;
+ case ARM::PICADD: {
+    // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+    // PICADD is just an add instruction that implicitly reads pc.
+ emitDataProcessingInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICSTR:
+ case ARM::PICSTRB: {
+    // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitLoadStoreInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDRH:
+ case ARM::PICLDRSH:
+ case ARM::PICLDRSB:
+ case ARM::PICSTRH: {
+    // Remember the address of the PC label for relocation later.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read pc.
+ emitMiscLoadStoreInstruction(MI, ARM::PC);
+ break;
+ }
+ case ARM::MOVi2pieces:
+ // Two instructions to materialize a constant.
+ emitMOVi2piecesInstruction(MI);
+ break;
+ case ARM::LEApcrelJT:
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ case ARM::MOVrx:
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag:
+ emitPseudoMoveInstruction(MI);
+ break;
+ }
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue(
+ const MachineInstr &MI,
+ const TargetInstrDesc &TID,
+ const MachineOperand &MO,
+ unsigned OpIdx) {
+ unsigned Binary = getMachineOpValue(MI, MO);
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary |
+ (ARMRegisterInfo::getRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) {
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot(SoImm) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm(SoImm);
+ return Binary;
+}
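+
+// Worked example (editorial): for the value 0x00FF0000, immed_8 = 0xFF
+// rotated right by 16, so rotate_imm = 16/2 = 8 and the routine above packs
+// bits[11:0] as 0x8FF (rotate_imm in [11:8], immed_8 in [7:0]).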
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getAddrModeSBit(const MachineInstr &MI,
+ const TargetInstrDesc &TID) const {
+ for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){
+ const MachineOperand &MO = MI.getOperand(i-1);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
+ return 1 << ARMII::S_BitShift;
+ }
+ return 0;
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDataProcessingInstruction(
+ const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // Encode register def if there is one.
+ unsigned NumDefs = TID.getNumDefs();
+ unsigned OpIdx = 0;
+ if (NumDefs)
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ else if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
+ << ARMII::RegRdShift);
+
+  // If this is a two-address operand, skip it, e.g. MOVCCr operand 1.
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode first non-shifter register operand if there is one.
+ bool isUnary = TID.TSFlags & ARMII::UnaryDP;
+ if (!isUnary) {
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
+ << ARMII::RegRnShift);
+ else {
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
+ ++OpIdx;
+ }
+ }
+
+ // Encode shifter operand.
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+ // Encode SoReg.
+ emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx));
+ return;
+ }
+
+ if (MO.isReg()) {
+ // Encode register Rm.
+ emitWordLE(Binary | ARMRegisterInfo::getRegisterNumbering(MO.getReg()));
+ return;
+ }
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>.
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(MO.getImm());
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitLoadStoreInstruction(
+ const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
+ << ARMII::RegRdShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
+ << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+  // If this is a two-address operand, skip it, e.g. LDR_PRE.
+ if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM2Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative).
+ Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+ if (!MO2.getReg()) { // is immediate
+ if (ARM_AM::getAM2Offset(AM2Opc))
+ // Set the value of offset_12 field
+ Binary |= ARM_AM::getAM2Offset(AM2Opc);
+ emitWordLE(Binary);
+ return;
+ }
+
+  // Set bit I(25), because this is not an immediate encoding.
+ Binary |= 1 << ARMII::I_BitShift;
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ // Set bit[3:0] to the corresponding Rm register
+ Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg());
+
+  // If this instruction uses a scaled register offset/index, set the
+  // shift_immed (bit[11:7]) and shift (bit[6:5]) fields.
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
+ Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
+ Binary |= ShImm << ARMII::ShiftShift; // shift_immed
+ }
+
+ emitWordLE(Binary);
+}
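+
+// Worked example (editorial): "ldr r0, [r1, #4]" has a positive offset, so
+// U(23) = 1 and offset_12 = 4; "ldr r0, [r1, #-4]" clears U(23) but keeps
+// offset_12 = 4, since the sign lives in U rather than in the offset field.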
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StMiscFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
+ << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+  // If this is a two-address operand, skip it, e.g. LDRH_POST.
+ if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM3Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative)
+ Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+
+  // If this instruction uses the register offset/index encoding, set bit[3:0]
+ // to the corresponding Rm register.
+ if (MO2.getReg()) {
+ Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg());
+ emitWordLE(Binary);
+ return;
+ }
+
+  // This instruction uses the immediate offset/index encoding; set bit 22 to 1.
+ Binary |= 1 << ARMII::AM3_I_BitShift;
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
+ // Set operands
+ Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
+ Binary |= (ImmOffs & 0xF); // immedL
+ }
+
+ emitWordLE(Binary);
+}
+
+static unsigned getAddrModeUPBits(unsigned Mode) {
+ unsigned Binary = 0;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ // IA - Increment after - bit U = 1 and bit P = 0
+ // IB - Increment before - bit U = 1 and bit P = 1
+ // DA - Decrement after - bit U = 0 and bit P = 0
+ // DB - Decrement before - bit U = 0 and bit P = 1
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::da: break;
+ case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
+ case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
+ case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
+ }
+
+ return Binary;
+}
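+
+// Note (editorial): the 0x3 << U_BitShift in the 'ib' case relies on P(24)
+// sitting directly above U(23), setting both bits at once; e.g. an LDMIA
+// encodes as U = 1, P = 0 while an STMDB encodes as U = 0, P = 1.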
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction(
+ const MachineInstr &MI) {
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ const MachineOperand &MO = MI.getOperand(1);
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
+
+ // Set bit W(21)
+ if (ARM_AM::getAM4WBFlag(MO.getImm()))
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // Set registers
+ for (unsigned i = 4, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ RegNum < 16);
+ Binary |= 0x1 << RegNum;
+ }
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMulFrmInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // 32x32->64bit operations have two destination registers. The number
+ // of register definitions will tell us if that's what we're dealing with.
+ unsigned OpIdx = 0;
+ if (TID.getNumDefs() == 2)
+    Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdLoShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode Rs
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
+
+  // Many multiply instructions (e.g. MLA) have a third src operand. Encode
+  // it as Rn (for multiply, that's at the same offset as RdLo).
+ if (TID.getNumOperands() > OpIdx &&
+ !TID.OpInfo[OpIdx].isPredicate() &&
+ !TID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExtendInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ if (MO2.isReg()) {
+ // Two register operand form.
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, MO2);
+ ++OpIdx;
+ } else {
+ Binary |= getMachineOpValue(MI, MO1);
+ }
+
+ // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
+ if (MI.getOperand(OpIdx).isImm() &&
+ !TID.OpInfo[OpIdx].isPredicate() &&
+ !TID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+ emitWordLE(Binary);
+}
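+
+// Worked example (editorial): for "sxtb r0, r1, ror #16" the rotate operand
+// is 16, and the code above encodes 16/8 = 2 in the two-bit rotate field;
+// only rotations of 0, 8, 16, and 24 bits are representable.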
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx++);
+ if (OpIdx == TID.getNumOperands() ||
+ TID.OpInfo[OpIdx].isPredicate() ||
+ TID.OpInfo[OpIdx].isOptionalDef()) {
+ // Encode Rm and it's done.
+ Binary |= getMachineOpValue(MI, MO);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode shift_imm.
+ unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ if (TID.Opcode == ARM::TPsoft)
+ abort(); // FIXME
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set signed_immed_24 field
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) {
+ // Remember the base address of the inline jump table.
+ uintptr_t JTBase = MCE.getCurrentPCValue();
+ JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+ DOUT << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase << '\n';
+
+ // Now emit the jump table entries.
+ const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ if (IsPIC)
+ // DestBB address - JT base.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+ else
+ // Absolute DestBB address.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+ emitWordLE(0);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Handle jump tables.
+ if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
+ // First emit a ldr pc, [] instruction.
+ emitDataProcessingInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ unsigned JTIndex = (TID.Opcode == ARM::BR_JTr)
+ ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+ emitInlineJumpTable(JTIndex);
+ return;
+ } else if (TID.Opcode == ARM::BR_JTm) {
+ // First emit a ldr pc, [] instruction.
+ emitLoadStoreInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ emitInlineJumpTable(MI.getOperand(3).getIndex());
+ return;
+ }
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ if (TID.Opcode == ARM::BX_RET)
+ // The return register is LR.
+ Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR);
+ else
+ // otherwise, set the return register
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = false;
+ RegD = ARMRegisterInfo::getRegisterNumbering(RegD, isSPVFP);
+ if (!isSPVFP)
+ Binary |= RegD << ARMII::RegRdShift;
+ else {
+ Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+ Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = false;
+ RegN = ARMRegisterInfo::getRegisterNumbering(RegN, isSPVFP);
+ if (!isSPVFP)
+ Binary |= RegN << ARMII::RegRnShift;
+ else {
+ Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+ Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = false;
+ RegM = ARMRegisterInfo::getRegisterNumbering(RegM, isSPVFP);
+ if (!isSPVFP)
+ Binary |= RegM;
+ else {
+ Binary |= ((RegM & 0x1E) >> 1);
+ Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+ }
+ return Binary;
+}
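+
+// Worked example (editorial): a single-precision register such as S5 (number
+// 0b0101) is split by the helpers above into a four-bit field (5 >> 1 =
+// 0b0010) plus its low bit (1) in the D/N/M position; a double-precision D
+// register fills the four-bit field directly.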
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+  assert((Binary & (1 << ARMII::D_BitShift)) == 0 &&
+         (Binary & (1 << ARMII::N_BitShift)) == 0 &&
+         (Binary & (1 << ARMII::M_BitShift)) == 0 && "VFP encoding bug!");
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // If this is a two-address operand, skip it, e.g. FMACD.
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode Dn / Sn.
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+ Binary |= encodeVFPRn(MI, OpIdx++);
+
+ if (OpIdx == TID.getNumOperands() ||
+ TID.OpInfo[OpIdx].isPredicate() ||
+ TID.OpInfo[OpIdx].isOptionalDef()) {
+ // FCMPEZD etc. has only one operand.
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, OpIdx);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPConversionInstruction(
+ const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+ break;
+ case ARMII::VFPConv4Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 0);
+ break;
+ case ARMII::VFPConv5Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 0);
+ break;
+ }
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 1);
+ break;
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 1);
+ break;
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 1);
+ break;
+ }
+
+ if (Form == ARMII::VFPConv5Frm)
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 2);
+ else if (Form == ARMII::VFPConv3Frm)
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 2);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // Encode address base.
+ const MachineOperand &Base = MI.getOperand(OpIdx++);
+ Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+ // If there is a non-zero immediate offset, encode it.
+ if (Base.isReg()) {
+ const MachineOperand &Offset = MI.getOperand(OpIdx);
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+ if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+ Binary |= 1 << ARMII::U_BitShift;
+ Binary |= ImmOffs;
+ emitWordLE(Binary);
+ return;
+ }
+ }
+
+ // If immediate offset is omitted, default to +0.
+ Binary |= 1 << ARMII::U_BitShift;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction(
+ const MachineInstr &MI) {
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ const MachineOperand &MO = MI.getOperand(1);
+ Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
+
+ // Set bit W(21)
+ if (ARM_AM::getAM5WBFlag(MO.getImm()))
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // First register is encoded in Dd.
+ Binary |= encodeVFPRd(MI, 4);
+
+  // The number of registers is encoded in the offset field.
+ unsigned NumRegs = 1;
+ for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ ++NumRegs;
+ }
+ Binary |= NumRegs * 2;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscInstruction(const MachineInstr &MI) {
+  // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ emitWordLE(Binary);
+}
+
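+// Note (editorial): ARMGenCodeEmitter.inc is generated by TableGen from the
+// target's .td instruction definitions; it supplies the
+// getBinaryCodeForInstr() used throughout this file.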
+#include "ARMGenCodeEmitter.inc"
+
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 0000000..db723fe
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1285 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered throughout the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number. The two-byte pads required for Thumb alignment are
+ /// counted as part of the following block (i.e., the offset and size for
+ /// a padded block will both be ==2 mod 4).
+ std::vector<unsigned> BBSizes;
+
+ /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+ /// The two-byte pads required for Thumb alignment are counted as part of
+ /// the following block.
+ std::vector<unsigned> BBOffsets;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ unsigned MaxDisp;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ const TargetInstrInfo *TII;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &Fn,
+ std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void InitialFunctionScan(MachineFunction &Fn,
+ const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+ int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
+    bool LookForWater(CPUser &U, unsigned UserOffset,
+ MachineBasicBlock** NewMBB);
+ MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB,
+ std::vector<MachineBasicBlock*>::iterator IP);
+ void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock** NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex);
+ void RemoveDeadCPEMI(MachineInstr *CPEMI);
+ bool RemoveUnusedCPEntries();
+ bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp,
+ bool DoDump);
+ bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK);
+ bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br);
+ bool UndoLRSpillRestore();
+
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify(MachineFunction &Fn);
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &Fn) {
+ assert(BBOffsets.size() == BBSizes.size());
+ for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+ assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+ if (isThumb) {
+ for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY)
+ assert((BBOffsets[MBB->getNumber()]%4 == 0 &&
+ BBSizes[MBB->getNumber()]%4 == 0) ||
+ (BBOffsets[MBB->getNumber()]%4 != 0 &&
+ BBSizes[MBB->getNumber()]%4 != 0));
+ }
+ }
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
+ DOUT << "block " << J << " offset " << BBOffsets[J] <<
+ " size " << BBSizes[J] << "\n";
+ }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
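+
+// A hedged sketch (editorial): the target wires this pass in after register
+// allocation and just before emission, roughly as below; the hook shown is
+// assumed, not quoted from this import:
+//
+//   bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
+//                                         CodeGenOpt::Level OptLevel) {
+//     PM.add(createARMConstantIslandPass());
+//     return true;
+//   }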
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
+ MachineConstantPool &MCP = *Fn.getConstantPool();
+
+ TII = Fn.getTarget().getInstrInfo();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+
+ HasFarJump = false;
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ Fn.RenumberBlocks();
+
+ /// Thumb functions containing constant pools get 2-byte alignment.
+ /// This is so we can keep exact track of where the alignment padding goes.
+ /// Set default.
+ AFI->setAlign(isThumb ? 1U : 2U);
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP.isEmpty()) {
+ DoInitialPlacement(Fn, CPEMIs);
+ if (isThumb)
+ AFI->setAlign(2U);
+ }
+
+ /// The next UID to take is the first unused one.
+ AFI->initConstPoolEntryUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(Fn, CPEMIs);
+ CPEMIs.clear();
+
+ /// Remove dead constant pool entries.
+ RemoveUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ bool MadeChange = false;
+ while (true) {
+ bool Change = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ Change |= HandleConstantPoolUser(Fn, i);
+ DEBUG(dumpBBs());
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ Change |= FixUpImmediateBr(Fn, ImmBranches[i]);
+ DEBUG(dumpBBs());
+ if (!Change)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify(Fn);
+
+  // If LR has been force-spilled and no far jump (i.e. BL) has been issued,
+  // undo the spill / restore of LR if possible.
+ if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb)
+ MadeChange |= UndoLRSpillRestore();
+
+ BBSizes.clear();
+ BBOffsets.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+
+ return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
+ std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = Fn.CreateMachineBasicBlock();
+ Fn.push_back(BB);
+
+  // Add all of the constants from the constant pool to the end block, using
+  // an identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ Fn.getConstantPool()->getConstants();
+
+ const TargetData &TD = *Fn.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ NumCPEs++;
+ DOUT << "Moved CPI#" << i << " to end of function as #" << i << "\n";
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fall
+/// through into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ return false;
+
+ MachineBasicBlock *NextBB = next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ unsigned Offset = 0;
+ for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+    // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ // Add instruction size to MBBSize.
+ MBBSize += TII->GetInstSizeInBytes(I);
+
+ int Opc = I->getOpcode();
+ if (I->getDesc().isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ case ARM::tBR_JTr:
+ // A Thumb table jump may involve padding; for the offsets to
+ // be right, functions containing these must be 4-byte aligned.
+ AFI->setAlign(2U);
+ if ((Offset+MBBSize)%4 != 0)
+ MBBSize += 2; // padding
+ continue; // Does not get an entry in ImmBranches
+ default:
+ continue; // Ignore other JT branches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ unsigned TSFlags = I->getDesc().TSFlags;
+ switch (TSFlags & ARMII::AddrModeMask) {
+ default:
+ // Constant pool entries can reach anything.
+ if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
+ continue;
+ if (I->getOpcode() == ARM::tLEApcrel) {
+ Bits = 8; // Taking the address of a CP entry.
+ break;
+ }
+ assert(0 && "Unknown addressing mode for CP reference!");
+ case ARMII::AddrMode1: // AM1: 8 bits << 2
+ Bits = 8;
+ Scale = 4; // Taking the address of a CP entry.
+ break;
+ case ARMII::AddrMode2:
+ Bits = 12; // +-offset_12
+ break;
+ case ARMII::AddrMode3:
+ Bits = 8; // +-offset_8
+ break;
+ // addrmode4 has no immediate offset.
+ case ARMII::AddrMode5:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ break;
+ case ARMII::AddrModeT1:
+ Bits = 5; // +offset_5
+ break;
+ case ARMII::AddrModeT2:
+ Bits = 5;
+ Scale = 2; // +(offset_5*2)
+ break;
+ case ARMII::AddrModeT4:
+ Bits = 5;
+ Scale = 4; // +(offset_5*4)
+ break;
+ case ARMII::AddrModeTs:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+
+    // In Thumb mode, if this block is a constpool island, we may need padding
+    // so it's aligned on a 4-byte boundary.
+ if (isThumb &&
+ !MBB.empty() &&
+ MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ (Offset%4) != 0)
+ MBBSize += 2;
+
+ BBSizes.push_back(MBBSize);
+ BBOffsets.push_back(Offset);
+ Offset += MBBSize;
+ }
+}
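+
+// Worked example (editorial): for tBcc the scan above records Bits = 8 and
+// Scale = 2, so MaxOffs = ((1 << 7) - 1) * 2 = 254 bytes; an ARM B (Bits =
+// 24, Scale = 4) gets ((1 << 23) - 1) * 4 = 33554428 bytes of reach.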
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBOffsets[MBB->getNumber()];
+
+ // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+ // alignment padding, and compensate if so.
+ if (isThumb &&
+ MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ Offset%4 != 0)
+ Offset += 2;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+  // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+  // Next, update WaterList. Specifically, we need to add NewBB as having
+ // available water after it.
+ std::vector<MachineBasicBlock*>::iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change, and return the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+ MachineFunction &MF = *OrigBB->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc::getUnknownLoc(),
+ TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB);
+ NumSplit++;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+  // Next, update WaterList. Specifically, we need to add OrigBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ std::vector<MachineBasicBlock*>::iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+
+  // Figure out how large NewBB is. (It cannot
+ // contain a constpool_entry or tablejump.)
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+
+ unsigned OrigBBI = OrigBB->getNumber();
+ unsigned NewBBI = NewBB->getNumber();
+ // Set the size of NewBB in BBSizes.
+ BBSizes[NewBBI] = NewBBSize;
+
+  // We removed instructions from OrigBB; subtract that from its size.
+ // Add 2 or 4 to the block to count the unconditional branch we added to it.
+ unsigned delta = isThumb ? 2 : 4;
+ BBSizes[OrigBBI] -= NewBBSize - delta;
+
+ // ...and adjust BBOffsets for NewBB accordingly.
+ BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+ // All BBOffsets following these blocks must be modified.
+ AdjustBBOffsetsAfter(NewBB, delta);
+
+ return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK) {
+ // On Thumb offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // Effectively, the valid range of displacements is 2 bytes smaller for such
+ // references.
+  if (isThumb && UserOffset%4 != 0)
+ UserOffset -= 2;
+ // CPEs will be rounded up to a multiple of 4.
+ if (isThumb && TrialOffset%4 != 0)
+ TrialOffset += 2;
+
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset-UserOffset <= MaxDisp)
+ return true;
+ } else if (NegativeOK) {
+ if (UserOffset-TrialOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
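+
+// Worked example (editorial): with a Thumb user at offset 10 (== 2 mod 4)
+// and a trial CPE offset of 14, the code above rounds the user down to 8 and
+// the trial up to 16, so the displacement checked against MaxDisp is 8 bytes
+// rather than the raw 4.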
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specified MI.
+
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U)
+{
+ unsigned MaxDisp = U.MaxDisp;
+ MachineFunction::iterator I = next(MachineFunction::iterator(Water));
+ unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+ BBSizes[Water->getNumber()];
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. (Currently applies only to ARM, so
+ // no alignment compensation attempted here.)
+ if (CPEOffset < UserOffset)
+ UserOffset += U.CPEMI->getOperand(2).getImm();
+
+  return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb);
+}
+
+/// CPEIsInRange - Returns true if the distance between the specified MI and
+/// the specified constant pool entry instruction fits in MI's displacement
+/// field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI,
+ unsigned MaxDisp, bool DoDump) {
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+ assert(CPEOffset%4 == 0 && "Misaligned CPE");
+
+ if (DoDump) {
+ DOUT << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI;
+ }
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+ int delta) {
+ MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
+  for (unsigned i = BB->getNumber()+1;
+       i < BB->getParent()->getNumBlockIDs(); i++) {
+ BBOffsets[i] += delta;
+    // If some existing blocks have padding, adjust the padding as needed; a
+    // bit tricky. delta can be negative, so don't use % on it.
+ if (isThumb) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty()) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned oldOffset = BBOffsets[i] - delta;
+ if (oldOffset%4==0 && BBOffsets[i]%4!=0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if (oldOffset%4!=0 && BBOffsets[i]%4==0) {
+ // remove existing padding
+            BBSizes[i] -= 2;
+ delta -= 2;
+ }
+ }
+ // Thumb jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ MachineInstr *ThumbJTMI = NULL;
+ if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr)
+ ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI) {
+ unsigned newMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned oldMIOffset = newMIOffset - delta;
+ if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if (oldMIOffset%4 != 0 && newMIOffset%4 == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ }
+ }
+ if (delta==0)
+ return;
+ }
+ MBBI = next(MBBI);
+ }
+ }
+}
+
+/// DecrementOldEntry - Find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ RemoveDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ NumCPEs--;
+ return true;
+ }
+ return false;
+}
+
+/// LookForExistingCPEntry - See if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, true)) {
+ DOUT << "In range\n";
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+    // Removing CPEs can leave empty entries; skip them.
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, false)) {
+ DOUT << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n";
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specified unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4;
+}
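+
+// Worked example (editorial): tB carries an 11-bit signed halfword offset,
+// giving ((1 << 10) - 1) * 2 = 2046 bytes of forward reach; ARM B carries a
+// 24-bit signed word offset, ((1 << 23) - 1) * 4 = 33554428 bytes.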
+
+/// AcceptWater - Small amount of common code factored out of the following.
+
+MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB,
+ std::vector<MachineBasicBlock*>::iterator IP) {
+ DOUT << "found water in range\n";
+ // Remove the original WaterList entry; we want subsequent
+ // insertions in this vicinity to go after the one we're
+ // about to insert. This considerably reduces the number
+ // of times we have to move the same CPE more than once.
+ WaterList.erase(IP);
+ // CPE goes before following block (NewMBB).
+ return next(MachineFunction::iterator(WaterBB));
+}
+
+/// LookForWater - look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, *NewMBB
+/// is set to the WaterList entry.
+/// For ARM, we prefer the water that's farthest away.  For Thumb, we prefer
+/// water that will not introduce padding over water that will; within each
+/// group, we prefer the water that's farthest away.
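+/// (Preferring distant water leaves nearer water free for later users, which
+/// tends to reduce how many new islands have to be created.)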
+
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+ MachineBasicBlock** NewMBB) {
+ std::vector<MachineBasicBlock*>::iterator IPThatWouldPad;
+ MachineBasicBlock* WaterBBThatWouldPad = NULL;
+ if (!WaterList.empty()) {
+ for (std::vector<MachineBasicBlock*>::iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterIsInRange(UserOffset, WaterBB, U)) {
+ if (isThumb &&
+ (BBOffsets[WaterBB->getNumber()] +
+ BBSizes[WaterBB->getNumber()])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!WaterBBThatWouldPad) {
+ WaterBBThatWouldPad = WaterBB;
+ IPThatWouldPad = IP;
+ }
+ } else {
+ *NewMBB = AcceptWater(WaterBB, IP);
+ return true;
+ }
+ }
+ if (IP == B)
+ break;
+ }
+ }
+ if (isThumb && WaterBBThatWouldPad) {
+ *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad);
+ return true;
+ }
+ return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case *NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+ unsigned UserOffset, MachineBasicBlock** NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+ BBSizes[UserMBB->getNumber()];
+ assert(OffsetOfNextBlock == BBOffsets[UserMBB->getNumber()+1]);
+
+ // If the use is at the end of the block, or the end of the block
+ // is within range, make new water there. (The addition below is
+ // for the unconditional branch we will be adding: 4 bytes on ARM,
+ // 2 on Thumb.  Possible Thumb alignment padding is accounted for
+ // inside OffsetIsInRange.
+ // If the block ends in an unconditional branch already, it is water,
+ // and is known to be out of range, so we'll always be adding a branch.)
+ if (&UserMBB->back() == UserMI ||
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb ? 2 : 4),
+ U.MaxDisp, !isThumb)) {
+ DOUT << "Split at end of block\n";
+ if (&UserMBB->back() == UserMI)
+ assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+ *NewMBB = next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block.
+ // Record it for branch lengthening; this new branch will not get out of
+ // range, but if the preceding conditional branch is out of range, the
+ // targets will be exchanged, and the altered branch may be out of
+ // range, so the machinery has to know about it.
+ int UncondBr = isThumb ? ARM::tB : ARM::B;
+ BuildMI(UserMBB, DebugLoc::getUnknownLoc(),
+ TII->get(UncondBr)).addMBB(*NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ int delta = isThumb ? 2 : 4;
+ BBSizes[UserMBB->getNumber()] += delta;
+ AdjustBBOffsetsAfter(UserMBB, delta);
+ } else {
+ // What a big block. Find a place within the block to split it.
+ // This is a little tricky on Thumb since instructions are 2 bytes
+ // and constant pool entries are 4 bytes: if instruction I references
+ // island CPE, and instruction I+1 references CPE', it will
+ // not work well to put CPE as far forward as possible, since then
+ // CPE' cannot immediately follow it (that location is 2 bytes
+ // farther away from I+1 than CPE was from I) and we'd need to create
+ // a new island. So, we make a first guess, then walk through the
+ // instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions
+ // that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // The 4 in the following is for the unconditional branch we'll be
+ // inserting (allows for long branch on Thumb). Alignment of the
+ // island is handled inside OffsetIsInRange.
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp - 4;
+ // This could point off the end of the block if we've already got
+ // constant pool entries following this block; only the last one is
+ // in the water list. Back past any possible branches (allow for a
+ // conditional and a maximally long unconditional).
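+ // (Thumb: a 2-byte conditional branch plus a 4-byte tBfar is 6 bytes;
+ // ARM: a 4-byte conditional plus a 4-byte unconditional is 8 bytes.)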
+ if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+ BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+ (isThumb ? 6 : 8);
+ unsigned EndInsertOffset = BaseInsertOffset +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = next(MI)) {
+ if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+ if (!OffsetIsInRange(Offset, EndInsertOffset,
+ CPUsers[CPUIndex].MaxDisp, !isThumb)) {
+ BaseInsertOffset -= (isThumb ? 2 : 4);
+ EndInsertOffset -= (isThumb ? 2 : 4);
+ }
+ // This is overly conservative, as we don't account for CPEMIs
+ // being reused within the block, but it doesn't matter much.
+ EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+ }
+ DOUT << "Split in middle of big block\n";
+ *NewMBB = SplitBlockBeforeInstr(prior(MI));
+ }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn,
+ unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ MachineBasicBlock *NewMBB;
+ // Compute this only once, it's expensive. The 4 or 8 is the value the
+ // hardware keeps in the PC (2 insns ahead of the reference).
+ unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+ // Special case: tLEApcrel is a two-instruction sequence; the actual user
+ // of the constant pool entry is the second instruction.
+ if (UserMI->getOpcode() == ARM::tLEApcrel)
+ UserOffset += 2;
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = LookForExistingCPEntry(U, UserOffset);
+ if (result == 1) return false;
+ else if (result == 2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createConstPoolEntryUId();
+
+ // Look for water where we can place this CPE.  We look for the farthest
+ // water that will work.  Forward references only for now (although later
+ // we might find some that are backwards).
+
+ if (!LookForWater(U, UserOffset, &NewMBB)) {
+ // No water found.
+ DOUT << "No water found\n";
+ CreateNewWater(CPUserIndex, UserOffset, &NewMBB);
+ }
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MachineBasicBlock *NewIsland = Fn.CreateMachineBasicBlock();
+ Fn.insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ DecrementOldEntry(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(),
+ TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ NumCPEs++;
+
+ BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+ // Compensate for .align 2 in thumb mode.
+ if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
+ Size += 2;
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+ AdjustBBOffsetsAfter(NewIsland, Size);
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DOUT << " Moved CPE to #" << ID << " CPI=" << CPI << "\t" << *UserMI;
+
+ return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBSizes[CPEBB->getNumber()] -= Size;
+ // All succeeding offsets have the old size of this block added in; they are
+ // corrected by AdjustBBOffsetsAfter below.
+ if (CPEBB->empty()) {
+ // In Thumb mode, the size of the island may have been padded by two to
+ // satisfy the alignment requirement, so an empty block can still have
+ // size 2.  Zero it and fold the padding into the offset adjustment.
+ if (BBSizes[CPEBB->getNumber()] != 0) {
+ Size += BBSizes[CPEBB->getNumber()];
+ BBSizes[CPEBB->getNumber()] = 0;
+ }
+ }
+ AdjustBBOffsetsAfter(CPEBB, -Size);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ RemoveDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI, MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
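+ // The hardware PC reads two instructions ahead of the branch:
+ // 4 bytes in Thumb mode, 8 in ARM mode.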
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+ DOUT << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI;
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return FixUpUnconditionalBr(Fn, Br);
+ return FixUpConditionalBr(Fn, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, an intermediate branch is needed to reach the destination.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(isThumb && "Expected a Thumb function!");
+
+ // Use BL to implement far jump.
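+ // tBfar is a BL, whose 22-bit signed halfword offset gives a +/-4MB range.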
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBSizes[MBB->getNumber()] += 2;
+ AdjustBBOffsetsAfter(MBB, 2);
+ HasFarJump = true;
+ NumUBrFixed++;
+
+ DOUT << " Changed B to long jump " << *MI;
+
+ return true;
+}
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ NumCBrFixed++;
+ if (BMI != MI) {
+ if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+ DOUT << " Invert Bcc condition and swap its destination with " << *BMI;
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ SplitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBSizes[MBB->getNumber()] -= delta;
+ MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
+ AdjustBBOffsetsAfter(SplitBB, -delta);
+ MBB->back().eraseFromParent();
+ // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+
+ DOUT << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n";
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc::getUnknownLoc(),
+ TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+
+ // The net size change is an addition of one unconditional branch.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ AdjustBBOffsetsAfter(MBB, delta);
+ return true;
+}
+
+/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill
+/// LR / restore LR to pc.
+bool ARMConstantIslands::UndoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(0).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 1) {
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 0000000..3a038c9
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,100 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include <ostream>
+using namespace llvm;
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMCP::ARMCPKind k,
+ unsigned char PCAdj,
+ const char *Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)gv->getType()),
+ GV(gv), S(NULL), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(const char *s, unsigned id,
+ ARMCP::ARMCPKind k,
+ unsigned char PCAdj,
+ const char *Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)Type::Int32Ty),
+ GV(NULL), S(s), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv,
+ ARMCP::ARMCPKind k,
+ const char *Modif)
+ : MachineConstantPoolValue((const Type*)Type::Int32Ty),
+ GV(gv), S(NULL), LabelId(0), Kind(k), PCAdjust(0),
+ Modifier(Modif) {}
+
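+/// getExistingMachineCPValue - Search the constant pool for an ARM
+/// constantpool value identical to this one (same GV / symbol, label id,
+/// kind, and PC adjustment) with sufficient alignment; return its index,
+/// or -1 if there is none.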
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
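+ // An existing entry is only reusable if it is at least as aligned as
+ // requested; the mask check below rejects entries with weaker alignment.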
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (CPV->GV == GV &&
+ CPV->S == S &&
+ CPV->LabelId == LabelId &&
+ CPV->Kind == Kind &&
+ CPV->PCAdjust == PCAdjust)
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(GV);
+ ID.AddPointer(S);
+ ID.AddInteger(LabelId);
+ ID.AddInteger((unsigned)Kind);
+ ID.AddInteger(PCAdjust);
+}
+
+void ARMConstantPoolValue::dump() const {
+ cerr << " " << *this;
+}
+
+void ARMConstantPoolValue::print(std::ostream &O) const {
+ raw_os_ostream RawOS(O);
+ print(RawOS);
+}
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (GV)
+ O << GV->getName();
+ else
+ O << S;
+ if (isNonLazyPointer()) O << "$non_lazy_ptr";
+ else if (isStub()) O << "$stub";
+ if (Modifier) O << "(" << Modifier << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 0000000..d2b9066
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,92 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include <iosfwd>
+
+namespace llvm {
+
+class GlobalValue;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPNonLazyPtr,
+ CPStub
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC relative displacement between the address of the load
+/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ GlobalValue *GV; // GlobalValue being loaded.
+ const char *S; // ExtSymbol being loaded.
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // non_lazy_ptr or stub?
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
+ // 8 for ARM, 4 for Thumb.
+ const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
+
+public:
+ ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0, const char *Modifier = NULL,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(const char *s, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0, const char *Modifier = NULL,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind,
+ const char *Modifier);
+
+ GlobalValue *getGV() const { return GV; }
+ const char *getSymbol() const { return S; }
+ const char *getModifier() const { return Modifier; }
+ bool hasModifier() const { return Modifier != NULL; }
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+ unsigned getLabelId() const { return LabelId; }
+ bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; }
+ bool isStub() const { return Kind == ARMCP::CPStub; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ void print(std::ostream *O) const { if (O) print(*O); }
+ void print(std::ostream &O) const;
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void print(raw_ostream &O) const;
+ void dump() const;
+};
+
+inline std::ostream &operator<<(std::ostream &O,
+                                const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h
new file mode 100644
index 0000000..405b8f2
--- /dev/null
+++ b/lib/Target/ARM/ARMFrameInfo.h
@@ -0,0 +1,32 @@
+//===-- ARMFrameInfo.h - Define TargetFrameInfo for ARM ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific subclass of TargetFrameInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "ARMSubtarget.h"
+
+namespace llvm {
+
+class ARMFrameInfo : public TargetFrameInfo {
+public:
+ explicit ARMFrameInfo(const ARMSubtarget &ST)
+ : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 0000000..ca3a9cb
--- /dev/null
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,911 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMDAGToDAGISel(ARMTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ SDNode *Select(SDValue Op);
+ virtual void InstructionSelect();
+ bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset);
+
+ bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
+ SDValue &Label);
+
+ bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset);
+ bool SelectThumbAddrModeRI5(SDValue Op, SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm,
+ SDValue &Offset);
+ bool SelectThumbAddrModeS1(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &OffImm, SDValue &Offset);
+ bool SelectThumbAddrModeS2(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &OffImm, SDValue &Offset);
+ bool SelectThumbAddrModeS4(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &OffImm, SDValue &Offset);
+ bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &OffImm);
+
+ bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
+ SDValue &B, SDValue &C);
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+
+private:
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+}
+
+void ARMDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
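+ // An odd multiplier 2^n+1 becomes reg + (reg << n); a negative odd
+ // multiplier such as -7 becomes reg - (reg << 3) via the sub form.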
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() == ISD::ADD)
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC >= 0 && RHSC < 0x1000) ||
+ (RHSC < 0 && RHSC > -0x1000)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(1).getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getZExtValue();
+ if (Val >= 0 && Val < 0x1000) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+ }
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalized to X + -C; no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC >= 0 && RHSC < 256) ||
+ (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
+ }
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getZExtValue();
+ if (Val >= 0 && Val < 256) {
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
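+ // (Addressing mode 5 stores the offset in words, so the byte offset must be
+ // a multiple of 4 and fit in 8 bits after scaling, i.e. at most +/-1020.)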
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4.
+ RHSC >>= 2;
+ if ((RHSC >= 0 && RHSC < 256) ||
+ (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
+ return true;
+ }
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset){
+ // FIXME dl should come from the parent load or store, not the address
+ DebugLoc dl = Op.getDebugLoc();
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ // We must materialize a zero in a reg! Returning a constant here
+ // wouldn't work without additional code to position the node within
+ // ISel's topological ordering in a place where ISel will process it
+ // normally. Instead, just explicitly issue a tMOVi8 node!
+ Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N,
+ unsigned Scale, SDValue &Base,
+ SDValue &OffImm, SDValue &Offset) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (N.getOpcode() != ISD::ADD) {
+ Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP)) {
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied.
+ RHSC /= Scale;
+ if (RHSC >= 0 && RHSC < 32) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &OffImm,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &OffImm,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &OffImm,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
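+ // tLDRspi / tSTRspi take an 8-bit word offset from SP, i.e. up to 1020
+ // bytes in multiples of 4.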
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC & 3) == 0) { // The constant is implicitly multiplied.
+ RHSC >>= 2;
+ if (RHSC >= 0 && RHSC < 256) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
+ SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getZExtValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+/// getAL - Returns an ARMCC::AL immediate node.
+static inline SDValue getAL(SelectionDAG *CurDAG) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+}
+
+
+SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ bool UseCP = true;
+ if (Subtarget->isThumb())
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ if (UseCP) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+ TLI.getPointerTy());
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb())
+ ResNode = CurDAG->getTargetNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
+ CPIdx, CurDAG->getEntryNode());
+ else {
+ SDValue Ops[] = {
+ CPIdx,
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getTargetNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 6);
+ }
+ ReplaceUses(Op, SDValue(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ if (Subtarget->isThumb()) {
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ } else {
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::ADDri, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::ADD: {
+ if (!Subtarget->isThumb())
+ break;
+ // Select add sp, c to tADDhirr.
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1));
+ if (LHSR && LHSR->getReg() == ARM::SP) {
+ std::swap(N0, N1);
+ std::swap(LHSR, RHSR);
+ }
+ if (RHSR && RHSR->getReg() == ARM::SP) {
+ SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVlor2hir, dl,
+ Op.getValueType(), N0, N0), 0);
+ return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1);
+ }
+ break;
+ }
+ case ISD::MUL:
+ if (Subtarget->isThumb())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RHSV = C->getZExtValue();
+ if (!RHSV) break;
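+ // A multiplier of the form 2^n+1 selects to ADDrs (V + (V << n));
+ // 2^n-1 selects to RSBrs ((V << n) - V).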
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ SDValue V = Op.getOperand(0);
+ unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1));
+ SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(ShImm, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ SDValue V = Op.getOperand(0);
+ unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1));
+ SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(ShImm, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ }
+ }
+ break;
+ case ARMISD::FMRRD:
+ return CurDAG->getTargetNode(ARM::FMRRD, dl, MVT::i32, MVT::i32,
+ Op.getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
+ case ISD::UMUL_LOHI: {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ case ISD::SMUL_LOHI: {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ MVT LoadedVT = LD->getMemoryVT();
+ if (AM != ISD::UNINDEXED) {
+ SDValue Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ Match = true;
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ }
+ }
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Ops, 6);
+ }
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ? ARM::tBcc : ARM::Bcc;
+ SDValue Chain = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDValue N3 = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getTargetNode(Opc, dl, MVT::Other,
+ MVT::Flag, Ops, 5);
+ Chain = SDValue(ResNode, 0);
+ if (Op.getNode()->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Op.getNode(), 1), InFlag);
+ }
+ ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo()));
+ return NULL;
+ }
+ case ARMISD::CMOV: {
+ bool isThumb = Subtarget->isThumb();
+ MVT VT = Op.getValueType();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDValue N3 = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (!isThumb && VT == MVT::i32 &&
+ SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Predicate_so_imm>><<X:so_imm_XFORM>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm_XFORM:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (VT == MVT::i32 &&
+ N3.getOpcode() == ISD::Constant &&
+ Predicate_so_imm(N3.getNode())) {
+ SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N1)->getZExtValue()),
+ MVT::i32);
+ Tmp1 = Transform_so_imm_XFORM(Tmp1.getNode());
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCi, MVT::i32, Ops, 5);
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also FCPYScc and FCPYDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::tMOVCCr : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::FCPYScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::FCPYDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
+ }
+ case ARMISD::CNEG: {
+ MVT VT = Op.getValueType();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDValue N3 = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::f32:
+ Opc = ARM::FNEGScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::FNEGDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
+ }
+
+ case ISD::DECLARE: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
+ // FIXME: handle VLAs.
+ if (!FINode) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+ if (N2.getOpcode() == ARMISD::PIC_ADD && isa<LoadSDNode>(N2.getOperand(0)))
+ N2 = N2.getOperand(0);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(N2);
+ if (!Ld) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+ SDValue BasePtr = Ld->getBasePtr();
+ assert(BasePtr.getOpcode() == ARMISD::Wrapper &&
+ isa<ConstantPoolSDNode>(BasePtr.getOperand(0)) &&
+ "llvm.dbg.variable should be a constantpool node");
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(BasePtr.getOperand(0));
+ GlobalValue *GV = 0;
+ if (CP->isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)CP->getMachineCPVal();
+ GV = ACPV->getGV();
+ } else
+ GV = dyn_cast<GlobalValue>(CP->getConstVal());
+ if (!GV) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+
+ SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
+ TLI.getPointerTy());
+ SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy());
+ SDValue Ops[] = { Tmp1, Tmp2, Chain };
+ return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
+ MVT::Other, Ops, 3);
+ }
+ }
+
+ return SelectCode(Op);
+}
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+
+ SDValue Base, Offset, Opc;
+ if (!SelectAddrMode2(Op, Op, Base, Offset, Opc))
+ return true;
+
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ OutOps.push_back(Opc);
+ return false;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) {
+ return new ARMDAGToDAGISel(TM);
+}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 0000000..c0fd9dc
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,2346 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instruction.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM), ARMPCLabelIndex(0) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+ if (Subtarget->isTargetDarwin()) {
+ // Use VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+ setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
+
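+ // The *vfp comparison helpers return a nonzero value when the tested
+ // relation holds, so e.g. (a == b) becomes (__eqsf2vfp(a, b) != 0);
+ // hence SETNE against zero for the ordered comparisons below.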
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+ setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+ }
+
+ // These libcalls are not available in 32-bit mode.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+
+ if (Subtarget->isThumb())
+ addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ else
+ addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ }
+ computeRegisterProperties();
+
+ // ARM does not have f32 extending load.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM does not have i1 sign extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // ARM supports all four flavors (pre/post-increment/decrement) of integer
+ // indexed load / store.
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+
+ // i64 operation support.
+ if (Subtarget->isThumb()) {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ } else {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ }
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // These are expanded into libcalls.
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // Support label-based line numbers.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+ // VASTART is custom lowered; the rest use the default implementation.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb())
+ // Turn f64->i64 into FMRRD and i64->f64 into FMDRR iff the target supports VFP2.
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // We have no native sin/cos/fmod/pow; fcopysign is custom lowered below
+ // when VFP2 is available.
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ }
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ // ARMISD::FMRRD (no setTargetDAGCombine call is needed; target-specific
+ // nodes are always passed to PerformDAGCombine)
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+ setSchedulingPreference(SchedulingForRegPressure);
+ setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
+ setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
+
+ maxStoresPerMemcpy = 1; // temporary - rewrite interface to use type
+ // Do not enable CodePlacementOpt for now: it currently runs after the
+ // ARMConstantIslandPass and messes up branch relaxation and placement
+ // of constant islands.
+ // benefitFromCodePlacementOpt = true;
+}
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMPNZ: return "ARMISD::CMPNZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::CNEG: return "ARMISD::CNEG";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::FMRRD: return "ARMISD::FMRRD";
+ case ARMISD::FMDRR: return "ARMISD::FMDRR";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
+/// returns true if the operands should be inverted to form the proper
+/// comparison.
+static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ bool Invert = false;
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: assert(0 && "Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+ return Invert;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//
+// The calling convention lowering operations are performed in this order:
+// LowerCALL (virt regs --> phys regs, virt regs --> stack)
+// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
+// LowerRET (virt regs --> phys regs)
+// LowerCALL (phys regs --> virt regs)
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMGenCallingConv.inc"
+
+// APCS f64 is in register pairs, possibly split to stack
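+// (e.g., if only R3 is left, the high word goes in R3 and the low word is
+// written to a 4-byte stack slot via the ARM::NoRegister entry below)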
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const unsigned LoRegList[] = { ARM::R1,
+ ARM::R2,
+ ARM::R3,
+ ARM::NoRegister };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 4; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
+ if (LoRegList[i] != ARM::NoRegister)
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ MVT::i32, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ MVT::i32, LocInfo));
+ return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
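+// (i.e., only R0:R1 or R2:R3, so a pair never starts at an odd register)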
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ MVT::i32, LocInfo));
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ MVT::i32, LocInfo));
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. It returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *ARMTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+
+ DebugLoc dl = TheCall->getDebugLoc();
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool isVarArg = TheCall->isVarArg();
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM);
+
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ // Handle f64 as custom.
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ ResultVals.push_back(Val);
+ }
+
+ // Merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate from the address
+/// "Src" to the address "Dst", of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*AlwaysInline=*/false, NULL, 0, NULL, 0);
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack.
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+ const SDValue &StackPtr,
+ const CCValAssign &VA, SDValue Chain,
+ SDValue Arg, ISD::ArgFlagsTy Flags) {
+ DebugLoc dl = TheCall->getDebugLoc();
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal()) {
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+ }
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ PseudoSourceValue::getStack(), LocMemOffset);
+}
+
+/// LowerCALL - Lower an ISD::CALL node into a callseq_start <-
+/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ MVT RetVT = TheCall->getRetValType(0);
+ SDValue Chain = TheCall->getChain();
+ unsigned CC = TheCall->getCallingConv();
+ assert((CC == CallingConv::C ||
+ CC == CallingConv::Fast) && "unknown calling convention");
+ bool isVarArg = TheCall->isVarArg();
+ SDValue Callee = TheCall->getCallee();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(TheCall, CC_ARM);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = TheCall->getArg(realArgIdx);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx);
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // f64 is passed in i32 pairs and must be combined
+ if (VA.needsCustom()) {
+ SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(1)));
+ else {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
+ Chain, fmrrd.getValue(1),
+ Flags));
+ }
+ } else if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
+ Chain, Arg, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common; every
+ // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && !isExt;
+ // tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+ ARMCP::CPStub, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else
+ Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex,
+ ARMCP::CPStub, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ }
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc))
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ CallOpc = (isDirect || Subtarget->hasV5TOps())
+ ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
+ : ARMISD::CALL_NOLINK;
+ }
+ if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) {
+ // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
+ Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+ &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (RetVT != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+ Op.getResNo());
+}
+
+SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+ // The chain is always operand #0
+ SDValue Chain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCValAssign - represents the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+ // Analyze return values of ISD::RET.
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // ISD::RET => ret chain, (regnum1,val1), ...
+ // so realRVLocIdx*2+1 indexes only the regnums.
+ SDValue Arg = Op.getOperand(realRVLocIdx*2+1);
+
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
+ if (VA.needsCustom()) {
+ SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+ // Guarantee that all emitted copies are stuck together by the flag
+ // operand, so nothing can be scheduled in between them.
+ Flag = Chain.getValue(1);
+ }
+
+ SDValue result;
+ if (Flag.getNode())
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else // Return Void
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+
+ return result;
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing modes. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) {
+ DebugLoc dl = GA->getDebugLoc();
+ MVT PtrVT = getPointerTy();
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj, "tlsgd", true);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+ Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
+ SDValue Chain = Argument.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (const Type *) Type::Int32Ty;
+ Args.push_back(Entry);
+ // FIXME: is there useful debug info available here?
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false,
+ CallingConv::C, false,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) {
+ GlobalValue *GV = GA->getGlobal();
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Offset;
+ SDValue Chain = DAG.getEntryNode();
+ MVT PtrVT = getPointerTy();
+ // Get the Thread Pointer
+ SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+ if (GV->isDeclaration()) {
+ // initial exec model
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj, "gottpoff", true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ Chain = Offset.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ } else {
+ // local exec model
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff");
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ // If the relocation model is PIC, use the "general dynamic" TLS model;
+ // otherwise LowerToTLSExecModels selects "initial exec" or "local exec".
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ else
+ return LowerToTLSExecModels(GA, DAG);
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (RelocM == Reloc::PIC_) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT");
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ CPAddr, NULL, 0);
+ SDValue Chain = Result.getValue(1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
+ if (!UseGOTOFF)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
+ return Result;
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ }
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
+/// even in non-static mode.
+static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) {
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+ if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage()))
+ return false;
+ return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker());
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ bool IsIndirect = GVIsIndirectSymbol(GV, RelocM);
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static)
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ else {
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr
+ : ARMCP::CPValue;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+ Kind, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ if (IsIndirect)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
+
+ return Result;
+}
+
+SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
+ SelectionDAG &DAG){
+ assert(Subtarget->isTargetELF() &&
+ "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_thread_pointer:
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ case Intrinsic::eh_sjlj_setjmp:
+ SDValue Res = DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32,
+ Op.getOperand(1));
+ return Res;
+ }
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ unsigned VarArgsFrameIndex) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+}
+
+SDValue
+ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ SDValue Root = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM);
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC;
+ if (AFI->isThumbFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ if (RegVT == MVT::f64) {
+ // f64 is passed in pairs of GPRs and must be combined.
+ RegVT = MVT::i32;
+ } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32)))
+ assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+
+ // f64 is passed in i32 pairs and must be combined.
+ if (VA.needsCustom()) {
+ SDValue ArgValue2;
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isMemLoc()) {
+ // Must be APCS to be split like this.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0);
+ } else {
+ Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+
+ ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64,
+ ArgValue, ArgValue2);
+ }
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ ArgValues.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+ assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+ }
+ }
+
+ // varargs
+ if (isVarArg) {
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ unsigned NumGPRs = CCInfo.getFirstUnallocated
+ (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned VARegSize = (4 - NumGPRs) * 4;
+ unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
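+ // e.g., with one GPR already used for fixed arguments, R1-R3 (12 bytes)
+ // are saved, rounded up to the stack alignment.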
+ unsigned ArgOffset = 0;
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ ArgOffset = CCInfo.getNextStackOffset();
+ VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
+ VARegSaveSize - VARegSize);
+ SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (; NumGPRs < 4; ++NumGPRs) {
+ TargetRegisterClass *RC;
+ if (AFI->isThumbFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isPosZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isPosZero();
+ }
+ }
+ return false;
+}
+
+static bool isLegalCmpImmediate(unsigned C, bool isThumb) {
+ return ( isThumb && (C & ~255U) == 0) ||
+ (!isThumb && ARM_AM::getSOImmVal(C) != -1);
+}
+
+/// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, bool isThumb,
+ DebugLoc dl) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ unsigned C = RHSC->getZExtValue();
+ if (!isLegalCmpImmediate(C, isThumb)) {
+ // Constant does not fit, try adjusting it by one?
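+ // e.g., 0x101 is not a valid rotated 8-bit immediate in ARM mode, but
+ // (x < 0x101) can be rewritten as (x <= 0x100), which is encodable.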
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (isLegalCmpImmediate(C-1, isThumb)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (isLegalCmpImmediate(C+1, isThumb)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ case ARMCC::MI:
+ case ARMCC::PL:
+ // Uses only the N and Z flags.
+ CompareType = ARMISD::CMPNZ;
+ break;
+ }
+ ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
+}
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
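+/// FMSTAT copies the VFP status flags into CPSR so that ordinary
+/// predicated ARM instructions can test the comparison result.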
+static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ MVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMCC;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ if (FPCCToARMCC(CC, CondCode, CondCode2))
+ std::swap(TrueVal, FalseVal);
+
+ SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ ARMCC, CCR, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = DAG.getNode(ARMISD::CMOV, dl, VT,
+ Result, TrueVal, ARMCC2, CCR, Cmp2);
+ }
+ return Result;
+}
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMCC;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMCC, CCR,Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+ ARMCC::CondCodes CondCode, CondCode2;
+ if (FPCCToARMCC(CC, CondCode, CondCode2))
+ // Swap the LHS/RHS of the comparison if needed.
+ std::swap(LHS, RHS);
+
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ if (CondCode2 != ARMCC::AL) {
+ ARMCC = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ }
+ return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl,
+ Chain, Addr, NULL, 0);
+ Chain = Addr.getValue(1);
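+ // In PIC mode the table entries are offsets from the table base, so the
+ // loaded value must be added back to the table address.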
+ if (isPIC)
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+}
+
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc =
+ Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI;
+ Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+}
+
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc =
+ Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF;
+
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+ // Implement fcopysign with a fabs and a conditional fneg.
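+ // i.e., copysign(x, y) = (y < 0.0) ? -fabs(x) : fabs(x); the CNEG below
+ // negates the absolute value when the compare sets the LT flag.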
+ SDValue Tmp0 = Op.getOperand(0);
+ SDValue Tmp1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ MVT SrcVT = Tmp1.getValueType();
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
+ SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
+ SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces())
+ ? ARM::R7 : ARM::R11;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ return FrameAddr;
+}
+
+SDValue
+ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ MVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcSV, SrcSVOff + SrcOff);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
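+ // (e.g., 3 trailing bytes become one i16 copy followed by one i8 copy)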
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcSV, SrcSVOff + SrcOff);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::f64) {
+ // Turn i64->f64 into FMDRR.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ }
+
+ // Turn f64->i64 into FMRRD.
+ SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+}
+
+static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) {
+ assert(N->getValueType(0) == MVT::i64 &&
+ (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA/SRL by 1 here; all others use generic lowering.
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ return SDValue();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb()) return SDValue();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
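+ // e.g., for x >> 1: the high word is shifted right by one with its low
+ // bit captured in the carry flag, then RRX rotates that carry into the
+ // top bit of the low word.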
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // captures the result into a carry flag.
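+ // For example, for (srl i64 %x, 1): Hi' = %x.hi >> 1 with the shifted-out
+ // bit saved in the carry, and Lo' = RRX(%x.lo) = (carry << 31) | (%x.lo >> 1).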
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Don't know how to custom lower this!"); abort();
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress:
+ return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::RETURNADDR: break;
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
+ case ISD::SRL:
+ case ISD::SRA: return ExpandSRx(Op.getNode(), DAG, Subtarget);
+ }
+ return SDValue();
+}
+
+/// ReplaceNodeResults - Replace the results of a node whose result type is
+/// illegal with new values built out of custom code.
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default:
+ assert(0 && "Don't know how to custom expand this!");
+ return;
+ case ISD::BIT_CONVERT:
+ Results.push_back(ExpandBIT_CONVERT(N, DAG));
+ return;
+ case ISD::SRL:
+ case ISD::SRA: {
+ SDValue Res = ExpandSRx(N, DAG, Subtarget);
+ if (Res.getNode())
+ Results.push_back(Res);
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case ARM::tMOVCCr: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
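+ // The resulting CFG is a diamond:
+ //
+ //     thisMBB
+ //      |    \
+ //      |  copy0MBB
+ //      |    /
+ //     sinkMBB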
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while (!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
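+/// combineSelectAndUse - Try to fold the node N (an ADD, or a SUB whose
+/// second operand is the select) with a single-use (select cc, 0, c) operand
+/// Slct into (select cc, OtherOp, (binop OtherOp, c)), inverting the
+/// condition when the zero is on the false side.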
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
+ bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+ SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+ SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+
+ if (isSlctCC) {
+ CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+ } else {
+ SDValue CCOp = Slct.getOperand(0);
+ if (CCOp.getOpcode() == ISD::SETCC)
+ CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+ }
+
+ bool DoXform = false;
+ bool InvCC = false;
+ assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+ "Bad input!");
+
+ if (LHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LHS)->isNullValue()) {
+ DoXform = true;
+ } else if (CC != ISD::SETCC_INVALID &&
+ RHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHS)->isNullValue()) {
+ std::swap(LHS, RHS);
+ SDValue Op0 = Slct.getOperand(0);
+ MVT OpVT = isSlctCC ? Op0.getValueType() :
+ Op0.getOperand(0).getValueType();
+ bool isInt = OpVT.isInteger();
+ CC = ISD::getSetCCInverse(CC, isInt);
+
+ if (!TLI.isCondCodeLegal(CC, OpVT))
+ return SDValue(); // Inverse operator isn't legal.
+
+ DoXform = true;
+ InvCC = true;
+ }
+
+ if (DoXform) {
+ SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+ if (isSlctCC)
+ return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+ Slct.getOperand(0), Slct.getOperand(1), CC);
+ SDValue CCOp = Slct.getOperand(0);
+ if (InvCC)
+ CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+ CCOp.getOperand(0), CCOp.getOperand(1), CC);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, OtherOp, Result);
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // added by evan in r37685 with no testcase.
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
+ if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // added by evan in r37685 with no testcase.
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+
+/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
+static SDValue PerformFMRRDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // fmrrd(fmdrr x, y) -> x,y
+ SDValue InDouble = N->getOperand(0);
+ if (InDouble.getOpcode() == ARMISD::FMDRR)
+ return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+ return SDValue();
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
+ }
+
+ return SDValue();
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+static bool isLegalAddressImmediate(int64_t V, MVT VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb()) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1.
+ break;
+ case MVT::i16:
+ Scale = 2;
+ break;
+ case MVT::i32:
+ Scale = 4;
+ break;
+ }
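+ // Thumb load / store offsets are an unsigned 5-bit immediate scaled by the
+ // access size, e.g. 0, 4, ..., 124 for i32.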
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+ }
+
+ if (V < 0)
+ V = - V;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ MVT VT = getValueType(Ty, true);
+ if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ int Scale = AM.Scale;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ case MVT::i64:
+ // This assumes i64 is legalized to a pair of i32. If not (i.e. if
+ // ldrd / strd are used), then its addressing mode is the same as for i16.
+ // r + r
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note that we allow "void" uses (basically, uses that aren't loads or
+ // stores), because ARM allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (AM.Scale & 1) return false;
+ return isPowerOf2_32(AM.Scale);
+ }
+ break;
+ }
+ return true;
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, MVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
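+ // (base register +/- an 8-bit immediate, or +/- a register offset)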
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -256) {
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
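+ // (base register +/- a 12-bit immediate, or +/- a possibly shifted
+ // register offset)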
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+ return false;
+}
+
+/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
+/// offset, and addressing mode by reference, if the node's address can be
+/// legally represented as a pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb())
+ return false;
+
+ MVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+ }
+ return false;
+}
+
+/// getPostIndexedAddressParts - Returns true, and sets the base pointer,
+/// offset, and addressing mode by reference, if this node can be combined
+/// with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb())
+ return false;
+
+ MVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+ return false;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ APInt KnownZeroRHS, KnownOneRHS;
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
+ KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l':
+ if (Subtarget->isThumb())
+ return std::make_pair(0U, ARM::tGPRRegisterClass);
+ else
+ return std::make_pair(0U, ARM::GPRRegisterClass);
+ case 'r':
+ return std::make_pair(0U, ARM::GPRRegisterClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, ARM::SPRRegisterClass);
+ if (VT == MVT::f64)
+ return std::make_pair(0U, ARM::DPRRegisterClass);
+ break;
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> ARMTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) { // GCC ARM Constraint Letters
+ default: break;
+ case 'l':
+ return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ 0);
+ case 'r':
+ return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::LR, 0);
+ case 'w':
+ if (VT == MVT::f32)
+ return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12,ARM::S13,ARM::S14,ARM::S15,
+ ARM::S16,ARM::S17,ARM::S18,ARM::S19,
+ ARM::S20,ARM::S21,ARM::S22,ARM::S23,
+ ARM::S24,ARM::S25,ARM::S26,ARM::S27,
+ ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
+ if (VT == MVT::f64)
+ return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10,ARM::D11,
+ ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
+ break;
+ }
+
+ return std::vector<unsigned>();
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char Constraint,
+ bool hasMemory,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ switch (Constraint) {
+ default: break;
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ int64_t CVal64 = C->getSExtValue();
+ int CVal = (int) CVal64;
+ // None of these constraints allow values larger than 32 bits. Check
+ // that the value fits in an int.
+ if (CVal != CVal64)
+ return;
+
+ switch (Constraint) {
+ case 'I':
+ if (Subtarget->isThumb()) {
+ // This must be a constant between 0 and 255, for ADD immediates.
+ if (CVal >= 0 && CVal <= 255)
+ break;
+ } else {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getSOImmVal(CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'J':
+ if (Subtarget->isThumb()) {
+ // This must be a constant between -255 and -1, for negated ADD
+ // immediates. This can be used in GCC with an "n" modifier that
+ // prints the negated value, for use with SUB instructions. It is
+ // not useful otherwise but is implemented for compatibility.
+ if (CVal >= -255 && CVal <= -1)
+ break;
+ } else {
+ // This must be a constant between -4095 and 4095. It is not clear
+ // what this constraint is intended for. Implemented for
+ // compatibility with GCC.
+ if (CVal >= -4095 && CVal <= 4095)
+ break;
+ }
+ return;
+
+ case 'K':
+ if (Subtarget->isThumb()) {
+ // A 32-bit value where only one byte has a nonzero value. Exclude
+ // zero to match GCC. This constraint is used by GCC internally for
+ // constants that can be loaded with a move/shift combination.
+ // It is not useful otherwise but is implemented for compatibility.
+ if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+ break;
+ } else {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getSOImmVal(~CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'L':
+ if (Subtarget->isThumb()) {
+ // This must be a constant between -7 and 7,
+ // for 3-operand ADD/SUB immediate instructions.
+ if (CVal >= -7 && CVal <= 7)
+ break;
+ } else {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getSOImmVal(-CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'M':
+ if (Subtarget->isThumb()) {
+ // This must be a multiple of 4 between 0 and 1020, for
+ // ADD sp + immediate.
+ if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+ break;
+ } else {
+ // A power of two or a constant between 0 and 32. This is used in
+ // GCC for the shift amount on shifted register operands, but it is
+ // useful in general for any shift amounts.
+ if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+ break;
+ }
+ return;
+
+ case 'N':
+ if (Subtarget->isThumb()) {
+ // This must be a constant between 0 and 31, for shift amounts.
+ if (CVal >= 0 && CVal <= 31)
+ break;
+ }
+ return;
+
+ case 'O':
+ if (Subtarget->isThumb()) {
+ // This must be a multiple of 4 between -508 and 508, for
+ // ADD/SUB sp = sp + immediate.
+ if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+ break;
+ }
+ return;
+ }
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
+ Ops, DAG);
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 0000000..2dab2db
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,184 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMPNZ, // ARM compare that uses only N or Z flags.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+ CMOV, // ARM conditional move instructions.
+ CNEG, // ARM conditional negate instructions.
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ FMRRD, // double to two gprs.
+ FMDRR, // Two gprs to double.
+
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp
+
+ THREAD_POINTER
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ public:
+ explicit ARMTargetLowering(TargetMachine &TM);
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of a node whose result type is
+ /// illegal with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
+ /// offset, and addressing mode by reference, if the node's address can be
+ /// legally represented as a pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// getPostIndexedAddressParts - Returns true, and sets the base pointer,
+ /// offset, and addressing mode by reference, if this node can be combined
+ /// with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const;
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraints of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ virtual const ARMSubtarget* getSubtarget() {
+ return Subtarget;
+ }
+
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+ const SDValue &StackPtr, const CCValAssign &VA,
+ SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags);
+ SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG);
+ SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG);
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff);
+ };
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 0000000..9a1e1c2
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,868 @@
+//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<5> val> {
+ bits<5> Value = val;
+}
+
+def Pseudo : Format<0>;
+def MulFrm : Format<1>;
+def BrFrm : Format<2>;
+def BrMiscFrm : Format<3>;
+
+def DPFrm : Format<4>;
+def DPSoRegFrm : Format<5>;
+
+def LdFrm : Format<6>;
+def StFrm : Format<7>;
+def LdMiscFrm : Format<8>;
+def StMiscFrm : Format<9>;
+def LdStMulFrm : Format<10>;
+
+def ArithMiscFrm : Format<11>;
+def ExtFrm : Format<12>;
+
+def VFPUnaryFrm : Format<13>;
+def VFPBinaryFrm : Format<14>;
+def VFPConv1Frm : Format<15>;
+def VFPConv2Frm : Format<16>;
+def VFPConv3Frm : Format<17>;
+def VFPConv4Frm : Format<18>;
+def VFPConv5Frm : Format<19>;
+def VFPLdStFrm : Format<20>;
+def VFPLdStMulFrm : Format<21>;
+def VFPMiscFrm : Format<22>;
+
+def ThumbFrm : Format<23>;
+
+// Misc flag for data processing instructions that indicates whether
+// the instruction has an Rn register operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+//===----------------------------------------------------------------------===//
+
+// ARM Instruction templates.
+//
+
+class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
+ Format f, string cstr>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "ARM";
+
+ // TSFlagsFields
+ AddrMode AM = am;
+ bits<4> AddrModeBits = AM.Value;
+
+ SizeFlagVal SZ = sz;
+ bits<3> SizeFlag = SZ.Value;
+
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+
+ Format F = f;
+ bits<5> Form = F.Value;
+
+ //
+ // Attributes specific to ARM instructions...
+ //
+ bit isUnaryDataProc = 0;
+
+ let Constraints = cstr;
+}
+
+class PseudoInst<dag oops, dag iops, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, ""> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+}
+
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
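+// For example, with opc = "add" and asm = " $dst, $a, $b" the AsmString
+// becomes "add${p} $dst, $a, $b", so a predicated instance prints as, e.g.,
+// "addeq r0, r1, r2".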
+
+// Same as I, except it can optionally modify CPSR. Note that the CPSR
+// operand is modeled as an input since by default it is the zero register;
+// it becomes an implicit def once it is "flipped" to actually set the flags.
+class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern>;
+
+// Ctrl flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, opc,
+ asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, asm,
+ "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXIx2<dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, asm,
+ "", pattern>;
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm,
+ asm, "", pattern>;
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = {0,0};
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = {0,0};
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = {0,0};
+}
+class AI1x2<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, opc,
+ asm, "", pattern>;
+
+
+// addrmode2 loads and stores
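+// In the encodings below: L = load (1) / store (0), W = writeback,
+// B = byte (1) / word (0), P = pre-indexed (1) / post-indexed (0).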
+class AI2<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{27-26} = {0,1};
+}
+
+// loads
+class AI2ldw<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AXI2ldw<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2ldb<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AXI2ldb<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// stores
+class AI2stw<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AXI2stw<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2stb<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AXI2stb<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// Pre-indexed loads
+class AI2ldwpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2ldbpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// Pre-indexed stores
+class AI2stwpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2stbpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// Post-indexed loads
+class AI2ldwpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2ldbpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// Post-indexed stores
+class AI2stwpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2stbpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// addrmode3 instructions
+class AI3<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern>;
+class AXI3<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern>;
+
+// loads
+class AI3ldh<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AXI3ldh<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldsh<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AXI3ldsh<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldsb<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AXI3ldsb<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldd<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+
+// stores
+class AI3sth<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AXI3sth<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3std<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+
+// Pre-indexed loads
+class AI3ldhpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldshpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldsbpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+}
+
+// Pre-indexed stores
+class AI3sthpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+}
+
+// Post-indexed loads
+class AI3ldhpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 0; // P bit
+}
+class AI3ldshpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 0; // P bit
+}
+class AI3ldsbpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 0; // P bit
+}
+
+// Post-indexed stores
+class AI3sthpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 0; // P bit
+}
+
+
+// addrmode4 instructions
+class AXI4ld<dag oops, dag iops, Format f, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{22} = 0; // S bit
+ let Inst{27-25} = 0b100;
+}
+class AXI4st<dag oops, dag iops, Format f, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{22} = 0; // S bit
+ let Inst{27-25} = 0b100;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
+ asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 0; // S bit
+ let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
+ asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
+ asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 1;
+ let Inst{27-21} = opcod;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
+class AMulxyI<bits<7> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
+ asm, "", pattern> {
+ let Inst{4} = 0;
+ let Inst{7} = 1;
+ let Inst{20} = 0;
+ let Inst{27-21} = opcod;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, opc,
+ asm, "", pattern> {
+ let Inst{7-4} = 0b0111;
+ let Inst{27-20} = opcod;
+}
+
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, opc,
+ asm, "", pattern> {
+ let Inst{27-20} = opcod;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Thumb Instruction Format Definitions.
+//
+
+
+// TI - Thumb instruction.
+
+class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+class TI<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>;
+class TI1<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeT1, Size2Bytes, asm, "", pattern>;
+class TI2<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeT2, Size2Bytes, asm, "", pattern>;
+class TI4<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeT4, Size2Bytes, asm, "", pattern>;
+class TIs<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeTs, Size2Bytes, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
+
+// BL, BLX(1) are translated by the assembler into two instructions.
+class TIx2<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>;
+
+// BR_JT instructions
+class TJTI<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// ARM VFP addrmode5 loads and stores
+class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStFrm, opc, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1011;
+}
+
+class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStFrm, opc, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1010;
+}
+
+// Load / store multiple
+class AXSI5<dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStMulFrm, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-8} = 0b1011;
+}
+
+class AXDI5<dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStMulFrm, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-8} = 0b1010;
+}
+
+
+// Double precision, unary
+class ADuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{19-16} = opcod2;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-4} = opcod3;
+}
+
+// Double precision, binary
+class ADbI<bits<8> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> {
+ let Inst{27-20} = opcod;
+ let Inst{11-8} = 0b1011;
+}
+
+// Single precision, unary
+class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> {
+ // Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding.
+ let Inst{27-20} = opcod1;
+ let Inst{19-16} = opcod2;
+ let Inst{11-8} = 0b1010;
+ let Inst{7-4} = opcod3;
+}
+
+// Single precision, binary
+class ASbI<bits<8> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> {
+ // Bit 22 (D bit) can be changed during instruction encoding.
+ let Inst{27-20} = opcod;
+ let Inst{11-8} = 0b1010;
+}
+
+// VFP conversion instructions
+class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3,
+ dag oops, dag iops, string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, VFPConv1Frm, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{19-16} = opcod2;
+ let Inst{11-8} = opcod3;
+ let Inst{6} = 1;
+}
+
+class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
+ string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, f, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{4} = 1;
+}
+
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, opc, asm, pattern>;
+
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, opc, asm, pattern>;
+
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, opc, asm, pattern>;
+
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, opc, asm, pattern>;
+
+//===----------------------------------------------------------------------===//
+
+
+// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
+class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb];
+}
+
+class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, HasV5T];
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 0000000..4b0dbb5
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,1025 @@
+//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
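+
+// These helpers append the standard trailing operands most ARM instructions
+// expect: AddDefaultPred adds an "always execute" predicate (ARMCC::AL plus a
+// zero condition register), and AddDefaultCC adds a zero cc_out register,
+// leaving the 's' bit clear. A typical use, as in copyRegToReg below:
+//   AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+//                                 .addReg(SrcReg)));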
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
+ RI(*this, STI) {
+}
+
+
+/// Return true if the instruction is a register to register move, and
+/// return the source and dest registers in the passed parameters.
+///
+bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+ unsigned oc = MI.getOpcode();
+ switch (oc) {
+ default:
+ return false;
+ case ARM::FCPYS:
+ case ARM::FCPYD:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ case ARM::MOVr:
+ case ARM::tMOVr:
+ case ARM::tMOVhir2lor:
+ case ARM::tMOVlor2hir:
+ case ARM::tMOVhir2hir:
+ assert(MI.getDesc().getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "Invalid ARM MOV instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+}
+
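+/// isLoadFromStackSlot - If the instruction is a direct load from a stack
+/// slot (a plain LDR / FLDD / FLDS / tRestore addressing a frame index with a
+/// zero offset), set FrameIndex and return the destination register;
+/// otherwise return 0.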
+unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDR:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FLDD:
+ case ARM::FLDS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::tRestore:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
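+/// isStoreToStackSlot - The store counterpart of the above: if the
+/// instruction is a direct store to a stack slot, set FrameIndex and return
+/// the source register; otherwise return 0.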
+unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STR:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FSTD:
+ case ARM::FSTS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::tSpill:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ DebugLoc dl = Orig->getDebugLoc();
+ if (Orig->getOpcode() == ARM::MOVi2pieces) {
+ RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(),
+ Orig->getOperand(2).getImm(),
+ Orig->getOperand(3).getReg(), this, false, dl);
+ return;
+ }
+
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+static unsigned getUnindexedOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE:
+ case ARM::LDR_POST:
+ return ARM::LDR;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE:
+ case ARM::LDRB_POST:
+ return ARM::LDRB;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE:
+ case ARM::STR_POST:
+ return ARM::STR;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE:
+ case ARM::STRB_POST:
+ return ARM::STRB;
+ }
+ return 0;
+}
+
+MachineInstr *
+ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ unsigned TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
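+ //
+ // For example, a post-indexed "ldr r0, [r1], #4" is split into
+ //   "ldr r0, [r1]" followed by "add r1, r1, #4",
+ // while a pre-indexed "ldr r0, [r1, #4]!" becomes
+ //   "add r1, r1, #4" followed by "ldr r0, [r1]".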
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ bool isLoad = !TID.mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ int SOImmVal = ARM_AM::getSOImmVal(Amt);
+ if (SOImmVal == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(SOImmVal)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // The immediate is 8 bits, so it's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
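+// AnalyzeBranch follows the usual TargetInstrInfo convention: return false on
+// success, filling in TBB (the taken block), FBB (the false block, null for a
+// fall-through) and Cond (the condition operands, empty for an unconditional
+// branch); return true if the terminators cannot be understood, e.g. an
+// indirect branch.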
+bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with ARM::B/ARM::tB and an ARM::Bcc/ARM::tBcc, handle it.
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+ (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &&
+ (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // ...likewise if it ends with a branch table followed by an unconditional
+ // branch. The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm ||
+ SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) &&
+ (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
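+// RemoveBranch erases the trailing unconditional branch and, if one precedes
+// it, the conditional branch as well, returning how many were removed
+// (0, 1 or 2).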
+unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+ int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != BccOpc)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+ int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (!AFI->isThumbFunction()) {
+ if (DestRC == ARM::GPRRegisterClass) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg)));
+ return true;
+ }
+ } else {
+ if (DestRC == ARM::GPRRegisterClass) {
+ if (SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
+ return true;
+ } else if (SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
+ return true;
+ }
+ } else if (DestRC == ARM::tGPRRegisterClass) {
+ if (SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
+ return true;
+ } else if (SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
+ return true;
+ }
+ }
+ }
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ if (DestRC == ARM::SPRRegisterClass)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
+ .addReg(SrcReg));
+ else if (DestRC == ARM::DPRRegisterClass)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
+ .addReg(SrcReg));
+ else
+ return false;
+
+ return true;
+}
+
+void ARMInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert (!AFI->isThumbFunction());
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addReg(0).addImm(0));
+ } else if (RC == ARM::tGPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert (AFI->isThumbFunction());
+ BuildMI(MBB, I, DL, get(ARM::tSpill))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ } else if (RC == ARM::DPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0));
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0));
+ }
+}
+
+void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ unsigned Opc = 0;
+ if (RC == ARM::GPRRegisterClass) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction()) {
+ Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR;
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+ }
+ Opc = ARM::STR;
+ } else if (RC == ARM::DPRRegisterClass) {
+ Opc = ARM::FSTD;
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ Opc = ARM::FSTS;
+ }
+
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ AddDefaultPred(MIB);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+void ARMInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert (!AFI->isThumbFunction());
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
+ .addFrameIndex(FI).addReg(0).addImm(0));
+ } else if (RC == ARM::tGPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert (AFI->isThumbFunction());
+ BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+ .addFrameIndex(FI).addImm(0);
+ } else if (RC == ARM::DPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
+ .addFrameIndex(FI).addImm(0));
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg)
+ .addFrameIndex(FI).addImm(0));
+ }
+}
+
+void ARMInstrInfo::
+loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ unsigned Opc = 0;
+ if (RC == ARM::GPRRegisterClass) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction()) {
+ Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+ }
+ Opc = ARM::LDR;
+ } else if (RC == ARM::DPRRegisterClass) {
+ Opc = ARM::FLDD;
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ Opc = ARM::FLDS;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ AddDefaultPred(MIB);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+bool ARMInstrInfo::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumbFunction() || CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ MIB.addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool ARMInstrInfo::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumbFunction() || CSI.empty())
+ return false;
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
+ MBB.insert(MI, PopMI);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue.
+ if (isVarArg)
+ continue;
+ Reg = ARM::PC;
+ PopMI->setDesc(get(ARM::tPOP_RET));
+ MBB.erase(MI);
+ }
+ PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
+ }
+ return true;
+}
+
+MachineInstr *ARMInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops, int FI) const {
+ if (Ops.size() != 1) return NULL;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ MachineInstr *NewMI = NULL;
+ switch (Opc) {
+ default: break;
+ case ARM::MOVr: {
+ if (MI->getOperand(4).getReg() == ARM::CPSR)
+ // If it is updating CPSR, then it cannot be folded.
+ break;
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ case ARM::tMOVr:
+ case ARM::tMOVlor2hir:
+ case ARM::tMOVhir2lor:
+ case ARM::tMOVhir2hir: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+ // tSpill cannot take a high register operand.
+ break;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+ // tRestore cannot target a high register operand.
+ break;
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI).addImm(0);
+ }
+ break;
+ }
+ case ARM::FCPYS: {
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
+ .addReg(SrcReg).addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS), DstReg)
+ .addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ case ARM::FCPYD: {
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ }
+
+ return NewMI;
+}
+
+bool ARMInstrInfo::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ if (Ops.size() != 1) return false;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default: break;
+ case ARM::MOVr:
+ // If it is updating CPSR, then it cannot be folded.
+ return MI->getOperand(4).getReg() != ARM::CPSR;
+ case ARM::tMOVr:
+ case ARM::tMOVlor2hir:
+ case ARM::tMOVhir2lor:
+ case ARM::tMOVhir2hir: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+ // tSpill cannot take a high register operand.
+ return false;
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+ // tRestore cannot target a high register operand.
+ return false;
+ }
+ return true;
+ }
+ case ARM::FCPYS:
+ case ARM::FCPYD:
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case ARM::BX_RET: // Return.
+ case ARM::LDM_RET:
+ case ARM::tBX_RET:
+ case ARM::tBX_RET_vararg:
+ case ARM::tPOP_RET:
+ case ARM::B:
+ case ARM::tB: // Uncond branch.
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: // Jumptable branch.
+ case ARM::BR_JTm: // Jumptable branch through mem.
+ case ARM::BR_JTadd: // Jumptable branch add to pc.
+ return true;
+ default: return false;
+ }
+}
+
+bool ARMInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMInstrInfo::isPredicated(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
+bool ARMInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::B || Opc == ARM::tB) {
+ MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc));
+ MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+ MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
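+// SubsumesPredicate - Return true if the first predicate is true whenever the
+// second one is, e.g. AL subsumes everything, and HS (unsigned >=) subsumes
+// HI (unsigned >) since HS holds any time HI does.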
+bool ARMInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) DISABLE_INLINE;
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
+
+ // Basic size info comes from the TSFlags field.
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned TSFlags = TID.TSFlags;
+
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default: {
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
+ if (MI->isLabel())
+ return 0;
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Unknown or unset size field for instr!");
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::DECLARE:
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ return 0;
+ }
+ break;
+ }
+ case ARMII::Size8Bytes: return 8; // Arm instruction x 2.
+ case ARMII::Size4Bytes: return 4; // Arm instruction.
+ case ARMII::Size2Bytes: return 2; // Thumb instruction.
+ case ARMII::SizeSpecial: {
+ switch (MI->getOpcode()) {
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_setjmp: return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is the branch itself (2 bytes for Thumb, 4 bytes
+ // for ARM) plus 4 bytes per jumptable entry.
+ unsigned NumOps = TID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) * 2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ return getNumJTEntries(JT, JTI) * 4 +
+ (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4);
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ }
+ }
+ return 0; // Not reached
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 0000000..13ff3fe
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,258 @@
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARM.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This four-bit field describes the addressing mode used.
+
+ AddrModeMask = 0xf,
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrModeT1 = 6,
+ AddrModeT2 = 7,
+ AddrModeT4 = 8,
+ AddrModeTs = 9, // i8 * 4 for pc and sp relative data
+
+ // Size* - Flags to keep track of the size of an instruction.
+ SizeShift = 4,
+ SizeMask = 7 << SizeShift,
+ SizeSpecial = 1, // 0 byte pseudo or special case.
+ Size8Bytes = 2,
+ Size4Bytes = 3,
+ Size2Bytes = 4,
+
+ // IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for
+ // load and store ops.
+ IndexModeShift = 7,
+ IndexModeMask = 3 << IndexModeShift,
+ IndexModePre = 1,
+ IndexModePost = 2,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 9,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 10,
+ FormMask = 0x1f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 11 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 12 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 13 << FormShift,
+ VFPBinaryFrm = 14 << FormShift,
+ VFPConv1Frm = 15 << FormShift,
+ VFPConv2Frm = 16 << FormShift,
+ VFPConv3Frm = 17 << FormShift,
+ VFPConv4Frm = 18 << FormShift,
+ VFPConv5Frm = 19 << FormShift,
+ VFPLdStFrm = 20 << FormShift,
+ VFPLdStMulFrm = 21 << FormShift,
+ VFPMiscFrm = 22 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 23 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - such shifts are used to set fields while generating
+ // machine instructions.
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+}
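+
+// The flags above are packed into TargetInstrDesc::TSFlags and decoded with
+// the masks / shifts they define, e.g.:
+//   unsigned AddrMode = TSFlags & ARMII::AddrModeMask;
+//   unsigned Size = (TSFlags & ARMII::SizeMask) >> ARMII::SizeShift;
+//   unsigned IdxMode = (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+// as done in GetInstSizeInBytes and convertToThreeAddress.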
+
+class ARMInstrInfo : public TargetInstrInfoImpl {
+ const ARMRegisterInfo RI;
+public:
+ explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ // Predication support.
+ virtual bool isPredicated(const MachineInstr *MI) const;
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 0000000..680e772
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,1390 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInFlag]>;
+def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
+ [SDNPInFlag]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutFlag]>;
+
+def ARMcmpNZ : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp,
+ [SDNPOutFlag]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_XFORM - Return a so_imm value packed into the format described for
+// so_imm def below.
+def so_imm_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getZExtValue()),
+ MVT::i32);
+}]>;
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getZExtValue()),
+ MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getZExtValue()),
+ MVT::i32);
+}]>;
+
+// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
+def rot_imm : PatLeaf<(i32 imm), [{
+ int32_t v = (int32_t)N->getZExtValue();
+ return v == 8 || v == 16 || v == 24;
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
+}]>;
+
+def so_imm_neg :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1;
+ }], so_imm_neg_XFORM>;
+
+def so_imm_not :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(int)N->getZExtValue()) != -1;
+ }], so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+def brtarget : Operand<OtherVT>;
+
+// A list of registers separated by commas. Used by load/store multiple.
+def reglist : Operand<i32> {
+ let PrintMethod = "printRegisterList";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+def jtblock_operand : Operand<i32> {
+ let PrintMethod = "printJTBlockOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
+// represented in the imm field in the same 12-bit form that they are encoded
+// into so_imm instructions: the 8-bit immediate is the least significant bits
+// [bits 0-7], and the 4-bit rotate amount is the next 4 bits [bits 8-11].
+def so_imm : Operand<i32>,
+ PatLeaf<(imm),
+ [{ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; }],
+ so_imm_XFORM> {
+ let PrintMethod = "printSOImmOperand";
+}
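+
+// For example, 0xFF000000 is a valid so_imm: it is 0xFF rotated right by 8,
+// encoded as imm8 = 0xFF with rotate field 4 (the field holds half the
+// rotation, so only even rotations are representable). Something like
+// 0x00FFFF00 is not representable and must be materialized another way,
+// e.g. via so_imm2part below.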
+
+// Break so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+ }]> {
+ let PrintMethod = "printSOImm2PartOperand";
+}
+
+def so_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+def so_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+
+// Define ARM specific addressing modes.
+
+// addrmode2 := reg +/- reg shop imm
+// addrmode2 := reg +/- imm12
+//
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
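+
+// E.g. "ldr r0, [r1, #-8]" uses the reg +/- imm12 form, while
+// "ldr r0, [r1, r2, lsl #2]" uses the reg +/- reg shop imm form.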
+
+def am2offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let PrintMethod = "printAddrMode3Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode4 := reg, <mode|W>
+//
+def addrmode4 : Operand<i32>,
+ ComplexPattern<i32, 2, "", []> {
+ let PrintMethod = "printAddrMode4Operand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
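+
+// The imm8*4 form gives word-aligned offsets in the range [-1020, 1020],
+// matching the VFP load / store instructions (FLDS, FLDD, FSTS, FSTD).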
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ARM Predicate operand. Defaults to 14 = always (AL). The second part is
+// the CC register, whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+//
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let PrintMethod = "printSBitModifierOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMInstrInfo.h.
+//
+
+// Addressing mode.
+class AddrMode<bits<4> val> {
+ bits<4> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrModeT1 : AddrMode<6>;
+def AddrModeT2 : AddrMode<7>;
+def AddrModeT4 : AddrMode<8>;
+def AddrModeTs : AddrMode<9>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+ bits<3> Value = val;
+}
+def SizeInvalid : SizeFlagVal<0>; // Unset.
+def SizeSpecial : SizeFlagVal<1>; // Pseudo or special.
+def Size8Bytes : SizeFlagVal<2>;
+def Size4Bytes : SizeFlagVal<3>;
+def Size2Bytes : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+ def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
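+
+// A typical instantiation (AND's data-processing opcode is 0b0000) would be:
+//   defm AND : AsI1_bin_irs<0b0000, "and",
+//                           BinOpFrag<(and node:$LHS, node:$RHS)>>;
+// yielding ANDri, ANDrr and ANDrs instructions.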
+
+/// ASI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let Defs = [CPSR] in {
+multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+ def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR.
+let Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm,
+ opc, " $a, $b",
+ [(opnode GPR:$a, so_imm:$b)]>;
+ def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm,
+ opc, " $a, $b",
+ [(opnode GPR:$a, GPR:$b)]>;
+ def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ opc, " $a, $b",
+ [(opnode GPR:$a, so_reg:$b)]>;
+}
+}
+
+/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src),
+ opc, " $dst, $Src",
+ [(set GPR:$dst, (opnode GPR:$Src))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src, i32imm:$rot),
+ opc, " $dst, $Src, ror $rot",
+ [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ }
+}
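+
+// Setting the Rn field (Inst{19-16}) to 0b1111 selects the pure extend form
+// (e.g. sxtb) rather than the extend-and-add form (e.g. sxtab); AI_bin_rrot
+// below leaves Rn free for the addend register.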
+
+/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
+ opc, " $dst, $LHS, $RHS",
+ [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ Requires<[IsARM, HasV6]>;
+ def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ opc, " $dst, $LHS, $RHS, ror $rot",
+ [(set GPR:$dst, (opnode GPR:$LHS,
+ (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]>;
+}
+
+/// AsXI1_bin_c_irs - Same as AsI1_bin_irs, but without the predicate operand
+/// and with the carry bit as an input. It can optionally set CPSR.
+let Uses = [CPSR] in {
+multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ def ri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s),
+ DPFrm, !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b, cc_out:$s),
+ DPFrm, !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+ def rs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s),
+ DPSoRegFrm, !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
+/// entry in the function. The first operand is the ID# for this instruction,
+/// the second is the index into the MachineConstantPool for this entry, and
+/// the third is the size in bytes of this constant pool entry.
+let isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size),
+ "${instid:label} ${cpidx:cpentry}", []>;
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p),
+ "@ ADJCALLSTACKUP $amt1",
+ [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p),
+ "@ ADJCALLSTACKDOWN $amt",
+ [(ARMcallseq_start timm:$amt)]>;
+}
+
+def DWARF_LOC :
+PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
+ ".loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1 in {
+def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ Pseudo, "$cp:\n\tadd$p $dst, pc, $a",
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+
+let AddedComplexity = 10 in {
+let canFoldAsLoad = 1 in
+def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr$p $dst, $addr",
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr${p}h $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr${p}b $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr${p}sh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr${p}sb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
+}
+let AddedComplexity = 10 in {
+def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tstr$p $src, $addr",
+ [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tstr${p}h $src, $addr",
+ [(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tstr${p}b $src, $addr",
+ [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+} // isNotDuplicable = 1
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, "bx", " lr", [(ARMretflag)]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+}
+
+// FIXME: remove when we have a way to mark an MI with these properties.
+// FIXME: $dst1 should be a def. But the extra ops must be in the end of the
+// operand list.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1 in
+ def LDM_RET : AXI4ld<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
+ LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
+ []>;
+
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in {
+ def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>;
+
+ def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
+ "bl", " ${func:call}",
+ [(ARMcall_pred tglobaladdr:$func)]>;
+
+ // ARMv5T and above
+ def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ "blx $func",
+ [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T]> {
+ let Inst{7-4} = 0b0011;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ }
+
+ let Uses = [LR] in {
+ // ARMv4T
+ def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops),
+ "mov lr, pc\n\tbx $func",
+ [(ARMcall_nolink GPR:$func)]>;
+ }
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // B is "predicable" since it can be xformed into a Bcc.
+ let isBarrier = 1 in {
+ let isPredicable = 1 in
+ def B : ABXI<0b1010, (outs), (ins brtarget:$target), "b $target",
+ [(br bb:$target)]>;
+
+ let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
+ "mov pc, $target \n$jt",
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
+ let Inst{20} = 0; // S Bit
+ let Inst{24-21} = 0b1101;
+ let Inst{27-26} = {0,0};
+ }
+ def BR_JTm : JTI<(outs),
+ (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
+ "ldr pc, $target \n$jt",
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+ }
+ def BR_JTadd : JTI<(outs),
+ (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
+ "add pc, $target, $idx \n$jt",
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+ imm:$id)]> {
+ let Inst{20} = 0; // S bit
+ let Inst{24-21} = 0b0100;
+ let Inst{27-26} = {0,0};
+ }
+ } // isNotDuplicable = 1, isIndirectBranch = 1
+ } // isBarrier = 1
+
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
+ "b", " $target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let canFoldAsLoad = 1 in
+def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+ "ldr", " $dst, $addr",
+ [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+ "ldr", " $dst, $addr", []>;
+
+// Loads with zero extension
+def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "h $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
+
+def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+ "ldr", "b $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "sh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "sb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1 in {
+// Load doubleword
+def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "d $dst, $addr",
+ []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed loads
+def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode2:$addr), LdFrm,
+ "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am2offset:$offset), LdFrm,
+ "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode2:$addr), LdFrm,
+ "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am2offset:$offset), LdFrm,
+ "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>;
+}
+
+// Store
+def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
+ "str", " $src, $addr",
+ [(store GPR:$src, addrmode2:$addr)]>;
+
+// Stores with truncate
+def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
+ "str", "h $src, $addr",
+ [(truncstorei16 GPR:$src, addrmode3:$addr)]>;
+
+def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
+ "str", "b $src, $addr",
+ [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
+
+// Store doubleword
+let mayStore = 1 in
+def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
+ "str", "d $src, $addr",
+ []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed stores
+def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm,
+ "str", " $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STR_POST : AI2stwpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ "str", " $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm,
+ "str", "h $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
+
+def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm,
+ "str", "h $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb, (post_truncsti16 GPR:$src,
+ GPR:$base, am3offset:$offset))]>;
+
+def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ "str", "b $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
+ GPR:$base, am2offset:$offset))]>;
+
+def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ "str", "b $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb, (post_truncsti8 GPR:$src,
+ GPR:$base, am2offset:$offset))]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// FIXME: $dst1 should be a def.
+let mayLoad = 1 in
+def LDM : AXI4ld<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
+ LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
+ []>;
+
+let mayStore = 1 in
+def STM : AXI4st<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops),
+ LdStMulFrm, "stm${p}${addr:submode} $addr, $src1",
+ []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm,
+ "mov", " $dst, $src", []>, UnaryDP;
+def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
+ "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm,
+ "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP;
+
+def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+ "mov", " $dst, $src, rrx",
+ [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+let Defs = [CPSR] in {
+def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+ "mov", "s $dst, $src, lsr #1",
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
+def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+ "mov", "s $dst, $src, asr #1",
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
+}
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm SXTB : AI_unary_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_unary_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+defm SXTAB : AI_bin_rrot<0b01101010,
+ "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm SXTAH : AI_bin_rrot<0b01101011,
+ "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+// TODO: SXT(A){B|H}16
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm UXTB : AI_unary_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_unary_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_unary_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 24)>;
+def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 8)>;
+
+defm UXTAB : AI_bin_rrot<0b01101110, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general: the add is two 16-bit adds, not one 32-bit add.
+//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>;
+
+// TODO: UXT(A){B|H}16
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<0b0100, "add",
+ BinOpFrag<(add node:$LHS, node:$RHS)>>;
+defm SUB : AsI1_bin_irs<0b0010, "sub",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set.
+defm ADDS : ASI1_bin_s_irs<0b0100, "add",
+ BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm SUBS : ASI1_bin_s_irs<0b0010, "sub",
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+// FIXME: Do not allow ADC / SBC to be predicated for now.
+defm ADC : AsXI1_bin_c_irs<0b0101, "adc",
+ BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm SBC : AsXI1_bin_c_irs<0b0110, "sbc",
+ BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+// These don't define reg/reg forms, because they are handled above.
+def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
+ "rsb", " $dst, $a, $b",
+ [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]>;
+
+def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ "rsb", " $dst, $a, $b",
+ [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>;
+
+// RSB with 's' bit set.
+let Defs = [CPSR] in {
+def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
+ "rsb", "s $dst, $a, $b",
+ [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]>;
+def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ "rsb", "s $dst, $a, $b",
+ [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>;
+}
+
+// FIXME: Do not allow RSC to be predicated for now. But they can set CPSR.
+let Uses = [CPSR] in {
+def RSCri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s),
+ DPFrm, "rsc${s} $dst, $a, $b",
+ [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>;
+def RSCrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s),
+ DPSoRegFrm, "rsc${s} $dst, $a, $b",
+ [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>;
+}
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
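+// For example, (sub GPR:$src, 1) reaches instruction selection as
+// (add GPR:$src, -1); so_imm_neg matches the -1 and the pattern above
+// selects "sub $dst, $src, #1" via SUBri.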
+
+//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
+// (SUBSri GPR:$src, so_imm_neg:$imm)>;
+//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm),
+// (SBCri GPR:$src, so_imm_neg:$imm)>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
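+// For example, with n = 3: (mul X, 9) becomes (add (shl X, 3), X), emitted
+// as "add dst, X, X, lsl #3", and (mul X, 7) becomes (rsb X, (shl X, 3)).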
+
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<0b0000, "and",
+ BinOpFrag<(and node:$LHS, node:$RHS)>>;
+defm ORR : AsI1_bin_irs<0b1100, "orr",
+ BinOpFrag<(or node:$LHS, node:$RHS)>>;
+defm EOR : AsI1_bin_irs<0b0001, "eor",
+ BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+defm BIC : AsI1_bin_irs<0b1110, "bic",
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm,
+ "mvn", " $dst, $src",
+ [(set GPR:$dst, (not GPR:$src))]>, UnaryDP;
+def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
+ "mvn", " $dst, $src",
+ [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
+ "mvn", " $dst, $imm",
+ [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP;
+
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+
+def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ "mul", " $dst, $a, $b",
+ [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
+
+def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ "mla", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
+
+// Extra precision multiplies with low / high results
+def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "smull", " $ldst, $hdst, $a, $b", []>;
+
+def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "umull", " $ldst, $hdst, $a, $b", []>;
+
+// Multiply + accumulate
+def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "smlal", " $ldst, $hdst, $a, $b", []>;
+
+def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "umlal", " $ldst, $hdst, $a, $b", []>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "umaal", " $ldst, $hdst, $a, $b", []>,
+ Requires<[IsARM, HasV6]>;
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ "smmul", " $dst, $a, $b",
+ [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ "smmla", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0001;
+}
+
+
+def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ "smmls", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b1101;
+}
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "bb"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 0;
+ }
+
+ def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "bt"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 1;
+ }
+
+ def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "tb"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sext_inreg GPR:$b, i16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 0;
+ }
+
+ def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "tt"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 1;
+ }
+
+ def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "wb"), " $dst, $a, $b",
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), 16))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 0;
+ }
+
+ def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "wt"), " $dst, $a, $b",
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sra GPR:$b, 16)), 16))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 1;
+ }
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc,
+ (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16))))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 0;
+ }
+
+ def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 1;
+ }
+
+ def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sext_inreg GPR:$b, i16))))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 0;
+ }
+
+ def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 1;
+ }
+
+ def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 0;
+ }
+
+ def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sra GPR:$b, 16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 1;
+ }
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// TODO: Halfword multiply accumulate long: SMLAL<x><y>
+// TODO: Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src),
+ "clz", " $dst, $src",
+ [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{11-8} = 0b1111;
+ let Inst{19-16} = 0b1111;
+}
+
+def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src),
+ "rev", " $dst, $src",
+ [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0011;
+ let Inst{11-8} = 0b1111;
+ let Inst{19-16} = 0b1111;
+}
+
+def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src),
+ "rev16", " $dst, $src",
+ [(set GPR:$dst,
+ (or (and (srl GPR:$src, 8), 0xFF),
+ (or (and (shl GPR:$src, 8), 0xFF00),
+ (or (and (srl GPR:$src, 8), 0xFF0000),
+ (and (shl GPR:$src, 8), 0xFF000000)))))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+ let Inst{19-16} = 0b1111;
+}
+
+def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src),
+ "revsh", " $dst, $src",
+ [(set GPR:$dst,
+ (sext_inreg
+ (or (srl (and GPR:$src, 0xFF00), 8),
+ (shl GPR:$src, 8)), i16))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+ let Inst{19-16} = 0b1111;
+}
+
+def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
+ (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
+ "pkhbt", " $dst, $src1, $src2, LSL $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
+ (and (shl GPR:$src2, (i32 imm:$shamt)),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{6-4} = 0b001;
+}
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
+ (PKHBT GPR:$src1, GPR:$src2, 0)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
+ (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+
+
+def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
+ (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
+ "pkhtb", " $dst, $src1, $src2, ASR $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
+ (and (sra GPR:$src2, imm16_31:$shamt),
+ 0xFFFF)))]>, Requires<[IsARM, HasV6]> {
+ let Inst{6-4} = 0b101;
+}
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here; that encoding is PKHBT instead.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)),
+ (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+ (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+defm CMN : AI1_cmp_irs<0b1011, "cmn",
+ BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>>;
+
+defm CMPnz : AI1_cmp_irs<0b1010, "cmp",
+ BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
+defm CMNnz : AI1_cmp_irs<0b1011, "cmn",
+ BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCs : AI1<0b1101, (outs GPR:$dst),
+ (ins GPR:$false, so_reg:$true), DPSoRegFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCi : AI1<0b1101, (outs GPR:$dst),
+ (ins GPR:$false, so_imm:$true), DPFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo,
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
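+
+// With a label L, a uid of 0, no predicate, and ignoring the target's
+// private-label prefix, LEApcrel expands to assembly along the lines of:
+//   .set PCRELV0, (L-(PCRELL0+8))
+// PCRELL0:
+//   add $dst, pc, #PCRELV0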
+
+def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p),
+ Pseudo,
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR] in {
+ def TPsoft : ABXI<0b1011, (outs), (ins),
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is a three-instruction sequence that stores the return
+// address and saves #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been clobbered
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+let Defs =
+ [ R0, R1, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR,
+ D0, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15 ] in {
+ def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src),
+ AddrModeNone, SizeSpecial, IndexModeNone, Pseudo,
+ "add r0, pc, #4\n\t"
+ "str r0, [$src, #+4]\n\t"
+ "mov r0, #0 @ eh_setjmp", "",
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Large immediate handling.
+
+// Two piece so_imms.
+let isReMaterializable = 1 in
+def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo,
+ "mov", " $dst, $src",
+ [(set GPR:$dst, so_imm2part:$src)]>;
+
+def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
+ (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
+def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
+ (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
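+// For example, 0x00FF00FF is not a valid so_imm but splits into the two
+// so_imms 0xFF0000 and 0xFF, so (or GPR:$LHS, 0x00FF00FF) is selected as
+// two ORRri instructions, one per piece.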
+
+// TODO: add,sub,and, 3-instr forms?
+
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra (shl GPR:$b, 16), 16)),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, 16)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16)),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, 16), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), 16),
+ (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, 16), 16),
+ (sra (shl GPR:$b, 16), 16))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, 16))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, 16), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16)),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), 16)),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 0000000..ffb83a8
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,562 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+
+/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
+def imm0_7 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 8;
+}]>;
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
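+
+// imm0_255_comp matches constants whose bitwise complement fits in 8 bits,
+// e.g. 0xFFFFFF00; the ThumbPat at the end of this file materializes such a
+// constant with a "mov" of the complement followed by "mvn".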
+
+def imm8_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getZExtValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
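+
+// For example, (add tGPR:$lhs, -5) matches imm0_7_neg; imm_neg_XFORM rewrites
+// the operand to 5 and the tSUBi3 pattern below selects
+// "sub $dst, $lhs, #5". imm8_255_neg and tSUBi8 handle -8 through -255 the
+// same way.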
+
+// Break immediates up into two pieces: an immediate plus a left shift. This
+// uses thumb_immshifted to match and thumb_immshifted_val and
+// thumb_immshifted_shamt to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
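+
+// For example, 0x9000 = 9 << 12, so the ThumbPat at the end of this file
+// materializes it as "mov $dst, #9" followed by "lsl $dst, $dst, #12".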
+
+// Define Thumb specific addressing modes.
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_s4 := reg + reg
+// reg + imm5 * 4
+//
+def t_addrmode_s4 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS4", []> {
+ let PrintMethod = "printThumbAddrModeS4Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+}
+
+// t_addrmode_s2 := reg + reg
+// reg + imm5 * 2
+//
+def t_addrmode_s2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS2", []> {
+ let PrintMethod = "printThumbAddrModeS2Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+}
+
+// t_addrmode_s1 := reg + reg
+// reg + imm5
+//
+def t_addrmode_s1 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS1", []> {
+ let PrintMethod = "printThumbAddrModeS1Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let Defs = [SP], Uses = [SP] in {
+def tADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "@ tADJCALLSTACKUP $amt1",
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb]>;
+
+def tADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt),
+ "@ tADJCALLSTACKDOWN $amt",
+ [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb]>;
+}
+
+let isNotDuplicable = 1 in
+def tPICADD : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
+ "$cp:\n\tadd $dst, pc",
+ [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in {
+ def tBX_RET : TI<(outs), (ins), "bx lr", [(ARMretflag)]>;
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), "bx $target", []>;
+}
+
+// FIXME: remove when we have a way to mark an MI with these properties.
+let isReturn = 1, isTerminator = 1 in
+def tPOP_RET : TI<(outs reglist:$dst1, variable_ops), (ins),
+ "pop $dst1", []>;
+
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7] in {
+ def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMtcall tglobaladdr:$func)]>;
+ // ARMv5T and above
+ def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops),
+ "blx ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>;
+ def tBLXr : TI<(outs), (ins tGPR:$func, variable_ops),
+ "blx $func",
+ [(ARMtcall tGPR:$func)]>, Requires<[HasV5T]>;
+ // ARMv4T
+ def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops),
+ "cpy lr, pc\n\tbx $func",
+ [(ARMcall_nolink tGPR:$func)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ let isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : TI<(outs), (ins brtarget:$target), "b $target",
+ [(br bb:$target)]>;
+
+ // Far jump
+ def tBfar : TIx2<(outs), (ins brtarget:$target), "bl $target\t@ far jump",[]>;
+
+ def tBR_JTr : TJTI<(outs),
+ (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
+ "cpy pc, $target \n\t.align\t2\n$jt",
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>;
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+ def tBcc : TI<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>;
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+let canFoldAsLoad = 1 in
+def tLDR : TI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr),
+ "ldr $dst, $addr",
+ [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>;
+
+def tLDRB : TI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr),
+ "ldrb $dst, $addr",
+ [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>;
+
+def tLDRH : TI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr),
+ "ldrh $dst, $addr",
+ [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>;
+
+def tLDRSB : TI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ "ldrsb $dst, $addr",
+ [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+def tLDRSH : TI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ "ldrsh $dst, $addr",
+ [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+let canFoldAsLoad = 1 in
+def tLDRspi : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
+ "ldr $dst, $addr",
+ [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>;
+
+// Special instruction for restore. It must not clobber the condition register
+// when it is expanded by eliminateCallFramePseudoInstr().
+let canFoldAsLoad = 1, mayLoad = 1 in
+def tRestore : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
+ "ldr $dst, $addr", []>;
+
+// Load tconstpool
+let canFoldAsLoad = 1 in
+def tLDRpci : TIs<(outs tGPR:$dst), (ins i32imm:$addr),
+ "ldr $dst, $addr",
+ [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+def tLDRcp : TIs<(outs tGPR:$dst), (ins i32imm:$addr),
+ "ldr $dst, $addr", []>;
+
+def tSTR : TI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr),
+ "str $src, $addr",
+ [(store tGPR:$src, t_addrmode_s4:$addr)]>;
+
+def tSTRB : TI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr),
+ "strb $src, $addr",
+ [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>;
+
+def tSTRH : TI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr),
+ "strh $src, $addr",
+ [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>;
+
+def tSTRspi : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
+ "str $src, $addr",
+ [(store tGPR:$src, t_addrmode_sp:$addr)]>;
+
+let mayStore = 1 in {
+// Special instruction for spill. It must not clobber the condition register
+// when it is expanded by eliminateCallFramePseudoInstr().
+def tSpill : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
+ "str $src, $addr", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// TODO: A7-44: LDMIA - load multiple
+
+let mayLoad = 1 in
+def tPOP : TI<(outs reglist:$dst1, variable_ops), (ins),
+ "pop $dst1", []>;
+
+let mayStore = 1 in
+def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops),
+ "push $src1", []>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Add with carry
+def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "adc $dst, $rhs",
+ [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
+
+def tADDS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>;
+
+
+def tADDi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>;
+
+def tADDi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "add $dst, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>;
+
+def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
+
+def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ "add $dst, $rhs @ addhirr", []>;
+
+def tADDrPCi : TI<(outs tGPR:$dst), (ins i32imm:$rhs),
+ "add $dst, pc, $rhs * 4", []>;
+
+def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
+ "add $dst, $sp, $rhs * 4 @ addrspi", []>;
+
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ "add $dst, $rhs * 4", []>;
+
+def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "and $dst, $rhs",
+ [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
+
+def tASRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "asr $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (sra tGPR:$lhs, imm:$rhs))]>;
+
+def tASRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "asr $dst, $rhs",
+ [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>;
+
+def tBIC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "bic $dst, $rhs",
+ [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>;
+
+
+def tCMN : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "cmn $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMPi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
+
+def tCMPr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
+
+def tTST : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "tst $lhs, $rhs",
+ [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
+
+def tCMNNZ : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "cmn $lhs, $rhs",
+ [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMPNZi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>;
+
+def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>;
+
+// TODO: A7-37: CMP(3) - cmp hi regs
+
+def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "eor $dst, $rhs",
+ [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
+
+def tLSLri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "lsl $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (shl tGPR:$lhs, imm:$rhs))]>;
+
+def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "lsl $dst, $rhs",
+ [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>;
+
+def tLSRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "lsr $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (srl tGPR:$lhs, imm:$rhs))]>;
+
+def tLSRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "lsr $dst, $rhs",
+ [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>;
+
+// FIXME: This is not rematerializable because mov changes the condition code.
+def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
+ "mov $dst, $src",
+ [(set tGPR:$dst, imm0_255:$src)]>;
+
+// TODO: A7-73: MOV(2) - mov setting flag.
+
+
+// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
+// which is MOV(3). This also supports high registers.
+def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "cpy $dst, $src", []>;
+def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src),
+ "cpy $dst, $src\t@ hir2lor", []>;
+def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src),
+ "cpy $dst, $src\t@ lor2hir", []>;
+def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
+ "cpy $dst, $src\t@ hir2hir", []>;
+
+def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "mul $dst, $rhs",
+ [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
+
+def tMVN : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "mvn $dst, $src",
+ [(set tGPR:$dst, (not tGPR:$src))]>;
+
+def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "neg $dst, $src",
+ [(set tGPR:$dst, (ineg tGPR:$src))]>;
+
+def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "orr $dst, $rhs",
+ [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
+
+
+def tREV : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "rev $dst, $src",
+ [(set tGPR:$dst, (bswap tGPR:$src))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "rev16 $dst, $src",
+ [(set tGPR:$dst,
+ (or (and (srl tGPR:$src, 8), 0xFF),
+ (or (and (shl tGPR:$src, 8), 0xFF00),
+ (or (and (srl tGPR:$src, 8), 0xFF0000),
+ (and (shl tGPR:$src, 8), 0xFF000000)))))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "revsh $dst, $src",
+ [(set tGPR:$dst,
+ (sext_inreg
+ (or (srl (and tGPR:$src, 0xFFFF), 8),
+ (shl tGPR:$src, 8)), i16))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tROR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "ror $dst, $rhs",
+ [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>;
+
+
+// Subtract with carry
+def tSBC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "sbc $dst, $rhs",
+ [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>;
+
+def tSUBS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>;
+
+
+// TODO: A7-96: STMIA - store multiple.
+
+def tSUBi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>;
+
+def tSUBi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "sub $dst, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>;
+
+def tSUBrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>;
+
+def tSUBspi : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "sub $dst, $rhs * 4", []>;
+
+def tSXTB : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "sxtb $dst, $src",
+ [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
+ Requires<[IsThumb, HasV6]>;
+def tSXTH : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "sxth $dst, $src",
+ [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
+ Requires<[IsThumb, HasV6]>;
+
+
+def tUXTB : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "uxtb $dst, $src",
+ [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
+ Requires<[IsThumb, HasV6]>;
+def tUXTH : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "uxth $dst, $src",
+ [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
+ Requires<[IsThumb, HasV6]>;
+
+
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation.
+// Expanded by the scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
+ def tMOVCCr :
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
+ "@ tMOVCCr $cc",
+ [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def tLEApcrel : TIx2<(outs tGPR:$dst), (ins i32imm:$label),
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("\tmov $dst, #PCRELV${:uid}\n",
+ "${:private}PCRELL${:uid}:\n\tadd $dst, pc")),
+ []>;
+
+def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id),
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("\tmov $dst, #PCRELV${:uid}\n",
+ "${:private}PCRELL${:uid}:\n\tadd $dst, pc")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, LR] in {
+ def tTPsoft : TIx2<(outs), (ins),
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ConstantPool, GlobalAddress
+def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// JumpTable
+def : ThumbPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Direct calls
+def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
+def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+
+// Indirect calls to ARM routines
+def : ThumbV5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>;
+
+// zextload i1 -> zextload i8
+def : ThumbPat<(zextloadi1 t_addrmode_s1:$addr),
+ (tLDRB t_addrmode_s1:$addr)>;
+
+// extload -> zextload
+def : ThumbPat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
+def : ThumbPat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
+def : ThumbPat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : ThumbPat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : ThumbPat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
new file mode 100644
index 0000000..168fb45
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -0,0 +1,12 @@
+//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb2 instruction set.
+//
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 0000000..0247daf
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,398 @@
+//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_FTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 :
+SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_FMDRR :
+SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
+def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let canFoldAsLoad = 1 in {
+def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
+ "fldd", " $dst, $addr",
+ [(set DPR:$dst, (load addrmode5:$addr))]>;
+
+def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
+ "flds", " $dst, $addr",
+ [(set SPR:$dst, (load addrmode5:$addr))]>;
+} // canFoldAsLoad
+
+def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
+ "fstd", " $src, $addr",
+ [(store DPR:$src, addrmode5:$addr)]>;
+
+def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
+ "fsts", " $src, $addr",
+ [(store SPR:$src, addrmode5:$addr)]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+let mayLoad = 1 in {
+def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1,
+ variable_ops),
+ "fldm${addr:submode}d${p} ${addr:base}, $dst1",
+ []> {
+ let Inst{20} = 1;
+}
+
+def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1,
+ variable_ops),
+ "fldm${addr:submode}s${p} ${addr:base}, $dst1",
+ []> {
+ let Inst{20} = 1;
+}
+}
+
+let mayStore = 1 in {
+def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1,
+ variable_ops),
+ "fstm${addr:submode}d${p} ${addr:base}, $src1",
+ []> {
+ let Inst{20} = 0;
+}
+
+def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1,
+ variable_ops),
+ "fstm${addr:submode}s${p} ${addr:base}, $src1",
+ []> {
+ let Inst{20} = 0;
+}
+} // mayStore
+
+// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "faddd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
+
+def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fadds", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
+
+// These are encoded as unary instructions.
+def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
+ "fcmped", " $a, $b",
+ [(arm_cmpfp DPR:$a, DPR:$b)]>;
+
+def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
+ "fcmpes", " $a, $b",
+ [(arm_cmpfp SPR:$a, SPR:$b)]>;
+
+def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "fdivd", " $dst, $a, $b",
+ [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
+
+def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fdivs", " $dst, $a, $b",
+ [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
+
+def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "fmuld", " $dst, $a, $b",
+ [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
+
+def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fmuls", " $dst, $a, $b",
+ [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+
+def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "fnmuld", " $dst, $a, $b",
+ [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> {
+ let Inst{6} = 1;
+}
+
+def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fnmuls", " $dst, $a, $b",
+ [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> {
+ let Inst{6} = 1;
+}
+
+// Match reassociated forms only when sign-dependent rounding is not honored.
+def : Pat<(fmul (fneg DPR:$a), DPR:$b),
+ (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
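+// Rationale: FNMUL computes -(a * b), i.e. it negates the already-rounded
+// product, whereas (-a) * b rounds the exact negated product. The two agree
+// except under sign-dependent (directed) rounding modes, hence the predicate.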
+
+
+def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "fsubd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> {
+ let Inst{6} = 1;
+}
+
+def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fsubs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
+ let Inst{6} = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ "fabsd", " $dst, $a",
+ [(set DPR:$dst, (fabs DPR:$a))]>;
+
+def FABSS : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ "fabss", " $dst, $a",
+ [(set SPR:$dst, (fabs SPR:$a))]>;
+
+def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
+ "fcmpezd", " $a",
+ [(arm_cmpfp0 DPR:$a)]>;
+
+def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
+ "fcmpezs", " $a",
+ [(arm_cmpfp0 SPR:$a)]>;
+
+def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
+ "fcvtds", " $dst, $a",
+ [(set DPR:$dst, (fextend SPR:$a))]>;
+
+// Special case encoding: bits 11-8 are 0b1011.
+def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
+ "fcvtsd", " $dst, $a",
+ [(set SPR:$dst, (fround DPR:$a))]> {
+ let Inst{27-23} = 0b11101;
+ let Inst{21-16} = 0b110111;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-4} = 0b1100;
+}
+
+def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ "fcpyd", " $dst, $a", []>;
+
+def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ "fcpys", " $dst, $a", []>;
+
+def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ "fnegd", " $dst, $a",
+ [(set DPR:$dst, (fneg DPR:$a))]>;
+
+def FNEGS : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ "fnegs", " $dst, $a",
+ [(set SPR:$dst, (fneg SPR:$a))]>;
+
+def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ "fsqrtd", " $dst, $a",
+ [(set DPR:$dst, (fsqrt DPR:$a))]>;
+
+def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ "fsqrts", " $dst, $a",
+ [(set SPR:$dst, (fsqrt SPR:$a))]>;
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
+ "fmrs", " $dst, $src",
+ [(set GPR:$dst, (bitconvert SPR:$src))]>;
+
+def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
+ "fmsr", " $dst, $src",
+ [(set SPR:$dst, (bitconvert GPR:$src))]>;
+
+def FMRRD : AVConv3I<0b11000101, 0b1011,
+ (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src),
+ "fmrrd", " $dst1, $dst2, $src",
+ [/* FIXME: Can't write pattern for multiple result instr*/]>;
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def FMDRR : AVConv5I<0b11000100, 0b1011,
+ (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
+ "fmdrr", " $dst, $src1, $src2",
+ [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX : SPR system reg -> GPR
+
+// FMSRR: GPR -> SPR
+
+// FMXR : GPR -> VFP system reg
+
+
+// Int to FP:
+
+def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ "fsitod", " $dst, $a",
+ [(set DPR:$dst, (arm_sitof SPR:$a))]> {
+ let Inst{7} = 1;
+}
+
+def FSITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
+ "fsitos", " $dst, $a",
+ [(set SPR:$dst, (arm_sitof SPR:$a))]> {
+ let Inst{7} = 1;
+}
+
+def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ "fuitod", " $dst, $a",
+ [(set DPR:$dst, (arm_uitof SPR:$a))]>;
+
+def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
+ "fuitos", " $dst, $a",
+ [(set SPR:$dst, (arm_uitof SPR:$a))]>;
+
+// FP to Int:
+// The Z bit is always set, i.e. these are the "round towards zero" variants.
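+// With the Z bit set, e.g. "ftosizd s0, d0" truncates toward zero regardless
+// of the FPSCR rounding mode, matching C's float-to-integer cast semantics.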
+
+def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
+ (outs SPR:$dst), (ins DPR:$a),
+ "ftosizd", " $dst, $a",
+ [(set SPR:$dst, (arm_ftosi DPR:$a))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def FTOSIZS : AVConv1I<0b11101011, 0b1101, 0b1010,
+ (outs SPR:$dst), (ins SPR:$a),
+ "ftosizs", " $dst, $a",
+ [(set SPR:$dst, (arm_ftosi SPR:$a))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
+ (outs SPR:$dst), (ins DPR:$a),
+ "ftouizd", " $dst, $a",
+ [(set SPR:$dst, (arm_ftoui DPR:$a))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010,
+ (outs SPR:$dst), (ins SPR:$a),
+ "ftouizs", " $dst, $a",
+ [(set SPR:$dst, (arm_ftoui SPR:$a))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+//===----------------------------------------------------------------------===//
+// FP FMA Operations.
+//
+
+def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ "fmacd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ "fmacs", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ "fmscd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ "fmscs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ "fnmacd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst"> {
+ let Inst{6} = 1;
+}
+
+def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ "fnmacs", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst"> {
+ let Inst{6} = 1;
+}
+
+def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ "fnmscd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst"> {
+ let Inst{6} = 1;
+}
+
+def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ "fnmscs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst"> {
+ let Inst{6} = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
+ (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+ "fcpyd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
+ (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+ "fcpys", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
+ (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+ "fnegd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
+ (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+ "fnegs", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+let Defs = [CPSR] in
+def FMSTAT : AI<(outs), (ins), VFPMiscFrm, "fmstat", "", [(arm_fmstat)]> {
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = 0b0001;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 0000000..e551c41
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,298 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ abort();
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us (we need
+// to preserve more context and manipulate the stack directly).  Instead,
+// we write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+    // Save caller-saved registers, since they may contain stuff
+ // for the real target function right now. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned, so we can't just preserve the callee saved regs.
+ "stmdb sp!, {r0, r1, r2, r3, lr}\n"
+#ifndef __SOFTFP__
+ "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ // The LR contains the address of the stub function on entry.
+ // pass it as the argument to the C part of the callback
+ "mov r0, lr\n"
+ "sub sp, sp, #4\n"
+ // Call the C portion of the callback
+ "bl " ASMPREFIX "ARMCompilationCallbackC\n"
+ "add sp, sp, #4\n"
+ // Restoring the LR to the return address of the function that invoked
+ // the stub and de-allocating the stack space for it requires us to
+ // swap the two saved LR values on the stack, as they're backwards
+ // for what we need since the pop instruction has a pre-determined
+ // order for the registers.
+ // +--------+
+ // 0 | LR | Original return address
+ // +--------+
+ // 1 | LR | Stub address (start of stub)
+ // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
+    // 6-21 | D0..D7 | Saved VFP registers (8 doubles = 16 words)
+ // +--------+
+ //
+#ifndef __SOFTFP__
+ // Restore VFP caller-saved registers.
+ "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ //
+ // We need to exchange the values in slots 0 and 1 so we can
+ // return to the address in slot 1 with the address in slot 0
+ // restored to the LR.
+ "ldr r0, [sp,#20]\n"
+ "ldr r1, [sp,#16]\n"
+ "str r1, [sp,#20]\n"
+ "str r0, [sp,#16]\n"
+ // Return to the (newly modified) stub to invoke the real function.
+ // The above twiddling of the saved return addresses allows us to
+ // deallocate everything, including the LR the stub saved, all in one
+ // pop instruction.
+ "ldmia sp!, {r0, r1, r2, r3, lr, pc}\n"
+ );
+#else // Not an ARM host
+ void ARMCompilationCallback() {
+ assert(0 && "Cannot call ARMCompilationCallback() on a non-ARM arch!\n");
+ abort();
+ }
+#endif
+}
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call. We're replacing the first two instructions of the
+ // stub with:
+ // ldr pc, [pc,#-4]
+ // <addr>
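+  // (On ARM, reading the PC yields the current instruction's address plus 8,
+  // so "ldr pc, [pc,#-4]" loads the word immediately following it.)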
+ if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
+ cerr << "ERROR: Unable to mark stub writable\n";
+ abort();
+ }
+ *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
+ *(intptr_t *)(StubAddr+4) = NewVal;
+ if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
+ cerr << "ERROR: Unable to mark stub executable\n";
+ abort();
+ }
+}
+
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return ARMCompilationCallback;
+}
+
+void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
+ JITCodeEmitter &JCE) {
+ JCE.startGVStub(GV, 4, 4);
+ JCE.emitWordLE((intptr_t)Ptr);
+ void *PtrAddr = JCE.finishGVStub(GV);
+ addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
+ return PtrAddr;
+}
+
+void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+      // The PIC stub is 16 bytes and 4-byte aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ if (F)
+ DOUT << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '"
+ << F->getName() << "'\n";
+ else
+ DOUT << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n";
+ }
+ JCE.startGVStub(F, 16, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+      JCE.emitWordLE(0xe59fc004);            // ldr ip, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ } else {
+      // The stub is 8 bytes and 4-byte aligned.
+ JCE.startGVStub(F, 8, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 8);
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+    // The stub is 16 bytes and 4-byte aligned.
+ JCE.startGVStub(F, 16, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ }
+
+ return JCE.finishGVStub(F);
+}
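+
+// Stub layouts emitted above, for reference (a sketch; "A" stands for the
+// illustrative start address of each stub):
+//
+//   Non-PIC:                        PIC:
+//   A+0: ldr pc, [pc, #-4]          A+0:  ldr ip, [pc, #4]  ; ip = LazyPtr-A-12
+//   A+4: <addr of Fn>               A+4:  add ip, pc, ip    ; ip = LazyPtr
+//                                   A+8:  ldr pc, [ip]
+//                                   A+12: <LazyPtr - (A+12)>
+//
+//   Lazy-compilation stub:
+//   A+0:  push {lr}
+//   A+4:  sub  lr, pc, #12         ; lr = A, so the stub restarts on return
+//   A+8:  ldr  pc, [pc, #-4]
+//   A+12: <addr of ARMCompilationCallback>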
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
+ case ARM::reloc_arm_machine_cp_entry: {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
+ assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
+ "Can't handle this machine constant pool entry yet!");
+ intptr_t Addr = (intptr_t)(MR->getResultPointer());
+ Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
+ return Addr;
+ }
+ }
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = resolveRelocDestAddr(MR);
+ switch ((ARM::RelocationType)MR->getRelocationType()) {
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_relative: {
+ // It is necessary to calculate the correct PC relative value. We
+ // subtract the base addr from the target addr to form a byte offset.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ // If the result is positive, set bit U(23) to 1.
+ if (ResultPtr >= 0)
+ *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
+ else {
+ // Otherwise, obtain the absolute value and set bit U(23) to 0.
+ *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
+ ResultPtr = - ResultPtr;
+ }
+ // Set the immed value calculated.
+ // VFP immediate offset is multiplied by 4.
+ if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
+ ResultPtr = ResultPtr >> 2;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ // Set register Rn to PC.
+ *((intptr_t*)RelocPos) |=
+ ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
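+      // Worked example (hypothetical addresses): an FLDD at 0x1000 that
+      // references a constant pool entry at 0x1010 gives
+      // 0x1010 - 0x1000 - 8 = 8 bytes; the encoded VFP immediate is
+      // 8 >> 2 = 2, with the U bit set since the offset is non-negative.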
+ break;
+ }
+ case ARM::reloc_arm_pic_jt:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_absolute: {
+ // These addresses have already been resolved.
+ *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_branch: {
+      // It is necessary to calculate the correct value of the signed_immed_24
+      // field. We subtract the base addr from the target addr to form a
+      // byte offset, which must be in the range -33554432 to +33554428.
+      // Then, we set the signed_immed_24 field of the instruction to bits
+      // [25:2] of the byte offset. For more details see ARM-ARM p. A4-11.
+      ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+      // Check the range before truncating to 24 bits, while the offset is
+      // still a signed byte displacement.
+      assert(ResultPtr >= -33554432 && ResultPtr <= 33554428 &&
+             "branch displacement out of range");
+      ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
+      *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_jt_base: {
+ // JT base - (instruction addr + 8)
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ }
+ }
+}
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
new file mode 100644
index 0000000..7dfeed8
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -0,0 +1,178 @@
+//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+ class ARMTargetMachine;
+
+ class ARMJITInfo : public TargetJITInfo {
+ // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
+ // CONSTPOOL_ENTRY addresses.
+ SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
+
+ // JumpTableId2AddrMap - A map from inline jumptable ids to the
+ // corresponding inline jump table bases.
+ SmallVector<intptr_t, 16> JumpTableId2AddrMap;
+
+ // PCLabelMap - A map from PC labels to addresses.
+ DenseMap<unsigned, intptr_t> PCLabelMap;
+
+ // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
+ // addresses to their indirect symbol addresses.
+ DenseMap<void*, intptr_t> Sym2IndirectSymMap;
+
+ // IsPIC - True if the relocation model is PIC. This is used to determine
+ // how to codegen function stubs.
+ bool IsPIC;
+
+ public:
+ explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// hasCustomConstantPool - Allows a target to specify that constant
+ /// pool address resolution is handled by the target.
+ virtual bool hasCustomConstantPool() const { return true; }
+
+ /// hasCustomJumpTables - Allows a target to specify that jumptables
+ /// are emitted by the target.
+ virtual bool hasCustomJumpTables() const { return true; }
+
+ /// allocateSeparateGVMemory - If true, globals should be placed in
+ /// separately allocated heap memory rather than in the same
+ /// code memory allocated by JITCodeEmitter.
+ virtual bool allocateSeparateGVMemory() const {
+#ifdef __APPLE__
+ return true;
+#else
+ return false;
+#endif
+ }
+
+    /// Initialize - Initialize internal state for the function being JITted.
+    /// Resize the constant pool id to CONSTPOOL_ENTRY address map; resize the
+    /// jump table id to jump table base map; remember whether the codegen
+    /// relocation model is PIC.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumConstPoolEntries());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+ }
+
+ /// getConstantPoolEntryAddr - The ARM target puts all constant
+ /// pool entries into constant islands. This returns the address of the
+ /// constant pool entry of the specified index.
+ intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ return ConstPoolId2AddrMap[CPI];
+ }
+
+ /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
+ /// where its associated value is stored. When relocations are processed,
+ /// this value will be used to resolve references to the constant.
+ void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ ConstPoolId2AddrMap[CPI] = Addr;
+ }
+
+    /// getJumpTableBaseAddr - The ARM target inlines all jump tables within
+    /// the text section of the function. This returns the address of the
+    /// base of the jump table of the specified index.
+ intptr_t getJumpTableBaseAddr(unsigned JTI) const {
+ assert(JTI < JumpTableId2AddrMap.size());
+ return JumpTableId2AddrMap[JTI];
+ }
+
+ /// addJumpTableBaseAddr - Map a jump table index to the address where
+ /// the corresponding inline jump table is emitted. When relocations are
+ /// processed, this value will be used to resolve references to the
+ /// jump table.
+ void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
+ assert(JTI < JumpTableId2AddrMap.size());
+ JumpTableId2AddrMap[JTI] = Addr;
+ }
+
+ /// getPCLabelAddr - Retrieve the address of the PC label of the specified id.
+ intptr_t getPCLabelAddr(unsigned Id) const {
+ DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
+ assert(I != PCLabelMap.end());
+ return I->second;
+ }
+
+ /// addPCLabelAddr - Remember the address of the specified PC label.
+ void addPCLabelAddr(unsigned Id, intptr_t Addr) {
+ PCLabelMap.insert(std::make_pair(Id, Addr));
+ }
+
+    /// getIndirectSymAddr - Retrieve the address of the indirect symbol of
+    /// the specified symbol located at the given address. Returns 0 if the
+    /// indirect symbol has not been emitted.
+ intptr_t getIndirectSymAddr(void *Addr) const {
+ DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
+ if (I != Sym2IndirectSymMap.end())
+ return I->second;
+ return 0;
+ }
+
+ /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
+ /// its indirect symbol address.
+ void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
+ Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
+ }
+
+ private:
+    /// resolveRelocDestAddr - Resolve the resulting address of the relocation
+    /// if it's not already resolved. Constant pool entries must be resolved
+    /// by the ARM target.
+ intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
+ };
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 0000000..047552f
--- /dev/null
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,778 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
+STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+
+namespace {
+ struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+ RegScavenger *RS;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
+        : Offset(o), Position(p), MBBI(i), Merged(false) {}
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ SmallVector<MachineBasicBlock::iterator, 4>
+ MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps);
+
+ void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
+ return new ARMLoadStoreOpt();
+}
+
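+// getLoadStoreMultipleOpcode - Map a single load / store opcode to its
+// load / store multiple counterpart. Note that it also bumps the matching
+// statistic as a side effect, so it should be called once per merged op.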
+static int getLoadStoreMultipleOpcode(int Opcode) {
+ switch (Opcode) {
+ case ARM::LDR:
+ NumLDMGened++;
+ return ARM::LDM;
+ case ARM::STR:
+ NumSTMGened++;
+ return ARM::STM;
+ case ARM::FLDS:
+ NumFLDMGened++;
+ return ARM::FLDMS;
+ case ARM::FSTS:
+ NumFSTMGened++;
+ return ARM::FSTMS;
+ case ARM::FLDD:
+ NumFLDMGened++;
+ return ARM::FLDMD;
+ case ARM::FSTD:
+ NumFSTMGened++;
+ return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeOps - Create and insert an LDM or STM with Base as base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
+static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ SmallVector<std::pair<unsigned, bool>, 8> &Regs,
+ const TargetInstrInfo *TII) {
+ // FIXME would it be better to take a DL from one of the loads arbitrarily?
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
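+  // Pick the LDM / STM sub-mode from the starting offset. E.g. for three
+  // registers: offset 0 -> ia, 4 -> ib, -8 -> da, -12 -> db; anything else
+  // requires materializing a new base register below.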
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ if (isAM4 && Offset == 4)
+ Mode = ARM_AM::ib;
+ else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
+ Mode = ARM_AM::da;
+ else if (isAM4 && Offset == -4 * (int)NumRegs)
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+    // If the starting offset isn't zero, insert an instruction to materialize
+    // a new base, but only if it is cost effective, i.e. when merging more
+    // than two loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (Opcode == ARM::LDR)
+      // If it is a load, just use one of the destination registers as the
+      // new base.
+ NewBase = Regs[NumRegs-1].first;
+ else {
+      // Otherwise use the scratch register as the new base.
+ NewBase = Scratch;
+ if (NewBase == 0)
+ return false;
+ }
+ int BaseOpc = ARM::ADDri;
+ if (Offset < 0) {
+ BaseOpc = ARM::SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ return false; // Probably not worth it then.
+
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ Base = NewBase;
+    BaseKill = true; // New base is always killed right after its use.
+ }
+
+ bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
+ bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ Opcode = getLoadStoreMultipleOpcode(Opcode);
+ MachineInstrBuilder MIB = (isAM4)
+ ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
+ : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
+ .addImm(Pred).addReg(PredReg);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
+ | getKillRegState(Regs[i].second));
+
+ return true;
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
+SmallVector<MachineBasicBlock::iterator, 4>
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps) {
+ SmallVector<MachineBasicBlock::iterator, 4> Merges;
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned Pos = MemOps[SIndex].Position;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+ unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
+ bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();
+
+ SmallVector<std::pair<unsigned,bool>, 8> Regs;
+ Regs.push_back(std::make_pair(PReg, isKill));
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
+ unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
+ isKill = MemOps[i].MBBI->getOperand(0).isKill();
+ // AM4 - register numbers in ascending order.
+ // AM5 - consecutive register numbers in ascending order.
+ if (NewOffset == Offset + (int)Size &&
+ ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+ Offset += Size;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ PRegNum = RegNum;
+ } else {
+      // Can't merge this one in. Try to merge the earlier ones first.
+ if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
+ Scratch, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned j = SIndex; j < i; ++j) {
+ MBB.erase(MemOps[j].MBBI);
+ MemOps[j].Merged = true;
+ }
+ }
+ SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
+ Merges.append(Merges2.begin(), Merges2.end());
+ return Merges;
+ }
+
+ if (MemOps[i].Position > Pos) {
+ Pos = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
+ Scratch, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
+ MBB.erase(MemOps[i].MBBI);
+ MemOps[i].Merged = true;
+ }
+ }
+
+ return Merges;
+}
+
+/// getInstrPredicate - If the instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, ARMCC::CondCodes Pred,
+ unsigned PredReg) {
+ unsigned MyPredReg = 0;
+ return (MI && MI->getOpcode() == ARM::SUBri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, ARMCC::CondCodes Pred,
+ unsigned PredReg) {
+ unsigned MyPredReg = 0;
+ return (MI && MI->getOpcode() == ARM::ADDri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDR:
+ case ARM::STR:
+ case ARM::FLDS:
+ case ARM::FSTS:
+ return 4;
+ case ARM::FLDD:
+ case ARM::FSTD:
+ return 8;
+ case ARM::LDM:
+ case ARM::STM:
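+    // Four fixed operands (base, mode immediate, predicate, predicate reg)
+    // precede the register list, hence the subtraction below.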
+ return (MI->getNumOperands() - 4) * 4;
+ case ARM::FLDMS:
+ case ARM::FSTMS:
+ case ARM::FLDMD:
+ case ARM::FSTMD:
+ return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ }
+}
+
+/// mergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
+/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ int Opcode = MI->getOpcode();
+ bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
+
+ if (isAM4) {
+ if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ // Can't use the updating AM4 sub-mode if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+ }
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ return true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ return true;
+ }
+ }
+ } else {
+ // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+ unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+    if (MBBI != MBB.end()) {
+      MachineBasicBlock::iterator NextMBBI = next(MBBI);
+      if (Mode == ARM_AM::ia &&
+          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
+        if (NextMBBI == I) {
+          Advance = true;
+          ++I;
+        }
+        MBB.erase(NextMBBI);
+        // Report success only when a trailing increment was actually folded.
+        return true;
+      }
+    }
+ }
+
+ return false;
+}
+
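+// VFP has no true pre/post-indexed load / store forms, so the FLDS / FLDD /
+// FSTS / FSTD cases below map to a writeback FLDM / FSTM transferring a
+// single register.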
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_PRE;
+ case ARM::STR: return ARM::STR_PRE;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_POST;
+ case ARM::STR: return ARM::STR_POST;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
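+///
+/// ldr rd, [rn]
+/// rn := rn + 4;
+/// =>
+/// ldr rd, [rn], #4  (LDR_POST)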
+static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
+ (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
+ return false;
+
+ bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (isLd && MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
+ Pred, PredReg)) {
+ DoMerge = true;
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ }
+ if (DoMerge) {
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+ unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
+ : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
+ true, isDPR ? 2 : 1);
+ if (isLd) {
+ if (isAM2)
+ // LDR_PRE, LDR_POST;
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // FLDMS, FLDMD
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Offset).addImm(Pred).addReg(PredReg)
+ .addReg(MI->getOperand(0).getReg(), RegState::Define);
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ if (isAM2)
+ // STR_PRE, STR_POST;
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(BaseKill))
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // FSTMS, FSTMD
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()));
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// isMemoryOp - Returns true if the instruction is a memory operation that
+/// this pass is capable of operating on.
+static bool isMemoryOp(MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::LDR:
+ case ARM::STR:
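+    // Only immediate-offset forms qualify; operand 2, the offset register,
+    // must be zero.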
+ return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
+ case ARM::FLDS:
+ case ARM::FSTS:
+ return MI->getOperand(1).isReg();
+ case ARM::FLDD:
+ case ARM::FSTD:
+ return MI->getOperand(1).isReg();
+ }
+ return false;
+}
+
+/// AdvanceRS - Advance the register scavenger to just before the earliest
+/// memory op that is being merged.
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
+ MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
+ unsigned Position = MemOps[0].Position;
+ for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
+ if (MemOps[i].Position < Position) {
+ Position = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (Loc != MBB.begin())
+ RS->forward(prior(Loc));
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops with the same base and incrementing offsets into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned CurrPredReg = 0;
+ unsigned Position = 0;
+
+ RS->enterBasicBlock(&MBB);
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ bool isMemOp = isMemoryOp(MBBI);
+ if (isMemOp) {
+ int Opcode = MBBI->getOpcode();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ unsigned Size = getLSMultipleTransferSize(MBBI);
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
+ unsigned NumOperands = MBBI->getDesc().getNumOperands();
+ unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
+ int Offset = isAM2
+ ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM2) {
+ if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ CurrPred = Pred;
+ CurrPredReg = PredReg;
+ MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // No need to match PredReg.
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ } else {
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ break;
+ } else if (Offset == I->Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (Advance) {
+ ++Position;
+ ++MBBI;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ // Try to find a free register to use as a new base in case it's needed.
+ // First advance to the instruction just before the start of the chain.
+ AdvanceRS(MBB, MemOps);
+        // Find a scratch register. Make sure it's a call-clobbered register
+        // or a spilled callee-saved register.
+ unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
+ if (!Scratch)
+ Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
+ AFI->getSpilledCSRegisters());
+ // Process the load / store instructions.
+ RS->forward(prior(MBBI));
+
+ // Merge ops.
+ SmallVector<MachineBasicBlock::iterator,4> MBBII =
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps);
+
+        // Try folding a preceding/trailing base inc/dec into the generated
+        // LDM/STM ops.
+ for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
+ if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
+ NumMerges++;
+ NumMerges += MBBII.size();
+
+        // Try folding a preceding/trailing base inc/dec into those loads /
+        // stores that were not merged to form LDM/STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+ NumMerges++;
+
+ // RS may be pointing to an instruction that's deleted.
+ RS->skipTo(prior(MBBI));
+ }
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ CurrSize = 0;
+ CurrPred = ARMCC::AL;
+ CurrPredReg = 0;
+ if (NumMemOps) {
+ MemOps.clear();
+ NumMemOps = 0;
+ }
+
+      // If the iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
+/// (bx lr) into the preceding stack restore so that it directly restores the
+/// value of LR into the pc.
+/// ldmfd sp!, {r7, lr}
+/// bx lr
+/// =>
+/// ldmfd sp!, {r7, pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
+ MachineInstr *PrevMI = prior(MBBI);
+ if (PrevMI->getOpcode() == ARM::LDM) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() == ARM::LR) {
+ PrevMI->setDesc(TII->get(ARM::LDM_RET));
+ MO.setReg(ARM::PC);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ RS = new RegScavenger();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+
+ delete RS;
+ return Modified;
+}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 0000000..6662be1
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,238 @@
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM target-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+  /// isThumb - True if this function is compiled under Thumb mode.
+  /// Used to initialize Align, so it must precede it.
+ bool isThumb;
+
+  /// Align - required alignment, stored as log2 of the byte alignment.  ARM
+  /// functions and Thumb functions with constant pools require 4-byte
+  /// alignment (2); other Thumb functions require only 2-byte alignment (1).
+ unsigned Align;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
+
+  /// LRSpilledForFarJump - True if the LR register has been spilled to
+  /// enable a far jump.
+ bool LRSpilledForFarJump;
+
+ /// R3IsLiveIn - True if R3 is live in to this function.
+ /// FIXME: Remove when register scavenger for Thumb is done.
+ bool R3IsLiveIn;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spills areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+ /// which belong to these spill areas.
+ BitVector GPRCS1Frames;
+ BitVector GPRCS2Frames;
+ BitVector DPRCSFrames;
+
+ /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers.
+ ///
+ BitVector SpilledCSRegs;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
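+  /// ConstPoolEntryUId - Unique id for constant pool entries; doubles as the
+  /// running count of entries created so far.
+  ///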
+ unsigned ConstPoolEntryUId;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ Align(2U),
+ VarArgsRegSaveSize(0), HasStackFrame(false),
+ LRSpilledForFarJump(false), R3IsLiveIn(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ JumpTableUId(0), ConstPoolEntryUId(0) {}
+
+ ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ Align(isThumb ? 1U : 2U),
+ VarArgsRegSaveSize(0), HasStackFrame(false),
+ LRSpilledForFarJump(false), R3IsLiveIn(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
+ JumpTableUId(0), ConstPoolEntryUId(0) {}
+
+ bool isThumbFunction() const { return isThumb; }
+
+ unsigned getAlign() const { return Align; }
+ void setAlign(unsigned a) { Align = a; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ // FIXME: Remove when register scavenger for Thumb is done.
+ bool isR3LiveIn() const { return R3IsLiveIn; }
+ void setR3IsLiveIn(bool l) { R3IsLiveIn = l; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ return false;
+ return GPRCS1Frames[fi];
+ }
+ bool isGPRCalleeSavedArea2Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS2Frames.size())
+ return false;
+ return GPRCS2Frames[fi];
+ }
+ bool isDPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)DPRCSFrames.size())
+ return false;
+ return DPRCSFrames[fi];
+ }
+
+ void addGPRCalleeSavedArea1Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS1Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS1Frames.resize(Size);
+ }
+ GPRCS1Frames[fi] = true;
+ }
+ }
+ void addGPRCalleeSavedArea2Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS2Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS2Frames.resize(Size);
+ }
+ GPRCS2Frames[fi] = true;
+ }
+ }
+ void addDPRCalleeSavedAreaFrame(int fi) {
+ if (fi >= 0) {
+ int Size = DPRCSFrames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ DPRCSFrames.resize(Size);
+ }
+ DPRCSFrames[fi] = true;
+ }
+ }
+
+ void setCSRegisterIsSpilled(unsigned Reg) {
+ SpilledCSRegs.set(Reg);
+ }
+
+ bool isCSRegisterSpilled(unsigned Reg) const {
+ return SpilledCSRegs[Reg];
+ }
+
+ const BitVector &getSpilledCSRegisters() const {
+ return SpilledCSRegs;
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+
+ unsigned getNumJumpTables() const {
+ return JumpTableUId;
+ }
+
+ void initConstPoolEntryUId(unsigned UId) {
+ ConstPoolEntryUId = UId;
+ }
+
+ unsigned getNumConstPoolEntries() const {
+ return ConstPoolEntryUId;
+ }
+
+ unsigned createConstPoolEntryUId() {
+ return ConstPoolEntryUId++;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 0000000..199858f
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,1528 @@
+//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool> ThumbRegScavenging("enable-thumb-reg-scavenging",
+ cl::Hidden,
+ cl::desc("Enable register scavenging on Thumb"));
+
+unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace ARM;
+ switch (RegEnum) {
+ case R0: case S0: case D0: return 0;
+ case R1: case S1: case D1: return 1;
+ case R2: case S2: case D2: return 2;
+ case R3: case S3: case D3: return 3;
+ case R4: case S4: case D4: return 4;
+ case R5: case S5: case D5: return 5;
+ case R6: case S6: case D6: return 6;
+ case R7: case S7: case D7: return 7;
+ case R8: case S8: case D8: return 8;
+ case R9: case S9: case D9: return 9;
+ case R10: case S10: case D10: return 10;
+ case R11: case S11: case D11: return 11;
+ case R12: case S12: case D12: return 12;
+ case SP: case S13: case D13: return 13;
+ case LR: case S14: case D14: return 14;
+ case PC: case S15: case D15: return 15;
+ case S16: return 16;
+ case S17: return 17;
+ case S18: return 18;
+ case S19: return 19;
+ case S20: return 20;
+ case S21: return 21;
+ case S22: return 22;
+ case S23: return 23;
+ case S24: return 24;
+ case S25: return 25;
+ case S26: return 26;
+ case S27: return 27;
+ case S28: return 28;
+ case S29: return 29;
+ case S30: return 30;
+ case S31: return 31;
+ default:
+ assert(0 && "Unknown ARM register!");
+ abort();
+ }
+}
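+
+// Note: in the mapping above, S13-S15 share their numbers with SP, LR and
+// PC. The two-argument overload below sets isSPVFP so callers can tell
+// single-precision VFP registers apart from the core registers.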
+
+unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum,
+ bool &isSPVFP) {
+ isSPVFP = false;
+
+ using namespace ARM;
+ switch (RegEnum) {
+ default:
+ assert(0 && "Unknown ARM register!");
+ abort();
+ case R0: case D0: return 0;
+ case R1: case D1: return 1;
+ case R2: case D2: return 2;
+ case R3: case D3: return 3;
+ case R4: case D4: return 4;
+ case R5: case D5: return 5;
+ case R6: case D6: return 6;
+ case R7: case D7: return 7;
+ case R8: case D8: return 8;
+ case R9: case D9: return 9;
+ case R10: case D10: return 10;
+ case R11: case D11: return 11;
+ case R12: case D12: return 12;
+ case SP: case D13: return 13;
+ case LR: case D14: return 14;
+ case PC: case D15: return 15;
+
+ case S0: case S1: case S2: case S3:
+ case S4: case S5: case S6: case S7:
+ case S8: case S9: case S10: case S11:
+ case S12: case S13: case S14: case S15:
+ case S16: case S17: case S18: case S19:
+ case S20: case S21: case S22: case S23:
+ case S24: case S25: case S26: case S27:
+ case S28: case S29: case S30: case S31: {
+ isSPVFP = true;
+ switch (RegEnum) {
+ default: return 0; // Avoid compile time warning.
+ case S0: return 0;
+ case S1: return 1;
+ case S2: return 2;
+ case S3: return 3;
+ case S4: return 4;
+ case S5: return 5;
+ case S6: return 6;
+ case S7: return 7;
+ case S8: return 8;
+ case S9: return 9;
+ case S10: return 10;
+ case S11: return 11;
+ case S12: return 12;
+ case S13: return 13;
+ case S14: return 14;
+ case S15: return 15;
+ case S16: return 16;
+ case S17: return 17;
+ case S18: return 18;
+ case S19: return 19;
+ case S20: return 20;
+ case S21: return 21;
+ case S22: return 22;
+ case S23: return 23;
+ case S24: return 24;
+ case S25: return 25;
+ case S26: return 26;
+ case S27: return 27;
+ case S28: return 28;
+ case S29: return 29;
+ case S30: return 30;
+ case S31: return 31;
+ }
+ }
+ }
+}
+
+ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ TII(tii), STI(sti),
+ FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Val,
+ unsigned Pred, unsigned PredReg,
+ const TargetInstrInfo *TII,
+ bool isThumb,
+ DebugLoc dl) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ Constant *C = ConstantInt::get(Type::Int32Ty, Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+ if (isThumb)
+ BuildMI(MBB, MBBI, dl,
+ TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx);
+ else
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+}
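+
+// Usage sketch (illustrative): a call such as
+//   emitLoadConstPool(MBB, MBBI, ARM::R2, 0x12345678, ARMCC::AL, 0, &TII,
+//     false, dl);
+// creates a 4-byte constant-pool entry holding 0x12345678 and loads it into
+// r2 with a pc-relative LDRcp.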
+
+const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
+ return &ARM::GPRRegClass;
+}
+
+/// isLowRegister - Returns true if the register is a low register (r0-r7).
+///
+bool ARMRegisterInfo::isLowRegister(unsigned Reg) const {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+const TargetRegisterClass*
+ARMRegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
+ if (STI.isThumb()) {
+ if (isLowRegister(Reg))
+ return ARM::tGPRRegisterClass;
+ switch (Reg) {
+ default:
+ break;
+ case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
+ case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC:
+ return ARM::GPRRegisterClass;
+ }
+ }
+ return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
+}
+
+const unsigned*
+ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const *
+ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+ static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = {
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass,
+ &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+ return STI.isThumb() ? ThumbCalleeSavedRegClasses : CalleeSavedRegClasses;
+}
+
+BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ if (STI.isTargetDarwin() || hasFP(MF))
+ Reserved.set(FramePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ return Reserved;
+}
+
+bool
+ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
+ switch (Reg) {
+ default: break;
+ case ARM::SP:
+ case ARM::PC:
+ return true;
+ case ARM::R7:
+ case ARM::R11:
+ if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ return true;
+ break;
+ case ARM::R9:
+ return STI.isR9Reserved();
+ }
+
+ return false;
+}
+
+bool
+ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ return ThumbRegScavenging || !AFI->isThumbFunction();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+///
+bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+// not required, we reserve argument space for call sites in the function
+// immediately on entry to the current function. This eliminates the need for
+// add/sub sp brackets around call sites. Returns true if the call frame is
+// included as part of the stack frame.
+bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has only small immediate offsets for
+ // addressing the stack frame, so a large call frame can cause poor codegen
+ // and may even make it impossible to scavenge a register.
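+ // For illustration, the cutoffs below work out to ((1 << 8) - 1) * 4 / 2
+ // == 510 bytes for Thumb (half of the scaled imm8 range) and
+ // ((1 << 12) - 1) / 2 == 2047 bytes for ARM (half of the imm12 range).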
+ if (AFI->isThumbFunction()) {
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+ } else {
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+ }
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// emitARMRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in ARM code.
+static
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo &TII,
+ DebugLoc dl) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ // Get the properly encoded SOImmVal field.
+ int SOImmVal = ARM_AM::getSOImmVal(ThisVal);
+ assert(SOImmVal != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(SOImmVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ BaseReg = DestReg;
+ }
+}
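+
+// Worked example (illustrative): NumBytes == 0x10004 has set bits too far
+// apart to encode as one 8-bit rotated SO immediate, so the loop above emits
+// two instructions, e.g. add rD, rB, #0x4 followed by add rD, rD, #0x10000.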
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specific add / sub r, c instruction.
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+ unsigned NumBits, unsigned Scale) {
+ unsigned NumMIs = 0;
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+ if (Opc == ARM::tADDrSPi) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ NumMIs++;
+ NumBits = 8;
+ Scale = 1; // Followed by a number of tADDi8.
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ }
+
+ NumMIs += Bytes / Chunk;
+ if ((Bytes % Chunk) != 0)
+ NumMIs++;
+ if (ExtraOpc)
+ NumMIs++;
+ return NumMIs;
+}
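+
+// Worked example (illustrative): for tADDrSPi with Bytes == 1024 (NumBits ==
+// 8, Scale == 4), the first instruction covers 1020 bytes and the remaining
+// 4 bytes take one follow-on tADDi8, so calcNumMI returns 2.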
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII,
+ const ARMRegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isHigh = !MRI.isLowRegister(DestReg) ||
+ (BaseReg != 0 && !MRI.isLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have a high register version. Load the negative value
+ // if either the base or dest register is a high register. Also, do not
+ // issue sub as part of the sequence if the condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP) {
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ LdReg = ARM::R3;
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+ .addReg(ARM::R3, RegState::Kill);
+ }
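+ // SP itself cannot hold the materialized immediate, so R3 is borrowed as
+ // the load register here; its previous value is parked in R12 above and
+ // restored with tMOVhir2lor once the add / sub below has executed.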
+
+ if (NumBytes <= 255 && NumBytes >= 0)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+ else if (NumBytes < 0 && NumBytes >= -255) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg)
+ .addReg(LdReg, RegState::Kill);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, &TII,
+ true, dl);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl,
+ TII.get(Opc), DestReg);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ if (DestReg == ARM::SP)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+ .addReg(ARM::R12, RegState::Kill);
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+static
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMRegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ bool DstNotEqBase = false;
+ unsigned NumBits = 1;
+ unsigned Scale = 1;
+ int Opc = 0;
+ int ExtraOpc = 0;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ // r1 = add sp, 403
+ // =>
+ // r1 = add sp, 100 * 4
+ // r1 = add r1, 3
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Opc = ARM::tADDrSPi;
+ } else {
+ // sp = sub sp, c
+ // r1 = sub sp, c
+ // r8 = sub sp, c
+ if (DestReg != BaseReg)
+ DstNotEqBase = true;
+ NumBits = 8;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ isTwoAddr = true;
+ }
+
+ unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+ if (NumMIs > Threshold) {
+ // This will expand into too many instructions. Load the immediate from a
+ // constpool entry.
+ emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
+ MRI, dl);
+ return;
+ }
+
+ if (DstNotEqBase) {
+ if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) {
+ // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill);
+ }
+ BaseReg = DestReg;
+ }
+
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ ThisVal /= Scale;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr)
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(DestReg).addImm(ThisVal);
+ else {
+ bool isKill = BaseReg != ARM::SP;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ BaseReg = DestReg;
+
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Scale = 1;
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc)
+ BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(((unsigned)NumBytes) & 3);
+}
+
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg,
+ bool isThumb, const TargetInstrInfo &TII,
+ const ARMRegisterInfo& MRI,
+ DebugLoc dl) {
+ if (isThumb)
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, dl);
+ else
+ emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII, dl);
+}
+
+void ARMRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
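+ // e.g. (illustrative): with a stack alignment of 8, Amount == 20 rounds
+ // up to (20 + 8 - 1) / 8 * 8 == 24.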
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ bool isThumb = AFI->isThumbFunction();
+ ARMCC::CondCodes Pred = isThumb
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg();
+ emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this, dl);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = isThumb ? 0 : Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this, dl);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII,
+ const ARMRegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+ if (isSub)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
+ .addReg(DestReg, RegState::Kill);
+}
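+
+// Worked example (illustrative): emitThumbConstant with Imm == 300 emits
+// tMOVi8 DestReg, #255 and then adds the remaining 45; a negative Imm is
+// materialized as its magnitude and negated with a trailing tNEG.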
+
+/// findScratchRegister - Find a 'free' ARM register. If the register
+/// scavenger is not being used, R12 is available. Otherwise, try for a
+/// call-clobbered register first and then a spilled callee-saved register if
+/// that fails.
+static
+unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC,
+ ARMFunctionInfo *AFI) {
+ unsigned Reg = RS ? RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12;
+ assert (!AFI->isThumbFunction());
+ if (Reg == 0)
+ // Try an already spilled CS register.
+ Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters());
+
+ return Reg;
+}
+
+void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
+ Offset -= AFI->getDPRCalleeSavedAreaOffset();
+ else if (hasFP(MF)) {
+ assert(SPAdj == 0 && "Unexpected");
+ // There are alloca()'s in this function; we must reference off the frame
+ // pointer instead.
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ }
+
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(i+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(i+1);
+ return;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(ImmedOffset);
+ return;
+ }
+
+ // Otherwise, we fall back to common code below to form the imm offset with
+ // a sequence of ADDri instructions. First though, pull as much of the imm
+ // into this ADDri as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal);
+ assert(ThisSOImmVal != -1 && "Bit extraction didn't work?");
+ MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal);
+ } else if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(i+1).getImm();
+
+ // Can't use tADDrSPi if it's based off the frame pointer.
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (FrameReg != ARM::SP) {
+ Opcode = ARM::tADDi3;
+ MI.setDesc(TII.get(ARM::tADDi3));
+ NumBits = 3;
+ } else {
+ NumBits = 8;
+ Scale = 4;
+ assert((Offset & 3) == 0 &&
+ "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+ }
+
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVhir2lor));
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(i+1);
+ return;
+ }
+
+ // Common case: small offset, fits into instruction.
+ unsigned Mask = (1 << NumBits) - 1;
+ if (((Offset / Scale) & ~Mask) == 0) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+ return;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+ unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+ // MI would expand into a large number of instructions. Don't try to
+ // simplify the immediate.
+ if (NumMIs > 2) {
+ emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
+ *this, dl);
+ MBB.erase(II);
+ return;
+ }
+
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Mask);
+ Offset = (Offset - Mask * Scale);
+ MachineBasicBlock::iterator NII = next(II);
+ emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
+ *this, dl);
+ } else {
+ // Translate r0 = add sp, -imm to
+ // r0 = -imm (this is then translated into a series of instructions)
+ // r0 = add r0, sp
+ emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+ MI.setDesc(TII.get(ARM::tADDhirr));
+ MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ }
+ return;
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode2: {
+ ImmIdx = i+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = i+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode5: {
+ ImmIdx = i+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrModeTs: {
+ ImmIdx = i+1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ Scale = 4;
+ break;
+ }
+ default:
+ assert(0 && "Unsupported addressing mode!");
+ abort();
+ break;
+ }
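+ // Summary of the ranges handled above: AddrMode2 takes a 12-bit byte
+ // offset, AddrMode3 an 8-bit byte offset, AddrMode5 an 8-bit offset scaled
+ // by 4 (up to 1020 bytes), and AddrModeTs an 8-bit offset scaled by 4 when
+ // based off SP (5-bit otherwise).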
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0 && !isThumb) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ return;
+ }
+
+ bool isThumbSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
+ if (AddrMode == ARMII::AddrModeTs) {
+ // Thumb tLDRspi, tSTRspi. These will change to instructions that use
+ // a different base register.
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+ }
+ // If this is a thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (AddrMode == ARMII::AddrModeTs && isThumbSpillRestore)
+ ImmOp.ChangeToImmediate(0);
+ else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ if (isThumb) {
+ if (Desc.mayLoad()) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tRestore) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+ Offset, false, TII, *this, dl);
+ else {
+ emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
+ true, dl);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+ *this, dl);
+ MI.setDesc(TII.get(ARM::tLDR));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode.
+ MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+ else // tLDR has an extra register operand.
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+ } else if (Desc.mayStore()) {
+ // FIXME! This is horrific!!! We need register scavenging.
+ // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+ // also an ABI register, so it's possible that it is the register being
+ // stored here. If that's the case, we do the following:
+ // r12 = r2
+ // Use r2 to materialize sp + offset
+ // str r3, r2
+ // r2 = r12
+ unsigned ValReg = MI.getOperand(0).getReg();
+ unsigned TmpReg = ARM::R3;
+ bool UseRR = false;
+ if (ValReg == ARM::R3) {
+ BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+ .addReg(ARM::R2, RegState::Kill);
+ TmpReg = ARM::R2;
+ }
+ if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+ BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+ .addReg(ARM::R3, RegState::Kill);
+ if (Opcode == ARM::tSpill) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+ Offset, false, TII, *this, dl);
+ else {
+ emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
+ true, dl);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+ *this, dl);
+ MI.setDesc(TII.get(ARM::tSTR));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR) // Use [reg, reg] addrmode.
+ MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+ else // tSTR has an extra register operand.
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+
+ MachineBasicBlock::iterator NII = next(II);
+ if (ValReg == ARM::R3)
+ BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
+ .addReg(ARM::R12, RegState::Kill);
+ if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+ BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+ .addReg(ARM::R12, RegState::Kill);
+ } else
+ assert(false && "Unexpected opcode!");
+ } else {
+ // Insert a set of r12 with the full address: r12 = sp + offset
+ // If the offset we have is too large to fit into the instruction, we need
+ // to form it with a series of ADDri's. Do this by taking 8-bit chunks
+ // out of 'Offset'.
+ unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI);
+ if (ScratchReg == 0)
+ // No register is "free". Scavenge a register.
+ ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj);
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
+ isSub ? -Offset : Offset, Pred, PredReg, TII, dl);
+ MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ }
+}
+
+static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ int Offset = 0;
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -FFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ Offset += FFI->getObjectSize(i);
+ unsigned Align = FFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+ }
+ return (unsigned)Offset;
+}
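+
+// Worked example (illustrative): two 4-byte objects that each require 8-byte
+// alignment estimate to 16 bytes: 0 + 4 rounds up to 8, then 8 + 4 rounds up
+// to 16.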
+
+void
+ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it were any other callee-save
+ // register, to take advantage of the eliminateFrameIndex machinery. This
+ // also ensures it is spilled in the order specified by getCalleeSavedRegs()
+ // to make it easier to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ AFI->setCSRegisterIsSpilled(Reg);
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (CSRegClasses[i] == &ARM::GPRRegClass) {
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumbFunction()) {
+ unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
+ // Force LR to be spilled if the Thumb function size is >= 2048. This
+ // enables the use of BL to implement a far jump. If it turns out that it's
+ // not needed, the branch fix-up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ bool ExtraCSSpill = false;
+ if (!CanEliminateFrame || hasFP(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled but at least one of R4, R5, R6, and R7 is, spill LR
+ // as well so we can fold BX_RET into the register restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setCSRegisterIsSpilled(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if (STI.isTargetDarwin() || hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If the stack and doubles are 8-byte aligned and we are spilling an odd
+ // number of GPRs, spill one extra callee-save GPR so we won't have to pad
+ // between the integer and double callee-save areas.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill a high register if the function is Thumb.
+ if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) {
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ AFI->setCSRegisterIsSpilled(Reg);
+ if (!isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() &&
+ !AFI->isThumbFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ AFI->setCSRegisterIsSpilled(Reg);
+ if (!isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging.
+ if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = estimateStackSize(MF, MFI);
+ unsigned Limit = (1 << 12) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end();BB != E; ++BB)
+ for (MachineBasicBlock::iterator I= BB->begin(); I != BB->end(); ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isFI()) {
+ unsigned Opcode = I->getOpcode();
+ const TargetInstrDesc &Desc = TII.get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ if (AddrMode == ARMII::AddrMode3) {
+ Limit = (1 << 8) - 1;
+ goto DoneEstimating;
+ } else if (AddrMode == ARMII::AddrMode5) {
+ unsigned ThisLimit = ((1 << 8) - 1) * 4;
+ if (ThisLimit < Limit)
+ Limit = ThisLimit;
+ }
+ }
+ }
+ DoneEstimating:
+ if (Size >= Limit) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ AFI->setCSRegisterIsSpilled(Extras[i]);
+ }
+ } else {
+ // Reserve a slot closest to SP or frame pointer.
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setCSRegisterIsSpilled(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
+
+/// Move the iterator past the next bunch of callee-save load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Opc, unsigned Area,
+ const ARMSubtarget &STI) {
+ while (MBBI != MBB.end() &&
+ MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFI()) {
+ if (Area != 0) {
+ bool Done = false;
+ unsigned Category = 0;
+ switch (MBBI->getOperand(0).getReg()) {
+ case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ Category = 1;
+ break;
+ case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
+ Category = STI.isTargetDarwin() ? 2 : 1;
+ break;
+ case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
+ case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+ Category = 3;
+ break;
+ default:
+ Done = true;
+ break;
+ }
+ if (Done || Category != Area)
+ break;
+ }
+
+ ++MBBI;
+ }
+}
+
+void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ if (isThumb) {
+ // Check if R3 is live in. It might have to be used as a scratch register.
+ for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(),
+ E = MF.getRegInfo().livein_end(); I != E; ++I) {
+ if (I->first == ARM::R3) {
+ AFI->setR3IsLiveIn(true);
+ break;
+ }
+ }
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
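+ // e.g. (illustrative): the rounding above turns NumBytes == 13 into
+ // (13 + 3) & ~3 == 16, keeping sp adjustments a multiple of 4.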
+ }
+
+ // Determine the size of each callee-save spill area and record which frame
+ // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII,
+ *this, dl);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (!isThumb) {
+ // Build the new SUBri to adjust SP for integer callee-save spill area 1.
+ emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this, dl);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
+ } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+ }
+
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if (STI.isTargetDarwin() || hasFP(MF)) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri),
+ FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ if (!isThumb) AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ if (!isThumb) {
+ // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+ emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this, dl);
+
+ // Build the new SUBri to adjust SP for FP callee-save spill area.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
+ emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this, dl);
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
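+ // Worked example (illustrative): with NumBytes == 64, GPRCS1Size == 20,
+ // GPRCS2Size == 8 and DPRCSSize == 16, the areas start at DPRCSOffset ==
+ // 20, GPRCS2Offset == 36 and GPRCS1Offset == 44.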
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Insert it after all the callee-save spills.
+ if (!isThumb)
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
+ emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+ }
+
+ if(STI.isTargetELF() && hasFP(MF)) {
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+ }
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ return ((MI->getOpcode() == ARM::FLDD ||
+ MI->getOpcode() == ARM::LDR ||
+ MI->getOpcode() == ARM::tRestore) &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+}
+
+void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert((MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+ } else {
+ // Unwind MBBI to point to first LDR / FLDD.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+ if (isThumb) {
+ if (hasFP(MF)) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on the frame pointer only if the stack frame extends
+ // beyond the frame pointer stack slot, or the target is ELF and the
+ // function has an FP.
+ if (NumBytes)
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
+ TII, *this, dl);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP)
+ .addReg(FramePtr);
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII,
+ *this, dl);
+ } else
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII,
+ *this, dl);
+ }
+ } else {
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on the frame pointer only if the stack frame extends
+ // beyond the frame pointer stack slot, or the target is ELF and the
+ // function has an FP.
+ if (AFI->getGPRCalleeSavedArea2Size() ||
+ AFI->getDPRCalleeSavedAreaSize() ||
+ AFI->getDPRCalleeSavedAreaOffset()||
+ hasFP(MF)) {
+ if (NumBytes)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
+ .addImm(NumBytes)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ }
+ } else if (NumBytes) {
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this, dl);
+ }
+
+ // Move SP to start of integer callee save spill area 2.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0,
+ false, TII, *this, dl);
+
+ // Move SP to start of integer callee save spill area 1.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0,
+ false, TII, *this, dl);
+
+ // Move SP back to its value upon entry to the function.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0,
+ false, TII, *this, dl);
+ }
+ }
+
+ if (VARegSaveSize) {
+ if (isThumb)
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ // FIXME: Verify this is still ok when R3 is no longer being reserved.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
+
+ emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII,
+ *this, dl);
+
+ if (isThumb) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
+ MBB.erase(MBBI);
+ }
+ }
+}
+
+unsigned ARMRegisterInfo::getRARegister() const {
+ return ARM::LR;
+}
+
+unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ if (STI.isTargetDarwin() || hasFP(MF))
+ return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11;
+ else
+ return ARM::SP;
+}
+
+unsigned ARMRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned ARMRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 0000000..e1d9efb
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,102 @@
+//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMGenRegisterInfo.h.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+struct ARMRegisterInfo : public ARMGenRegisterInfo {
+ const TargetInstrInfo &TII;
+ const ARMSubtarget &STI;
+private:
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+public:
+ ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Val,
+ unsigned Pred, unsigned PredReg,
+ const TargetInstrInfo *TII, bool isThumb,
+ DebugLoc dl) const;
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// ARM::LR, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Same as previous getRegisterNumbering except it returns true in isSPVFP
+ /// if the register is a single precision VFP register.
+ static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ const TargetRegisterClass *getPointerRegClass() const;
+
+ /// Code Generation virtual methods...
+ const TargetRegisterClass *
+ getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ bool isLowRegister(unsigned Reg) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 0000000..e8daf74
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,221 @@
+//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+}
+
+class ARMFReg<bits<5> num, string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "ARM";
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the S* registers used to hold 64-bit fp values (doubles)
+def D0 : ARMReg< 0, "d0", [S0, S1]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>;
+
+// Current Program Status Register.
+def CPSR : ARMReg<0, "cpsr">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R12, R11,
+ LR, SP, PC]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ // FIXME: We are reserving r12 in case the PEI needs to use it to
+ // generate large stack offset. Make it available once we have register
+ // scavenging. Similarly r3 is reserved in Thumb mode for now.
+ let MethodBodies = [{
+ // FP is R11, R9 is available.
+ static const unsigned ARM_GPR_AO_1[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::R11 };
+ // FP is R11, R9 is not available.
+ static const unsigned ARM_GPR_AO_2[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R10,
+ ARM::R11 };
+ // FP is R7, R9 is available.
+ static const unsigned ARM_GPR_AO_3[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6,
+ ARM::R8, ARM::R9, ARM::R10,ARM::R11,
+ ARM::R7 };
+ // FP is R7, R9 is not available.
+ static const unsigned ARM_GPR_AO_4[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6,
+ ARM::R8, ARM::R10,ARM::R11,
+ ARM::R7 };
+
+ GPRClass::iterator
+ GPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_AO_4;
+ else
+ return ARM_GPR_AO_3;
+ } else {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_AO_2;
+ else
+ return ARM_GPR_AO_1;
+ }
+ }
+
+ GPRClass::iterator
+ GPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ GPRClass::iterator I;
+
+ if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isR9Reserved()) {
+ I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
+ } else {
+ I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
+ }
+ } else {
+ if (Subtarget.isR9Reserved()) {
+ I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
+ } else {
+ I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
+ }
+ }
+
+ // Mac OS X requires FP not to be clobbered for backtracing purposes.
+ return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ }
+ }];
+}
+
+// Thumb registers are normally R0-R7. Some instructions can still use
+// the general GPR register class above (e.g. MOV).
+def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ // FIXME: We are reserving r3 in Thumb mode in case the PEI needs to use it
+ // to generate large stack offset. Make it available once we have register
+ // scavenging.
+ let MethodBodies = [{
+ static const unsigned THUMB_tGPR_AO[] = {
+ ARM::R2, ARM::R1, ARM::R0,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+ // FP is R7, only low registers available.
+ tGPRClass::iterator
+ tGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ return THUMB_tGPR_AO;
+ }
+
+ tGPRClass::iterator
+ tGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ tGPRClass::iterator I =
+ THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
+ // Mac OS X requires FP not to be clobbered for backtracing purposes.
+ return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ }
+ }];
+}
+
+def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
+ S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
+
+// ARM requires only word alignment for doubles, though double-word
+// alignment performs better.
+def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
+ D9, D10, D11, D12, D13, D14, D15]>;
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 0000000..2cc2950
--- /dev/null
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,56 @@
+//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace ARM {
+ enum RelocationType {
+ // reloc_arm_absolute - Absolute relocation, just add the relocated value
+ // to the value already in memory.
+ reloc_arm_absolute,
+
+ // reloc_arm_relative - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_arm_relative,
+
+ // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
+ // addresses are kept locally in a map.
+ reloc_arm_cp_entry,
+
+ // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
+ // should be divided by 4.
+ reloc_arm_vfp_cp_entry,
+
+ // reloc_arm_machine_cp_entry - Relocation of an ARM machine constantpool
+ // entry.
+ reloc_arm_machine_cp_entry,
+
+ // reloc_arm_jt_base - PC relative relocation for jump tables whose
+ // addresses are kept locally in a map.
+ reloc_arm_jt_base,
+
+ // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
+ reloc_arm_pic_jt,
+
+ // reloc_arm_branch - Branch address relocation.
+ reloc_arm_branch
+ };
+ }
+}
+
+#endif
+
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 0000000..ef78cd5
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,84 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMGenSubtarget.inc"
+#include "llvm/Module.h"
+using namespace llvm;
+
+ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
+ bool isThumb)
+ : ARMArchVersion(V4T)
+ , ARMFPUType(None)
+ , IsThumb(isThumb)
+ , ThumbMode(Thumb1)
+ , UseThumbBacktraces(false)
+ , IsR9Reserved(false)
+ , stackAlignment(4)
+ , CPUString("generic")
+ , TargetType(isELF) // Default to ELF unless otherwise specified.
+ , TargetABI(ARM_ABI_APCS) {
+ // Determine default and user specified characteristics
+
+ // Parse features string.
+ CPUString = ParseSubtargetFeatures(FS, CPUString);
+
+ // Determine the architecture version, Thumb mode, and OS/ABI defaults
+ // from the module's target triple.
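+ // For example (illustrative), "armv6-apple-darwin" selects V6 and the
+ // Darwin target type, while a "thumbv5te" prefix sets IsThumb and V5TE.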
+ const std::string& TT = M.getTargetTriple();
+ unsigned Len = TT.length();
+ unsigned Idx = 0;
+
+ if (Len >= 5 && TT.substr(0, 4) == "armv")
+ Idx = 4;
+ else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+ IsThumb = true;
+ if (Len >= 7 && TT[5] == 'v')
+ Idx = 6;
+ }
+ if (Idx) {
+ unsigned SubVer = TT[Idx];
+ if (SubVer > '4' && SubVer <= '9') {
+ if (SubVer >= '7')
+ ARMArchVersion = V7A;
+ else if (SubVer == '6')
+ ARMArchVersion = V6;
+ else if (SubVer == '5') {
+ ARMArchVersion = V5T;
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchVersion = V5TE;
+ }
+ }
+ }
+
+ if (Len >= 10) {
+ if (TT.find("-darwin") != std::string::npos)
+ // arm-darwin
+ TargetType = isDarwin;
+ } else if (TT.empty()) {
+#if defined(__APPLE__)
+ TargetType = isDarwin;
+#endif
+ }
+
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+
+ if (isTargetDarwin()) {
+ UseThumbBacktraces = true;
+ IsR9Reserved = true;
+ }
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 0000000..8b469cf
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,122 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+class Module;
+
+class ARMSubtarget : public TargetSubtarget {
+protected:
+ enum ARMArchEnum {
+ V4T, V5T, V5TE, V6, V7A
+ };
+
+ enum ARMFPEnum {
+ None, VFPv2, VFPv3, NEON
+ };
+
+ enum ThumbTypeEnum {
+ Thumb1,
+ Thumb2
+ };
+
+ /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
+ /// V6, V7A.
+ ARMArchEnum ARMArchVersion;
+
+ /// ARMFPUType - Floating Point Unit type.
+ ARMFPEnum ARMFPUType;
+
+ /// IsThumb - True if we are in thumb mode, false if in ARM mode.
+ bool IsThumb;
+
+ /// ThumbMode - Indicates supported Thumb version.
+ ThumbTypeEnum ThumbMode;
+
+ /// UseThumbBacktraces - True if we use thumb style backtraces.
+ bool UseThumbBacktraces;
+
+ /// IsR9Reserved - True if R9 is not available as a general purpose register.
+ bool IsR9Reserved;
+
+ /// stackAlignment - The minimum alignment of the stack frame that is known
+ /// to hold on entry to the function and that must be maintained by every
+ /// function.
+ unsigned stackAlignment;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+ public:
+ enum {
+ isELF, isDarwin
+ } TargetType;
+
+ enum {
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
+ /// This constructor initializes the data members to match those
+ /// of the specified module.
+ ///
+ ARMSubtarget(const Module &M, const std::string &FS, bool isThumb);
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const {
+ // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
+ // Change this once Thumb ldmia / stmia support is added.
+ return isThumb() ? 0 : 64;
+ }
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The definition is auto-generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool hasV4TOps() const { return ARMArchVersion >= V4T; }
+ bool hasV5TOps() const { return ARMArchVersion >= V5T; }
+ bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
+ bool hasV6Ops() const { return ARMArchVersion >= V6; }
+ bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+
+ bool hasVFP2() const { return ARMFPUType >= VFPv2; }
+ bool hasVFP3() const { return ARMFPUType >= VFPv3; }
+ bool hasNEON() const { return ARMFPUType >= NEON; }
+
+ bool isTargetDarwin() const { return TargetType == isDarwin; }
+ bool isTargetELF() const { return TargetType == isELF; }
+
+ bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+ bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+ bool isThumb() const { return IsThumb; }
+ bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+
+ bool useThumbBacktraces() const { return UseThumbBacktraces; }
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+ const std::string & getCPUString() const { return CPUString; }
+
+ /// getStackAlignment - Returns the minimum alignment of the stack frame
+ /// that is known to hold on entry to the function and that must be
+ /// maintained by every function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
new file mode 100644
index 0000000..4107dcc
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -0,0 +1,291 @@
+//===-- ARMTargetAsmInfo.cpp - ARM asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetAsmInfo.h"
+#include "ARMTargetMachine.h"
+#include <cstring>
+#include <cctype>
+using namespace llvm;
+
+
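+// arm_asm_table translates GCC inline asm register constraint names to the
+// names the assembler expects, as alternating {constraint, name} pairs
+// terminated by a null pair; it is installed as AsmTransCBE in
+// ARMTargetAsmInfo.h. For example (illustrative), "{r0}" maps to "r0".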
+const char *const llvm::arm_asm_table[] = {
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0};
+
+ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
+ ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LessPrivateGlobalPrefix = "l";
+ StringConstantPrefix = "\1LC";
+ BSSSection = 0; // no BSS section
+ ZeroDirective = "\t.space\t";
+ ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
+ SetDirective = "\t.set\t";
+ WeakRefDirective = "\t.weak_reference\t";
+ WeakDefDirective = "\t.weak_definition ";
+ HiddenDirective = "\t.private_extern\t";
+ ProtectedDirective = NULL;
+ JumpTableDataSection = ".const";
+ CStringSection = "\t.cstring";
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ NeedsIndirectEncoding = true;
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+
+ NeedsSet = true;
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+}
+
+ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
+ ARMTargetAsmInfo<ELFTargetAsmInfo>(TM) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+ NeedsSet = false;
+ HasLEB128 = true;
+ AbsoluteDebugSectionOffsets = true;
+ CStringSection = ".rodata.str";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ DwarfRequiresFrameSection = false;
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",%progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",%progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",%progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",%progbits";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",%progbits";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",%progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",%progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
+
+ if (Subtarget->isAAPCS_ABI()) {
+ StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array";
+ StaticDtorsSection = "\t.section .fini_array,\"aw\",%fini_array";
+ } else {
+ StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
+ StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
+ }
+}
+
+/// Count the number of comma-separated arguments.
+/// Do not try to detect errors.
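+/// For example (illustrative), countArguments(" 1, 2, 3") returns 3.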
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::countArguments(const char* p) const {
+ unsigned count = 0;
+ while (*p && isspace(*p) && *p != '\n')
+ p++;
+ count++;
+ while (*p && *p!='\n' &&
+ strncmp(p, BaseTAI::CommentString,
+ strlen(BaseTAI::CommentString))!=0) {
+ if (*p==',')
+ count++;
+ p++;
+ }
+ return count;
+}
+
+/// Count the length of a string enclosed in quote characters.
+/// Do not try to detect errors.
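+/// For example (illustrative), countString(" \"abc\"") returns 3.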
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::countString(const char* p) const {
+ unsigned count = 0;
+ while (*p && isspace(*p) && *p!='\n')
+ p++;
+ if (!*p || *p != '\"')
+ return count;
+ while (*++p && *p != '\"')
+ count++;
+ return count;
+}
+
+/// ARM-specific version of TargetAsmInfo::getInlineAsmLength.
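+/// Returns a conservative byte count for the asm string; e.g. in Thumb mode
+/// "bl foo\n add r0, r1, r2" counts as 4 + 2 = 6 bytes (illustrative).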
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::getInlineAsmLength(const char *s) const {
+ // Make a lowercase-folded version of s for counting purposes.
+ char *q, *s_copy = (char *)malloc(strlen(s) + 1);
+ strcpy(s_copy, s);
+ for (q=s_copy; *q; q++)
+ *q = tolower(*q);
+ const char *Str = s_copy;
+
+ // Count the number of bytes in the asm.
+ bool atInsnStart = true;
+ bool inTextSection = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (atInsnStart) {
+ // Skip whitespace
+ while (*Str && isspace(*Str) && *Str != '\n')
+ Str++;
+ // Skip label
+ for (const char* p = Str; *p && !isspace(*p); p++)
+ if (*p == ':') {
+ Str = p+1;
+ while (*Str && isspace(*Str) && *Str != '\n')
+ Str++;
+ break;
+ }
+
+ if (*Str == 0) break;
+
+ // Ignore everything from comment char(s) to EOL
+ if (strncmp(Str, BaseTAI::CommentString,
+ strlen(BaseTAI::CommentString)) == 0)
+ atInsnStart = false;
+ // FIXME do something like the following for non-Darwin
+ else if (*Str == '.' && Subtarget->isTargetDarwin()) {
+ // Directive.
+ atInsnStart = false;
+
+ // Some change the section, but don't generate code.
+ if (strncmp(Str, ".literal4", strlen(".literal4"))==0 ||
+ strncmp(Str, ".literal8", strlen(".literal8"))==0 ||
+ strncmp(Str, ".const", strlen(".const"))==0 ||
+ strncmp(Str, ".constructor", strlen(".constructor"))==0 ||
+ strncmp(Str, ".cstring", strlen(".cstring"))==0 ||
+ strncmp(Str, ".data", strlen(".data"))==0 ||
+ strncmp(Str, ".destructor", strlen(".destructor"))==0 ||
+ strncmp(Str, ".fvmlib_init0", strlen(".fvmlib_init0"))==0 ||
+ strncmp(Str, ".fvmlib_init1", strlen(".fvmlib_init1"))==0 ||
+ strncmp(Str, ".mod_init_func", strlen(".mod_init_func"))==0 ||
+ strncmp(Str, ".mod_term_func", strlen(".mod_term_func"))==0 ||
+ strncmp(Str, ".picsymbol_stub", strlen(".picsymbol_stub"))==0 ||
+ strncmp(Str, ".symbol_stub", strlen(".symbol_stub"))==0 ||
+ strncmp(Str, ".static_data", strlen(".static_data"))==0 ||
+ strncmp(Str, ".section", strlen(".section"))==0 ||
+ strncmp(Str, ".lazy_symbol_pointer", strlen(".lazy_symbol_pointer"))==0 ||
+ strncmp(Str, ".non_lazy_symbol_pointer", strlen(".non_lazy_symbol_pointer"))==0 ||
+ strncmp(Str, ".dyld", strlen(".dyld"))==0 ||
+ strncmp(Str, ".const_data", strlen(".const_data"))==0 ||
+ strncmp(Str, ".objc", strlen(".objc"))==0 || //// many directives
+ strncmp(Str, ".static_const", strlen(".static_const"))==0)
+ inTextSection=false;
+ else if (strncmp(Str, ".text", strlen(".text"))==0)
+ inTextSection = true;
+ // Some can't really be handled without implementing significant pieces
+ // of an assembler. Others require dynamic adjustment of block sizes in
+ // AdjustBBOffsetsAfter; it's a big compile-time speed hit to check every
+ // instruction in there, and none of these are currently used in the kernel.
+ else if (strncmp(Str, ".macro", strlen(".macro"))==0 ||
+ strncmp(Str, ".if", strlen(".if"))==0 ||
+ strncmp(Str, ".align", strlen(".align"))==0 ||
+ strncmp(Str, ".fill", strlen(".fill"))==0 ||
+ strncmp(Str, ".space", strlen(".space"))==0 ||
+ strncmp(Str, ".zerofill", strlen(".zerofill"))==0 ||
+ strncmp(Str, ".p2align", strlen(".p2align"))==0 ||
+ strncmp(Str, ".p2alignw", strlen(".p2alignw"))==0 ||
+ strncmp(Str, ".p2alignl", strlen(".p2alignl"))==0 ||
+ strncmp(Str, ".align32", strlen(".p2align32"))==0 ||
+ strncmp(Str, ".include", strlen(".include"))==0)
+ cerr << "Directive " << Str << " in asm may lead to invalid offsets for" <<
+ " constant pools (the assembler will tell you if this happens).\n";
+ // Some generate code, but this is only interesting in the text section.
+ else if (inTextSection) {
+ if (strncmp(Str, ".long", strlen(".long"))==0)
+ Length += 4*countArguments(Str+strlen(".long"));
+ else if (strncmp(Str, ".short", strlen(".short"))==0)
+ Length += 2*countArguments(Str+strlen(".short"));
+ else if (strncmp(Str, ".byte", strlen(".byte"))==0)
+ Length += 1*countArguments(Str+strlen(".byte"));
+ else if (strncmp(Str, ".single", strlen(".single"))==0)
+ Length += 4*countArguments(Str+strlen(".single"));
+ else if (strncmp(Str, ".double", strlen(".double"))==0)
+ Length += 8*countArguments(Str+strlen(".double"));
+ else if (strncmp(Str, ".quad", strlen(".quad"))==0)
+ Length += 16*countArguments(Str+strlen(".quad"));
+ else if (strncmp(Str, ".ascii", strlen(".ascii"))==0)
+ Length += countString(Str+strlen(".ascii"));
+ else if (strncmp(Str, ".asciz", strlen(".asciz"))==0)
+ Length += countString(Str+strlen(".asciz"))+1;
+ }
+ } else if (inTextSection) {
+ // An instruction
+ atInsnStart = false;
+ if (Subtarget->isThumb()) {
+ // BL and BLX <non-reg> are 4 bytes, all others 2.
+ if (strncmp(Str, "blx", strlen("blx"))==0) {
+ const char* p = Str+3;
+ while (*p && isspace(*p))
+ p++;
+ if (*p == 'r' || *p=='R')
+ Length += 2; // BLX reg
+ else
+ Length += 4; // BLX non-reg
+ } else if (strncmp(Str, "bl", strlen("bl"))==0)
+ Length += 4; // BL
+ else
+ Length += 2; // Thumb anything else
+ }
+ else
+ Length += 4; // ARM
+ }
+ }
+ if (*Str == '\n' || *Str == BaseTAI::SeparatorChar)
+ atInsnStart = true;
+ }
+ free(s_copy);
+ return Length;
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
new file mode 100644
index 0000000..9e6f856
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -0,0 +1,64 @@
+//=====-- ARMTargetAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETASMINFO_H
+#define ARMTARGETASMINFO_H
+
+#include "ARMTargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+ extern const char *const arm_asm_table[];
+
+ template <class BaseTAI>
+ struct ARMTargetAsmInfo : public BaseTAI {
+ explicit ARMTargetAsmInfo(const ARMTargetMachine &TM):
+ BaseTAI(TM) {
+ BaseTAI::AsmTransCBE = arm_asm_table;
+
+ BaseTAI::AlignmentIsInBytes = false;
+ BaseTAI::Data64bitsDirective = 0;
+ BaseTAI::CommentString = "@";
+ BaseTAI::ConstantPoolSection = "\t.text\n";
+ BaseTAI::COMMDirectiveTakesAlignment = false;
+ BaseTAI::InlineAsmStart = "@ InlineAsm Start";
+ BaseTAI::InlineAsmEnd = "@ InlineAsm End";
+ BaseTAI::LCOMMDirective = "\t.lcomm\t";
+ }
+
+ const ARMSubtarget *Subtarget;
+
+ virtual unsigned getInlineAsmLength(const char *Str) const;
+ unsigned countArguments(const char *p) const;
+ unsigned countString(const char *p) const;
+ };
+
+ typedef ARMTargetAsmInfo<TargetAsmInfo> ARMGenericTargetAsmInfo;
+
+ EXTERN_TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>);
+
+ struct ARMDarwinTargetAsmInfo : public ARMTargetAsmInfo<DarwinTargetAsmInfo> {
+ explicit ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM);
+ };
+
+ struct ARMELFTargetAsmInfo : public ARMTargetAsmInfo<ELFTargetAsmInfo> {
+ explicit ARMELFTargetAsmInfo(const ARMTargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 0000000..1dc7d19
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,242 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARMTargetAsmInfo.h"
+#include "ARMFrameInfo.h"
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
+ cl::desc("Disable load store optimization pass"));
+static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
+ cl::desc("Disable if-conversion pass"));
+
+/// ARMTargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int ARMTargetMachineModule;
+int ARMTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<ARMTargetMachine> X("arm", "ARM");
+static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
+
+// No assembler printer by default
+ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0;
+
+/// ThumbTargetMachine - Create a Thumb architecture model.
+///
+unsigned ThumbTargetMachine::getJITMatchQuality() {
+#if defined(__thumb__)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ // Match thumb-foo-bar, as well as things like thumbv5blah-*
+ if (TT.size() >= 6 &&
+ (TT.substr(0, 6) == "thumb-" || TT.substr(0, 6) == "thumbv"))
+ return 20;
+
+ // If the target triple is something non-thumb, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
+ : ARMTargetMachine(M, FS, true) {
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
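+/// The TargetData strings below are LLVM data layout descriptors, e.g.
+/// "e-p:32:32-f64:64:64-i64:64:64": little-endian, 32-bit pointers, 64-bit
+/// alignment for f64/i64 (AAPCS). APCS keeps f64/i64 at 32-bit alignment,
+/// and the Thumb variants add explicit small-integer and aggregate entries.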
+ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
+ bool isThumb)
+ : Subtarget(M, FS, isThumb),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ // APCS ABI
+ (isThumb ?
+ std::string("e-p:32:32-f64:32:32-i64:32:32-"
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ std::string("e-p:32:32-f64:32:32-i64:32:32")) :
+ // AAPCS ABI
+ (isThumb ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64"))),
+ InstrInfo(Subtarget),
+ FrameInfo(Subtarget),
+ JITInfo(),
+ TLInfo(*this) {
+ DefRelocModel = getRelocationModel();
+}
+
+unsigned ARMTargetMachine::getJITMatchQuality() {
+#if defined(__arm__)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ // Match arm-foo-bar, as well as things like armv5blah-*
+ if (TT.size() >= 4 &&
+ (TT.substr(0, 4) == "arm-" || TT.substr(0, 4) == "armv"))
+ return 20;
+ // If the target triple is something non-arm, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+
+const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
+ switch (Subtarget.TargetType) {
+ case ARMSubtarget::isDarwin:
+ return new ARMDarwinTargetAsmInfo(*this);
+ case ARMSubtarget::isELF:
+ return new ARMELFTargetAsmInfo(*this);
+ default:
+ return new ARMGenericTargetAsmInfo(*this);
+ }
+}
+
+
+// Pass Pipeline Configuration
+bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createARMISelDag(*this));
+ return false;
+}
+
+bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+ if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+ PM.add(createARMLoadStoreOptimizationPass());
+
+ if (OptLevel != CodeGenOpt::None &&
+ !DisableIfConversion && !Subtarget.isThumb())
+ PM.add(createIfConverterPass());
+
+ PM.add(createARMConstantIslandPass());
+ return true;
+}
+
+bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ // Output assembly language.
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+
+ return false;
+}
+
+
+bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ // FIXME: Move this to TargetJITInfo!
+ if (DefRelocModel == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+
+ // Machine code emitter pass for ARM.
+ PM.add(createARMCodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ if (DefRelocModel == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ // Machine code emitter pass for ARM.
+ PM.add(createARMCodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 0000000..916a8aa
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,104 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMFrameInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class ARMTargetMachine : public LLVMTargetMachine {
+ ARMSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMInstrInfo InstrInfo;
+ ARMFrameInfo FrameInfo;
+ ARMJITInfo JITInfo;
+ ARMTargetLowering TLInfo;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+protected:
+ // To avoid having the target depend on the asmprinter libraries, the
+ // asmprinter sets this function pointer to its ctor at startup time if it
+ // is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ ARMTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
+public:
+ ARMTargetMachine(const Module &M, const std::string &FS, bool isThumb = false);
+
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const ARMRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual ARMTargetLowering *getTargetLowering() const {
+ return const_cast<ARMTargetLowering*>(&TLInfo);
+ }
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+ static unsigned getJITMatchQuality();
+
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &MCE);
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+///
+class ThumbTargetMachine : public ARMTargetMachine {
+public:
+ ThumbTargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
new file mode 100644
index 0000000..d908cf4
--- /dev/null
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -0,0 +1,1117 @@
+//===-- ARMAsmPrinter.cpp - ARM LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM.h"
+#include "ARMBuildAttrs.h"
+#include "ARMTargetMachine.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+ /// We name each basic block in a Function with a unique number, so
+ /// that we can consistently refer to them later. This is cleared
+ /// at the beginning of each call to runOnMachineFunction().
+ ///
+ typedef std::map<const Value *, unsigned> ValueMapTy;
+ ValueMapTy NumberForBB;
+
+ /// GVNonLazyPtrs - Keeps the set of GlobalValues that require
+ /// non-lazy-pointers for indirect access.
+ StringSet<> GVNonLazyPtrs;
+
+ /// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden
+ /// visibility that require non-lazy-pointers for indirect access.
+ StringSet<> HiddenGVNonLazyPtrs;
+
+ /// FnStubs - Keeps the set of external function GlobalAddresses that the
+ /// asm printer should generate stubs for.
+ StringSet<> FnStubs;
+
+ /// True if the asm printer is printing a series of CONSTPOOL_ENTRY
+ /// instructions.
+ bool InCPMode;
+ public:
+ explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL),
+ InCPMode(false) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printSOImmOperand(const MachineInstr *MI, int opNum);
+ void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
+ void printSORegOperand(const MachineInstr *MI, int opNum);
+ void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
+ void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
+ void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
+ void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo);
+ void printAddrMode4Operand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printAddrMode5Operand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
+ unsigned Scale);
+ void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
+ void printPredicateOperand(const MachineInstr *MI, int opNum);
+ void printSBitModifierOperand(const MachineInstr *MI, int opNum);
+ void printPCLabel(const MachineInstr *MI, int opNum);
+ void printRegisterList(const MachineInstr *MI, int opNum);
+ void printCPInstOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier);
+ void printJTBlockOperand(const MachineInstr *MI, int opNum);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode);
+
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printMachineInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
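+ /// For example (illustrative), a non-lazy-pointer entry on Darwin prints
+ /// as "_foo$non_lazy_ptr-(LPC7+8)".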
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ printDataDirective(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+ GlobalValue *GV = ACPV->getGV();
+ std::string Name = GV ? Mang->getValueName(GV) : TAI->getGlobalPrefix();
+ if (!GV)
+ Name += ACPV->getSymbol();
+ if (ACPV->isNonLazyPointer()) {
+ if (GV->hasHiddenVisibility())
+ HiddenGVNonLazyPtrs.insert(Name);
+ else
+ GVNonLazyPtrs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ } else if (ACPV->isStub()) {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ } else
+ O << Name;
+ if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
+ if (ACPV->getPCAdjustment() != 0) {
+ O << "-(" << TAI->getPrivateGlobalPrefix() << "PC"
+ << utostr(ACPV->getLabelId())
+ << "+" << (unsigned)ACPV->getPCAdjustment();
+ if (ACPV->mustAddCurrentAddress())
+ O << "-.";
+ O << ")";
+ }
+ O << "\n";
+
+ // If the constant pool value is an extern weak symbol, remember to emit
+ // the weak reference.
+ if (GV && GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ }
+ };
+} // end of anonymous namespace
+
+#include "ARMGenAsmWriter.inc"
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ MCP = MF.getConstantPool();
+
+ SetupMachineFunction(MF);
+ O << "\n";
+
+ // NOTE: we don't print out constant pools here, they are handled as
+ // instructions.
+
+ O << "\n";
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage:
+ SwitchToTextSection("\t.text", F);
+ break;
+ case Function::ExternalLinkage:
+ SwitchToTextSection("\t.text", F);
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToTextSection(
+ ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F);
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ O << "\t.weak_definition\t" << CurrentFnName << "\n";
+ } else {
+ O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ }
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ if (AFI->isThumbFunction()) {
+ EmitAlignment(1, F, AFI->getAlign());
+ O << "\t.code\t16\n";
+ O << "\t.thumb_func";
+ if (Subtarget->isTargetDarwin())
+ O << "\t" << CurrentFnName;
+ O << "\n";
+ InCPMode = false;
+ } else
+ EmitAlignment(2, F);
+
+ O << CurrentFnName << ":\n";
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ if (Subtarget->isTargetDarwin()) {
+ // If the function is empty, then we need to emit *something*. Otherwise,
+ // the function's label might be associated with something that it wasn't
+ // meant to be associated with. We emit a noop in this situation.
+ MachineFunction::iterator I = MF.begin();
+
+ if (++I == MF.end() && MF.front().empty())
+ O << "\tnop\n";
+ }
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true, VerboseAsm);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n";
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ O.flush();
+
+ return false;
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ else
+ assert(0 && "not implemented");
+ break;
+ case MachineOperand::MO_Immediate: {
+ if (!Modifier || strcmp(Modifier, "no_hash") != 0)
+ O << "#";
+
+ O << MO.getImm();
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+ bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ if (isExt && isCallOp && Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() != Reloc::Static) {
+ printSuffixedName(Name, "$stub");
+ FnStubs.insert(Name);
+ } else
+ O << Name;
+
+ printOffset(MO.getOffset());
+
+ if (isCallOp && Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ O << "(PLT)";
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ if (isCallOp && Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() != Reloc::Static) {
+ printSuffixedName(Name, "$stub");
+ FnStubs.insert(Name);
+ } else
+ O << Name;
+ if (isCallOp && Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ O << "<unknown operand type>"; abort (); break;
+ }
+}
+
+static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
+ const TargetAsmInfo *TAI) {
+ assert(V < (1 << 12) && "Not a valid so_imm value!");
+ unsigned Imm = ARM_AM::getSOImmValImm(V);
+ unsigned Rot = ARM_AM::getSOImmValRot(V);
+
+ // Print low-level immediate formation info, per
+ // A5.1.3: "Data-processing operands - Immediate".
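+ // For example (illustrative), a materialized value of 0x01000000 has
+ // Imm=1 and Rot=8, and prints as "#1, 8" ("@ 16777216" appended in
+ // verbose mode).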
+ if (Rot) {
+ O << "#" << Imm << ", " << Rot;
+ // Pretty printed version.
+ if (VerboseAsm)
+ O << ' ' << TAI->getCommentString()
+ << ' ' << (int)ARM_AM::rotr32(Imm, Rot);
+ } else {
+ O << "#" << Imm;
+ }
+}
+
+/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
+/// immediate in bits 0-7.
+void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImm() && "Not a valid so_imm value!");
+ printSOImm(O, MO.getImm(), VerboseAsm, TAI);
+}
+
+/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov'
+/// followed by an 'orr' to materialize.
+void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImm() && "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm());
+ printSOImm(O, ARM_AM::getSOImmVal(V1), VerboseAsm, TAI);
+ O << "\n\torr";
+ printPredicateOperand(MI, 2);
+ O << " ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 0);
+ O << ", ";
+ printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI);
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ // Print the shift opc.
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
+ << " ";
+
+ if (MO2.getReg()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ O << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else {
+ O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+}
+
+void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ O << ", #"
+ << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << ARM_AM::getAM2Offset(MO3.getImm());
+ O << "]";
+ return;
+ }
+
+ O << ", "
+ << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ assert(ImmOffs && "Malformed indexed load / store!");
+ O << "#"
+ << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << ImmOffs;
+ return;
+ }
+
+ O << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
+ << " #" << ShImm;
+}
+
+void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ if (MO2.getReg()) {
+ O << ", "
+ << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << TM.getRegisterInfo()->get(MO2.getReg()).AsmName
+ << "]";
+ return;
+ }
+
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ O << ", #"
+ << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << ImmOffs;
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (MO1.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ assert(ImmOffs && "Malformed indexed load / store!");
+ O << "#"
+ << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << ImmOffs;
+}
+
+void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
+ if (Modifier && strcmp(Modifier, "submode") == 0) {
+ if (MO1.getReg() == ARM::SP) {
+ bool isLDM = (MI->getOpcode() == ARM::LDM ||
+ MI->getOpcode() == ARM::LDM_RET);
+ O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
+ } else
+ O << ARM_AM::getAMSubModeStr(Mode);
+ } else {
+ printOperand(MI, Op);
+ if (ARM_AM::getAM4WBFlag(MO2.getImm()))
+ O << "!";
+ }
+}
+
+void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+
+ if (Modifier && strcmp(Modifier, "submode") == 0) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
+ if (MO1.getReg() == ARM::SP) {
+ bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
+ MI->getOpcode() == ARM::FLDMS);
+ O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
+ } else
+ O << ARM_AM::getAMSubModeStr(Mode);
+ return;
+ } else if (Modifier && strcmp(Modifier, "base") == 0) {
+ // Used for FSTM{D|S} and FLDM{D|S} operations.
+ O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ if (ARM_AM::getAM5WBFlag(MO2.getImm()))
+ O << "!";
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ O << ", #"
+ << (char)ARM_AM::getAM5Op(MO2.getImm())
+ << ImmOffs*4;
+ }
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ if (Modifier && strcmp(Modifier, "label") == 0) {
+ printPCLabel(MI, Op+1);
+ return;
+ }
+
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
+ unsigned Scale) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ if (MO3.getReg())
+ O << ", " << TM.getRegisterInfo()->get(MO3.getReg()).AsmName;
+ else if (unsigned ImmOffs = MO2.getImm()) {
+ O << ", #" << ImmOffs;
+ if (Scale > 1)
+ O << " * " << Scale;
+ }
+ O << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 1);
+}
+void
+ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 2);
+}
+void
+ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 4);
+}
+
+void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs << " * 4";
+ O << "]";
+}
+
+void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm();
+ if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){
+ unsigned Reg = MI->getOperand(opNum).getReg();
+ if (Reg) {
+ assert(Reg == ARM::CPSR && "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) {
+ int Id = (int)MI->getOperand(opNum).getImm();
+ O << TAI->getPrivateGlobalPrefix() << "PC" << Id;
+}
+
+void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) {
+ O << "{";
+ for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) {
+ printOperand(MI, i);
+ if (i != e-1) O << ", ";
+ }
+ O << "}";
+}
+
+void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
+ const char *Modifier) {
+ assert(Modifier && "This operand only works with a modifier!");
+ // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the
+ // data itself.
+ if (!strcmp(Modifier, "label")) {
+ unsigned ID = MI->getOperand(OpNo).getImm();
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << ID << ":\n";
+ } else {
+ assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
+ unsigned CPI = MI->getOperand(OpNo).getIndex();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+
+ if (MCPE.isMachineConstantPoolEntry()) {
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ } else {
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+ // remember to emit the weak reference
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(MCPE.Val.ConstVal))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+ }
+}
+
+void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImm() << ":\n";
+
+ const char *JTEntryDirective = TAI->getJumpTableDirective();
+ if (!JTEntryDirective)
+ JTEntryDirective = TAI->getData32bitsDirective();
+
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
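+ // When the assembler provides a .set directive and we are generating PIC,
+ // emit each entry as a label-difference .set so the table holds
+ // position-independent offsets rather than absolute block addresses.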
+ bool UseSet = TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
+ std::set<MachineBasicBlock*> JTSets;
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (UseSet && JTSets.insert(MBB).second)
+ printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB);
+
+ O << JTEntryDirective << ' ';
+ if (UseSet)
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImm()
+ << "_set_" << MBB->getNumber();
+ else if (TM.getRelocationModel() == Reloc::PIC_) {
+ printBasicBlockLabel(MBB, false, false, false);
+ // If the arch uses custom Jump Table directives, don't calc relative to JT
+ if (!TAI->getJumpTableDirective())
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm();
+ } else
+ printBasicBlockLabel(MBB, false, false, false);
+ if (i != e-1)
+ O << '\n';
+ }
+}
+
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode){
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // Don't print "#" before a global var name or constant.
+ case 'c': // Don't print "$" before a global var name or constant.
+ printOperand(MI, OpNo, "no_hash");
+ return false;
+ case 'P': // Print a VFP double precision register.
+ printOperand(MI, OpNo);
+ return false;
+ case 'Q':
+ if (TM.getTargetData()->isLittleEndian())
+ break;
+ // Fallthrough
+ case 'R':
+ if (TM.getTargetData()->isBigEndian())
+ break;
+ // Fallthrough
+ case 'H': // Write second word of DI / DF reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printAddrMode2Operand(MI, OpNo);
+ return false;
+}
+
+void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
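+ // Thumb functions keep constant pool entries in-line as data islands;
+ // InCPMode tracks transitions into an island so the 4-byte alignment is
+ // emitted only once, at its first CONSTPOOL_ENTRY.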
+ int Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::CONSTPOOL_ENTRY:
+ if (!InCPMode && AFI->isThumbFunction()) {
+ EmitAlignment(2);
+ InCPMode = true;
+ }
+ break;
+ default: {
+ if (InCPMode && AFI->isThumbFunction())
+ InCPMode = false;
+ }}
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool ARMAsmPrinter::doInitialization(Module &M) {
+
+ bool Result = AsmPrinter::doInitialization(M);
+
+ // Emit initial debug information.
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "Dwarf Writer is not available");
+ DW->BeginModule(&M, MMI, O, this, TAI);
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ if (Subtarget->isTargetDarwin())
+ Mang->setUseQuotes(true);
+
+ // Emit ARM Build Attributes
+ if (Subtarget->isTargetELF()) {
+ // CPU Type
+ std::string CPUString = Subtarget->getCPUString();
+ if (CPUString != "generic")
+ O << "\t.cpu " << CPUString << '\n';
+
+ // FIXME: Emit FPU type
+ if (Subtarget->hasVFP2())
+ O << "\t.eabi_attribute " << ARMBuildAttrs::VFP_arch << ", 2\n";
+
+ // Signal various FP modes.
+ if (!UnsafeFPMath)
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_denormal << ", 1\n"
+ << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_exceptions << ", 1\n";
+
+ if (FiniteOnlyFPMath())
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 1\n";
+ else
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 3\n";
+
+ // 8-byte alignment stuff.
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_needed << ", 1\n"
+ << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_preserved << ", 1\n";
+
+ // FIXME: Should we signal R9 usage?
+ }
+
+ return Result;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+ for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+ Name != E; ++Name)
+ if (isprint(*Name))
+ OS << *Name;
+}
+
+void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())   // External globals require no code.
+ return;
+
+ // Check to see if this is a special global used by LLVM; if so, emit it.
+
+ if (EmitSpecialLLVMGlobal(GVar)) {
+ if (Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() == Reloc::Static) {
+ if (GVar->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (GVar->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ return;
+ }
+
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+ bool isDarwin = Subtarget->isTargetDarwin();
+
+ printVisibility(name, GVar->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type " << name << ",%object\n";
+
+ if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() &&
+ !(isDarwin &&
+ TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) {
+ // FIXME: This seems to be pretty darwin-specific
+
+ if (GVar->hasExternalLinkage()) {
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+ if (const char *Directive = TAI->getZeroFillDirective()) {
+ O << "\t.globl\t" << name << "\n";
+ O << Directive << "__DATA, __common, " << name << ", "
+ << Size << ", " << Align << "\n";
+ return;
+ }
+ }
+
+ if (GVar->hasLocalLinkage() || GVar->isWeakForLinker()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (isDarwin) {
+ if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << "," << Size
+ << ',' << Align;
+ } else if (GVar->hasCommonLinkage()) {
+ O << TAI->getCOMMDirective() << name << "," << Size
+ << ',' << Align;
+ } else {
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+ O << "\t.globl " << name << '\n'
+ << TAI->getWeakDefDirective() << name << '\n';
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << ' ';
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+ return;
+ }
+ } else if (TAI->getLCOMMDirective() != NULL) {
+ if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << "," << Size;
+ } else {
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ } else {
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << "\n";
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ if (VerboseAsm) {
+ O << "\t\t" << TAI->getCommentString() << " ";
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << "\n";
+ return;
+ }
+ }
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ if (isDarwin) {
+ O << "\t.globl " << name << "\n"
+ << "\t.weak_definition " << name << "\n";
+ } else {
+ O << "\t.weak " << name << "\n";
+ }
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ O << "\t.globl " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ break;
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << " ";
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << "\n";
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << name << ", " << Size << "\n";
+
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+
+bool ARMAsmPrinter::doFinalization(Module &M) {
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToDataSection("");
+
+ // Output stubs for dynamically-linked functions
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs,"
+ "none,16", 0);
+ else
+ SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs,"
+ "none,12", 0);
+
+ EmitAlignment(2);
+ O << "\t.code\t32\n";
+
+ const char *p = i->getKeyData();
+ printSuffixedName(p, "$stub");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << "\n";
+ O << "\tldr ip, ";
+ printSuffixedName(p, "$slp");
+ O << "\n";
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ printSuffixedName(p, "$scv");
+ O << ":\n";
+ O << "\tadd ip, pc, ip\n";
+ }
+ O << "\tldr pc, [ip, #0]\n";
+ printSuffixedName(p, "$slp");
+ O << ":\n";
+ O << "\t.long\t";
+ printSuffixedName(p, "$lazy_ptr");
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ O << "-(";
+ printSuffixedName(p, "$scv");
+ O << "+8)\n";
+ } else
+ O << "\n";
+ SwitchToDataSection(".lazy_symbol_pointer", 0);
+ printSuffixedName(p, "$lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << "\n";
+ O << "\t.long\tdyld_stub_binding_helper\n";
+ }
+ O << "\n";
+
+ // Output non-lazy-pointers for external and common global variables.
+ if (!GVNonLazyPtrs.empty()) {
+ SwitchToDataSection("\t.non_lazy_symbol_pointer", 0);
+ for (StringSet<>::iterator i = GVNonLazyPtrs.begin(),
+ e = GVNonLazyPtrs.end(); i != e; ++i) {
+ const char *p = i->getKeyData();
+ printSuffixedName(p, "$non_lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << "\n";
+ O << "\t.long\t0\n";
+ }
+ }
+
+ if (!HiddenGVNonLazyPtrs.empty()) {
+ SwitchToSection(TAI->getDataSection());
+ for (StringSet<>::iterator i = HiddenGVNonLazyPtrs.begin(),
+ e = HiddenGVNonLazyPtrs.end(); i != e; ++i) {
+ const char *p = i->getKeyData();
+ EmitAlignment(2);
+ printSuffixedName(p, "$non_lazy_ptr");
+ O << ":\n";
+ O << "\t.long " << p << "\n";
+ }
+ }
+
+
+ // Emit final debug information.
+ DW->EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+ } else {
+ // Emit final debug information for ELF.
+ DW->EndModule();
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createARMCodePrinterPass - Returns a pass that prints the ARM
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o,
+ ARMTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+namespace {
+ static struct Register {
+ Register() {
+ ARMTargetMachine::registerAsmPrinter(createARMCodePrinterPass);
+ }
+ } Registrator;
+}
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..524a748
--- /dev/null
+++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMARMAsmPrinter
+ ARMAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMARMCodeGen n)
+
+add_dependencies(LLVMARMAsmPrinter ${n})
diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile
new file mode 100644
index 0000000..ce36cec
--- /dev/null
+++ b/lib/Target/ARM/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmPrinter
+
+# Hack: we need to include the 'main' ARM target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
new file mode 100644
index 0000000..2ac40f5
--- /dev/null
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_TARGET_DEFINITIONS ARM.td)
+
+tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(ARMGenRegisterNames.inc -gen-register-enums)
+tablegen(ARMGenRegisterInfo.inc -gen-register-desc)
+tablegen(ARMGenInstrNames.inc -gen-instr-enums)
+tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
+tablegen(ARMGenCodeEmitter.inc -gen-emitter)
+tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(ARMGenCallingConv.inc -gen-callingconv)
+tablegen(ARMGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(ARMCodeGen
+ ARMCodeEmitter.cpp
+ ARMConstantIslandPass.cpp
+ ARMConstantPoolValue.cpp
+ ARMInstrInfo.cpp
+ ARMISelDAGToDAG.cpp
+ ARMISelLowering.cpp
+ ARMJITInfo.cpp
+ ARMLoadStoreOptimizer.cpp
+ ARMRegisterInfo.cpp
+ ARMSubtarget.cpp
+ ARMTargetAsmInfo.cpp
+ ARMTargetMachine.cpp
+ )
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
new file mode 100644
index 0000000..9a3b9be
--- /dev/null
+++ b/lib/Target/ARM/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMARMCodeGen
+TARGET = ARM
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
+ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
+ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
+ ARMGenDAGISel.inc ARMGenSubtarget.inc \
+ ARMGenCodeEmitter.inc ARMGenCallingConv.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
new file mode 100644
index 0000000..4d3200b
--- /dev/null
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -0,0 +1,228 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend (Thumb specific).
+//===---------------------------------------------------------------------===//
+
+* Add support for compiling functions in both ARM and Thumb mode, then taking
+ the smallest.
+
+* Add support for compiling individual basic blocks in thumb mode, when in a
+ larger ARM function. This can be used for presumed cold code, like paths
+ to abort (failure path of asserts), EH handling code, etc.
+
+* Thumb doesn't have normal pre/post increment addressing modes, but you can
+ load/store 32-bit integers with pre/postinc by using load/store multiple
+ instrs with a single register; see the sketch after this list.
+
+* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add
+ and cmp instructions can use high registers. Also, we can use them as
+ temporaries to spill values into.
+
+* In thumb mode, short, byte, and bool preferred alignments are currently set
+ to 4 to accommodate an ISA restriction (i.e. for add sp, #imm, the immediate
+ must be a multiple of 4).
+
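+A single-register load / store multiple behaves like the post-increment form
+mentioned in the list above (untested sketch):
+
+ ldmia r0!, {r1} @ r1 = *r0; r0 += 4
+ stmia r2!, {r1} @ *r2 = r1; r2 += 4
+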
+//===---------------------------------------------------------------------===//
+
+Potential jumptable improvements:
+
+* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit
+ jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the
+ function is even smaller. This also applies to ARM.
+
+* Thumb jumptable codegen could be improved with some help from the assembler.
+ This is what we generate right now:
+
+ .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4))
+LPCRELL0:
+ mov r1, #PCRELV0
+ add r1, pc
+ ldr r0, [r0, r1]
+ cpy pc, r0
+ .align 2
+LJTI1_0_0:
+ .long LBB1_3
+ ...
+
+Note there is another pc relative add that we can take advantage of.
+ add r1, pc, #imm_8 * 4
+
+We should be able to generate:
+
+LPCRELL0:
+ add r1, LJTI1_0_0
+ ldr r0, [r0, r1]
+ cpy pc, r0
+ .align 2
+LJTI1_0_0:
+ .long LBB1_3
+
+if the assembler can translate the add to:
+ add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc)
+
+Note the assembler also does something similar for constpool loads:
+LPCRELL0:
+ ldr r0, LCPI1_0
+=>
+ ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc)
+
+
+//===---------------------------------------------------------------------===//
+
+We compile the following:
+
+define i16 @func_entry_2E_ce(i32 %i) {
+ switch i32 %i, label %bb12.exitStub [
+ i32 0, label %bb4.exitStub
+ i32 1, label %bb9.exitStub
+ i32 2, label %bb4.exitStub
+ i32 3, label %bb4.exitStub
+ i32 7, label %bb9.exitStub
+ i32 8, label %bb.exitStub
+ i32 9, label %bb9.exitStub
+ ]
+
+bb12.exitStub:
+ ret i16 0
+
+bb4.exitStub:
+ ret i16 1
+
+bb9.exitStub:
+ ret i16 2
+
+bb.exitStub:
+ ret i16 3
+}
+
+into:
+
+_func_entry_2E_ce:
+ mov r2, #1
+ lsl r2, r0
+ cmp r0, #9
+ bhi LBB1_4 @bb12.exitStub
+LBB1_1: @newFuncRoot
+ mov r1, #13
+ tst r2, r1
+ bne LBB1_5 @bb4.exitStub
+LBB1_2: @newFuncRoot
+ ldr r1, LCPI1_0
+ tst r2, r1
+ bne LBB1_6 @bb9.exitStub
+LBB1_3: @newFuncRoot
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+ bne LBB1_7 @bb.exitStub
+LBB1_4: @bb12.exitStub
+ mov r0, #0
+ bx lr
+LBB1_5: @bb4.exitStub
+ mov r0, #1
+ bx lr
+LBB1_6: @bb9.exitStub
+ mov r0, #2
+ bx lr
+LBB1_7: @bb.exitStub
+ mov r0, #3
+ bx lr
+LBB1_8:
+ .align 2
+LCPI1_0:
+ .long 642
+
+
+gcc compiles to:
+
+ cmp r0, #9
+ @ lr needed for prologue
+ bhi L2
+ ldr r3, L11
+ mov r2, #1
+ mov r1, r2, asl r0
+ ands r0, r3, r2, asl r0
+ movne r0, #2
+ bxne lr
+ tst r1, #13
+ beq L9
+L3:
+ mov r0, r2
+ bx lr
+L9:
+ tst r1, #256
+ movne r0, #3
+ bxne lr
+L2:
+ mov r0, #0
+ bx lr
+L12:
+ .align 2
+L11:
+ .long 642
+
+
+GCC is doing a few clever things here:
+ 1. It is predicating one of the returns. This isn't a clear win though: in
+ cases where that return isn't taken, it is replacing one condbranch with
+ two 'ne' predicated instructions.
+ 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of
+ tst. This will probably require whole function isel.
+ 3. GCC emits:
+ tst r1, #256
+ we emit:
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+
+
+//===---------------------------------------------------------------------===//
+
+When spilling in thumb mode and the sp offset is too large to fit in the ldr /
+str offset field, we load the offset from a constpool entry and add it to sp:
+
+ldr r2, LCPI
+add r2, sp
+ldr r2, [r2]
+
+These instructions preserve the condition codes, which is important if the
+spill is between a cmp and a bcc instruction. However, we can use the
+(potentially) cheaper sequence if we know it's ok to clobber the condition
+register.
+
+add r2, sp, #255 * 4
+add r2, #132
+ldr r2, [r2, #7 * 4]
+
+This is especially bad when dynamic alloca is used: all fixed size stack
+objects are then referenced off the frame pointer with negative offsets. See
+oggenc for an example.
+
+//===---------------------------------------------------------------------===//
+
+We are reserving R3 as a scratch register under thumb mode. So if it is live
+into the function, we save / restore R3 to / from R12. Until register
+scavenging is done, we should save R3 to a high callee-saved reg at
+emitPrologue time (when hasFP is true or the stack size is large) and restore
+R3 from that register instead. This allows us to at least get rid of the save
+to r12 every time it is used.
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen on test/CodeGen/ARM/select.ll, function f7:
+
+ ldr r5, LCPI1_0
+LPC0:
+ add r5, pc
+ ldr r6, LCPI1_1
+ ldr r2, LCPI1_2
+ cpy r3, r6
+ cpy lr, pc
+ bx r5
+
+//===---------------------------------------------------------------------===//
+
+Make the register allocator / spiller smarter so we can re-materialize
+"mov r, imm", etc. Almost all Thumb instructions clobber the condition codes.
+
+//===---------------------------------------------------------------------===//
+
+Add ldmia, stmia support.
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
new file mode 100644
index 0000000..068c441e
--- /dev/null
+++ b/lib/Target/ARM/README.txt
@@ -0,0 +1,554 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend.
+//===---------------------------------------------------------------------===//
+
+Reimplement 'select' in terms of 'SEL'.
+
+* We would really like to support UXTAB16, but we need to prove that the
+ add doesn't need to overflow between the two 16-bit chunks.
+
+* Implement pre/post increment support. (e.g. PR935)
+* Coalesce stack slots!
+* Implement smarter constant generation for binops with large immediates.
+
+* Consider materializing FP constants like 0.0f and 1.0f using integer
+ immediate instructions, then copying to the FPU. Would that be slower than a
+ load into the FPU?
+
+//===---------------------------------------------------------------------===//
+
+Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
+time regalloc happens, these values are now in a 32-bit register, usually with
+the top-bits known to be sign or zero extended. If spilled, we should be able
+to spill these to an 8-bit or 16-bit stack slot, zero or sign extending as part
+of the reload.
+
+Doing this reduces the size of the stack frame (important for thumb etc), and
+also increases the likelihood that we will be able to reload multiple values
+from the stack with a single load.
+
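+For an i8 value known to be zero extended, the narrowed spill / reload could
+look something like this (untested sketch):
+
+ strb r0, [sp, #4] @ spill only the live low byte
+ ...
+ ldrb r0, [sp, #4] @ the reload zero extends back to 32 bits for free
+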
+//===---------------------------------------------------------------------===//
+
+The constant island pass is in good shape. Some cleanups might be desirable,
+but there is unlikely to be much improvement in the generated code.
+
+1. There may be some advantage to trying to be smarter about the initial
+placement, rather than putting everything at the end.
+
+2. There might be some compile-time efficiency to be had by representing
+consecutive islands as a single block rather than multiple blocks.
+
+3. Use a priority queue to sort constant pool users in inverse order of
+ position so we always process the one closest to the end of the function
+ first. This may simplify CreateNewWater.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate copysign custom expansion. We are still generating crappy code with
+default expansion + if-conversion.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate one instruction from:
+
+define i32 @_Z6slow4bii(i32 %x, i32 %y) {
+ %tmp = icmp sgt i32 %x, %y
+ %retval = select i1 %tmp, i32 %x, i32 %y
+ ret i32 %retval
+}
+
+__Z6slow4bii:
+ cmp r0, r1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+=>
+
+__Z6slow4bii:
+ cmp r0, r1
+ movle r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
+Implement long long "X-3" with instructions that fold the immediate in. These
+were disabled due to badness with the ARM carry flag on subtracts.
+
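+For X in r0 (low) / r1 (high), the folded form would presumably look like the
+untested sketch below; note that ARM sets C to NOT-borrow on subtracts, which
+sbc then consumes, and that inverted convention is exactly the badness
+referred to above:
+
+ subs r0, r0, #3 @ low word
+ sbc r1, r1, #0 @ high word, minus the borrow
+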
+//===---------------------------------------------------------------------===//
+
+We currently compile abs:
+int foo(int p) { return p < 0 ? -p : p; }
+
+into:
+
+_foo:
+ rsb r1, r0, #0
+ cmn r0, #1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+
+This is very, uh, literal. This could be a 3 operation sequence:
+ t = (p sra 31);
+ res = (p xor t)-t
+
+Which would be better. This occurs in png decode.
+
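+In ARM assembly that sequence might look like (untested sketch, value in r0):
+
+ mov r1, r0, asr #31 @ t = p sra 31
+ eor r0, r0, r1 @ p xor t
+ sub r0, r0, r1 @ (p xor t) - t
+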
+//===---------------------------------------------------------------------===//
+
+More load / store optimizations:
+1) Look past instructions without side-effects (not load, store, branch, etc.)
+ when forming the list of loads / stores to optimize.
+
+2) Smarter register allocation?
+We are probably missing some opportunities to use ldm / stm. Consider:
+
+ldr r5, [r0]
+ldr r4, [r0, #4]
+
+This cannot be merged into an ldm. Perhaps we will need to do the transformation
+before register allocation. Then teach the register allocator to allocate a
+chunk of consecutive registers.
+
+3) Better representation for block transfer? This is from Olden/power:
+
+ fldd d0, [r4]
+ fstd d0, [r4, #+32]
+ fldd d0, [r4, #+8]
+ fstd d0, [r4, #+40]
+ fldd d0, [r4, #+16]
+ fstd d0, [r4, #+48]
+ fldd d0, [r4, #+24]
+ fstd d0, [r4, #+56]
+
+If we can spare the registers, it would be better to use fldm and fstm here.
+Need major register allocator enhancement though.
+
+4) Can we recognize the relative position of constantpool entries? i.e. Treat
+
+ ldr r0, LCPI17_3
+ ldr r1, LCPI17_4
+ ldr r2, LCPI17_5
+
+ as
+ ldr r0, LCPI17
+ ldr r1, LCPI17+4
+ ldr r2, LCPI17+8
+
+ Then the ldr's can be combined into a single ldm. See Olden/power.
+
+Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a
+double 64-bit FP constant:
+
+ adr r0, L6
+ ldmia r0, {r0-r1}
+
+ .align 2
+L6:
+ .long -858993459
+ .long 1074318540
+
+5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
+ldrd/strd if there are only two destination registers that form an odd/even
+pair. However, we would probably pay a penalty if the address is not aligned
+on an 8-byte boundary. This requires more information on load / store nodes
+(and MI's?) than we currently carry.
+
+6) Struct copies appear to be done field by field
+instead of by words, at least sometimes:
+
+struct foo { int x; short s; char c1; char c2; };
+void cpy(struct foo*a, struct foo*b) { *a = *b; }
+
+llvm code (-O2)
+ ldrb r3, [r1, #+6]
+ ldr r2, [r1]
+ ldrb r12, [r1, #+7]
+ ldrh r1, [r1, #+4]
+ str r2, [r0]
+ strh r1, [r0, #+4]
+ strb r3, [r0, #+6]
+ strb r12, [r0, #+7]
+gcc code (-O2)
+ ldmia r1, {r1-r2}
+ stmia r0, {r1-r2}
+
+In this benchmark poor handling of aggregate copies has shown up as
+having a large effect on size, and possibly speed as well (we don't have
+a good way to measure on ARM).
+
+//===---------------------------------------------------------------------===//
+
+* Consider this silly example:
+
+double bar(double x) {
+ double r = foo(3.1);
+ return x+r;
+}
+
+_bar:
+ stmfd sp!, {r4, r5, r7, lr}
+ add r7, sp, #8
+ mov r4, r0
+ mov r5, r1
+ fldd d0, LCPI1_0
+ fmrrd r0, r1, d0
+ bl _foo
+ fmdrr d0, r4, r5
+ fmsr s2, r0
+ fsitod d1, s2
+ faddd d0, d1, d0
+ fmrrd r0, r1, d0
+ ldmfd sp!, {r4, r5, r7, pc}
+
+Ignore the prologue and epilogue stuff for a second. Note
+ mov r4, r0
+ mov r5, r1
+the copies to callee-save registers and the fact that they are only being used
+by the fmdrr instruction. It would have been better had the fmdrr been
+scheduled before the call, placing the result in a callee-save DPR register.
+The two mov ops would not have been necessary.
+
+//===---------------------------------------------------------------------===//
+
+Calling convention related stuff:
+
+* gcc's parameter passing implementation is terrible and we suffer as a result:
+
+e.g.
+struct s {
+ double d1;
+ int s1;
+};
+
+void foo(struct s S) {
+ printf("%g, %d\n", S.d1, S.s1);
+}
+
+'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
+then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
+address of the format string):
+
+ stmfd sp!, {r7, lr}
+ add r7, sp, #0
+ sub sp, sp, #12
+ stmia sp, {r0, r1, r2}
+ ldmia sp, {r1-r2}
+ ldr r0, L5
+ ldr r3, [sp, #8]
+L2:
+ add r0, pc, r0
+ bl L_printf$stub
+
+Instead of an stmia, an ldmia, and an ldr, wouldn't it be better to do three moves?
+
+* Returning an aggregate type is even worse:
+
+e.g.
+struct s foo(void) {
+ struct s S = {1.1, 2};
+ return S;
+}
+
+ mov ip, r0
+ ldr r0, L5
+ sub sp, sp, #12
+L2:
+ add r0, pc, r0
+ @ lr needed for prologue
+ ldmia r0, {r0, r1, r2}
+ stmia sp, {r0, r1, r2}
+ stmia ip, {r0, r1, r2}
+ mov r0, ip
+ add sp, sp, #12
+ bx lr
+
+r0 (and later ip) is the hidden parameter from the caller in which to store the
+value. The first ldmia loads the constants into r0, r1, r2. The last stmia
+stores r0, r1, r2 to the address passed in. However, there is one additional
+stmia that stores r0, r1, and r2 to some stack location. That store is dead.
+
+The llvm-gcc generated code looks like this:
+
+csretcc void %foo(%struct.s* %agg.result) {
+entry:
+ %S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1]
+ %memtmp = alloca %struct.s ; <%struct.s*> [#uses=1]
+ cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
+ cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
+ cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1]
+ call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
+ ret void
+}
+
+llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from
+constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
+into a number of loads and stores, or 2) custom lower memcpy (of small size) to
+be ldmia / stmia. I think option 2 is better but the current register
+allocator cannot allocate a chunk of registers at a time.
+
+A feasible temporary solution is to use specific physical registers at
+lowering time for small (<= 4 words?) transfer sizes.
+
+* ARM CSRet calling convention requires the hidden argument to be returned by
+the callee.
+
+//===---------------------------------------------------------------------===//
+
+We can definitely do a better job on BB placement to eliminate some branches.
+It's very common to see llvm-generated assembly code that looks like this:
+
+LBB3:
+ ...
+LBB4:
+...
+ beq LBB3
+ b LBB2
+
+If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
+then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
+
+See McCat/18-imp/ComputeBoundingBoxes for an example.
+
+//===---------------------------------------------------------------------===//
+
+Register scavenging is now implemented. The example in the previous version
+of this document produces optimal code at -O2.
+
+//===---------------------------------------------------------------------===//
+
+Pre-/post- indexed load / stores:
+
+1) We should not make the pre/post- indexed load/store transform if the base ptr
+is guaranteed to be live beyond the load/store. This can happen if the base
+ptr is live out of the block in which we are performing the optimization, e.g.
+
+mov r1, r2
+ldr r3, [r1], #4
+...
+
+vs.
+
+ldr r3, [r2]
+add r1, r2, #4
+...
+
+In most cases, this is just a wasted optimization. However, sometimes it can
+negatively impact the performance because two-address code is more restrictive
+when it comes to scheduling.
+
+Unfortunately, liveout information is currently unavailable during DAG combine
+time.
+
+2) Consider splitting an indexed load / store into a pair of add/sub + load/store
+ to solve #1 (in TwoAddressInstructionPass.cpp).
+
+3) Enhance LSR to generate more opportunities for indexed ops.
+
+4) Once we add support for multiple result patterns, write indexed load
+ patterns instead of C++ instruction selection code.
+
+5) Use FLDM / FSTM to emulate indexed FP load / store.
+
+//===---------------------------------------------------------------------===//
+
+We should add i64 support to take advantage of 64-bit loads / stores.
+We can add a pseudo i64 register class containing pseudo registers that are
+register pairs. All other ops (e.g. add, sub) would be expanded as usual.
+
+We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
+from the i64 register. These are single moves which can be eliminated if the
+destination register is a sub-register of the source. We should implement proper
+subreg support in the register allocator to coalesce these away.
+
+There are other minor issues such as multiple instructions for a spill / restore
+/ move.
+
+//===---------------------------------------------------------------------===//
+
+Implement support for some more tricky ways to materialize immediates. For
+example, to get 0xffff8000, we can use:
+
+mov r9, #&3f8000
+sub r9, r9, #&400000
+
+//===---------------------------------------------------------------------===//
+
+We sometimes generate multiple add / sub instructions to update sp in prologue
+and epilogue if the inc / dec value is too large to fit in a single immediate
+operand. In some cases, perhaps it might be better to load the value from a
+constantpool instead.
+
+//===---------------------------------------------------------------------===//
+
+GCC generates significantly better code for this function.
+
+int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
+ int i = 0;
+
+ if (StackPtr != 0) {
+ while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
+ Line[i++] = Stack[--StackPtr];
+ if (LineLen > 32768)
+ {
+ while (StackPtr != 0 && i < LineLen)
+ {
+ i++;
+ --StackPtr;
+ }
+ }
+ }
+ return StackPtr;
+}
+
+//===---------------------------------------------------------------------===//
+
+This should compile to the mlas instruction:
+int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
+
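+Presumably something along these lines (untested sketch; r0 = x, r1 = y,
+r2 = z in the usual argument registers):
+
+ mlas r3, r0, r1, r2 @ r3 = x * y + z, setting N and Z
+ mov r0, #13
+ movmi r0, #7 @ a negative result selects 7
+ bx lr
+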
+//===---------------------------------------------------------------------===//
+
+At some point, we should triage these to see if they still apply to us:
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
+
+http://www.inf.u-szeged.hu/gcc-arm/
+http://citeseer.ist.psu.edu/debus04linktime.html
+
+//===---------------------------------------------------------------------===//
+
+gcc generates smaller code for this function at -O2 or -Os:
+
+void foo(signed char* p) {
+ if (*p == 3)
+ bar();
+ else if (*p == 4)
+ baz();
+ else if (*p == 5)
+ quux();
+}
+
+llvm decides it's a good idea to turn the repeated if...else into a
+binary tree, as if it were a switch; the resulting code requires one fewer
+compare-and-branch when *p<=2 or *p==5, the same number when *p==4
+or *p>6, and one more when *p==3. So it should be a speed win
+(on balance). However, the revised code is larger, with 4 conditional
+branches instead of 3.
+
+More seriously, there is a byte->word extend before
+each comparison, where there should be only one, and the condition codes
+are not remembered when the same two values are compared twice.
+
+//===---------------------------------------------------------------------===//
+
+More register scavenging work:
+
+1. Use the register scavenger to track frame indices materialized into registers
+ (those that do not fit in addressing modes) to allow reuse in the same BB.
+2. Finish scavenging for Thumb.
+3. We know some spills and restores are unnecessary. The issue is once live
+ intervals are merged, they are never split. So every def is spilled
+ and every use requires a restore if the register allocator decides the
+ resulting live interval is not assigned a physical register. It may be
+ possible (with the help of the scavenger) to turn some spill / restore
+ pairs into register copies.
+
+//===---------------------------------------------------------------------===//
+
+More LSR enhancements possible:
+
+1. Teach LSR about pre- and post- indexed ops to allow the iv increment to be
+ merged into a load / store.
+2. Allow iv reuse even when a type conversion is required. For example, i8
+ and i32 load / store addressing modes are identical.
+
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+int foo(int a, int b, int c, int d) {
+ long long acc = (long long)a * (long long)b;
+ acc += (long long)c * (long long)d;
+ return (int)(acc >> 32);
+}
+
+Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies
+two signed 32-bit values to produce a 64-bit value, and accumulates this with
+a 64-bit value.
+
+We currently get this with both v4 and v6:
+
+_foo:
+ smull r1, r0, r1, r0
+ smull r3, r2, r3, r2
+ adds r3, r3, r1
+ adc r0, r2, r0
+ bx lr
+
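+With SMLAL, a plausible sequence is one instruction shorter (untested sketch;
+a, b, c, d in r0-r3, ip used as a scratch register):
+
+ smull r0, ip, r1, r0 @ {ip,r0} = (long long)a * b
+ smlal r0, ip, r2, r3 @ {ip,r0} += (long long)c * d
+ mov r0, ip @ return the high 32 bits
+ bx lr
+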
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+_Z8full_addjj:
+ adds r2, r1, r2
+ movcc r1, #0
+ movcs r1, #1
+ str r2, [r0, #0]
+ strb r1, [r0, #4]
+ mov pc, lr
+
+_Z11no_overflowjj:
+ cmn r0, r1
+ movcs r0, #0
+ movcc r0, #1
+ mov pc, lr
+
+not:
+
+__Z8full_addjj:
+ add r3, r2, r1
+ str r3, [r0]
+ mov r2, #1
+ mov r12, #0
+ cmp r3, r1
+ movlo r12, r2
+ str r12, [r0, #+4]
+ bx lr
+__Z11no_overflowjj:
+ add r3, r1, r0
+ mov r2, #1
+ mov r1, #0
+ cmp r3, r0
+ movhs r1, r2
+ mov r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
new file mode 100644
index 0000000..2815176
--- /dev/null
+++ b/lib/Target/Alpha/Alpha.h
@@ -0,0 +1,51 @@
+//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Alpha back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ALPHA_H
+#define TARGET_ALPHA_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ class AlphaTargetMachine;
+ class FunctionPass;
+ class JITCodeEmitter;
+ class MachineCodeEmitter;
+ class raw_ostream;
+
+ FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
+ FunctionPass *createAlphaCodePrinterPass(raw_ostream &OS,
+ TargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+ FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
+ FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+ FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
+ JITCodeEmitter &JCE);
+ FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
+ FunctionPass *createAlphaBranchSelectionPass();
+
+} // end namespace llvm;
+
+// Defines symbolic names for Alpha registers. This defines a mapping from
+// register name to register number.
+//
+#include "AlphaGenRegisterNames.inc"
+
+// Defines symbolic names for the Alpha instructions.
+//
+#include "AlphaGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td
new file mode 100644
index 0000000..e3748c6
--- /dev/null
+++ b/lib/Target/Alpha/Alpha.td
@@ -0,0 +1,66 @@
+//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+// Alpha is little endian.
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+ "Enable CIX extentions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrInfo.td"
+
+def AlphaInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+ // let TSFlagsFields = [];
+ // let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// Alpha Processor Definitions
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", Alpha21264Itineraries, []>;
+def : Processor<"ev6" , Alpha21264Itineraries, []>;
+def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>;
+
+//===----------------------------------------------------------------------===//
+// The Alpha Target
+//===----------------------------------------------------------------------===//
+
+
+def Alpha : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AlphaInstrInfo;
+}
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
new file mode 100644
index 0000000..aca8ca7
--- /dev/null
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -0,0 +1,67 @@
+//===-- AlphaBranchSelector.cpp - Convert Pseudo branches --------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace pseudo COND_BRANCH_* instructions with the appropriate real
+// branches. This is a simplified version of the PPC branch selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN AlphaBSel : public MachineFunctionPass {
+ static char ID;
+ AlphaBSel() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Alpha Branch Selection";
+ }
+ };
+ char AlphaBSel::ID = 0;
+}
+
+/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createAlphaBranchSelectionPass() {
+ return new AlphaBSel();
+}
+
+bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) {
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI) {
+ if (MBBI->getOpcode() == Alpha::COND_BRANCH_I ||
+ MBBI->getOpcode() == Alpha::COND_BRANCH_F) {
+
+ // condbranch operands:
+ // 0. bc opcode
+ // 1. reg
+ // 2. target MBB
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm()));
+ }
+ }
+ }
+
+ return true;
+}
+
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
new file mode 100644
index 0000000..f50f007
--- /dev/null
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -0,0 +1,242 @@
+//===-- Alpha/AlphaCodeEmitter.cpp - Convert Alpha code to machine code ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Alpha machine instructions
+// into relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-emitter"
+#include "AlphaTargetMachine.h"
+#include "AlphaRelocations.h"
+#include "Alpha.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+
+ class AlphaCodeEmitter {
+ MachineCodeEmitter &MCE;
+ public:
+ AlphaCodeEmitter(MachineCodeEmitter &mce) : MCE(mce) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO);
+ };
+
+ template <class CodeEmitter>
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
+ public AlphaCodeEmitter
+ {
+ const AlphaInstrInfo *II;
+ TargetMachine &TM;
+ CodeEmitter &MCE;
+
+ public:
+ static char ID;
+ explicit Emitter(TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
+ II(0), TM(tm), MCE(mce) {}
+ Emitter(TargetMachine &tm, CodeEmitter &mce, const AlphaInstrInfo& ii)
+ : MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
+ II(&ii), TM(tm), MCE(mce) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Alpha Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+ void emitBasicBlock(MachineBasicBlock &MBB);
+ };
+
+ template <class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha
+/// code to the specified MCE object.
+
+FunctionPass *llvm::createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+
+FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template <class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+ II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo();
+
+ do {
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ emitBasicBlock(*I);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+template <class CodeEmitter>
+void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ const MachineInstr &MI = *I;
+ switch(MI.getOpcode()) {
+ default:
+ MCE.emitWordLE(getBinaryCodeForInstr(*I));
+ break;
+ case Alpha::ALTENT:
+ case Alpha::PCLABEL:
+ case Alpha::MEMLABEL:
+ case TargetInstrInfo::IMPLICIT_DEF:
+ break; // Skip these; they emit no machine code.
+ }
+ }
+}
+
+static unsigned getAlphaRegNumber(unsigned Reg) {
+ switch (Reg) {
+ case Alpha::R0 : case Alpha::F0 : return 0;
+ case Alpha::R1 : case Alpha::F1 : return 1;
+ case Alpha::R2 : case Alpha::F2 : return 2;
+ case Alpha::R3 : case Alpha::F3 : return 3;
+ case Alpha::R4 : case Alpha::F4 : return 4;
+ case Alpha::R5 : case Alpha::F5 : return 5;
+ case Alpha::R6 : case Alpha::F6 : return 6;
+ case Alpha::R7 : case Alpha::F7 : return 7;
+ case Alpha::R8 : case Alpha::F8 : return 8;
+ case Alpha::R9 : case Alpha::F9 : return 9;
+ case Alpha::R10 : case Alpha::F10 : return 10;
+ case Alpha::R11 : case Alpha::F11 : return 11;
+ case Alpha::R12 : case Alpha::F12 : return 12;
+ case Alpha::R13 : case Alpha::F13 : return 13;
+ case Alpha::R14 : case Alpha::F14 : return 14;
+ case Alpha::R15 : case Alpha::F15 : return 15;
+ case Alpha::R16 : case Alpha::F16 : return 16;
+ case Alpha::R17 : case Alpha::F17 : return 17;
+ case Alpha::R18 : case Alpha::F18 : return 18;
+ case Alpha::R19 : case Alpha::F19 : return 19;
+ case Alpha::R20 : case Alpha::F20 : return 20;
+ case Alpha::R21 : case Alpha::F21 : return 21;
+ case Alpha::R22 : case Alpha::F22 : return 22;
+ case Alpha::R23 : case Alpha::F23 : return 23;
+ case Alpha::R24 : case Alpha::F24 : return 24;
+ case Alpha::R25 : case Alpha::F25 : return 25;
+ case Alpha::R26 : case Alpha::F26 : return 26;
+ case Alpha::R27 : case Alpha::F27 : return 27;
+ case Alpha::R28 : case Alpha::F28 : return 28;
+ case Alpha::R29 : case Alpha::F29 : return 29;
+ case Alpha::R30 : case Alpha::F30 : return 30;
+ case Alpha::R31 : case Alpha::F31 : return 31;
+ default:
+ assert(0 && "Unhandled reg");
+ abort();
+ }
+}
+
+unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) {
+
+ unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
+ // or things that get fixed up later by the JIT.
+
+ if (MO.isReg()) {
+ rv = getAlphaRegNumber(MO.getReg());
+ } else if (MO.isImm()) {
+ rv = MO.getImm();
+ } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
+ DOUT << MO << " is a relocated op for " << MI << "\n";
+ unsigned Reloc = 0;
+ int Offset = 0;
+ bool useGOT = false;
+ switch (MI.getOpcode()) {
+ case Alpha::BSR:
+ Reloc = Alpha::reloc_bsr;
+ break;
+ case Alpha::LDLr:
+ case Alpha::LDQr:
+ case Alpha::LDBUr:
+ case Alpha::LDWUr:
+ case Alpha::LDSr:
+ case Alpha::LDTr:
+ case Alpha::LDAr:
+ case Alpha::STQr:
+ case Alpha::STLr:
+ case Alpha::STWr:
+ case Alpha::STBr:
+ case Alpha::STSr:
+ case Alpha::STTr:
+ Reloc = Alpha::reloc_gprellow;
+ break;
+ case Alpha::LDAHr:
+ Reloc = Alpha::reloc_gprelhigh;
+ break;
+ case Alpha::LDQl:
+ Reloc = Alpha::reloc_literal;
+ useGOT = true;
+ break;
+ case Alpha::LDAg:
+ case Alpha::LDAHg:
+ Reloc = Alpha::reloc_gpdist;
+ Offset = MI.getOperand(3).getImm();
+ break;
+ default:
+ assert(0 && "unknown relocatable instruction");
+ abort();
+ }
+ if (MO.isGlobal())
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
+ Reloc, MO.getGlobal(), Offset,
+ isa<Function>(MO.getGlobal()),
+ useGOT));
+ else if (MO.isSymbol())
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, MO.getSymbolName(),
+ Offset, true));
+ else
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, MO.getIndex(), Offset));
+ } else if (MO.isMBB()) {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Alpha::reloc_bsr, MO.getMBB()));
+  } else {
+ cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+ abort();
+ }
+
+ return rv;
+}
+
+#include "AlphaGenCodeEmitter.inc"
+
+
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
new file mode 100644
index 0000000..affcd3e
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -0,0 +1,553 @@
+//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for Alpha,
+// converting from a legalized dag to an Alpha dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "AlphaISelLowering.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+ //===--------------------------------------------------------------------===//
+ /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
+ /// instructions for SelectionDAG operations.
+ class AlphaDAGToDAGISel : public SelectionDAGISel {
+ static const int64_t IMM_LOW = -32768;
+ static const int64_t IMM_HIGH = 32767;
+ static const int64_t IMM_MULT = 65536;
+ static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
+ static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
+
+ static int64_t get_ldah16(int64_t x) {
+ int64_t y = x / IMM_MULT;
+ if (x % IMM_MULT > IMM_HIGH)
+ ++y;
+ return y;
+ }
+
+ static int64_t get_lda16(int64_t x) {
+ return x - get_ldah16(x) * IMM_MULT;
+ }
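+
+    // For example, x = 0x18000 splits as get_ldah16(x) = 2 and
+    // get_lda16(x) = -32768, since 2*65536 - 32768 = 0x18000; both halves
+    // fit the signed 16-bit immediates of LDAH and LDA.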
+
+ /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
+ /// instruction (if not, return 0). Note that this code accepts partial
+    /// zap masks.  For example (and LHS, 1) is a valid zap, as long as we
+    /// know that bits 1-7 of LHS are already zero.  If LHS is non-null, we
+    /// are in checking mode.  If LHS is null, we assume that the mask has
+    /// already been validated.
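+    ///
+    /// For example, for Constant = 0x00000000FFFF00FF, bytes 0, 2, and 3 are
+    /// fully set, so the zap mask is 0b1101 = 0xD: "zapnot r, 0xD" keeps
+    /// those bytes and clears the rest.  For a partial byte such as the 0xEF
+    /// in X & 0xEF00, byte 1 may only be kept whole if its missing bit
+    /// (0x1000) is already known to be zero in LHS.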
+ uint64_t get_zapImm(SDValue LHS, uint64_t Constant) {
+ uint64_t BitsToCheck = 0;
+ unsigned Result = 0;
+ for (unsigned i = 0; i != 8; ++i) {
+ if (((Constant >> 8*i) & 0xFF) == 0) {
+ // nothing to do.
+ } else {
+ Result |= 1 << i;
+ if (((Constant >> 8*i) & 0xFF) == 0xFF) {
+ // If the entire byte is set, zapnot the byte.
+ } else if (LHS.getNode() == 0) {
+            // Otherwise, if the mask was previously validated, we know it's
+            // okay to zapnot this entire byte even though not all the bits
+            // are set.
+ } else {
+            // Otherwise we don't know that it's okay to zapnot this entire
+            // byte.  Only do this if we can prove that the missing bits are
+            // already zero, so the bytezap doesn't actually need to clear
+            // them.
+ BitsToCheck |= ~Constant & (0xFF << 8*i);
+ }
+ }
+ }
+
+      // If there are missing bits in a byte (for example, X & 0xEF00), check
+      // whether the missing bits (0x1000) are already known to be zero.  If
+      // not, the zap isn't okay to do, as it won't clear all the required
+      // bits.
+ if (BitsToCheck &&
+ !CurDAG->MaskedValueIsZero(LHS,
+ APInt(LHS.getValueSizeInBits(),
+ BitsToCheck)))
+ return 0;
+
+ return Result;
+ }
+
+ static uint64_t get_zapImm(uint64_t x) {
+ unsigned build = 0;
+ for(int i = 0; i != 8; ++i) {
+ if ((x & 0x00FF) == 0x00FF)
+ build |= 1 << i;
+ else if ((x & 0x00FF) != 0)
+ return 0;
+ x >>= 8;
+ }
+ return build;
+ }
+
+
+ static uint64_t getNearPower2(uint64_t x) {
+ if (!x) return 0;
+ unsigned at = CountLeadingZeros_64(x);
+      // Shift as 64-bit values; a plain "1 <<" would shift a 32-bit int and
+      // misbehave once the result needs more than 31 bits.
+      uint64_t complow = 1ULL << (63 - at);
+      uint64_t comphigh = complow << 1;
+      //cerr << x << ":" << complow << ":" << comphigh << "\n";
+      // complow <= x <= comphigh, so both differences are non-negative and
+      // the unsigned comparison picks the nearer power of two.
+      if (x - complow <= comphigh - x)
+        return complow;
+      else
+        return comphigh;
+ }
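+
+    // For example, getNearPower2(100) considers 64 and 128 and returns 128,
+    // the closer of the two; exact ties go to the lower power (96 -> 64).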
+
+ static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
+ uint64_t y = getNearPower2(x);
+ if (swap)
+ return (y - x) == r;
+ else
+ return (x - y) == r;
+ }
+
+ static bool isFPZ(SDValue N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && (CN->getValueAPF().isZero()));
+ }
+ static bool isFPZn(SDValue N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && CN->getValueAPF().isNegZero());
+ }
+ static bool isFPZp(SDValue N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && CN->getValueAPF().isPosZero());
+ }
+
+ public:
+ explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
+ : SelectionDAGISel(TM)
+ {}
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(int64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue Op);
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "Alpha DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ Op0 = Op;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ return false;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "AlphaGenDAGISel.inc"
+
+private:
+ SDValue getGlobalBaseReg();
+ SDValue getGlobalRetAddr();
+ void SelectCALL(SDValue Op);
+
+ };
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+///
+SDValue AlphaDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GP = 0;
+ for(MachineRegisterInfo::livein_iterator ii = RegInfo->livein_begin(),
+ ee = RegInfo->livein_end(); ii != ee; ++ii)
+ if (ii->first == Alpha::R29) {
+ GP = ii->second;
+ break;
+ }
+ assert(GP && "GOT PTR not in liveins");
+ // FIXME is there anywhere sensible to get a DebugLoc here?
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ DebugLoc::getUnknownLoc(), GP, MVT::i64);
+}
+
+/// getGlobalRetAddr - Grab the return address
+///
+SDValue AlphaDAGToDAGISel::getGlobalRetAddr() {
+ unsigned RA = 0;
+ for(MachineRegisterInfo::livein_iterator ii = RegInfo->livein_begin(),
+ ee = RegInfo->livein_end(); ii != ee; ++ii)
+ if (ii->first == Alpha::R26) {
+ RA = ii->second;
+ break;
+ }
+ assert(RA && "RA PTR not in liveins");
+ // FIXME is there anywhere sensible to get a DebugLoc here?
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ DebugLoc::getUnknownLoc(), RA, MVT::i64);
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void AlphaDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case AlphaISD::CALL:
+ SelectCALL(Op);
+ return NULL;
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i32),
+ getI64Imm(0));
+ }
+ case ISD::GLOBAL_OFFSET_TABLE: {
+ SDValue Result = getGlobalBaseReg();
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+ case AlphaISD::GlobalRetAddr: {
+ SDValue Result = getGlobalRetAddr();
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+
+ case AlphaISD::DivCall: {
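+    // Per the copies below, the div/rem helpers (__divq and friends, see
+    // LowerOperation in AlphaISelLowering.cpp) take their operands in $24
+    // and $25, are called through $27, and return their result in $27.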
+ SDValue Chain = CurDAG->getEntryNode();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1,
+ SDValue(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2,
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
+ Chain.getValue(1));
+ SDNode *CNode =
+ CurDAG->getTargetNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag,
+ Chain, Chain.getValue(1));
+ Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
+ SDValue(CNode, 1));
+ return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
+ }
+
+ case ISD::READCYCLECOUNTER: {
+ SDValue Chain = N->getOperand(0);
+ return CurDAG->getTargetNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
+ Chain);
+ }
+
+ case ISD::Constant: {
+ uint64_t uval = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (uval == 0) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Alpha::R31, MVT::i64);
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+
+ int64_t val = (int64_t)uval;
+ int32_t val32 = (int32_t)val;
+ if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
+ val >= IMM_LOW + IMM_LOW * IMM_MULT)
+ break; //(LDAH (LDA))
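+    // The (LDAH (LDA)) case above handles e.g. 0x12345678 roughly as
+    //   lda  $r, 0x5678($31)   ; $r = 0x00005678
+    //   ldah $r, 0x1234($r)    ; $r = 0x12345678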
+ if ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
+ // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
+ break; //(zext (LDAH (LDA)))
+ //Else use the constant pool
+ ConstantInt *C = ConstantInt::get(Type::Int64Ty, uval);
+ SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
+ SDNode *Tmp = CurDAG->getTargetNode(Alpha::LDAHr, dl, MVT::i64, CPI,
+ getGlobalBaseReg());
+ return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
+ CPI, SDValue(Tmp, 0), CurDAG->getEntryNode());
+ }
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ MVT T = isDouble ? MVT::f64 : MVT::f32;
+ if (CN->getValueAPF().isPosZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else if (CN->getValueAPF().isNegZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else {
+ abort();
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ unsigned Opc = Alpha::WTF;
+ bool rev = false;
+ bool inv = false;
+ switch(CC) {
+ default: DEBUG(N->dump(CurDAG)); assert(0 && "Unknown FP comparison!");
+ case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
+ Opc = Alpha::CMPTEQ; break;
+ case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
+ Opc = Alpha::CMPTLT; break;
+ case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE:
+ Opc = Alpha::CMPTLE; break;
+ case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT:
+ Opc = Alpha::CMPTLT; rev = true; break;
+ case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE:
+ Opc = Alpha::CMPTLE; rev = true; break;
+ case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE:
+ Opc = Alpha::CMPTEQ; inv = true; break;
+ case ISD::SETO:
+ Opc = Alpha::CMPTUN; inv = true; break;
+ case ISD::SETUO:
+ Opc = Alpha::CMPTUN; break;
+ };
+ SDValue tmp1 = N->getOperand(rev?1:0);
+ SDValue tmp2 = N->getOperand(rev?0:1);
+ SDNode *cmp = CurDAG->getTargetNode(Opc, dl, MVT::f64, tmp1, tmp2);
+ if (inv)
+ cmp = CurDAG->getTargetNode(Alpha::CMPTEQ, dl,
+ MVT::f64, SDValue(cmp, 0),
+ CurDAG->getRegister(Alpha::F31, MVT::f64));
+ switch(CC) {
+ case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
+ case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
+ {
+ SDNode* cmp2 = CurDAG->getTargetNode(Alpha::CMPTUN, dl, MVT::f64,
+ tmp1, tmp2);
+ cmp = CurDAG->getTargetNode(Alpha::ADDT, dl, MVT::f64,
+ SDValue(cmp2, 0), SDValue(cmp, 0));
+ break;
+ }
+ default: break;
+ }
+
+ SDNode* LD = CurDAG->getTargetNode(Alpha::FTOIT, dl,
+ MVT::i64, SDValue(cmp, 0));
+ return CurDAG->getTargetNode(Alpha::CMPULT, dl, MVT::i64,
+ CurDAG->getRegister(Alpha::R31, MVT::i64),
+ SDValue(LD,0));
+ }
+ break;
+
+ case ISD::SELECT:
+ if (N->getValueType(0).isFloatingPoint() &&
+ (N->getOperand(0).getOpcode() != ISD::SETCC ||
+ !N->getOperand(0).getOperand(1).getValueType().isFloatingPoint())) {
+      //This should be the only case not covered by the Patterns
+      //FIXME: Don't have SelectCode die, but rather return something testable
+      // so that things like this can be caught in fall-through code
+ //move int to fp
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ SDValue cond = N->getOperand(0);
+ SDValue TV = N->getOperand(1);
+ SDValue FV = N->getOperand(2);
+
+ SDNode* LD = CurDAG->getTargetNode(Alpha::ITOFT, dl, MVT::f64, cond);
+ return CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES,
+ dl, MVT::f64, FV, TV, SDValue(LD,0));
+ }
+ break;
+
+ case ISD::AND: {
+ ConstantSDNode* SC = NULL;
+ ConstantSDNode* MC = NULL;
+ if (N->getOperand(0).getOpcode() == ISD::SRL &&
+ (MC = dyn_cast<ConstantSDNode>(N->getOperand(1))) &&
+ (SC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)))) {
+ uint64_t sval = SC->getZExtValue();
+ uint64_t mval = MC->getZExtValue();
+      // If the result is a zap, let the autogenerated patterns handle it.
+ if (get_zapImm(N->getOperand(0), mval))
+ break;
+      // Given mask X and shift S, we want to see if there is any zap in the
+      // mask if we play around with the bottom S bits.
+ uint64_t dontcare = (~0ULL) >> (64 - sval);
+ uint64_t mask = mval << sval;
+
+ if (get_zapImm(mask | dontcare))
+ mask = mask | dontcare;
+
+ if (get_zapImm(mask)) {
+ SDValue Z =
+ SDValue(CurDAG->getTargetNode(Alpha::ZAPNOTi, dl, MVT::i64,
+ N->getOperand(0).getOperand(0),
+ getI64Imm(get_zapImm(mask))), 0);
+ return CurDAG->getTargetNode(Alpha::SRLr, dl, MVT::i64, Z,
+ getI64Imm(sval));
+ }
+ }
+ break;
+ }
+
+ }
+
+ return SelectCode(Op);
+}
+
+void AlphaDAGToDAGISel::SelectCALL(SDValue Op) {
+  //TODO: add flag stuff to prevent nondeterministic breakage!
+
+ SDNode *N = Op.getNode();
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ SDValue InFlag(0,0); // Null incoming flag value.
+ DebugLoc dl = N->getDebugLoc();
+
+ std::vector<SDValue> CallOperands;
+ std::vector<MVT> TypeOperands;
+
+ //grab the arguments
+ for(int i = 2, e = N->getNumOperands(); i < e; ++i) {
+ TypeOperands.push_back(N->getOperand(i).getValueType());
+ CallOperands.push_back(N->getOperand(i));
+ }
+ int count = N->getNumOperands() - 2;
+
+ static const unsigned args_int[] = {Alpha::R16, Alpha::R17, Alpha::R18,
+ Alpha::R19, Alpha::R20, Alpha::R21};
+ static const unsigned args_float[] = {Alpha::F16, Alpha::F17, Alpha::F18,
+ Alpha::F19, Alpha::F20, Alpha::F21};
+
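+  // The first six arguments ride in $16-$21 / $f16-$f21; anything beyond
+  // that is stored below to the outgoing argument area at (i-6)*8(SP),
+  // following the calling standard described in AlphaISelLowering.cpp.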
+ for (int i = 6; i < count; ++i) {
+ unsigned Opc = Alpha::WTF;
+ if (TypeOperands[i].isInteger()) {
+ Opc = Alpha::STQ;
+ } else if (TypeOperands[i] == MVT::f32) {
+ Opc = Alpha::STS;
+ } else if (TypeOperands[i] == MVT::f64) {
+ Opc = Alpha::STT;
+ } else
+ assert(0 && "Unknown operand");
+
+ SDValue Ops[] = { CallOperands[i], getI64Imm((i - 6) * 8),
+ CurDAG->getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64),
+ Chain };
+ Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 4), 0);
+ }
+ for (int i = 0; i < std::min(6, count); ++i) {
+ if (TypeOperands[i].isInteger()) {
+ Chain = CurDAG->getCopyToReg(Chain, dl, args_int[i],
+ CallOperands[i], InFlag);
+ InFlag = Chain.getValue(1);
+ } else if (TypeOperands[i] == MVT::f32 || TypeOperands[i] == MVT::f64) {
+ Chain = CurDAG->getCopyToReg(Chain, dl, args_float[i],
+ CallOperands[i], InFlag);
+ InFlag = Chain.getValue(1);
+ } else
+ assert(0 && "Unknown operand");
+ }
+
+ // Finally, once everything is in registers to pass to the call, emit the
+ // call itself.
+ if (Addr.getOpcode() == AlphaISD::GPRelLo) {
+ SDValue GOT = getGlobalBaseReg();
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getTargetNode(Alpha::BSR, dl, MVT::Other,
+ MVT::Flag, Addr.getOperand(0),
+ Chain, InFlag), 0);
+ } else {
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getTargetNode(Alpha::JSR, dl, MVT::Other,
+ MVT::Flag, Chain, InFlag), 0);
+ }
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDValue> CallResults;
+
+ switch (N->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other: break;
+ case MVT::i64:
+ Chain = CurDAG->getCopyFromReg(Chain, dl,
+ Alpha::R0, MVT::i64, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ case MVT::f32:
+ Chain = CurDAG->getCopyFromReg(Chain, dl,
+ Alpha::F0, MVT::f32, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ case MVT::f64:
+ Chain = CurDAG->getCopyFromReg(Chain, dl,
+ Alpha::F0, MVT::f64, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ }
+
+ CallResults.push_back(Chain);
+ for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
+ ReplaceUses(Op.getValue(i), CallResults[i]);
+}
+
+
+/// createAlphaISelDag - This pass converts a legalized DAG into an
+/// Alpha-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) {
+ return new AlphaDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
new file mode 100644
index 0000000..1001112
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -0,0 +1,798 @@
+//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaISelLowering.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+/// AddLiveIn - This helper function adds the specified physical register to the
+/// MachineFunction as a live in value. It also creates a corresponding virtual
+/// register for it.
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
+ TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) {
+ // Set up the TargetLowering object.
+ //I am having problems with shr n ubyte 1
+ setShiftAmountType(MVT::i64);
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ setUsesGlobalOffsetTable(true);
+
+ addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
+ addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
+ addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+
+ if (!TM.getSubtarget<AlphaSubtarget>().hasCT()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ }
+ setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM , MVT::i64, Custom);
+ setOperationAction(ISD::UREM , MVT::i64, Custom);
+ setOperationAction(ISD::SDIV , MVT::i64, Custom);
+ setOperationAction(ISD::UDIV , MVT::i64, Custom);
+
+ setOperationAction(ISD::ADDC , MVT::i64, Expand);
+ setOperationAction(ISD::ADDE , MVT::i64, Expand);
+ setOperationAction(ISD::SUBC , MVT::i64, Expand);
+ setOperationAction(ISD::SUBE , MVT::i64, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+
+ // We don't support sin/cos/sqrt/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::f32, Promote);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Not implemented yet.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool and
+ // ExternalSymbols nodes into the appropriate instructions to
+ // materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i32, Custom);
+
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setStackPointerRegisterToSaveRestore(Alpha::R30);
+
+ addLegalFPImmediate(APFloat(+0.0)); //F31
+ addLegalFPImmediate(APFloat(+0.0f)); //F31
+ addLegalFPImmediate(APFloat(-0.0)); //-F31
+ addLegalFPImmediate(APFloat(-0.0f)); //-F31
+
+ setJumpBufSize(272);
+ setJumpBufAlignment(16);
+
+ computeRegisterProperties();
+}
+
+MVT AlphaTargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i64;
+}
+
+const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case AlphaISD::CVTQT_: return "Alpha::CVTQT_";
+ case AlphaISD::CVTQS_: return "Alpha::CVTQS_";
+ case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_";
+ case AlphaISD::GPRelHi: return "Alpha::GPRelHi";
+ case AlphaISD::GPRelLo: return "Alpha::GPRelLo";
+ case AlphaISD::RelLit: return "Alpha::RelLit";
+ case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr";
+ case AlphaISD::CALL: return "Alpha::CALL";
+ case AlphaISD::DivCall: return "Alpha::DivCall";
+ case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG";
+ case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I";
+ case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F";
+ }
+}
+
+static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
+ return Lo;
+}
+
+//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/
+//AA-PY8AC-TET1_html/callCH3.html#BLOCK21
+
+//For now, just use variable size stack frame format
+
+//In a standard call, the first six items are passed in registers $16
+//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details
+//of argument-to-register correspondence.) The remaining items are
+//collected in a memory argument list that is a naturally aligned
+//array of quadwords. In a standard call, this list, if present, must
+//be passed at 0(SP).
+//7 ... n 0(SP) ... (n-7)*8(SP)
+
+// //#define FP $15
+// //#define RA $26
+// //#define PV $27
+// //#define GP $29
+// //#define SP $30
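+
+// Worked example: for an integer call f(a1, ..., a8), a1-a6 go in $16-$21
+// and the two overflow arguments land at 0(SP) and 8(SP), i.e. argument i
+// (i > 6) lives at (i-7)*8(SP).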
+
+static SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
+ int &VarArgsBase,
+ int &VarArgsOffset) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ std::vector<SDValue> ArgValues;
+ SDValue Root = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ AddLiveIn(MF, Alpha::R29, &Alpha::GPRCRegClass); //GP
+ AddLiveIn(MF, Alpha::R26, &Alpha::GPRCRegClass); //RA
+
+ unsigned args_int[] = {
+ Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
+ unsigned args_float[] = {
+ Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
+
+ for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; ++ArgNo) {
+ SDValue argt;
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ SDValue ArgVal;
+
+ if (ArgNo < 6) {
+ switch (ObjectVT.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::f64:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::f32:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F4RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::i64:
+ args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
+ &Alpha::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, dl, args_int[ArgNo], MVT::i64);
+ break;
+ }
+ } else { //more args
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6));
+
+ // Create the SelectionDAG nodes corresponding to a load
+ //from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ }
+ ArgValues.push_back(ArgVal);
+ }
+
+  // If the function takes a variable number of arguments, copy all regs to stack
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ if (isVarArg) {
+ VarArgsOffset = (Op.getNode()->getNumValues()-1) * 8;
+ std::vector<SDValue> LS;
+ for (int i = 0; i < 6; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
+ args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
+ SDValue argt = DAG.getCopyFromReg(Root, dl, args_int[i], MVT::i64);
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
+ if (i == 0) VarArgsBase = FI;
+ SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+
+ if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
+ args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
+ argt = DAG.getCopyFromReg(Root, dl, args_float[i], MVT::f64);
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
+ SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+ }
+
+ //Set up a token factor with all the stack traffic
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size());
+}
+
+static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
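+  // The RET node carries the chain plus two operands per returned value
+  // (the value and its signness), so the 1, 3, or 5 operands below mean
+  // zero, one, or two results respectively.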
+ SDValue Copy = DAG.getCopyToReg(Op.getOperand(0), dl, Alpha::R26,
+ DAG.getNode(AlphaISD::GlobalRetAddr,
+ DebugLoc::getUnknownLoc(),
+ MVT::i64),
+ SDValue());
+ switch (Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1:
+ break;
+ //return SDValue(); // ret void is legal
+ case 3: {
+ MVT ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg;
+ if (ArgVT.isInteger())
+ ArgReg = Alpha::R0;
+ else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg = Alpha::F0;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
+ Op.getOperand(1), Copy.getValue(1));
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
+ break;
+ }
+ case 5: {
+ MVT ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg1, ArgReg2;
+ if (ArgVT.isInteger()) {
+ ArgReg1 = Alpha::R0;
+ ArgReg2 = Alpha::R1;
+ } else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg1 = Alpha::F0;
+ ArgReg2 = Alpha::F1;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
+ Op.getOperand(1), Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
+ Op.getOperand(3), Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
+ break;
+ }
+ }
+ return DAG.getNode(AlphaISD::RET_FLAG, dl,
+ MVT::Other, Copy, Copy.getValue(1));
+}
+
+std::pair<SDValue, SDValue>
+AlphaTargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg, unsigned CallingConv,
+ bool isTailCall, SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl) {
+ int NumBytes = 0;
+ if (Args.size() > 6)
+ NumBytes = (Args.size() - 6) * 8;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ std::vector<SDValue> args_to_use;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ {
+ switch (getValueType(Args[i].Ty).getSimpleVT()) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Promote the integer to 64 bits. If the input type is signed use a
+ // sign extend, otherwise use a zero extend.
+ if (Args[i].isSExt)
+ Args[i].Node = DAG.getNode(ISD::SIGN_EXTEND, dl,
+ MVT::i64, Args[i].Node);
+ else if (Args[i].isZExt)
+ Args[i].Node = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i64, Args[i].Node);
+ else
+ Args[i].Node = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Args[i].Node);
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ case MVT::f32:
+ break;
+ }
+ args_to_use.push_back(Args[i].Node);
+ }
+
+ std::vector<MVT> RetVals;
+ MVT RetTyVT = getValueType(RetTy);
+ MVT ActualRetTyVT = RetTyVT;
+ if (RetTyVT.getSimpleVT() >= MVT::i1 && RetTyVT.getSimpleVT() <= MVT::i32)
+ ActualRetTyVT = MVT::i64;
+
+ if (RetTyVT != MVT::isVoid)
+ RetVals.push_back(ActualRetTyVT);
+ RetVals.push_back(MVT::Other);
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
+ SDValue TheCall = DAG.getNode(AlphaISD::CALL, dl,
+ RetVals, &Ops[0], Ops.size());
+ Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+ SDValue RetVal = TheCall;
+
+ if (RetTyVT != ActualRetTyVT) {
+ ISD::NodeType AssertKind = ISD::DELETED_NODE;
+ if (RetSExt)
+ AssertKind = ISD::AssertSext;
+ else if (RetZExt)
+ AssertKind = ISD::AssertZext;
+
+ if (AssertKind != ISD::DELETED_NODE)
+ RetVal = DAG.getNode(AssertKind, dl, MVT::i64, RetVal,
+ DAG.getValueType(RetTyVT));
+
+ RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal);
+ }
+
+ return std::make_pair(RetVal, Chain);
+}
+
+void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
+ SDValue &DataPtr, SelectionDAG &DAG) {
+ Chain = N->getOperand(0);
+ SDValue VAListP = N->getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
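+  // The va_list is treated as { i8 *base; i32 offset } with the offset at
+  // +8: the next argument lives at base+offset, and (per
+  // LowerFORMAL_ARGUMENTS) the FP copies of the first six arguments sit
+  // 6*8 bytes below their integer counterparts.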
+ SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0);
+ SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+ Tmp, NULL, 0, MVT::i32);
+ DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
+ if (N->getValueType(0).isFloatingPoint())
+ {
+ //if fp && Offset < 6*8, then subtract 6*8 from DataPtr
+ SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr,
+ DAG.getConstant(8*6, MVT::i64));
+ SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset,
+ DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
+ DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr);
+ }
+
+ SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
+ DAG.getConstant(8, MVT::i64));
+ Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0,
+ MVT::i32);
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Wasn't expecting to be able to lower this!");
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG,
+ VarArgsBase,
+ VarArgsOffset);
+
+ case ISD::RET: return LowerRET(Op,DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: break; // Don't custom lower most intrinsics.
+ case Intrinsic::alpha_umulh:
+ return DAG.getNode(ISD::MULHU, dl, MVT::i64,
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+
+ case ISD::SINT_TO_FP: {
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ SDValue LD;
+ bool isDouble = Op.getValueType() == MVT::f64;
+ LD = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl,
+ isDouble?MVT::f64:MVT::f32, LD);
+ return FP;
+ }
+ case ISD::FP_TO_SINT: {
+ bool isDouble = Op.getOperand(0).getValueType() == MVT::f64;
+ SDValue src = Op.getOperand(0);
+
+ if (!isDouble) //Promote
+ src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
+
+ src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, src);
+ }
+ case ISD::ConstantPool: {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
+ // FIXME there isn't really any debug info here
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
+ return Lo;
+ }
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for Alpha.");
+ case ISD::GlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset());
+ // FIXME there isn't really any debug info here
+
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) {
+ if (GV->hasLocalLinkage()) {
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
+ return Lo;
+ } else
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+ case ISD::ExternalSymbol: {
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
+ DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
+ ->getSymbol(), MVT::i64),
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+
+ case ISD::UREM:
+ case ISD::SREM:
+    //Expand only in the constant case
+ if (Op.getOperand(1).getOpcode() == ISD::Constant) {
+ MVT VT = Op.getNode()->getValueType(0);
+ SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
+ BuildUDIV(Op.getNode(), DAG, NULL) :
+ BuildSDIV(Op.getNode(), DAG, NULL);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1);
+ return Tmp1;
+ }
+ //fall through
+ case ISD::SDIV:
+ case ISD::UDIV:
+ if (Op.getValueType().isInteger()) {
+ if (Op.getOperand(1).getOpcode() == ISD::Constant)
+ return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
+ : BuildUDIV(Op.getNode(), DAG, NULL);
+ const char* opstr = 0;
+ switch (Op.getOpcode()) {
+ case ISD::UREM: opstr = "__remqu"; break;
+ case ISD::SREM: opstr = "__remq"; break;
+ case ISD::UDIV: opstr = "__divqu"; break;
+ case ISD::SDIV: opstr = "__divq"; break;
+ }
+ SDValue Tmp1 = Op.getOperand(0),
+ Tmp2 = Op.getOperand(1),
+ Addr = DAG.getExternalSymbol(opstr, MVT::i64);
+ return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2);
+ }
+ break;
+
+ case ISD::VAARG: {
+ SDValue Chain, DataPtr;
+ LowerVAARG(Op.getNode(), Chain, DataPtr, DAG);
+
+ SDValue Result;
+ if (Op.getValueType() == MVT::i32)
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+ NULL, 0, MVT::i32);
+ else
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0);
+ return Result;
+ }
+ case ISD::VACOPY: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue DestP = Op.getOperand(1);
+ SDValue SrcP = Op.getOperand(2);
+ const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0);
+ SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0);
+ SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
+ DAG.getConstant(8, MVT::i64));
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ NP, NULL,0, MVT::i32);
+ SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32);
+ }
+ case ISD::VASTART: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAListP = Op.getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // vastart stores the address of the VarArgsBase and VarArgsOffset
+ SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64);
+ SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0);
+ SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64),
+ SA2, NULL, 0, MVT::i32);
+ }
+ case ISD::RETURNADDR:
+ return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(),
+ MVT::i64);
+ //FIXME: implement
+ case ISD::FRAMEADDR: break;
+ }
+
+ return SDValue();
+}
+
+void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = N->getDebugLoc();
+ assert(N->getValueType(0) == MVT::i32 &&
+ N->getOpcode() == ISD::VAARG &&
+ "Unknown node to custom promote!");
+
+ SDValue Chain, DataPtr;
+ LowerVAARG(N, Chain, DataPtr, DAG);
+ SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0);
+ Results.push_back(Res);
+ Results.push_back(SDValue(Res.getNode(), 1));
+}
+
+
+//Inline Asm
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+AlphaTargetLowering::ConstraintType
+AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'f':
+ case 'r':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::vector<unsigned> AlphaTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+    default: break;     // Unknown constraint letter
+ case 'f':
+ return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 ,
+ Alpha::F3 , Alpha::F4 , Alpha::F5 ,
+ Alpha::F6 , Alpha::F7 , Alpha::F8 ,
+ Alpha::F9 , Alpha::F10, Alpha::F11,
+ Alpha::F12, Alpha::F13, Alpha::F14,
+ Alpha::F15, Alpha::F16, Alpha::F17,
+ Alpha::F18, Alpha::F19, Alpha::F20,
+ Alpha::F21, Alpha::F22, Alpha::F23,
+ Alpha::F24, Alpha::F25, Alpha::F26,
+ Alpha::F27, Alpha::F28, Alpha::F29,
+ Alpha::F30, Alpha::F31, 0);
+ case 'r':
+ return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
+ Alpha::R3 , Alpha::R4 , Alpha::R5 ,
+ Alpha::R6 , Alpha::R7 , Alpha::R8 ,
+ Alpha::R9 , Alpha::R10, Alpha::R11,
+ Alpha::R12, Alpha::R13, Alpha::R14,
+ Alpha::R15, Alpha::R16, Alpha::R17,
+ Alpha::R18, Alpha::R19, Alpha::R20,
+ Alpha::R21, Alpha::R22, Alpha::R23,
+ Alpha::R24, Alpha::R25, Alpha::R26,
+ Alpha::R27, Alpha::R28, Alpha::R29,
+ Alpha::R30, Alpha::R31, 0);
+ }
+ }
+
+ return std::vector<unsigned>();
+}
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ assert((MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::CAS64 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::LAS64 ||
+ MI->getOpcode() == Alpha::SWAP32 ||
+ MI->getOpcode() == Alpha::SWAP64) &&
+ "Unexpected instr type to insert");
+
+ bool is32 = MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::SWAP32;
+
+  //Load-locked / store-conditional atomic ops all take the same form:
+  //start:
+  //ll
+  //do stuff (maybe branch to exit)
+  //sc
+  //test sc and maybe branch to start
+  //exit:
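+  //
+  // For example, LAS64 (atomic load-add-store) expands roughly to:
+  //   llsc: ldq_l  res, 0(ptr)
+  //         addq   res, v2, store
+  //         stq_c  store, 0(ptr)   ; store = 1 on success, 0 on failure
+  //         beq    store, llsc
+  //   sink: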
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ sinkMBB->transferSuccessors(thisMBB);
+
+ F->insert(It, llscMBB);
+ F->insert(It, sinkMBB);
+
+ BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
+
+ unsigned reg_res = MI->getOperand(0).getReg(),
+ reg_ptr = MI->getOperand(1).getReg(),
+ reg_v2 = MI->getOperand(2).getReg(),
+ reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
+ reg_res).addImm(0).addReg(reg_ptr);
+ switch (MI->getOpcode()) {
+ case Alpha::CAS32:
+ case Alpha::CAS64: {
+ unsigned reg_cmp
+ = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
+ .addReg(reg_v2).addReg(reg_res);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
+ break;
+ }
+ case Alpha::LAS32:
+ case Alpha::LAS64: {
+ BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
+ .addReg(reg_res).addReg(reg_v2);
+ break;
+ }
+ case Alpha::SWAP32:
+ case Alpha::SWAP64: {
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(reg_v2).addReg(reg_v2);
+ break;
+ }
+ }
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
+ .addReg(reg_store).addImm(0).addReg(reg_ptr);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_store).addMBB(llscMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(sinkMBB);
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+
+ return sinkMBB;
+}
+
+bool
+AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Alpha target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
new file mode 100644
index 0000000..fdd817c
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -0,0 +1,114 @@
+//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Alpha uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "Alpha.h"
+
+namespace llvm {
+
+ namespace AlphaISD {
+ enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+      //These correspond to the identically named instructions
+ CVTQT_, CVTQS_, CVTTQ_,
+
+ /// GPRelHi/GPRelLo - These represent the high and low 16-bit
+ /// parts of a global address respectively.
+ GPRelHi, GPRelLo,
+
+      /// RelLit - Literal relocation of a global
+ RelLit,
+
+ /// GlobalRetAddr - used to restore the return address
+ GlobalRetAddr,
+
+ /// CALL - Normal call.
+ CALL,
+
+ /// DIVCALL - used for special library calls for div and rem
+ DivCall,
+
+ /// return flag operand
+ RET_FLAG,
+
+ /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction.
+ /// *PRC is the input register to compare to zero,
+ /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
+ /// DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH_I, COND_BRANCH_F
+
+ };
+ }
+
+ class AlphaTargetLowering : public TargetLowering {
+ int VarArgsOffset; // What is the offset to the first vaarg
+ int VarArgsBase; // What is the base FrameIndex
+ bool useITOF;
+ public:
+ explicit AlphaTargetLowering(TargetMachine &TM);
+
+ /// getSetCCResultType - Get the SETCC result ValueType
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ // Friendly names for dumps
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// LowerCallTo - This hook lowers an abstract call to a function into an
+ /// actual call.
+ virtual std::pair<SDValue, SDValue>
+ LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt,
+ bool isVarArg, bool isInreg, unsigned CC, bool isTailCall,
+ SDValue Callee, ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl);
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ bool hasITOF() { return useITOF; }
+
+ MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ private:
+ // Helpers for custom lowering.
+ void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
+ SelectionDAG &DAG);
+
+ };
+}
+
+#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
new file mode 100644
index 0000000..6d82875
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrFormats.td
@@ -0,0 +1,268 @@
+//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//Section 3.3 of the Alpha Architecture Handbook defines the instruction
+//formats modeled below:
+//Memory
+//Branch
+//Operate
+//Floating-point
+//PALcode
+
+def u8imm : Operand<i64>;
+def s14imm : Operand<i64>;
+def s16imm : Operand<i64>;
+def s21imm : Operand<i64>;
+def s64imm : Operand<i64>;
+def u64imm : Operand<i64>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+// Alpha instruction baseline
+class InstAlpha<bits<6> op, string asmstr, InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+ let Namespace = "Alpha";
+ let AsmString = asmstr;
+ let Inst{31-26} = op;
+ let Itinerary = itin;
+}
+
+
+//3.3.1
+class MForm<bits<6> opcode, bit load, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let canFoldAsLoad = load;
+ let Defs = [R28]; //We may use this for frame index calculations, so reserve it here
+
+ bits<5> Ra;
+ bits<16> disp;
+ bits<5> Rb;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
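+
+// MForm example: "ldq $3, 16($4)" would fill in op=0x29 (LDQ), Ra=3, Rb=4,
+// disp=16 in the fields above.
+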
+class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+
+ let OutOperandList = (ops GPRC:$RA);
+ let InOperandList = (ops);
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (ops);
+ let InOperandList = (ops);
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+
+class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (ops);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern=pattern;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (ops);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+
+//3.3.2
+def target : Operand<OtherVT> {}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (ops);
+ let InOperandList = OL;
+ bits<64> Opc; //dummy
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (ops);
+ let InOperandList = (ops target:$DISP);
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+
+//3.3.3
+class OForm<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm2<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = 31;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+// let isTwoAddress = 1;
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+
+class OFormL<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, u8imm:$L);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+// let isTwoAddress = 1;
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+//3.3.4
+class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+
+ bits<5> Fc;
+ bits<5> Fa;
+ bits<5> Fb;
+ bits<11> Function = fun;
+
+ let Inst{25-21} = Fa;
+ let Inst{20-16} = Fb;
+ let Inst{15-5} = Function;
+ let Inst{4-0} = Fc;
+}
+
+//3.3.5
+class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (ops);
+ let InOperandList = OL;
+ bits<26> Function;
+
+ let Inst{25-0} = Function;
+}
+
+
+// Pseudo instructions.
+class PseudoInstAlpha<dag OOL, dag IOL, string nm, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<0, nm, itin> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let Pattern = pattern;
+
+}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
new file mode 100644
index 0000000..a54d97d
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -0,0 +1,450 @@
+//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaGenInstrInfo.inc"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+AlphaInstrInfo::AlphaInstrInfo()
+ : TargetInstrInfoImpl(AlphaInsts, array_lengthof(AlphaInsts)),
+ RI(*this) { }
+
+
+bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg, unsigned& destReg,
+ unsigned& SrcSR, unsigned& DstSR) const {
+ unsigned oc = MI.getOpcode();
+ if (oc == Alpha::BISr ||
+ oc == Alpha::CPYSS ||
+ oc == Alpha::CPYST ||
+ oc == Alpha::CPYSSt ||
+ oc == Alpha::CPYSTs) {
+    // A register-to-register move has both source operands equal:
+    //   bis rX,rX,rY  or  cpys(s|t) fX,fX,fY
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isReg() &&
+ "invalid Alpha BIS instruction!");
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ SrcSR = DstSR = 0;
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned
+AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::LDL:
+ case Alpha::LDQ:
+ case Alpha::LDBU:
+ case Alpha::LDWU:
+ case Alpha::LDS:
+ case Alpha::LDT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned
+AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::STL:
+ case Alpha::STQ:
+ case Alpha::STB:
+ case Alpha::STW:
+ case Alpha::STS:
+ case Alpha::STT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+static bool isAlphaIntCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ:
+ case Alpha::BNE:
+ case Alpha::BGE:
+ case Alpha::BGT:
+ case Alpha::BLE:
+ case Alpha::BLT:
+ case Alpha::BLBC:
+ case Alpha::BLBS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Alpha branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(TBB);
+ else // Conditional branch
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(FBB);
+ return 2;
+}
+
+bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n";
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (DestRC == Alpha::GPRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ } else if (DestRC == Alpha::F4RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ } else if (DestRC == Alpha::F8RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ } else {
+    // Attempted to copy a register that is neither a GPR nor an FPR.
+ return false;
+ }
+
+ return true;
+}
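+
+// Alpha has no dedicated move instruction: BIS (logical OR) of a register
+// with itself is the canonical integer move, and CPYS (copy sign) of a
+// register with itself is the floating-point equivalent.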
+
+void
+AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
+ // << FrameIdx << "\n";
+ //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STT))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STQ))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ abort();
+}
+
+void AlphaInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == Alpha::F4RCRegisterClass)
+ Opc = Alpha::STS;
+ else if (RC == Alpha::F8RCRegisterClass)
+ Opc = Alpha::STT;
+ else if (RC == Alpha::GPRCRegisterClass)
+ Opc = Alpha::STQ;
+ else
+ abort();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+}
+
+void
+AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
+ // << FrameIdx << "\n";
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ abort();
+}
+
+void AlphaInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == Alpha::F4RCRegisterClass)
+ Opc = Alpha::LDS;
+ else if (RC == Alpha::F8RCRegisterClass)
+ Opc = Alpha::LDT;
+ else if (RC == Alpha::GPRCRegisterClass)
+ Opc = Alpha::LDQ;
+ else
+ abort();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+}
+
+MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ if (Ops.size() != 1) return NULL;
+
+ // Make sure this is a reg-reg copy.
+ unsigned Opc = MI->getOpcode();
+
+ MachineInstr *NewMI = NULL;
+ switch(Opc) {
+ default:
+ break;
+ case Alpha::BISr:
+ case Alpha::CPYSS:
+ case Alpha::CPYST:
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ if (Ops[0] == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ Opc = (Opc == Alpha::BISr) ? Alpha::STQ :
+ ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT);
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(InReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addReg(Alpha::F31);
+ } else { // load -> move
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ Opc = (Opc == Alpha::BISr) ? Alpha::LDQ :
+ ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT);
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(OutReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FrameIndex)
+ .addReg(Alpha::F31);
+ }
+ }
+ break;
+ }
+ return NewMI;
+}
+
+static unsigned AlphaRevCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ: return Alpha::BNE;
+ case Alpha::BNE: return Alpha::BEQ;
+ case Alpha::BGE: return Alpha::BLT;
+ case Alpha::BGT: return Alpha::BLE;
+ case Alpha::BLE: return Alpha::BGT;
+ case Alpha::BLT: return Alpha::BGE;
+ case Alpha::BLBC: return Alpha::BLBS;
+ case Alpha::BLBS: return Alpha::BLBC;
+ case Alpha::FBEQ: return Alpha::FBNE;
+ case Alpha::FBNE: return Alpha::FBEQ;
+ case Alpha::FBGE: return Alpha::FBLT;
+ case Alpha::FBGT: return Alpha::FBLE;
+ case Alpha::FBLE: return Alpha::FBGT;
+ case Alpha::FBLT: return Alpha::FBGE;
+ default:
+ assert(0 && "Unknown opcode");
+ }
+ return 0; // Not reached
+}
+
+// Branch analysis.  Returns false (filling in TBB, FBB and Cond) when the
+// block's terminators are understood: fall-through, a lone BR, a lone
+// conditional branch, or a conditional branch followed by a BR; returns
+// true when the terminator sequence cannot be analyzed.
+bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Alpha::BR) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ LastInst->getOpcode() == Alpha::COND_BRANCH_F) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it.
+ if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Alpha::BRs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Alpha::BR &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != Alpha::BR &&
+ I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+}
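+
+// BIS $31,$31,$31 is the architected Alpha integer nop: R31 always reads
+// as zero and writes to it are discarded.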
+
+bool AlphaInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case Alpha::RETDAG: // Return.
+ case Alpha::RETDAGp:
+ case Alpha::BR: // Uncond branch.
+ case Alpha::JMP: // Indirect branch.
+ return true;
+ default: return false;
+ }
+}
+bool AlphaInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
+ Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm()));
+ return false;
+}
+
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
new file mode 100644
index 0000000..182aa32
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -0,0 +1,97 @@
+//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAINSTRUCTIONINFO_H
+#define ALPHAINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AlphaRegisterInfo.h"
+
+namespace llvm {
+
+class AlphaInstrInfo : public TargetInstrInfoImpl {
+ const AlphaRegisterInfo RI;
+public:
+ AlphaInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
new file mode 100644
index 0000000..e73bdf9
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -0,0 +1,1137 @@
+//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrFormats.td"
+
+//********************
+//Custom DAG Nodes
+//********************
+
+def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
+ SDTCisFP<1>, SDTCisFP<0>
+]>;
+def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>;
+def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
+def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
+def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>;
+
+def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
+def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
+ SDTCisVT<1, i64> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+//********************
+//Patterns for matching
+//********************
+def invX : SDNodeXForm<imm, [{ //invert
+ return getI64Imm(~N->getZExtValue());
+}]>;
+def negX : SDNodeXForm<imm, [{ //negate
+ return getI64Imm(~N->getZExtValue() + 1);
+}]>;
+def SExt32 : SDNodeXForm<imm, [{ //sign extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 32) >> 32);
+}]>;
+def SExt16 : SDNodeXForm<imm, [{ //sign extend short to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 48) >> 48);
+}]>;
+def LL16 : SDNodeXForm<imm, [{ //lda part of constant
+ return getI64Imm(get_lda16(N->getZExtValue()));
+}]>;
+def LH16 : SDNodeXForm<imm, [{ //ldah part of constant (or more if too big)
+ return getI64Imm(get_ldah16(N->getZExtValue()));
+}]>;
+def iZAPX : SDNodeXForm<and, [{ // get imm to ZAPi
+ ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
+ return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()));
+}]>;
+def nearP2X : SDNodeXForm<imm, [{
+ return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getZExtValue())));
+}]>;
+def nearP2RemX : SDNodeXForm<imm, [{
+ uint64_t x =
+ abs64(N->getZExtValue() - getNearPower2((uint64_t)N->getZExtValue()));
+ return getI64Imm(Log2_64(x));
+}]>;
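+
+// Worked examples: negX maps C to ~C + 1 == -C (two's-complement
+// negation), and SExt16 maps 0xFFFF to ((int64_t)0xFFFF << 48) >> 48
+// == -1, i.e. the low 16 bits reinterpreted as a signed value and
+// widened to 64 bits.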
+
+def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
+ return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+}]>;
+def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
+ return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue();
+}], invX>;
+def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
+ return ((uint64_t)~N->getZExtValue() + 1) ==
+ (uint8_t)((uint64_t)~N->getZExtValue() + 1);
+}], negX>;
+def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ (int64_t)N->getZExtValue();
+}]>;
+def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ ((int64_t)N->getZExtValue() << 32) >> 32;
+}], SExt16>;
+
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS) return 0;
+ uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
+ return build != 0;
+}]>;
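+
+// zapnot keeps the bytes of $RA selected by an 8-bit mask and zeroes the
+// rest, so an AND whose 64-bit mask consists of whole 0x00/0xFF bytes can
+// be matched here; get_zapImm (a helper in the backend's C++ ISel code)
+// returns that byte mask, or 0 when the constant is not expressible.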
+
+def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
+ (void)N; // silence warning.
+ return true;
+}]>;
+
+def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>;
+def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>;
+def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>;
+def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>;
+def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>;
+def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>;
+def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>;
+def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>;
+def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>;
+def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>;
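+
+// These feed the multiply-by-constant strength reduction below.
+// chkRemNearPower2 is a helper in the backend's C++ ISel code; it is
+// assumed here to test whether the immediate lies the given distance from
+// the nearest power of two, with the final argument selecting the negated
+// direction.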
+
+def immRemP2n : PatLeaf<(imm), [{
+ return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) -
+ N->getZExtValue());
+}]>;
+def immRemP2 : PatLeaf<(imm), [{
+ return isPowerOf2_64(N->getZExtValue() -
+ getNearPower2((uint64_t)N->getZExtValue()));
+}]>;
+def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
+ int64_t d = abs64((int64_t)N->getZExtValue() -
+ (int64_t)getNearPower2((uint64_t)N->getZExtValue()));
+ if (isPowerOf2_64(d)) return false;
+ switch (d) {
+ case 1: case 3: case 5: return false;
+ default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+ };
+}]>;
+
+def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
+def add4 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 2), node:$op2)>;
+def sub4 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 2), node:$op2)>;
+def add8 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 3), node:$op2)>;
+def sub8 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 3), node:$op2)>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
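+
+// BinOpFrag and CmpOpFrag are shorthand for pattern fragments over the
+// fixed operand names $LHS/$RHS and $R used by the multiclasses below.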
+
+//Pseudo ops for selection
+
+def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>;
+
+let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in {
+def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt),
+ "; ADJUP $amt",
+ [(callseq_start timm:$amt)], s_pseudo>;
+def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2),
+ "; ADJDOWN $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>;
+}
+
+def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
+def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
+def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
+ "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
+
+
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+
+def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+
+def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+}
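+
+// These atomic pseudos are expanded after selection, by the target's
+// custom inserter, into load-locked/store-conditional retry loops built
+// from ldl_l/ldq_l and stl_c/stq_c.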
+
+//***********************
+//Real instructions
+//***********************
+
+//Operation Form:
+
+//conditional moves, int
+
+multiclass cmov_inst<bits<7> fun, string asmstr, PatFrag OpNode> {
+def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+}
+
+defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>;
+defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>;
+defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>;
+defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>;
+defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>;
+defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>;
+defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>;
+defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>;
+
+//General pattern for cmov
+def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2),
+ (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>;
+def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2),
+ (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>;
+
+//Invert sense when we can for constants:
+def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+
+multiclass all_inst<bits<6> opc, bits<7> funl, bits<7> funq,
+ string asmstr, PatFrag OpNode, InstrItinClass itin> {
+ def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>;
+ def Li : OFormL<opc, funl, !strconcat(asmstr, "l $RA,$L,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, immUExt8:$L)))], itin>;
+ def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+ def Qi : OFormL<opc, funq, !strconcat(asmstr, "q $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
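+
+// Each all_inst expands to four instructions: 32-bit ("l") and 64-bit
+// ("q") forms, each in register (r) and 8-bit-literal (i) variants; e.g.
+// defm ADD below yields ADDLr, ADDLi, ADDQr and ADDQi.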
+
+defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>;
+defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>;
+defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>;
+defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>;
+defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>;
+defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>;
+defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>;
+//Constant cases: legalize rewrites (sub x, C) into (add x, ~C + 1), so
+//match the negated-immediate adds back to the SUB forms.
+def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>;
+
+multiclass log_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
+multiclass inv_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, (not GPRC:$RB)))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8inv:$L))], itin>;
+}
+
+defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>;
+defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>;
+defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>;
+defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>;
+defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>;
+defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>;
+
+defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>;
+defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>;
+defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>;
+defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>;
+
+def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC",
+ [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>;
+def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC",
+ [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>;
+def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC",
+ [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>;
+def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>;
+def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>;
+def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>;
+def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>;
+def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>;
+
+//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low
+//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high
+//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high
+//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low
+//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high
+//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high
+//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low
+//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low
+//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high
+//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high
+//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low
+
+//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low
+//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low
+//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high
+//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high
+//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low
+//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low
+//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high
+//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high
+//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low
+//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low
+//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high
+//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high
+//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low
+//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low
+
+//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low
+//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low
+//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high
+//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high
+//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low
+//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low
+//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high
+//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high
+//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low
+//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low
+//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high
+//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high
+//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low
+//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low
+
+def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>;
+
+// Define the pattern that produces ZAPNOTi.
+def : Pat<(zappat:$imm GPRC:$RA),
+ (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>;
+
+
+//Comparison, int
+//cmpbge compares all eight byte lanes in parallel, so using it for a
+//single low-byte comparison wastes most of its power, but it still beats
+//masking and comparing by hand.
+def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>;
+def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>;
+def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>;
+
+//Patterns for unsupported int comparisons
+def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>;
+def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>;
+
+def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
+
+def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
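+
+// Alpha has no cmpne: inequality is computed by testing the cmpeq result
+// against zero, hence the nested CMPEQ/CMPEQi patterns above.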
+
+
+let isReturn = 1, isTerminator = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+ def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
+ def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in
+def JMP : MbrpForm< 0x1A, 0x00, (ops GPRC:$RS), "jmp $$31,($RS),0",
+ [(brind GPRC:$RS)], s_jsr>; //Jump
+
+let isCall = 1, Ra = 26,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in {
+ def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine
+}
+let isCall = 1, Ra = 26, Rb = 27, disp = 0,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
+ def JSR : MbrForm< 0x1A, 0x01, (ops ), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
+}
+
+let isCall = 1, Ra = 23, Rb = 27, disp = 0,
+ Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
+ def JSRs : MbrForm< 0x1A, 0x01, (ops ), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
+
+
+def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ops GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
+
+
+let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)",
+ [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)",
+ [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+}
+
+
+let OutOperandList = (ops), InOperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)",
+ [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)",
+ [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STWr : MForm<0x0D, 0, "stw $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)",
+ [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)",
+ [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
+ [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+}
+
+//Load address
+let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)",
+ [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
+def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address
+def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)",
+ [], s_lda>; //Load address high
+def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
+ [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high
+}
+
+let OutOperandList = (ops), InOperandList = (ops F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)",
+ [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
+ [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (ops F4RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)",
+ [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
+ [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+let OutOperandList = (ops), InOperandList = (ops F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)",
+ [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
+ [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (ops F8RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)",
+ [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
+ [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+
+
+//Constant-pool relocations
+def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDQr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDLr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDBUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDWUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)),
+ (LDAr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)),
+ (LDAHr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDSr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDTr tconstpool:$DISP, GPRC:$RB)>;
+
+//Jump-table relocations
+def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)),
+ (LDAHr tjumptable:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)),
+ (LDAr tjumptable:$DISP, GPRC:$RB)>;
+
+
+//Any-extending load patterns: extload leaves the upper bits unspecified,
+//so the existing zero/sign-extending loads serve.
+def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDBU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDWU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDL immSExt16:$DISP, GPRC:$RB)>;
+
+//Zero-displacement patterns
+def : Pat<(i64 (load GPRC:$addr)),
+ (LDQ 0, GPRC:$addr)>;
+def : Pat<(f64 (load GPRC:$addr)),
+ (LDT 0, GPRC:$addr)>;
+def : Pat<(f32 (load GPRC:$addr)),
+ (LDS 0, GPRC:$addr)>;
+def : Pat<(i64 (sextloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+
+def : Pat<(store GPRC:$DATA, GPRC:$addr),
+ (STQ GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F8RC:$DATA, GPRC:$addr),
+ (STT F8RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F4RC:$DATA, GPRC:$addr),
+ (STS F4RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
+ (STL GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
+ (STW GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
+ (STB GPRC:$DATA, 0, GPRC:$addr)>;
+
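+//A bare register address is just the displacement form with disp == 0, so
+//the patterns above reuse the displacement instructions rather than
+//needing separate register-indirect loads and stores.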
+
+//Load address, relocated gpdisp form
+let OutOperandList = (ops GPRC:$RA),
+ InOperandList = (ops s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
+ mayLoad = 1 in {
+def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+}
+
+//Load quad, relocated literal form
+let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in
+def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
+ [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
+def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
+ (LDQl texternalsym:$ext, GPRC:$RB)>;
+
+let OutOperandList = (outs GPRC:$RR),
+ InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
+ Constraints = "$RA = $RR",
+ DisableEncoding = "$RR" in {
+def STQ_C : MForm<0x2F, 0, "stq_c $RA,$DISP($RB)", [], s_ist>;
+def STL_C : MForm<0x2E, 0, "stl_c $RA,$DISP($RB)", [], s_ist>;
+}
+let OutOperandList = (ops GPRC:$RA),
+ InOperandList = (ops s64imm:$DISP, GPRC:$RB),
+ mayLoad = 1 in {
+def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
+def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
+}
+
+def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
+def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
+def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
+
+def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)),
+ (WMB)>;
+def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)),
+ (MB)>;
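+
+//The first pattern is more specific (its store-store operand must be the
+//constant 1), so it is preferred and emits the cheaper write barrier; any
+//other membarrier falls through to a full mb.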
+
+//Basic Floating point ops
+
+//Floats
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in
+def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
+ [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RA, F4RC:$RB) in {
+def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
+def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
+def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
+def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
+
+def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+
+//Doubles
+
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
+ [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
+
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RA, F8RC:$RB) in {
+def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
+def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>;
+def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>;
+def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>;
+
+def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>;
+
+def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>;
+def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>;
+def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>;
+def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>;
+}
+
+//More CPYS forms:
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RA, F8RC:$RB) in {
+def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RA, F4RC:$RB) in {
+def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>;
+}
+
+//conditional moves, floats
+let OutOperandList = (ops F4RC:$RDEST), InOperandList = (ops F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+ isTwoAddress = 1 in {
+def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero
+def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero
+def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero
+def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero
+def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero
+def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero
+}
+//conditional moves, doubles
+let OutOperandList = (ops F8RC:$RDEST), InOperandList = (ops F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+ isTwoAddress = 1 in {
+def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+}
+
+//misc FP selects
+//Select double
+
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+//Select single
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+
+
+let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",[], s_ftoi>; //Floating to integer move, S_floating
+let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in
+def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",[], s_itof>; //Integer to floating move, S_floating
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
+def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
+ [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
+
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
+ [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in
+def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
+ [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
+ [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
+
+
+/////////////////////////////////////////////////////////
+//Branching
+/////////////////////////////////////////////////////////
+class br_icc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ops u64imm:$opc, GPRC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_icbr>;
+class br_fcc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ops u64imm:$opc, F8RC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_fbr>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+let Ra = 31 in
+def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
+
+def COND_BRANCH_I : BFormN<0, (ops u64imm:$opc, GPRC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
+ s_icbr>;
+def COND_BRANCH_F : BFormN<0, (ops u64imm:$opc, F8RC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
+ s_fbr>;
+//Branches, int
+def BEQ : br_icc<0x39, "beq">;
+def BGE : br_icc<0x3E, "bge">;
+def BGT : br_icc<0x3F, "bgt">;
+def BLBC : br_icc<0x38, "blbc">;
+def BLBS : br_icc<0x3C, "blbs">;
+def BLE : br_icc<0x3B, "ble">;
+def BLT : br_icc<0x3A, "blt">;
+def BNE : br_icc<0x3D, "bne">;
+
+//Branches, float
+def FBEQ : br_fcc<0x31, "fbeq">;
+def FBGE : br_fcc<0x36, "fbge">;
+def FBGT : br_fcc<0x37, "fbgt">;
+def FBLE : br_fcc<0x33, "fble">;
+def FBLT : br_fcc<0x32, "fblt">;
+def FBNE : br_fcc<0x35, "fbne">;
+}
+
+//An ugly trick: expose the branch opcode as an immediate that the patterns
+//below can use
+def immBRCond : SDNodeXForm<imm, [{
+ switch((uint64_t)N->getZExtValue()) {
+ default: assert(0 && "Unknown branch type");
+ case 0: return getI64Imm(Alpha::BEQ);
+ case 1: return getI64Imm(Alpha::BNE);
+ case 2: return getI64Imm(Alpha::BGE);
+ case 3: return getI64Imm(Alpha::BGT);
+ case 4: return getI64Imm(Alpha::BLE);
+ case 5: return getI64Imm(Alpha::BLT);
+ case 6: return getI64Imm(Alpha::BLBS);
+ case 7: return getI64Imm(Alpha::BLBC);
+ case 20: return getI64Imm(Alpha::FBEQ);
+ case 21: return getI64Imm(Alpha::FBNE);
+ case 22: return getI64Imm(Alpha::FBGE);
+ case 23: return getI64Imm(Alpha::FBGT);
+ case 24: return getI64Imm(Alpha::FBLE);
+ case 25: return getI64Imm(Alpha::FBLT);
+ }
+}]>;
+
+//Int cond patterns
+def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+
+def : Pat<(brcond GPRC:$RA, bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>;
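+//Note the inversion trick above: CMPEQ produces 1 exactly when its operands
+//are equal, so a setne branch is emitted as BEQ (immBRCond 0) on the CMPEQ
+//result.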
+
+//FP cond patterns
+def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>;
+
+
+def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+
+def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+
+//End Branches
+
+//S_floating : IEEE Single
+//T_floating : IEEE Double
+
+//Unused instructions
+//Mnemonic Format Opcode Description
+//CALL_PAL Pcd 00 Trap to PALcode
+//ECB Mfc 18.E800 Evict cache block
+//EXCB Mfc 18.0400 Exception barrier
+//FETCH Mfc 18.8000 Prefetch data
+//FETCH_M Mfc 18.A000 Prefetch data, modify intent
+//LDQ_U Mem 0B Load unaligned quadword
+//MB Mfc 18.4000 Memory barrier
+//STQ_U Mem 0F Store unaligned quadword
+//TRAPB Mfc 18.0000 Trap barrier
+//WH64 Mfc 18.F800 Write hint - 64 bytes
+//WMB Mfc 18.4400 Write memory barrier
+//MF_FPCR F-P 17.025 Move from FPCR
+//MT_FPCR F-P 17.024 Move to FPCR
+//These are in the Multimedia extensions, so let's not use them yet
+//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
+//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
+//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
+//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum
+//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum
+//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum
+//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum
+//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum
+//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error
+//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes
+//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes
+//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords
+//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words
+//CVTLQ F-P 17.010 Convert longword to quadword
+//CVTQL F-P 17.030 Convert quadword to longword
+
+
+//Constant handling
+
+def immConst2Part : PatLeaf<(imm), [{
+ //true if imm fits in an LDAH/LDA pair
+ int64_t val = (int64_t)N->getZExtValue();
+ return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
+}]>;
+def immConst2PartInt : PatLeaf<(imm), [{
+ //true if imm fits in an LDAH/LDA pair with zero extension
+ uint64_t uval = N->getZExtValue();
+ int32_t val32 = (int32_t)uval;
+ return ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_FULLHIGH);
+// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
+}], SExt32>;
+
+def : Pat<(i64 immConst2Part:$imm),
+ (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
+
+def : Pat<(i64 immSExt16:$imm),
+ (LDA immSExt16:$imm, R31)>;
+
+def : Pat<(i64 immSExt16int:$imm),
+ (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
+def : Pat<(i64 immConst2PartInt:$imm),
+ (ZAPNOTi (LDA (LL16 (SExt32 immConst2PartInt:$imm)),
+ (LDAH (LH16 (SExt32 immConst2PartInt:$imm)), R31)), 15)>;
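+//Worked example (illustrative): the LL16/LH16 transforms (defined earlier in
+//this file) split the constant into LDA/LDAH displacements with the usual
+//carry fixup, so 0x12348000 is materialized roughly as
+// ldah $t, 0x1235($31)
+// lda $rc, -32768($t)
+//since 0x1235 * 65536 - 32768 == 0x12348000 ($t/$rc are placeholder names).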
+
+
+//TODO: I want to just define these like this!
+//def : Pat<(i64 0),
+// (R31)>;
+//def : Pat<(f64 0.0),
+// (F31)>;
+//def : Pat<(f64 -0.0),
+// (CPYSNT F31, F31)>;
+//def : Pat<(f32 0.0),
+// (F31)>;
+//def : Pat<(f32 -0.0),
+// (CPYSNS F31, F31)>;
+
+//Misc Patterns:
+
+def : Pat<(sext_inreg GPRC:$RB, i32),
+ (ADDLi GPRC:$RB, 0)>;
+
+def : Pat<(fabs F8RC:$RB),
+ (CPYST F31, F8RC:$RB)>;
+def : Pat<(fabs F4RC:$RB),
+ (CPYSS F31, F4RC:$RB)>;
+def : Pat<(fneg F8RC:$RB),
+ (CPYSNT F8RC:$RB, F8RC:$RB)>;
+def : Pat<(fneg F4RC:$RB),
+ (CPYSNS F4RC:$RB, F4RC:$RB)>;
+
+def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)),
+ (CPYSNS F4RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)),
+ (CPYSNT F8RC:$B, F8RC:$A)>;
+def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)),
+ (CPYSNSt F8RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)),
+ (CPYSNTs F4RC:$B, F8RC:$A)>;
+
+//Yes, signed multiply high is ugly
+def : Pat<(mulhs GPRC:$RA, GPRC:$RB),
+ (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA),
+ (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>;
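+//(this implements mulhs(a,b) = mulhu(a,b) - (a<0 ? b : 0) - (b<0 ? a : 0);
+//each CMOVGE zeroes one correction term when its operand is non-negative)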
+
+//Strength reduction for multiplication by small constants:
+let AddedComplexity = 1 in {
+def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>;
+
+//slight tree expansion if we are multiplying near a power of 2
+//n is just above a power of 2 (worked example after this block)
+def : Pat<(mul GPRC:$RA, immRem1:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, immRem2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem3:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem4:$imm),
+ (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>;
+def : Pat<(mul GPRC:$RA, immRem5:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRemP2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>;
+
+//n is below a power of 2
+//FIXME: figure out why something is truncating the imm to 32 bits
+// this will fix 2007-11-27-mulneg3
+//def : Pat<(mul GPRC:$RA, immRem1n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>;
+//def : Pat<(mul GPRC:$RA, immRem2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem3n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem4n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>;
+//def : Pat<(mul GPRC:$RA, immRem5n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRemP2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>;
+} //Added complexity
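+//Worked example for the expansion above (assuming nearP2X yields the log2 of
+//the nearby power of 2, as the names and comments suggest): mul $RA, 10
+//matches immRem2 since 10 = 8 + 2, giving
+//(ADDQr (SLr $RA, 3), (ADDQr $RA, $RA)), i.e.
+//(x << 3) + (x + x) == 8x + 2x == 10x.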
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
new file mode 100644
index 0000000..3fecb19
--- /dev/null
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -0,0 +1,307 @@
+//===-- AlphaJITInfo.cpp - Implement the JIT interfaces for the Alpha ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Alpha target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "AlphaJITInfo.h"
+#include "AlphaRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include <cstdlib>
+#include <map>
+using namespace llvm;
+
+#define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \
+ ((Op << 26) | (RA << 21) | (LIT << 13) | (1 << 12) | (FUN << 5) | (RC))
+#define BUILD_OFormat(Op, RA, RB, FUN, RC) \
+ ((Op << 26) | (RA << 21) | (RB << 16) | (FUN << 5) | (RC))
+
+#define BUILD_LDA(RD, RS, IMM16) \
+ ((0x08 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_LDAH(RD, RS, IMM16) \
+ ((0x09 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+
+#define BUILD_LDQ(RD, RS, IMM16) \
+ ((0x29 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF))
+
+#define BUILD_JMP(RD, RS, IMM16) \
+ ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x00 << 14) | ((IMM16) & 0x3FFF))
+#define BUILD_JSR(RD, RS, IMM16) \
+ ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x01 << 14) | ((IMM16) & 0x3FFF))
+
+#define BUILD_SLLi(RD, RS, IMM8) \
+ (BUILD_OFormatI(0x12, RS, IMM8, 0x39, RD))
+
+#define BUILD_ORi(RD, RS, IMM8) \
+ (BUILD_OFormatI(0x11, RS, IMM8, 0x20, RD))
+
+#define BUILD_OR(RD, RS, RT) \
+ (BUILD_OFormat(0x11, RS, RT, 0x20, RD))
+
+
+
+static void EmitBranchToAt(void *At, void *To) {
+ unsigned long Fn = (unsigned long)To;
+
+ unsigned *AtI = (unsigned*)At;
+
+ AtI[0] = BUILD_OR(0, 27, 27);
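+ //the "or $27,$27,$0" above saves the stub's own address (pv on entry) into
+ //$0 so the compilation callback can tell which stub it was entered through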
+
+ DOUT << "Stub targeting " << To << "\n";
+
+ for (int x = 1; x <= 8; ++x) {
+ AtI[2*x - 1] = BUILD_SLLi(27,27,8);
+ unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF;
+ //DOUT << "outputting " << hex << d << dec << "\n";
+ AtI[2*x] = BUILD_ORi(27, 27, d);
+ }
+ AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv
+ AtI[18] = 0x00FFFFFF; //mark this as a stub
+}
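+//(19 words in all: the or, eight sll/or pairs, the jmp, and the marker;
+//this matches the 19*4 bytes reserved in emitFunctionStub below)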
+
+void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ //FIXME
+ assert(0);
+}
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+//static AlphaJITInfo* AlphaJTI;
+
+extern "C" {
+#ifdef __alpha
+
+ void AlphaCompilationCallbackC(long* oldpv, void* CameFromStub)
+ {
+ void* Target = JITCompilerFunction(CameFromStub);
+
+ //rewrite the stub to an unconditional branch
+ if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) {
+ DOUT << "Came from a stub, rewriting\n";
+ EmitBranchToAt(CameFromStub, Target);
+ } else {
+ DOUT << "confused, didn't come from stub at " << CameFromStub
+ << " old jump vector " << oldpv
+ << " new jump vector " << Target << "\n";
+ }
+
+ //Change pv to new Target
+ *oldpv = (long)Target;
+ }
+
+ void AlphaCompilationCallback(void);
+
+ asm(
+ ".text\n"
+ ".globl AlphaComilationCallbackC\n"
+ ".align 4\n"
+ ".globl AlphaCompilationCallback\n"
+ ".ent AlphaCompilationCallback\n"
+"AlphaCompilationCallback:\n"
+ //get the JIT's GOT
+ "ldgp $29, 0($27)\n"
+ //Save args, callee saved, and perhaps others?
+ //args: $16-$21 $f16-$f21 (12)
+ //callee: $9-$14 $f2-$f9 (14)
+ //others: fp:$15 ra:$26 pv:$27 (3)
+ "lda $30, -232($30)\n"
+ "stq $16, 0($30)\n"
+ "stq $17, 8($30)\n"
+ "stq $18, 16($30)\n"
+ "stq $19, 24($30)\n"
+ "stq $20, 32($30)\n"
+ "stq $21, 40($30)\n"
+ "stt $f16, 48($30)\n"
+ "stt $f17, 56($30)\n"
+ "stt $f18, 64($30)\n"
+ "stt $f19, 72($30)\n"
+ "stt $f20, 80($30)\n"
+ "stt $f21, 88($30)\n"
+ "stq $9, 96($30)\n"
+ "stq $10, 104($30)\n"
+ "stq $11, 112($30)\n"
+ "stq $12, 120($30)\n"
+ "stq $13, 128($30)\n"
+ "stq $14, 136($30)\n"
+ "stt $f2, 144($30)\n"
+ "stt $f3, 152($30)\n"
+ "stt $f4, 160($30)\n"
+ "stt $f5, 168($30)\n"
+ "stt $f6, 176($30)\n"
+ "stt $f7, 184($30)\n"
+ "stt $f8, 192($30)\n"
+ "stt $f9, 200($30)\n"
+ "stq $15, 208($30)\n"
+ "stq $26, 216($30)\n"
+ "stq $27, 224($30)\n"
+
+ "addq $30, 224, $16\n" //pass the addr of saved pv as the first arg
+ "bis $0, $0, $17\n" //pass the roughly stub addr in second arg
+ "jsr $26, AlphaCompilationCallbackC\n" //call without saving ra
+
+ "ldq $16, 0($30)\n"
+ "ldq $17, 8($30)\n"
+ "ldq $18, 16($30)\n"
+ "ldq $19, 24($30)\n"
+ "ldq $20, 32($30)\n"
+ "ldq $21, 40($30)\n"
+ "ldt $f16, 48($30)\n"
+ "ldt $f17, 56($30)\n"
+ "ldt $f18, 64($30)\n"
+ "ldt $f19, 72($30)\n"
+ "ldt $f20, 80($30)\n"
+ "ldt $f21, 88($30)\n"
+ "ldq $9, 96($30)\n"
+ "ldq $10, 104($30)\n"
+ "ldq $11, 112($30)\n"
+ "ldq $12, 120($30)\n"
+ "ldq $13, 128($30)\n"
+ "ldq $14, 136($30)\n"
+ "ldt $f2, 144($30)\n"
+ "ldt $f3, 152($30)\n"
+ "ldt $f4, 160($30)\n"
+ "ldt $f5, 168($30)\n"
+ "ldt $f6, 176($30)\n"
+ "ldt $f7, 184($30)\n"
+ "ldt $f8, 192($30)\n"
+ "ldt $f9, 200($30)\n"
+ "ldq $15, 208($30)\n"
+ "ldq $26, 216($30)\n"
+ "ldq $27, 224($30)\n" //this was updated in the callback with the target
+
+ "lda $30, 232($30)\n" //restore sp
+ "jmp $31, ($27)\n" //jump to the new function
+ ".end AlphaCompilationCallback\n"
+ );
+#else
+ void AlphaCompilationCallback() {
+ cerr << "Cannot call AlphaCompilationCallback() on a non-Alpha arch!\n";
+ abort();
+ }
+#endif
+}
+
+void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ //assert(Fn == AlphaCompilationCallback && "Where are you going?\n");
+ //Do things in a stupid slow way!
+ JCE.startGVStub(F, 19*4);
+ void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue();
+ for (int x = 0; x < 19; ++ x)
+ JCE.emitWordLE(0);
+ EmitBranchToAt(Addr, Fn);
+ DOUT << "Emitting Stub to " << Fn << " at [" << Addr << "]\n";
+ return JCE.finishGVStub(F);
+}
+
+TargetJITInfo::LazyResolverFn
+AlphaJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ // setZerothGOTEntry((void*)AlphaCompilationCallback);
+ return AlphaCompilationCallback;
+}
+
+//These describe LDAx
+static const int IMM_LOW = -32768;
+static const int IMM_HIGH = 32767;
+static const int IMM_MULT = 65536;
+
+static long getUpper16(long l)
+{
+ long y = l / IMM_MULT;
+ if (l % IMM_MULT > IMM_HIGH)
+ ++y;
+ if (l % IMM_MULT < IMM_LOW)
+ --y;
+ assert((short)y == y && "displacement out of range");
+ return y;
+}
+
+static long getLower16(long l)
+{
+ long h = getUpper16(l);
+ long y = l - h * IMM_MULT;
+ assert(y == (short)y && "Displacement out of range");
+ return y;
+}
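+//Sanity check (illustrative): getUpper16(l)*IMM_MULT + getLower16(l) == l.
+//For l = 0x1234A678 the low half 0xA678 exceeds IMM_HIGH, so the carry bumps
+//the upper part: getUpper16 = 0x1235, getLower16 = -0x5988, and
+//0x1235 * 0x10000 - 0x5988 == 0x1234A678.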
+
+void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ //because gpdist relocations are paired and relative to the pc of the first
+ //instruction, we need to keep some state
+
+ static std::map<std::pair<void*, int>, void*> gpdistmap;
+
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ long idx = 0;
+ bool doCommon = true;
+ switch ((Alpha::RelocationType)MR->getRelocationType()) {
+ default: assert(0 && "Unknown relocation type!");
+ case Alpha::reloc_literal:
+ //This is a LDQl
+ idx = MR->getGOTIndex();
+ DOUT << "Literal relocation to slot " << idx;
+ idx = (idx - GOToffset) * 8;
+ DOUT << " offset " << idx << "\n";
+ break;
+ case Alpha::reloc_gprellow:
+ idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
+ idx = getLower16(idx);
+ DOUT << "gprellow relocation offset " << idx << "\n";
+ DOUT << " Pointer is " << (void*)MR->getResultPointer()
+ << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+ break;
+ case Alpha::reloc_gprelhigh:
+ idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
+ idx = getUpper16(idx);
+ DOUT << "gprelhigh relocation offset " << idx << "\n";
+ DOUT << " Pointer is " << (void*)MR->getResultPointer()
+ << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+ break;
+ case Alpha::reloc_gpdist:
+ switch (*RelocPos >> 26) {
+ case 0x09: //LDAH
+ idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos;
+ idx = getUpper16(idx);
+ DOUT << "LDAH: " << idx << "\n";
+ //add the relocation to the map
+ gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos;
+ break;
+ case 0x08: //LDA
+ assert(gpdistmap[std::make_pair(Function, MR->getConstantVal())] &&
+ "LDAg without seeing LDAHg");
+ idx = &GOTBase[GOToffset * 8] -
+ (unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())];
+ idx = getLower16(idx);
+ DOUT << "LDA: " << idx << "\n";
+ break;
+ default:
+ assert(0 && "Cannot handle gpdist yet");
+ }
+ break;
+ case Alpha::reloc_bsr: {
+ idx = (((unsigned char*)MR->getResultPointer() -
+ (unsigned char*)RelocPos) >> 2) + 1; //skip first 2 inst of fun
+ *RelocPos |= (idx & ((1 << 21)-1));
+ doCommon = false;
+ break;
+ }
+ }
+ if (doCommon) {
+ short x = (short)idx;
+ assert(x == idx);
+ *(short*)RelocPos = x;
+ }
+ }
+}
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
new file mode 100644
index 0000000..edff990
--- /dev/null
+++ b/lib/Target/Alpha/AlphaJITInfo.h
@@ -0,0 +1,47 @@
+//===- AlphaJITInfo.h - Alpha impl. of the JIT interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_JITINFO_H
+#define ALPHA_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class TargetMachine;
+
+ class AlphaJITInfo : public TargetJITInfo {
+ protected:
+ TargetMachine &TM;
+ public:
+ explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
+ { useGOT = true; }
+
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ private:
+ static const unsigned GOToffset = 4096;
+
+ };
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
new file mode 100644
index 0000000..0c51bc5
--- /dev/null
+++ b/lib/Target/Alpha/AlphaLLRP.cpp
@@ -0,0 +1,158 @@
+//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Here we check for potential replay traps introduced by the spiller.
+// We also align some branch targets if we can do so for free.
+//
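+// (The pass tracks the last three stack-relative memory ops; when two hit the
+// same base register and offset within one 4-instruction fetch block, it pads
+// with "bis $31,$31,$31" nops until the later access falls into the next
+// fetch block, avoiding the replay trap.)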
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-nops"
+#include "Alpha.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(nopintro, "Number of nops inserted");
+STATISTIC(nopalign, "Number of nops inserted for alignment");
+
+namespace {
+ cl::opt<bool>
+ AlignAll("alpha-align-all", cl::Hidden,
+ cl::desc("Align all blocks"));
+
+ struct AlphaLLRPPass : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ AlphaTargetMachine &TM;
+
+ static char ID;
+ AlphaLLRPPass(AlphaTargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Alpha NOP inserter";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ const TargetInstrInfo *TII = F.getTarget().getInstrInfo();
+ bool Changed = false;
+ MachineInstr* prev[3] = {0,0,0};
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ unsigned count = 0;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ MachineBasicBlock& MBB = *FI;
+ bool ub = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ if (count%4 == 0)
+ prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary
+ ++count;
+ MachineInstr *MI = I++;
+ switch (MI->getOpcode()) {
+ case Alpha::LDQ: case Alpha::LDL:
+ case Alpha::LDWU: case Alpha::LDBU:
+ case Alpha::LDT: case Alpha::LDS:
+ case Alpha::STQ: case Alpha::STL:
+ case Alpha::STW: case Alpha::STB:
+ case Alpha::STT: case Alpha::STS:
+ if (MI->getOperand(2).getReg() == Alpha::R30) {
+ if (prev[0] &&
+ prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&&
+ prev[0]->getOperand(1).getImm() == MI->getOperand(1).getImm()){
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 1;
+ count += 1;
+ } else if (prev[1]
+ && prev[1]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[1]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[2];
+ prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 2;
+ count += 2;
+ } else if (prev[2]
+ && prev[2]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[2]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ Changed = true; nopintro += 3;
+ count += 3;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = MI;
+ break;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ case Alpha::ALTENT:
+ case Alpha::MEMLABEL:
+ case Alpha::PCLABEL:
+ --count;
+ break;
+ case Alpha::BR:
+ case Alpha::JMP:
+ ub = true;
+ //fall through
+ default:
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ }
+ }
+ if (ub || AlignAll) {
+ //we can align stuff for free at this point
+ while (count % 4) {
+ BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ ++count;
+ ++nopalign;
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ }
+ }
+ }
+ return Changed;
+ }
+ };
+ char AlphaLLRPPass::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) {
+ return new AlphaLLRPPass(tm);
+}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
new file mode 100644
index 0000000..feee6e4
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -0,0 +1,335 @@
+//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "Alpha.h"
+#include "AlphaRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+using namespace llvm;
+
+//These describe LDAx
+static const int IMM_LOW = -32768;
+static const int IMM_HIGH = 32767;
+static const int IMM_MULT = 65536;
+
+static long getUpper16(long l)
+{
+ long y = l / IMM_MULT;
+ if (l % IMM_MULT > IMM_HIGH)
+ ++y;
+ if (l % IMM_MULT < IMM_LOW) //mirror the AlphaJITInfo version so very
+ --y; //negative displacements split correctly too
+ return y;
+}
+
+static long getLower16(long l)
+{
+ long h = getUpper16(l);
+ return l - h * IMM_MULT;
+}
+
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+ : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ TII(tii)
+{
+}
+
+const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ Alpha::R9, Alpha::R10,
+ Alpha::R11, Alpha::R12,
+ Alpha::R13, Alpha::R14,
+ Alpha::F2, Alpha::F3,
+ Alpha::F4, Alpha::F5,
+ Alpha::F6, Alpha::F7,
+ Alpha::F8, Alpha::F9, 0
+ };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Alpha::R15);
+ Reserved.set(Alpha::R30);
+ Reserved.set(Alpha::R31);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool AlphaRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasVarSizedObjects();
+}
+
+void AlphaRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (hasFP(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'sub ESP, <amt>' and the adjcallstackdown instruction into 'add ESP,
+ // <amt>'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(-Amount).addReg(Alpha::R30);
+ } else {
+ assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(Amount).addReg(Alpha::R30);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+//Alpha has a slightly funny stack:
+//Args
+//<- incoming SP
+//fixed locals (and spills, callee saved, etc)
+//<- FP
+//variable locals
+//<- SP
+
+void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ bool FP = hasFP(MF);
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Add the base register of R30 (SP) or R15 (FP).
+ MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
+
+ // Now add the frame object offset to the offset from the virtual frame index.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DOUT << "FI: " << FrameIndex << " Offset: " << Offset << "\n";
+
+ Offset += MF.getFrameInfo()->getStackSize();
+
+ DOUT << "Corrected Offset " << Offset
+ << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n";
+
+ if (Offset > IMM_HIGH || Offset < IMM_LOW) {
+ DOUT << "Unconditionally using R28 for evil purposes Offset: "
+ << Offset << "\n";
+ //so in this case, we need to use a temporary register, and move the
+ //original inst off the SP/FP
+ //fix up the old:
+ MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
+ MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
+ //insert the new
+ MachineInstr* nMI=BuildMI(MF, MI.getDebugLoc(),
+ TII.get(Alpha::LDAH), Alpha::R28)
+ .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
+ MBB.insert(II, nMI);
+ } else {
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
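+//e.g. for Offset = 100000: getUpper16 = 2 (the remainder 34464 exceeds
+//IMM_HIGH, bumping the quotient), getLower16 = 100000 - 2*65536 = -31072,
+//so the slot is reached via "ldah $28, 2($30)" followed by the original
+//instruction rewritten to use -31072($28) (illustrative values).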
+
+
+void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+ bool FP = hasFP(MF);
+
+ static int curgpdist = 0;
+
+ //handle GOT offset
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+ .addReg(Alpha::R27).addImm(++curgpdist);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+ .addReg(Alpha::R29).addImm(curgpdist);
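+ //the shared immediate (curgpdist) tags the LDAH/LDA pair so the gpdist
+ //handler in AlphaJITInfo::relocate can match the two halves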
+
+ //evil const_cast until the MachineOperand code is set up to handle const
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()));
+
+ // Get the number of bytes to allocate from the FrameInfo
+ long NumBytes = MFI->getStackSize();
+
+ if (FP)
+ NumBytes += 8; //reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0) return;
+
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ NumBytes = (NumBytes+Align-1)/Align*Align;
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r30 -= numbytes
+ NumBytes = -NumBytes;
+ if (NumBytes >= IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) >= IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ cerr << "Too big a stack frame at " << NumBytes << "\n";
+ abort();
+ }
+
+ //now if we need to, save the old FP and set the new
+ if (FP)
+ {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
+ .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+ //this must be the last instr in the prolog
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
+ .addReg(Alpha::R30).addReg(Alpha::R30);
+ }
+
+}
+
+void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert((MBBI->getOpcode() == Alpha::RETDAG ||
+ MBBI->getOpcode() == Alpha::RETDAGp)
+ && "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ long NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP) {
+ //copy the FP into the SP (discards allocas)
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+ .addReg(Alpha::R15);
+ //restore the FP
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
+ .addImm(0).addReg(Alpha::R15);
+ }
+
+ if (NumBytes != 0) {
+ if (NumBytes <= IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) <= IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ cerr << "Too big a stack frame at " << NumBytes << "\n";
+ abort();
+ }
+ }
+}
+
+unsigned AlphaRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? Alpha::R15 : Alpha::R30;
+}
+
+unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "AlphaGenRegisterInfo.inc"
+
+std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
+{
+ std::string s(RegisterDescriptors[reg].Name);
+ return s;
+}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
new file mode 100644
index 0000000..c4f5f7b
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -0,0 +1,67 @@
+//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAREGISTERINFO_H
+#define ALPHAREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "AlphaGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class Type;
+
+struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ AlphaRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ static std::string getPrettyName(unsigned reg);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td
new file mode 100644
index 0000000..35e6804
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.td
@@ -0,0 +1,171 @@
+//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Alpha register set.
+//
+//===----------------------------------------------------------------------===//
+
+class AlphaReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Alpha";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 64-bit general-purpose registers
+class GPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+//#define FP $15
+//#define RA $26
+//#define PV $27
+//#define GP $29
+//#define SP $30
+
+// General-purpose registers
+def R0 : GPR< 0, "$0">, DwarfRegNum<[0]>;
+def R1 : GPR< 1, "$1">, DwarfRegNum<[1]>;
+def R2 : GPR< 2, "$2">, DwarfRegNum<[2]>;
+def R3 : GPR< 3, "$3">, DwarfRegNum<[3]>;
+def R4 : GPR< 4, "$4">, DwarfRegNum<[4]>;
+def R5 : GPR< 5, "$5">, DwarfRegNum<[5]>;
+def R6 : GPR< 6, "$6">, DwarfRegNum<[6]>;
+def R7 : GPR< 7, "$7">, DwarfRegNum<[7]>;
+def R8 : GPR< 8, "$8">, DwarfRegNum<[8]>;
+def R9 : GPR< 9, "$9">, DwarfRegNum<[9]>;
+def R10 : GPR<10, "$10">, DwarfRegNum<[10]>;
+def R11 : GPR<11, "$11">, DwarfRegNum<[11]>;
+def R12 : GPR<12, "$12">, DwarfRegNum<[12]>;
+def R13 : GPR<13, "$13">, DwarfRegNum<[13]>;
+def R14 : GPR<14, "$14">, DwarfRegNum<[14]>;
+def R15 : GPR<15, "$15">, DwarfRegNum<[15]>;
+def R16 : GPR<16, "$16">, DwarfRegNum<[16]>;
+def R17 : GPR<17, "$17">, DwarfRegNum<[17]>;
+def R18 : GPR<18, "$18">, DwarfRegNum<[18]>;
+def R19 : GPR<19, "$19">, DwarfRegNum<[19]>;
+def R20 : GPR<20, "$20">, DwarfRegNum<[20]>;
+def R21 : GPR<21, "$21">, DwarfRegNum<[21]>;
+def R22 : GPR<22, "$22">, DwarfRegNum<[22]>;
+def R23 : GPR<23, "$23">, DwarfRegNum<[23]>;
+def R24 : GPR<24, "$24">, DwarfRegNum<[24]>;
+def R25 : GPR<25, "$25">, DwarfRegNum<[25]>;
+def R26 : GPR<26, "$26">, DwarfRegNum<[26]>;
+def R27 : GPR<27, "$27">, DwarfRegNum<[27]>;
+def R28 : GPR<28, "$28">, DwarfRegNum<[28]>;
+def R29 : GPR<29, "$29">, DwarfRegNum<[29]>;
+def R30 : GPR<30, "$30">, DwarfRegNum<[30]>;
+def R31 : GPR<31, "$31">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "$f0">, DwarfRegNum<[33]>;
+def F1 : FPR< 1, "$f1">, DwarfRegNum<[34]>;
+def F2 : FPR< 2, "$f2">, DwarfRegNum<[35]>;
+def F3 : FPR< 3, "$f3">, DwarfRegNum<[36]>;
+def F4 : FPR< 4, "$f4">, DwarfRegNum<[37]>;
+def F5 : FPR< 5, "$f5">, DwarfRegNum<[38]>;
+def F6 : FPR< 6, "$f6">, DwarfRegNum<[39]>;
+def F7 : FPR< 7, "$f7">, DwarfRegNum<[40]>;
+def F8 : FPR< 8, "$f8">, DwarfRegNum<[41]>;
+def F9 : FPR< 9, "$f9">, DwarfRegNum<[42]>;
+def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>;
+def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>;
+def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>;
+def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>;
+def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>;
+def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>;
+def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>;
+def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>;
+def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>;
+def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>;
+def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>;
+def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>;
+def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>;
+def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>;
+def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>;
+def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>;
+def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>;
+def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>;
+def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>;
+def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>;
+def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>;
+def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
+
+ //#define FP $15
+ //#define RA $26
+ //#define PV $27
+ //#define GP $29
+ //#define SP $30
+ // $28 is undefined after any and all calls
+
+/// Register classes
+def GPRC : RegisterClass<"Alpha", [i64], 64,
+ // Volatile
+ [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
+ R23, R24, R25, R28,
+ //Special meaning, but volatile
+ R27, //procedure address
+ R26, //return address
+ R29, //global offset table address
+ // Non-volatile
+ R9, R10, R11, R12, R13, R14,
+// Don't allocate 15, 30, 31
+ R15, R30, R31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GPRCClass::iterator
+ GPRCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3;
+ }
+ }];
+}
+
+def F4RC : RegisterClass<"Alpha", [f32], 64, [F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ F4RCClass::iterator
+ F4RCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-1;
+ }
+ }];
+}
+
+def F8RC : RegisterClass<"Alpha", [f64], 64, [F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ F8RCClass::iterator
+ F8RCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-1;
+ }
+ }];
+}
diff --git a/lib/Target/Alpha/AlphaRelocations.h b/lib/Target/Alpha/AlphaRelocations.h
new file mode 100644
index 0000000..4c92045
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRelocations.h
@@ -0,0 +1,31 @@
+//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHARELOCATIONS_H
+#define ALPHARELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Alpha {
+ enum RelocationType {
+ reloc_literal,
+ reloc_gprellow,
+ reloc_gprelhigh,
+ reloc_gpdist,
+ reloc_bsr
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td
new file mode 100644
index 0000000..b7b4560
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSchedule.td
@@ -0,0 +1,84 @@
+//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//This is Table 2-2 from the 21264 Compiler Writer's Guide,
+//modified somewhat
+
+//Pipelines
+
+def L0 : FuncUnit;
+def L1 : FuncUnit;
+def FST0 : FuncUnit;
+def FST1 : FuncUnit;
+def U0 : FuncUnit;
+def U1 : FuncUnit;
+def FA : FuncUnit;
+def FM : FuncUnit;
+
+def s_ild : InstrItinClass;
+def s_fld : InstrItinClass;
+def s_ist : InstrItinClass;
+def s_fst : InstrItinClass;
+def s_lda : InstrItinClass;
+def s_rpcc : InstrItinClass;
+def s_rx : InstrItinClass;
+def s_mxpr : InstrItinClass;
+def s_icbr : InstrItinClass;
+def s_ubr : InstrItinClass;
+def s_jsr : InstrItinClass;
+def s_iadd : InstrItinClass;
+def s_ilog : InstrItinClass;
+def s_ishf : InstrItinClass;
+def s_cmov : InstrItinClass;
+def s_imul : InstrItinClass;
+def s_imisc : InstrItinClass;
+def s_fbr : InstrItinClass;
+def s_fadd : InstrItinClass;
+def s_fmul : InstrItinClass;
+def s_fcmov : InstrItinClass;
+def s_fdivt : InstrItinClass;
+def s_fdivs : InstrItinClass;
+def s_fsqrts: InstrItinClass;
+def s_fsqrtt: InstrItinClass;
+def s_ftoi : InstrItinClass;
+def s_itof : InstrItinClass;
+def s_pseudo : InstrItinClass;
+
+//Table 2-4: Instruction Class Latency in Cycles
+//modified somewhat
+
+def Alpha21264Itineraries : ProcessorItineraries<[
+ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
+ InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
+ InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
+ InstrItinData<s_fst , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_lda , [InstrStage<1, [L0, L1, U0, U1]>]>,
+ InstrItinData<s_rpcc , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_rx , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_mxpr , [InstrStage<1, [L0, L1]>]>,
+ InstrItinData<s_icbr , [InstrStage<0, [U0, U1]>]>,
+ InstrItinData<s_ubr , [InstrStage<3, [U0, U1]>]>,
+ InstrItinData<s_jsr , [InstrStage<3, [L0]>]>,
+ InstrItinData<s_iadd , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ilog , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ishf , [InstrStage<1, [U0, U1]>]>,
+ InstrItinData<s_cmov , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_imul , [InstrStage<7, [U1]>]>,
+ InstrItinData<s_imisc , [InstrStage<3, [U0]>]>,
+ InstrItinData<s_fbr , [InstrStage<0, [FA]>]>,
+ InstrItinData<s_fadd , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fmul , [InstrStage<6, [FM]>]>,
+ InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
+ InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
+ InstrItinData<s_fsqrts , [InstrStage<18, [FA]>]>,
+ InstrItinData<s_fsqrtt , [InstrStage<33, [FA]>]>,
+ InstrItinData<s_ftoi , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_itof , [InstrStage<4, [L0, L1]>]>
+]>;
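+
+// Reading an entry: InstrItinData<s_imul, [InstrStage<7, [U1]>]> says an
+// integer multiply issues only to pipe U1 and its result is available
+// after 7 cycles, per the 21264 table cited above.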
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
new file mode 100644
index 0000000..d5a9365
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -0,0 +1,25 @@
+//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alpha specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaSubtarget.h"
+#include "Alpha.h"
+#include "AlphaGenSubtarget.inc"
+using namespace llvm;
+
+AlphaSubtarget::AlphaSubtarget(const Module &M, const std::string &FS)
+ : HasCT(false) {
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
new file mode 100644
index 0000000..0a944cb
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -0,0 +1,47 @@
+//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASUBTARGET_H
+#define ALPHASUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class AlphaSubtarget : public TargetSubtarget {
+protected:
+
+ bool HasCT;
+
+ InstrItineraryData InstrItins;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ AlphaSubtarget(const Module &M, const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool hasCT() const { return HasCT; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Alpha/AlphaTargetAsmInfo.cpp b/lib/Target/Alpha/AlphaTargetAsmInfo.cpp
new file mode 100644
index 0000000..6092ab6
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetAsmInfo.cpp
@@ -0,0 +1,31 @@
+//===-- AlphaTargetAsmInfo.cpp - Alpha asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AlphaTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaTargetMachine.h"
+#include "AlphaTargetAsmInfo.h"
+
+using namespace llvm;
+
+AlphaTargetAsmInfo::AlphaTargetAsmInfo(const AlphaTargetMachine &TM)
+ : TargetAsmInfo(TM) {
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ JumpTableDirective = ".gprel32";
+ JumpTableDataSection = "\t.section .rodata\n";
+ WeakRefDirective = "\t.weak\t";
+}
+
+unsigned AlphaTargetAsmInfo::RelocBehaviour() const {
+ return (TM.getRelocationModel() != Reloc::Static ?
+ Reloc::LocalOrGlobal : Reloc::Global);
+}
diff --git a/lib/Target/Alpha/AlphaTargetAsmInfo.h b/lib/Target/Alpha/AlphaTargetAsmInfo.h
new file mode 100644
index 0000000..7675b26
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetAsmInfo.h
@@ -0,0 +1,32 @@
+//=====-- AlphaTargetAsmInfo.h - Alpha asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AlphaTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHATARGETASMINFO_H
+#define ALPHATARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class AlphaTargetMachine;
+
+ struct AlphaTargetAsmInfo : public TargetAsmInfo {
+ explicit AlphaTargetAsmInfo(const AlphaTargetMachine &TM);
+
+ virtual unsigned RelocBehaviour() const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
new file mode 100644
index 0000000..4c83054
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -0,0 +1,126 @@
+//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaJITInfo.h"
+#include "AlphaTargetAsmInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// AlphaTargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int AlphaTargetMachineModule;
+int AlphaTargetMachineModule = 0;
+
+// Register the targets
+static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
+
+const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
+ return new AlphaTargetAsmInfo(*this);
+}
+
+unsigned AlphaTargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "alpha*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && TT[0] == 'a' && TT[1] == 'l' && TT[2] == 'p' &&
+ TT[3] == 'h' && TT[4] == 'a')
+ return 20;
+ // If the target triple is something non-alpha, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned AlphaTargetMachine::getJITMatchQuality() {
+#ifdef __alpha
+ return 10;
+#else
+ return 0;
+#endif
+}
+
+AlphaTargetMachine::AlphaTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("e-f128:128:128"),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+ JITInfo(*this),
+ Subtarget(M, FS),
+ TLInfo(*this) {
+ setRelocationModel(Reloc::PIC_);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createAlphaISelDag(*this));
+ return false;
+}
+bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer
+ PM.add(createAlphaBranchSelectionPass());
+ return false;
+}
+bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ PM.add(createAlphaLLRPPass(*this));
+ PM.add(createAlphaCodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
+bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE) {
+ PM.add(createAlphaCodeEmitterPass(*this, MCE));
+ if (DumpAsm)
+ PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+ return false;
+}
+bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE) {
+ PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm)
+ PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+ return false;
+}
+bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ return addCodeEmitter(PM, OptLevel, DumpAsm, MCE);
+}
+bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ return addCodeEmitter(PM, OptLevel, DumpAsm, JCE);
+}
+
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
new file mode 100644
index 0000000..51224e8
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -0,0 +1,82 @@
+//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_TARGETMACHINE_H
+#define ALPHA_TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaJITInfo.h"
+#include "AlphaISelLowering.h"
+#include "AlphaSubtarget.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+class AlphaTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ AlphaInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ AlphaJITInfo JITInfo;
+ AlphaSubtarget Subtarget;
+ AlphaTargetLowering TLInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ AlphaTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const AlphaRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual AlphaTargetLowering* getTargetLowering() const {
+ return const_cast<AlphaTargetLowering*>(&TLInfo);
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual AlphaJITInfo* getJITInfo() {
+ return &JITInfo;
+ }
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
new file mode 100644
index 0000000..74b48ee6
--- /dev/null
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -0,0 +1,305 @@
+//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format Alpha assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ struct VISIBILITY_HIDDEN AlphaAsmPrinter : public AsmPrinter {
+ /// Unique incrementer for label values for referencing Global values.
+ ///
+
+ explicit AlphaAsmPrinter(raw_ostream &o, TargetMachine &tm,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(o, tm, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "Alpha Assembly Printer";
+ }
+ bool printInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, bool IsCallOp = false);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printBaseOffsetPair (const MachineInstr *MI, int i, bool brackets=true);
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode);
+ };
+} // end of anonymous namespace
+
+/// createAlphaCodePrinterPass - Returns a pass that prints the Alpha
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createAlphaCodePrinterPass(raw_ostream &o,
+ TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+#include "AlphaGenAsmWriter.inc"
+
+void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
+{
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Register) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physreg??");
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ assert(MO.getImm() < (1 << 30));
+ } else {
+ printOp(MO);
+ }
+}
+
+
+void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << RI.get(MO.getReg()).AsmName;
+ return;
+
+ case MachineOperand::MO_Immediate:
+ cerr << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ return;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ if (GV->isDeclaration() && GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out jump tables referenced by the function
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ EmitAlignment(4, F);
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl " << CurrentFnName << "\n";
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.ent " << CurrentFnName << "\n";
+
+ O << CurrentFnName << ":\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ ++EmittedInsts;
+ if (!printInstruction(II)) {
+ assert(0 && "Unhandled instruction in asm writer!");
+ abort();
+ }
+ }
+ }
+
+ O << "\t.end " << CurrentFnName << "\n";
+
+ // We didn't modify anything.
+ return false;
+}
+
+bool AlphaAsmPrinter::doInitialization(Module &M)
+{
+ if(TM.getSubtarget<AlphaSubtarget>().hasCT())
+ O << "\t.arch ev6\n"; //This might need to be ev67, so leave this test here
+ else
+ O << "\t.arch ev6\n";
+ O << "\t.set noat\n";
+ return AsmPrinter::doInitialization(M);
+}
+
+void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer()) return; // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ // 0: Switch to section
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ // 1: Check visibility
+ printVisibility(name, GVar->getVisibility());
+
+ // 2: Kind
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ O << TAI->getWeakRefDirective() << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ case GlobalValue::ExternalLinkage:
+ O << TAI->getGlobalDirective() << name << "\n";
+ break;
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ cerr << "Unknown linkage type!\n";
+ abort();
+ }
+
+ // 3: Type, Size, Align
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type\t" << name << ", @object\n";
+ O << "\t.size\t" << name << ", " << Size << "\n";
+ }
+
+ EmitAlignment(Align, GVar);
+
+ O << name << ":\n";
+
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+bool AlphaAsmPrinter::doFinalization(Module &M) {
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ O << "0(";
+ printOperand(MI, OpNo);
+ O << ")";
+ return false;
+}
diff --git a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..b62a7f6
--- /dev/null
+++ b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMAlphaAsmPrinter
+ AlphaAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMAlphaCodeGen n)
+
+add_dependencies(LLVMAlphaAsmPrinter ${n})
diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile
new file mode 100644
index 0000000..c5b3e94
--- /dev/null
+++ b/lib/Target/Alpha/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Alpha/AsmPrinter/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAlphaAsmPrinter
+
+# Hack: we need to include the 'main' Alpha target directory to grab private headers.
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
new file mode 100644
index 0000000..1e535f7
--- /dev/null
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -0,0 +1,25 @@
+set(LLVM_TARGET_DEFINITIONS Alpha.td)
+
+tablegen(AlphaGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(AlphaGenRegisterNames.inc -gen-register-enums)
+tablegen(AlphaGenRegisterInfo.inc -gen-register-desc)
+tablegen(AlphaGenInstrNames.inc -gen-instr-enums)
+tablegen(AlphaGenInstrInfo.inc -gen-instr-desc)
+tablegen(AlphaGenCodeEmitter.inc -gen-emitter)
+tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
+tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
+tablegen(AlphaGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(AlphaCodeGen
+ AlphaBranchSelector.cpp
+ AlphaCodeEmitter.cpp
+ AlphaInstrInfo.cpp
+ AlphaISelDAGToDAG.cpp
+ AlphaISelLowering.cpp
+ AlphaJITInfo.cpp
+ AlphaLLRP.cpp
+ AlphaRegisterInfo.cpp
+ AlphaSubtarget.cpp
+ AlphaTargetAsmInfo.cpp
+ AlphaTargetMachine.cpp
+ )
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
new file mode 100644
index 0000000..d6c82c7
--- /dev/null
+++ b/lib/Target/Alpha/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/Alpha/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMAlphaCodeGen
+TARGET = Alpha
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \
+ AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \
+ AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \
+ AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
+ AlphaGenSubtarget.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt
new file mode 100644
index 0000000..9ae1517
--- /dev/null
+++ b/lib/Target/Alpha/README.txt
@@ -0,0 +1,42 @@
+***
+
+add gcc builtins for alpha instructions
+
+
+***
+
+custom expand byteswap into nifty
+extract/insert/mask byte/word/longword/quadword low/high
+sequences
+
+***
+
+see if any of the extract/insert/mask operations can be added
+
+***
+
+match more interesting things for cmovlbc cmovlbs (move if low bit clear/set)
+
+***
+
+lower srem and urem
+
+remq(i,j): i - (j * divq(i,j)) if j != 0
+remqu(i,j): i - (j * divqu(i,j)) if j != 0
+reml(i,j): i - (j * divl(i,j)) if j != 0
+remlu(i,j): i - (j * divlu(i,j)) if j != 0
+
+***
+
+add crazy vector instructions (MVI):
+
+(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word
+PKWB, UNPKBW pack/unpack word to byte
+PKLB UNPKBL pack/unpack long to byte
+PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b))
+
+cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions)
+
+This page has some good examples of other operations (such as saturating add)
+that can be synthesized well from these rather meager vector ops:
+http://www.alphalinux.org/docs/MVI-full.html
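+
+A minimal C sketch of the rem-via-div identity above, assuming j != 0
+(illustrative only; divq/divqu correspond to signed/unsigned 64-bit divides):
+
+  #include <stdint.h>
+
+  /* remq: signed 64-bit remainder synthesized from divide and multiply */
+  static int64_t remq(int64_t i, int64_t j) { return i - j * (i / j); }
+
+  /* remqu: unsigned variant, same identity */
+  static uint64_t remqu(uint64_t i, uint64_t j) { return i - j * (i / j); }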
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
new file mode 100644
index 0000000..4d7b545
--- /dev/null
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -0,0 +1,3601 @@
+//===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to C code, compilable by GCC and other C
+// compilers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+#include <sstream>
+using namespace llvm;
+
+/// CBackendTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int CBackendTargetMachineModule;
+int CBackendTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<CTargetMachine> X("c", "C backend");
+
+namespace {
+ /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
+ /// any unnamed structure types that are used by the program, and merges
+ /// external functions with the same name.
+ ///
+ class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass {
+ public:
+ static char ID;
+ CBackendNameAllUsedStructsAndMergeFunctions()
+ : ModulePass(&ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FindUsedTypes>();
+ }
+
+ virtual const char *getPassName() const {
+ return "C backend type canonicalizer";
+ }
+
+ virtual bool runOnModule(Module &M);
+ };
+
+ char CBackendNameAllUsedStructsAndMergeFunctions::ID = 0;
+
+ /// CWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C translation unit.
+ class CWriter : public FunctionPass, public InstVisitor<CWriter> {
+ raw_ostream &Out;
+ IntrinsicLowering *IL;
+ Mangler *Mang;
+ LoopInfo *LI;
+ const Module *TheModule;
+ const TargetAsmInfo* TAsm;
+ const TargetData* TD;
+ std::map<const Type *, std::string> TypeNames;
+ std::map<const ConstantFP *, unsigned> FPConstantMap;
+ std::set<Function*> intrinsicPrototypesAlreadyGenerated;
+ std::set<const Argument*> ByValParams;
+ unsigned FPCounter;
+
+ public:
+ static char ID;
+ explicit CWriter(raw_ostream &o)
+ : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0),
+ TheModule(0), TAsm(0), TD(0) {
+ FPCounter = 0;
+ }
+
+ virtual const char *getPassName() const { return "C backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ bool runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ LI = &getAnalysis<LoopInfo>();
+
+ // Get rid of intrinsics we can't handle.
+ lowerIntrinsics(F);
+
+ // Output all floating point constants that cannot be printed accurately.
+ printFloatingPointConstants(F);
+
+ printFunction(F);
+ return false;
+ }
+
+ virtual bool doFinalization(Module &M) {
+ // Free memory...
+ delete IL;
+ delete TD;
+ delete Mang;
+ FPConstantMap.clear();
+ TypeNames.clear();
+ ByValParams.clear();
+ intrinsicPrototypesAlreadyGenerated.clear();
+ return false;
+ }
+
+ raw_ostream &printType(raw_ostream &Out, const Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false,
+ const AttrListPtr &PAL = AttrListPtr());
+ std::ostream &printType(std::ostream &Out, const Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false,
+ const AttrListPtr &PAL = AttrListPtr());
+ raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
+ std::ostream &printSimpleType(std::ostream &Out, const Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
+
+ void printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ const PointerType *Ty);
+
+ /// writeOperandDeref - Print the result of dereferencing the specified
+ /// operand with '*'. This is equivalent to printing '*' then using
+ /// writeOperand, but avoids excess syntax in some cases.
+ void writeOperandDeref(Value *Operand) {
+ if (isAddressExposed(Operand)) {
+ // Already something with an address exposed.
+ writeOperandInternal(Operand);
+ } else {
+ Out << "*(";
+ writeOperand(Operand);
+ Out << ")";
+ }
+ }
+
+ void writeOperand(Value *Operand, bool Static = false);
+ void writeInstComputationInline(Instruction &I);
+ void writeOperandInternal(Value *Operand, bool Static = false);
+ void writeOperandWithCast(Value* Operand, unsigned Opcode);
+ void writeOperandWithCast(Value* Operand, const ICmpInst &I);
+ bool writeInstructionCast(const Instruction &I);
+
+ void writeMemoryAccess(Value *Operand, const Type *OperandType,
+ bool IsVolatile, unsigned Alignment);
+
+ private:
+ std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
+
+ void lowerIntrinsics(Function &F);
+
+ void printModule(Module *M);
+ void printModuleTypes(const TypeSymbolTable &ST);
+ void printContainedStructs(const Type *Ty, std::set<const Type *> &);
+ void printFloatingPointConstants(Function &F);
+ void printFloatingPointConstants(const Constant *C);
+ void printFunctionSignature(const Function *F, bool Prototype);
+
+ void printFunction(Function &);
+ void printBasicBlock(BasicBlock *BB);
+ void printLoop(Loop *L);
+
+ void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy);
+ void printConstant(Constant *CPV, bool Static);
+ void printConstantWithCast(Constant *CPV, unsigned Opcode);
+ bool printConstExprCast(const ConstantExpr *CE, bool Static);
+ void printConstantArray(ConstantArray *CPA, bool Static);
+ void printConstantVector(ConstantVector *CV, bool Static);
+
+ /// isAddressExposed - Return true if the specified value's name needs to
+ /// have its address taken in order to get a C value of the correct type.
+ /// This happens for global variables, byval parameters, and direct allocas.
+ bool isAddressExposed(const Value *V) const {
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return ByValParams.count(A);
+ return isa<GlobalVariable>(V) || isDirectAlloca(V);
+ }
+
+ // isInlinableInst - Attempt to inline instructions into their uses to build
+ // trees as much as possible. To do this, we have to consistently decide
+ // what is acceptable to inline, so that variable declarations don't get
+ // printed and an extra copy of the expr is not emitted.
+ //
+ static bool isInlinableInst(const Instruction &I) {
+ // Always inline cmp instructions, even if they are shared by multiple
+ // expressions. GCC generates horrible code if we don't.
+ if (isa<CmpInst>(I))
+ return true;
+
+ // Must be an expression, must be used exactly once. If it is dead, we
+ // emit it inline where it would go.
+ if (I.getType() == Type::VoidTy || !I.hasOneUse() ||
+ isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
+ isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
+ isa<InsertValueInst>(I))
+ // Don't inline a load across a store or other bad things!
+ return false;
+
+ // Must not be used in inline asm, extractelement, or shufflevector.
+ if (I.hasOneUse()) {
+ const Instruction &User = cast<Instruction>(*I.use_back());
+ if (isInlineAsm(User) || isa<ExtractElementInst>(User) ||
+ isa<ShuffleVectorInst>(User))
+ return false;
+ }
+
+ // Only inline an instruction if its use is in the same BB as the inst.
+ return I.getParent() == cast<Instruction>(I.use_back())->getParent();
+ }
+
+ // isDirectAlloca - Define fixed sized allocas in the entry block as direct
+ // variables which are accessed with the & operator. This causes GCC to
+ // generate significantly better code than to emit alloca calls directly.
+ //
+ static const AllocaInst *isDirectAlloca(const Value *V) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ if (!AI) return 0;
+ if (AI->isArrayAllocation())
+ return 0; // FIXME: we can also inline fixed size array allocas!
+ if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
+ return 0;
+ return AI;
+ }
+
+ // isInlineAsm - Check if the instruction is a call to an inline asm chunk
+ static bool isInlineAsm(const Instruction& I) {
+ if (isa<CallInst>(&I) && isa<InlineAsm>(I.getOperand(0)))
+ return true;
+ return false;
+ }
+
+ // Instruction visitation functions
+ friend class InstVisitor<CWriter>;
+
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitInvokeInst(InvokeInst &I) {
+ assert(0 && "Lowerinvoke pass didn't work!");
+ }
+
+ void visitUnwindInst(UnwindInst &I) {
+ assert(0 && "Lowerinvoke pass didn't work!");
+ }
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitPHINode(PHINode &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+
+ void visitCastInst (CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitCallInst (CallInst &I);
+ void visitInlineAsm(CallInst &I);
+ bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
+
+ void visitMallocInst(MallocInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitFreeInst (FreeInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitStoreInst (StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitVAArgInst (VAArgInst &I);
+
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &SVI);
+
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+
+ void visitInstruction(Instruction &I) {
+ cerr << "C Writer does not know about " << I;
+ abort();
+ }
+
+ void outputLValue(Instruction *I) {
+ Out << " " << GetValueName(I) << " = ";
+ }
+
+ bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To);
+ void printPHICopiesForSuccessor(BasicBlock *CurBlock,
+ BasicBlock *Successor, unsigned Indent);
+ void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
+ unsigned Indent);
+ void printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static);
+
+ std::string GetValueName(const Value *Operand);
+ };
+}
+
+char CWriter::ID = 0;
+
+/// This method inserts names for any unnamed structure types that are used by
+/// the program, and removes names from structure types that are not used by the
+/// program.
+///
+bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
+ // Get a set of types that are used by the program...
+ std::set<const Type *> UT = getAnalysis<FindUsedTypes>().getTypes();
+
+ // Loop over the module symbol table, removing types from UT that are
+ // already named, and removing names for types that are not used.
+ //
+ TypeSymbolTable &TST = M.getTypeSymbolTable();
+ for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end();
+ TI != TE; ) {
+ TypeSymbolTable::iterator I = TI++;
+
+ // If this isn't a struct or array type, remove it from our set of types
+ // to name. This simplifies emission later.
+ if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second) &&
+ !isa<ArrayType>(I->second)) {
+ TST.remove(I);
+ } else {
+ // If this is not used, remove it from the symbol table.
+ std::set<const Type *>::iterator UTI = UT.find(I->second);
+ if (UTI == UT.end())
+ TST.remove(I);
+ else
+ UT.erase(UTI); // Only keep one name for this type.
+ }
+ }
+
+ // UT now contains types that are not named. Loop over it, naming
+ // structure types.
+ //
+ bool Changed = false;
+ unsigned RenameCounter = 0;
+ for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end();
+ I != E; ++I)
+ if (isa<StructType>(*I) || isa<ArrayType>(*I)) {
+ while (M.addTypeName("unnamed"+utostr(RenameCounter), *I))
+ ++RenameCounter;
+ Changed = true;
+ }
+
+
+ // Loop over all external functions and globals. If we have two with
+ // identical names, merge them.
+ // FIXME: This code should disappear when we don't allow values with the same
+ // names when they have different types!
+ std::map<std::string, GlobalValue*> ExtSymbols;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
+ Function *GV = I++;
+ if (GV->isDeclaration() && GV->hasName()) {
+ std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
+ = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
+ if (!X.second) {
+ // Found a conflict, replace this global with the previous one.
+ GlobalValue *OldGV = X.first->second;
+ GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
+ GV->eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+ // Do the same for globals.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E;) {
+ GlobalVariable *GV = I++;
+ if (GV->isDeclaration() && GV->hasName()) {
+ std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
+ = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
+ if (!X.second) {
+ // Found a conflict, replace this global with the previous one.
+ GlobalValue *OldGV = X.first->second;
+ GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
+ GV->eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+/// printStructReturnPointerFunctionType - This is like printType for a struct
+/// return type, except, instead of printing the type as void (*)(Struct*, ...)
+/// print it as "Struct (*)(...)", for struct return functions.
+void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ const PointerType *TheTy) {
+ const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
+ std::stringstream FunctionInnards;
+ FunctionInnards << " (*) (";
+ bool PrintedType = false;
+
+ FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
+ const Type *RetTy = cast<PointerType>(I->get())->getElementType();
+ unsigned Idx = 1;
+ for (++I, ++Idx; I != E; ++I, ++Idx) {
+ if (PrintedType)
+ FunctionInnards << ", ";
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(isa<PointerType>(ArgTy));
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ PrintedType = true;
+ }
+ if (FTy->isVarArg()) {
+ if (PrintedType)
+ FunctionInnards << ", ...";
+ } else if (!PrintedType) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ std::string tstr = FunctionInnards.str();
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+}
+
+raw_ostream &
+CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
+ const std::string &NameSoFar) {
+ assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) &&
+ "Invalid type for printSimpleType");
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return Out << "void " << NameSoFar;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return Out << "bool " << NameSoFar;
+ else if (NumBits <= 8)
+ return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
+ else if (NumBits <= 16)
+ return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
+ else if (NumBits <= 32)
+ return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
+ else if (NumBits <= 64)
+ return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
+ else {
+ assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+ return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
+ }
+ }
+ case Type::FloatTyID: return Out << "float " << NameSoFar;
+ case Type::DoubleTyID: return Out << "double " << NameSoFar;
+ // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
+ // present matches host 'long double'.
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: return Out << "long double " << NameSoFar;
+
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return printSimpleType(Out, VTy->getElementType(), isSigned,
+ " __attribute__((vector_size(" +
+ utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
+ }
+
+ default:
+ cerr << "Unknown primitive type: " << *Ty << "\n";
+ abort();
+ }
+}
+
+std::ostream &
+CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
+ const std::string &NameSoFar) {
+ assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) &&
+ "Invalid type for printSimpleType");
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return Out << "void " << NameSoFar;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return Out << "bool " << NameSoFar;
+ else if (NumBits <= 8)
+ return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
+ else if (NumBits <= 16)
+ return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
+ else if (NumBits <= 32)
+ return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
+ else if (NumBits <= 64)
+ return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
+ else {
+ assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+ return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
+ }
+ }
+ case Type::FloatTyID: return Out << "float " << NameSoFar;
+ case Type::DoubleTyID: return Out << "double " << NameSoFar;
+ // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
+ // present matches host 'long double'.
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: return Out << "long double " << NameSoFar;
+
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return printSimpleType(Out, VTy->getElementType(), isSigned,
+ " __attribute__((vector_size(" +
+ utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
+ }
+
+ default:
+ cerr << "Unknown primitive type: " << *Ty << "\n";
+ abort();
+ }
+}
+
+// Pass the Type* and the variable name and this prints out the variable
+// declaration.
+//
+raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName, const AttrListPtr &PAL) {
+ if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) {
+ printSimpleType(Out, Ty, isSigned, NameSoFar);
+ return Out;
+ }
+
+ // Check to see if the type is named.
+ if (!IgnoreName || isa<OpaqueType>(Ty)) {
+ std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+ std::stringstream FunctionInnards;
+ FunctionInnards << " (" << NameSoFar << ") (";
+ unsigned Idx = 1;
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(isa<PointerType>(ArgTy));
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ if (I != FTy->param_begin())
+ FunctionInnards << ", ";
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams())
+ FunctionInnards << ", ...";
+ } else if (!FTy->getNumParams()) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ std::string tstr = FunctionInnards.str();
+ printType(Out, FTy->getReturnType(),
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+ return Out;
+ }
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+ Out << NameSoFar + " {\n";
+ unsigned Idx = 0;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Out << " ";
+ printType(Out, *I, false, "field" + utostr(Idx++));
+ Out << ";\n";
+ }
+ Out << '}';
+ if (STy->isPacked())
+ Out << " __attribute__ ((packed))";
+ return Out;
+ }
+
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ std::string ptrName = "*" + NameSoFar;
+
+ if (isa<ArrayType>(PTy->getElementType()) ||
+ isa<VectorType>(PTy->getElementType()))
+ ptrName = "(" + ptrName + ")";
+
+ if (!PAL.isEmpty())
+ // Must be a function ptr cast!
+ return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
+ return printType(Out, PTy->getElementType(), false, ptrName);
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ unsigned NumElements = ATy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ // Arrays are wrapped in structs to allow them to have normal
+ // value semantics (avoiding the array "decay").
+ Out << NameSoFar << " { ";
+ printType(Out, ATy->getElementType(), false,
+ "array[" + utostr(NumElements) + "]");
+ return Out << "; }";
+ }
+
+ case Type::OpaqueTyID: {
+ static int Count = 0;
+ std::string TyName = "struct opaque_" + itostr(Count++);
+ assert(TypeNames.find(Ty) == TypeNames.end());
+ TypeNames[Ty] = TyName;
+ return Out << TyName << ' ' << NameSoFar;
+ }
+ default:
+ assert(0 && "Unhandled case in getTypeProps!");
+ abort();
+ }
+
+ return Out;
+}
+
+// Pass the Type* and the variable name and this prints out the variable
+// declaration.
+//
+std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName, const AttrListPtr &PAL) {
+ if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) {
+ printSimpleType(Out, Ty, isSigned, NameSoFar);
+ return Out;
+ }
+
+ // Check to see if the type is named.
+ if (!IgnoreName || isa<OpaqueType>(Ty)) {
+ std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+ std::stringstream FunctionInnards;
+ FunctionInnards << " (" << NameSoFar << ") (";
+ unsigned Idx = 1;
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(isa<PointerType>(ArgTy));
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ if (I != FTy->param_begin())
+ FunctionInnards << ", ";
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams())
+ FunctionInnards << ", ...";
+ } else if (!FTy->getNumParams()) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ std::string tstr = FunctionInnards.str();
+ printType(Out, FTy->getReturnType(),
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+ return Out;
+ }
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+ Out << NameSoFar + " {\n";
+ unsigned Idx = 0;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Out << " ";
+ printType(Out, *I, false, "field" + utostr(Idx++));
+ Out << ";\n";
+ }
+ Out << '}';
+ if (STy->isPacked())
+ Out << " __attribute__ ((packed))";
+ return Out;
+ }
+
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ std::string ptrName = "*" + NameSoFar;
+
+ if (isa<ArrayType>(PTy->getElementType()) ||
+ isa<VectorType>(PTy->getElementType()))
+ ptrName = "(" + ptrName + ")";
+
+ if (!PAL.isEmpty())
+ // Must be a function ptr cast!
+ return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
+ return printType(Out, PTy->getElementType(), false, ptrName);
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ unsigned NumElements = ATy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ // Arrays are wrapped in structs to allow them to have normal
+ // value semantics (avoiding the array "decay").
+ Out << NameSoFar << " { ";
+ printType(Out, ATy->getElementType(), false,
+ "array[" + utostr(NumElements) + "]");
+ return Out << "; }";
+ }
+
+ case Type::OpaqueTyID: {
+ static int Count = 0;
+ std::string TyName = "struct opaque_" + itostr(Count++);
+ assert(TypeNames.find(Ty) == TypeNames.end());
+ TypeNames[Ty] = TyName;
+ return Out << TyName << ' ' << NameSoFar;
+ }
+ default:
+ assert(0 && "Unhandled case in getTypeProps!");
+ abort();
+ }
+
+ return Out;
+}
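+
+// As the ArrayTyID case above notes, an LLVM array type such as [4 x i32]
+// is emitted wrapped in a struct so that assignment copies the whole array
+// (a bare C array decays to a pointer and cannot be assigned). Roughly
+// (illustrative sketch; the backend derives its own struct names):
+//
+//   struct l_array_example { int array[4]; };  /* hypothetical name */
+//   struct l_array_example a, b;
+//   a = b;   /* copies all four elements, i.e. value semantics */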
+
+void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
+
+ // As a special case, print the array as a string if it is an array of
+ // ubytes or an array of sbytes with positive values.
+ //
+ const Type *ETy = CPA->getType()->getElementType();
+ bool isString = (ETy == Type::Int8Ty); // i8 covers both the old ubyte/sbyte cases
+
+ // Make sure the last character is a null char, as automatically added by C
+ if (isString && (CPA->getNumOperands() == 0 ||
+ !cast<Constant>(*(CPA->op_end()-1))->isNullValue()))
+ isString = false;
+
+ if (isString) {
+ Out << '\"';
+ // Keep track of whether the last number was a hexadecimal escape
+ bool LastWasHex = false;
+
+ // Do not include the last character, which we know is null
+ for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) {
+ unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue();
+
+ // Print it out literally if it is a printable character. The only thing
+ // to be careful about is when the last letter output was a hex escape
+ // code, in which case we have to be careful not to print out hex digits
+ // explicitly (the C compiler thinks it is a continuation of the previous
+ // character, sheesh...)
+ //
+ if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
+ LastWasHex = false;
+ if (C == '"' || C == '\\')
+ Out << "\\" << (char)C;
+ else
+ Out << (char)C;
+ } else {
+ LastWasHex = false;
+ switch (C) {
+ case '\n': Out << "\\n"; break;
+ case '\t': Out << "\\t"; break;
+ case '\r': Out << "\\r"; break;
+ case '\v': Out << "\\v"; break;
+ case '\a': Out << "\\a"; break;
+ case '\"': Out << "\\\""; break;
+ case '\'': Out << "\\\'"; break;
+ default:
+ Out << "\\x";
+ Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
+ Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ LastWasHex = true;
+ break;
+ }
+ }
+ }
+ Out << '\"';
+ } else {
+ Out << '{';
+ if (CPA->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPA->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPA->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+}
+
+void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
+ Out << '{';
+ if (CP->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CP->getOperand(0)), Static);
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CP->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+}
+
+// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
+// textually as a double (rather than as a reference to a stack-allocated
+// variable). We decide this by converting CFP to a string and back into a
+// double, and then checking whether the conversion results in a bit-equal
+// double to the original value of CFP. This depends on us and the target C
+// compiler agreeing on the conversion process (which is pretty likely since we
+// only deal in IEEE FP).
+//
+static bool isFPCSafeToPrint(const ConstantFP *CFP) {
+ bool ignored;
+ // Do long doubles in hex for now.
+ if (CFP->getType() != Type::FloatTy && CFP->getType() != Type::DoubleTy)
+ return false;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::FloatTy)
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%a", APF.convertToDouble());
+ if (!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3))
+ return APF.bitwiseIsEqual(APFloat(atof(Buffer)));
+ return false;
+#else
+ std::string StrVal = ftostr(APF);
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like "Inf"
+ // or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9')))
+ // Reparse stringized version!
+ return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str())));
+ return false;
+#endif
+}
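+
+// The round-trip test above, reduced to its essence (illustrative sketch
+// using plain stdio; the real code goes through APFloat):
+//
+//   char Buf[64];
+//   sprintf(Buf, "%a", D);                       // hex float output
+//   double Back = atof(Buf);                     // reparse the string
+//   bool Safe = !memcmp(&D, &Back, sizeof(D));   // bit-equal after reparse?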
+
+/// Print out the casting for a cast operation.  This emits the double cast
+/// needed to convert to the destination type, when required.
+/// @brief Print a cast
+void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
+ // Print the destination type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::IntToPtr:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc: // For these the DstTy sign doesn't matter
+ Out << '(';
+ printType(Out, DstTy);
+ Out << ')';
+ break;
+ case Instruction::ZExt:
+ case Instruction::PtrToInt:
+ case Instruction::FPToUI: // For these, make sure we get an unsigned dest
+ Out << '(';
+ printSimpleType(Out, DstTy, false);
+ Out << ')';
+ break;
+ case Instruction::SExt:
+ case Instruction::FPToSI: // For these, make sure we get a signed dest
+ Out << '(';
+ printSimpleType(Out, DstTy, true);
+ Out << ')';
+ break;
+ default:
+ assert(0 && "Invalid cast opcode");
+ }
+
+ // Print the source type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::ZExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, false);
+ Out << ')';
+ break;
+ case Instruction::SIToFP:
+ case Instruction::SExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, true);
+ Out << ')';
+ break;
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // Avoid "cast to pointer from integer of different size" warnings
+ Out << "(unsigned long)";
+ break;
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ break; // These don't need a source cast.
+ default:
+ assert(0 && "Invalid cast opcode");
+ break;
+ }
+}
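+
+// A sketch of the output (operand names are hypothetical): 'zext i8 %x to
+// i32' yields the cast pair "(unsigned int)(unsigned char)", and 'sitofp
+// i32 %x to double' yields "(double)(int)".  IntToPtr and PtrToInt also
+// route the source operand through "(unsigned long)" to silence
+// pointer/integer size warnings.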
+
+// printConstant - The LLVM Constant to C Constant converter.
+void CWriter::printConstant(Constant *CPV, bool Static) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Out << "(";
+ printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
+ if (CE->getOpcode() == Instruction::SExt &&
+ CE->getOperand(0)->getType() == Type::Int1Ty) {
+ // Make sure we really sext from bool here by subtracting from 0
+ Out << "0-";
+ }
+ printConstant(CE->getOperand(0), Static);
+ if (CE->getType() == Type::Int1Ty &&
+ (CE->getOpcode() == Instruction::Trunc ||
+ CE->getOpcode() == Instruction::FPToUI ||
+ CE->getOpcode() == Instruction::FPToSI ||
+ CE->getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really truncate to bool here by anding with 1
+ Out << "&1u";
+ }
+ Out << ')';
+ return;
+
+ case Instruction::GetElementPtr:
+ Out << "(";
+ printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
+ gep_type_end(CPV), Static);
+ Out << ")";
+ return;
+ case Instruction::Select:
+ Out << '(';
+ printConstant(CE->getOperand(0), Static);
+ Out << '?';
+ printConstant(CE->getOperand(1), Static);
+ Out << ':';
+ printConstant(CE->getOperand(2), Static);
+ Out << ')';
+ return;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << " + "; break;
+ case Instruction::Sub: Out << " - "; break;
+ case Instruction::Mul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl: Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ case Instruction::ICmp:
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: Out << " < "; break;
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE: Out << " <= "; break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: Out << " > "; break;
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE: Out << " >= "; break;
+ default: assert(0 && "Illegal ICmp predicate");
+ }
+ break;
+ default: assert(0 && "Illegal opcode here!");
+ }
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ case Instruction::FCmp: {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
+ Out << "0";
+ else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
+ Out << "1";
+ else {
+ const char* op = 0;
+ switch (CE->getPredicate()) {
+ default: assert(0 && "Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+ Out << "llvm_fcmp_" << op << "(";
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ Out << ", ";
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ Out << ")";
+ }
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ default:
+ cerr << "CWriter Error: Unhandled constant expression: "
+ << *CE << "\n";
+ abort();
+ }
+ } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*UNDEF*/";
+ if (!isa<VectorType>(CPV->getType())) {
+ Out << "0)";
+ } else {
+ Out << "{})";
+ }
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ const Type* Ty = CI->getType();
+ if (Ty == Type::Int1Ty)
+ Out << (CI->getZExtValue() ? '1' : '0');
+ else if (Ty == Type::Int32Ty)
+ Out << CI->getZExtValue() << 'u';
+ else if (Ty->getPrimitiveSizeInBits() > 32)
+ Out << CI->getZExtValue() << "ull";
+ else {
+ Out << "((";
+ printSimpleType(Out, Ty, false) << ')';
+ if (CI->isMinValue(true))
+ Out << CI->getZExtValue() << 'u';
+ else
+ Out << CI->getSExtValue();
+ Out << ')';
+ }
+ return;
+ }
+
+ switch (CPV->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ ConstantFP *FPC = cast<ConstantFP>(CPV);
+ std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
+ if (I != FPConstantMap.end()) {
+ // Because of FP precision problems we must load from a stack allocated
+ // value that holds the value in hex.
+ Out << "(*(" << (FPC->getType() == Type::FloatTy ? "float" :
+ FPC->getType() == Type::DoubleTy ? "double" :
+ "long double")
+ << "*)&FPConstant" << I->second << ')';
+ } else {
+ double V;
+ if (FPC->getType() == Type::FloatTy)
+ V = FPC->getValueAPF().convertToFloat();
+ else if (FPC->getType() == Type::DoubleTy)
+ V = FPC->getValueAPF().convertToDouble();
+ else {
+ // Long double. Convert the number to double, discarding precision.
+ // This is not awesome, but it at least makes the CBE output somewhat
+ // useful.
+ APFloat Tmp = FPC->getValueAPF();
+ bool LosesInfo;
+ Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
+ V = Tmp.convertToDouble();
+ }
+
+ if (IsNAN(V)) {
+ // The value is NaN
+
+ // FIXME the actual NaN bits should be emitted.
+ // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
+ // it's 0x7ff4.
+ const unsigned long QuietNaN = 0x7ff8UL;
+ //const unsigned long SignalNaN = 0x7ff4UL;
+
+ // We need to grab the first part of the FP #
+ char Buffer[100];
+
+ uint64_t ll = DoubleToBits(V);
+ sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
+
+ std::string Num(&Buffer[0], &Buffer[6]);
+ unsigned long Val = strtoul(Num.c_str(), 0, 16);
+
+ if (FPC->getType() == Type::FloatTy)
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
+ << Buffer << "\") /*nan*/ ";
+ else
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
+ << Buffer << "\") /*nan*/ ";
+ } else if (IsInf(V)) {
+ // The value is Inf
+ if (V < 0) Out << '-';
+ Out << "LLVM_INF" << (FPC->getType() == Type::FloatTy ? "F" : "")
+ << " /*inf*/ ";
+ } else {
+ std::string Num;
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ // Print out the constant as a floating point number.
+ char Buffer[100];
+ sprintf(Buffer, "%a", V);
+ Num = Buffer;
+#else
+ Num = ftostr(FPC->getValueAPF());
+#endif
+ Out << Num;
+ }
+ }
+ break;
+ }
+
+ case Type::ArrayTyID:
+    // Use C99 compound literal syntax for the initializer.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ Out << "{ "; // Arrays are wrapped in struct types.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
+ printConstantArray(CA, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ const ArrayType *AT = cast<ArrayType>(CPV->getType());
+ Out << '{';
+ if (AT->getNumElements()) {
+ Out << ' ';
+ Constant *CZ = Constant::getNullValue(AT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ }
+ Out << " }";
+ }
+ Out << " }"; // Arrays are wrapped in struct types.
+ break;
+
+ case Type::VectorTyID:
+    // Use C99 compound literal syntax for the initializer.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
+ printConstantVector(CV, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ const VectorType *VT = cast<VectorType>(CPV->getType());
+ Out << "{ ";
+ Constant *CZ = Constant::getNullValue(VT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::StructTyID:
+    // Use C99 compound literal syntax for the initializer.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+ const StructType *ST = cast<StructType>(CPV->getType());
+ Out << '{';
+ if (ST->getNumElements()) {
+ Out << ' ';
+ printConstant(Constant::getNullValue(ST->getElementType(0)), Static);
+ for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(Constant::getNullValue(ST->getElementType(i)), Static);
+ }
+ }
+ Out << " }";
+ } else {
+ Out << '{';
+ if (CPV->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPV->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPV->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(CPV)) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*NULL*/0)";
+ break;
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
+ writeOperand(GV, Static);
+ break;
+ }
+ // FALL THROUGH
+ default:
+ cerr << "Unknown constant type: " << *CPV << "\n";
+ abort();
+ }
+}
+
+// Some constant expressions need to be cast back to their original types
+// because their operands were cast to the expected type. This function takes
+// care of detecting that case and printing the cast for the ConstantExpr.
+bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
+ bool NeedsExplicitCast = false;
+ const Type *Ty = CE->getOperand(0)->getType();
+ bool TypeIsSigned = false;
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ if (!Ty->isIntOrIntVector()) break;
+ // FALL THROUGH
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv: NeedsExplicitCast = true; break;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
+ case Instruction::SExt:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ TypeIsSigned = true;
+ break;
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ break;
+ default: break;
+ }
+ if (NeedsExplicitCast) {
+ Out << "((";
+ if (Ty->isInteger() && Ty != Type::Int1Ty)
+ printSimpleType(Out, Ty, TypeIsSigned);
+ else
+ printType(Out, Ty); // not integer, sign doesn't matter
+ Out << ")(";
+ }
+ return NeedsExplicitCast;
+}
+
+// Print a constant assuming that it is the operand for a given Opcode. The
+// opcodes that care about sign need to cast their operands to the expected
+// type before the operation proceeds. This function does the casting.
+void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = CPV->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+ bool typeIsSigned = false;
+
+ // Based on the Opcode for which this Constant is being written, determine
+  // the new type to which the operand should be cast by setting the value
+  // of OpTy. If we change OpTy, also set shouldCast to true so the cast
+  // gets printed below.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ if (!OpTy->isIntOrIntVector()) break;
+ // FALL THROUGH
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ shouldCast = true;
+ break;
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ shouldCast = true;
+ typeIsSigned = true;
+ break;
+ }
+
+  // Write out the constant, wrapped in a cast when one is needed.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, typeIsSigned);
+ Out << ")";
+ printConstant(CPV, false);
+ Out << ")";
+ } else
+ printConstant(CPV, false);
+}
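+
+// Combined with printConstExprCast above, a hypothetical constant
+// 'sdiv i32 A, B' therefore comes out roughly as
+//   (((int)(((int)A) / ((int)B))))
+// with both operands and the result pinned to the signed type.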
+
+std::string CWriter::GetValueName(const Value *Operand) {
+ std::string Name;
+
+ if (!isa<GlobalValue>(Operand) && Operand->getName() != "") {
+ std::string VarName;
+
+ Name = Operand->getName();
+ VarName.reserve(Name.capacity());
+
+ for (std::string::iterator I = Name.begin(), E = Name.end();
+ I != E; ++I) {
+ char ch = *I;
+
+ if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') || ch == '_')) {
+ char buffer[5];
+ sprintf(buffer, "_%x_", ch);
+ VarName += buffer;
+ } else
+ VarName += ch;
+ }
+
+ Name = "llvm_cbe_" + VarName;
+ } else {
+ Name = Mang->getValueName(Operand);
+ }
+
+ return Name;
+}
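+
+// For example, a hypothetical local named "foo.bar" becomes
+// "llvm_cbe_foo_2e_bar" ('.' is 0x2e); globals and unnamed values take
+// their names from the Mangler instead.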
+
+/// writeInstComputationInline - Emit the computation for the specified
+/// instruction inline, with no destination provided.
+void CWriter::writeInstComputationInline(Instruction &I) {
+ // If this is a non-trivial bool computation, make sure to truncate down to
+ // a 1 bit value. This is important because we want "add i1 x, y" to return
+ // "0" when x and y are true, not "2" for example.
+ bool NeedBoolTrunc = false;
+ if (I.getType() == Type::Int1Ty && !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
+ NeedBoolTrunc = true;
+
+ if (NeedBoolTrunc)
+ Out << "((";
+
+ visit(I);
+
+ if (NeedBoolTrunc)
+ Out << ")&1)";
+}
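+
+// For example, a non-comparison i1 computation such as a hypothetical
+// 'add i1 %x, %y' is wrapped as ((...)&1) so that true + true reads back
+// as 1 rather than 2 in the wider C type.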
+
+
+void CWriter::writeOperandInternal(Value *Operand, bool Static) {
+ if (Instruction *I = dyn_cast<Instruction>(Operand))
+ // Should we inline this instruction to build a tree?
+ if (isInlinableInst(*I) && !isDirectAlloca(I)) {
+ Out << '(';
+ writeInstComputationInline(*I);
+ Out << ')';
+ return;
+ }
+
+ Constant* CPV = dyn_cast<Constant>(Operand);
+
+ if (CPV && !isa<GlobalValue>(CPV))
+ printConstant(CPV, Static);
+ else
+ Out << GetValueName(Operand);
+}
+
+void CWriter::writeOperand(Value *Operand, bool Static) {
+ bool isAddressImplicit = isAddressExposed(Operand);
+ if (isAddressImplicit)
+ Out << "(&"; // Global variables are referenced as their addresses by llvm
+
+ writeOperandInternal(Operand, Static);
+
+ if (isAddressImplicit)
+ Out << ')';
+}
+
+// Some instructions need to have their result value cast back to the
+// original type because their operands were cast to the expected type.
+// This function takes care of detecting that case and printing the cast
+// for the Instruction.
+bool CWriter::writeInstructionCast(const Instruction &I) {
+ const Type *Ty = I.getOperand(0)->getType();
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ if (!Ty->isIntOrIntVector()) break;
+ // FALL THROUGH
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, false);
+ Out << ")(";
+ return true;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, true);
+ Out << ")(";
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+// Write the operand with a cast to another type based on the Opcode being used.
+// This will be used in cases where an instruction has specific type
+// requirements (usually signedness) for its operands.
+void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = Operand->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+
+ // Indicate whether the cast should be to a signed type or not.
+ bool castIsSigned = false;
+
+ // Based on the Opcode for which this Operand is being written, determine
+  // the new type to which the operand should be cast by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ if (!OpTy->isIntOrIntVector()) break;
+ // FALL THROUGH
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem: // Cast to unsigned first
+ shouldCast = true;
+ castIsSigned = false;
+ break;
+ case Instruction::GetElementPtr:
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem: // Cast to signed first
+ shouldCast = true;
+ castIsSigned = true;
+ break;
+ }
+
+  // Write out the operand, wrapped in a cast when one is needed.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+ } else
+ writeOperand(Operand);
+}
+
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
+void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
+  // This has to do a cast to ensure the operand has the right signedness.
+  // Also, if the operand is a pointer, we cast it to an integer for the
+  // comparison, both for signedness and so that the C compiler doesn't
+  // optimize something like "p < NULL" to false (p may, for example,
+  // actually hold an integer value).
+ bool shouldCast = Cmp.isRelational();
+
+  // Write out the operand directly when no cast is needed.
+ if (!shouldCast) {
+ writeOperand(Operand);
+ return;
+ }
+
+ // Should this be a signed comparison? If so, convert to signed.
+ bool castIsSigned = Cmp.isSignedPredicate();
+
+ // If the operand was a pointer, convert to a large integer type.
+ const Type* OpTy = Operand->getType();
+ if (isa<PointerType>(OpTy))
+ OpTy = TD->getIntPtrType();
+
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+}
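+
+// For example, each operand of a hypothetical 'icmp ult i8* %p, %q' would
+// be printed as ((unsigned long long)llvm_cbe_p) on a target whose intptr
+// type is 64 bits wide.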
+
+// generateCompilerSpecificCode - This is where we add conditional compilation
+// directives to cater to specific compilers as need be.
+//
+static void generateCompilerSpecificCode(raw_ostream& Out,
+ const TargetData *TD) {
+ // Alloca is hard to get, and we don't want to include stdlib.h here.
+ Out << "/* get a declaration for alloca */\n"
+ << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
+ << "#define alloca(x) __builtin_alloca((x))\n"
+ << "#define _alloca(x) __builtin_alloca((x))\n"
+ << "#elif defined(__APPLE__)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#define longjmp _longjmp\n"
+ << "#define setjmp _setjmp\n"
+ << "#elif defined(__sun__)\n"
+ << "#if defined(__sparcv9)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#else\n"
+ << "extern void *__builtin_alloca(unsigned int);\n"
+ << "#endif\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(_MSC_VER)\n"
+ << "#define inline _inline\n"
+ << "#define alloca(x) _alloca(x)\n"
+ << "#else\n"
+ << "#include <alloca.h>\n"
+ << "#endif\n\n";
+
+ // We output GCC specific attributes to preserve 'linkonce'ness on globals.
+ // If we aren't being compiled with GCC, just drop these attributes.
+ Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
+ << "#define __attribute__(X)\n"
+ << "#endif\n\n";
+
+ // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __EXTERNAL_WEAK__\n"
+ << "#endif\n\n";
+
+ // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#endif\n\n";
+
+ // Add hidden visibility support. FIXME: APPLE_CC?
+ Out << "#if defined(__GNUC__)\n"
+ << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
+ << "#endif\n\n";
+
+ // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
+ // From the GCC documentation:
+ //
+ // double __builtin_nan (const char *str)
+ //
+ // This is an implementation of the ISO C99 function nan.
+ //
+ // Since ISO C99 defines this function in terms of strtod, which we do
+ // not implement, a description of the parsing is in order. The string is
+ // parsed as by strtol; that is, the base is recognized by leading 0 or
+ // 0x prefixes. The number parsed is placed in the significand such that
+ // the least significant bit of the number is at the least significant
+ // bit of the significand. The number is truncated to fit the significand
+ // field provided. The significand is forced to be a quiet NaN.
+ //
+ // This function, if given a string literal, is evaluated early enough
+ // that it is considered a compile-time constant.
+ //
+ // float __builtin_nanf (const char *str)
+ //
+ // Similar to __builtin_nan, except the return type is float.
+ //
+ // double __builtin_inf (void)
+ //
+ // Similar to __builtin_huge_val, except a warning is generated if the
+ // target floating-point format does not support infinities. This
+ // function is suitable for implementing the ISO C99 macro INFINITY.
+ //
+ // float __builtin_inff (void)
+ //
+ // Similar to __builtin_inf, except the return type is float.
+ Out << "#ifdef __GNUC__\n"
+ << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
+ << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
+ << "#define LLVM_INF __builtin_inf() /* Double */\n"
+ << "#define LLVM_INFF __builtin_inff() /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) "
+ "__builtin_prefetch(addr,rw,locality)\n"
+ << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
+ << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
+ << "#define LLVM_ASM __asm__\n"
+ << "#else\n"
+ << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_INF ((double)0.0) /* Double */\n"
+ << "#define LLVM_INFF 0.0F /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
+ << "#define __ATTRIBUTE_CTOR__\n"
+ << "#define __ATTRIBUTE_DTOR__\n"
+ << "#define LLVM_ASM(X)\n"
+ << "#endif\n\n";
+
+  Out << "#if __GNUC__ < 4 /* Old GCCs, or compilers that are not GCC */\n"
+ << "#define __builtin_stack_save() 0 /* not implemented */\n"
+ << "#define __builtin_stack_restore(X) /* noop */\n"
+ << "#endif\n\n";
+
+ // Output typedefs for 128-bit integers. If these are needed with a
+ // 32-bit target or with a C compiler that doesn't support mode(TI),
+ // more drastic measures will be needed.
+ Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n"
+ << "typedef int __attribute__((mode(TI))) llvmInt128;\n"
+ << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n"
+ << "#endif\n\n";
+
+ // Output target-specific code that should be inserted into main.
+ Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
+}
+
+/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
+/// the StaticTors set.
+static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ Constant *FP = CS->getOperand(1);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+ if (Function *F = dyn_cast<Function>(FP))
+ StaticTors.insert(F);
+ }
+}
+
+enum SpecialGlobalClass {
+ NotSpecial = 0,
+ GlobalCtors, GlobalDtors,
+ NotPrinted
+};
+
+/// getGlobalVariableClass - If this is a global that is specially recognized
+/// by LLVM, return a code that indicates how we should handle it.
+static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
+ // If this is a global ctors/dtors list, handle it now.
+ if (GV->hasAppendingLinkage() && GV->use_empty()) {
+ if (GV->getName() == "llvm.global_ctors")
+ return GlobalCtors;
+ else if (GV->getName() == "llvm.global_dtors")
+ return GlobalDtors;
+ }
+
+  // Otherwise, if it is other metadata, don't print it.  This catches things
+ // like debug information.
+ if (GV->getSection() == "llvm.metadata")
+ return NotPrinted;
+
+ return NotSpecial;
+}
+
+
+bool CWriter::doInitialization(Module &M) {
+ // Initialize
+ TheModule = &M;
+
+ TD = new TargetData(&M);
+ IL = new IntrinsicLowering(*TD);
+ IL->AddPrototypes(M);
+
+ // Ensure that all structure types have names...
+ Mang = new Mangler(M);
+ Mang->markCharUnacceptable('.');
+
+ // Keep track of which functions are static ctors/dtors so they can have
+ // an attribute added to their prototypes.
+ std::set<Function*> StaticCtors, StaticDtors;
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ switch (getGlobalVariableClass(I)) {
+ default: break;
+ case GlobalCtors:
+ FindStaticTors(I, StaticCtors);
+ break;
+ case GlobalDtors:
+ FindStaticTors(I, StaticDtors);
+ break;
+ }
+ }
+
+ // get declaration for alloca
+ Out << "/* Provide Declarations */\n";
+ Out << "#include <stdarg.h>\n"; // Varargs support
+ Out << "#include <setjmp.h>\n"; // Unwind support
+ generateCompilerSpecificCode(Out, TD);
+
+ // Provide a definition for `bool' if not compiling with a C++ compiler.
+ Out << "\n"
+ << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
+
+ << "\n\n/* Support for floating point constants */\n"
+ << "typedef unsigned long long ConstantDoubleTy;\n"
+ << "typedef unsigned int ConstantFloatTy;\n"
+ << "typedef struct { unsigned long long f1; unsigned short f2; "
+ "unsigned short pad[3]; } ConstantFP80Ty;\n"
+ // This is used for both kinds of 128-bit long double; meaning differs.
+ << "typedef struct { unsigned long long f1; unsigned long long f2; }"
+ " ConstantFP128Ty;\n"
+ << "\n\n/* Global Declarations */\n";
+
+  // First output all the declarations for the program, because C requires
+  // functions and globals to be declared before they are used.
+ //
+
+  // Loop over the type symbol table, emitting all named types.
+ printModuleTypes(M.getTypeSymbolTable());
+
+ // Global variable declarations...
+ if (!M.global_empty()) {
+ Out << "\n/* External Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+
+ if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
+ I->hasCommonLinkage())
+ Out << "extern ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else
+ continue; // Internal Global
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false, GetValueName(I));
+
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ Out << ";\n";
+ }
+ }
+
+ // Function declarations
+ Out << "\n/* Function Declarations */\n";
+ Out << "double fmod(double, double);\n"; // Support for FP rem
+ Out << "float fmodf(float, float);\n";
+ Out << "long double fmodl(long double, long double);\n";
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ // Don't print declarations for intrinsic functions.
+ if (!I->isIntrinsic() && I->getName() != "setjmp" &&
+ I->getName() != "longjmp" && I->getName() != "_setjmp") {
+ if (I->hasExternalWeakLinkage())
+ Out << "extern ";
+ printFunctionSignature(I, true);
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (StaticCtors.count(I))
+ Out << " __ATTRIBUTE_CTOR__";
+ if (StaticDtors.count(I))
+ Out << " __ATTRIBUTE_DTOR__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ if (I->hasName() && I->getName()[0] == 1)
+ Out << " LLVM_ASM(\"" << I->getName().c_str()+1 << "\")";
+
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable declarations
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else
+ Out << "extern ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasCommonLinkage()) // FIXME is this right?
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable definitions and contents...
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Definitions and Initialization */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else if (I->hasDLLExportLinkage())
+ Out << "__declspec(dllexport) ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasCommonLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ // If the initializer is not null, emit the initializer. If it is null,
+ // we try to avoid emitting large amounts of zeros. The problem with
+ // this, however, occurs when the variable has weak linkage. In this
+ // case, the assembler will complain about the variable being both weak
+ // and common, so we disable this optimization.
+ // FIXME common linkage should avoid this problem.
+ if (!I->getInitializer()->isNullValue()) {
+ Out << " = " ;
+ writeOperand(I->getInitializer(), true);
+ } else if (I->hasWeakLinkage()) {
+ // We have to specify an initializer, but it doesn't have to be
+ // complete. If the value is an aggregate, print out { 0 }, and let
+ // the compiler figure out the rest of the zeros.
+ Out << " = " ;
+ if (isa<StructType>(I->getInitializer()->getType()) ||
+ isa<VectorType>(I->getInitializer()->getType())) {
+ Out << "{ 0 }";
+ } else if (isa<ArrayType>(I->getInitializer()->getType())) {
+ // As with structs and vectors, but with an extra set of braces
+ // because arrays are wrapped in structs.
+ Out << "{ { 0 } }";
+ } else {
+ // Just print it out normally.
+ writeOperand(I->getInitializer(), true);
+ }
+ }
+ Out << ";\n";
+ }
+ }
+
+ if (!M.empty())
+ Out << "\n\n/* Function Bodies */\n";
+
+  // Emit some helper functions for dealing with the FCmp instruction's
+  // predicates.
+ Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
+ Out << "return X == X && Y == Y; }\n";
+ Out << "static inline int llvm_fcmp_uno(double X, double Y) { ";
+ Out << "return X != X || Y != Y; }\n";
+ Out << "static inline int llvm_fcmp_ueq(double X, double Y) { ";
+ Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_une(double X, double Y) { ";
+ Out << "return X != Y; }\n";
+ Out << "static inline int llvm_fcmp_ult(double X, double Y) { ";
+ Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ugt(double X, double Y) { ";
+ Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ule(double X, double Y) { ";
+ Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_uge(double X, double Y) { ";
+ Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_oeq(double X, double Y) { ";
+ Out << "return X == Y ; }\n";
+ Out << "static inline int llvm_fcmp_one(double X, double Y) { ";
+ Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_olt(double X, double Y) { ";
+ Out << "return X < Y ; }\n";
+ Out << "static inline int llvm_fcmp_ogt(double X, double Y) { ";
+ Out << "return X > Y ; }\n";
+ Out << "static inline int llvm_fcmp_ole(double X, double Y) { ";
+ Out << "return X <= Y ; }\n";
+ Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
+ Out << "return X >= Y ; }\n";
+ return false;
+}
+
+
+/// Output all floating point constants that cannot be printed accurately...
+void CWriter::printFloatingPointConstants(Function &F) {
+  // Scan the function for floating point constants.  If any FP constant is used
+ // in the function, we want to redirect it here so that we do not depend on
+ // the precision of the printed form, unless the printed form preserves
+ // precision.
+ //
+ for (constant_iterator I = constant_begin(&F), E = constant_end(&F);
+ I != E; ++I)
+ printFloatingPointConstants(*I);
+
+ Out << '\n';
+}
+
+void CWriter::printFloatingPointConstants(const Constant *C) {
+ // If this is a constant expression, recursively check for constant fp values.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ printFloatingPointConstants(CE->getOperand(i));
+ return;
+ }
+
+ // Otherwise, check for a FP constant that we need to print.
+ const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
+ if (FPC == 0 ||
+ // Do not put in FPConstantMap if safe.
+ isFPCSafeToPrint(FPC) ||
+ // Already printed this constant?
+ FPConstantMap.count(FPC))
+ return;
+
+ FPConstantMap[FPC] = FPCounter; // Number the FP constants
+
+ if (FPC->getType() == Type::DoubleTy) {
+ double Val = FPC->getValueAPF().convertToDouble();
+ uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+ Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "ULL; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::FloatTy) {
+ float Val = FPC->getValueAPF().convertToFloat();
+ uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ Out << "static const ConstantFloatTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "U; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::X86_FP80Ty) {
+ // api needed to prevent premature destruction
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
+ << " = { 0x" << utohexstr(p[0])
+ << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
+ << "}; /* Long double constant */\n";
+ } else if (FPC->getType() == Type::PPC_FP128Ty) {
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
+ << " = { 0x"
+ << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
+ << "}; /* Long double constant */\n";
+
+ } else {
+ assert(0 && "Unknown float type!");
+ }
+}
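+
+// As a sketch of the output, a double with bit pattern 0x400921FB54442D18
+// (pi) would be emitted roughly as
+//   static const ConstantDoubleTy FPConstant0 = 0x400921FB54442D18ULL;
+// followed by a comment holding its decimal value; printConstant then
+// references it as (*(double*)&FPConstant0).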
+
+
+
+/// printModuleTypes - Run through the type symbol table looking for named
+/// types; whenever one is found, emit its declaration...
+///
+void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
+ Out << "/* Helper union for bitcasts */\n";
+ Out << "typedef union {\n";
+ Out << " unsigned int Int32;\n";
+ Out << " unsigned long long Int64;\n";
+ Out << " float Float;\n";
+ Out << " double Double;\n";
+ Out << "} llvmBitCastUnion;\n";
+
+ // We are only interested in the type plane of the symbol table.
+ TypeSymbolTable::const_iterator I = TST.begin();
+ TypeSymbolTable::const_iterator End = TST.end();
+
+ // If there are no type names, exit early.
+ if (I == End) return;
+
+ // Print out forward declarations for structure types before anything else!
+ Out << "/* Structure forward decls */\n";
+ for (; I != End; ++I) {
+ std::string Name = "struct l_" + Mang->makeNameProper(I->first);
+ Out << Name << ";\n";
+ TypeNames.insert(std::make_pair(I->second, Name));
+ }
+
+ Out << '\n';
+
+ // Now we can print out typedefs. Above, we guaranteed that this can only be
+ // for struct or opaque types.
+ Out << "/* Typedefs */\n";
+ for (I = TST.begin(); I != End; ++I) {
+ std::string Name = "l_" + Mang->makeNameProper(I->first);
+ Out << "typedef ";
+ printType(Out, I->second, false, Name);
+ Out << ";\n";
+ }
+
+ Out << '\n';
+
+ // Keep track of which structures have been printed so far...
+ std::set<const Type *> StructPrinted;
+
+  // Walk all structures, recursing into contained types first so that each
+  // is printed in dependency order.
+ //
+ Out << "/* Structure contents */\n";
+ for (I = TST.begin(); I != End; ++I)
+ if (isa<StructType>(I->second) || isa<ArrayType>(I->second))
+ // Only print out used types!
+ printContainedStructs(I->second, StructPrinted);
+}
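+
+// For a hypothetical named type '%foo = type { i32 }' the passes above
+// produce, in order, roughly:
+//   struct l_foo;
+//   typedef struct l_foo l_foo;
+//   struct l_foo { unsigned int field0; };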
+
+// Print the struct, first recursively printing all of the structs that
+// this one depends on.
+//
+// TODO: Make this work properly with vector types
+//
+void CWriter::printContainedStructs(const Type *Ty,
+ std::set<const Type*> &StructPrinted) {
+ // Don't walk through pointers.
+ if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return;
+
+ // Print all contained types first.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ printContainedStructs(*I, StructPrinted);
+
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+ // Check to see if we have already printed this struct.
+ if (StructPrinted.insert(Ty).second) {
+ // Print structure type out.
+ std::string Name = TypeNames[Ty];
+ printType(Out, Ty, false, Name, true);
+ Out << ";\n\n";
+ }
+ }
+}
+
+void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F->hasStructRetAttr();
+
+ if (F->hasLocalLinkage()) Out << "static ";
+ if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
+ if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Out << "__attribute__((stdcall)) ";
+ break;
+ case CallingConv::X86_FastCall:
+ Out << "__attribute__((fastcall)) ";
+ break;
+ }
+
+ // Loop over the arguments, printing them...
+ const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+ const AttrListPtr &PAL = F->getAttributes();
+
+ std::stringstream FunctionInnards;
+
+ // Print out the name...
+ FunctionInnards << GetValueName(F) << '(';
+
+ bool PrintedArg = false;
+ if (!F->isDeclaration()) {
+ if (!F->arg_empty()) {
+ Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ std::string ArgName;
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ if (I->hasName() || !Prototype)
+ ArgName = GetValueName(I);
+ else
+ ArgName = "";
+ const Type *ArgTy = I->getType();
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ ByValParams.insert(I);
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt),
+ ArgName);
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+ } else {
+ // Loop over the arguments, printing them.
+ FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(isa<PointerType>(ArgTy));
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt));
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+
+  // Finish printing the arguments... if this is a vararg function, print the
+  // ellipsis; if no argument types are known at all, just emit ().
+ //
+ if (FT->isVarArg() && PrintedArg) {
+    FunctionInnards << ", ";
+ FunctionInnards << "..."; // Output varargs portion of signature!
+ } else if (!FT->isVarArg() && !PrintedArg) {
+ FunctionInnards << "void"; // ret() -> ret(void) in C.
+ }
+ FunctionInnards << ')';
+
+  // Get the return type for the function.
+ const Type *RetTy;
+ if (!isStructReturn)
+ RetTy = F->getReturnType();
+ else {
+ // If this is a struct-return function, print the struct-return type.
+ RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
+ }
+
+ // Print out the return type and the signature built above.
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
+ FunctionInnards.str());
+}
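+
+// For example, a hypothetical 'void @f(%S* sret %out, i32 %x)' is printed
+// with the hidden sret argument dropped and the struct as the C return
+// type, roughly 'struct l_S f(unsigned int llvm_cbe_x)' (the exact struct
+// name comes from the Mangler).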
+
+static inline bool isFPIntBitCast(const Instruction &I) {
+ if (!isa<BitCastInst>(I))
+ return false;
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DstTy = I.getType();
+ return (SrcTy->isFloatingPoint() && DstTy->isInteger()) ||
+ (DstTy->isFloatingPoint() && SrcTy->isInteger());
+}
+
+void CWriter::printFunction(Function &F) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F.hasStructRetAttr();
+
+ printFunctionSignature(&F, false);
+ Out << " {\n";
+
+  // If this is a struct-return function, materialize the result in a local
+  // temporary that the hidden struct-return argument points to.
+ if (isStructReturn) {
+ const Type *StructTy =
+ cast<PointerType>(F.arg_begin()->getType())->getElementType();
+ Out << " ";
+ printType(Out, StructTy, false, "StructReturn");
+ Out << "; /* Struct return temporary */\n";
+
+ Out << " ";
+ printType(Out, F.arg_begin()->getType(), false,
+ GetValueName(F.arg_begin()));
+ Out << " = &StructReturn;\n";
+ }
+
+ bool PrintedVar = false;
+
+ // print local variable information for the function
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ if (const AllocaInst *AI = isDirectAlloca(&*I)) {
+ Out << " ";
+ printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
+ Out << "; /* Address-exposed local */\n";
+ PrintedVar = true;
+ } else if (I->getType() != Type::VoidTy && !isInlinableInst(*I)) {
+ Out << " ";
+ printType(Out, I->getType(), false, GetValueName(&*I));
+ Out << ";\n";
+
+ if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
+ Out << " ";
+ printType(Out, I->getType(), false,
+ GetValueName(&*I)+"__PHI_TEMPORARY");
+ Out << ";\n";
+ }
+ PrintedVar = true;
+ }
+ // We need a temporary for the BitCast to use so it can pluck a value out
+ // of a union to do the BitCast. This is separate from the need for a
+ // variable to hold the result of the BitCast.
+ if (isFPIntBitCast(*I)) {
+ Out << " llvmBitCastUnion " << GetValueName(&*I)
+ << "__BITCAST_TEMPORARY;\n";
+ PrintedVar = true;
+ }
+ }
+
+ if (PrintedVar)
+ Out << '\n';
+
+ if (F.hasExternalLinkage() && F.getName() == "main")
+ Out << " CODE_FOR_MAIN();\n";
+
+ // print the basic blocks
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (L->getHeader() == BB && L->getParentLoop() == 0)
+ printLoop(L);
+ } else {
+ printBasicBlock(BB);
+ }
+ }
+
+ Out << "}\n\n";
+}
+
+void CWriter::printLoop(Loop *L) {
+ Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
+ << "' to make GCC happy */\n";
+ for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ Loop *BBLoop = LI->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
+ printLoop(BBLoop);
+ }
+ Out << " } while (1); /* end of syntactic loop '"
+ << L->getHeader()->getName() << "' */\n";
+}
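+
+// The emitted shape for a loop whose header block is named bb1 is roughly:
+//   do {          /* Syntactic loop 'bb1' to make GCC happy */
+//     ...member blocks, nested loops emitted recursively...
+//   } while (1);  /* end of syntactic loop 'bb1' */
+// Actual loop control (back edges and exits) is carried by the gotos
+// emitted for the branches inside the body.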
+
+void CWriter::printBasicBlock(BasicBlock *BB) {
+
+  // Don't print the label for the basic block unless some predecessor needs
+  // a goto to reach it.  We check the predecessors rather than the raw use
+  // list because PHI nodes also use basic blocks without requiring a label
+  // to be generated.
+  //
+ bool NeedsLabel = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (isGotoCodeNecessary(*PI, BB)) {
+ NeedsLabel = true;
+ break;
+ }
+
+ if (NeedsLabel) Out << GetValueName(BB) << ":\n";
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
+ ++II) {
+ if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
+ if (II->getType() != Type::VoidTy && !isInlineAsm(*II))
+ outputLValue(II);
+ else
+ Out << " ";
+ writeInstComputationInline(*II);
+ Out << ";\n";
+ }
+ }
+
+ // Don't emit prefix or suffix for the terminator.
+ visit(*BB->getTerminator());
+}
+
+
+// Specific Instruction type classes... note that all of the casts are
+// necessary because we use the instruction classes as opaque types...
+//
+void CWriter::visitReturnInst(ReturnInst &I) {
+ // If this is a struct return function, return the temporary struct.
+ bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
+
+ if (isStructReturn) {
+ Out << " return StructReturn;\n";
+ return;
+ }
+
+  // Don't output a void return if this is the last basic block in the
+  // function, unless the block holds nothing but the return (a label that
+  // precedes it would otherwise be left with no statement).
+  if (I.getNumOperands() == 0 &&
+      &*--I.getParent()->getParent()->end() == I.getParent() &&
+      I.getParent()->size() != 1) {
+ return;
+ }
+
+ if (I.getNumOperands() > 1) {
+ Out << " {\n";
+ Out << " ";
+ printType(Out, I.getParent()->getParent()->getReturnType());
+ Out << " llvm_cbe_mrv_temp = {\n";
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ Out << " ";
+ writeOperand(I.getOperand(i));
+ if (i != e - 1)
+ Out << ",";
+ Out << "\n";
+ }
+ Out << " };\n";
+ Out << " return llvm_cbe_mrv_temp;\n";
+ Out << " }\n";
+ return;
+ }
+
+ Out << " return";
+ if (I.getNumOperands()) {
+ Out << ' ';
+ writeOperand(I.getOperand(0));
+ }
+ Out << ";\n";
+}
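+
+// For example, a hypothetical two-result 'ret i32 %a, i32 %b' (multiple
+// return values) expands to a braced temporary:
+//   {  <return struct type> llvm_cbe_mrv_temp = { llvm_cbe_a, llvm_cbe_b };
+//      return llvm_cbe_mrv_temp; }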
+
+void CWriter::visitSwitchInst(SwitchInst &SI) {
+
+ Out << " switch (";
+ writeOperand(SI.getOperand(0));
+ Out << ") {\n default:\n";
+ printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
+ printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
+ Out << ";\n";
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) {
+ Out << " case ";
+ writeOperand(SI.getOperand(i));
+ Out << ":\n";
+ BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
+ printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
+ printBranchToBlock(SI.getParent(), Succ, 2);
+ if (Function::iterator(Succ) == next(Function::iterator(SI.getParent())))
+ Out << " break;\n";
+ }
+ Out << " }\n";
+}
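+
+// For example, a hypothetical 'switch i32 %v, label %def [ i32 1, label
+// %one ]' is printed roughly as:
+//   switch (llvm_cbe_v) {
+//   default: goto llvm_cbe_def;
+//   case 1u: goto llvm_cbe_one;
+//   }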
+
+void CWriter::visitUnreachableInst(UnreachableInst &I) {
+ Out << " /*UNREACHABLE*/;\n";
+}
+
+bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
+  /// FIXME: The checks below should be re-enabled once they are safe with
+  /// respect to loop reordering; until then, we conservatively emit a goto.
+ return true;
+
+ if (next(Function::iterator(From)) != Function::iterator(To))
+ return true; // Not the direct successor, we need a goto.
+
+ //isa<SwitchInst>(From->getTerminator())
+
+ if (LI->getLoopFor(From) != LI->getLoopFor(To))
+ return true;
+ return false;
+}
+
+void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
+ BasicBlock *Successor,
+ unsigned Indent) {
+ for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Now we have to do the printing.
+ Value *IV = PN->getIncomingValueForBlock(CurBlock);
+ if (!isa<UndefValue>(IV)) {
+ Out << std::string(Indent, ' ');
+ Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
+ writeOperand(IV);
+ Out << "; /* for PHI node */\n";
+ }
+ }
+}
+
+void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
+ unsigned Indent) {
+ if (isGotoCodeNecessary(CurBB, Succ)) {
+ Out << std::string(Indent, ' ') << " goto ";
+ writeOperand(Succ);
+ Out << ";\n";
+ }
+}
+
+// Branch instruction printing - Avoid printing out a branch to a basic block
+// that immediately succeeds the current one.
+//
+void CWriter::visitBranchInst(BranchInst &I) {
+
+ if (I.isConditional()) {
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
+ Out << " if (";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
+
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
+ Out << " } else {\n";
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+ } else {
+ // First goto not necessary, assume second one is...
+ Out << " if (!";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+
+ Out << " }\n";
+ } else {
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
+ }
+ Out << "\n";
+}
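+
+// For example, a hypothetical conditional 'br i1 %c, label %then, label
+// %else' is currently emitted with explicit gotos:
+//   if (llvm_cbe_c) { goto llvm_cbe_then; } else { goto llvm_cbe_else; }
+// with any PHI temporary copies printed just before each goto.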
+
+// PHI nodes get copied into temporary values at the end of predecessor basic
+// blocks. We now need to copy these temporary values into the REAL value for
+// the PHI.
+void CWriter::visitPHINode(PHINode &I) {
+ writeOperand(&I);
+ Out << "__PHI_TEMPORARY";
+}
+
+
+void CWriter::visitBinaryOperator(Instruction &I) {
+  // Binary instructions: arithmetic, logical, and shift operations.
+ assert(!isa<PointerType>(I.getType()));
+
+ // We must cast the results of binary operations which might be promoted.
+ bool needsCast = false;
+ if ((I.getType() == Type::Int8Ty) || (I.getType() == Type::Int16Ty)
+ || (I.getType() == Type::FloatTy)) {
+ needsCast = true;
+ Out << "((";
+ printType(Out, I.getType(), false);
+ Out << ")(";
+ }
+
+ // If this is a negation operation, print it out as such. For FP, we don't
+ // want to print "-0.0 - X".
+ if (BinaryOperator::isNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (I.getOpcode() == Instruction::FRem) {
+ // Output a call to fmod/fmodf instead of emitting a%b
+ if (I.getType() == Type::FloatTy)
+ Out << "fmodf(";
+ else if (I.getType() == Type::DoubleTy)
+ Out << "fmod(";
+ else // all 3 flavors of long double
+ Out << "fmodl(";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+ } else {
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+ // Certain instructions require the operand to be forced to a specific type
+ // so we use writeOperandWithCast here instead of writeOperand. Similarly
+ // below for operand 1
+ writeOperandWithCast(I.getOperand(0), I.getOpcode());
+
+ switch (I.getOpcode()) {
+ case Instruction::Add: Out << " + "; break;
+ case Instruction::Sub: Out << " - "; break;
+ case Instruction::Mul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl : Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ default: cerr << "Invalid operator type!" << I; abort();
+ }
+
+ writeOperandWithCast(I.getOperand(1), I.getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ }
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
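+
+// For example (operand names hypothetical), 'frem float %a, %b' becomes the
+// libm call fmodf(llvm_cbe_a, llvm_cbe_b), while an i8 add is re-narrowed
+// as ((unsigned char)(...)) by the needsCast wrapper above.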
+
+void CWriter::visitICmpInst(ICmpInst &I) {
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+  // Certain icmp predicates require the operand to be forced to a specific type
+ // so we use writeOperandWithCast here instead of writeOperand. Similarly
+ // below for operand 1
+ writeOperandWithCast(I.getOperand(0), I);
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE: Out << " <= "; break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE: Out << " >= "; break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: Out << " < "; break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: Out << " > "; break;
+ default: cerr << "Invalid icmp predicate!" << I; abort();
+ }
+
+ writeOperandWithCast(I.getOperand(1), I);
+ if (NeedsClosingParens)
+ Out << "))";
+}
+
+void CWriter::visitFCmpInst(FCmpInst &I) {
+ if (I.getPredicate() == FCmpInst::FCMP_FALSE) {
+ Out << "0";
+ return;
+ }
+ if (I.getPredicate() == FCmpInst::FCMP_TRUE) {
+ Out << "1";
+ return;
+ }
+
+ const char* op = 0;
+ switch (I.getPredicate()) {
+ default: assert(0 && "Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+
+ Out << "llvm_fcmp_" << op << "(";
+ // Write the first operand
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ // Write the second operand
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
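+// The llvm_fcmp_* helpers are defined elsewhere in the emitted C. As a rough
+// sketch (not the exact definitions), the ordered variants look like
+//   static int llvm_fcmp_oeq(double X, double Y) { return X == Y; }
+// while the unordered variants also succeed on NaN operands, e.g.
+//   static int llvm_fcmp_ueq(double X, double Y) {
+//     return X != X || Y != Y || X == Y;
+//   }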
+
+static const char * getFloatBitCastField(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ default: assert(0 && "Invalid Type");
+ case Type::FloatTyID: return "Float";
+ case Type::DoubleTyID: return "Double";
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits <= 32)
+ return "Int32";
+ else
+ return "Int64";
+ }
+ }
+}
+
+void CWriter::visitCastInst(CastInst &I) {
+ const Type *DstTy = I.getType();
+ const Type *SrcTy = I.getOperand(0)->getType();
+ if (isFPIntBitCast(I)) {
+ Out << '(';
+ // These int<->float and long<->double casts need to be handled specially
+ Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
+ writeOperand(I.getOperand(0));
+ Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getType());
+ Out << ')';
+ return;
+ }
+
+ Out << '(';
+ printCast(I.getOpcode(), SrcTy, DstTy);
+
+ // Make a sext from i1 work by subtracting the i1 from 0 (an int).
+ if (SrcTy == Type::Int1Ty && I.getOpcode() == Instruction::SExt)
+ Out << "0-";
+
+ writeOperand(I.getOperand(0));
+
+ if (DstTy == Type::Int1Ty &&
+ (I.getOpcode() == Instruction::Trunc ||
+ I.getOpcode() == Instruction::FPToUI ||
+ I.getOpcode() == Instruction::FPToSI ||
+ I.getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really get a trunc to bool by anding the operand with 1
+ Out << "&1u";
+ }
+ Out << ')';
+}
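+// Illustrative sketch (hypothetical value names): "sext i1 %b to i32" prints
+// roughly as "((int)(0-llvm_cbe_b))", smearing the single bit across the
+// word, while "trunc i32 %v to i1" prints roughly as
+// "((bool)llvm_cbe_v&1u)", forcing a clean 0/1 value.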
+
+void CWriter::visitSelectInst(SelectInst &I) {
+ Out << "((";
+ writeOperand(I.getCondition());
+ Out << ") ? (";
+ writeOperand(I.getTrueValue());
+ Out << ") : (";
+ writeOperand(I.getFalseValue());
+ Out << "))";
+}
+
+
+void CWriter::lowerIntrinsics(Function &F) {
+ // This is used to keep track of intrinsics that get generated to a lowered
+ // function. We must generate the prototypes before the function body which
+ // will only be expanded on first use (by the loop below).
+ std::vector<Function*> prototypesToGen;
+
+ // Examine all the instructions in this function to find the intrinsics that
+ // need to be lowered.
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ case Intrinsic::memory_barrier:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::setjmp:
+ case Intrinsic::longjmp:
+ case Intrinsic::prefetch:
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::powi:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ case Intrinsic::ppc_altivec_lvsl:
+ // We directly implement these intrinsics
+ break;
+ default:
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we handle it.
+ const char *BuiltinName = "";
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ // If we handle it, don't lower it.
+ if (BuiltinName[0]) break;
+
+ // All other intrinsic calls we must lower.
+ Instruction *Before = 0;
+ if (CI != &BB->front())
+ Before = prior(BasicBlock::iterator(CI));
+
+ IL->LowerIntrinsicCall(CI);
+ if (Before) { // Move iterator to instruction after call
+ I = Before; ++I;
+ } else {
+ I = BB->begin();
+ }
+ // If the intrinsic got lowered to another call, and that call has
+ // a definition then we need to make sure its prototype is emitted
+ // before any calls to it.
+ if (CallInst *Call = dyn_cast<CallInst>(I))
+ if (Function *NewF = Call->getCalledFunction())
+ if (!NewF->isDeclaration())
+ prototypesToGen.push_back(NewF);
+
+ break;
+ }
+
+ // We may have collected some prototypes to emit in the loop above.
+ // Emit them now, before the function that uses them is emitted. But,
+ // be careful not to emit them twice.
+ std::vector<Function*>::iterator I = prototypesToGen.begin();
+ std::vector<Function*>::iterator E = prototypesToGen.end();
+ for ( ; I != E; ++I) {
+ if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
+ Out << '\n';
+ printFunctionSignature(*I, true);
+ Out << ";\n";
+ }
+ }
+}
+
+void CWriter::visitCallInst(CallInst &I) {
+ if (isa<InlineAsm>(I.getOperand(0)))
+ return visitInlineAsm(I);
+
+ bool WroteCallee = false;
+
+ // Handle intrinsic function calls first...
+ if (Function *F = I.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ if (visitBuiltinCall(I, ID, WroteCallee))
+ return;
+
+ Value *Callee = I.getCalledValue();
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ // If this is a call to a struct-return function, assign to the first
+ // parameter instead of passing it to the call.
+ const AttrListPtr &PAL = I.getAttributes();
+ bool hasByVal = I.hasByValArgument();
+ bool isStructRet = I.hasStructRetAttr();
+ if (isStructRet) {
+ writeOperandDeref(I.getOperand(1));
+ Out << " = ";
+ }
+
+ if (I.isTailCall()) Out << " /*tail*/ ";
+
+ if (!WroteCallee) {
+ // If this is an indirect call to a struct return function, we need to cast
+ // the pointer. Ditto for indirect calls with byval arguments.
+ bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee);
+
+ // GCC is a real PITA. It does not permit codegening casts of functions to
+ // function pointers if they are in a call (it generates a trap instruction
+ // instead!). We work around this by inserting a cast to void* in between
+ // the function and the function pointer cast. Unfortunately, we can't just
+ // form the constant expression here, because the folder will immediately
+ // nuke it.
+ //
+ // Note finally, that this is completely unsafe. ANSI C does not guarantee
+ // that void* and function pointers have the same size. :( To deal with this
+ // in the common case, we handle casts where the number of arguments passed
+ // match exactly.
+ //
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
+ if (CE->isCast())
+ if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
+ NeedsCast = true;
+ Callee = RF;
+ }
+
+ if (NeedsCast) {
+ // Ok, just cast the pointer type.
+ Out << "((";
+ if (isStructRet)
+ printStructReturnPointerFunctionType(Out, PAL,
+ cast<PointerType>(I.getCalledValue()->getType()));
+ else if (hasByVal)
+ printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
+ else
+ printType(Out, I.getCalledValue()->getType());
+ Out << ")(void*)";
+ }
+ writeOperand(Callee);
+ if (NeedsCast) Out << ')';
+ }
+
+ Out << '(';
+
+ unsigned NumDeclaredParams = FTy->getNumParams();
+
+ CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end();
+ unsigned ArgNo = 0;
+ if (isStructRet) { // Skip struct return argument.
+ ++AI;
+ ++ArgNo;
+ }
+
+ bool PrintedArg = false;
+ for (; AI != AE; ++AI, ++ArgNo) {
+ if (PrintedArg) Out << ", ";
+ if (ArgNo < NumDeclaredParams &&
+ (*AI)->getType() != FTy->getParamType(ArgNo)) {
+ Out << '(';
+ printType(Out, FTy->getParamType(ArgNo),
+ /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
+ Out << ')';
+ }
+ // Check if the argument is expected to be passed by value.
+ if (I.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ writeOperandDeref(*AI);
+ else
+ writeOperand(*AI);
+ PrintedArg = true;
+ }
+ Out << ')';
+}
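+// Illustrative sketch (hypothetical names): an indirect call to an sret
+// function comes out roughly as
+//   *llvm_cbe_ret = ((struct l_S (*)(unsigned int))(void*)llvm_cbe_fp)(...);
+// with the intermediate (void*) cast working around the GCC trap described
+// above.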
+
+/// visitBuiltinCall - Handle the call to the specified builtin. Returns true
+/// if the entire call is handled and false if it wasn't; optionally sets
+/// 'WroteCallee' if the callee has already been printed out.
+bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
+ bool &WroteCallee) {
+ switch (ID) {
+ default: {
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we emit it here.
+ const char *BuiltinName = "";
+ Function *F = I.getCalledFunction();
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
+
+ Out << BuiltinName;
+ WroteCallee = true;
+ return false;
+ }
+ case Intrinsic::memory_barrier:
+ Out << "__sync_synchronize()";
+ return true;
+ case Intrinsic::vastart:
+ Out << "0; ";
+
+ Out << "va_start(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ // Output the last argument to the enclosing function.
+ if (I.getParent()->getParent()->arg_empty()) {
+ cerr << "The C backend does not currently support zero "
+ << "argument varargs functions, such as '"
+ << I.getParent()->getParent()->getName() << "'!\n";
+ abort();
+ }
+ writeOperand(--I.getParent()->getParent()->arg_end());
+ Out << ')';
+ return true;
+ case Intrinsic::vaend:
+ if (!isa<ConstantPointerNull>(I.getOperand(1))) {
+ Out << "0; va_end(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ } else {
+ Out << "va_end(*(va_list*)0)";
+ }
+ return true;
+ case Intrinsic::vacopy:
+ Out << "0; ";
+ Out << "va_copy(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ", *(va_list*)";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return true;
+ case Intrinsic::returnaddress:
+ Out << "__builtin_return_address(";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::frameaddress:
+ Out << "__builtin_frame_address(";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::powi:
+ Out << "__builtin_powi(";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return true;
+ case Intrinsic::setjmp:
+ Out << "setjmp(*(jmp_buf*)";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::longjmp:
+ Out << "longjmp(*(jmp_buf*)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return true;
+ case Intrinsic::prefetch:
+ Out << "LLVM_PREFETCH((const void *)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ", ";
+ writeOperand(I.getOperand(3));
+ Out << ")";
+ return true;
+ case Intrinsic::stacksave:
+ // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
+ // to work around GCC bugs (see PR1809).
+ Out << "0; *((void**)&" << GetValueName(&I)
+ << ") = __builtin_stack_save()";
+ return true;
+ case Intrinsic::dbg_stoppoint: {
+ // If we use writeOperand directly we get a "u" suffix which is rejected
+ // by gcc.
+ std::stringstream SPIStr;
+ DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+ SPI.getDirectory()->print(SPIStr);
+ Out << "\n#line "
+ << SPI.getLine()
+ << " \"";
+ Out << SPIStr.str();
+    SPIStr.str("");  // clear() only resets error flags; str("") empties it.
+ SPI.getFileName()->print(SPIStr);
+ Out << SPIStr.str() << "\"\n";
+ return true;
+ }
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ // Multiple GCC builtins multiplex onto this intrinsic.
+ switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) {
+ default: assert(0 && "Invalid llvm.x86.sse.cmp!");
+ case 0: Out << "__builtin_ia32_cmpeq"; break;
+ case 1: Out << "__builtin_ia32_cmplt"; break;
+ case 2: Out << "__builtin_ia32_cmple"; break;
+ case 3: Out << "__builtin_ia32_cmpunord"; break;
+ case 4: Out << "__builtin_ia32_cmpneq"; break;
+ case 5: Out << "__builtin_ia32_cmpnlt"; break;
+ case 6: Out << "__builtin_ia32_cmpnle"; break;
+ case 7: Out << "__builtin_ia32_cmpord"; break;
+ }
+ if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd)
+ Out << 'p';
+ else
+ Out << 's';
+ if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps)
+ Out << 's';
+ else
+ Out << 'd';
+
+ Out << "(";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ")";
+ return true;
+ case Intrinsic::ppc_altivec_lvsl:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ Out << "__builtin_altivec_lvsl(0, (void*)";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+ return true;
+ }
+}
+
+// This converts the LLVM constraint string to something gcc expects.
+// TODO: work out platform-independent constraints and factor those out
+//       of the per-target tables
+// TODO: handle multiple constraint codes
+std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
+
+ assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
+
+ const char *const *table = 0;
+
+  // Grab the translation table from TargetAsmInfo if it exists.
+ if (!TAsm) {
+ std::string E;
+ const TargetMachineRegistry::entry* Match =
+ TargetMachineRegistry::getClosestStaticTargetForModule(*TheModule, E);
+ if (Match) {
+      // A per-platform TargetMachine doesn't exist yet, so create one;
+      // this must be done only once.
+ const TargetMachine* TM = Match->CtorFn(*TheModule, "");
+ TAsm = TM->getTargetAsmInfo();
+ }
+ }
+ if (TAsm)
+ table = TAsm->getAsmCBE();
+
+  // Search the translation table if it exists.
+ for (int i = 0; table && table[i]; i += 2)
+ if (c.Codes[0] == table[i])
+ return table[i+1];
+
+  // Default is identity.
+ return c.Codes[0];
+}
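+// Illustrative sketch (hypothetical entries): the table is a flat list of
+// pairs terminated by a null, e.g. { "r", "r", "m", "m", 0 }, so a
+// constraint code in an even slot maps to the GCC spelling beside it.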
+
+//TODO: import logic from AsmPrinter.cpp
+static std::string gccifyAsm(std::string asmstr) {
+ for (std::string::size_type i = 0; i != asmstr.size(); ++i)
+ if (asmstr[i] == '\n')
+ asmstr.replace(i, 1, "\\n");
+ else if (asmstr[i] == '\t')
+ asmstr.replace(i, 1, "\\t");
+ else if (asmstr[i] == '$') {
+ if (asmstr[i + 1] == '{') {
+ std::string::size_type a = asmstr.find_first_of(':', i + 1);
+ std::string::size_type b = asmstr.find_first_of('}', i + 1);
+ std::string n = "%" +
+ asmstr.substr(a + 1, b - a - 1) +
+ asmstr.substr(i + 2, a - i - 2);
+ asmstr.replace(i, b - i + 1, n);
+ i += n.size() - 1;
+ } else
+ asmstr.replace(i, 1, "%");
+ }
+    else if (asmstr[i] == '%') { // Escape a literal '%' for GCC.
+      asmstr.replace(i, 1, "%%");
+      ++i;
+    }
+
+ return asmstr;
+}
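+// Illustrative sketch: gccifyAsm rewrites LLVM asm syntax into GCC's, e.g.
+// "${0:x}" becomes "%x0", a bare "$1" becomes "%1", and a literal "%" is
+// doubled to "%%".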
+
+// TODO: The assumptions about what consumes arguments from the call are
+//       likely wrong.
+// TODO: handle commutativity.
+void CWriter::visitInlineAsm(CallInst &CI) {
+ InlineAsm* as = cast<InlineAsm>(CI.getOperand(0));
+ std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
+
+ std::vector<std::pair<Value*, int> > ResultVals;
+  if (CI.getType() != Type::VoidTy) {
+    if (const StructType *ST = dyn_cast<StructType>(CI.getType())) {
+      for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
+        ResultVals.push_back(std::make_pair(&CI, (int)i));
+    } else {
+      ResultVals.push_back(std::make_pair(&CI, -1));
+    }
+  }
+
+ // Fix up the asm string for gcc and emit it.
+ Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
+ Out << " :";
+
+ unsigned ValueCount = 0;
+ bool IsFirst = true;
+
+ // Convert over all the output constraints.
+ for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+
+ if (I->Type != InlineAsm::isOutput) {
+ ++ValueCount;
+ continue; // Ignore non-output constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ // Unpack the dest.
+ Value *DestVal;
+ int DestValNo = -1;
+
+ if (ValueCount < ResultVals.size()) {
+ DestVal = ResultVals[ValueCount].first;
+ DestValNo = ResultVals[ValueCount].second;
+ } else
+ DestVal = CI.getOperand(ValueCount-ResultVals.size()+1);
+
+ if (I->isEarlyClobber)
+ C = "&"+C;
+
+ Out << "\"=" << C << "\"(" << GetValueName(DestVal);
+ if (DestValNo != -1)
+ Out << ".field" << DestValNo; // Multiple retvals.
+ Out << ")";
+ ++ValueCount;
+ }
+
+
+ // Convert over all the input constraints.
+ Out << "\n :";
+ IsFirst = true;
+ ValueCount = 0;
+ for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ if (I->Type != InlineAsm::isInput) {
+ ++ValueCount;
+ continue; // Ignore non-input constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
+ Value *SrcVal = CI.getOperand(ValueCount-ResultVals.size()+1);
+
+ Out << "\"" << C << "\"(";
+ if (!I->isIndirect)
+ writeOperand(SrcVal);
+ else
+ writeOperandDeref(SrcVal);
+ Out << ")";
+ }
+
+ // Convert over the clobber constraints.
+ IsFirst = true;
+ ValueCount = 0;
+ for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ if (I->Type != InlineAsm::isClobber)
+      continue; // Ignore non-clobber constraints.
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ Out << '\"' << C << '"';
+ }
+
+ Out << ")";
+}
+
+void CWriter::visitMallocInst(MallocInst &I) {
+ assert(0 && "lowerallocations pass didn't work!");
+}
+
+void CWriter::visitAllocaInst(AllocaInst &I) {
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ") alloca(sizeof(";
+ printType(Out, I.getType()->getElementType());
+ Out << ')';
+ if (I.isArrayAllocation()) {
+ Out << " * " ;
+ writeOperand(I.getOperand(0));
+ }
+ Out << ')';
+}
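+// Illustrative sketch (hypothetical names): "alloca i32, i32 %n" is emitted
+// roughly as "(unsigned int*) alloca(sizeof(unsigned int) * llvm_cbe_n)".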
+
+void CWriter::visitFreeInst(FreeInst &I) {
+ assert(0 && "lowerallocations pass didn't work!");
+}
+
+void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static) {
+
+ // If there are no indices, just print out the pointer.
+ if (I == E) {
+ writeOperand(Ptr);
+ return;
+ }
+
+ // Find out if the last index is into a vector. If so, we have to print this
+ // specially. Since vectors can't have elements of indexable type, only the
+ // last index could possibly be of a vector element.
+ const VectorType *LastIndexIsVector = 0;
+ {
+ for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
+ LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
+ }
+
+ Out << "(";
+
+ // If the last index is into a vector, we can't print it as &a[i][j] because
+ // we can't index into a vector with j in GCC. Instead, emit this as
+ // (((float*)&a[i])+j)
+ if (LastIndexIsVector) {
+ Out << "((";
+ printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
+ Out << ")(";
+ }
+
+ Out << '&';
+
+ // If the first index is 0 (very typical) we can do a number of
+ // simplifications to clean up the code.
+ Value *FirstOp = I.getOperand();
+ if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
+ // First index isn't simple, print it the hard way.
+ writeOperand(Ptr);
+ } else {
+ ++I; // Skip the zero index.
+
+ // Okay, emit the first operand. If Ptr is something that is already address
+ // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
+ if (isAddressExposed(Ptr)) {
+ writeOperandInternal(Ptr, Static);
+ } else if (I != E && isa<StructType>(*I)) {
+ // If we didn't already emit the first operand, see if we can print it as
+ // P->f instead of "P[0].f"
+ writeOperand(Ptr);
+ Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ ++I; // eat the struct index as well.
+ } else {
+ // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
+ Out << "(*";
+ writeOperand(Ptr);
+ Out << ")";
+ }
+ }
+
+ for (; I != E; ++I) {
+ if (isa<StructType>(*I)) {
+ Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ } else if (isa<ArrayType>(*I)) {
+ Out << ".array[";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else if (!isa<VectorType>(*I)) {
+ Out << '[';
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else {
+ // If the last index is into a vector, then print it out as "+j)". This
+ // works with the 'LastIndexIsVector' code above.
+ if (isa<Constant>(I.getOperand()) &&
+ cast<Constant>(I.getOperand())->isNullValue()) {
+ Out << "))"; // avoid "+0".
+ } else {
+ Out << ")+(";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << "))";
+ }
+ }
+ }
+ Out << ")";
+}
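+// Illustrative sketch (hypothetical names): with a leading zero index into a
+// struct field, "getelementptr %struct.S* %P, i32 0, i32 1" prints as
+// "(&llvm_cbe_P->field1)", while a trailing vector index falls into the
+// "(((float*)&...)+j)" form described above.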
+
+void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType,
+ bool IsVolatile, unsigned Alignment) {
+
+ bool IsUnaligned = Alignment &&
+ Alignment < TD->getABITypeAlignment(OperandType);
+
+ if (!IsUnaligned)
+ Out << '*';
+ if (IsVolatile || IsUnaligned) {
+ Out << "((";
+ if (IsUnaligned)
+ Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
+ printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
+ if (IsUnaligned) {
+ Out << "; } ";
+ if (IsVolatile) Out << "volatile ";
+ Out << "*";
+ }
+ Out << ")";
+ }
+
+ writeOperand(Operand);
+
+ if (IsVolatile || IsUnaligned) {
+ Out << ')';
+ if (IsUnaligned)
+ Out << "->data";
+ }
+}
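+// Illustrative sketch (hypothetical names): an i32 load with alignment 1 is
+// emitted roughly as
+//   ((struct __attribute__ ((packed, aligned(1))) { unsigned int data; }*)
+//       llvm_cbe_p)->data
+// whereas a plain volatile load is just "*((unsigned int volatile*)...)".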
+
+void CWriter::visitLoadInst(LoadInst &I) {
+ writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
+ I.getAlignment());
+
+}
+
+void CWriter::visitStoreInst(StoreInst &I) {
+ writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
+ I.isVolatile(), I.getAlignment());
+ Out << " = ";
+ Value *Operand = I.getOperand(0);
+ Constant *BitMask = 0;
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+ if (!ITy->isPowerOf2ByteWidth())
+ // We have a bit width that doesn't match an even power-of-2 byte
+ // size. Consequently we must & the value with the type's bit mask
+ BitMask = ConstantInt::get(ITy, ITy->getBitMask());
+ if (BitMask)
+ Out << "((";
+ writeOperand(Operand);
+ if (BitMask) {
+ Out << ") & ";
+ printConstant(BitMask, false);
+ Out << ")";
+ }
+}
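+// Illustrative sketch (hypothetical names): storing an i17 masks the value
+// back to its declared width, e.g. "*llvm_cbe_p = ((llvm_cbe_v) & 131071u)"
+// (131071 == 2^17 - 1), since the value lives in a wider C integer type.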
+
+void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
+ gep_type_end(I), false);
+}
+
+void CWriter::visitVAArgInst(VAArgInst &I) {
+ Out << "va_arg(*(va_list*)";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ printType(Out, I.getType());
+ Out << ");\n ";
+}
+
+void CWriter::visitInsertElementInst(InsertElementInst &I) {
+ const Type *EltTy = I.getType()->getElementType();
+ writeOperand(I.getOperand(0));
+ Out << ";\n ";
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(&I) << "))[";
+ writeOperand(I.getOperand(2));
+ Out << "] = (";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
+
+void CWriter::visitExtractElementInst(ExtractElementInst &I) {
+ // We know that our operand is not inlined.
+ Out << "((";
+ const Type *EltTy =
+ cast<VectorType>(I.getOperand(0)->getType())->getElementType();
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
+ writeOperand(I.getOperand(1));
+ Out << "]";
+}
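+// Illustrative sketch (hypothetical names): "extractelement <4 x float> %v,
+// i32 2" is emitted as "((float*)(&llvm_cbe_v))[2]"-style pointer indexing,
+// since GCC does not allow subscripting a vector directly.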
+
+void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Out << "(";
+ printType(Out, SVI.getType());
+ Out << "){ ";
+ const VectorType *VT = SVI.getType();
+ unsigned NumElts = VT->getNumElements();
+ const Type *EltTy = VT->getElementType();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i) Out << ", ";
+ int SrcVal = SVI.getMaskValue(i);
+ if ((unsigned)SrcVal >= NumElts*2) {
+ Out << " 0/*undef*/ ";
+ } else {
+ Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
+ if (isa<Instruction>(Op)) {
+ // Do an extractelement of this value from the appropriate input.
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(Op)
+ << "))[" << (SrcVal & (NumElts-1)) << "]";
+ } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
+ Out << "0";
+ } else {
+ printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
+ (NumElts-1)),
+ false);
+ }
+ }
+ }
+ Out << "}";
+}
+
+void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
+ // Start by copying the entire aggregate value into the result variable.
+ writeOperand(IVI.getOperand(0));
+ Out << ";\n ";
+
+ // Then do the insert to update the field.
+ Out << GetValueName(&IVI);
+ for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
+ i != e; ++i) {
+ const Type *IndexedTy =
+ ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1);
+ if (isa<ArrayType>(IndexedTy))
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ Out << " = ";
+ writeOperand(IVI.getOperand(1));
+}
+
+void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
+ Out << "(";
+ if (isa<UndefValue>(EVI.getOperand(0))) {
+ Out << "(";
+ printType(Out, EVI.getType());
+ Out << ") 0/*UNDEF*/";
+ } else {
+ Out << GetValueName(EVI.getOperand(0));
+ for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
+ i != e; ++i) {
+ const Type *IndexedTy =
+ ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1);
+ if (isa<ArrayType>(IndexedTy))
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ }
+ Out << ")";
+}
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
+ raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel) {
+ if (FileType != TargetMachine::AssemblyFile) return true;
+
+ PM.add(createGCLoweringPass());
+ PM.add(createLowerAllocationsPass(true));
+ PM.add(createLowerInvokePass());
+ PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
+ PM.add(new CBackendNameAllUsedStructsAndMergeFunctions());
+ PM.add(new CWriter(o));
+ PM.add(createGCInfoDeleter());
+ return false;
+}
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
new file mode 100644
index 0000000..be24336
--- /dev/null
+++ b/lib/Target/CBackend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_target(CBackend
+ CBackend.cpp
+ )
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
new file mode 100644
index 0000000..8b26245
--- /dev/null
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -0,0 +1,43 @@
+//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CTARGETMACHINE_H
+#define CTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+struct CTargetMachine : public TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+
+ CTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout(&M) {}
+
+ virtual bool WantsWholeFile() const { return true; }
+ virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel);
+
+  // This class always works, but must be requested explicitly on the
+  // llc command line.
+ static unsigned getModuleMatchQuality(const Module &M) { return 0; }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+};
+
+} // End llvm namespace
+
+
+#endif
diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile
new file mode 100644
index 0000000..336de0c
--- /dev/null
+++ b/lib/Target/CBackend/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMCBackend
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts += -Wno-format
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
new file mode 100644
index 0000000..1cf0a91
--- /dev/null
+++ b/lib/Target/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_llvm_library(LLVMTarget
+ DarwinTargetAsmInfo.cpp
+ ELFTargetAsmInfo.cpp
+ SubtargetFeature.cpp
+ Target.cpp
+ TargetAsmInfo.cpp
+ TargetData.cpp
+ TargetFrameInfo.cpp
+ TargetInstrInfo.cpp
+ TargetMachOWriterInfo.cpp
+ TargetMachine.cpp
+ TargetMachineRegistry.cpp
+ TargetRegisterInfo.cpp
+ TargetSubtarget.cpp
+ )
+
+# TODO: Support other targets besides X86. See Makefile.
\ No newline at end of file
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..4336b05
--- /dev/null
+++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_partially_linked_object(LLVMCellSPUAsmPrinter
+ SPUAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMCellSPUCodeGen n)
+
+add_dependencies(LLVMCellSPUAsmPrinter ${n})
diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile
new file mode 100644
index 0000000..dd56df7
--- /dev/null
+++ b/lib/Target/CellSPU/AsmPrinter/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCellSPUAsmPrinter
+
+# Hack: we need to include 'main' CellSPU target directory to grab
+# private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
new file mode 100644
index 0000000..da1bf07
--- /dev/null
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -0,0 +1,623 @@
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Cell SPU assembly language. This printer
+// is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+ STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+ const std::string bss_section(".bss");
+
+ class VISIBILITY_HIDDEN SPUAsmPrinter : public AsmPrinter {
+ std::set<std::string> FnStubs, GVStubs;
+ public:
+ explicit SPUAsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V) :
+ AsmPrinter(O, TM, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "STI CBEA SPU Assembly Printer";
+ }
+
+ SPUTargetMachine &getTM() {
+ return static_cast<SPUTargetMachine&>(TM);
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO);
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) &&
+ "Not physreg??");
+ O << TM.getRegisterInfo()->get(RegNo).AsmName;
+ }
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+        assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+               "Not physreg??");
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ } else {
+ printOp(MO);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+
+ void
+ printS7ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ int value = MI->getOperand(OpNo).getImm();
+ value = (value << (32 - 7)) >> (32 - 7);
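+      // Sketch: this shift pair sign-extends the low 7 bits, so an encoded
+      // immediate of 0x7f prints as -1 while 0x3f prints as 63.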
+
+      assert((value >= -(1 << 6) && value <= (1 << 6) - 1)
+             && "Invalid s7 argument");
+ O << value;
+ }
+
+ void
+ printU7ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+      assert(value < (1 << 7) && "Invalid u7 argument");
+ O << value;
+ }
+
+ void
+ printShufAddr(const MachineInstr *MI, unsigned OpNo)
+ {
+ char value = MI->getOperand(OpNo).getImm();
+ O << (int) value;
+ O << "(";
+ printOperand(MI, OpNo+1);
+ O << ")";
+ }
+
+ void
+ printS16ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ O << (short) MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printU16ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printU32ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ O << (unsigned)MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printMemRegReg(const MachineInstr *MI, unsigned OpNo) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ O << ", ";
+ printOperand(MI, OpNo+1);
+ }
+
+ void
+ printU18ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+      assert(value <= (1 << 18) - 1 && "Invalid u18 argument");
+ O << value;
+ }
+
+ void
+ printS10ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
+ && "Invalid s10 argument");
+ O << value;
+ }
+
+ void
+ printU10ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
+ O << value;
+ }
+
+ void
+ printDFormAddr(const MachineInstr *MI, unsigned OpNo)
+ {
+ assert(MI->getOperand(OpNo).isImm() &&
+ "printDFormAddr first operand is not immediate");
+ int64_t value = int64_t(MI->getOperand(OpNo).getImm());
+ int16_t value16 = int16_t(value);
+ assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
+ && "Invalid dform s10 offset argument");
+ O << (value16 & ~0xf) << "(";
+ printOperand(MI, OpNo+1);
+ O << ")";
+ }
+
+ void
+ printAddr256K(const MachineInstr *MI, unsigned OpNo)
+ {
+ /* Note: operand 1 is an offset or symbol name. */
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ printOp(MI->getOperand(OpNo));
+ if (MI->getOperand(OpNo+1).isImm()) {
+ int displ = int(MI->getOperand(OpNo+1).getImm());
+ if (displ > 0)
+ O << "+" << displ;
+ else if (displ < 0)
+ O << displ;
+ }
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo));
+ }
+
+ void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo) {
+ // Used to generate a ".-<target>", but it turns out that the assembler
+ // really wants the target.
+ //
+ // N.B.: This operand is used for call targets. Branch hints are another
+ // animal entirely.
+ printOp(MI->getOperand(OpNo));
+ }
+
+ void printHBROperand(const MachineInstr *MI, unsigned OpNo) {
+ // HBR operands are generated in front of branches, hence, the
+ // program counter plus the target.
+ O << ".+";
+ printOp(MI->getOperand(OpNo));
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ printOp(MI->getOperand(OpNo));
+ O << "@h";
+ }
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ printOp(MI->getOperand(OpNo));
+ O << "@l";
+ }
+ }
+
+ /// Print local store address
+ void printSymbolLSA(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo));
+ }
+
+ void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value < 16)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ } else {
+ assert(0 &&"Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ }
+ }
+
+ void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value <= 32)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ } else {
+ assert(0 &&"Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ }
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &F) = 0;
+ //! Assembly printer cleanup after function has been emitted
+ virtual bool doFinalization(Module &M) = 0;
+ };
+
+ /// LinuxAsmPrinter - SPU assembly printer, customized for Linux
+ class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+ public:
+ explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level F,
+ bool V)
+ : SPUAsmPrinter(O, TM, T, F, V), DW(0), MMI(0) {}
+
+ virtual const char *getPassName() const {
+ return "STI CBEA SPU Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ //! Dump globals, perform cleanup after function emission
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ SPUAsmPrinter::getAnalysisUsage(AU);
+ }
+
+ //! Emit a global variable according to its section and type
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "SPUGenAsmWriter.inc"
+
+void SPUAsmPrinter::printOp(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ cerr << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ GVStubs.insert(Name);
+ O << "L" << Name << "$non_lazy_ptr";
+ return;
+ }
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+
+ // External or weakly linked global variables need non-lazily-resolved
+ // stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+ GVStubs.insert(Name);
+ O << "L" << Name << "$non_lazy_ptr";
+ return;
+ }
+ }
+ O << Name;
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemRegReg(MI, OpNo);
+ return false;
+}
+
+/// printMachineInstruction -- Print out a single Cell SPU MI to the current
+/// output stream.
+///
+void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+ printInstruction(MI);
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool
+LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
+{
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+ EmitAlignment(3, F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.global\t" << CurrentFnName << "\n"
+ << "\t.type\t" << CurrentFnName << ", @function\n";
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ O << "\t.global\t" << CurrentFnName << "\n";
+ O << "\t.weak_definition\t" << CurrentFnName << "\n";
+ break;
+ }
+ O << CurrentFnName << ":\n";
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << "\n";
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+
+bool LinuxAsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+ SwitchToTextSection("\t.text");
+ // Emit initial debug information.
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "Dwarf Writer is not available");
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ DW->BeginModule(&M, MMI, O, this, TAI);
+ return Result;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+ for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+ Name != E; ++Name)
+ if (isprint(*Name))
+ OS << *Name;
+}
+
+/*!
+ Emit a global variable according to its section, alignment, etc.
+
+  \note This code was shamelessly copied from the PowerPC assembly printer,
+  which sort of screams for some kind of refactoring of the common code.
+ */
+void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return;
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ std::string name = Mang->getValueName(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && /* FIXME: Verify correct */
+ !GVar->hasSection() &&
+ (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() ||
+ GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasExternalLinkage()) {
+ O << "\t.global " << name << '\n';
+ O << "\t.type " << name << ", @object\n";
+ O << name << ":\n";
+ O << "\t.zero " << Size << '\n';
+ } else if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << ',' << Size;
+ } else {
+ O << ".comm " << name << ',' << Size;
+ }
+ O << "\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'\n";
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ // Should never be seen for the CellSPU platform...
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n"
+ << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n";
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ cerr << "Unknown linkage type!";
+ abort();
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":\t\t\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'\n";
+
+ // If the initializer is a extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+bool LinuxAsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+  // Emit final debug information.
+ DW->EndModule();
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createSPUCodePrinterPass - Returns a pass that prints the Cell SPU
+/// assembly code for a MachineFunction to the given output stream, in a format
+/// that the Linux SPU assembler can deal with.
+///
+FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
+ SPUTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
new file mode 100644
index 0000000..e3e12ac
--- /dev/null
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_TARGET_DEFINITIONS SPU.td)
+
+tablegen(SPUGenInstrNames.inc -gen-instr-enums)
+tablegen(SPUGenRegisterNames.inc -gen-register-enums)
+tablegen(SPUGenAsmWriter.inc -gen-asm-writer)
+tablegen(SPUGenCodeEmitter.inc -gen-emitter)
+tablegen(SPUGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(SPUGenRegisterInfo.inc -gen-register-desc)
+tablegen(SPUGenInstrInfo.inc -gen-instr-desc)
+tablegen(SPUGenDAGISel.inc -gen-dag-isel)
+tablegen(SPUGenSubtarget.inc -gen-subtarget)
+tablegen(SPUGenCallingConv.inc -gen-callingconv)
+
+add_llvm_target(CellSPUCodeGen
+ SPUFrameInfo.cpp
+ SPUHazardRecognizers.cpp
+ SPUInstrInfo.cpp
+ SPUISelDAGToDAG.cpp
+ SPUISelLowering.cpp
+ SPURegisterInfo.cpp
+ SPUSubtarget.cpp
+ SPUTargetAsmInfo.cpp
+ SPUTargetMachine.cpp
+ )
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td
new file mode 100644
index 0000000..5d759a4
--- /dev/null
+++ b/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -0,0 +1,448 @@
+//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+///--==-- Arithmetic ops intrinsics --==--
+def CellSDKah:
+ RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
+def CellSDKahi:
+ RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
+def CellSDKa:
+ RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
+def CellSDKai:
+ RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
+def CellSDKsfh:
+ RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
+def CellSDKsfhi:
+ RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
+def CellSDKsf:
+ RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
+def CellSDKsfi:
+ RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
+def CellSDKaddx:
+ RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
+def CellSDKcg:
+ RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
+def CellSDKcgx:
+ RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
+def CellSDKsfx:
+ RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
+def CellSDKbg:
+ RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
+def CellSDKbgx:
+ RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
+
+def CellSDKmpy:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyu:
+ RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+def CellSDKmpyi:
+ RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyi $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpyui:
+ RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyui $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpya:
+ RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB),
+ (v8i16 VECREG:$rC)))]>;
+
+def CellSDKmpyh:
+ RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpys:
+ RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpys $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhh:
+ RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhha:
+ RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhha $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
+// as an intrinsic for the time being
+def CellSDKmpyhhu:
+ RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhhau:
+ RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKand:
+ RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "and\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandc:
+ RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "andc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandbi:
+ RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "andbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKandhi:
+ RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKandi:
+ RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "or\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorc:
+  RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+    "orc\t $rT, $rA, $rB", IntegerOp,
+    [(set (v4i32 VECREG:$rT),
+      (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "orbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "orhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKxor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "xor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKxorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "xorbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKxorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xorhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKxori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKnor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKnand:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nand\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+//===----------------------------------------------------------------------===//
+// Shift/rotate intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKshli:
+ Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
+ (SHLIv4i32 VECREG:$rA, uimm7:$val)>;
+
+def CellSDKshlqbi:
+ Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
+ (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqbii:
+ Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
+ (SHLQBIIv16i8 VECREG:$rA, uimm7:$val)>;
+
+def CellSDKshlqby:
+ Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
+ (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqbyi:
+ Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
+ (SHLQBYIv16i8 VECREG:$rA, uimm7:$val)>;
+
+//===----------------------------------------------------------------------===//
+// Branch/compare intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKceq:
+ RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceq\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKceqi:
+ RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKceqb:
+ RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKceqbi:
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "ceqbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKceqh:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqh\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKceqhi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+def CellSDKcgth:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKcgthi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKcgt:
+ RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKcgti:
+ RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKcgtb:
+ RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKcgtbi:
+ RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "cgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKclgth:
+ RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKclgthi:
+ RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKclgt:
+ RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKclgti:
+ RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKclgtb:
+ RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKclgtbi:
+ RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "clgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKfa:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fa\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfs:
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fs\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfm:
+ RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fm\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfceq:
+ RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fceq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcgt:
+ RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmeq:
+ RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmeq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmgt:
+ RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfma:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfnms:
+ RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfms:
+ RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+//===----------------------------------------------------------------------===//
+// Double precision floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKdfa:
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfs:
+ RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfm:
+ RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfma:
+ RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnma:
+ RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnms:
+ RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfms:
+ RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
new file mode 100644
index 0000000..a460db3
--- /dev/null
+++ b/lib/Target/CellSPU/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMCellSPUCodeGen
+TARGET = SPU
+
+BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \
+ SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \
+ SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \
+ SPUGenInstrInfo.inc SPUGenDAGISel.inc \
+ SPUGenSubtarget.inc SPUGenCallingConv.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
new file mode 100644
index 0000000..4783dd5
--- /dev/null
+++ b/lib/Target/CellSPU/README.txt
@@ -0,0 +1,90 @@
+//===- README.txt - Notes for improving CellSPU-specific code gen ---------===//
+
+This code was contributed by a team from the Computer Systems Research
+Department in The Aerospace Corporation:
+
+- Scott Michel (head bottle washer and much of the non-floating point
+ instructions)
+- Mark Thomas (floating point instructions)
+- Michael AuYeung (intrinsics)
+- Chandler Carruth (LLVM expertise)
+- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR
+OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES
+OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING
+OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT
+LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR
+REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL,
+OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR
+SUCH DAMAGES ARE FORESEEABLE.
+
+---------------------------------------------------------------------------
+--WARNING--:
+--WARNING--: The CellSPU work is a work in progress and "alpha" quality code.
+--WARNING--:
+
+If you are brave enough to try this code or help to hack on it, be sure
+to add 'spu' to configure's --enable-targets option, e.g.:
+
+ ./configure <your_configure_flags_here> \
+ --enable-targets=x86,x86_64,powerpc,spu
+
+---------------------------------------------------------------------------
+
+TODO:
+* Create a machine pass for performing dual-pipeline scheduling specifically
+ for CellSPU, and insert branch prediction instructions as needed.
+
+* i32 instructions:
+
+ * i32 division (work-in-progress)
+
+* i64 support (see i64operations.c test harness):
+
+ * shifts and comparison operators: done
+ * sign and zero extension: done
+ * addition: done
+ * subtraction: needed
+ * multiplication: done
+
+* i128 support:
+
+ * zero extension, any extension: done
+ * sign extension: needed
+ * arithmetic operators (add, sub, mul, div): needed
+ * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
+   (of these, only or is done)
+
+* f64 support
+
+ * Comparison operators:
+ SETOEQ unimplemented
+ SETOGT unimplemented
+ SETOGE unimplemented
+ SETOLT unimplemented
+ SETOLE unimplemented
+ SETONE unimplemented
+ SETO done (lowered)
+ SETUO done (lowered)
+ SETUEQ unimplemented
+ SETUGT unimplemented
+ SETUGE unimplemented
+ SETULT unimplemented
+ SETULE unimplemented
+ SETUNE unimplemented
+
+* LLVM vector support
+
+ * VSETCC needs to be implemented. It's pretty straightforward to code, but
+   has not been done yet.
+
+* Intrinsics
+
+ * spu.h intrinsics added but not tested. Need to have an operational
+   llvm-spu-gcc in order to write a unit test harness.
+
+===-------------------------------------------------------------------------===
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
new file mode 100644
index 0000000..77a062e
--- /dev/null
+++ b/lib/Target/CellSPU/SPU.h
@@ -0,0 +1,102 @@
+//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Cell SPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IBMCELLSPU_H
+#define LLVM_TARGET_IBMCELLSPU_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUTargetMachine;
+ class FunctionPass;
+ class raw_ostream;
+
+ FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUAsmPrinterPass(raw_ostream &o,
+ SPUTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+
+ /*--== Utility functions/predicates/etc used all over the place: --==*/
+ //! Predicate test for a signed 10-bit value
+ /*!
+ \param Value The input value to be tested
+
+ This predicate returns true if \a Value can be represented as a signed
+ 10-bit quantity, i.e., if it lies in the range [-512, 511].
+ */
+ template<typename T>
+ inline bool isS10Constant(T Value);
+
+ template<>
+ inline bool isS10Constant<short>(short Value) {
+   // Check the signed 10-bit range directly, matching the int/int64_t
+   // specializations below (this also correctly accepts Value == 0).
+   return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
+ }
+
+ template<>
+ inline bool isS10Constant<int>(int Value) {
+ return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
+ }
+
+ template<>
+ inline bool isS10Constant<uint32_t>(uint32_t Value) {
+ return (Value <= ((1 << 9) - 1));
+ }
+
+ template<>
+ inline bool isS10Constant<int64_t>(int64_t Value) {
+ return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
+ }
+
+ template<>
+ inline bool isS10Constant<uint64_t>(uint64_t Value) {
+ return (Value <= ((1 << 9) - 1));
+ }
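+
+ // For illustration, given the range checks above: isS10Constant(511) and
+ // isS10Constant(-512) are true, while isS10Constant(512) is false.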
+
+ //! Predicate test for an unsigned 10-bit value
+ /*!
+ \param Value The input value to be tested
+
+ This predicate returns true if \a Value can be represented as an unsigned
+ 10-bit quantity, i.e., if it lies in the range [0, 1023].
+ */
+ inline bool isU10Constant(short Value) {
+ return (Value == (Value & 0x3ff));
+ }
+
+ inline bool isU10Constant(int Value) {
+ return (Value == (Value & 0x3ff));
+ }
+
+ inline bool isU10Constant(uint32_t Value) {
+ return (Value == (Value & 0x3ff));
+ }
+
+ inline bool isU10Constant(int64_t Value) {
+ return (Value == (Value & 0x3ff));
+ }
+
+ inline bool isU10Constant(uint64_t Value) {
+ return (Value == (Value & 0x3ff));
+ }
+}
+
+// Defines symbolic names for the SPU instructions.
+//
+#include "SPUGenInstrNames.inc"
+
+#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
new file mode 100644
index 0000000..8327fe0
--- /dev/null
+++ b/lib/Target/CellSPU/SPU.td
@@ -0,0 +1,66 @@
+//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the STI Cell SPU target machine.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
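+
+// A CodeFrag is referenced later as <name>.Fragment inside other dags (see,
+// e.g., SPU64InstrInfo.td), which lets multi-instruction selection sequences
+// be composed without defining extra pseudo instructions.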
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SPURegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction formats, instructions
+//===----------------------------------------------------------------------===//
+
+include "SPUNodes.td"
+include "SPUOperands.td"
+include "SPUSchedule.td"
+include "SPUInstrFormats.td"
+include "SPUInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget features:
+//===----------------------------------------------------------------------===//
+
+def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
+def LargeMemFeature:
+ SubtargetFeature<"large_mem","UseLargeMem", "true",
+ "Use large (>256) LSA memory addressing [default = false]">;
+
+def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
+
+//===----------------------------------------------------------------------===//
+// Calling convention:
+//===----------------------------------------------------------------------===//
+
+include "SPUCallingConv.td"
+
+// Target:
+
+def SPUInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def SPU : Target {
+ let InstructionSet = SPUInstrInfo;
+}
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
new file mode 100644
index 0000000..3031fda
--- /dev/null
+++ b/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -0,0 +1,41 @@
+//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//
+// Cell SPU 128-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+// zext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (zext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// zext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (zext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// zext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (zext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// zext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (zext R8C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
+
+// anyext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (anyext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// anyext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (anyext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// anyext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (anyext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// anyext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (anyext R8C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
+
+// Shift left
+def : Pat<(shl GPRC:$rA, R32C:$rB),
+ (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 0000000..06eb149
--- /dev/null
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,394 @@
+//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//
+// Cell SPU 64-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector versus scalar differ by a
+// constant. In the scalar case, we're only interested in the
+// top two 32-bit slots, whereas we're interested in an exact
+// all-four-slot match in the vector case.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+// could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+// mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+// [Note: this may be moot, since gb produces v4i32 or r32.]
+//
+// 5. The code sequences for r64 and v2i64 are probably overly conservative,
+// compared to the code that gcc produces.
+//
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+// The generic i64 select pattern, which assumes that the comparison result
+// is in a 32-bit register that contains a select mask pattern (i.e., gather
+// bits result):
+
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
+
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
+
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(cond R64C:$rA, R64C:$rB),
+ (XORIr32 compare.Fragment, -1)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB))), 0xb)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
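+
+// A sketch of how the constants above are chosen: CEQv4i32 leaves an
+// all-ones or all-zeros mask in each 32-bit slot, and GBv4i32 gathers the
+// low bit of each slot into a 4-bit nibble. An exact v2i64 match therefore
+// gathers to 0xf, while the scalar i64 case only cares about the two high
+// slots, i.e., a gathered value of the form 0b11xx, hence "greater than 0xb".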
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
+ def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
+
+// i64 setne:
+def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setugt/setule:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGTr64ugt:
+ CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CLGTr64eq:
+ CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CLGTr64compare:
+ CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
+ (XSWDv2i64 CLGTr64ugt.Fragment),
+ CLGTr64eq.Fragment)>;
+
+def CLGTv2i64ugt:
+ CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64compare:
+  CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
+                      (XSWDv2i64 CLGTv2i64ugt.Fragment),
+                      CLGTv2i64eq.Fragment)>;
+
+multiclass CompareLogicalGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
+ def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
+}
+
+defm I64LGT: CompareLogicalGreaterThan64;
+
+def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
+def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64LGTv2i64.Fragment>;
+
+// i64 setule:
+def : I64SETCCNegCond<setule, I64LGTr64>;
+def : I64SELECTNegCond<setule, I64LGTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setuge/setult:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
+ CLGTr64eq.Fragment)), 0xb)>;
+
+def CLGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
+ CLGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareLogicalGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
+ def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
+}
+
+defm I64LGE: CompareLogicalGreaterEqual64;
+
+def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
+def : Pat<(setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64LGEv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setult, I64LGEr64>;
+def : I64SELECTNegCond<setult, I64LGEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setgt/setle:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGTr64sgt:
+ CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CGTr64eq:
+ CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CGTr64compare:
+ CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
+ (XSWDv2i64 CGTr64sgt.Fragment),
+ CGTr64eq.Fragment)>;
+
+def CGTv2i64sgt:
+ CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64compare:
+  CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
+                      (XSWDv2i64 CGTv2i64sgt.Fragment),
+                      CGTv2i64eq.Fragment)>;
+
+multiclass CompareGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
+ def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
+}
+
+defm I64GT: CompareGreaterThan64;
+
+def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
+def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64GTv2i64.Fragment>;
+
+// i64 setle:
+def : I64SETCCNegCond<setle, I64GTr64>;
+def : I64SELECTNegCond<setle, I64GTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setge/setlt:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
+ CGTr64eq.Fragment)), 0xb)>;
+
+def CGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
+ CGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
+ def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
+}
+
+defm I64GE: CompareGreaterEqual64;
+
+def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
+def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64GEv2i64.Fragment>;
+
+// i64 setlt:
+def : I64SETCCNegCond<setlt, I64GEr64>;
+def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+ CodeFrag<(CGv4i32 lhs, rhs)>;
+
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+ CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+ v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
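+
+// A sketch of the sequence above: CG produces the word-wise carry-outs, the
+// SHUFB (driven by the caller-supplied carry-generate mask) rotates each
+// carry up into the next-higher word slot, and ADDX performs the word-wise
+// add with those carries as carry-ins, together forming a full 64-bit add
+// per doubleword slot.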
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_add<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+ CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_sub<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ v2i64_sub_bg<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
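+// A rough sketch of the decomposition the fragments below implement (using
+// the SPU's 16 x 16 -> 32 bit multiplies): writing each doubleword slot as
+// a = a0*2^48 + a1*2^32 + a2*2^16 + a3 (and b likewise), the low 64 bits of
+// a*b are
+//
+//   a3*b3 + (a2*b3 + a3*b2)*2^16 + (higher partial products)*2^32
+//
+// v2i64_mul_a3_b3, v2i64_mul_a2_b3 and v2i64_mul_a3_b2 form the first terms,
+// v2i64_highprod the higher partial products, and v2i64_lowsum/v2i64_mul
+// combine them using the 64-bit adds defined earlier.
+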
+class v2i64_mul_ahi64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+ CodeFrag<(Av4i32
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
+ v2i64_mul_ahi64<rA>.Fragment),
+ (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
+ v2i64_mul_bshlq4<rB>.Fragment)),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+ v2i64_mul_ashlq4<rA>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment),
+ (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+ CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+ v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+ v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+ v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+ (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+ (ILv4i32 0),
+ (FSMBIv4i32 0x0f0f)),
+ rCGmask>;
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f64 comparisons
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBf64_cond:
+ SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
+ [(set R64FP:$rT,
+ (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
new file mode 100644
index 0000000..10dc837
--- /dev/null
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -0,0 +1,115 @@
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the STI Cell SPU architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<SPUSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for Cell SPU: Everything can be passed back via $3:
+def RetCC_SPU : CallingConv<[
+ CCIfType<[i8], CCAssignToReg<[R3]>>,
+ CCIfType<[i16], CCAssignToReg<[R3]>>,
+ CCIfType<[i32], CCAssignToReg<[R3]>>,
+ CCIfType<[i64], CCAssignToReg<[R3]>>,
+ CCIfType<[i128], CCAssignToReg<[R3]>>,
+ CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>,
+ CCIfType<[v2i32], CCAssignToReg<[R3]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// CellSPU Argument Calling Conventions
+// (note: this isn't used yet, but presumably should be at some point, as it
+// is on other targets.)
+//===----------------------------------------------------------------------===//
+/*
+def CC_SPU : CallingConv<[
+ CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>
+]>;
+*/
diff --git a/lib/Target/CellSPU/SPUFrameInfo.cpp b/lib/Target/CellSPU/SPUFrameInfo.cpp
new file mode 100644
index 0000000..60d7ba7
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameInfo.cpp
@@ -0,0 +1,29 @@
+//===-- SPUFrameInfo.cpp - Frame info for the Cell SPU Target -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Frame information implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameInfo.h"
+#include "SPURegisterNames.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameInfo:
+//===----------------------------------------------------------------------===//
+
+SPUFrameInfo::SPUFrameInfo(const TargetMachine &tm):
+ TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+ TM(tm)
+{
+ LR[0].first = SPU::R0;
+ LR[0].second = 16;
+}
diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h
new file mode 100644
index 0000000..e8ca333
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameInfo.h
@@ -0,0 +1,79 @@
+//===-- SPUFrameInfo.h - Top-level interface for Cell SPU Target -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains CellSPU frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUFRAMEINFO_H
+#define SPUFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SPURegisterInfo.h"
+
+namespace llvm {
+ class SPUFrameInfo: public TargetFrameInfo {
+ const TargetMachine &TM;
+ std::pair<unsigned, int> LR[1];
+
+ public:
+ SPUFrameInfo(const TargetMachine &tm);
+
+ //! Return a function's saved spill slots
+ /*!
+ For CellSPU, a function's saved spill slots is just the link register.
+ */
+ const std::pair<unsigned, int> *
+ getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+ //! Stack slot size (16 bytes)
+ static int stackSlotSize() {
+ return 16;
+ }
+ //! Maximum frame offset representable by a signed 10-bit integer
+ /*!
+ This is the maximum frame offset that can be expressed as a 10-bit
+ integer, used in D-form addresses.
+ */
+ static int maxFrameOffset() {
+ return ((1 << 9) - 1) * stackSlotSize();
+ }
+ //! Minimum frame offset representable by a signed 10-bit integer
+ static int minFrameOffset() {
+ return -(1 << 9) * stackSlotSize();
+ }
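+ // With the 16-byte slots above, these work out to 511 * 16 = 8176 and
+ // -512 * 16 = -8192, respectively.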
+ //! Minimum frame size (enough to spill LR + SP)
+ static int minStackSize() {
+ return (2 * stackSlotSize());
+ }
+ //! Frame size required to spill all registers plus frame info
+ static int fullSpillSize() {
+ return (SPURegisterInfo::getNumArgRegs() * stackSlotSize());
+ }
+ //! Convert frame index to stack offset
+ static int FItoStackOffset(int frame_index) {
+ return frame_index * stackSlotSize();
+ }
+ //! Number of instructions required to overcome hint-for-branch latency
+ /*!
+ HBR (hint-for-branch) instructions can be inserted when, for example,
+ we know that a given function is going to be called, such as printf(),
+ in the control flow graph. HBRs are only inserted if a sufficient number
+ of instructions occurs between the HBR and the target. Currently, HBRs
+ take 6 cycles, ergo, the magic number 6.
+ */
+ static int branchHintPenalty() {
+ return 6;
+ }
+ };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
new file mode 100644
index 0000000..caaa71a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -0,0 +1,138 @@
+//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on Cell SPU
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched"
+
+#include "SPUHazardRecognizers.h"
+#include "SPU.h"
+#include "SPUInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Cell SPU hazard recognizer
+//
+// This is the pipeline hazard recognizer for the Cell SPU processor. It does
+// very little right now.
+//===----------------------------------------------------------------------===//
+
+SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
+ TII(tii),
+ EvenOdd(0)
+{
+}
+
+/// Return the pipeline hazard type encountered or generated by this
+/// instruction. Currently returns NoHazard.
+///
+/// \return NoHazard
+ScheduleHazardRecognizer::HazardType
+SPUHazardRecognizer::getHazardType(SUnit *SU)
+{
+ // Initial thoughts on how to do this, but this code cannot work unless the
+ // function's prolog and epilog code are also being scheduled so that we can
+ // accurately determine which pipeline is being scheduled.
+#if 0
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ ScheduleHazardRecognizer::HazardType retval = NoHazard;
+ bool mustBeOdd = false;
+
+ switch (Node->getOpcode()) {
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16:
+ case SPU::LQAv16i8:
+ case SPU::LQAv8i16:
+ case SPU::LQAv4i32:
+ case SPU::LQAv4f32:
+ case SPU::LQAv2f64:
+ case SPU::LQAr128:
+ case SPU::LQAr64:
+ case SPU::LQAr32:
+ case SPU::LQXv4i32:
+ case SPU::LQXr128:
+ case SPU::LQXr64:
+ case SPU::LQXr32:
+ case SPU::LQXr16:
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8:
+ case SPU::STQAv16i8:
+ case SPU::STQAv8i16:
+ case SPU::STQAv4i32:
+ case SPU::STQAv4f32:
+ case SPU::STQAv2f64:
+ case SPU::STQAr128:
+ case SPU::STQAr64:
+ case SPU::STQAr32:
+ case SPU::STQAr16:
+ case SPU::STQAr8:
+ case SPU::STQXv16i8:
+ case SPU::STQXv8i16:
+ case SPU::STQXv4i32:
+ case SPU::STQXv4f32:
+ case SPU::STQXv2f64:
+ case SPU::STQXr128:
+ case SPU::STQXr64:
+ case SPU::STQXr32:
+ case SPU::STQXr16:
+ case SPU::STQXr8:
+ case SPU::RET:
+ mustBeOdd = true;
+ break;
+ default:
+ // Assume that this instruction can be on the even pipe
+ break;
+ }
+
+ if (mustBeOdd && !EvenOdd)
+ retval = Hazard;
+
+ DOUT << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " << retval << "\n";
+ EvenOdd ^= 1;
+ return retval;
+#else
+ return NoHazard;
+#endif
+}
+
+void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
+{
+}
+
+void SPUHazardRecognizer::AdvanceCycle()
+{
+ DOUT << "SPUHazardRecognizer::AdvanceCycle\n";
+}
+
+void SPUHazardRecognizer::EmitNoop()
+{
+ AdvanceCycle();
+}
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
new file mode 100644
index 0000000..d0ae2d8
--- /dev/null
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -0,0 +1,41 @@
+//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on the Cell SPU
+// processor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUHAZRECS_H
+#define SPUHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+/// SPUHazardRecognizer
+class SPUHazardRecognizer : public ScheduleHazardRecognizer
+{
+private:
+ const TargetInstrInfo &TII;
+ int EvenOdd;
+
+public:
+ SPUHazardRecognizer(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void EmitNoop();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
new file mode 100644
index 0000000..779d75d
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -0,0 +1,1244 @@
+//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for the Cell SPU,
+// converting from a legalized dag to a SPU-target dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "SPUISelLowering.h"
+#include "SPUHazardRecognizers.h"
+#include "SPUFrameInfo.h"
+#include "SPURegisterNames.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+ //! ConstantSDNode predicate for i64 sign-extended, 10-bit immediates
+ bool
+ isI64IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isS10Constant(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
+ bool
+ isI32IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isS10Constant(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
+ bool
+ isI32IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isU10Constant(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
+ bool
+ isI16IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isS10Constant(CN->getSExtValue());
+ }
+
+ //! SDNode predicate for i16 sign-extended, 10-bit immediate values
+ bool
+ isI16IntS10Immediate(SDNode *N)
+ {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ return (CN != 0 && isI16IntS10Immediate(CN));
+ }
+
+ //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
+ bool
+ isI16IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isU10Constant((short) CN->getZExtValue());
+ }
+
+ //! SDNode predicate for i16 unsigned 10-bit immediate values
+ bool
+ isI16IntU10Immediate(SDNode *N)
+ {
+ return (N->getOpcode() == ISD::Constant
+ && isI16IntU10Immediate(cast<ConstantSDNode>(N)));
+ }
+
+ //! ConstantSDNode predicate for signed 16-bit values
+ /*!
+ \arg CN The constant SelectionDAG node holding the value
+ \arg Imm The returned 16-bit value, if returning true
+
+ This predicate tests the value in \a CN to see whether it can be
+ represented as a 16-bit, sign-extended quantity. Returns true if
+ this is the case.
+ */
+ bool
+ isIntS16Immediate(ConstantSDNode *CN, short &Imm)
+ {
+ MVT vt = CN->getValueType(0);
+ Imm = (short) CN->getZExtValue();
+ if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
+ return true;
+ } else if (vt == MVT::i32) {
+ int32_t i_val = (int32_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ } else {
+ int64_t i_val = (int64_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ }
+ }
+
+ //! SDNode predicate for signed 16-bit values.
+ bool
+ isIntS16Immediate(SDNode *N, short &Imm)
+ {
+ return (N->getOpcode() == ISD::Constant
+ && isIntS16Immediate(cast<ConstantSDNode>(N), Imm));
+ }
+
+ //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
+ static bool
+ isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
+ {
+ MVT vt = FPN->getValueType(0);
+ if (vt == MVT::f32) {
+ int val = FloatToBits(FPN->getValueAPF().convertToFloat());
+ int sval = (int) ((val << 16) >> 16);
+ Imm = (short) val;
+ return val == sval;
+ }
+
+ return false;
+ }
+
+ bool
+ isHighLow(const SDValue &Op)
+ {
+ return (Op.getOpcode() == SPUISD::IndirectAddr
+ && ((Op.getOperand(0).getOpcode() == SPUISD::Hi
+ && Op.getOperand(1).getOpcode() == SPUISD::Lo)
+ || (Op.getOperand(0).getOpcode() == SPUISD::Lo
+ && Op.getOperand(1).getOpcode() == SPUISD::Hi)));
+ }
+
+ //===------------------------------------------------------------------===//
+ //! MVT to "useful stuff" mapping structure:
+
+ struct valtype_map_s {
+ MVT VT;
+ unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined)
+ bool ldresult_imm; /// LDRESULT instruction requires immediate?
+ unsigned lrinst; /// LR instruction
+ };
+
+ const valtype_map_s valtype_map[] = {
+ { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 },
+ { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 },
+ { MVT::i32, SPU::ORIr32, true, SPU::LRr32 },
+ { MVT::i64, SPU::ORr64, false, SPU::LRr64 },
+ { MVT::f32, SPU::ORf32, false, SPU::LRf32 },
+ { MVT::f64, SPU::ORf64, false, SPU::LRf64 },
+ // vector types... (sigh!)
+ { MVT::v16i8, 0, false, SPU::LRv16i8 },
+ { MVT::v8i16, 0, false, SPU::LRv8i16 },
+ { MVT::v4i32, 0, false, SPU::LRv4i32 },
+ { MVT::v2i64, 0, false, SPU::LRv2i64 },
+ { MVT::v4f32, 0, false, SPU::LRv4f32 },
+ { MVT::v2f64, 0, false, SPU::LRv2f64 }
+ };
+
+ const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
+
+ const valtype_map_s *getValueTypeMapEntry(MVT VT)
+ {
+ const valtype_map_s *retval = 0;
+ for (size_t i = 0; i < n_valtype_map; ++i) {
+ if (valtype_map[i].VT == VT) {
+ retval = valtype_map + i;
+ break;
+ }
+ }
+
+#ifndef NDEBUG
+ if (retval == 0) {
+ cerr << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for "
+ << VT.getMVTString()
+ << "\n";
+ abort();
+ }
+#endif
+
+ return retval;
+ }
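+
+ // Usage sketch: the LDRESULT selection below consults this table to pick
+ // its opcode, e.g. MVT::i32 maps to ORIr32 with ldresult_imm set, so the
+ // result is formed as (ORIr32 arg, 0); the vector rows carry only an LR
+ // opcode and cannot be used for LDRESULT.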
+
+ //! Generate the carry-generate shuffle mask.
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the carry up one register slot
+ // once the carry is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //! Generate the borrow-generate shuffle mask
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
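+
+ // How these masks behave under SHUFB (per the SPU ISA): a mask byte of
+ // the form 0b10xxxxxx (e.g. 0x80) produces 0x00 in the result and
+ // 0b110xxxxx (e.g. 0xc0) produces 0xff, while small byte values select
+ // from the concatenated inputs. Thus 0x04050607/0x0c0d0e0f move each
+ // carry/borrow word up one slot, zero-filled in the carry mask and
+ // ones-filled in the borrow mask.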
+
+ //===------------------------------------------------------------------===//
+ /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class SPUDAGToDAGISel :
+ public SelectionDAGISel
+ {
+ SPUTargetMachine &TM;
+ SPUTargetLowering &SPUtli;
+ unsigned GlobalBaseReg;
+
+ public:
+ explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm),
+ SPUtli(*tm.getTargetLowering())
+ { }
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnFunction(Fn);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint32_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
+ }
+
+ SDNode *emitBuildVector(SDValue build_vec) {
+ MVT vecVT = build_vec.getValueType();
+ MVT eltVT = vecVT.getVectorElementType();
+ SDNode *bvNode = build_vec.getNode();
+ DebugLoc dl = bvNode->getDebugLoc();
+
+ // Check to see if this vector can be represented as a CellSPU immediate
+ // constant by invoking all of the instruction selection predicates:
+ if (((vecVT == MVT::v8i16) &&
+ (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
+ ((vecVT == MVT::v4i32) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
+ ((vecVT == MVT::v2i64) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
+ return Select(build_vec);
+
+ // No, need to emit a constant pool spill:
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i));
+ CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG,
+ SPUtli.getSPUTargetMachine());
+ return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment));
+ }
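+
+ // N.B.: emitBuildVector is the common fallback used below whenever a
+ // BUILD_VECTOR (e.g. one of the shuffle masks above) must be
+ // materialized; if no immediate form matches, the vector is spilled to
+ // the constant pool and re-loaded with a 16-byte aligned load.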
+
+ /// Select - Convert the specified operand from a target-independent to a
+ /// target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue Op);
+
+ //! Emit the instruction sequence for i64 shl
+ SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the instruction sequence for i64 srl
+ SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the instruction sequence for i64 sra
+ SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the necessary sequence for loading i64 constants:
+ SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl);
+
+ //! Alternate instruction emit sequence for loading i64 constants
+ SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl);
+
+ //! Returns true if the address N is an A-form (local store) address
+ bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ //! D-form address predicate
+ bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// Alternate D-form address using i7 offset predicate
+ bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base);
+
+ /// D-form address selection workhorse
+ bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base, int minOffset, int maxOffset);
+
+ //! Address predicate if N can be expressed as an indexed [r+r] operation.
+ bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectDFormAddr(Op, Op, Op0, Op1)
+ && !SelectAFormAddr(Op, Op, Op0, Op1))
+ SelectXFormAddr(Op, Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectDFormAddr(Op, Op, Op0, Op1)
+ && !SelectAFormAddr(Op, Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+#if 1
+ assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled.");
+#else
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+#endif
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "Cell SPU DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+ /// this target when scheduling the DAG.
+ virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
+ const TargetInstrInfo *II = TM.getInstrInfo();
+ assert(II && "No InstrInfo?");
+ return new SPUHazardRecognizer(*II);
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SPUGenDAGISel.inc"
+ };
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void
+SPUDAGToDAGISel::InstructionSelect()
+{
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address to be tested
+ \arg Base The base address
+ \arg Index The base address index
+ */
+bool
+SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ // These match the addr256k operand type:
+ MVT OffsVT = MVT::i16;
+ SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
+
+ switch (N.getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantPool:
+ case ISD::GlobalAddress:
+ cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n";
+ abort();
+ /*NOTREACHED*/
+
+ case ISD::TargetConstant:
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetJumpTable:
+ cerr << "SPUSelectAFormAddr: Target Constant/Pool/Global not wrapped as "
+ << "A-form address.\n";
+ abort();
+ /*NOTREACHED*/
+
+ case SPUISD::AFormAddr:
+ // Just load from memory if there's only a single use of the location,
+ // otherwise, this will get handled below with D-form offset addresses
+ if (N.hasOneUse()) {
+ SDValue Op0 = N.getOperand(0);
+ switch (Op0.getOpcode()) {
+ case ISD::TargetConstantPool:
+ case ISD::TargetJumpTable:
+ Base = Op0;
+ Index = Zero;
+ return true;
+
+ case ISD::TargetGlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
+ GlobalValue *GV = GSDN->getGlobal();
+ if (GV->getAlignment() == 16) {
+ Base = Op0;
+ Index = Zero;
+ return true;
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
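+
+// A-form addresses are absolute local-store addresses (the 18-bit
+// "addr256k" operand mentioned above): on success, Base holds the wrapped
+// target constant-pool/jump-table/global node and Index is simply 0,
+// which the target patterns can then match as lqa/stqa-style accesses.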
+
+bool
+SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ const int minDForm2Offset = -(1 << 7);
+ const int maxDForm2Offset = (1 << 7) - 1;
+ return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
+ maxDForm2Offset);
+}
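+
+// Here the [-128, 127] window corresponds to the signed 7-bit (i7)
+// displacement variant mentioned in the class declaration above.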
+
+/*!
+ \arg Op The ISD instruction (ignored)
+ \arg N The address to be tested
+ \arg Base Base address register/pointer
+ \arg Index Base address index
+
+ Examine the input address by a base register plus a signed 10-bit
+ displacement, [r+I10] (D-form address).
+
+ \return true if \a N is a D-form address with \a Base and \a Index set
+ to non-empty SDValue instances.
+*/
+bool
+SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return DFormAddressPredicate(Op, N, Base, Index,
+ SPUFrameInfo::minFrameOffset(),
+ SPUFrameInfo::maxFrameOffset());
+}
+
+bool
+SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index, int minOffset,
+ int maxOffset) {
+ unsigned Opc = N.getOpcode();
+ MVT PtrTy = SPUtli.getPointerTy();
+
+ if (Opc == ISD::FrameIndex) {
+ // The frame index, converted to a stack offset, must be within D-form range:
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N);
+ int FI = int(FIN->getIndex());
+ DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
+ << FI << "\n");
+ if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (Opc == ISD::ADD) {
+ // Generated by getelementptr
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
+ || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (Op1.getOpcode() == ISD::Constant
+ || Op1.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op0.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op0);
+ int FI = int(FIN->getIndex());
+ DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op0;
+ return true;
+ }
+ } else if (Op0.getOpcode() == ISD::Constant
+ || Op0.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op1.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op1);
+ int FI = int(FIN->getIndex());
+ DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op1;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Indirect with constant offset -> D-Form address
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::Hi
+ && Op1.getOpcode() == SPUISD::Lo) {
+ // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
+ int32_t offset = 0;
+ SDValue idxOp;
+
+ if (isa<ConstantSDNode>(Op1)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op0;
+ } else if (isa<ConstantSDNode>(Op0)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op1;
+ }
+
+ if (offset >= minOffset && offset <= maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = idxOp;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::AFormAddr) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == SPUISD::LDRESULT) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) {
+ unsigned OpOpc = Op.getOpcode();
+
+ if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
+ // Direct load/store without getelementptr
+ SDValue Addr, Offs;
+
+ // Get the register from CopyFromReg
+ if (Opc == ISD::CopyFromReg)
+ Addr = N.getOperand(1);
+ else
+ Addr = N; // Register
+
+ Offs = ((OpOpc == ISD::STORE) ? Op.getOperand(3) : Op.getOperand(2));
+
+ if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
+ if (Offs.getOpcode() == ISD::UNDEF)
+ Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
+
+ Base = Offs;
+ Index = Addr;
+ return true;
+ }
+ } else {
+ /* If otherwise unadorned, default to D-form address with 0 offset: */
+ if (Opc == ISD::CopyFromReg) {
+ Index = N.getOperand(1);
+ } else {
+ Index = N;
+ }
+
+ Base = CurDAG->getTargetConstant(0, Index.getValueType());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address operand
+ \arg Base The base pointer operand
+ \arg Index The offset/index operand
+
+ If the address \a N can be expressed as an A-form or D-form address, returns
+ false. Otherwise, creates two operands, \a Base and \a Index, that will
+ become the (r)(r) X-form address.
+*/
+bool
+SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ if (!SelectAFormAddr(Op, N, Base, Index)
+ && !SelectDFormAddr(Op, N, Base, Index)) {
+ // If the address is neither A-form nor D-form, punt and use an X-form
+ // address:
+ Base = N.getOperand(1);
+ Index = N.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
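+
+// The order of preference encoded above is A-form (absolute), then D-form
+// (base + 10-bit displacement), then X-form (reg + reg). For example, an
+// (add %a, %b) address with no constant operand fails both earlier
+// predicates and lands here with Base = operand 1 and Index = operand 0.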
+
+//! Convert the operand from a target-independent to a target-specific node
+/*!
+ */
+SDNode *
+SPUDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ unsigned Opc = N->getOpcode();
+ int n_ops = -1;
+ unsigned NewOpc;
+ MVT OpVT = Op.getValueType();
+ SDValue Ops[8];
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+
+ if (Opc == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
+ SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
+
+ if (FI < 128) {
+ NewOpc = SPU::AIr32;
+ Ops[0] = TFI;
+ Ops[1] = Imm0;
+ n_ops = 2;
+ } else {
+ NewOpc = SPU::Ar32;
+ Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
+ Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, dl, Op.getValueType(),
+ TFI, Imm0), 0);
+ n_ops = 2;
+ }
+ } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
+ // Catch the i64 constants that end up here. Note: The backend doesn't
+ // attempt to legalize the constant (it's useless because DAGCombiner
+ // will insert 64-bit constants and we can't stop it).
+ return SelectI64Constant(Op, OpVT, Op.getDebugLoc());
+ } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+ && OpVT == MVT::i64) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+ MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+ MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue shufMask;
+
+ switch (Op0VT.getSimpleVT()) {
+ default:
+ cerr << "CellSPU Select: Unhandled zero/any extend MVT\n";
+ abort();
+ /*NOTREACHED*/
+ break;
+ case MVT::i32:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x00010203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80808003, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x8080800b, MVT::i32));
+ break;
+ }
+
+ SDNode *shufMaskLoad = emitBuildVector(shufMask);
+ SDNode *PromoteScalar =
+ SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, Op0VecVT, Op0));
+
+ SDValue zextShuffle =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue(PromoteScalar, 0),
+ SDValue(PromoteScalar, 0),
+ SDValue(shufMaskLoad, 0));
+
+ // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
+ // re-use it in the VEC2PREFSLOT selection without needing to explicitly
+ // call SelectCode (it's already done for us).
+ SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle));
+ return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+ zextShuffle));
+ } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
+
+ return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl));
+
+ return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
+
+ return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::TRUNCATE) {
+ SDValue Op0 = Op.getOperand(0);
+ if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
+ && OpVT == MVT::i32
+ && Op0.getValueType() == MVT::i64) {
+ // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32
+ //
+ // Take advantage of the fact that the upper 32 bits are in the
+ // i32 preferred slot and avoid shuffle gymnastics:
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ if (CN != 0) {
+ unsigned shift_amt = unsigned(CN->getZExtValue());
+
+ if (shift_amt >= 32) {
+ SDNode *hi32 =
+ CurDAG->getTargetNode(SPU::ORr32_r64, dl, OpVT,
+ Op0.getOperand(0));
+
+ shift_amt -= 32;
+ if (shift_amt > 0) {
+ // Take care of the additional shift, if present:
+ SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
+ unsigned Opc = SPU::ROTMAIr32_i32;
+
+ if (Op0.getOpcode() == ISD::SRL)
+ Opc = SPU::ROTMr32;
+
+ hi32 = CurDAG->getTargetNode(Opc, dl, OpVT, SDValue(hi32, 0),
+ shift);
+ }
+
+ return hi32;
+ }
+ }
+ }
+ } else if (Opc == ISD::SHL) {
+ if (OpVT == MVT::i64) {
+ return SelectSHLi64(Op, OpVT);
+ }
+ } else if (Opc == ISD::SRL) {
+ if (OpVT == MVT::i64) {
+ return SelectSRLi64(Op, OpVT);
+ }
+ } else if (Opc == ISD::SRA) {
+ if (OpVT == MVT::i64) {
+ return SelectSRAi64(Op, OpVT);
+ }
+ } else if (Opc == ISD::FNEG
+ && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Check if the pattern is a special form of DFNMS:
+ // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
+ SDValue Op0 = Op.getOperand(0);
+ if (Op0.getOpcode() == ISD::FSUB) {
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == ISD::FMUL) {
+ unsigned Opc = SPU::DFNMSf64;
+ if (OpVT == MVT::v2f64)
+ Opc = SPU::DFNMSv2f64;
+
+ return CurDAG->getTargetNode(Opc, dl, OpVT,
+ Op00.getOperand(0),
+ Op00.getOperand(1),
+ Op0.getOperand(1));
+ }
+ }
+
+ SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
+ SDNode *signMask = 0;
+ unsigned Opc = SPU::XORfneg64;
+
+ if (OpVT == MVT::f64) {
+ signMask = SelectI64Constant(negConst, MVT::i64, dl);
+ } else if (OpVT == MVT::v2f64) {
+ Opc = SPU::XORfnegvec;
+ signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v2i64,
+ negConst, negConst));
+ }
+
+ return CurDAG->getTargetNode(Opc, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ } else if (Opc == ISD::FABS) {
+ if (OpVT == MVT::f64) {
+ SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
+ return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ } else if (OpVT == MVT::v2f64) {
+ SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
+ SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ absConst, absConst);
+ SDNode *signMask = emitBuildVector(absVec);
+ return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ }
+ } else if (Opc == SPUISD::LDRESULT) {
+ // Custom select instructions for LDRESULT
+ MVT VT = N->getValueType(0);
+ SDValue Arg = N->getOperand(0);
+ SDValue Chain = N->getOperand(1);
+ SDNode *Result;
+ const valtype_map_s *vtm = getValueTypeMapEntry(VT);
+
+ if (vtm->ldresult_ins == 0) {
+ cerr << "LDRESULT for unsupported type: "
+ << VT.getMVTString()
+ << "\n";
+ abort();
+ }
+
+ Opc = vtm->ldresult_ins;
+ if (vtm->ldresult_imm) {
+ SDValue Zero = CurDAG->getTargetConstant(0, VT);
+
+ Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
+ } else {
+ Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
+ }
+
+ return Result;
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Look at the operands: SelectCode() will catch the cases that aren't
+ // specifically handled here.
+ //
+ // SPUInstrInfo catches the following patterns:
+ // (SPUindirect (SPUhi ...), (SPUlo ...))
+ // (SPUindirect $sp, imm)
+ MVT VT = Op.getValueType();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ RegisterSDNode *RN;
+
+ if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+ || (Op0.getOpcode() == ISD::Register
+ && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+ && RN->getReg() != SPU::R1))) {
+ NewOpc = SPU::Ar32;
+ if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
+ NewOpc = (isI32IntS10Immediate(CN) ? SPU::AIr32 : SPU::Ar32);
+ }
+ Ops[0] = Op0;
+ Ops[1] = Op1;
+ n_ops = 2;
+ }
+ }
+
+ if (n_ops > 0) {
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
+ else
+ return CurDAG->getTargetNode(NewOpc, dl, OpVT, Ops, n_ops);
+ } else
+ return SelectCode(Op);
+}
+
+/*!
+ * Emit the instruction sequence for i64 left shifts. The basic algorithm
+ * is to fill the bottom two word slots with zeros so that zeros are shifted
+ * in as the entire quadword is shifted left.
+ *
+ * \note This code could also be used to implement v2i64 shl.
+ *
+ * @param Op The shl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
+ SDValue SelMaskVal;
+ DebugLoc dl = Op.getDebugLoc();
+
+ VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+ SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
+ SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
+ ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0, OpVT));
+ VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(ZeroFill, 0),
+ SDValue(VecOp0, 0),
+ SDValue(SelMask, 0));
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
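+
+// Worked example for the constant-shift path above: an amount of 35
+// splits into bytes = 35 >> 3 = 4 and bits = 35 & 7 = 3, so after the
+// SELB zero-fill the emitted sequence is SHLQBYIv2i64 by 4 bytes followed
+// by SHLQBIIv2i64 by the remaining 3 bits.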
+
+/*!
+ * Emit the instruction sequence for i64 logical right shifts.
+ *
+ * @param Op The srl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *Shift = 0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+
+ // Ensure that the shift amounts are negated!
+ Bytes = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bytes, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Bits = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bits, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
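+
+// Note the SFIr32 nodes above: SFI computes (0 - rT), so the byte and bit
+// counts are negated before feeding ROTQMBY/ROTQMBI, which interpret
+// their operand as a negative (rightward) shift count; a runtime amount
+// of 35 therefore becomes -4 bytes and -3 bits.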
+
+/*!
+ * Emit the instruction sequence for i64 arithmetic right shifts.
+ *
+ * @param Op The sra operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
+ // Promote Op0 to vector
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDNode *VecOp0 =
+ CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0));
+
+ SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
+ SDNode *SignRot =
+ CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
+ SDValue(VecOp0, 0), SignRotAmt);
+ SDNode *UpperHalfSign =
+ CurDAG->getTargetNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
+
+ SDNode *UpperHalfSignMask =
+ CurDAG->getTargetNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
+ SDNode *UpperLowerMask =
+ CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+ SDNode *UpperLowerSelect =
+ CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(UpperHalfSignMask, 0),
+ SDValue(VecOp0, 0),
+ SDValue(UpperLowerMask, 0));
+
+ SDNode *Shift = 0;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ bytes = 31 - bytes;
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBYIv2i64, dl, VecVT,
+ SDValue(UpperLowerSelect, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ bits = 8 - bits;
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *NegShift =
+ CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+ ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
+ SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(NegShift, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
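+
+// Shape of the sequence above: ROTMAI by 31 exposes the sign word, FSM
+// splats it into a byte mask, SELB merges that sign-fill with the
+// original value's upper half, and the final rotates move the merged
+// quadword into place (with the byte/bit counts adjusted in the constant
+// case because the ROTQBY/ROTQBI family rotates left).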
+
+/*!
+ Do the magic necessary to load an i64 constant
+ */
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT,
+ DebugLoc dl) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
+ return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
+}
+
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
+ DebugLoc dl) {
+ MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
+ SDValue i64vec =
+ SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
+
+ // Here's where it gets interesting, because we have to parse out the
+ // subtree handed back in i64vec:
+
+ if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
+ // The degenerate case, where the upper and lower halves of the splat
+ // are identical:
+ SDValue Op0 = i64vec.getOperand(0);
+
+ ReplaceUses(i64vec, Op0);
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(emitBuildVector(Op0), 0));
+ } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
+ SDValue lhs = i64vec.getOperand(0);
+ SDValue rhs = i64vec.getOperand(1);
+ SDValue shufmask = i64vec.getOperand(2);
+
+ if (lhs.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(lhs, lhs.getOperand(0));
+ lhs = lhs.getOperand(0);
+ }
+
+ SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
+ ? lhs.getNode()
+ : emitBuildVector(lhs));
+
+ if (rhs.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(rhs, rhs.getOperand(0));
+ rhs = rhs.getOperand(0);
+ }
+
+ SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
+ ? rhs.getNode()
+ : emitBuildVector(rhs));
+
+ if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(shufmask, shufmask.getOperand(0));
+ shufmask = shufmask.getOperand(0);
+ }
+
+ SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
+ ? shufmask.getNode()
+ : emitBuildVector(shufmask));
+
+ SDNode *shufNode =
+ Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue(lhsNode, 0), SDValue(rhsNode, 0),
+ SDValue(shufMaskNode, 0)));
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(shufNode, 0));
+ } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(emitBuildVector(i64vec), 0));
+ } else {
+ cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
+ abort();
+ }
+}
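+
+// LowerV2I64Splat hands back one of three shapes, each handled above: a
+// BIT_CONVERT of a uniform splat, a SHUFB of two buildable halves plus a
+// shuffle mask, or a plain BUILD_VECTOR; anything else aborts. In every
+// case the selected v2i64 is moved back to a scalar i64 via ORi64_v2i64.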
+
+/// createSPUISelDag - This pass converts a legalized DAG into a
+/// SPU-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
+ return new SPUDAGToDAGISel(TM);
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
new file mode 100644
index 0000000..864a914
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -0,0 +1,2980 @@
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPURegisterNames.h"
+#include "SPUISelLowering.h"
+#include "SPUTargetMachine.h"
+#include "SPUFrameInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include <map>
+
+using namespace llvm;
+
+// Used in getTargetNodeName() below
+namespace {
+ std::map<unsigned, const char *> node_names;
+
+ //! MVT mapping to useful data for Cell SPU
+ struct valtype_map_s {
+ const MVT valtype;
+ const int prefslot_byte;
+ };
+
+ const valtype_map_s valtype_map[] = {
+ { MVT::i1, 3 },
+ { MVT::i8, 3 },
+ { MVT::i16, 2 },
+ { MVT::i32, 0 },
+ { MVT::f32, 0 },
+ { MVT::i64, 0 },
+ { MVT::f64, 0 },
+ { MVT::i128, 0 }
+ };
+
+ const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
+
+ const valtype_map_s *getValueTypeMapEntry(MVT VT) {
+ const valtype_map_s *retval = 0;
+
+ for (size_t i = 0; i < n_valtype_map; ++i) {
+ if (valtype_map[i].valtype == VT) {
+ retval = valtype_map + i;
+ break;
+ }
+ }
+
+#ifndef NDEBUG
+ if (retval == 0) {
+ cerr << "getValueTypeMapEntry returns NULL for "
+ << VT.getMVTString()
+ << "\n";
+ abort();
+ }
+#endif
+
+ return retval;
+ }
+
+ //! Expand a library call into an actual call DAG node
+ /*!
+ \note
+ This code is taken from SelectionDAGLegalize, since it is not exposed as
+ part of the LLVM SelectionDAG API.
+ */
+
+ SDValue
+ ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+ bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ MVT ArgVT = Op.getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForMVT();
+ Entry.Node = Op.getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ CallingConv::C, false, Callee, Args, DAG,
+ Op.getDebugLoc());
+
+ return CallInfo.first;
+ }
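+
+ // A local helper: the custom lowerings later in this file use this to
+ // turn an operation the SPU lacks in hardware into an explicit call,
+ // presumably to one of the RTLIB entries configured in the constructor
+ // below.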
+}
+
+SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
+ : TargetLowering(TM),
+ SPUTM(TM)
+{
+ // Fold away setcc operations if possible.
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set RTLIB libcall names as used by SPU:
+ setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
+ // Set up the SPU's register classes:
+ addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
+ addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
+ addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
+ addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
+ addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
+ addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
+ addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+
+ // SPU has no sign or zero extended loads for i1, i8, i16:
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ // SPU constant load actions are custom lowered:
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+ // SPU's loads and stores have to be custom lowered:
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ // Expand the jumptable branches
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Custom lower SELECT_CC for most cases, but expand by default
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ // SPU has no intrinsics for these particular operations:
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ // SPU has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+ // for f32!)
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // SPU can do rotate right and left, so legalize it... but customize for i8
+ // because instructions don't exist.
+
+ // FIXME: Change from "expand" to appropriate type once ROTR is supported in
+ // .td files.
+ setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i16, Legal);
+ setOperationAction(ISD::ROTL, MVT::i8, Custom);
+
+ // SPU has no native version of shift left/right for i8
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+
+ // Make these operations legal and handle them during instruction selection:
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
+
+ // Custom lower i8 multiplications; the i32 and i64 multiplies are marked
+ // legal (the i64 case is caught during instruction selection):
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+
+ // Need to custom handle (some) common i8, i64 math ops
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::SUB, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
+
+ // SPU does not have BSWAP. It does support CTLZ for i32.
+ // CTPOP has to be custom lowered.
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+
+ setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+
+ // SPU has a version of select that implements (a&~c)|(b&c), just like
+ // select ought to work:
+ setOperationAction(ISD::SELECT, MVT::i8, Legal);
+ setOperationAction(ISD::SELECT, MVT::i16, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
+
+ setOperationAction(ISD::SETCC, MVT::i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::i16, Legal);
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ // Custom lower i128 -> i64 truncates
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
+ // SPU has a legal FP -> signed INT instruction for f32, but f64 needs to
+ // be expanded into a libcall, hence the custom lowering:
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+ // FDIV on SPU requires custom lowering
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
+
+ // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Support label based line numbers.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ }
+
+ // RET must be custom lowered, to meet ABI requirements
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
+
+ // Cell SPU has instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+
+ // "Odd size" vector classes that we're willing to support:
+ addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
+
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+ // mul is also marked legal; the i64/v2i64 cases are handled during
+ // instruction selection (see MUL64_MARKER).
+ setOperationAction(ISD::MUL, VT, Legal);
+
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+
+ // These operations need to be expanded:
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Custom lower build_vector, constant pool spills, insert and
+ // extract vector elements:
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+
+ setOperationAction(ISD::AND, MVT::v16i8, Custom);
+ setOperationAction(ISD::OR, MVT::v16i8, Custom);
+ setOperationAction(ISD::XOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+
+ setShiftAmountType(MVT::i32);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+ setStackPointerRegisterToSaveRestore(SPU::R1);
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ computeRegisterProperties();
+
+ // Set pre-RA register scheduler default to BURR, which produces slightly
+ // better code than the default (could also be TDRR, but TargetLowering.h
+ // needs a mod to support that model):
+ setSchedulingPreference(SchedulingForRegPressure);
+}
+
+const char *
+SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ if (node_names.empty()) {
+ node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
+ node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
+ node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
+ node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
+ node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
+ node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
+ node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
+ node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
+ node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
+ node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
+ node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
+ node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
+ node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
+ node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
+ node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
+ node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
+ node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
+ node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
+ node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+ "SPUISD::ROTBYTES_LEFT_BITS";
+ node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
+ node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
+ node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+ node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+ node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
+ }
+
+ std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
+
+ return ((i != node_names.end()) ? i->second : 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
+MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
+ // i8, i16 and i32 are valid SETCC result types
+ return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
+}
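+
+// For example, a setcc over f64 or any vector type gets an MVT::i32
+// result here, while i8/i16/i32 comparisons keep their own width.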
+
+//===----------------------------------------------------------------------===//
+// Calling convention code:
+//===----------------------------------------------------------------------===//
+
+#include "SPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// Custom lower loads for CellSPU
+/*!
+ All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to rotate to extract the requested element.
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f32,ch = bitconvert %2
+%4 f32 = vec2prefslot %3
+%5 f64 = fp_extend %4
+\endverbatim
+*/
+static SDValue
+LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ LoadSDNode *LN = cast<LoadSDNode>(Op);
+ SDValue the_chain = LN->getChain();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT InVT = LN->getMemoryVT();
+ MVT OutVT = Op.getValueType();
+ ISD::LoadExtType ExtType = LN->getExtensionType();
+ unsigned alignment = LN->getAlignment();
+ const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (LN->getAddressingMode()) {
+ case ISD::UNINDEXED: {
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
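+
+ // For example, an i16 load (preferred-slot byte 2 in valtype_map) at a
+ // known quadword offset of 6 yields rotamt = 6 - 2 = 4, i.e. rotate the
+ // loaded v16i8 left by 4 bytes.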
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(rotamt, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-vtm->prefslot_byte, PtrVT));
+ }
+
+ // Re-emit as a v16i8 vector load
+ result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->isVolatile(), 16);
+
+ // Update the chain
+ the_chain = result.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
+ result.getValue(0), rotate);
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
+ DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
+
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
+
+ result = DAG.getNode(NewOpc, dl, OutVT, result);
+ }
+
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
+ SDValue retops[2] = {
+ result,
+ the_chain
+ };
+
+ result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
+ retops, sizeof(retops) / sizeof(retops[0]));
+ return result;
+ }
+ case ISD::PRE_INC:
+ case ISD::PRE_DEC:
+ case ISD::POST_INC:
+ case ISD::POST_DEC:
+ case ISD::LAST_INDEXED_MODE:
+ cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+ "UNINDEXED\n";
+ cerr << (unsigned) LN->getAddressingMode() << "\n";
+ abort();
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+/// Custom lower stores for CellSPU
+/*!
+ All CellSPU stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to generate a shuffle to insert the
+ requested element into its place, then store the resulting block.
+ */
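+//
+// A sketch of the resulting read-modify-write sequence (simplified, for a
+// scalar store of value X whose containing quadword starts at base):
+//   chunk  = (load v16i8 base)
+//   mask   = (SPUISD::SHUFFLE_MASK insertEltOffs)   ; insertion control word
+//   merged = (SPUISD::SHUFB (scalar_to_vector X), chunk, mask)
+//   (store merged base)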
+static SDValue
+LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ StoreSDNode *SN = cast<StoreSDNode>(Op);
+ SDValue Value = SN->getValue();
+ MVT VT = Value.getValueType();
+ MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned alignment = SN->getAlignment();
+
+ switch (SN->getAddressingMode()) {
+ case ISD::UNINDEXED: {
+ // The vector types we really want to use for the 16-byte chunk:
+ MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
+ stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned store: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Re-emit as a v16i8 vector load
+ alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ SN->getSrcValue(), SN->getSrcValueOffset(),
+ SN->isVolatile(), 16);
+
+ // Update the chain
+ the_chain = alignLoadVec.getValue(1);
+
+ LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+ SDValue theValue = SN->getValue();
+ SDValue result;
+
+ if (StVT != VT
+ && (theValue.getOpcode() == ISD::AssertZext
+ || theValue.getOpcode() == ISD::AssertSext)) {
+ // Drill down and get the value for zero- and sign-extended
+ // quantities
+ theValue = theValue.getOperand(0);
+ }
+
+ // If the base pointer is already a D-form address, then just create
+ // a new D-form address with a slot offset and the original base pointer.
+ // Otherwise generate a D-form address with the slot offset relative
+ // to the stack pointer, which is always aligned.
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ cerr << "\n";
+ }
+#endif
+
+ SDValue insertEltOp =
+ DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
+ SDValue vectorizeOp =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+
+ result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+ vectorizeOp, alignLoadVec,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::v4i32, insertEltOp));
+
+ result = DAG.getStore(the_chain, dl, result, basePtr,
+ LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->isVolatile(), LN->getAlignment());
+
+#if 0 && !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ const SDValue &currentRoot = DAG.getRoot();
+
+ DAG.setRoot(result);
+ cerr << "------- CellSPU:LowerStore result:\n";
+ DAG.dump();
+ cerr << "-------\n";
+ DAG.setRoot(currentRoot);
+ }
+#endif
+
+ return result;
+ /*NOTREACHED*/
+ }
+ case ISD::PRE_INC:
+ case ISD::PRE_DEC:
+ case ISD::POST_INC:
+ case ISD::POST_DEC:
+ case ISD::LAST_INDEXED_MODE:
+ cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+ "UNINDEXED\n";
+ cerr << (unsigned) SN->getAddressingMode() << "\n";
+ abort();
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+//! Generate the address of a constant pool entry.
+SDValue
+LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ MVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ // Just return the SDValue with the constant pool address in it.
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ assert(0 &&
+ "LowerConstantPool: Relocation model other than static"
+ " not supported.");
+ return SDValue();
+}
+
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+ return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
+static SDValue
+LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ assert(0 &&
+ "LowerJumpTable: Relocation model other than static not supported.");
+ return SDValue();
+}
+
+static SDValue
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ MVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ const TargetMachine &TM = DAG.getTarget();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ } else {
+ cerr << "LowerGlobalAddress: Relocation model other than static not "
+ << "supported.\n";
+ abort();
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+//! Custom lower double precision floating point constants
+static SDValue
+LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (VT == MVT::f64) {
+ ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+ assert((FP != 0) &&
+ "LowerConstantFP: Node is not ConstantFPSDNode");
+
+ uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
+ }
+
+ return SDValue();
+}
+
+static SDValue
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SmallVector<SDValue, 48> ArgValues;
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
+ const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+
+ unsigned ArgOffset = SPUFrameInfo::minStackSize();
+ unsigned ArgRegIdx = 0;
+ unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Add DAG nodes to load the arguments or copy them out of registers.
+ for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
+ ArgNo != e; ++ArgNo) {
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ SDValue ArgVal;
+
+ if (ArgRegIdx < NumArgRegs) {
+ const TargetRegisterClass *ArgRegClass;
+
+ switch (ObjectVT.getSimpleVT()) {
+ default: {
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << ObjectVT.getMVTString()
+ << "\n";
+ abort();
+ }
+ case MVT::i8:
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
+ case MVT::i16:
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
+ case MVT::i32:
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
+ case MVT::i64:
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
+ case MVT::i128:
+ ArgRegClass = &SPU::GPRCRegClass;
+ break;
+ case MVT::f32:
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
+ case MVT::f64:
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
+ RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ ++ArgRegIdx;
+ } else {
+ // We need to load the argument from the stack if we determined above
+ // that we ran out of physical registers of the appropriate type, or if
+ // we are forced onto the stack by varargs.
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ ArgOffset += StackSlotSize;
+ }
+
+ ArgValues.push_back(ArgVal);
+ // Update the chain
+ Root = ArgVal.getOperand(0);
+ }
+
+ // vararg handling:
+ if (isVarArg) {
+ // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
+ // We will spill at most (79-3)+1 = 77 remaining argument registers to the stack
+ SmallVector<SDValue, 79-3+1> MemOps;
+
+ // Create a frame slot for each remaining argument register and spill it:
+ for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
+ VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
+ SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
+ SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
+ Root = Store.getOperand(0);
+ MemOps.push_back(Store);
+
+ // Increment address by stack slot size for the next stored argument
+ ArgOffset += StackSlotSize;
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size());
+}
+
+/// isLSAAddress - Return the immediate to use if the specified
+/// value is representable as an LSA address.
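+/// For example, Addr = 0x1fffc (low two bits zero, sign-extends from 18
+/// bits) yields the immediate 0x7fff, while Addr = 0x40000 is rejected.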
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 14 >> 14) != Addr)
+ return 0; // Top 14 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
+}
+
+static SDValue
+LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ unsigned NumOps = TheCall->getNumArgs();
+ unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+ const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
+ const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Handy pointer type
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Accumulate how many bytes are to be pushed on the stack, including the
+ // linkage area, and parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP]
+ unsigned NumStackBytes = SPUFrameInfo::minStackSize();
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory.
+ unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
+ unsigned ArgRegIdx = 0;
+
+ // Keep track of registers passing arguments
+ std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+ // And the arguments passed on the stack
+ SmallVector<SDValue, 8> MemOpChains;
+
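+ // Argument assignment is purely positional (a sketch of the loop below):
+ // each argument, scalar or vector, takes the next free register in
+ // ArgRegs; once ArgRegIdx reaches NumArgRegs, the remaining arguments are
+ // stored to the stack at successive StackSlotSize offsets.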
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ switch (Arg.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::i128:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ }
+ }
+
+ // Update number of stack bytes actually used, insert a call sequence start
+ NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
+ true));
+
+ if (!MemOpChains.empty()) {
+ // Adjust the stack pointer for the stack arguments.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = SPUISD::CALL;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ GlobalValue *GV = G->getGlobal();
+ MVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
+
+ if (!ST->usingLargeMem()) {
+ // Turn calls to targets that are defined (i.e., have bodies) into
+ // BRSL-style calls; otherwise, external symbols become BRASL calls.
+ // This assumes
+ // that declared/defined symbols are in the same compilation unit and can
+ // be reached through PC-relative jumps.
+ //
+ // NOTE:
+ // This may be an unsafe assumption for JIT and really large compilation
+ // units.
+ if (GV->isDeclaration()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
+ }
+ } else {
+ // "Large memory" mode: Turn all calls into indirect calls with a X-form
+ // address pairs:
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ MVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+
+ if (!ST->usingLargeMem()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
+ }
+ } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+ // If this is an absolute destination address that appears to be a legal
+ // local store address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+ &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (TheCall->getValueType(0) != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ SDValue ResultVals[3];
+ unsigned NumResults = 0;
+
+ // If the call has results, copy the values out of the ret val registers.
+ switch (TheCall->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other: break;
+ case MVT::i32:
+ if (TheCall->getValueType(1) == MVT::i32) {
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
+ MVT::i32, InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
+ Chain.getValue(2)).getValue(1);
+ ResultVals[1] = Chain.getValue(0);
+ NumResults = 2;
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ }
+ break;
+ case MVT::i64:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
+ case MVT::i128:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
+ }
+
+ // If the function returns void, just return the chain.
+ if (NumResults == 0)
+ return Chain;
+
+ // Otherwise, merge everything together with a MERGE_VALUES node.
+ ResultVals[NumResults++] = Chain;
+ SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
+ return Res.getValue(Op.getResNo());
+}
+
+static SDValue
+LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+ CCState CCInfo(CC, isVarArg, TM, RVLocs);
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering:
+//===----------------------------------------------------------------------===//
+
+static ConstantSDNode *
+getVecImm(SDNode *N) {
+ SDValue OpVal(0, 0);
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return 0;
+ }
+
+ if (OpVal.getNode() != 0) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ return CN;
+ }
+ }
+
+ return 0;
+}
+
+/// get_vec_u18imm - Test if this vector is a vector filled with the same value
+/// and the value fits into an unsigned 18-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value <= 0x3ffff)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i16imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 16-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i10imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 10-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (isS10Constant(Value))
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i8imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 8-bit constant, and if so, return the
+/// constant.
+///
+/// @note: The incoming vector is v16i8 because that's the only way we can load
+/// constant vectors. Thus, we test to see if the upper and lower bytes are the
+/// same value.
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int Value = (int) CN->getZExtValue();
+ if (ValueType == MVT::i16
+ && Value <= 0xffff /* truncated from uint64_t */
+ && ((short) Value >> 8) == ((short) Value & 0xff))
+ return DAG.getTargetConstant(Value & 0xff, ValueType);
+ else if (ValueType == MVT::i8
+ && (Value & 0xff) == Value)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
+/// value whose lower 16 bits are zero (i.e., an ILHU upper-halfword
+/// immediate), and if so, return the constant shifted right by 16 bits
+SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if ((ValueType == MVT::i32
+ && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+ || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+ return DAG.getTargetConstant(Value >> 16, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
+SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
+ }
+
+ return SDValue();
+}
+
+/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
+SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
+ }
+
+ return SDValue();
+}
+
+//! Lower a BUILD_VECTOR instruction creatively:
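+//
+// A sketch of the strategy: the splat value is recovered with
+// isConstantSplat(); v16i8 splats are widened to v8i16 (8-bit constants have
+// to be expanded to 16 bits), f32/f64 splats are built as the equivalent
+// integer splat and bitcast back, and v2i64 splats are delegated to
+// SPU::LowerV2I64Splat below.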
+SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+ unsigned minSplatBits = EltVT.getSizeInBits();
+
+ if (minSplatBits < 16)
+ minSplatBits = 16;
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ || minSplatBits < SplatBitSize)
+ return SDValue(); // Wasn't a constant vector or splat exceeded min
+
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+
+ switch (VT.getSimpleVT()) {
+ default:
+ cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
+ << VT.getMVTString()
+ << "\n";
+ abort();
+ /*NOTREACHED*/
+ case MVT::v4f32: {
+ uint32_t Value32 = uint32_t(SplatBits);
+ assert(SplatBitSize == 32
+ && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(Value32, MVT::i32);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
+ break;
+ }
+ case MVT::v2f64: {
+ uint64_t f64val = uint64_t(SplatBits);
+ assert(SplatBitSize == 64
+ && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(f64val, MVT::i64);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
+ break;
+ }
+ case MVT::v16i8: {
+ // 8-bit constants have to be expanded to 16-bits
+ unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
+ }
+ case MVT::v8i16: {
+ unsigned short Value16 = SplatBits;
+ SDValue T = DAG.getConstant(Value16, EltVT);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, T);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ }
+ case MVT::v4i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
+ }
+ case MVT::v2i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
+ }
+ case MVT::v2i64: {
+ return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+/*!
+ Lower a splatted v2i64 constant. If the upper and lower 32-bit halves are
+ equal, emit a v4i32 splat that IL/ILA and friends can match. If both halves
+ are "special" byte patterns (0x0, 0xffffffff, or 0x80000000), lower to a
+ constant pool load. Otherwise, synthesize the value with a SHUFB
+ byte-shuffle of the two replicated halves.
+ */
+SDValue
+SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+ DebugLoc dl) {
+ uint32_t upper = uint32_t(SplatVal >> 32);
+ uint32_t lower = uint32_t(SplatVal);
+
+ if (upper == lower) {
+ // Magic constant that can be matched by IL, ILA, et al.
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Val, Val, Val, Val));
+ } else {
+ bool upper_special, lower_special;
+
+ // NOTE: This code creates common-case shuffle masks that can be easily
+ // detected as common expressions. It is not attempting to create highly
+ // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+ // Detect if the upper or lower half is a special shuffle mask pattern:
+ upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+ lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+ // Both upper and lower are special, lower to a constant pool load:
+ if (lower_special && upper_special) {
+ SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ SplatValCN, SplatValCN);
+ }
+
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+
+ // Create lower vector if not a special pattern
+ if (!lower_special) {
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ LO32C, LO32C, LO32C, LO32C));
+ }
+
+ // Create upper vector if not a special pattern
+ if (!upper_special) {
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ HI32C, HI32C, HI32C, HI32C));
+ }
+
+ // If either upper or lower are special, then the two input operands are
+ // the same (basically, one of them is a "don't care")
+ if (lower_special)
+ LO32 = HI32;
+ if (upper_special)
+ HI32 = LO32;
+
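+ // Build the 16-byte SHUFB mask one 32-bit word at a time (a sketch of
+ // the encoding): even words take their bytes from the first operand
+ // (HI32), odd words from the second (LO32, byte indices 16 and up);
+ // where a half is "special", SHUFB literal codes are substituted:
+ // 0x80 produces 0x00, 0xc0 produces 0xff, and 0xe0 produces 0x80 in the
+ // leading byte.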
+ for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
+ for (int j = 0; j < 4; ++j) {
+ SDValue V;
+ bool process_upper, process_lower;
+ val <<= 8;
+ process_upper = (upper_special && (i & 1) == 0);
+ process_lower = (lower_special && (i & 1) == 1);
+
+ if (process_upper || process_lower) {
+ if ((process_upper && upper == 0)
+ || (process_lower && lower == 0))
+ val |= 0x80;
+ else if ((process_upper && upper == 0xffffffff)
+ || (process_lower && lower == 0xffffffff))
+ val |= 0xc0;
+ else if ((process_upper && upper == 0x80000000)
+ || (process_lower && lower == 0x80000000))
+ val |= (j == 0 ? 0xe0 : 0x80);
+ } else
+ val |= i * 4 + j + ((i & 1) * 16);
+ }
+
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+ }
+
+ return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+ }
+}
+
+/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
+/// which the Cell can operate. The code inspects V3 to ascertain whether the
+/// permutation vector, V3, is monotonically increasing with one "exception"
+/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
+/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// In either case, the net result is going to eventually invoke SHUFB to
+/// permute/shuffle the bytes from V1 and V2.
+/// \note
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
+/// generate a control word for byte/halfword/word insertion. This takes care
+/// of a single element move from V2 into V1.
+/// \note
+/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
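+///
+/// For example (a sketch for v4i32): mask (0, 1, 6, 3) is monotonic with a
+/// single element taken from V2 (6 maps to V2[2]), so it uses the
+/// SHUFFLE_MASK path; mask (1, 2, 3, 0) is a pure rotation and lowers to
+/// ROTBYTES_LEFT; any other mask falls back to a full 16-byte permute.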
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // If we have a single element being moved from V1 to V2, this can be handled
+ // using the C*[DX] compute mask instructions, but the vector elements have
+ // to be monotonically increasing with one exception element.
+ MVT VecVT = V1.getValueType();
+ MVT EltVT = VecVT.getVectorElementType();
+ unsigned EltsFromV2 = 0;
+ unsigned V2Elt = 0;
+ unsigned V2EltIdx0 = 0;
+ unsigned CurrElt = 0;
+ unsigned MaxElts = VecVT.getVectorNumElements();
+ unsigned PrevElt = 0;
+ unsigned V0Elt = 0;
+ bool monotonic = true;
+ bool rotate = true;
+
+ if (EltVT == MVT::i8) {
+ V2EltIdx0 = 16;
+ } else if (EltVT == MVT::i16) {
+ V2EltIdx0 = 8;
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
+ V2EltIdx0 = 4;
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+ V2EltIdx0 = 2;
+ } else
+ assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
+
+ for (unsigned i = 0; i != MaxElts; ++i) {
+ if (SVN->getMaskElt(i) < 0)
+ continue;
+
+ unsigned SrcElt = SVN->getMaskElt(i);
+
+ if (monotonic) {
+ if (SrcElt >= V2EltIdx0) {
+ if (1 >= (++EltsFromV2)) {
+ V2Elt = (V2EltIdx0 - SrcElt) << 2;
+ }
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
+
+ ++CurrElt;
+ }
+
+ if (rotate) {
+ if (PrevElt > 0 && SrcElt < MaxElts) {
+ if ((PrevElt == SrcElt - 1)
+ || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ PrevElt = SrcElt;
+ if (SrcElt == 0)
+ V0Elt = i;
+ } else {
+ rotate = false;
+ }
+ } else if (PrevElt == 0) {
+ // First time through, need to keep track of previous element
+ PrevElt = SrcElt;
+ } else {
+ // This isn't a rotation, takes elements from vector 2
+ rotate = false;
+ }
+ }
+ }
+
+ if (EltsFromV2 == 1 && monotonic) {
+ // Compute mask and shuffle
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Initialize temporary register to 0
+ SDValue InitTempReg =
+ DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
+ // Copy register's contents as index in SHUFFLE_MASK:
+ SDValue ShufMaskOp =
+ DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
+ DAG.getTargetConstant(V2Elt, MVT::i32),
+ DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
+ // Use shuffle mask in SHUFB synthetic instruction:
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+ ShufMaskOp);
+ } else if (rotate) {
+ int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
+
+ return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
+ V1, DAG.getConstant(rotamt, MVT::i16));
+ } else {
+ // Convert the SHUFFLE_VECTOR mask's input element units to the
+ // actual bytes.
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+ unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+ for (unsigned j = 0; j < BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
+ }
+}
+
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op0.getNode()->getOpcode() == ISD::Constant) {
+ // For a constant, build the appropriate constant vector, which will
+ // eventually simplify to a vector register load.
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
+ SmallVector<SDValue, 16> ConstVecValues;
+ MVT VT;
+ size_t n_copies;
+
+ // Create a constant vector:
+ switch (Op.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unexpected constant value type in "
+ "LowerSCALAR_TO_VECTOR");
+ case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
+ case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
+ case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
+ case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
+ case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
+ case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+ }
+
+ SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
+ for (size_t j = 0; j < n_copies; ++j)
+ ConstVecValues.push_back(CValue);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
+ &ConstVecValues[0], ConstVecValues.size());
+ } else {
+ // Otherwise, copy the value from one register to another:
+ switch (Op0.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ SDValue N = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue retval;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ // Constant argument:
+ int EltNo = (int) C->getZExtValue();
+
+ // sanity checks:
+ if (VT == MVT::i8 && EltNo >= 16)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ else if (VT == MVT::i32 && EltNo >= 4)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
+ else if (VT == MVT::i64 && EltNo >= 2)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
+ }
+
+ // Need to generate shuffle mask and extract:
+ int prefslot_begin = -1, prefslot_end = -1;
+ int elt_byte = EltNo * VT.getSizeInBits() / 8;
+
+ switch (VT.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
+
+ assert(prefslot_begin != -1 && prefslot_end != -1 &&
+ "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+
+ unsigned int ShufBytes[16];
+ for (int i = 0; i < 16; ++i) {
+ // zero-fill the upper part of the preferred slot; don't care about the
+ // other slots:
+ unsigned int mask_val;
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ? 0x80
+ : elt_byte + (i - prefslot_begin));
+
+ ShufBytes[i] = mask_val;
+ } else
+ ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+ }
+
+ SDValue ShufMask[4];
+ for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+ unsigned bidx = i * 4;
+ unsigned int bits = ((ShufBytes[bidx] << 24) |
+ (ShufBytes[bidx+1] << 16) |
+ (ShufBytes[bidx+2] << 8) |
+ ShufBytes[bidx+3]);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+ }
+
+ SDValue ShufMaskVec =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
+ N, N, ShufMaskVec));
+ } else {
+ // Variable index: Rotate the requested element into slot 0, then replicate
+ // slot 0 across the vector
+ MVT VecVT = N.getValueType();
+ if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
+ abort();
+ }
+
+ // Make life easier by making sure the index is zero-extended to i32
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
+
+ // Scale the index to a bit/byte shift quantity
+ APInt scaleFactor =
+ APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+ unsigned scaleShift = scaleFactor.logBase2();
+ SDValue vecShift;
+
+ if (scaleShift > 0) {
+ // Scale the shift factor:
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
+ }
+
+ vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
+
+ // Replicate the bytes starting at byte 0 across the entire vector (for
+ // consistency with the notion of a unified register set)
+ SDValue replicate;
+
+ switch (VT.getSimpleVT()) {
+ default:
+ cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
+ abort();
+ /*NOTREACHED*/
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ loFactor, hiFactor, loFactor, hiFactor);
+ break;
+ }
+ }
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ vecShift, vecShift, replicate));
+ }
+
+ return retval;
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ SDValue VecOp = Op.getOperand(0);
+ SDValue ValOp = Op.getOperand(1);
+ SDValue IdxOp = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Use $sp ($1) because it's always 16-byte aligned and it's available:
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(CN->getSExtValue(), PtrVT));
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+
+ SDValue result =
+ DAG.getNode(SPUISD::SHUFB, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
+ VecOp,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
+
+ return result;
+}
+
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
+{
+ SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ DebugLoc dl = Op.getDebugLoc();
+ MVT ShiftVT = TLI.getShiftAmountTy();
+
+ assert(Op.getValueType() == MVT::i8);
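+
+ // Common approach (a sketch): i8 lacks direct support for these operators,
+ // so each case widens the operands to i16 (sign- or zero-extending as the
+ // operator requires), performs the operation at i16, and truncates back to
+ // i8. Rotates additionally replicate the low byte into the high byte so
+ // the bits rotated in are the right ones.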
+ switch (Opc) {
+ default:
+ assert(0 && "Unhandled i8 math operator");
+ /*NOTREACHED*/
+ break;
+ case ISD::ADD: {
+ // 8-bit addition: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+
+ }
+
+ case ISD::SUB: {
+ // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::ROTR:
+ case ISD::ROTL: {
+ SDValue N1 = Op.getOperand(1);
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+ ? ISD::ZERO_EXTEND
+ : ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ // Replicate lower 8-bits into upper 8:
+ SDValue ExpandArg =
+ DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+ DAG.getNode(ISD::SHL, dl, MVT::i16,
+ N0, DAG.getConstant(8, MVT::i32)));
+
+ // Truncate back down to i8
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
+ }
+ case ISD::SRL:
+ case ISD::SHL: {
+ SDValue N1 = Op.getOperand(1);
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::ZERO_EXTEND;
+
+ if (N1.getValueType().bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::SRA: {
+ SDValue N1 = Op.getOperand(1);
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::SIGN_EXTEND;
+
+ if (N1VT.bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::MUL: {
+ SDValue N1 = Op.getOperand(1);
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//! Lower byte immediate operations for v16i8 vectors:
+static SDValue
+LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
+ SDValue ConstVec;
+ SDValue Arg;
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ ConstVec = Op.getOperand(0);
+ Arg = Op.getOperand(1);
+ if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ ConstVec = ConstVec.getOperand(0);
+ } else {
+ ConstVec = Op.getOperand(1);
+ Arg = Op.getOperand(0);
+ if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ ConstVec = ConstVec.getOperand(0);
+ }
+ }
+ }
+
+ if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+ if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ && minSplatBits <= SplatBitSize) {
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+
+ SmallVector<SDValue, 16> tcVec;
+ tcVec.assign(16, tc);
+ return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
+ }
+ }
+
+ // These operations (AND, OR, XOR) are legal; they just couldn't be custom
+ // lowered. Return the operation, rather than a null SDValue.
+ return Op;
+}
+
+//! Custom lowering for CTPOP (count population)
+/*!
+ Custom lowering code that counts the number of ones in the input
+ operand. SPU has such an instruction, but it counts the number of
+ ones per byte, which then have to be accumulated.
+*/
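+//
+// Worked example (a sketch for the i32 path below): for input 0x12345678,
+// CNTB yields the per-byte counts 0x02030404; adding the 16-bit-shifted copy
+// gives 0x02030607, adding the 8-bit-shifted copy gives 0x0205090d, and
+// masking with 0xff leaves 0x0d = 13, the population count.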
+static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (VT.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
+ }
+
+ case MVT::i16: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+ SDValue Mask0 = DAG.getConstant(0x1f, MVT::i16);
+ SDValue Shift1 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
+
+ return DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ Tmp1, Shift1),
+ Tmp1),
+ Mask0);
+ }
+
+ case MVT::i32: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+ SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+ SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+ SDValue Shift2 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Comp1 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+ Shift1);
+
+ SDValue Sum1 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
+
+ SDValue Sum1_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
+
+ SDValue Comp2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
+ Shift2);
+ SDValue Sum2 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
+
+ return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
+ }
+
+ case MVT::i64:
+ break;
+ }
+
+ return SDValue();
+}
+
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ SPUTargetLowering &TLI) {
+ MVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+ || OpVT == MVT::i64) {
+ // Convert f32 / f64 to i32 / i64 via libcall.
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+ : RTLIB::getFPTOUINT(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ SPUTargetLowering &TLI) {
+ MVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+ || Op0VT == MVT::i64) {
+ // Convert i32, i64 to f64 via libcall:
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+ : RTLIB::getUINTTOFP(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering
+ */
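+//
+// The key trick (a sketch): a double whose sign bit is set is rewritten as
+// 0x8000000000000000 - bits(x), which turns IEEE sign-magnitude ordering
+// into ordinary two's-complement ordering, so a plain integer compare
+// implements the floating-point relation. For example, -2.0
+// (0xc000000000000000) maps to (i64)-0x4000000000000000 and -1.0
+// (0xbff0000000000000) to (i64)-0x3ff0000000000000, preserving -2.0 < -1.0.
+// NaNs are handled separately via the explicit SETO/SETUO tests.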
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+ DebugLoc dl = Op.getDebugLoc();
+ assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ MVT lhsVT = lhs.getValueType();
+ assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+ MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+ APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ MVT IntVT(MVT::i64);
+
+ // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
+ // selected to a NOP:
+ SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+ SDValue lhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64lhs, DAG.getConstant(32, MVT::i32)));
+ SDValue lhsHi32abs =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue lhsLo32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+ // SETO and SETUO only use the lhs operand:
+ if (CC->get() == ISD::SETO) {
+ // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+ // SETUO
+ APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, lhsVT),
+ ISD::SETUO),
+ DAG.getConstant(ccResultAllOnes, ccResultVT));
+ } else if (CC->get() == ISD::SETUO) {
+ // Evaluates to true if Op0 is [SQ]NaN
+ return DAG.getNode(ISD::AND, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhsHi32abs,
+ DAG.getConstant(0x7ff00000, MVT::i32),
+ ISD::SETGE),
+ DAG.getSetCC(dl, ccResultVT,
+ lhsLo32,
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETGT));
+ }
+
+ SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+ SDValue rhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64rhs, DAG.getConstant(32, MVT::i32)));
+
+ // If a value is negative, subtract from the sign magnitude constant:
+ SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+ // Convert the sign-magnitude representation into 2's complement:
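+  // Sketch of why this works: for a negative double, bits(x) is
+  // 0x8000000000000000 + magnitude, so 0x8000000000000000 - bits(x) yields
+  // the two's complement encoding of -magnitude. Positive values are kept
+  // as-is by the selects below, so ordinary signed integer compares then
+  // reproduce the floating-point ordering.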
+ SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ lhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+ SDValue lhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+ SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ rhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+ SDValue rhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+ unsigned compareOp;
+
+ switch (CC->get()) {
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ compareOp = ISD::SETEQ; break;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ compareOp = ISD::SETGT; break;
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ compareOp = ISD::SETGE; break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ compareOp = ISD::SETLT; break;
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ compareOp = ISD::SETLE; break;
+ case ISD::SETUNE:
+ case ISD::SETONE:
+ compareOp = ISD::SETNE; break;
+ default:
+ cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
+ abort();
+ break;
+ }
+
+ SDValue result =
+ DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+ (ISD::CondCode) compareOp);
+
+ if ((CC->get() & 0x8) == 0) {
+    // Ordered comparison (the unordered bit, 0x8, of the condition code is
+    // clear): additionally require that neither operand is NaN.
+ SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+ rhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+ result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+ }
+
+ return result;
+}
+
+//! Lower ISD::SELECT_CC
+/*!
+ ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
+ SELB instruction.
+
+  \note Need to revisit this in the future: if the code path through the true
+  and false value computations is longer than the latency of a branch (6
+  cycles), then it would be more advantageous to insert a new basic block and
+  branch on the condition. However, this code does not make that assumption,
+  given the simplistic uses so far.
+ */
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ MVT VT = Op.getValueType();
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ SDValue trueval = Op.getOperand(2);
+ SDValue falseval = Op.getOperand(3);
+ SDValue condition = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // NOTE: SELB's arguments: $rA, $rB, $mask
+ //
+ // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+ // where bits in $mask are 1. CCond will be inverted, having 1s where the
+ // condition was true and 0s where the condition was false. Hence, the
+ // arguments to SELB get reversed.
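+  //
+  // Illustrative pattern (a sketch of what this function emits):
+  //   (select_cc lhs, rhs, trueval, falseval, cc)
+  //     -> (SPUISD::SELB falseval, trueval, (setcc lhs, rhs, cc))
+  // where the all-ones setcc result selects trueval from the $rB position.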
+
+ // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
+ // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
+ // with another "cannot select select_cc" assert:
+
+ SDValue compare = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(Op.getValueType()),
+ lhs, rhs, condition);
+ return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
+}
+
+//! Custom lower ISD::TRUNCATE
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+ // Type to truncate to
+ MVT VT = Op.getValueType();
+ MVT::SimpleValueType simpleVT = VT.getSimpleVT();
+ MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to truncate from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+
+ if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+ // Create shuffle mask, least significant doubleword of quadword
+ unsigned maskHigh = 0x08090a0b;
+ unsigned maskLow = 0x0c0d0e0f;
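+    // A sketch of the byte routing: with SPU's big-endian register layout,
+    // bytes 8..15 of the i128 hold the least significant doubleword, and the
+    // 0x08090a0b/0x0c0d0e0f mask bytes copy them into the preferred slot.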
+ // Use a shuffle to perform the truncation
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
+
+ SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ Op0, Op0, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+ }
+
+ return SDValue(); // Leave the truncate unmolested
+}
+
+//! Custom (target-specific) lowering entry point
+/*!
+ This is where LLVM's DAG selection process calls to do target-specific
+ lowering of nodes.
+ */
+SDValue
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+{
+ unsigned Opc = (unsigned) Op.getOpcode();
+ MVT VT = Op.getValueType();
+
+ switch (Opc) {
+ default: {
+ cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ cerr << "Op.getOpcode() = " << Opc << "\n";
+ cerr << "*Op.getNode():\n";
+ Op.getNode()->dump();
+ abort();
+ }
+ case ISD::LOAD:
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD:
+ case ISD::ZEXTLOAD:
+ return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantFP:
+ return LowerConstantFP(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
+ case ISD::CALL:
+ return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::RET:
+ return LowerRET(Op, DAG, getTargetMachine());
+
+ // i8, i64 math ops:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::ROTR:
+ case ISD::ROTL:
+ case ISD::SRL:
+ case ISD::SHL:
+ case ISD::SRA: {
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+ break;
+ }
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG, *this);
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG, *this);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerINSERT_VECTOR_ELT(Op, DAG);
+
+ // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return LowerByteImmed(Op, DAG);
+
+ // Vector and i8 multiply:
+ case ISD::MUL:
+ if (VT == MVT::i8)
+      return LowerI8Math(Op, DAG, Opc, *this);
+    break;
+
+ case ISD::CTPOP:
+ return LowerCTPOP(Op, DAG);
+
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG, *this);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG, *this);
+
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG)
+{
+#if 0
+ unsigned Opc = (unsigned) N->getOpcode();
+ MVT OpVT = N->getValueType(0);
+
+ switch (Opc) {
+ default: {
+ cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ cerr << "Op.getOpcode() = " << Opc << "\n";
+ cerr << "*Op.getNode():\n";
+ N->dump();
+ abort();
+ /*NOTREACHED*/
+ }
+ }
+#endif
+
+ /* Otherwise, return unchanged */
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
+{
+#if 0
+ TargetMachine &TM = getTargetMachine();
+#endif
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0); // everything has at least one operand
+ MVT NodeVT = N->getValueType(0); // The node's value type
+ MVT Op0VT = Op0.getValueType(); // The first operand's result
+ SDValue Result; // Initially, empty result
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: {
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
+ }
+
+ if (isa<ConstantSDNode>(AddArg)) {
+ ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+ ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
+ << "With: (SPUindirect <arg>, "
+ << combinedConst << ")\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ IndirectArg, combinedValue);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
+ // (any_extend (SPUextract_elt0 <arg>)) ->
+ // (SPUextract_elt0 <arg>)
+ // Types must match, however...
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\nReplace: ";
+ N->dump(&DAG);
+ cerr << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ cerr << "\n";
+ }
+#endif
+
+ return Op0;
+ }
+ break;
+ }
+ case SPUISD::IndirectAddr: {
+ if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CN != 0 && CN->getZExtValue() == 0) {
+ // (SPUindirect (SPUaform <addr>, 0), 0) ->
+ // (SPUaform <addr>, 0)
+
+ DEBUG(cerr << "Replace: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(cerr << "\nWith: ");
+ DEBUG(Op0.getNode()->dump(&DAG));
+ DEBUG(cerr << "\n");
+
+ return Op0;
+ }
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+ }
+ break;
+ }
+ case SPUISD::SHLQUAD_L_BITS:
+ case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::VEC_SHL:
+ case SPUISD::VEC_SRL:
+ case SPUISD::VEC_SRA:
+ case SPUISD::ROTBYTES_LEFT: {
+ SDValue Op1 = N->getOperand(1);
+
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
+ Result = Op0;
+ }
+ }
+ break;
+ }
+ case SPUISD::PREFSLOT2VEC: {
+ switch (Op0.getOpcode()) {
+ default:
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND: {
+ // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+ // <arg>
+ // but only if the SPUprefslot2vec and <arg> types match.
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
+ SDValue Op000 = Op00.getOperand(0);
+ if (Op000.getValueType() == NodeVT) {
+ Result = Op000;
+ }
+ }
+ break;
+ }
+ case SPUISD::VEC2PREFSLOT: {
+ // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
+ // <arg>
+ Result = Op0.getOperand(0);
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ // Otherwise, return unchanged.
+#ifndef NDEBUG
+ if (Result.getNode()) {
+ DEBUG(cerr << "\nReplace.SPU: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(cerr << "\nWith: ");
+ DEBUG(Result.getNode()->dump(&DAG));
+ DEBUG(cerr << "\n");
+ }
+#endif
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SPUTargetLowering::ConstraintType
+SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
+ if (ConstraintLetter.size() == 1) {
+ switch (ConstraintLetter[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(ConstraintLetter);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const
+{
+ if (Constraint.size() == 1) {
+    // Constraint letters (borrowed from the GCC RS6000 backend)
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64)
+ return std::make_pair(0U, SPU::R64CRegisterClass);
+ return std::make_pair(0U, SPU::R32CRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, SPU::R32FPRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, SPU::R64FPRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, SPU::GPRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//! Compute used/known bits for a SPU operand
+void
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth ) const {
+#if 0
+ const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
+
+ switch (Op.getOpcode()) {
+ default:
+ // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ break;
+ case CALL:
+ case SHUFB:
+ case SHUFFLE_MASK:
+ case CNTB:
+ case SPUISD::PREFSLOT2VEC:
+ case SPUISD::LDRESULT:
+ case SPUISD::VEC2PREFSLOT:
+ case SPUISD::SHLQUAD_L_BITS:
+ case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::VEC_SHL:
+ case SPUISD::VEC_SRL:
+ case SPUISD::VEC_SRA:
+ case SPUISD::VEC_ROTL:
+ case SPUISD::VEC_ROTR:
+ case SPUISD::ROTBYTES_LEFT:
+ case SPUISD::SELECT_MASK:
+ case SPUISD::SELB:
+ }
+#endif
+}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ return 1;
+
+ case ISD::SETCC: {
+ MVT VT = Op.getValueType();
+
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
+ }
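+    // A SETCC result on SPU is all-zeros or all-ones, so every bit of the
+    // result replicates the sign bit.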
+ return VT.getSizeInBits();
+ }
+ }
+}
+
+// LowerAsmOperandForConstraint
+void
+SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Default, for the time being, to the base class handler
+ TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
+ Ops, DAG);
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode.
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
+ const Type *Ty) const {
+  // SPU's local store is 256K, so valid offsets must lie within
+  // (-2^18, 2^18 - 1):
+ return (V > -(1 << 18) && V < (1 << 18) - 1);
+}
+
+bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+bool
+SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The SPU target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
new file mode 100644
index 0000000..866c632
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -0,0 +1,154 @@
+//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Cell SPU uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_ISELLOWERING_H
+#define SPU_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SPU.h"
+
+namespace llvm {
+ namespace SPUISD {
+ enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Pseudo instructions:
+ RET_FLAG, ///< Return with flag, matched by bi instruction
+
+ Hi, ///< High address component (upper 16)
+ Lo, ///< Low address component (lower 16)
+ PCRelAddr, ///< Program counter relative address
+ AFormAddr, ///< A-form address (local store)
+ IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
+
+ LDRESULT, ///< Load result (value, chain)
+ CALL, ///< CALL instruction
+ SHUFB, ///< Vector shuffle (permute)
+ SHUFFLE_MASK, ///< Shuffle mask
+      CNTB,             ///< Count ones in bytes (per-byte population count)
+ PREFSLOT2VEC, ///< Promote scalar->vector
+ VEC2PREFSLOT, ///< Extract element 0
+ SHLQUAD_L_BITS, ///< Rotate quad left, by bits
+ SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
+ VEC_SHL, ///< Vector shift left
+ VEC_SRL, ///< Vector shift right (logical)
+ VEC_SRA, ///< Vector shift right (arithmetic)
+ VEC_ROTL, ///< Vector rotate left
+ VEC_ROTR, ///< Vector rotate right
+ ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
+ ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
+ SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
+ SELB, ///< Select bits -> (b & mask) | (a & ~mask)
+ // Markers: These aren't used to generate target-dependent nodes, but
+ // are used during instruction selection.
+ ADD64_MARKER, ///< i64 addition marker
+ SUB64_MARKER, ///< i64 subtraction marker
+ MUL64_MARKER, ///< i64 multiply marker
+ LAST_SPUISD ///< Last user-defined instruction
+ };
+ }
+
+ //! Utility functions specific to CellSPU:
+ namespace SPU {
+ SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
+ SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM);
+ //! Simplify a MVT::v2i64 constant splat to CellSPU-ready form
+ SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
+ DebugLoc dl);
+ }
+
+ class SPUTargetMachine; // forward dec'l.
+
+ class SPUTargetLowering :
+ public TargetLowering
+ {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int ReturnAddrIndex; // FrameIndex for return slot.
+ SPUTargetMachine &SPUTM;
+
+ public:
+ //! The venerable constructor
+ /*!
+ This is where the CellSPU backend sets operation handling (i.e., legal,
+ custom, expand or promote.)
+ */
+ SPUTargetLowering(SPUTargetMachine &TM);
+
+ //! Get the target machine
+ SPUTargetMachine &getSPUTargetMachine() {
+ return SPUTM;
+ }
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ValueType for ISD::SETCC
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ //! Custom lowering hooks
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ //! Custom lowering hook for nodes with illegal result types.
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth = 0) const;
+
+ ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+ virtual bool isLegalAddressImmediate(GlobalValue *) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h
new file mode 100644
index 0000000..5e268f8
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -0,0 +1,43 @@
+//==-- SPUInstrBuilder.h - Aides for building Cell SPU insts -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRBUILDER_H
+#define SPU_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference initially uses the FrameIndex as a placeholder for the base
+/// register until it is resolved. This allows a constant offset to be
+/// specified as well.
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
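+
+// Illustrative use (a sketch; the opcode and registers are placeholders):
+//
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(SPU::STQDr32))
+//                       .addReg(SrcReg), FrameIdx);
+//
+// appends the operands Imm(0), FrameIndex(FI) to the store, with the frame
+// index standing in for the base register until it is resolved.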
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
new file mode 100644
index 0000000..21bc275
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrFormats.td
@@ -0,0 +1,298 @@
+//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Cell SPU instruction formats. Note that these are notationally similar to
+// PowerPC, like "A-Form". But the sizes of operands and fields differ.
+
+// This was kiped from the PPC instruction formats (seemed like a good idea...)
+
+class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SPU";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+}
+
+// RR Format
+class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin> {
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
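+
+// Illustrative (hypothetical) use of RRForm. The opcode bits, itinerary
+// class and pattern below are placeholders, not a real SPU instruction:
+//
+//   def EXAMPLE : RRForm<0b00000000000, (outs R32C:$rT),
+//                        (ins R32C:$rA, R32C:$rB),
+//                        "example\t$rT, $rA, $rB", IntegerOp,
+//                        [(set R32C:$rT, (add R32C:$rA, R32C:$rB))]>;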
+
+let RB = 0 in {
+  // RR Format, where RB is zeroed (don't care):
+ class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ let RA = 0 in {
+    // RR Format, where RA and RB are zeroed (don't care):
+ // Used for reads from status control registers (see FPSCRRr32)
+ class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+ }
+}
+
+let RT = 0 in {
+ // RR Format, where RT is zeroed (don't care), or as the instruction handbook
+ // says, "RT is a false target." Used in "Halt if" instructions
+ class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RRR Format
+class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RC;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-3} = opcode;
+ let Inst{4-10} = RT;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RC;
+}
+
+// RI7 Format
+class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> i7;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = i7;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// CVTIntFp Format
+class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-9} = opcode;
+ let Inst{10-17} = 0;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+let RA = 0 in {
+ class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ let RT = 0 in {
+ // Branch instruction format (without D/E flag settings)
+ class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
+ : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
+ pattern>
+ { }
+
+ let RB = 0 in {
+ // Return instruction (bi, branch indirect), RA is zero (LR):
+ class RETForm<string asmstr, list<dag> pattern>
+ : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
+ pattern>
+ { }
+ }
+ }
+}
+
+// Branch indirect external data forms:
+class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
+ : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
+{
+ bits<7> Rcalldest;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = 0b11010101100;
+ let Inst{11} = 0;
+ let Inst{12-13} = DE_flag;
+ let Inst{14-17} = 0b0000;
+ let Inst{18-24} = Rcalldest;
+ let Inst{25-31} = 0b0000000;
+}
+
+// RI10 Format
+class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<10> i10;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-7} = opcode;
+ let Inst{8-17} = i10;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// RI10 Format, where the constant is zero (or effectively ignored by the
+// SPU)
+let i10 = 0 in {
+ class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI10 Format, where RT is ignored.
+// This format is used primarily by the Halt If ... Immediate set of
+// instructions
+let RT = 0 in {
+ class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI16 Format
+class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<16> i16;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-8} = opcode;
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RT;
+}
+
+// Specialized version of the RI16 Format for unconditional branch relative and
+// branch absolute, branch and set link. Note that for branch and set link, the
+// link register doesn't have to be $lr, but this is actually hard coded into
+// the instruction pattern.
+
+let RT = 0 in {
+ class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+}
+
+//===----------------------------------------------------------------------===//
+// Specialized versions of RI16:
+//===----------------------------------------------------------------------===//
+
+// RI18 Format
+class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<18> i18;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-24} = i18;
+ let Inst{25-31} = RT;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats for intrinsics:
+//===----------------------------------------------------------------------===//
+
+// RI10 Format for v8i16 intrinsics
+class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))] >;
+
+class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ i32ImmSExt10:$val))] >;
+
+// RR Format for v8i16 intrinsics
+class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+// RR Format for v4i32 intrinsics
+class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))] >;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions, like call frames:
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
new file mode 100644
index 0000000..4af995a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -0,0 +1,693 @@
+//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPURegisterNames.h"
+#include "SPUInstrInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUTargetMachine.h"
+#include "SPUGenInstrInfo.inc"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+ //! Predicate for an unconditional branch instruction
+ inline bool isUncondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BR
+ || opc == SPU::BRA
+ || opc == SPU::BI);
+ }
+
+ //! Predicate for a conditional branch instruction
+ inline bool isCondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BRNZr32
+ || opc == SPU::BRNZv4i32
+ || opc == SPU::BRZr32
+ || opc == SPU::BRZv4i32
+ || opc == SPU::BRHNZr16
+ || opc == SPU::BRHNZv8i16
+ || opc == SPU::BRHZr16
+ || opc == SPU::BRHZv8i16);
+ }
+}
+
+SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
+ : TargetInstrInfoImpl(SPUInsts, sizeof(SPUInsts)/sizeof(SPUInsts[0])),
+ TM(tm),
+ RI(*TM.getSubtargetImpl(), *this)
+{ /* NOP */ }
+
+bool
+SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg,
+ unsigned& SrcSR, unsigned& DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case SPU::ORIv4i32:
+ case SPU::ORIr32:
+ case SPU::ORHIv8i16:
+ case SPU::ORHIr16:
+ case SPU::ORHIi8i16:
+ case SPU::ORBIv16i8:
+ case SPU::ORBIr8:
+ case SPU::ORIi16i32:
+ case SPU::ORIi8i32:
+ case SPU::AHIvec:
+ case SPU::AHIr16:
+ case SPU::AIv4i32:
+ assert(MI.getNumOperands() == 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isImm() &&
+ "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!");
+ if (MI.getOperand(2).getImm() == 0) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ case SPU::AIr32:
+ assert(MI.getNumOperands() == 3 &&
+ "wrong number of operands to AIr32");
+ if (MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ (MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0)) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ case SPU::LRr8:
+ case SPU::LRr16:
+ case SPU::LRr32:
+ case SPU::LRf32:
+ case SPU::LRr64:
+ case SPU::LRf64:
+ case SPU::LRr128:
+ case SPU::LRv16i8:
+ case SPU::LRv8i16:
+ case SPU::LRv4i32:
+ case SPU::LRv4f32:
+ case SPU::LRv2i64:
+ case SPU::LRv2f64:
+ case SPU::ORv16i8_i8:
+ case SPU::ORv8i16_i16:
+ case SPU::ORv4i32_i32:
+ case SPU::ORv2i64_i64:
+ case SPU::ORv4f32_f32:
+ case SPU::ORv2f64_f64:
+ case SPU::ORi8_v16i8:
+ case SPU::ORi16_v8i16:
+ case SPU::ORi32_v4i32:
+ case SPU::ORi64_v2i64:
+ case SPU::ORf32_v4f32:
+ case SPU::ORf64_v2f64:
+/*
+ case SPU::ORi128_r64:
+ case SPU::ORi128_f64:
+ case SPU::ORi128_r32:
+ case SPU::ORi128_f32:
+ case SPU::ORi128_r16:
+ case SPU::ORi128_r8:
+*/
+ case SPU::ORi128_vec:
+/*
+ case SPU::ORr64_i128:
+ case SPU::ORf64_i128:
+ case SPU::ORr32_i128:
+ case SPU::ORf32_i128:
+ case SPU::ORr16_i128:
+ case SPU::ORr8_i128:
+*/
+ case SPU::ORvec_i128:
+/*
+ case SPU::ORr16_r32:
+ case SPU::ORr8_r32:
+ case SPU::ORf32_r32:
+ case SPU::ORr32_f32:
+ case SPU::ORr32_r16:
+ case SPU::ORr32_r8:
+ case SPU::ORr16_r64:
+ case SPU::ORr8_r64:
+ case SPU::ORr64_r16:
+ case SPU::ORr64_r8:
+*/
+ case SPU::ORr64_r32:
+ case SPU::ORr32_r64:
+ case SPU::ORf32_r32:
+ case SPU::ORr32_f32:
+ case SPU::ORf64_r64:
+ case SPU::ORr64_f64: {
+ assert(MI.getNumOperands() == 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid SPU OR<type>_<vec> or LR instruction!");
+ if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ }
+ case SPU::ORv16i8:
+ case SPU::ORv8i16:
+ case SPU::ORv4i32:
+ case SPU::ORv2i64:
+ case SPU::ORr8:
+ case SPU::ORr16:
+ case SPU::ORr32:
+ case SPU::ORr64:
+ case SPU::ORr128:
+ case SPU::ORf32:
+ case SPU::ORf64:
+ assert(MI.getNumOperands() == 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isReg() &&
+ "invalid SPU OR(vec|r32|r64|gprc) instruction!");
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
+unsigned
+SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+unsigned
+SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const
+{
+ // We support cross register class moves for our aliases, such as R3 in any
+ // reg class to any other reg class containing R3. This is required because
+ // we instruction select bitconvert i64 -> f64 as a noop for example, so our
+ // types have no specific meaning.
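+  //
+  // A sketch of the consequence: the move opcode is chosen from DestRC
+  // alone, so e.g. any copy into an R64FP register emits LRf64, whatever
+  // register class SrcReg came from.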
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (DestRC == SPU::R8CRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R16CRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr16), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R32CRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr32), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R32FPRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRf32), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R64CRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr64), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R64FPRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRf64), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::GPRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr128), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::VECREGRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRv16i8), DestReg).addReg(SrcReg);
+ } else {
+ // Attempt to copy unknown/unsupported register class!
+ return false;
+ }
+
+ return true;
+}
+
+void
+SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
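+  // D-form stores (STQD) carry a signed 10-bit, quadword-scaled offset;
+  // frame indices beyond that range fall back to the register-indexed
+  // X-form stores (STQX).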
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc))
+ .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+}
+
+void SPUInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ cerr << "storeRegToAddr() invoked!\n";
+ abort();
+
+ if (Addr[0].isFI()) {
+ /* do what storeRegToStackSlot does here */
+ } else {
+ unsigned Opc = 0;
+ if (RC == SPU::GPRCRegisterClass) {
+ /* Opc = PPC::STW; */
+ } else if (RC == SPU::R16CRegisterClass) {
+ /* Opc = PPC::STD; */
+ } else if (RC == SPU::R32CRegisterClass) {
+ /* Opc = PPC::STFD; */
+ } else if (RC == SPU::R32FPRegisterClass) {
+ /* Opc = PPC::STFD; */
+ } else if (RC == SPU::R64FPRegisterClass) {
+ /* Opc = PPC::STFS; */
+ } else if (RC == SPU::VECREGRegisterClass) {
+ /* Opc = PPC::STVX; */
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ }
+}
+
+void
+SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
+ } else {
+ assert(0 && "Unknown regclass in loadRegFromStackSlot!");
+ abort();
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
+}
+
+/*!
+ \note We are really pessimistic here about what kind of a load we're doing.
+ */
+void SPUInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)
+ const {
+ cerr << "loadRegToAddr() invoked!\n";
+ abort();
+
+ if (Addr[0].isFI()) {
+ /* do what loadRegFromStackSlot does here... */
+ } else {
+ unsigned Opc = 0;
+ if (RC == SPU::R8CRegisterClass) {
+ /* do brilliance here */
+ } else if (RC == SPU::R16CRegisterClass) {
+ /* Opc = PPC::LWZ; */
+ } else if (RC == SPU::R32CRegisterClass) {
+ /* Opc = PPC::LD; */
+ } else if (RC == SPU::R32FPRegisterClass) {
+ /* Opc = PPC::LFD; */
+ } else if (RC == SPU::R64FPRegisterClass) {
+ /* Opc = PPC::LFS; */
+ } else if (RC == SPU::VECREGRegisterClass) {
+ /* Opc = PPC::LVX; */
+ } else if (RC == SPU::GPRCRegisterClass) {
+ /* Opc = something else! */
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ }
+}
+
+//! Return true if the specified load or store can be folded
+bool
+SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ if (Ops.size() != 1) return false;
+
+ // Make sure this is a reg-reg copy.
+ unsigned Opc = MI->getOpcode();
+
+ switch (Opc) {
+ case SPU::ORv16i8:
+ case SPU::ORv8i16:
+ case SPU::ORv4i32:
+ case SPU::ORv2i64:
+ case SPU::ORr8:
+ case SPU::ORr16:
+ case SPU::ORr32:
+ case SPU::ORr64:
+ case SPU::ORf32:
+ case SPU::ORf64:
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg())
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+/// foldMemoryOperand - SPU, like PPC, can only fold spills into
+/// copy instructions, turning them into load/store instructions.
+MachineInstr *
+SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const
+{
+ if (Ops.size() != 1) return 0;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ MachineInstr *NewMI = 0;
+
+ switch (Opc) {
+ case SPU::ORv16i8:
+ case SPU::ORv8i16:
+ case SPU::ORv4i32:
+ case SPU::ORv2i64:
+ case SPU::ORr8:
+ case SPU::ORr16:
+ case SPU::ORr32:
+ case SPU::ORr64:
+ case SPU::ORf32:
+ case SPU::ORf64:
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ if (FrameIndex < SPUFrameInfo::maxFrameOffset()) {
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(),
+ get(SPU::STQDr32));
+
+ MIB.addReg(InReg, getKillRegState(isKill));
+ NewMI = addFrameReference(MIB, FrameIndex);
+ }
+    } else {           // move -> load
+      unsigned OutReg = MI->getOperand(0).getReg();
+      bool isDead = MI->getOperand(0).isDead();
+      if (FrameIndex < SPUFrameInfo::maxFrameOffset()) {
+        MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(),
+                                          get(SPU::LQDr32));
+
+        MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead));
+        NewMI = addFrameReference(MIB, FrameIndex);
+      }
+      break;
+    }
+ }
+
+ return NewMI;
+}
+
+//! Branch analysis
+/*!
+ \note This code was kiped from PPC. There may be more branch analysis for
+ CellSPU than what's currently done here.
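+
+  A sketch of the block shapes this recognizes (condition opcodes per
+  isCondBranch/isUncondBranch above):
+    <no terminator>              -- falls through; returns false
+    br/bi <TBB>                  -- unconditional; returns false
+    brnz $r, <TBB>               -- conditional fall-through; returns false
+    brnz $r, <TBB>; br <FBB>     -- two-way branch; returns false
+    anything else                -- returns true (not analyzable)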
+ */
+bool
+SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranch(LastInst)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(LastInst)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ DEBUG(cerr << "Pushing LastInst: ");
+ DEBUG(LastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a conditional and unconditional branch, handle it.
+ if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ DEBUG(cerr << "Pushing SecondLastInst: ");
+ DEBUG(SecondLastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned
+SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ if (!isCondBranch(I) && !isUncondBranch(I))
+ return 0;
+
+ // Remove the first branch.
+ DEBUG(cerr << "Removing branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ I = MBB.end();
+ if (I == MBB.begin())
+ return 1;
+
+ --I;
+ if (!(isCondBranch(I) || isUncondBranch(I)))
+ return 1;
+
+ // Remove the second branch.
+ DEBUG(cerr << "Removing second branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "SPU branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Unconditional branch
+ MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR));
+ MIB.addMBB(TBB);
+
+ DEBUG(cerr << "Inserted one-way uncond branch: ");
+ DEBUG((*MIB).dump());
+ } else {
+ // Conditional branch
+ MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+
+ DEBUG(cerr << "Inserted one-way cond branch: ");
+ DEBUG((*MIB).dump());
+ }
+ return 1;
+ } else {
+ MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB2 = BuildMI(&MBB, dl, get(SPU::BR));
+
+ // Two-way Conditional Branch.
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+ MIB2.addMBB(FBB);
+
+ DEBUG(cerr << "Inserted conditional branch: ");
+ DEBUG((*MIB).dump());
+ DEBUG(cerr << "part 2: ");
+ DEBUG((*MIB2).dump());
+ return 2;
+ }
+}
+
+bool
+SPUInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ return (!MBB.empty() && isUncondBranch(&MBB.back()));
+}
+//! Reverses a branch's condition, returning false on success.
+bool
+SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // Pretty brainless way of inverting the condition, but it works, considering
+ // there are only two conditions...
+ static struct {
+ unsigned Opc; //! The incoming opcode
+ unsigned RevCondOpc; //! The reversed condition opcode
+ } revconds[] = {
+ { SPU::BRNZr32, SPU::BRZr32 },
+ { SPU::BRNZv4i32, SPU::BRZv4i32 },
+ { SPU::BRZr32, SPU::BRNZr32 },
+ { SPU::BRZv4i32, SPU::BRNZv4i32 },
+ { SPU::BRHNZr16, SPU::BRHZr16 },
+ { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
+ { SPU::BRHZr16, SPU::BRHNZr16 },
+ { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
+ };
+
+ unsigned Opc = unsigned(Cond[0].getImm());
+ // Pretty dull mapping between the two conditions that SPU can generate:
+ for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
+ if (revconds[i].Opc == Opc) {
+ Cond[0].setImm(revconds[i].RevCondOpc);
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
new file mode 100644
index 0000000..ffb4087
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -0,0 +1,114 @@
+//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CellSPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRUCTIONINFO_H
+#define SPU_INSTRUCTIONINFO_H
+
+#include "SPU.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SPURegisterInfo.h"
+
+namespace llvm {
+ //! Cell SPU instruction information class
+ class SPUInstrInfo : public TargetInstrInfoImpl {
+ SPUTargetMachine &TM;
+ const SPURegisterInfo RI;
+ protected:
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ public:
+ explicit SPUInstrInfo(SPUTargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ //! Store a register to a stack slot, based on its register class.
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ //! Store a register to an address, based on its register class
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ //! Load a register from a stack slot, based on its register class.
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+    //! Load a register from an address, based on its register class
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ //! Return true if the specified load or store can be folded
+ virtual
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+
+ //! Return true if the specified block does not fall through
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+
+ //! Reverses a branch's condition, returning false on success.
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
new file mode 100644
index 0000000..63eb85a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -0,0 +1,4614 @@
+//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instructions:
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TODO Items (not urgent today, but would be nice, low priority)
+//
+// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
+// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
+// in 16-bit and 32-bit constants and reduce instruction count.
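+//
+// e.g. (illustrative): an AND against the 32-bit constant 0x0F0F0F0F could
+// then be emitted as "andbi $3, $4, 0x0F", since that constant is just the
+// byte 0x0F repeated four times.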
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions:
+//===----------------------------------------------------------------------===//
+
+let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start timm:$amt)]>;
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end timm:$amt)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// DWARF debugging Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
+ ".loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+//===----------------------------------------------------------------------===//
+// Loads:
+// NB: The ordering is actually important, since the instruction selection
+// will try each of the instructions in sequence, i.e., the D-form first with
+// the 10-bit displacement, then the A-form with the 16-bit displacement, and
+// finally the X-form with register-register addressing.
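+//
+// Illustratively (syntax approximate, not taken from a manual):
+//   lqd $3, 16($4)    ; D-form: 10-bit displacement from a base register
+//   lqa $3, sym       ; A-form: 16-bit absolute address
+//   lqx $3, $4, $5    ; X-form: register + register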
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1 in {
+ class LoadDFormVec<ValueType vectype>
+ : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
+ { }
+
+ class LoadDForm<RegisterClass rclass>
+ : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load dform_addr:$src))]>
+ { }
+
+ multiclass LoadDForms
+ {
+ def v16i8: LoadDFormVec<v16i8>;
+ def v8i16: LoadDFormVec<v8i16>;
+ def v4i32: LoadDFormVec<v4i32>;
+ def v2i64: LoadDFormVec<v2i64>;
+ def v4f32: LoadDFormVec<v4f32>;
+ def v2f64: LoadDFormVec<v2f64>;
+
+ def v2i32: LoadDFormVec<v2i32>;
+
+ def r128: LoadDForm<GPRC>;
+ def r64: LoadDForm<R64C>;
+ def r32: LoadDForm<R32C>;
+ def f32: LoadDForm<R32FP>;
+ def f64: LoadDForm<R64FP>;
+ def r16: LoadDForm<R16C>;
+ def r8: LoadDForm<R8C>;
+ }
+
+ class LoadAFormVec<ValueType vectype>
+ : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
+ { }
+
+ class LoadAForm<RegisterClass rclass>
+ : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load aform_addr:$src))]>
+ { }
+
+ multiclass LoadAForms
+ {
+ def v16i8: LoadAFormVec<v16i8>;
+ def v8i16: LoadAFormVec<v8i16>;
+ def v4i32: LoadAFormVec<v4i32>;
+ def v2i64: LoadAFormVec<v2i64>;
+ def v4f32: LoadAFormVec<v4f32>;
+ def v2f64: LoadAFormVec<v2f64>;
+
+ def v2i32: LoadAFormVec<v2i32>;
+
+ def r128: LoadAForm<GPRC>;
+ def r64: LoadAForm<R64C>;
+ def r32: LoadAForm<R32C>;
+ def f32: LoadAForm<R32FP>;
+ def f64: LoadAForm<R64FP>;
+ def r16: LoadAForm<R16C>;
+ def r8: LoadAForm<R8C>;
+ }
+
+ class LoadXFormVec<ValueType vectype>
+ : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
+ { }
+
+ class LoadXForm<RegisterClass rclass>
+ : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load xform_addr:$src))]>
+ { }
+
+ multiclass LoadXForms
+ {
+ def v16i8: LoadXFormVec<v16i8>;
+ def v8i16: LoadXFormVec<v8i16>;
+ def v4i32: LoadXFormVec<v4i32>;
+ def v2i64: LoadXFormVec<v2i64>;
+ def v4f32: LoadXFormVec<v4f32>;
+ def v2f64: LoadXFormVec<v2f64>;
+
+ def v2i32: LoadXFormVec<v2i32>;
+
+ def r128: LoadXForm<GPRC>;
+ def r64: LoadXForm<R64C>;
+ def r32: LoadXForm<R32C>;
+ def f32: LoadXForm<R32FP>;
+ def f64: LoadXForm<R64FP>;
+ def r16: LoadXForm<R16C>;
+ def r8: LoadXForm<R8C>;
+ }
+
+ defm LQA : LoadAForms;
+ defm LQD : LoadDForms;
+ defm LQX : LoadXForms;
+
+/* Load quadword, PC relative: Not much use at this point in time.
+ Might be of use later for relocatable code. It's effectively the
+ same as LQA, but uses PC-relative addressing.
+ def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
+ "lqr\t$rT, $disp", LoadStore,
+ [(set VECREG:$rT, (load iaddr:$disp))]>;
+ */
+}
+
+//===----------------------------------------------------------------------===//
+// Stores:
+//===----------------------------------------------------------------------===//
+class StoreDFormVec<ValueType vectype>
+ : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), dform_addr:$src)]>
+{ }
+
+class StoreDForm<RegisterClass rclass>
+ : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, dform_addr:$src)]>
+{ }
+
+multiclass StoreDForms
+{
+ def v16i8: StoreDFormVec<v16i8>;
+ def v8i16: StoreDFormVec<v8i16>;
+ def v4i32: StoreDFormVec<v4i32>;
+ def v2i64: StoreDFormVec<v2i64>;
+ def v4f32: StoreDFormVec<v4f32>;
+ def v2f64: StoreDFormVec<v2f64>;
+
+ def v2i32: StoreDFormVec<v2i32>;
+
+ def r128: StoreDForm<GPRC>;
+ def r64: StoreDForm<R64C>;
+ def r32: StoreDForm<R32C>;
+ def f32: StoreDForm<R32FP>;
+ def f64: StoreDForm<R64FP>;
+ def r16: StoreDForm<R16C>;
+ def r8: StoreDForm<R8C>;
+}
+
+class StoreAFormVec<ValueType vectype>
+ : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), aform_addr:$src)]>;
+
+class StoreAForm<RegisterClass rclass>
+ : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, aform_addr:$src)]>;
+
+multiclass StoreAForms
+{
+ def v16i8: StoreAFormVec<v16i8>;
+ def v8i16: StoreAFormVec<v8i16>;
+ def v4i32: StoreAFormVec<v4i32>;
+ def v2i64: StoreAFormVec<v2i64>;
+ def v4f32: StoreAFormVec<v4f32>;
+ def v2f64: StoreAFormVec<v2f64>;
+
+ def v2i32: StoreAFormVec<v2i32>;
+
+ def r128: StoreAForm<GPRC>;
+ def r64: StoreAForm<R64C>;
+ def r32: StoreAForm<R32C>;
+ def f32: StoreAForm<R32FP>;
+ def f64: StoreAForm<R64FP>;
+ def r16: StoreAForm<R16C>;
+ def r8: StoreAForm<R8C>;
+}
+
+class StoreXFormVec<ValueType vectype>
+ : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), xform_addr:$src)]>
+{ }
+
+class StoreXForm<RegisterClass rclass>
+ : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, xform_addr:$src)]>
+{ }
+
+multiclass StoreXForms
+{
+ def v16i8: StoreXFormVec<v16i8>;
+ def v8i16: StoreXFormVec<v8i16>;
+ def v4i32: StoreXFormVec<v4i32>;
+ def v2i64: StoreXFormVec<v2i64>;
+ def v4f32: StoreXFormVec<v4f32>;
+ def v2f64: StoreXFormVec<v2f64>;
+
+ def v2i32: StoreXFormVec<v2i32>;
+
+ def r128: StoreXForm<GPRC>;
+ def r64: StoreXForm<R64C>;
+ def r32: StoreXForm<R32C>;
+ def f32: StoreXForm<R32FP>;
+ def f64: StoreXForm<R64FP>;
+ def r16: StoreXForm<R16C>;
+ def r8: StoreXForm<R8C>;
+}
+
+defm STQD : StoreDForms;
+defm STQA : StoreAForms;
+defm STQX : StoreXForms;
+
+/* Store quadword, PC relative: Not much use at this point in time. Might
+ be useful for relocatable code.
+def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
+ "stqr\t$rT, $disp", LoadStore,
+ [(store VECREG:$rT, iaddr:$disp)]>;
+*/
+
+//===----------------------------------------------------------------------===//
+// Generate Controls for Insertion:
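+//
+// These generate shuffle-control masks for use with shufb. Illustratively
+// (syntax approximate, assuming the usual insertion idiom): "cwd $3, 4($1)"
+// builds a control word that, fed to shufb, inserts a 32-bit value at byte
+// offset 4 of a quadword.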
+//===----------------------------------------------------------------------===//
+
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cbd\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cbx\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "chd\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "chx\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Constant formation:
+//===----------------------------------------------------------------------===//
+
+def ILHv8i16:
+ RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
+
+def ILHr16:
+ RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R16C:$rT, immSExt16:$val)]>;
+
+// Cell SPU doesn't have a native 8-bit immediate load, but ILH works when
+// given the right constant:
+def ILHr8:
+ RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R8C:$rT, immSExt8:$val)]>;
+
+// IL does sign extension!
+
+class ILInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmediateLoad
+{
+ def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
+ def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
+
+ // TODO: Need v2f64, v4f32
+
+ def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
+ def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
+ def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
+ def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
+}
+
+defm IL : ImmediateLoad;
+
+class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadHalfwordUpper
+{
+ def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
+ def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
+
+ def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
+ def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
+
+ // Loads the high portion of an address
+ def hi: ILHURegInst<R32C, symbolHi, hi16>;
+
+ // Used in custom lowering constant SFP loads:
+ def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
+}
+
+defm ILHU : ImmLoadHalfwordUpper;
+
+// Immediate load address (can also be used to load 18-bit unsigned constants,
+// see the zext 16->32 pattern)
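+//
+// e.g. (illustrative): "ila $3, 0x3ffff" materializes the largest 18-bit
+// unsigned constant in a single instruction, with no ilhu/iohl pair needed.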
+
+class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
+ LoadNOP, pattern>;
+
+class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadAddress
+{
+ def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
+ def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
+
+ def r64: ILARegInst<R64C, u18imm_i64, imm18>;
+ def r32: ILARegInst<R32C, u18imm, imm18>;
+ def f32: ILARegInst<R32FP, f18imm, fpimm18>;
+ def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
+
+ def hi: ILARegInst<R32C, symbolHi, imm18>;
+ def lo: ILARegInst<R32C, symbolLo, imm18>;
+
+ def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
+ [/* no pattern */]>;
+}
+
+defm ILA : ImmLoadAddress;
+
+// Immediate OR, Halfword Lower: The "other" part of loading large constants
+// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
+// Note that these are really two-operand instructions, but they're encoded
+// as three operands with the first two arguments tied to each other.
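+//
+// Illustrative use (the canonical two-instruction 32-bit constant load):
+//   ilhu $3, 0x1234    ; $3 = 0x12340000
+//   iohl $3, 0x5678    ; $3 = 0x12345678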
+
+class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
+ ImmLoad, pattern>,
+ RegConstraint<"$rS = $rT">,
+ NoEncode<"$rS">;
+
+class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+multiclass ImmOrHalfwordLower
+{
+ def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
+ def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
+
+ def r32: IOHLRegInst<R32C, i32imm>;
+ def f32: IOHLRegInst<R32FP, f32imm>;
+
+ def lo: IOHLRegInst<R32C, symbolLo>;
+}
+
+defm IOHL: ImmOrHalfwordLower;
+
+// Form select mask for bytes using immediate, used in conjunction with the
+// SELB instruction:
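+//
+// Illustratively (assuming the usual bit-to-byte expansion): each of the 16
+// immediate bits selects one byte of the mask, so "fsmbi $3, 0xff00" would
+// yield 0xFF in bytes 0-7 and 0x00 in bytes 8-15, ready for use with selb.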
+
+class FSMBIVec<ValueType vectype>:
+ RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
+ "fsmbi\t$rT, $val",
+ SelectOp,
+ [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
+
+multiclass FormSelectMaskBytesImm
+{
+ def v16i8: FSMBIVec<v16i8>;
+ def v8i16: FSMBIVec<v8i16>;
+ def v4i32: FSMBIVec<v4i32>;
+ def v2i64: FSMBIVec<v2i64>;
+}
+
+defm FSMBI : FormSelectMaskBytesImm;
+
+// fsmb: Form select mask for bytes. N.B.: the input operand, $rA, is 16 bits wide
+class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMBVecInst<ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskBits {
+ def v16i8_r16: FSMBRegInst<R16C, v16i8>;
+ def v16i8: FSMBVecInst<v16i8>;
+}
+
+defm FSMB: FormSelectMaskBits;
+
+// fsmh: Form select mask for halfwords. N.B.: the input operand, $rA, is
+// only 8 bits wide (even though it's supplied as 16 bits here)
+
+class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMHVecInst<ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskHalfword {
+ def v8i16_r16: FSMHRegInst<R16C, v8i16>;
+ def v8i16: FSMHVecInst<v8i16>;
+}
+
+defm FSMH: FormSelectMaskHalfword;
+
+// fsm: Form select mask for words. Like the other fsm* instructions,
+// only the lower 4 bits of $rA are significant.
+
+class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMRegInst<ValueType vectype, RegisterClass rclass>:
+ FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMVecInst<ValueType vectype>:
+ FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskWord {
+ def v4i32: FSMVecInst<v4i32>;
+
+ def r32 : FSMRegInst<v4i32, R32C>;
+ def r16 : FSMRegInst<v4i32, R16C>;
+}
+
+defm FSM : FormSelectMaskWord;
+
+// Special case when used for i64 math operations
+multiclass FormSelectMaskWord64 {
+ def r32 : FSMRegInst<v2i64, R32C>;
+ def r16 : FSMRegInst<v2i64, R16C>;
+}
+
+defm FSM64 : FormSelectMaskWord64;
+
+//===----------------------------------------------------------------------===//
+// Integer and Logical Operations:
+//===----------------------------------------------------------------------===//
+
+def AHv8i16:
+ RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
+
+def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (AHv8i16 VECREG:$rA, VECREG:$rB)>;
+
+def AHr16:
+ RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
+
+def AHIvec:
+ RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def AHIr16:
+ RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
+
+// v4i32, i32 add instruction:
+
+class AInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000011000, OOL, IOL,
+ "a\t$rT, $rA, $rB", IntegerOp,
+ pattern>;
+
+class AVecInst<ValueType vectype>:
+ AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ARegInst<RegisterClass rclass>:
+ AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
+
+multiclass AddInstruction {
+ def v4i32: AVecInst<v4i32>;
+ def v16i8: AVecInst<v16i8>;
+
+ def r32: ARegInst<R32C>;
+}
+
+defm A : AddInstruction;
+
+class AIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00111000, OOL, IOL,
+ "ai\t$rT, $rA, $val", IntegerOp,
+ pattern>;
+
+class AIVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
+
+class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [/* no pattern */]>;
+
+class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
+
+// This is used to add epsilons to floating point numbers in the f32 fdiv code:
+class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+
+multiclass AddImmediate {
+ def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
+
+ def r32: AIRegInst<R32C, i32ImmSExt10>;
+
+ def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
+ def f32: AIFPInst<R32FP, i32ImmSExt10>;
+}
+
+defm AI : AddImmediate;
+
+def SFHvec:
+ RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def SFHr16:
+ RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>;
+
+def SFHIvec:
+ RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
+ (v8i16 VECREG:$rA)))]>;
+
+def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
+
+def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>;
+
+def SFIvec:
+ RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
+ (v4i32 VECREG:$rA)))]>;
+
+def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
+ (ins R32C:$rA, s10imm_i32:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
+
+// ADDX: add extended. In hardware this is a vector operation; the register
+// forms below simply reuse it on the preferred slot. Doesn't match a pattern.
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000010110, OOL, IOL,
+ "addx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+ ADDXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class ADDXRegInst<RegisterClass rclass>:
+ ADDXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass AddExtended {
+ def v2i64 : ADDXVecInst<v2i64>;
+ def v4i32 : ADDXVecInst<v4i32>;
+ def r64 : ADDXRegInst<R64C>;
+ def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000011000, OOL, IOL,
+ "cg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+ CGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class CGRegInst<RegisterClass rclass>:
+ CGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass CarryGenerate {
+ def v2i64 : CGVecInst<v2i64>;
+ def v4i32 : CGVecInst<v4i32>;
+ def r64 : CGRegInst<R64C>;
+ def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
+
+// SFX: Subtract from, extended. This is used in conjunction with BG to
+// subtract with carry (borrow, in this case)
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010110, OOL, IOL,
+ "sfx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+ SFXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class SFXRegInst<RegisterClass rclass>:
+ SFXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass SubtractExtended {
+ def v2i64 : SFXVecInst<v2i64>;
+ def v4i32 : SFXVecInst<v4i32>;
+ def r64 : SFXRegInst<R64C>;
+ def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
+// BG: borrow generate. In hardware this is a vector operation; the register
+// forms below simply reuse it on the preferred slot. Doesn't match a pattern.
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000010000, OOL, IOL,
+ "bg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class BGVecInst<ValueType vectype>:
+ BGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class BGRegInst<RegisterClass rclass>:
+ BGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass BorrowGenerate {
+ def v4i32 : BGVecInst<v4i32>;
+ def v2i64 : BGVecInst<v2i64>;
+ def r64 : BGRegInst<R64C>;
+ def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
+def BGXvec:
+ RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
+ VECREG:$rCarry),
+ "bgx\t$rT, $rA, $rB", IntegerOp,
+ []>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+// Halfword multiply variants:
+// N.B: These can be used to build up larger quantities (16x16 -> 32)
+
+def MPYv8i16:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYr16:
+ RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
+
+// Unsigned 16-bit multiply:
+
+class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00110011110, OOL, IOL,
+ "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYUv4i32:
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYUr16:
+ MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
+
+def MPYUr32:
+ MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+// mpyi: multiply 16 x s10imm -> 32 result.
+
+class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00101110, OOL, IOL,
+ "mpyi\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYIvec:
+ MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+def MPYIr16:
+ MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
+
+// mpyui: same issues as the other multiplies; additionally, this doesn't
+// match a pattern, but may be used during target DAG selection or lowering
+
+class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10101110, OOL, IOL,
+ "mpyui\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYUIvec:
+ MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ []>;
+
+def MPYUIr16:
+ MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ []>;
+
+// mpya: 16 x 16 + 16 -> 32 bit result
+class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b0011, OOL, IOL,
+ "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
+ pattern>;
+
+def MPYAv4i32:
+ MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4i32 VECREG:$rT),
+ (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))),
+ (v4i32 VECREG:$rC)))]>;
+
+def MPYAr32:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sext:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sextinreg:
+ MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
+ (sext_inreg R32C:$rB, i16)),
+ R32C:$rC))]>;
+
+// mpyh: multiply high, used to synthesize 32-bit multiplies
+class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10100011110, OOL, IOL,
+ "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYHv4i32:
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYHr32:
+ MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
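+// A full 32-bit multiply is typically synthesized from these 16-bit parts,
+// e.g. (an illustrative sketch of the usual SPU idiom, not a pattern defined
+// here):
+//   mpyh $t0, $a, $b    ; (a_hi * b_lo) << 16
+//   mpyh $t1, $b, $a    ; (b_hi * a_lo) << 16
+//   mpyu $t2, $a, $b    ; a_lo * b_lo, unsigned
+//   a    $t0, $t0, $t1
+//   a    $rT, $t0, $t2
+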
+// mpys: multiply high and shift right (returns the top half of
+// a 16-bit multiply, sign extended to 32 bits)
+
+class MPYSInst<dag OOL, dag IOL>:
+ RRForm<0b11100011110, OOL, IOL,
+ "mpys\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYSv4i32:
+ MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYSr16:
+ MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
+
+// mpyhh: multiply high-high (returns the 32-bit result from multiplying
+// the top 16 bits of $rA and $rB)
+
+class MPYHHInst<dag OOL, dag IOL>:
+ RRForm<0b01100011110, OOL, IOL,
+ "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHv8i16:
+ MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHr32:
+ MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhha: Multiply high-high, add to $rT:
+
+class MPYHHAInst<dag OOL, dag IOL>:
+ RRForm<0b01100010110, OOL, IOL,
+ "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAvec:
+ MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAr32:
+ MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+// +-------+-------+ +-------+-------+ +---------+
+// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
+// +-------+-------+ +-------+-------+ +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
+
+class MPYHHUInst<dag OOL, dag IOL>:
+ RRForm<0b01110011110, OOL, IOL,
+ "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHUv4i32:
+ MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHUr32:
+ MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhau: Multiply high-high, unsigned, add to $rT:
+
+class MPYHHAUInst<dag OOL, dag IOL>:
+ RRForm<0b01110010110, OOL, IOL,
+ "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAUvec:
+ MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAUr32:
+ MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// clz: Count leading zeroes
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
+ IntegerOp, pattern>;
+
+class CLZRegInst<RegisterClass rclass>:
+ CLZInst<(outs rclass:$rT), (ins rclass:$rA),
+ [(set rclass:$rT, (ctlz rclass:$rA))]>;
+
+class CLZVecInst<ValueType vectype>:
+ CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
+
+multiclass CountLeadingZeroes {
+ def v4i32 : CLZVecInst<v4i32>;
+ def r32 : CLZRegInst<R32C>;
+}
+
+defm CLZ : CountLeadingZeroes;
+
+// cntb: Count ones in bytes (aka "population count")
+//
+// NOTE: This instruction is really a vector instruction, but the custom
+// lowering code uses it in unorthodox ways to support CTPOP for other
+// data types!
+
+def CNTBv16i8:
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
+
+def CNTBv8i16 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
+
+def CNTBv4i32 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
+
+// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
+// quantity stored into $rT's slot 0; the upper 16 bits are zeroed, as are
+// slots 1-3.
+//
+// Note: This instruction "pairs" with the fsmb instruction for all of the
+// various types defined here.
+//
+// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
+// a vector or register.
+
+class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
+
+class GBBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBBVecInst<ValueType vectype>:
+ GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsFromBytes {
+ def v16i8_r32: GBBRegInst<R32C, v16i8>;
+ def v16i8_r16: GBBRegInst<R16C, v16i8>;
+ def v16i8: GBBVecInst<v16i8>;
+}
+
+defm GBB: GatherBitsFromBytes;
+
+// gbh: Gather all low order bits from each halfword in $rA into a single
+// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
+// and slots 1-3 also set to 0.
+//
+// See notes for GBBInst, above.
+
+class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBHRegInst<RegisterClass rclass, ValueType vectype>:
+ GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBHVecInst<ValueType vectype>:
+ GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsHalfword {
+ def v8i16_r32: GBHRegInst<R32C, v8i16>;
+ def v8i16_r16: GBHRegInst<R16C, v8i16>;
+ def v8i16: GBHVecInst<v8i16>;
+}
+
+defm GBH: GatherBitsHalfword;
+
+// gb: Gather all low order bits from each word in $rA into a single
+// 4-bit quantity stored in $rT's slot 0; the upper bits of $rT and
+// slots 1-3 are set to 0.
+//
+// See notes for gbb, above.
+
+class GBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBVecInst<ValueType vectype>:
+ GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsWord {
+ def v4i32_r32: GBRegInst<R32C, v4i32>;
+ def v4i32_r16: GBRegInst<R16C, v4i32>;
+ def v4i32: GBVecInst<v4i32>;
+}
+
+defm GB: GatherBitsWord;
+
+// avgb: average bytes
+def AVGB:
+ RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "avgb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// absdb: absolute difference of bytes
+def ABSDB:
+ RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "absdb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// sumb: sum bytes into halfwords
+def SUMB:
+ RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sumb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// Sign extension operations:
+class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL,
+ "xsbh\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSBHVecInst<ValueType vectype>:
+ XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
+
+class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
+ pattern>;
+
+multiclass ExtendByteHalfword {
+ def v16i8: XSBHVecInst<v8i16>;
+ def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
+ def r16: XSBHInRegInst<R16C,
+ [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
+
+  // 32-bit form of XSBH: used when sign extending 8-bit quantities all the
+  // way to 32-bit quantities via a 32-bit register (see the sext 8->32
+  // pattern below). Intentionally doesn't match a pattern, because we want
+  // the sext 8->32 pattern to do the work for us, namely because we need the
+  // extra XSHWr32.
+ def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
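+
+  // Illustrative lowering (a sketch, not a pattern defined here): a
+  // "sext i8 -> i32" is expected to select as an "xsbh" followed by an
+  // "xshw", both operating within a 32-bit register.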
+
+ // Same as the 32-bit version, but for i64
+ def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign extend halfwords to words:
+
+class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
+ IntegerOp, pattern>;
+
+class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
+ [(set (out_vectype VECREG:$rDest),
+ (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
+ pattern>;
+
+class XSHWRegInst<RegisterClass rclass>:
+ XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
+ [(set rclass:$rDest, (sext R16C:$rSrc))]>;
+
+multiclass ExtendHalfwordWord {
+ def v4i32: XSHWVecInst<v4i32, v8i16>;
+
+ def r16: XSHWRegInst<R32C>;
+
+ def r32: XSHWInRegInst<R32C,
+ [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
+ def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSHW : ExtendHalfwordWord;
+
+// Sign-extend words to doublewords (32->64 bits)
+
+class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
+    XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+             [(set (out_vectype VECREG:$rDst),
+                   (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
+ XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
+ [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
+
+multiclass ExtendWordToDoubleWord {
+ def v2i64: XSWDVecInst<v4i32, v2i64>;
+ def r64: XSWDRegInst<R32C, R64C>;
+
+ def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
+ [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
+}
+
+defm XSWD : ExtendWordToDoubleWord;
+
+// AND operations
+
+class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDVecInst<ValueType vectype>:
+ ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ANDRegInst<RegisterClass rclass>:
+ ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseAnd
+{
+ def v16i8: ANDVecInst<v16i8>;
+ def v8i16: ANDVecInst<v8i16>;
+ def v4i32: ANDVecInst<v4i32>;
+ def v2i64: ANDVecInst<v2i64>;
+
+ def r128: ANDRegInst<GPRC>;
+ def r64: ANDRegInst<R64C>;
+ def r32: ANDRegInst<R32C>;
+ def r16: ANDRegInst<R16C>;
+ def r8: ANDRegInst<R8C>;
+
+ //===---------------------------------------------
+ // Special instructions to perform the fabs instruction
+ def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ //===---------------------------------------------
+
+  // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
+  // quantities -- see the 16->32 zext pattern.
+  //
+  // This pattern is somewhat artificial: the compiler might conceivably
+  // generate something that matches it, but it is unlikely to do so.
+
+ def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
+}
+
+defm AND : BitwiseAnd;
+
+// N.B.: vnot_conv is one of those special target selection pattern fragments,
+// in which we expect there to be a bit_convert on the constant. Bear in mind
+// that llvm translates "not <reg>" to "xor <reg>, -1" (or in this case, a
+// constant -1 vector.)
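+//
+// e.g. (illustrative LLVM IR): "%n = xor <4 x i32> %v, <i32 -1, i32 -1,
+// i32 -1, i32 -1>" is what a vector "not" looks like by the time it reaches
+// instruction selection.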
+
+class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (and (vectype VECREG:$rA),
+ (vnot_frag (vectype VECREG:$rB))))]>;
+
+class ANDCRegInst<RegisterClass rclass>:
+ ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass AndComplement
+{
+ def v16i8: ANDCVecInst<v16i8>;
+ def v8i16: ANDCVecInst<v8i16>;
+ def v4i32: ANDCVecInst<v4i32>;
+ def v2i64: ANDCVecInst<v2i64>;
+
+ def r128: ANDCRegInst<GPRC>;
+ def r64: ANDCRegInst<R64C>;
+ def r32: ANDCRegInst<R32C>;
+ def r16: ANDCRegInst<R16C>;
+ def r8: ANDCRegInst<R8C>;
+
+ // Sometimes, the xor pattern has a bitcast constant:
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_conv>;
+}
+
+defm ANDC : AndComplement;
+
+class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndByteImm
+{
+ def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT),
+ (and (v16i8 VECREG:$rA),
+ (v16i8 v16i8U8Imm:$val)))]>;
+
+ def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
+}
+
+defm ANDBI : AndByteImm;
+
+class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndHalfwordImm
+{
+ def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+ def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Zero-extend i8 to i16:
+ def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
+}
+
+defm ANDHI : AndHalfwordImm;
+
+class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass AndWordImm
+{
+ def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
+
+ def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
+ // pattern below.
+ def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
+ // zext 16->32 pattern below.
+ //
+ // Note that this pattern is somewhat artificial, since it might match
+ // something the compiler generates but is unlikely to occur in practice.
+ def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
+}
+
+defm ANDI : AndWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Bitwise OR group:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Bitwise "or" (N.B.: These are also register-register copy instructions...)
+class ORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORVecInst<ValueType vectype>:
+ ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ORRegInst<RegisterClass rclass>:
+ ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
+
+// ORCvtForm: OR conversion form
+//
+// This is used to "convert" the preferred slot to its vector equivalent, as
+// well as convert a vector back to its preferred slot.
+//
+// These are effectively no-ops, but need to exist for proper type conversion
+// and type coercion.
+
+class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
+ : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = 0b10000010000;
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+class ORPromoteScalar<RegisterClass rclass>:
+ ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>;
+
+class ORExtractElt<RegisterClass rclass>:
+ ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
+
+/* class ORCvtRegGPRC<RegisterClass rclass>:
+ ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
+
+/* class ORCvtGPRCReg<RegisterClass rclass>:
+ ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
+
+class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
+ ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
+
+class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
+ ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
+
+class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
+ ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
+
+class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
+ ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
+
+class ORCvtGPRCVec:
+ ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
+
+class ORCvtVecGPRC:
+ ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
+
+multiclass BitwiseOr
+{
+ def v16i8: ORVecInst<v16i8>;
+ def v8i16: ORVecInst<v8i16>;
+ def v4i32: ORVecInst<v4i32>;
+ def v2i64: ORVecInst<v2i64>;
+
+ def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4f32 VECREG:$rT),
+ (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))))]>;
+
+ def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v2f64 VECREG:$rT),
+ (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)))))]>;
+
+ def r128: ORRegInst<GPRC>;
+ def r64: ORRegInst<R64C>;
+ def r32: ORRegInst<R32C>;
+ def r16: ORRegInst<R16C>;
+ def r8: ORRegInst<R8C>;
+
+ // OR instructions used to copy f32 and f64 registers.
+ def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [/* no pattern */]>;
+
+ def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ [/* no pattern */]>;
+
+ // scalar->vector promotion, prefslot2vec:
+ def v16i8_i8: ORPromoteScalar<R8C>;
+ def v8i16_i16: ORPromoteScalar<R16C>;
+ def v4i32_i32: ORPromoteScalar<R32C>;
+ def v2i64_i64: ORPromoteScalar<R64C>;
+ def v4f32_f32: ORPromoteScalar<R32FP>;
+ def v2f64_f64: ORPromoteScalar<R64FP>;
+
+ // vector->scalar demotion, vec2prefslot:
+ def i8_v16i8: ORExtractElt<R8C>;
+ def i16_v8i16: ORExtractElt<R16C>;
+ def i32_v4i32: ORExtractElt<R32C>;
+ def i64_v2i64: ORExtractElt<R64C>;
+ def f32_v4f32: ORExtractElt<R32FP>;
+ def f64_v2f64: ORExtractElt<R64FP>;
+
+ // Conversion from vector to GPRC
+ def i128_vec: ORCvtVecGPRC;
+
+ // Conversion from GPRC to vector
+ def vec_i128: ORCvtGPRCVec;
+
+/*
+ // Conversion from register to GPRC
+ def i128_r64: ORCvtRegGPRC<R64C>;
+ def i128_f64: ORCvtRegGPRC<R64FP>;
+ def i128_r32: ORCvtRegGPRC<R32C>;
+ def i128_f32: ORCvtRegGPRC<R32FP>;
+ def i128_r16: ORCvtRegGPRC<R16C>;
+ def i128_r8: ORCvtRegGPRC<R8C>;
+
+ // Conversion from GPRC to register
+ def r64_i128: ORCvtGPRCReg<R64C>;
+ def f64_i128: ORCvtGPRCReg<R64FP>;
+ def r32_i128: ORCvtGPRCReg<R32C>;
+ def f32_i128: ORCvtGPRCReg<R32FP>;
+ def r16_i128: ORCvtGPRCReg<R16C>;
+ def r8_i128: ORCvtGPRCReg<R8C>;
+*/
+/*
+ // Conversion from register to R32C:
+ def r32_r16: ORCvtFormRegR32<R16C>;
+ def r32_r8: ORCvtFormRegR32<R8C>;
+
+ // Conversion from R32C to register
+ def r32_r16: ORCvtFormR32Reg<R16C>;
+ def r32_r8: ORCvtFormR32Reg<R8C>;
+*/
+
+ // Conversion from R64C to register:
+ def r32_r64: ORCvtFormR64Reg<R32C>;
+ // def r16_r64: ORCvtFormR64Reg<R16C>;
+ // def r8_r64: ORCvtFormR64Reg<R8C>;
+
+ // Conversion to R64C from register:
+ def r64_r32: ORCvtFormRegR64<R32C>;
+ // def r64_r16: ORCvtFormRegR64<R16C>;
+ // def r64_r8: ORCvtFormRegR64<R8C>;
+
+ // bitconvert patterns:
+ def r32_f32: ORCvtFormR32Reg<R32FP,
+ [(set R32FP:$rT, (bitconvert R32C:$rA))]>;
+ def f32_r32: ORCvtFormRegR32<R32FP,
+ [(set R32C:$rT, (bitconvert R32FP:$rA))]>;
+
+ def r64_f64: ORCvtFormR64Reg<R64FP,
+ [(set R64FP:$rT, (bitconvert R64C:$rA))]>;
+ def f64_r64: ORCvtFormRegR64<R64FP,
+ [(set R64C:$rT, (bitconvert R64FP:$rA))]>;
+}
+
+defm OR : BitwiseOr;
+
+// scalar->vector promotion patterns (preferred slot to vector):
+def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
+ (ORv16i8_i8 R8C:$rA)>;
+
+def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
+ (ORv8i16_i16 R16C:$rA)>;
+
+def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
+ (ORv4i32_i32 R32C:$rA)>;
+
+def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
+ (ORv2i64_i64 R64C:$rA)>;
+
+def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
+ (ORv4f32_f32 R32FP:$rA)>;
+
+def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
+ (ORv2f64_f64 R64FP:$rA)>;
+
+// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise
+// known as converting the vector back to its preferred slot
+
+def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
+ (ORi8_v16i8 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
+ (ORi16_v8i16 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
+ (ORi32_v4i32 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
+ (ORi64_v2i64 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
+ (ORf32_v4f32 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
+ (ORf64_v2f64 VECREG:$rA)>;
+
+// Load Register: This is an assembler alias for a bitwise OR of a register
+// against itself. It's here because it brings some clarity to assembly
+// language output.
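+//
+// e.g. (illustrative): "lr $3, $4" assembles to exactly the same bits as
+// "or $3, $4, $4" (note the identical Inst{} encoding below).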
+
+let hasCtrlDep = 1 in {
+ class LRInst<dag OOL, dag IOL>
+ : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = [/*no pattern*/];
+
+ let Inst{0-10} = 0b10000010000; /* It's an OR operation */
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+ }
+
+ class LRVecInst<ValueType vectype>:
+ LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+ class LRRegInst<RegisterClass rclass>:
+ LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
+
+ multiclass LoadRegister {
+ def v2i64: LRVecInst<v2i64>;
+ def v2f64: LRVecInst<v2f64>;
+ def v4i32: LRVecInst<v4i32>;
+ def v4f32: LRVecInst<v4f32>;
+ def v8i16: LRVecInst<v8i16>;
+ def v16i8: LRVecInst<v16i8>;
+
+ def r128: LRRegInst<GPRC>;
+ def r64: LRRegInst<R64C>;
+ def f64: LRRegInst<R64FP>;
+ def r32: LRRegInst<R32C>;
+ def f32: LRRegInst<R32FP>;
+ def r16: LRRegInst<R16C>;
+ def r8: LRRegInst<R8C>;
+ }
+
+ defm LR: LoadRegister;
+}
+
+// ORC: Bitwise "or" with complement (c = a | ~b)
+
+class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORCVecInst<ValueType vectype>:
+ ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vnot (vectype VECREG:$rB))))]>;
+
+class ORCRegInst<RegisterClass rclass>:
+ ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass BitwiseOrComplement
+{
+ def v16i8: ORCVecInst<v16i8>;
+ def v8i16: ORCVecInst<v8i16>;
+ def v4i32: ORCVecInst<v4i32>;
+ def v2i64: ORCVecInst<v2i64>;
+
+ def r128: ORCRegInst<GPRC>;
+ def r64: ORCRegInst<R64C>;
+ def r32: ORCRegInst<R32C>;
+ def r16: ORCRegInst<R16C>;
+ def r8: ORCRegInst<R8C>;
+}
+
+defm ORC : BitwiseOrComplement;
+
+// OR byte immediate
+class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype immpred:$val)))]>;
+
+multiclass BitwiseOrByteImm
+{
+ def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
+
+ def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
+}
+
+defm ORBI : BitwiseOrByteImm;
+
+// OR halfword immediate
+class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+multiclass BitwiseOrHalfwordImm
+{
+ def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
+
+ def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Specialized ORHI form used to promote 8-bit registers to 16-bit
+ def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
+ [(set R16C:$rT, (or (anyext R8C:$rA),
+ i16ImmSExt10:$val))]>;
+}
+
+defm ORHI : BitwiseOrHalfwordImm;
+
+class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+// Bitwise "or" with immediate
+multiclass BitwiseOrImm
+{
+ def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
+
+ def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val),
+ [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>;
+
+  // i16i32: hacked version of the ori instruction to extend 16-bit quantities
+  // to 32-bit quantities. Used exclusively to match "anyext" conversions (see
+  // the "anyext 16->32" pattern below.)
+ def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R16C:$rA),
+ i32ImmSExt10:$val))]>;
+
+  // i8i32: hacked version of the ORI instruction to extend 8-bit quantities
+  // to 32-bit quantities. Used exclusively to match "anyext" conversions (see
+  // the corresponding anyext pattern below.)
+ def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R8C:$rA),
+ i32ImmSExt10:$val))]>;
+}
+
+defm ORI : BitwiseOrImm;
+
+// ORX: "or" across the vector: ORs $rA's word slots together, leaving the
+// result in $rT[0]; slots 1-3 are zeroed.
+//
+// FIXME: Needs to match an intrinsic pattern.
+def ORXv4i32:
+ RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "orx\t$rT, $rA, $rB", IntegerOp,
+ []>;
+
+// XOR:
+
+class XORInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class XORVecInst<ValueType vectype>:
+ XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class XORRegInst<RegisterClass rclass>:
+ XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseExclusiveOr
+{
+ def v16i8: XORVecInst<v16i8>;
+ def v8i16: XORVecInst<v8i16>;
+ def v4i32: XORVecInst<v4i32>;
+ def v2i64: XORVecInst<v2i64>;
+
+ def r128: XORRegInst<GPRC>;
+ def r64: XORRegInst<R64C>;
+ def r32: XORRegInst<R32C>;
+ def r16: XORRegInst<R16C>;
+ def r8: XORRegInst<R8C>;
+
+ // XOR instructions used to negate f32 and f64 quantities.
+
+ def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+ def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* no pattern */]>;
+
+ def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern, see fneg{32,64} */]>;
+}
+
+defm XOR : BitwiseExclusiveOr;
+
+//==----------------------------------------------------------
+
+class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass XorByteImm
+{
+ def v16i8:
+ XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
+
+ def r8:
+ XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
+}
+
+defm XORBI : XorByteImm;
+
+def XORHIv8i16:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def XORHIr16:
+ RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
+
+def XORIv4i32:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
+ v4i32SExt10Imm:$val))]>;
+
+def XORIr32:
+ RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
+
+// NAND:
+
+class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NANDVecInst<ValueType vectype>:
+ NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NANDRegInst<RegisterClass rclass>:
+ NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNand
+{
+ def v16i8: NANDVecInst<v16i8>;
+ def v8i16: NANDVecInst<v8i16>;
+ def v4i32: NANDVecInst<v4i32>;
+ def v2i64: NANDVecInst<v2i64>;
+
+ def r128: NANDRegInst<GPRC>;
+ def r64: NANDRegInst<R64C>;
+ def r32: NANDRegInst<R32C>;
+ def r16: NANDRegInst<R16C>;
+ def r8: NANDRegInst<R8C>;
+}
+
+defm NAND : BitwiseNand;
+
+// NOR:
+
+class NORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NORVecInst<ValueType vectype>:
+ NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NORRegInst<RegisterClass rclass>:
+ NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNor
+{
+ def v16i8: NORVecInst<v16i8>;
+ def v8i16: NORVecInst<v8i16>;
+ def v4i32: NORVecInst<v4i32>;
+ def v2i64: NORVecInst<v2i64>;
+
+ def r128: NORRegInst<GPRC>;
+ def r64: NORRegInst<R64C>;
+ def r32: NORRegInst<R32C>;
+ def r16: NORRegInst<R16C>;
+ def r8: NORRegInst<R8C>;
+}
+
+defm NOR : BitwiseNor;
+
+// Select bits:
+class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
+ IntegerOp, pattern>;
+
+class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
+ (and (vnot_frag (vectype VECREG:$rC)),
+ (vectype VECREG:$rA))))]>;
+
+class SELBVecVCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (select (vectype VECREG:$rC),
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBVecCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
+ [(set (vectype VECREG:$rT),
+ (select R32C:$rC,
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBRegInst<RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
+ [(set rclass:$rT,
+ (or (and rclass:$rB, rclass:$rC),
+ (and rclass:$rA, (not rclass:$rC))))]>;
+
+class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
+ [(set rclass:$rT,
+ (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
+
+multiclass SelectBits
+{
+ def v16i8: SELBVecInst<v16i8>;
+ def v8i16: SELBVecInst<v8i16>;
+ def v4i32: SELBVecInst<v4i32>;
+ def v2i64: SELBVecInst<v2i64, vnot_conv>;
+
+ def r128: SELBRegInst<GPRC>;
+ def r64: SELBRegInst<R64C>;
+ def r32: SELBRegInst<R32C>;
+ def r16: SELBRegInst<R16C>;
+ def r8: SELBRegInst<R8C>;
+
+ def v16i8_cond: SELBVecCondInst<v16i8>;
+ def v8i16_cond: SELBVecCondInst<v8i16>;
+ def v4i32_cond: SELBVecCondInst<v4i32>;
+ def v2i64_cond: SELBVecCondInst<v2i64>;
+
+  def v16i8_vcond: SELBVecVCondInst<v16i8>;
+  def v8i16_vcond: SELBVecVCondInst<v8i16>;
+  def v4i32_vcond: SELBVecVCondInst<v4i32>;
+  def v2i64_vcond: SELBVecVCondInst<v2i64>;
+
+ def v4f32_cond:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4f32 VECREG:$rT),
+ (select (v4i32 VECREG:$rC),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)))]>;
+
+ // SELBr64_cond is defined in SPU64InstrInfo.td
+ def r32_cond: SELBRegCondInst<R32C, R32C>;
+ def f32_cond: SELBRegCondInst<R32C, R32FP>;
+ def r16_cond: SELBRegCondInst<R16C, R16C>;
+ def r8_cond: SELBRegCondInst<R8C, R8C>;
+}
+
+defm SELB : SelectBits;
+
+class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
+ Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
+ (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+
+def : SPUselbPatVec<v16i8, SELBv16i8>;
+def : SPUselbPatVec<v8i16, SELBv8i16>;
+def : SPUselbPatVec<v4i32, SELBv4i32>;
+def : SPUselbPatVec<v2i64, SELBv2i64>;
+
+class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
+ Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
+ (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
+
+def : SPUselbPatReg<R8C, SELBr8>;
+def : SPUselbPatReg<R16C, SELBr16>;
+def : SPUselbPatReg<R32C, SELBr32>;
+def : SPUselbPatReg<R64C, SELBr64>;
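+
+// SELB computes a bitwise select, $rT = ($rB & $rC) | ($rA & ~$rC): wherever
+// a $rC bit is 1 the result bit comes from $rB, otherwise from $rA. The
+// select conditional patterns further below rely on this identity.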
+
+// EQV: Equivalence (each result bit is 1 where the corresponding bits of
+// $rA and $rB are equal, otherwise 0)
+//
+// Note: There are a lot of ways to match this bit operator and these patterns
+// attempt to be as exhaustive as possible.
+
+class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class EQVVecInst<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (and (vnot (vectype VECREG:$rA)),
+ (vnot (vectype VECREG:$rB)))))]>;
+
+class EQVRegInst<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
+ (and (not rclass:$rA), (not rclass:$rB))))]>;
+
+class EQVVecPattern1<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern1<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
+
+class EQVVecPattern2<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
+
+class EQVRegPattern2<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT,
+ (or (and rclass:$rA, rclass:$rB),
+ (not (or rclass:$rA, rclass:$rB))))]>;
+
+class EQVVecPattern3<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern3<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitEquivalence
+{
+ def v16i8: EQVVecInst<v16i8>;
+ def v8i16: EQVVecInst<v8i16>;
+ def v4i32: EQVVecInst<v4i32>;
+ def v2i64: EQVVecInst<v2i64>;
+
+ def v16i8_1: EQVVecPattern1<v16i8>;
+ def v8i16_1: EQVVecPattern1<v8i16>;
+ def v4i32_1: EQVVecPattern1<v4i32>;
+ def v2i64_1: EQVVecPattern1<v2i64>;
+
+ def v16i8_2: EQVVecPattern2<v16i8>;
+ def v8i16_2: EQVVecPattern2<v8i16>;
+ def v4i32_2: EQVVecPattern2<v4i32>;
+ def v2i64_2: EQVVecPattern2<v2i64>;
+
+ def v16i8_3: EQVVecPattern3<v16i8>;
+ def v8i16_3: EQVVecPattern3<v8i16>;
+ def v4i32_3: EQVVecPattern3<v4i32>;
+ def v2i64_3: EQVVecPattern3<v2i64>;
+
+ def r128: EQVRegInst<GPRC>;
+ def r64: EQVRegInst<R64C>;
+ def r32: EQVRegInst<R32C>;
+ def r16: EQVRegInst<R16C>;
+ def r8: EQVRegInst<R8C>;
+
+ def r128_1: EQVRegPattern1<GPRC>;
+ def r64_1: EQVRegPattern1<R64C>;
+ def r32_1: EQVRegPattern1<R32C>;
+ def r16_1: EQVRegPattern1<R16C>;
+ def r8_1: EQVRegPattern1<R8C>;
+
+ def r128_2: EQVRegPattern2<GPRC>;
+ def r64_2: EQVRegPattern2<R64C>;
+ def r32_2: EQVRegPattern2<R32C>;
+ def r16_2: EQVRegPattern2<R16C>;
+ def r8_2: EQVRegPattern2<R8C>;
+
+ def r128_3: EQVRegPattern3<GPRC>;
+ def r64_3: EQVRegPattern3<R64C>;
+ def r32_3: EQVRegPattern3<R32C>;
+ def r16_3: EQVRegPattern3<R16C>;
+ def r8_3: EQVRegPattern3<R8C>;
+}
+
+defm EQV: BitEquivalence;
+
+//===----------------------------------------------------------------------===//
+// Vector shuffle...
+//===----------------------------------------------------------------------===//
+// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
+// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
+// matcher to emit something when the LowerVECTOR_SHUFFLE generates a node with
+// the SPUISD::SHUFB opcode.
+//===----------------------------------------------------------------------===//
+
+class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
+ IntegerOp, pattern>;
+
+class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
+ SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (resultvec VECREG:$rT),
+ (SPUshuffle (resultvec VECREG:$rA),
+ (resultvec VECREG:$rB),
+ (maskvec VECREG:$rC)))]>;
+
+class SHUFBGPRCInst:
+ SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+multiclass ShuffleBytes
+{
+ def v16i8 : SHUFBVecInst<v16i8, v16i8>;
+ def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
+ def v8i16 : SHUFBVecInst<v8i16, v16i8>;
+ def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
+ def v4i32 : SHUFBVecInst<v4i32, v16i8>;
+ def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+ def v2i64 : SHUFBVecInst<v2i64, v16i8>;
+ def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
+
+ def v4f32 : SHUFBVecInst<v4f32, v16i8>;
+ def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
+
+ def v2f64 : SHUFBVecInst<v2f64, v16i8>;
+ def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
+
+ def gprc : SHUFBGPRCInst;
+}
+
+defm SHUFB : ShuffleBytes;
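+
+// Sketch of SHUFB's semantics (assuming the usual SPU behavior): each byte of
+// the control vector $rC selects one byte of the 32-byte concatenation
+// $rA:$rB (e.g. control byte 0x13 picks byte 3 of $rB); certain high
+// control-byte values instead produce the constant bytes 0x00, 0xff or 0x80.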
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate group:
+//===----------------------------------------------------------------------===//
+
+class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class SHLHVecInst<ValueType vectype>:
+ SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>;
+
+multiclass ShiftLeftHalfword
+{
+ def v8i16: SHLHVecInst<v8i16>;
+ def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
+ def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
+}
+
+defm SHLH : ShiftLeftHalfword;
+
+//===----------------------------------------------------------------------===//
+
+class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class SHLHIVecInst<ValueType vectype>:
+ SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+
+multiclass ShiftLeftHalfwordImm
+{
+ def v8i16: SHLHIVecInst<v8i16>;
+ def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
+}
+
+defm SHLHI : ShiftLeftHalfwordImm;
+
+def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (SHLHIv8i16 VECREG:$rA, uimm7:$val)>;
+
+def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
+ (SHLHIr16 R16C:$rA, uimm7:$val)>;
+
+//===----------------------------------------------------------------------===//
+
+class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+multiclass ShiftLeftWord
+{
+ def v4i32:
+ SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), R16C:$rB))]>;
+ def r32:
+ SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
+}
+
+defm SHL: ShiftLeftWord;
+
+//===----------------------------------------------------------------------===//
+
+class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+multiclass ShiftLeftWordImm
+{
+ def v4i32:
+ SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+ def r32:
+ SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
+ [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLI : ShiftLeftWordImm;
+
+//===----------------------------------------------------------------------===//
+// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
+// register) to the left. Vector form is here to ensure type correctness.
+//
+// The shift count is in the lowest 3 bits (bits 29-31) of $rB, so only bit
+// shifts of 0-7 are actually possible.
+//
+// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
+// to shift i64 and i128: SHLQBI handles the residual bit count left over
+// after shifting by bytes with SHLQBY.
+
+class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class SHLQBIVecInst<ValueType vectype>:
+ SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
+
+class SHLQBIRegInst<RegisterClass rclass>:
+ SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadByBits
+{
+ def v16i8: SHLQBIVecInst<v16i8>;
+ def v8i16: SHLQBIVecInst<v8i16>;
+ def v4i32: SHLQBIVecInst<v4i32>;
+ def v4f32: SHLQBIVecInst<v4f32>;
+ def v2i64: SHLQBIVecInst<v2i64>;
+ def v2f64: SHLQBIVecInst<v2f64>;
+
+ def r128: SHLQBIRegInst<GPRC>;
+}
+
+defm SHLQBI : ShiftLeftQuadByBits;
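+
+// Worked sketch of the i64/i128 decomposition described above: a variable
+// 128-bit left shift by $n presumably splits into
+//   shlqby  $t,  $rA, ($n >> 3)    ; shift by $n / 8 bytes
+//   shlqbi  $rT, $t,  ($n & 7)     ; then by the residual 0-7 bits
+// which is why SHLQBI only needs the low three bits of its count.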
+
+// See note above on SHLQBI. In this case, the predicate actually does the
+// enforcement, whereas with SHLQBI, we have to "take it on faith."
+class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class SHLQBIIVecInst<ValueType vectype>:
+ SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
+
+multiclass ShiftLeftQuadByBitsImm
+{
+ def v16i8 : SHLQBIIVecInst<v16i8>;
+ def v8i16 : SHLQBIIVecInst<v8i16>;
+ def v4i32 : SHLQBIIVecInst<v4i32>;
+ def v4f32 : SHLQBIIVecInst<v4f32>;
+ def v2i64 : SHLQBIIVecInst<v2i64>;
+ def v2f64 : SHLQBIIVecInst<v2f64>;
+}
+
+defm SHLQBII : ShiftLeftQuadByBitsImm;
+
+// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
+// not by bits. See notes above on SHLQBI.
+
+class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class SHLQBYVecInst<ValueType vectype>:
+ SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
+
+multiclass ShiftLeftQuadBytes
+{
+ def v16i8: SHLQBYVecInst<v16i8>;
+ def v8i16: SHLQBYVecInst<v8i16>;
+ def v4i32: SHLQBYVecInst<v4i32>;
+ def v4f32: SHLQBYVecInst<v4f32>;
+ def v2i64: SHLQBYVecInst<v2i64>;
+ def v2f64: SHLQBYVecInst<v2f64>;
+ def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
+}
+
+defm SHLQBY: ShiftLeftQuadBytes;
+
+class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class SHLQBYIVecInst<ValueType vectype>:
+ SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
+
+multiclass ShiftLeftQuadBytesImm
+{
+ def v16i8: SHLQBYIVecInst<v16i8>;
+ def v8i16: SHLQBYIVecInst<v8i16>;
+ def v4i32: SHLQBYIVecInst<v4i32>;
+ def v4f32: SHLQBYIVecInst<v4f32>;
+ def v2i64: SHLQBYIVecInst<v2i64>;
+ def v2f64: SHLQBYIVecInst<v2f64>;
+ def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
+ [(set GPRC:$rT,
+ (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLQBYI : ShiftLeftQuadBytesImm;
+
+class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class SHLQBYBIVecInst<ValueType vectype>:
+ SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class SHLQBYBIRegInst<RegisterClass rclass>:
+ SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadBytesBitCount
+{
+ def v16i8: SHLQBYBIVecInst<v16i8>;
+ def v8i16: SHLQBYBIVecInst<v8i16>;
+ def v4i32: SHLQBYBIVecInst<v4i32>;
+ def v4f32: SHLQBYBIVecInst<v4f32>;
+ def v2i64: SHLQBYBIVecInst<v2i64>;
+ def v2f64: SHLQBYBIVecInst<v2f64>;
+
+ def r128: SHLQBYBIRegInst<GPRC>;
+}
+
+defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTHVecInst<ValueType vectype>:
+ ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, VECREG:$rB))]>;
+
+class ROTHRegInst<RegisterClass rclass>:
+ ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
+
+multiclass RotateLeftHalfword
+{
+ def v8i16: ROTHVecInst<v8i16>;
+ def r16: ROTHRegInst<R16C>;
+}
+
+defm ROTH: RotateLeftHalfword;
+
+def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword, immediate:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTHIVecInst<ValueType vectype>:
+ ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
+
+multiclass RotateLeftHalfwordImm
+{
+ def v8i16: ROTHIVecInst<v8i16>;
+ def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
+ def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm ROTHI: RotateLeftHalfwordImm;
+
+def : Pat<(SPUvec_rotl VECREG:$rA, (i32 uimm7:$val)),
+ (ROTHIv8i16 VECREG:$rA, imm:$val)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTVecInst<ValueType vectype>:
+ ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
+
+class ROTRegInst<RegisterClass rclass>:
+ ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [(set rclass:$rT,
+ (rotl rclass:$rA, R32C:$rB))]>;
+
+multiclass RotateLeftWord
+{
+ def v4i32: ROTVecInst<v4i32>;
+ def r32: ROTRegInst<R32C>;
+}
+
+defm ROT: RotateLeftWord;
+
+// The rotate amount occupies the same bits whether we've got an 8-bit, 16-bit
+// or 32-bit register.
+def ROTr32_r16_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def ROTr32_r8_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
+
+class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
+
+multiclass RotateLeftWordImm
+{
+ def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
+ def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
+
+ def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
+ def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
+ def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
+}
+
+defm ROTI : RotateLeftWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQBYVecInst<ValueType vectype>:
+ ROTQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left (vectype VECREG:$rA), R32C:$rB))]>;
+
+multiclass RotateQuadLeftByBytes
+{
+ def v16i8: ROTQBYVecInst<v16i8>;
+ def v8i16: ROTQBYVecInst<v8i16>;
+ def v4i32: ROTQBYVecInst<v4i32>;
+ def v4f32: ROTQBYVecInst<v4f32>;
+ def v2i64: ROTQBYVecInst<v2i64>;
+ def v2f64: ROTQBYVecInst<v2f64>;
+}
+
+defm ROTQBY: RotateQuadLeftByBytes;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count), immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTQBYIVecInst<ValueType vectype>:
+ ROTQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+
+multiclass RotateQuadByBytesImm
+{
+ def v16i8: ROTQBYIVecInst<v16i8>;
+ def v8i16: ROTQBYIVecInst<v8i16>;
+ def v4i32: ROTQBYIVecInst<v4i32>;
+ def v4f32: ROTQBYIVecInst<v4f32>;
+ def v2i64: ROTQBYIVecInst<v2i64>;
+  def v2f64: ROTQBYIVecInst<v2f64>;
+}
+
+defm ROTQBYI: RotateQuadByBytesImm;
+
+// See ROTQBY note above.
+class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00110011100, OOL, IOL,
+ "rotqbybi\t$rT, $rA, $shift",
+ RotateShift, pattern>;
+
+class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
+ ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
+
+multiclass RotateQuadByBytesByBitshift {
+ def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
+ def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
+ def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
+ def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
+}
+
+defm ROTQBYBI : RotateQuadByBytesByBitshift;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// See ROTQBY note above.
+//
+// Assume that the user of this instruction knows to shift the rotate count
+// into bit 29
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQBIVecInst<ValueType vectype>:
+ ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+class ROTQBIRegInst<RegisterClass rclass>:
+ ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCount
+{
+ def v16i8: ROTQBIVecInst<v16i8>;
+ def v8i16: ROTQBIVecInst<v8i16>;
+ def v4i32: ROTQBIVecInst<v4i32>;
+ def v2i64: ROTQBIVecInst<v2i64>;
+
+ def r128: ROTQBIRegInst<GPRC>;
+ def r64: ROTQBIRegInst<R64C>;
+}
+
+defm ROTQBI: RotateQuadByBitCount;
+
+class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCountImm
+{
+ def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
+ def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
+ def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
+
+ def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
+ def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
+}
+
+defm ROTQBII : RotateQuadByBitCountImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTHM v8i16 form:
+// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
+// so this only matches a synthetically generated/lowered code
+// fragment.
+// NOTE(2): $rB must be negated before the right rotate!
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+def ROTHMv8i16:
+ ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R32C:$rB),
+ (ROTHMv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R16C:$rB),
+ (ROTHMv8i16 VECREG:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R8C:$rB),
+ (ROTHMv8i16 VECREG:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+
+// ROTHM r16 form: Rotate a 16-bit quantity to the right, zero-filling at the
+// left.
+// Note: This instruction doesn't match a pattern because $rB must be negated
+// for the instruction to work. Thus, the patterns below the instruction!
+
+def ROTHMr16:
+ ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated! */]>;
+
+def : Pat<(srl R16C:$rA, R32C:$rB),
+ (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R16C:$rA, R16C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R16C:$rA, R8C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
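+
+// In the patterns above, (SFIr32 $rB, 0) supplies the negated shift count
+// (0 - $rB, assuming SFI is "subtract from immediate"), turning the logical
+// shift right into the right-rotate-and-mask the hardware implements.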
+
+// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
+// that the immediate can be complemented, so that the user doesn't have to
+// worry about it.
+
+class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+def ROTHMIv8i16:
+ ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def ROTHMIr16:
+ ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+// ROTM v4i32 form: See the ROTHM v8i16 comments.
+class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+def ROTMv4i32:
+ ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, R32C:$rB),
+ (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, R16C:$rB),
+ (ROTMv4i32 VECREG:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, R8C:$rB),
+ (ROTMv4i32 VECREG:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMr32:
+ ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(srl R32C:$rA, R32C:$rB),
+ (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R32C:$rA, R16C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R32C:$rA, R8C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+// ROTMI v4i32 form: See the comment for ROTHM v8i16.
+def ROTMIv4i32:
+ RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotateShift,
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, (i16 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, (i8 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
+
+// ROTMI r32 form: knows how to complement the immediate value.
+def ROTMIr32:
+ RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotateShift,
+ [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(srl R32C:$rA, (i16 imm:$val)),
+ (ROTMIr32 R32C:$rA, uimm7:$val)>;
+
+def : Pat<(srl R32C:$rA, (i8 imm:$val)),
+ (ROTMIr32 R32C:$rA, uimm7:$val)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTQMBY: The vector form exists merely so that type checking succeeds when
+// it is used in an instruction pattern. This instruction assumes that the
+// user knew to negate $rB.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQMBYVecInst<ValueType vectype>:
+ ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, $rB must be negated */]>;
+
+class ROTQMBYRegInst<RegisterClass rclass>:
+ ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytes
+{
+ def v16i8: ROTQMBYVecInst<v16i8>;
+ def v8i16: ROTQMBYVecInst<v8i16>;
+ def v4i32: ROTQMBYVecInst<v4i32>;
+ def v2i64: ROTQMBYVecInst<v2i64>;
+
+ def r128: ROTQMBYRegInst<GPRC>;
+ def r64: ROTQMBYRegInst<R64C>;
+}
+
+defm ROTQMBY : RotateQuadBytes;
+
+class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTQMBYIVecInst<ValueType vectype>:
+ ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+// 128-bit zero extension form:
+class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
+ ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytesImm
+{
+ def v16i8: ROTQMBYIVecInst<v16i8>;
+ def v8i16: ROTQMBYIVecInst<v8i16>;
+ def v4i32: ROTQMBYIVecInst<v4i32>;
+ def v2i64: ROTQMBYIVecInst<v2i64>;
+
+ def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
+ def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
+
+ def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
+ def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
+ def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
+ def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
+}
+
+defm ROTQMBYI : RotateQuadBytesImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad right and mask by bytes from bit count
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQMBYBIVecInst<ValueType vectype>:
+ ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+                 [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBitCount
+{
+ def v16i8: ROTQMBYBIVecInst<v16i8>;
+ def v8i16: ROTQMBYBIVecInst<v8i16>;
+ def v4i32: ROTQMBYBIVecInst<v4i32>;
+ def v2i64: ROTQMBYBIVecInst<v2i64>;
+}
+
+defm ROTQMBYBI: RotateMaskQuadByBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits
+// Note that the rotate amount has to be negated
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQMBIVecInst<ValueType vectype>:
+ ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class ROTQMBIRegInst<RegisterClass rclass>:
+ ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBits
+{
+ def v16i8: ROTQMBIVecInst<v16i8>;
+ def v8i16: ROTQMBIVecInst<v8i16>;
+ def v4i32: ROTQMBIVecInst<v4i32>;
+ def v2i64: ROTQMBIVecInst<v2i64>;
+
+ def r128: ROTQMBIRegInst<GPRC>;
+ def r64: ROTQMBIRegInst<R64C>;
+}
+
+defm ROTQMBI: RotateMaskQuadByBits;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTQMBIIVecInst<ValueType vectype>:
+ ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBIIRegInst<RegisterClass rclass>:
+ ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBitsImm
+{
+ def v16i8: ROTQMBIIVecInst<v16i8>;
+ def v8i16: ROTQMBIIVecInst<v8i16>;
+ def v4i32: ROTQMBIIVecInst<v4i32>;
+ def v2i64: ROTQMBIIVecInst<v2i64>;
+
+ def r128: ROTQMBIIRegInst<GPRC>;
+ def r64: ROTQMBIIRegInst<R64C>;
+}
+
+defm ROTQMBII: RotateMaskQuadByBitsImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def ROTMAHv8i16:
+ RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ "rotmah\t$rT, $rA, $rB", RotateShift,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R32C:$rB),
+ (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R16C:$rB),
+ (ROTMAHv8i16 VECREG:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R8C:$rB),
+ (ROTMAHv8i16 VECREG:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAHr16:
+ RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ "rotmah\t$rT, $rA, $rB", RotateShift,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R16C:$rA, R32C:$rB),
+ (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R16C:$rA, R16C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R16C:$rA, R8C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAHIv8i16:
+ RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ "rotmahi\t$rT, $rA, $val", RotateShift,
+ [(set (v8i16 VECREG:$rT),
+ (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>;
+
+def ROTMAHIr16:
+ RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
+ "rotmahi\t$rT, $rA, $val", RotateShift,
+ [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
+
+def : Pat<(sra R16C:$rA, (i32 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, uimm7:$val)>;
+
+def : Pat<(sra R16C:$rA, (i8 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, uimm7:$val)>;
+
+def ROTMAv4i32:
+ RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ "rotma\t$rT, $rA, $rB", RotateShift,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R32C:$rB),
+ (ROTMAv4i32 (v4i32 VECREG:$rA), (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R16C:$rB),
+ (ROTMAv4i32 (v4i32 VECREG:$rA),
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R8C:$rB),
+ (ROTMAv4i32 (v4i32 VECREG:$rA),
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAr32:
+ RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "rotma\t$rT, $rA, $rB", RotateShift,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R32C:$rA, R32C:$rB),
+ (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R32C:$rA, R16C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R32C:$rA, R8C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011110000, OOL, IOL,
+ "rotmai\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
+
+class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
+ [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
+
+multiclass RotateMaskAlgebraicImm {
+ def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
+ def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
+ def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
+ def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
+}
+
+defm ROTMAI : RotateMaskAlgebraicImm;
+
+//===----------------------------------------------------------------------===//
+// Branch and conditionals:
+//===----------------------------------------------------------------------===//
+
+let isTerminator = 1, isBarrier = 1 in {
+ // Halt If Equal (r32 preferred slot only, no vector form)
+ def HEQr32:
+ RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
+ "heq\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HEQIr32 :
+ RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
+ "heqi\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ // HGT/HGTI: These instructions use signed arithmetic for the comparison,
+ // contrasting with HLGT/HLGTI, which use unsigned comparison:
+ def HGTr32:
+ RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HGTIr32:
+ RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTr32:
+ RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hlgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTIr32:
+ RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hlgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+}
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Comparison operators for i8, i16 and i32:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByte
+{
+ def v16i8 :
+ CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+      [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
+                                       (v16i8 VECREG:$rB)))]>;
+
+ def r8 :
+ CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
+}
+
+class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByteImm
+{
+ def v16i8 :
+ CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
+}
+
+class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfword
+{
+ def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
+}
+
+class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfwordImm
+{
+ def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (seteq (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWord
+{
+ def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
+}
+
+class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWordImm
+{
+ def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByte
+{
+ def v16i8 :
+ CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+      [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
+                                       (v16i8 VECREG:$rB)))]>;
+
+ def r8 :
+ CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
+}
+
+class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByteImm
+{
+ def v16i8 :
+ CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfword
+{
+ def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
+}
+
+class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfwordImm
+{
+ def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setgt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWord
+{
+ def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
+}
+
+class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWordImm
+{
+ def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
+ def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+}
+
+class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByte
+{
+ def v16i8 :
+ CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+      [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
+                                        (v16i8 VECREG:$rB)))]>;
+
+ def r8 :
+ CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
+}
+
+class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByteImm
+{
+ def v16i8 :
+ CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfword
+{
+ def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
+}
+
+class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfwordImm
+{
+ def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setugt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWord
+{
+ def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
+}
+
+class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWordImm
+{
+ def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+defm CEQB : CmpEqualByte;
+defm CEQBI : CmpEqualByteImm;
+defm CEQH : CmpEqualHalfword;
+defm CEQHI : CmpEqualHalfwordImm;
+defm CEQ : CmpEqualWord;
+defm CEQI : CmpEqualWordImm;
+defm CGTB : CmpGtrByte;
+defm CGTBI : CmpGtrByteImm;
+defm CGTH : CmpGtrHalfword;
+defm CGTHI : CmpGtrHalfwordImm;
+defm CGT : CmpGtrWord;
+defm CGTI : CmpGtrWordImm;
+defm CLGTB : CmpLGtrByte;
+defm CLGTBI : CmpLGtrByteImm;
+defm CLGTH : CmpLGtrHalfword;
+defm CLGTHI : CmpLGtrHalfwordImm;
+defm CLGT : CmpLGtrWord;
+defm CLGTI : CmpLGtrWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// For SETCC primitives not supported above (setlt, setle, setge, etc.)
+// define a pattern to generate the right code, as a binary operator
+// (in a manner of speaking).
+//
+// Notes:
+// 1. This only matches the setcc set of conditionals. Special pattern
+// matching is used for select conditionals.
+//
+// 2. The "DAG" versions of these classes are almost exclusively used for
+// i64 comparisons. See the tblgen fundamentals documentation for what
+// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
+// class for where ResultInstrs originates.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
+
+class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, (inttype immpred:$imm)),
+ (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
+
+def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
+def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
+
+def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
+def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
+
+def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
+def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
+
+class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB))>;
+
+class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType immtype,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, (immtype immpred:$imm)),
+ (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
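+
+// The instantiations below expand to standard boolean identities; for i32,
+// roughly:
+//   setge a, b  ->  (a > b) | (a == b)        (OR of CGT and CEQ)
+//   setlt a, b  ->  ~((a > b) | (a == b))     (NOR of the same pair)
+//   setle a, b  ->  (a > b) ^ ~0              (XOR with all ones)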
+
+def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
+def : Pat<(setle R8C:$rA, R8C:$rB),
+ (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setle R8C:$rA, immU8:$imm),
+ (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ ORr16, CGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
+def : Pat<(setle R16C:$rA, R16C:$rB),
+ (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ ORr32, CGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
+def : Pat<(setle R32C:$rA, R32C:$rB),
+ (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
+def : Pat<(setule R8C:$rA, R8C:$rB),
+ (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setule R8C:$rA, immU8:$imm),
+ (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
+ ORr16, CLGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
+ CLGTHIr16, CEQHIr16>;
+def : Pat<(setule R16C:$rA, R16C:$rB),
+ (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
+ ORr32, CLGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
+def : Pat<(setule R32C:$rA, R32C:$rB),
+ (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// select conditional patterns:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, rclass:$rB))>;
+
+class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, immpred:$imm))>;
+
+def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
+def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
+def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
+def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBIr8>;
+def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
+def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
+
+def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
+def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
+def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
+def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
+def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
+def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
+
+def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
+def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
+def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
+def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
+def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
+def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
+
+class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB)))>;
+
+class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
+
+def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ SELBr16, ORr16, CGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
+def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ SELBr32, ORr32, CGTIr32, CEQIr32>;
+
+def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
+ SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
+def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
+ SELBr32, ORr32, CLGTIr32, CEQIr32>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+let isCall = 1,
+ // All calls clobber the non-callee-saved registers:
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
+ R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
+ R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
+ R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
+ R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
+ R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
+ R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
+ R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
+ // All of these instructions use $lr (aka $0)
+ Uses = [R0] in {
+ // Branch relative and set link: used when we know that the target
+ // is within [-32768, 32767] bytes of the branch instruction itself
+ def BRSL:
+ BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops),
+ "brsl\t$$lr, $func",
+ [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
+
+ // Branch absolute and set link: used when we know the target's
+ // absolute address
+ def BRASL:
+ BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
+ "brasl\t$$lr, $func",
+ [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
+
+ // Branch indirect and set link if external data. These instructions are not
+ // generated directly; they are matched via an intrinsic:
+ def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
+
+ // Branch indirect and set link. This is the "X-form" address version of a
+ // function call
+ def BISL:
+ BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
+}
+
+// Support calls to external symbols:
+def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
+ (BRSL texternalsym:$func)>;
+
+def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
+ (BRASL texternalsym:$func)>;
+
+// Unconditional branches:
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BR :
+ UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+ "br\t$dest",
+ [(br bb:$dest)]>;
+
+ // Unconditional, absolute address branch
+ def BRA:
+ UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+ "bra\t$dest",
+ [/* no pattern */]>;
+
+ // Indirect branch
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+
+ // Conditional branches:
+ class BRNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
+ BranchResolv, pattern>;
+
+ class BRNZRegInst<RegisterClass rclass>:
+ BRNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRNZVecInst<ValueType vectype>:
+ BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
+ [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
+
+ multiclass BranchNotZero {
+ def v4i32 : BRNZVecInst<v4i32>;
+ def r32 : BRNZRegInst<R32C>;
+ }
+
+ defm BRNZ : BranchNotZero;
+
+ class BRZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
+ BranchResolv, pattern>;
+
+ class BRZRegInst<RegisterClass rclass>:
+ BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRZVecInst<ValueType vectype>:
+ BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZero {
+ def v4i32: BRZVecInst<v4i32>;
+ def r32: BRZRegInst<R32C>;
+ }
+
+ defm BRZ: BranchZero;
+
+ // Note: LLVM doesn't generate conditional indirect branches; otherwise
+ // these would be useful:
+ /*
+ class BINZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
+
+ class BINZRegInst<RegisterClass rclass>:
+ BINZInst<(ins rclass:$rA, brtarget:$dest),
+ [(brcond rclass:$rA, R32C:$dest)]>;
+
+ class BINZVecInst<ValueType vectype>:
+ BINZInst<(ins VECREG:$rA, R32C:$dest),
+ [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
+
+ multiclass BranchNotZeroIndirect {
+ def v4i32: BINZVecInst<v4i32>;
+ def r32: BINZRegInst<R32C>;
+ }
+
+ defm BINZ: BranchNotZeroIndirect;
+
+ class BIZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
+
+ class BIZRegInst<RegisterClass rclass>:
+ BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
+
+ class BIZVecInst<ValueType vectype>:
+ BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
+
+ multiclass BranchZeroIndirect {
+ def v4i32: BIZVecInst<v4i32>;
+ def r32: BIZRegInst<R32C>;
+ }
+
+ defm BIZ: BranchZeroIndirect;
+ */
+
+ class BRHNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
+ pattern>;
+
+ class BRHNZRegInst<RegisterClass rclass>:
+ BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRHNZVecInst<ValueType vectype>:
+ BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchNotZeroHalfword {
+ def v8i16: BRHNZVecInst<v8i16>;
+ def r16: BRHNZRegInst<R16C>;
+ }
+
+ defm BRHNZ: BranchNotZeroHalfword;
+
+ class BRHZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
+ pattern>;
+
+ class BRHZRegInst<RegisterClass rclass>:
+ BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRHZVecInst<ValueType vectype>:
+ BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZeroHalfword {
+ def v8i16: BRHZVecInst<v8i16>;
+ def r16: BRHZRegInst<R16C>;
+ }
+
+ defm BRHZ: BranchZeroHalfword;
+}
+
+//===----------------------------------------------------------------------===//
+// setcc and brcond patterns:
+//===----------------------------------------------------------------------===//
+
+def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
+ (BRHZr16 R16C:$rA, bb:$dest)>;
+def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
+ (BRHNZr16 R16C:$rA, bb:$dest)>;
+
+def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
+ (BRZr32 R32C:$rA, bb:$dest)>;
+def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
+ (BRNZr32 R32C:$rA, bb:$dest)>;
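+
+// Illustrative only (not an additional pattern): a source-level
+// 'if (x != 0) goto L' with x held in R32C selects BRNZr32 above and
+// prints as something like 'brnz $3, L'.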
+
+multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CEQHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
+defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CLGTHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
+defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16C:$rB),
+ (CEQHr16 R16C:$rA, R16C:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
+
+multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CGTHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
+defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
+
+multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16C:$rB),
+ (CEQHr16 R16C:$rA, R16C:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
+
+let isTerminator = 1, isBarrier = 1 in {
+ let isReturn = 1 in {
+ def RET:
+ RETForm<"bi\t$$lr", [(retflag)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Single precision floating point instructions
+//===----------------------------------------------------------------------===//
+
+class FAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FAVecInst<ValueType vectype>:
+ FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPAdd
+{
+ def v4f32: FAVecInst<v4f32>;
+ def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FA : SFPAdd;
+
+class FSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FSVecInst<ValueType vectype>:
+ FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPSub
+{
+ def v4f32: FSVecInst<v4f32>;
+ def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FS : SFPSub;
+
+// Floating point reciprocal estimate
+
+class FRESTInst<dag OOL, dag IOL>:
+ RRForm_1<0b00110111000, OOL, IOL,
+ "frest\t$rT, $rA", SPrecFP,
+ [/* no pattern */]>;
+
+def FRESTv4f32 :
+ FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+def FRESTf32 :
+ FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
+
+// Floating point interpolate (used in conjunction with reciprocal estimate)
+def FIv4f32 :
+ RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+def FIf32 :
+ RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+//--------------------------------------------------------------------------
+// Basic single precision floating point comparisons:
+//
+// Note: There is no support on SPU for single precision NaN. Consequently,
+// ordered and unordered comparisons are the same.
+//--------------------------------------------------------------------------
+
+def FCEQf32 :
+ RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fceq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
+ (FCEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMEQf32 :
+ RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmeq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCGTf32 :
+ RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setugt R32FP:$rA, R32FP:$rB),
+ (FCGTf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMGTf32 :
+ RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setugt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
+
+//--------------------------------------------------------------------------
+// Single precision floating point comparisons and SETCC equivalents:
+//--------------------------------------------------------------------------
+
+def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
+def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
+
+def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
+
+def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
+
+def : Pat<(setule R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+def : Pat<(setole R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+
+// FP Status and Control Register Write
+// Why isn't rT a don't care in the ISA?
+// Should we create a special RRForm_3 for this guy and zero out the rT?
+def FSCRWf32 :
+ RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
+ "fscrwr\t$rA", SPrecFP,
+ [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
+
+// FP Status and Control Register Read
+def FSCRRf32 :
+ RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
+ "fscrrd\t$rT", SPrecFP,
+ [/* This instruction requires an intrinsic */]>;
+
+// LLVM instruction space
+// How do these map onto Cell instructions?
+// fdiv rA rB
+// frest rC rB # c = 1/b (both lines)
+// fi rC rB rC
+// fm rD rA rC # d = a * 1/b
+// fnms rB rD rB rA # b = -(d * b - a) -- should == 0 in a perfect world
+// fma rB rB rC rD # b = b * c + d
+// = -(d * b - a) * c + d
+// = a * c - c * (a * b * c - a)
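+//
+// Illustrative walk-through (a sketch, not generated code): for a = 1.0,
+// b = 3.0, frest/fi produce c ~= 1/3 and fm produces d = a*c ~= 0.333; the
+// fnms/fma pair then applies one Newton-Raphson refinement,
+// d' = d + c*(a - d*b), pushing the quotient toward full single precision.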
+
+// fcopysign (???)
+
+// Library calls:
+// These LLVM instructions will actually map to library calls.
+// All that's needed, then, is to check that the appropriate library is
+// imported and do a brsl to the proper function name.
+// frem # fmod(x, y): x - (x/y) * y
+// (Note: fmod(double, double), fmodf(float, float))
+// fsqrt?
+// fsin?
+// fcos?
+// Unimplemented SPU instruction space
+// floating reciprocal absolute square root estimate (frsqest)
+
+// The following are probably just intrinsics
+// status and control register write
+// status and control register read
+
+//--------------------------------------
+// Floating point multiply instructions
+//--------------------------------------
+
+def FMv4f32:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def FMf32 :
+ RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+//--------------------------------------
+// Floating Point Conversions
+// Signed conversions:
+def CSiFv4f32:
+ CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
+
+// Convert signed integer to floating point
+def CSiFf32 :
+ CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
+
+// Convert unsigned integer to float
+def CUiFv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
+
+def CUiFf32 :
+ CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
+
+// Convert float to unsigned int
+// Assume that scale = 0
+
+def CFUiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
+
+def CFUif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
+
+// Convert float to signed int
+// Assume that scale = 0
+
+def CFSiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
+
+def CFSif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
+
+//===----------------------------------------------------------------------==//
+// Single<->Double precision conversions
+//===----------------------------------------------------------------------==//
+
+// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
+// v4f32, output is v2f64--which goes in the name?)
+
+// Floating point extend single to double
+// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
+// operates on two double-word slots (i.e. 1st and 3rd fp numbers
+// are ignored).
+def FESDvec :
+ RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))]>;
+
+def FESDf32 :
+ RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [(set R64FP:$rT, (fextend R32FP:$rA))]>;
+
+// Floating point round double to single
+//def FRDSvec :
+// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+// "frds\t$rT, $rA,", SPrecFP,
+// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
+
+def FRDSf64 :
+ RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
+ "frds\t$rT, $rA", SPrecFP,
+ [(set R32FP:$rT, (fround R64FP:$rA))]>;
+
+// TODO: include anyextend?
+
+//===----------------------------------------------------------------------==//
+// Double precision floating point instructions
+//===----------------------------------------------------------------------==//
+def FAf64 :
+ RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
+
+def FAv2f64 :
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FSf64 :
+ RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
+
+def FSv2f64 :
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMf64 :
+ RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
+
+def FMv2f64:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMAf64:
+ RRForm<0b00111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMAv2f64:
+ RRForm<0b00111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSf64 :
+ RRForm<0b10111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSv2f64 :
+ RRForm<0b10111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
+ (v2f64 VECREG:$rC)))]>;
+
+// DFNMS: -(a * b - c)
+//      = -(a * b) + c = c - (a * b)
+
+class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
+ DPrecFP, pattern>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+class DFNMSVecInst<list<dag> pattern>:
+ DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ pattern>;
+
+class DFNMSRegInst<list<dag> pattern>:
+ DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ pattern>;
+
+multiclass DFMultiplySubtract
+{
+ def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB))))]>;
+
+ def f64 : DFNMSRegInst<[(set R64FP:$rT,
+ (fsub R64FP:$rC,
+ (fmul R64FP:$rA, R64FP:$rB)))]>;
+}
+
+defm DFNMS : DFMultiplySubtract;
+
+// DFNMA: -(a * b + c)
+//      = -(a * b) - c
+def FNMAf64 :
+ RRForm<0b11111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FNMAv2f64 :
+ RRForm<0b11111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fneg (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+//===----------------------------------------------------------------------==//
+// Floating point negation and absolute value
+//===----------------------------------------------------------------------==//
+
+def : Pat<(fneg (v4f32 VECREG:$rA)),
+ (XORfnegvec (v4f32 VECREG:$rA),
+ (v4f32 (ILHUv4i32 0x8000)))>;
+
+def : Pat<(fneg R32FP:$rA),
+ (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
+
+// Floating point absolute value
+// Note: f64 fabs is custom-selected.
+
+def : Pat<(fabs R32FP:$rA),
+ (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
+
+def : Pat<(fabs (v4f32 VECREG:$rA)),
+ (ANDfabsvec (v4f32 VECREG:$rA),
+ (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
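+
+// (Summary of the two groups above: fneg XORs the IEEE-754 sign bit, using
+// the 0x80000000 word built by ILHU; fabs ANDs the sign bit away, using the
+// 0x7fffffff mask built by the ILHU/IOHL pair.)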
+
+//===----------------------------------------------------------------------===//
+// Hint for branch instructions:
+//===----------------------------------------------------------------------===//
+
+/* def HBR : SPUInstr<(outs), (ins), "hbr\t" */
+
+//===----------------------------------------------------------------------===//
+// Execution NOP and Load NOP (execute NOPs belong in the even pipeline,
+// load NOPs in the odd pipeline)
+//===----------------------------------------------------------------------===//
+
+def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000010;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000000;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Bit conversions (type conversions between vector/packed types)
+// NOTE: Promotions are handled using the XS* instructions.
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v2f64 VECREG:$src))), (v2f64 VECREG:$src)>;
+
+def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+
+def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
+ (v16i8 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
+ (v8i16 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
+ (v4i32 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
+ (v2i64 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
+ (v4f32 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
+ (v2f64 (ORvec_i128 GPRC:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Instruction patterns:
+//===----------------------------------------------------------------------===//
+
+// General 32-bit constants:
+def : Pat<(i32 imm:$imm),
+ (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
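+// e.g. (illustrative, not emitted verbatim): materializing 0x12345678 becomes
+// 'ilhu $rT, 0x1234' (rT = 0x12340000) followed by 'iohl $rT, 0x5678', which
+// ORs the low halfword in.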
+
+// Single precision float constants:
+def : Pat<(f32 fpimm:$imm),
+ (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
+
+// General constant 32-bit vectors
+def : Pat<(v4i32 v4i32Imm:$imm),
+ (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
+ (LO16_vec v4i32Imm:$imm))>;
+
+// 8-bit constants
+def : Pat<(i8 imm:$imm),
+ (ILHr8 imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Call instruction patterns:
+//===----------------------------------------------------------------------===//
+// Return void
+def : Pat<(ret),
+ (RET)>;
+
+//===----------------------------------------------------------------------===//
+// Zero/Any/Sign extensions
+//===----------------------------------------------------------------------===//
+
+// sext 8->32: Sign extend bytes to words
+def : Pat<(sext_inreg R32C:$rSrc, i8),
+ (XSHWr32 (XSBHr32 R32C:$rSrc))>;
+
+def : Pat<(i32 (sext R8C:$rSrc)),
+ (XSHWr16 (XSBHr8 R8C:$rSrc))>;
+
+// sext 8->64: Sign extend bytes to double word
+def : Pat<(sext_inreg R64C:$rSrc, i8),
+ (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
+
+def : Pat<(i64 (sext R8C:$rSrc)),
+ (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
+
+// zext 8->16: Zero extend bytes to halfwords
+def : Pat<(i16 (zext R8C:$rSrc)),
+ (ANDHIi8i16 R8C:$rSrc, 0xff)>;
+
+// zext 8->32: Zero extend bytes to words
+def : Pat<(i32 (zext R8C:$rSrc)),
+ (ANDIi8i32 R8C:$rSrc, 0xff)>;
+
+// zext 8->64: Zero extend bytes to double words
+def : Pat<(i64 (zext R8C:$rSrc)),
+ (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32
+ (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)),
+ 0x4),
+ (ILv4i32 0x0),
+ (FSMBIv4i32 0x0f0f)))>;
+
+// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
+def : Pat<(i16 (anyext R8C:$rSrc)),
+ (ORHIi8i16 R8C:$rSrc, 0)>;
+
+// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
+def : Pat<(i32 (anyext R8C:$rSrc)),
+ (ORIi8i32 R8C:$rSrc, 0)>;
+
+// sext 16->64: Sign extend halfword to double word
+def : Pat<(sext_inreg R64C:$rSrc, i16),
+ (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
+
+def : Pat<(sext R16C:$rSrc),
+ (XSWDr64 (XSHWr16 R16C:$rSrc))>;
+
+// zext 16->32: Zero extend halfwords to words
+def : Pat<(i32 (zext R16C:$rSrc)),
+ (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
+ (ANDIi16i32 R16C:$rSrc, 0xf)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
+ (ANDIi16i32 R16C:$rSrc, 0xff)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
+ (ANDIi16i32 R16C:$rSrc, 0xfff)>;
+
+// anyext 16->32: Extend 16->32 bits, irrespective of sign
+def : Pat<(i32 (anyext R16C:$rSrc)),
+ (ORIi16i32 R16C:$rSrc, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Truncates:
+// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
+// above are custom lowered.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(i8 (trunc GPRC:$src)),
+ (ORi8_v16i8
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>;
+
+def : Pat<(i8 (trunc R64C:$src)),
+ (ORi8_v16i8
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>;
+
+def : Pat<(i8 (trunc R32C:$src)),
+ (ORi8_v16i8
+ (SHUFBv4i32_m32
+ (ORv4i32_i32 R32C:$src),
+ (ORv4i32_i32 R32C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+
+def : Pat<(i8 (trunc R16C:$src)),
+ (ORi8_v16i8
+ (SHUFBv4i32_m32
+ (ORv8i16_i16 R16C:$src),
+ (ORv8i16_i16 R16C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+
+def : Pat<(i16 (trunc GPRC:$src)),
+ (ORi16_v8i16
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>;
+
+def : Pat<(i16 (trunc R64C:$src)),
+ (ORi16_v8i16
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>;
+
+def : Pat<(i16 (trunc R32C:$src)),
+ (ORi16_v8i16
+ (SHUFBv4i32_m32
+ (ORv4i32_i32 R32C:$src),
+ (ORv4i32_i32 R32C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>;
+
+def : Pat<(i32 (trunc GPRC:$src)),
+ (ORi32_v4i32
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>;
+
+def : Pat<(i32 (trunc R64C:$src)),
+ (ORi32_v4i32
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>;
+
+//===----------------------------------------------------------------------===//
+// Address generation: SPU, like PPC, has to split addresses into high and
+// low parts in order to load them into a register.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
+def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
+def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
+ (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
+ (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
+ (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
+ (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
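+
+// e.g. (illustrative): taking a global's address this way expands to an ILHU
+// of the high 16 bits of the address followed by an IOHL of the low 16 bits,
+// mirroring the 32-bit constant patterns above.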
+
+// Intrinsics:
+include "CellSDKIntrinsics.td"
+// Various math operator instruction sequences
+include "SPUMathInstr.td"
+// 64-bit "instructions"/support
+include "SPU64InstrInfo.td"
+// 128-bit "instructions"/support
+include "SPU128InstrInfo.td"
diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h
new file mode 100644
index 0000000..6a66967
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMachineFunction.h
@@ -0,0 +1,43 @@
+//===-- SPUMachineFunction.h - Private data used for CellSPU ------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_MACHINE_FUNCTION_INFO_H
+#define SPU_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SPUFunctionInfo - Cell SPU target-specific information for each
+/// MachineFunction
+class SPUFunctionInfo : public MachineFunctionInfo {
+private:
+ /// UsesLR - Indicates whether LR is used in the current function.
+ ///
+ bool UsesLR;
+
+public:
+ SPUFunctionInfo(MachineFunction& MF)
+ : UsesLR(false)
+ {}
+
+ void setUsesLR(bool U) { UsesLR = U; }
+ bool usesLR() const { return UsesLR; }
+
+};
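+
+// Usage sketch (illustrative; the getInfo<> accessor is the standard
+// MachineFunctionInfo idiom, not code added in this patch):
+//
+//   SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
+//   FuncInfo->setUsesLR(true);          // record that $lr is used
+//   if (FuncInfo->usesLR()) { /* save/restore $lr in prologue/epilogue */ }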
+
+} // end of namespace llvm
+
+
+#endif
+
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
new file mode 100644
index 0000000..80ebde3
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -0,0 +1,97 @@
+//======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*--======//
+//
+// Cell SPU math operations
+//
+// This target description file contains instruction sequences for various
+// math operations, such as vector multiplies, i32 multiply, etc., for the
+// SPU's i32, i16, and i8 types and their corresponding vector types.
+//
+// Any resemblance to libsimdmath or the Cell SDK simdmath library is
+// purely and completely coincidental.
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v16i8 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
+ (ORv4i32
+ (ANDv4i32
+ (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
+ (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)),
+ (ILAv4i32 0x0000ffff)),
+ (SHLIv4i32
+ (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
+ (ROTMAIv4i32_i32 VECREG:$rB, 16)),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
+ (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)), 16))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v8i16 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
+ (FSMBIv8i16 0xcccc))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v4i32, i32 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def MPYv4i32:
+ Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (Av4i32
+ (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
+ (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
+ (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
+
+def MPYi32:
+ Pat<(mul R32C:$rA, R32C:$rB),
+ (Ar32
+ (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
+ (MPYHr32 R32C:$rB, R32C:$rA)),
+ (MPYUr32 R32C:$rA, R32C:$rB))>;
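+
+// (Why three multiplies: the SPU multiplier is 16x16 -> 32 bits, so the low
+// 32 bits of a full product are assembled as
+//   mpyh(a,b) + mpyh(b,a) + mpyu(a,b)
+// -- the two cross products involving one high halfword each, plus the
+// unsigned product of the two low halfwords; the high*high term only affects
+// bits above 32 and is dropped.)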
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f32, v4f32 divide instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Reciprocal estimate and interpolation
+def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
+// Division estimate
+def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
+ Interpf32.Fragment,
+ DivEstf32.Fragment)>;
+// Epsilon addition
+def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
+
+def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
+ (SELBf32_cond NRaphf32.Fragment,
+ Epsilonf32.Fragment,
+ (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
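+
+// (Illustrative reading, an interpretation rather than documented semantics:
+// the FNMSf32 computes a - b*(q + 1ulp); the CGTIf32 ... -1 test passes when
+// that residual is non-negative, i.e. the bumped quotient still does not
+// overshoot, in which case the epsilon-adjusted value is selected, otherwise
+// the unadjusted Newton-Raphson result.)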
+
+// Reciprocal estimate and interpolation
+def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
+// Division estimate
+def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)),
+ Interpv4f32.Fragment,
+ DivEstv4f32.Fragment)>;
+// Epsilon addition
+def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
+
+def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (SELBv4f32_cond NRaphv4f32.Fragment,
+ Epsilonv4f32.Fragment,
+ (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
+ Epsilonv4f32.Fragment,
+ (v4f32 VECREG:$rA)), -1))>;
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
new file mode 100644
index 0000000..87c4115
--- /dev/null
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -0,0 +1,156 @@
+//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Type profiles and SelectionDAG nodes used by CellSPU
+//
+//===----------------------------------------------------------------------===//
+
+// Type profile for a call sequence
+def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+// SPU_GenControl: Type profile for generating control words for insertions
+def SPU_GenControl : SDTypeProfile<1, 1, []>;
+def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+//===----------------------------------------------------------------------===//
+// Operand constraints:
+//===----------------------------------------------------------------------===//
+
+def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// Operand type constraints for vector shuffle/permute operations
+def SDT_SPUshuffle : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Vector binary operator type constraints (needs a further constraint to
+// ensure that operand 0 is a vector...):
+
+def SPUVecBinop: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Trinary operators, e.g., addx, carry generate
+def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+// SELECT_MASK type constraints: There are several variations for the various
+// vector types (this avoids having to bit_convert all over the place.)
+def SPUselmask_type: SDTypeProfile<1, 1, [
+ SDTCisInt<1>
+]>;
+
+// SELB type constraints:
+def SPUselb_type: SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
+
+// SPU Vector shift pseudo-instruction type constraints
+def SPUvecshift_type: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., uses cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
+//===----------------------------------------------------------------------===//
+// Synthetic/pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+// SPU CNTB:
+def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
+
+// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
+// SPUISelLowering.h):
+def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
+
+// Shift left quadword by bits and bytes
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
+
+// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
+def SPUvec_shl: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type, []>;
+def SPUvec_srl: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type, []>;
+def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>;
+
+def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
+def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
+
+// Vector rotate left; bits shifted out on the left are rotated in on the right
+def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
+ SPUvecshift_type, []>;
+
+// Vector rotate left by bytes, but the count is given in bits and the SPU
+// internally converts it to bytes (saving an instruction that would otherwise
+// be needed to mask off the lower three bits)
+def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
+ SPUvecshift_type>;
+
+// SPU form select mask for bytes, immediate
+def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
+
+// SPU select bits instruction
+def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
+
+def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
+def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
+
+def SPU_vec_demote : SDTypeProfile<1, 1, []>;
+def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
+
+// Address high and low components, used for [r+r] type addressing
+def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
+def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
+
+// PC-relative address
+def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
+
+// A-Form local store addresses
+def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
+
+// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
+def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
+
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
+
+//===----------------------------------------------------------------------===//
+// Constraints: (taken from PPCInstrInfo.td)
+//===----------------------------------------------------------------------===//
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+//===----------------------------------------------------------------------===//
+// Return (flag isn't quite what it means: the operations are flagged so that
+// instruction scheduling doesn't disassociate them.)
+//===----------------------------------------------------------------------===//
+
+def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
new file mode 100644
index 0000000..802628f
--- /dev/null
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -0,0 +1,655 @@
+//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instruction Operands:
+//===----------------------------------------------------------------------===//
+
+def LO16 : SDNodeXForm<imm, [{
+ unsigned val = N->getZExtValue();
+ // Transformation function: get the low 16 bits.
+ return getI32Imm(val & 0xffff);
+}]>;
+
+def LO16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ // Transformation function: get the low 16 bit immediate from a build_vector
+ // node.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "LO16_vec got something other than a BUILD_VECTOR");
+
+ // Get the first defined (non-undef) operand, assumed to be a constant...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
+}]>;
+
+// Transform an immediate, returning the high 16 bits shifted down:
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
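+
+// e.g. (illustrative): for the immediate 0x12345678, HI16 yields 0x1234 and
+// LO16 yields 0x5678.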
+
+// Transformation function: shift the high 16 bit immediate from a build_vector
+// node into the low 16 bits, and return a 16-bit constant.
+def HI16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "HI16_vec got something other than a BUILD_VECTOR");
+
+ // Get the first defined (non-undef) operand, assumed to be a constant...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() >> 16);
+}]>;
+
+// simm7 predicate - True if the immediate fits in a 7-bit signed
+// field.
+def simm7: PatLeaf<(imm), [{
+ int sextVal = int(N->getSExtValue());
+ return (sextVal >= -64 && sextVal <= 63);
+}]>;
+
+// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
+// field.
+def uimm7: PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0x7f);
+}]>;
+
+// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
+// field.
+def immSExt8 : PatLeaf<(imm), [{
+ int Value = int(N->getSExtValue());
+ return (Value >= -(1 << 7) && Value <= (1 << 7) - 1);
+}]>;
+
+// immU8: immediate, unsigned 8-bit quantity
+def immU8 : PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0xff);
+}]>;
+
+// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i64ImmSExt10 : PatLeaf<(imm), [{
+ return isI64IntS10Immediate(N);
+}]>;
+
+// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i32ImmSExt10 : PatLeaf<(imm), [{
+ return isI32IntS10Immediate(N);
+}]>;
+
+// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
+// field. Used by RI10Form instructions like 'ldq'.
+def i32ImmUns10 : PatLeaf<(imm), [{
+ return isI32IntU10Immediate(N);
+}]>;
+
+// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i16ImmSExt10 : PatLeaf<(imm), [{
+ return isI16IntS10Immediate(N);
+}]>;
+
+// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
+// value. Used by RI10Form instructions.
+def i16ImmUns10 : PatLeaf<(imm), [{
+ return isI16IntU10Immediate(N);
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ short Ignored;
+ return isIntS16Immediate(N, Ignored);
+}]>;
+
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+def immU16 : PatLeaf<(imm), [{
+ // immU16 predicate- True if the immediate fits into a 16-bit unsigned field.
+ return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
+}]>;
+
+def imm18 : PatLeaf<(imm), [{
+ // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
+ int Value = (int) N->getZExtValue();
+ return ((Value & ((1 << 18) - 1)) == Value);
+}]>;
+
+def lo16 : PatLeaf<(imm), [{
+ // lo16 predicate - returns true if the immediate fits in the low order
+ // 16 bits (i.e., the high order bits are all zero) and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((val & 0x0000ffff) == val);
+ }
+
+ return false;
+}], LO16>;
+
+def hi16 : PatLeaf<(imm), [{
+ // hi16 predicate - returns true if the immediate has all zeros in the
+ // low order bits and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = uint32_t(N->getZExtValue());
+ return ((val & 0xffff0000) == val);
+ } else if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((val & 0xffff0000ULL) == val);
+ }
+
+ return false;
+}], HI16>;
+
+def bitshift : PatLeaf<(imm), [{
+ // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
+ // (shift left quadword by bits immediate)
+ int64_t Val = N->getZExtValue();
+ return (Val > 0 && Val <= 7);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Floating point operands:
+//===----------------------------------------------------------------------===//
+
+// Transform a float, returning the high 16 bits shifted down, as if
+// the float was really an unsigned integer:
+def HI16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) >> 16);
+}]>;
+
+// Transformation function on floats: get the low 16 bits as if the float was
+// an unsigned integer.
+def LO16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & 0xffff);
+}]>;
+
+def FPimm_sext16 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
+}]>;
+
+def FPimm_u18 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
+}]>;
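+// Worked example of the float transforms above (illustrative): 1.0f has the
+// bit pattern 0x3f800000, so HI16_f32 yields 0x3f80, LO16_f32 yields 0x0000,
+// and FPimm_sext16 sign-extends the low halfword, also giving 0.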
+
+def fpimmSExt16 : PatLeaf<(fpimm), [{
+ short Ignored;
+ return isFPS16Immediate(N, Ignored);
+}], FPimm_sext16>;
+
+// Does the SFP constant only have the upper 16 bits set?
+def hi16_f32 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
+ return ((val & 0xffff0000) == val);
+ }
+
+ return false;
+}], HI16_f32>;
+
+// Does the SFP constant fit into 18 bits?
+def fpimm18 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
+ return ((Value & ((1 << 18) - 1)) == Value);
+ }
+
+ return false;
+}], FPimm_u18>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands (TODO):
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// build_vector operands:
+//===----------------------------------------------------------------------===//
+
+// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load; works in conjunction with its transform function. N.B.: This relies on
+// the incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8SExt8Imm_xform>;
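+// Worked example (illustrative): a v16i8 build_vector splatting 0x2a is seen
+// by the selector as the 16-bit quantity 0x2a2a; both bytes match, so the
+// predicate fires and the xform emits the i8 immediate 0x2a.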
+
+// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
+// load; works in conjunction with its transform function. N.B.: This relies on
+// the incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8U8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8U8Imm_xform>;
+
+// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt8Imm_xform>;
+
+// v8i16SExt10Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt10Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt10Imm_xform>;
+
+// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned
+// immediate constant load for v8i16 vectors.
+def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v8i16Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns10Imm_xform>;
+
+// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt16Imm_xform>;
+
+// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt10Imm_xform>;
+
+// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v4i32Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns10Imm_xform>;
+
+// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt16Imm_xform>;
+
+// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v4i32Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns18Imm_xform>;
+
+// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
+// load.
+def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
+}]>;
+
+/// immILHUvec: Predicate test for an ILHU constant vector.
+def immILHUvec: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i32 vector constants
+def v4i32_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v4i32_imm(N, *CurDAG);
+}]>;
+
+def v4i32Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
+}], v4i32_get_imm>;
+
+// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt10Imm_xform>;
+
+// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt16Imm_xform>;
+
+// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v2i64 vectors.
+def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v2i64Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64Uns18Imm_xform>;
+
+/// immILHUvec_i64: Predicate test for an ILHU constant vector (i64 variant).
+def immILHUvec_i64: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i64 vector constants
+def v2i64_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v2i64_imm(N, *CurDAG);
+}]>;
+
+def v2i64Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
+}], v2i64_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+
+def s7imm: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def u7imm: Operand<i16> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i8: Operand<i8> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i32: Operand<i32> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+// Halfword, signed 10-bit constant
+def s10imm : Operand<i16> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i8: Operand<i8> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i32: Operand<i32> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i64: Operand<i64> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+// Unsigned 10-bit integers:
+def u10imm: Operand<i16> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i8: Operand<i8> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i32: Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def s16imm : Operand<i16> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i8: Operand<i8> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i32: Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i64: Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f32: Operand<f32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f64: Operand<f64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def u16imm_i64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm_i32 : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def f16imm : Operand<f32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def s18imm : Operand<i32> {
+ let PrintMethod = "printS18ImmOperand";
+}
+
+def u18imm : Operand<i32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def u18imm_i64 : Operand<i64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm : Operand<f32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm_f64 : Operand<f64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+// Negated 7-bit halfword rotate immediate operands
+def rothNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+def rothNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+// Negated 7-bit word rotate immediate operands
+def rotNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i8 : Operand<i8> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+
+// Absolute address call target
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops u18imm:$calldest);
+}
+
+// PC relative call target
+def relcalltarget : Operand<iPTR> {
+ let PrintMethod = "printPCRelativeOperand";
+ let MIOperandInfo = (ops s16imm:$calldest);
+}
+
+// Branch targets:
+def brtarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelativeOperand";
+}
+
+// Hint for branch target
+def hbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printHBROperand";
+}
+
+// Indirect call target
+def indcalltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops ptr_rc:$calldest);
+}
+
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+def symbolLSA: Operand<i32> {
+ let PrintMethod = "printSymbolLSA";
+}
+
+// Shuffle address memory operand [s7imm(reg) d-format]
+def shufaddr : Operand<iPTR> {
+ let PrintMethod = "printShufAddr";
+ let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
+}
+
+// memory s10imm(reg) operand
+def dformaddr : Operand<iPTR> {
+ let PrintMethod = "printDFormAddr";
+ let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
+}
+
+// 256K local store address
+// N.B.: The tblgen code generator expects this operand to have two
+// sub-operands, an offset and a pointer; of these, only the immediate
+// offset is actually used.
+def addr256k : Operand<iPTR> {
+ let PrintMethod = "printAddr256K";
+ let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
+}
+
+// memory s18imm(reg) operand
+def memri18 : Operand<iPTR> {
+ let PrintMethod = "printMemRegImmS18";
+ let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
+}
+
+// memory register + register operand
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
+}
+
+// Define SPU-specific addressing modes: These come in three basic
+// flavors:
+//
+// D-form : [r+I10] (10-bit signed offset + reg)
+// X-form : [r+r] (reg+reg)
+// A-form : abs (256K LSA offset)
+// D-form(2): [r+I7] (7-bit signed offset + reg)
+
+def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr", [], []>;
+def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr", [], []>;
+def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr", [], []>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr", [], []>;
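+
+// Illustrative example (assumed shapes, not from this file): given the
+// address computation (add $reg, 32), SelectDFormAddr can decompose it into
+// the pair (s10imm 32, $reg), so a load pattern such as
+//
+//   def : Pat<(load dform_addr:$src), (LQDr32 dform_addr:$src)>;
+//
+// would select the D-form load 'lqd 32($reg)'. LQDr32 is named here only for
+// illustration.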
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
new file mode 100644
index 0000000..e031048
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -0,0 +1,614 @@
+//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "SPURegisterNames.h"
+#include "SPUInstrBuilder.h"
+#include "SPUSubtarget.h"
+#include "SPUMachineFunction.h"
+#include "SPUFrameInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// SPU::R14, return the number that it corresponds to (e.g. 14).
+unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace SPU;
+ switch (RegEnum) {
+ case SPU::R0: return 0;
+ case SPU::R1: return 1;
+ case SPU::R2: return 2;
+ case SPU::R3: return 3;
+ case SPU::R4: return 4;
+ case SPU::R5: return 5;
+ case SPU::R6: return 6;
+ case SPU::R7: return 7;
+ case SPU::R8: return 8;
+ case SPU::R9: return 9;
+ case SPU::R10: return 10;
+ case SPU::R11: return 11;
+ case SPU::R12: return 12;
+ case SPU::R13: return 13;
+ case SPU::R14: return 14;
+ case SPU::R15: return 15;
+ case SPU::R16: return 16;
+ case SPU::R17: return 17;
+ case SPU::R18: return 18;
+ case SPU::R19: return 19;
+ case SPU::R20: return 20;
+ case SPU::R21: return 21;
+ case SPU::R22: return 22;
+ case SPU::R23: return 23;
+ case SPU::R24: return 24;
+ case SPU::R25: return 25;
+ case SPU::R26: return 26;
+ case SPU::R27: return 27;
+ case SPU::R28: return 28;
+ case SPU::R29: return 29;
+ case SPU::R30: return 30;
+ case SPU::R31: return 31;
+ case SPU::R32: return 32;
+ case SPU::R33: return 33;
+ case SPU::R34: return 34;
+ case SPU::R35: return 35;
+ case SPU::R36: return 36;
+ case SPU::R37: return 37;
+ case SPU::R38: return 38;
+ case SPU::R39: return 39;
+ case SPU::R40: return 40;
+ case SPU::R41: return 41;
+ case SPU::R42: return 42;
+ case SPU::R43: return 43;
+ case SPU::R44: return 44;
+ case SPU::R45: return 45;
+ case SPU::R46: return 46;
+ case SPU::R47: return 47;
+ case SPU::R48: return 48;
+ case SPU::R49: return 49;
+ case SPU::R50: return 50;
+ case SPU::R51: return 51;
+ case SPU::R52: return 52;
+ case SPU::R53: return 53;
+ case SPU::R54: return 54;
+ case SPU::R55: return 55;
+ case SPU::R56: return 56;
+ case SPU::R57: return 57;
+ case SPU::R58: return 58;
+ case SPU::R59: return 59;
+ case SPU::R60: return 60;
+ case SPU::R61: return 61;
+ case SPU::R62: return 62;
+ case SPU::R63: return 63;
+ case SPU::R64: return 64;
+ case SPU::R65: return 65;
+ case SPU::R66: return 66;
+ case SPU::R67: return 67;
+ case SPU::R68: return 68;
+ case SPU::R69: return 69;
+ case SPU::R70: return 70;
+ case SPU::R71: return 71;
+ case SPU::R72: return 72;
+ case SPU::R73: return 73;
+ case SPU::R74: return 74;
+ case SPU::R75: return 75;
+ case SPU::R76: return 76;
+ case SPU::R77: return 77;
+ case SPU::R78: return 78;
+ case SPU::R79: return 79;
+ case SPU::R80: return 80;
+ case SPU::R81: return 81;
+ case SPU::R82: return 82;
+ case SPU::R83: return 83;
+ case SPU::R84: return 84;
+ case SPU::R85: return 85;
+ case SPU::R86: return 86;
+ case SPU::R87: return 87;
+ case SPU::R88: return 88;
+ case SPU::R89: return 89;
+ case SPU::R90: return 90;
+ case SPU::R91: return 91;
+ case SPU::R92: return 92;
+ case SPU::R93: return 93;
+ case SPU::R94: return 94;
+ case SPU::R95: return 95;
+ case SPU::R96: return 96;
+ case SPU::R97: return 97;
+ case SPU::R98: return 98;
+ case SPU::R99: return 99;
+ case SPU::R100: return 100;
+ case SPU::R101: return 101;
+ case SPU::R102: return 102;
+ case SPU::R103: return 103;
+ case SPU::R104: return 104;
+ case SPU::R105: return 105;
+ case SPU::R106: return 106;
+ case SPU::R107: return 107;
+ case SPU::R108: return 108;
+ case SPU::R109: return 109;
+ case SPU::R110: return 110;
+ case SPU::R111: return 111;
+ case SPU::R112: return 112;
+ case SPU::R113: return 113;
+ case SPU::R114: return 114;
+ case SPU::R115: return 115;
+ case SPU::R116: return 116;
+ case SPU::R117: return 117;
+ case SPU::R118: return 118;
+ case SPU::R119: return 119;
+ case SPU::R120: return 120;
+ case SPU::R121: return 121;
+ case SPU::R122: return 122;
+ case SPU::R123: return 123;
+ case SPU::R124: return 124;
+ case SPU::R125: return 125;
+ case SPU::R126: return 126;
+ case SPU::R127: return 127;
+ default:
+ cerr << "Unhandled reg in SPURegisterInfo::getRegisterNumbering!\n";
+ abort();
+ }
+}
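+
+// Usage example (illustrative):
+//
+//   unsigned RegNo = SPURegisterInfo::getRegisterNumbering(SPU::R14); // 14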
+
+SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
+ const TargetInstrInfo &tii) :
+ SPUGenRegisterInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
+ Subtarget(subtarget),
+ TII(tii)
+{
+}
+
+// SPU's 128-bit registers used for argument passing:
+static const unsigned SPU_ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+};
+
+const unsigned *
+SPURegisterInfo::getArgRegs()
+{
+ return SPU_ArgRegs;
+}
+
+unsigned
+SPURegisterInfo::getNumArgRegs()
+{
+ return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]);
+}
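+
+// Usage sketch (illustrative): a calling-convention lowering might walk the
+// formal arguments and assign the i-th one to getArgRegs()[i] while
+// i < getNumArgRegs(), spilling the remainder to the stack.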
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass * SPURegisterInfo::getPointerRegClass() const
+{
+ return &SPU::R32CRegClass;
+}
+
+const unsigned *
+SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Cell ABI calling convention
+ static const unsigned SPU_CalleeSaveRegs[] = {
+ SPU::R80, SPU::R81, SPU::R82, SPU::R83,
+ SPU::R84, SPU::R85, SPU::R86, SPU::R87,
+ SPU::R88, SPU::R89, SPU::R90, SPU::R91,
+ SPU::R92, SPU::R93, SPU::R94, SPU::R95,
+ SPU::R96, SPU::R97, SPU::R98, SPU::R99,
+ SPU::R100, SPU::R101, SPU::R102, SPU::R103,
+ SPU::R104, SPU::R105, SPU::R106, SPU::R107,
+ SPU::R108, SPU::R109, SPU::R110, SPU::R111,
+ SPU::R112, SPU::R113, SPU::R114, SPU::R115,
+ SPU::R116, SPU::R117, SPU::R118, SPU::R119,
+ SPU::R120, SPU::R121, SPU::R122, SPU::R123,
+ SPU::R124, SPU::R125, SPU::R126, SPU::R127,
+ SPU::R2, /* environment pointer */
+ SPU::R1, /* stack pointer */
+ SPU::R0, /* link register */
+ 0 /* end */
+ };
+
+ return SPU_CalleeSaveRegs;
+}
+
+const TargetRegisterClass* const*
+SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
+{
+ // Cell ABI Calling Convention
+ static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = {
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, /* environment pointer */
+ &SPU::GPRCRegClass, /* stack pointer */
+ &SPU::GPRCRegClass, /* link register */
+ 0 /* end */
+ };
+
+ return SPU_CalleeSaveRegClasses;
+}
+
+/*!
+ R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
+ generally unused) are the Cell's reserved registers
+ */
+BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SPU::R0); // LR
+ Reserved.set(SPU::R1); // SP
+ Reserved.set(SPU::R2); // environment pointer
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+static bool needsFP(const MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool
+SPURegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getStackSize() && needsFP(MF);
+}
+
+//--------------------------------------------------------------------------
+void
+SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+ const
+{
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void
+SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &SPOp = MI.getOperand(i);
+ int FrameIndex = SPOp.getIndex();
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+
+ // Most instructions, except for generated FrameIndex additions using AIr32
+ // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
+ // immediate in operand 2.
+ unsigned OpNo = 1;
+ if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
+ OpNo = 2;
+
+ MachineOperand &MO = MI.getOperand(OpNo);
+
+ // Offset is biased by $lr's slot at the bottom.
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameInfo::minStackSize();
+ assert((Offset & 0xf) == 0
+ && "16-byte alignment violated in eliminateFrameIndex");
+
+ // Replace the FrameIndex operand with the base register, $sp (aka $r1).
+ SPOp.ChangeToRegister(SPU::R1, false);
+ if (Offset > SPUFrameInfo::maxFrameOffset()
+ || Offset < SPUFrameInfo::minFrameOffset()) {
+ cerr << "Large stack adjustment ("
+ << Offset
+ << ") in SPURegisterInfo::eliminateFrameIndex.";
+ } else {
+ MO.ChangeToImmediate(Offset);
+ }
+}
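+
+// Worked example (illustrative figures): for a frame object at offset -16 with
+// a zero instruction immediate, a 48-byte frame, and minStackSize() == 32 (two
+// 16-byte slots, an assumption about SPUFrameInfo), the rewritten operand is
+// -16 + 0 + 48 + 32 = 64($sp), which is 16-byte aligned as asserted above.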
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void
+SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
+{
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+ assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+ unsigned AlignMask = Align - 1;
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
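+
+// Worked example: with TargetAlign == 16, AlignMask == 15, a 20-byte frame and
+// a 16-byte max call frame, FrameSize becomes 20 + 16 = 36, and the final
+// rounding (36 + 15) & ~15 yields 48.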
+
+void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS)
+ const {
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+}
+
+void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
+{
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // Prepare for debug frame info.
+ bool hasDebugInfo = MMI && MMI->hasDebugInfo();
+ unsigned FrameLabelId = 0;
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ int FrameSize = MFI->getStackSize();
+
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+ if (FrameSize > 0 || MFI->hasCalls()) {
+ FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
+ if (hasDebugInfo) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isS10Constant(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+ // Adjust $sp by the required amount
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ cerr << "Unhandled frame size: " << FrameSize << "\n";
+ abort();
+ }
+
+ if (hasDebugInfo) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+ } else {
+ // This is a leaf function -- insert a branch hint if the basic block
+ // contains a sufficient number of instructions. Note that
+ // this is just a best guess based on the basic block's size.
+ if (MBB.size() >= (unsigned) SPUFrameInfo::branchHintPenalty()) {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ dl = MBBI->getDebugLoc();
+
+ // Insert terminator label
+ unsigned BranchLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(BranchLabelId);
+ }
+ }
+}
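+
+// Illustrative prologue for a 32-byte frame (assuming minStackSize() == 32,
+// so FrameSize == -64, which fits in a signed 10-bit immediate):
+//
+//   stqd  $lr, 16($sp)    // spill link register
+//   stqd  $sp, -64($sp)   // save back chain at the new frame bottom
+//   ai    $sp, $sp, -64   // adjust the stack pointer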
+
+void
+SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameInfo::stackSlotSize();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
+ if (FrameSize > 0 || MFI->hasCalls()) {
+ FrameSize = FrameSize + SPUFrameInfo::minStackSize();
+ if (isS10Constant(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, adjust $sp by required amount
+ // Note: We reload $lr first to slightly improve dual issue -- not by much,
+ // but it is an opportunity.
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(FrameSize + LinkSlotOffset)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+ .addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(16)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
+ addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ cerr << "Unhandled frame size: " << FrameSize << "\n";
+ abort();
+ }
+ }
+}
+
+unsigned
+SPURegisterInfo::getRARegister() const
+{
+ return SPU::R0;
+}
+
+unsigned
+SPURegisterInfo::getFrameRegister(MachineFunction &MF) const
+{
+ return SPU::R1;
+}
+
+void
+SPURegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const
+{
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+
+int
+SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: Most probably the dwarf numbers differ between Linux and Darwin
+ return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "SPUGenRegisterInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
new file mode 100644
index 0000000..5b6e9ec
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -0,0 +1,101 @@
+//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTERINFO_H
+#define SPU_REGISTERINFO_H
+
+#include "SPU.h"
+#include "SPUGenRegisterInfo.h.inc"
+
+namespace llvm {
+ class SPUSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ class SPURegisterInfo : public SPUGenRegisterInfo {
+ private:
+ const SPUSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ //! Predicate: Does the machine function use the link register?
+ bool usesLR(MachineFunction &MF) const;
+
+ public:
+ SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+ //! Translate a register's enum value to a register number
+ /*!
+ This method translates a register's enum value to its register number,
+ e.g. SPU::R14 -> 14.
+ */
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass() const;
+
+ //! Return the array of callee-saved registers
+ virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+ //! Return the register class array of the callee-saved registers
+ virtual const TargetRegisterClass* const *
+ getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+ //! Return the reserved registers
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ //! Predicate: Target has a dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+ //! Eliminate the call frame setup pseudo-instructions
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ //! Convert frame indices into machine operands
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int,
+ RegScavenger *RS) const;
+ //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ //! Emit the function prologue
+ void emitPrologue(MachineFunction &MF) const;
+ //! Emit the function epilogue
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ //! Get return address register (LR, aka R0)
+ unsigned getRARegister() const;
+ //! Get the stack frame register (SP, aka R1)
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ //! Perform target-specific stack frame setup.
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ //------------------------------------------------------------------------
+ // New methods added:
+ //------------------------------------------------------------------------
+
+ //! Return the array of argument passing registers
+ /*!
+ \note The size of this array is returned by getNumArgRegs().
+ */
+ static const unsigned *getArgRegs();
+
+ //! Return the size of the argument passing register array
+ static unsigned getNumArgRegs();
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
new file mode 100644
index 0000000..bb88f2b
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -0,0 +1,429 @@
+//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SPUReg<string n> : Register<n> {
+ let Namespace = "SPU";
+}
+
+// The SPU's registers are all 128 bits wide, which makes specifying the
+// registers relatively easy, if relatively mundane:
+
+class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
+ field bits<7> Num = num;
+}
+
+def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
+def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
+def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
+def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
+def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
+def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
+def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
+def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
+def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
+def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
+def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
+def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
+def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
+def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
+def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
+def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
+def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
+def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
+def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
+def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
+def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
+def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
+def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
+def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
+def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
+def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
+def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
+def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
+def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
+def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
+def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
+def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
+def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
+def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
+def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
+def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
+def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
+def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
+def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
+def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
+def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
+def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
+def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
+def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
+def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
+def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
+def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
+def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
+def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
+def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
+def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
+def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
+def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
+def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
+def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
+def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
+def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
+def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
+def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
+def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
+def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
+def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
+def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
+def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
+def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
+def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
+def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
+def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
+def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
+def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
+def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
+def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
+def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
+def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
+def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
+def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
+def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
+def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
+def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
+def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
+def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
+def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
+def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
+def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
+def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
+def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
+def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
+def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
+def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
+def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
+def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
+def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
+def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
+def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
+def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
+def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
+def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
+def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
+def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
+def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
+def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
+def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
+def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
+def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
+def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
+def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
+def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
+def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
+def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
+def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
+def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
+def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
+def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
+def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
+def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
+def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
+def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
+def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
+def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
+def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
+def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
+def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
+def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
+def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
+def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
+def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
+def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
+def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
+
+/* Need floating point status register here: */
+/* def FPCSR : ... */
+
+// The SPU's registers as 128-bit wide entities; they can function as general
+// purpose registers, where the operands are in the "preferred slot":
+def GPRC : RegisterClass<"SPU", [i128], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GPRCClass::iterator
+ GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ GPRCClass::iterator
+ GPRCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
+def R64C : RegisterClass<"SPU", [i64], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R64CClass::iterator
+ R64CClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R64CClass::iterator
+ R64CClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
+def R64FP : RegisterClass<"SPU", [f64], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R64FPClass::iterator
+ R64FPClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R64FPClass::iterator
+ R64FPClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 32-bit wide (word) "preferred slot":
+def R32C : RegisterClass<"SPU", [i32], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R32CClass::iterator
+ R32CClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R32CClass::iterator
+ R32CClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as single precision floating point "preferred slot":
+def R32FP : RegisterClass<"SPU", [f32], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R32FPClass::iterator
+ R32FPClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R32FPClass::iterator
+ R32FPClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 16-bit wide (halfword) "preferred slot":
+def R16C : RegisterClass<"SPU", [i16], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R16CClass::iterator
+ R16CClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R16CClass::iterator
+ R16CClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 8-bit wide (byte) "preferred slot":
+def R8C : RegisterClass<"SPU", [i8], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+   /* non-volatile registers: take a hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R8CClass::iterator
+ R8CClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R8CClass::iterator
+ R8CClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as vector registers:
+def VECREG : RegisterClass<"SPU",
+ [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64],
+ 128,
+ [
+   /* volatile registers */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+   /* non-volatile registers: take a hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ VECREGClass::iterator
+ VECREGClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ VECREGClass::iterator
+ VECREGClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
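
Each register class above plays the same trick: the allocation order lists the
volatile registers first, the callee-saved registers in reverse, and the
reserved trio R2/R1/R0 last, so allocation_order_end() can simply return
end()-3 and the allocator never sees the reserved registers. A minimal
standalone C++ sketch of that effect (plain containers, not the LLVM API):

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Abbreviated order: volatile first, callee-saved reversed, then the
      // three reserved registers (envp, sp, lr) pinned at the very end.
      std::vector<std::string> order = {"R3", "R4", "R127", "R80",
                                        "R2", "R1", "R0"};
      auto last = order.end() - 3;   // mirrors allocation_order_end()
      for (auto it = order.begin(); it != last; ++it)
        std::cout << *it << ' ';     // R2, R1, R0 are never offered
      std::cout << '\n';
    }
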
diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h
new file mode 100644
index 0000000..6c3afdf
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterNames.h
@@ -0,0 +1,18 @@
+//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTER_NAMES_H
+#define SPU_REGISTER_NAMES_H
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#include "SPUGenRegisterNames.inc"
+
+#endif
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
new file mode 100644
index 0000000..785dc46
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSchedule.td
@@ -0,0 +1,57 @@
+//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units: the SPU's even and odd execution pipelines
+
+def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000)
+def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100)
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Cell SPU
+//===----------------------------------------------------------------------===//
+
+def LoadStore : InstrItinClass; // ODD_UNIT
+def BranchHints : InstrItinClass; // ODD_UNIT
+def BranchResolv : InstrItinClass; // ODD_UNIT
+def ChanOpSPR : InstrItinClass; // ODD_UNIT
+def ShuffleOp : InstrItinClass; // ODD_UNIT
+def SelectOp : InstrItinClass; // ODD_UNIT
+def GatherOp : InstrItinClass; // ODD_UNIT
+def LoadNOP : InstrItinClass; // ODD_UNIT
+def ExecNOP : InstrItinClass; // EVEN_UNIT
+def SPrecFP : InstrItinClass; // EVEN_UNIT
+def DPrecFP : InstrItinClass; // EVEN_UNIT
+def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
+def ByteOp : InstrItinClass; // EVEN_UNIT
+def IntegerOp : InstrItinClass; // EVEN_UNIT
+def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
+def RotateShift : InstrItinClass; // EVEN_UNIT
+def ImmLoad : InstrItinClass; // EVEN_UNIT
+
+/* Note: The itinerary for the Cell SPU is somewhat contrived... */
+def SPUItineraries : ProcessorItineraries<[
+ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
+ InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
+ InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
+ InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
+ InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<RotateShift , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
+ InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
+ ]>;
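
Every itinerary class above reduces to one (pipeline, cycle-count) pair. A
short C++ sketch of a latency lookup built from two entries transcribed from
SPUItineraries (illustration only, not the tblgen-generated tables):

    #include <cstdio>

    enum Unit { EVEN_UNIT, ODD_UNIT };
    struct Itin { Unit unit; unsigned cycles; };

    int main() {
      // Values copied from the InstrItinData entries above.
      const Itin LoadStore = { ODD_UNIT,  6 };
      const Itin DPrecFP   = { EVEN_UNIT, 13 };
      std::printf("LoadStore: %u cycles on the %s pipe\n", LoadStore.cycles,
                  LoadStore.unit == ODD_UNIT ? "odd" : "even");
      std::printf("DPrecFP:   %u cycles on the %s pipe\n", DPrecFP.cycles,
                  DPrecFP.unit == ODD_UNIT ? "odd" : "even");
    }
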
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
new file mode 100644
index 0000000..0a1c2f7
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -0,0 +1,40 @@
+//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CellSPU-specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUSubtarget.h"
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SPUGenSubtarget.inc"
+
+using namespace llvm;
+
+SPUSubtarget::SPUSubtarget(const TargetMachine &tm, const Module &M,
+ const std::string &FS) :
+ TM(tm),
+ StackAlignment(16),
+ ProcDirective(SPU::DEFAULT_PROC),
+ UseLargeMem(false)
+{
+  // This should be the target SPU processor type. For now, since there's
+  // only one, simply use the "v0" default:
+ std::string default_cpu("v0");
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, default_cpu);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void SPUSubtarget::SetJITMode() {
+}
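
The FS string forwarded to ParseSubtargetFeatures is a comma-separated list of
"+feature"/"-feature" toggles. A sketch of its shape (the flag name below is
hypothetical, not a documented CellSPU feature; tblgen defines the real names):

    #include <iostream>
    #include <sstream>
    #include <string>

    int main() {
      std::string FS = "+large-mem,-dummy";  // hypothetical feature flags
      std::istringstream ss(FS);
      std::string f;
      while (std::getline(ss, f, ','))
        std::cout << (f[0] == '+' ? "enable " : "disable ")
                  << f.substr(1) << '\n';
    }
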
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
new file mode 100644
index 0000000..b6a3409
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -0,0 +1,95 @@
+//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Cell SPU-specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSUBTARGET_H
+#define CELLSUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+ class Module;
+ class GlobalValue;
+ class TargetMachine;
+
+ namespace SPU {
+ enum {
+ PROC_NONE,
+ DEFAULT_PROC
+ };
+ }
+
+ class SPUSubtarget : public TargetSubtarget {
+ protected:
+ const TargetMachine &TM;
+
+    /// StackAlignment - The minimum alignment known to hold for the stack
+    /// frame on entry to the function, and which must be maintained by every
+    /// function.
+ unsigned StackAlignment;
+
+    /// Selected instruction itineraries (one entry per itinerary class).
+ InstrItineraryData InstrItins;
+
+ /// Which SPU processor (this isn't really used, but it's there to keep
+ /// the C compiler happy)
+ unsigned ProcDirective;
+
+    /// Use (assume) large memory -- effectively disables the LQA/STQA
+    /// instructions that assume a 256K local store.
+ bool UseLargeMem;
+
+ public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ SPUSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+    /// getStackAlignment - Returns the minimum alignment known to hold for
+    /// the stack frame on entry to the function, and which must be maintained
+    /// by every function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+    /// getInstrItineraryData - Return the instruction itineraries based on
+    /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+    /// usingLargeMem - Returns true if large memory addressing is assumed.
+ bool usingLargeMem() const {
+ return UseLargeMem;
+ }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
+ const char *getTargetDataString() const {
+ return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
+ "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
+ "-s:128:128";
+ }
+ };
+} // End llvm namespace
+
+#endif
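
The layout string above is dense but regular: "E" declares big-endian, and
each "<type>:<size>:<abi>:<preferred>" component gives sizes and alignments in
bits; the 128-bit preferred alignment throughout reflects the SPU's 16-byte
quadword orientation. A sketch that merely splits out the components
(illustration only; LLVM's TargetData does the real parsing):

    #include <iostream>
    #include <sstream>
    #include <string>

    int main() {
      // Leading components of the CellSPU layout string above.
      std::string layout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128";
      std::istringstream ss(layout);
      std::string spec;
      while (std::getline(ss, spec, '-'))
        std::cout << spec << '\n';   // "E", then size:abi:preferred entries
    }
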
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
new file mode 100644
index 0000000..ff88ed8
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
@@ -0,0 +1,74 @@
+//===-- SPUTargetAsmInfo.cpp - Cell SPU asm properties ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SPUTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUTargetAsmInfo.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
+ SPUTargetAsmInfo<ELFTargetAsmInfo>(TM) {
+ PCSymbol = ".";
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ // This corresponds to what the gcc SPU compiler emits, for consistency.
+ CStringSection = ".rodata.str";
+
+ // Has leb128, .loc and .file
+ HasLEB128 = true;
+ HasDotLocAndDotFile = true;
+
+ // BSS section needs to be emitted as ".section"
+ BSSSection = "\t.section\t.bss";
+ BSSSection_ = getUnnamedSection("\t.section\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS,
+ true);
+
+ SupportsDebugInformation = true;
+ NeedsSet = true;
+ SupportsMacInfoSection = false;
+ DwarfAbbrevSection = "\t.section .debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section .debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section .debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section .debug_frame,\"\",@progbits";
+ DwarfPubNamesSection = "\t.section .debug_pubnames,\"\",@progbits";
+  DwarfPubTypesSection = "\t.section .debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section .debug_str,\"MS\",@progbits,1";
+ DwarfLocSection = "\t.section .debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section .debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section .debug_ranges,\"\",@progbits";
+  DwarfMacInfoSection = "\t.section .debug_macinfo,\"\",@progbits";
+
+ // Exception handling is not supported on CellSPU (think about it: you only
+ // have 256K for code+data. Would you support exception handling?)
+ SupportsExceptionHandling = false;
+}
+
+/// PreferredEHDataFormat - This hook allows the target to select data
+/// format used for encoding pointers in exception handling data. Reason is
+/// 0 for data, 1 for code labels, 2 for function pointers. Global is true
+/// if the symbol can be relocated.
+unsigned
+SPULinuxTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ // We really need to write something here.
+ return TargetAsmInfo::PreferredEHDataFormat(Reason, Global);
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class SPUTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.h b/lib/Target/CellSPU/SPUTargetAsmInfo.h
new file mode 100644
index 0000000..d10a565
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.h
@@ -0,0 +1,51 @@
+//===-- SPUTargetAsmInfo.h - Cell SPU asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPUTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUTARGETASMINFO_H
+#define SPUTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "SPUTargetMachine.h"
+#include "SPUSubtarget.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class SPUTargetMachine;
+
+ template <class BaseTAI>
+ struct SPUTargetAsmInfo : public BaseTAI {
+ explicit SPUTargetAsmInfo(const SPUTargetMachine &TM):
+ BaseTAI(TM) {
+ /* (unused today)
+ * const SPUSubtarget *Subtarget = &TM.getSubtarget<SPUSubtarget>(); */
+
+ BaseTAI::ZeroDirective = "\t.space\t";
+ BaseTAI::SetDirective = "\t.set";
+ BaseTAI::Data64bitsDirective = "\t.quad\t";
+ BaseTAI::AlignmentIsInBytes = false;
+ BaseTAI::LCOMMDirective = "\t.lcomm\t";
+ BaseTAI::InlineAsmStart = "# InlineAsm Start";
+ BaseTAI::InlineAsmEnd = "# InlineAsm End";
+ }
+ };
+
+ struct SPULinuxTargetAsmInfo : public SPUTargetAsmInfo<ELFTargetAsmInfo> {
+ explicit SPULinuxTargetAsmInfo(const SPUTargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ };
+} // namespace llvm
+
+#endif /* SPUTARGETASMINFO_H */
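
SPUTargetAsmInfo is a mixin: templating over the base class lets one set of
directive overrides be layered onto either TargetAsmInfo or ELFTargetAsmInfo.
A reduced C++ sketch of the pattern with stand-in types (not the LLVM classes):

    #include <iostream>
    #include <string>

    struct BaseInfo           { std::string ZeroDirective = ".zero"; };
    struct ELFInfo : BaseInfo { /* ELF-only fields would live here */ };

    template <class Base>
    struct CommonInfo : Base {
      CommonInfo() { Base::ZeroDirective = "\t.space\t"; }  // shared override
    };

    struct LinuxInfo : CommonInfo<ELFInfo> { /* platform tweaks go here */ };

    int main() {
      LinuxInfo TAI;
      std::cout << TAI.ZeroDirective << '\n';  // prints the shared override
    }
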
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
new file mode 100644
index 0000000..7fa9022
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -0,0 +1,98 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPURegisterNames.h"
+#include "SPUTargetAsmInfo.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+
+using namespace llvm;
+
+/// CellSPUTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int CellSPUTargetMachineModule;
+int CellSPUTargetMachineModule = 0;
+
+namespace {
+ // Register the targets
+ RegisterTarget<SPUTargetMachine>
+ CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]");
+}
+
+const std::pair<unsigned, int> *
+SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 1;
+ return &LR[0];
+}
+
+const TargetAsmInfo *
+SPUTargetMachine::createTargetAsmInfo() const
+{
+ return new SPULinuxTargetAsmInfo(*this);
+}
+
+unsigned
+SPUTargetMachine::getModuleMatchQuality(const Module &M)
+{
+  // We strongly match "spu", "cellspu", "spu-*", or "cellspu-*".
+ std::string TT = M.getTargetTriple();
+ if ((TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "spu")
+ || (TT.size() == 7 && std::string(TT.begin(), TT.begin()+7) == "cellspu")
+ || (TT.size() >= 4 && std::string(TT.begin(), TT.begin()+4) == "spu-")
+ || (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "cellspu-"))
+ return 20;
+
+ return 0; // No match at all...
+}
+
+SPUTargetMachine::SPUTargetMachine(const Module &M, const std::string &FS)
+ : Subtarget(*this, M, FS),
+ DataLayout(Subtarget.getTargetDataString()),
+ InstrInfo(*this),
+ FrameInfo(*this),
+ TLInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData())
+{
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ setRelocationModel(Reloc::Static);
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool
+SPUTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel)
+{
+ // Install an instruction selector.
+ PM.add(createSPUISelDag(*this));
+ return false;
+}
+
+bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ PM.add(createSPUAsmPrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
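
getModuleMatchQuality boils down to a prefix test on the module's target
triple; the registry then picks the registered target with the highest score.
A standalone restatement of the predicate (illustration, not the registry
mechanism itself):

    #include <cassert>
    #include <string>

    static unsigned matchQuality(const std::string &TT) {
      if (TT == "spu" || TT == "cellspu" ||
          TT.compare(0, 4, "spu-") == 0 || TT.compare(0, 8, "cellspu-") == 0)
        return 20;  // strong match, as in SPUTargetMachine above
      return 0;     // no match at all
    }

    int main() {
      assert(matchQuality("cellspu-unknown-elf") == 20);
      assert(matchQuality("spu") == 20);
      assert(matchQuality("x86_64-unknown-linux-gnu") == 0);
    }
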
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
new file mode 100644
index 0000000..cd39203
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -0,0 +1,95 @@
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_TARGETMACHINE_H
+#define SPU_TARGETMACHINE_H
+
+#include "SPUSubtarget.h"
+#include "SPUInstrInfo.h"
+#include "SPUISelLowering.h"
+#include "SPUFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+class TargetFrameInfo;
+
+/// SPUTargetMachine
+///
+class SPUTargetMachine : public LLVMTargetMachine {
+ SPUSubtarget Subtarget;
+ const TargetData DataLayout;
+ SPUInstrInfo InstrInfo;
+ SPUFrameInfo FrameInfo;
+ SPUTargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ SPUTargetMachine(const Module &M, const std::string &FS);
+
+ /// Return the subtarget implementation object
+ virtual const SPUSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const SPUInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const SPUFrameInfo *getFrameInfo() const {
+ return &FrameInfo;
+ }
+ /*!
+ \note Cell SPU does not support JIT today. It could support JIT at some
+ point.
+ */
+ virtual TargetJITInfo *getJITInfo() {
+ return NULL;
+ }
+
+ //! Module match function
+ /*!
+ Module matching function called by TargetMachineRegistry().
+ */
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ virtual SPUTargetLowering *getTargetLowering() const {
+ return const_cast<SPUTargetLowering*>(&TLInfo);
+ }
+
+ virtual const SPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const {
+ return &DataLayout;
+ }
+
+ virtual const InstrItineraryData getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
new file mode 100644
index 0000000..f8182b8
--- /dev/null
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_target(CppBackend
+ CPPBackend.cpp
+ )
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
new file mode 100644
index 0000000..4082989
--- /dev/null
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -0,0 +1,2007 @@
+//===-- CPPBackend.cpp - Library for converting LLVM code to C++ code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the writing of the LLVM IR as a set of C++ calls to the
+// LLVM IR interface. The input module is assumed to be verified.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<std::string>
+FuncName("cppfname", cl::desc("Specify the name of the generated function"),
+ cl::value_desc("function name"));
+
+enum WhatToGenerate {
+ GenProgram,
+ GenModule,
+ GenContents,
+ GenFunction,
+ GenFunctions,
+ GenInline,
+ GenVariable,
+ GenType
+};
+
+static cl::opt<WhatToGenerate> GenerationType("cppgen", cl::Optional,
+ cl::desc("Choose what kind of output to generate"),
+ cl::init(GenProgram),
+ cl::values(
+ clEnumValN(GenProgram, "program", "Generate a complete program"),
+ clEnumValN(GenModule, "module", "Generate a module definition"),
+ clEnumValN(GenContents, "contents", "Generate contents of a module"),
+ clEnumValN(GenFunction, "function", "Generate a function definition"),
+ clEnumValN(GenFunctions,"functions", "Generate all function definitions"),
+ clEnumValN(GenInline, "inline", "Generate an inline function"),
+ clEnumValN(GenVariable, "variable", "Generate a variable definition"),
+ clEnumValN(GenType, "type", "Generate a type definition"),
+ clEnumValEnd
+ )
+);
+
+static cl::opt<std::string> NameToGenerate("cppfor", cl::Optional,
+ cl::desc("Specify the name of the thing to generate"),
+ cl::init("!bad!"));
+
+/// CppBackendTargetMachineModule - Note that this is used on hosts
+/// that cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int CppBackendTargetMachineModule;
+int CppBackendTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend");
+
+namespace {
+ typedef std::vector<const Type*> TypeList;
+ typedef std::map<const Type*,std::string> TypeMap;
+ typedef std::map<const Value*,std::string> ValueMap;
+ typedef std::set<std::string> NameSet;
+ typedef std::set<const Type*> TypeSet;
+ typedef std::set<const Value*> ValueSet;
+ typedef std::map<const Value*,std::string> ForwardRefMap;
+
+ /// CppWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C++ translation unit.
+ class CppWriter : public ModulePass {
+ raw_ostream &Out;
+ const Module *TheModule;
+ uint64_t uniqueNum;
+ TypeMap TypeNames;
+ ValueMap ValueNames;
+ TypeMap UnresolvedTypes;
+ TypeList TypeStack;
+ NameSet UsedNames;
+ TypeSet DefinedTypes;
+ ValueSet DefinedValues;
+ ForwardRefMap ForwardRefs;
+ bool is_inline;
+
+ public:
+ static char ID;
+ explicit CppWriter(raw_ostream &o) :
+ ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {}
+
+ virtual const char *getPassName() const { return "C++ backend"; }
+
+ bool runOnModule(Module &M);
+
+ void printProgram(const std::string& fname, const std::string& modName );
+ void printModule(const std::string& fname, const std::string& modName );
+ void printContents(const std::string& fname, const std::string& modName );
+ void printFunction(const std::string& fname, const std::string& funcName );
+ void printFunctions();
+ void printInline(const std::string& fname, const std::string& funcName );
+ void printVariable(const std::string& fname, const std::string& varName );
+ void printType(const std::string& fname, const std::string& typeName );
+
+ void error(const std::string& msg);
+
+ private:
+ void printLinkageType(GlobalValue::LinkageTypes LT);
+ void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+ void printCallingConv(unsigned cc);
+ void printEscapedString(const std::string& str);
+ void printCFP(const ConstantFP* CFP);
+
+ std::string getCppName(const Type* val);
+ inline void printCppName(const Type* val);
+
+ std::string getCppName(const Value* val);
+ inline void printCppName(const Value* val);
+
+ void printAttributes(const AttrListPtr &PAL, const std::string &name);
+ bool printTypeInternal(const Type* Ty);
+ inline void printType(const Type* Ty);
+ void printTypes(const Module* M);
+
+ void printConstant(const Constant *CPV);
+ void printConstants(const Module* M);
+
+ void printVariableUses(const GlobalVariable *GV);
+ void printVariableHead(const GlobalVariable *GV);
+ void printVariableBody(const GlobalVariable *GV);
+
+ void printFunctionUses(const Function *F);
+ void printFunctionHead(const Function *F);
+ void printFunctionBody(const Function *F);
+ void printInstruction(const Instruction *I, const std::string& bbname);
+ std::string getOpName(Value*);
+
+ void printModuleBody();
+ };
+
+ static unsigned indent_level = 0;
+ inline raw_ostream& nl(raw_ostream& Out, int delta = 0) {
+ Out << "\n";
+ if (delta >= 0 || indent_level >= unsigned(-delta))
+ indent_level += delta;
+ for (unsigned i = 0; i < indent_level; ++i)
+ Out << " ";
+ return Out;
+ }
+
+ inline void in() { indent_level++; }
+ inline void out() { if (indent_level >0) indent_level--; }
+
+ inline void
+ sanitize(std::string& str) {
+ for (size_t i = 0; i < str.length(); ++i)
+ if (!isalnum(str[i]) && str[i] != '_')
+ str[i] = '_';
+ }
+
+ inline std::string
+ getTypePrefix(const Type* Ty ) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "void_";
+ case Type::IntegerTyID:
+ return std::string("int") + utostr(cast<IntegerType>(Ty)->getBitWidth()) +
+ "_";
+ case Type::FloatTyID: return "float_";
+ case Type::DoubleTyID: return "double_";
+ case Type::LabelTyID: return "label_";
+ case Type::FunctionTyID: return "func_";
+ case Type::StructTyID: return "struct_";
+ case Type::ArrayTyID: return "array_";
+ case Type::PointerTyID: return "ptr_";
+ case Type::VectorTyID: return "packed_";
+ case Type::OpaqueTyID: return "opaque_";
+ default: return "other_";
+ }
+ return "unknown_";
+ }
+
+ // Looks up the type in the symbol table and returns a pointer to its name or
+ // a null pointer if it wasn't found. Note that this isn't the same as the
+  // Module::getTypeName function, which will return an empty string, not a
+  // null pointer, if the name is not found.
+ inline const std::string*
+ findTypeName(const TypeSymbolTable& ST, const Type* Ty) {
+ TypeSymbolTable::const_iterator TI = ST.begin();
+ TypeSymbolTable::const_iterator TE = ST.end();
+ for (;TI != TE; ++TI)
+ if (TI->second == Ty)
+ return &(TI->first);
+ return 0;
+ }
+
+ void CppWriter::error(const std::string& msg) {
+ cerr << msg << "\n";
+ exit(2);
+ }
+
+ // printCFP - Print a floating point constant .. very carefully :)
+  // This makes sure that conversion to/from the textual form yields the same
+  // binary result, so that we don't lose precision.
+ void CppWriter::printCFP(const ConstantFP *CFP) {
+ bool ignored;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::FloatTy)
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ Out << "ConstantFP::get(";
+ Out << "APFloat(";
+#if HAVE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%A", APF.convertToDouble());
+ if ((!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3)) &&
+ APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
+ if (CFP->getType() == Type::DoubleTy)
+ Out << "BitsToDouble(" << Buffer << ")";
+ else
+ Out << "BitsToFloat((float)" << Buffer << ")";
+ Out << ")";
+ } else {
+#endif
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+      // "Inf" or "NaN". Check that the string matches the "[-+]?[0-9]" regex.
+ if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ (CFP->isExactlyValue(atof(StrVal.c_str())))) {
+ if (CFP->getType() == Type::DoubleTy)
+ Out << StrVal;
+ else
+ Out << StrVal << "f";
+ } else if (CFP->getType() == Type::DoubleTy)
+ Out << "BitsToDouble(0x"
+ << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
+ << "ULL) /* " << StrVal << " */";
+ else
+ Out << "BitsToFloat(0x"
+ << utohexstr((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue())
+ << "U) /* " << StrVal << " */";
+ Out << ")";
+#if HAVE_PRINTF_A
+ }
+#endif
+ Out << ")";
+ }
+
+ void CppWriter::printCallingConv(unsigned cc){
+ // Print the calling convention.
+ switch (cc) {
+ case CallingConv::C: Out << "CallingConv::C"; break;
+ case CallingConv::Fast: Out << "CallingConv::Fast"; break;
+ case CallingConv::Cold: Out << "CallingConv::Cold"; break;
+ case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
+ default: Out << cc; break;
+ }
+ }
+
+ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
+ switch (LT) {
+ case GlobalValue::InternalLinkage:
+ Out << "GlobalValue::InternalLinkage"; break;
+ case GlobalValue::PrivateLinkage:
+ Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "GlobalValue::AvailableExternallyLinkage "; break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ Out << "GlobalValue::LinkOnceAnyLinkage "; break;
+ case GlobalValue::LinkOnceODRLinkage:
+ Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::WeakAnyLinkage:
+ Out << "GlobalValue::WeakAnyLinkage"; break;
+ case GlobalValue::WeakODRLinkage:
+ Out << "GlobalValue::WeakODRLinkage"; break;
+ case GlobalValue::AppendingLinkage:
+ Out << "GlobalValue::AppendingLinkage"; break;
+ case GlobalValue::ExternalLinkage:
+ Out << "GlobalValue::ExternalLinkage"; break;
+ case GlobalValue::DLLImportLinkage:
+ Out << "GlobalValue::DLLImportLinkage"; break;
+ case GlobalValue::DLLExportLinkage:
+ Out << "GlobalValue::DLLExportLinkage"; break;
+ case GlobalValue::ExternalWeakLinkage:
+ Out << "GlobalValue::ExternalWeakLinkage"; break;
+ case GlobalValue::GhostLinkage:
+ Out << "GlobalValue::GhostLinkage"; break;
+ case GlobalValue::CommonLinkage:
+ Out << "GlobalValue::CommonLinkage"; break;
+ }
+ }
+
+ void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
+ switch (VisType) {
+ default: assert(0 && "Unknown GVar visibility");
+ case GlobalValue::DefaultVisibility:
+ Out << "GlobalValue::DefaultVisibility";
+ break;
+ case GlobalValue::HiddenVisibility:
+ Out << "GlobalValue::HiddenVisibility";
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Out << "GlobalValue::ProtectedVisibility";
+ break;
+ }
+ }
+
+ // printEscapedString - Print each character of the specified string, escaping
+ // it if it is not printable or if it is an escape char.
+ void CppWriter::printEscapedString(const std::string &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << "\\x"
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ }
+ }
+ }
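+  // For example (illustrative): the input "hi\n" is emitted as hi\x0A, since
+  // '\n' is not printable and is escaped as two uppercase hex digits.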
+
+ std::string CppWriter::getCppName(const Type* Ty) {
+ // First, handle the primitive types .. easy
+ if (Ty->isPrimitiveType() || Ty->isInteger()) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "Type::VoidTy";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(" + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID: return "Type::X86_FP80Ty";
+ case Type::FloatTyID: return "Type::FloatTy";
+ case Type::DoubleTyID: return "Type::DoubleTy";
+ case Type::LabelTyID: return "Type::LabelTy";
+ default:
+ error("Invalid primitive type");
+ break;
+ }
+ return "Type::VoidTy"; // shouldn't be returned, but make it sensible
+ }
+
+ // Now, see if we've seen the type before and return that
+ TypeMap::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end())
+ return I->second;
+
+ // Okay, let's build a new name for this type. Start with a prefix
+ const char* prefix = 0;
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: prefix = "FuncTy_"; break;
+ case Type::StructTyID: prefix = "StructTy_"; break;
+ case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+ case Type::PointerTyID: prefix = "PointerTy_"; break;
+ case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
+ case Type::VectorTyID: prefix = "VectorTy_"; break;
+ default: prefix = "OtherTy_"; break; // prevent breakage
+ }
+
+    // See if the type has a name in the symbol table and build accordingly.
+ const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
+ std::string name;
+ if (tName)
+ name = std::string(prefix) + *tName;
+ else
+ name = std::string(prefix) + utostr(uniqueNum++);
+ sanitize(name);
+
+ // Save the name
+ return TypeNames[Ty] = name;
+ }
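+  // Illustratively: a struct type registered as "foo" in the module's type
+  // symbol table becomes "StructTy_foo", while an unnamed pointer type gets
+  // "PointerTy_0", "PointerTy_1", ... as uniqueNum advances.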
+
+ void CppWriter::printCppName(const Type* Ty) {
+ printEscapedString(getCppName(Ty));
+ }
+
+ std::string CppWriter::getCppName(const Value* val) {
+ std::string name;
+ ValueMap::iterator I = ValueNames.find(val);
+ if (I != ValueNames.end() && I->first == val)
+ return I->second;
+
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+ name = std::string("gvar_") +
+ getTypePrefix(GV->getType()->getElementType());
+ } else if (isa<Function>(val)) {
+ name = std::string("func_");
+ } else if (const Constant* C = dyn_cast<Constant>(val)) {
+ name = std::string("const_") + getTypePrefix(C->getType());
+ } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+ if (is_inline) {
+ unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+ Function::const_arg_iterator(Arg)) + 1;
+ name = std::string("arg_") + utostr(argNum);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ name += (val->hasName() ? val->getName() : utostr(uniqueNum++));
+ sanitize(name);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+ }
+
+ void CppWriter::printCppName(const Value* val) {
+ printEscapedString(getCppName(val));
+ }
+
+ void CppWriter::printAttributes(const AttrListPtr &PAL,
+ const std::string &name) {
+ Out << "AttrListPtr " << name << "_PAL;";
+ nl(Out);
+ if (!PAL.isEmpty()) {
+ Out << '{'; in(); nl(Out);
+ Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
+ Out << "AttributeWithIndex PAWI;"; nl(Out);
+ for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+ unsigned index = PAL.getSlot(i).Index;
+ Attributes attrs = PAL.getSlot(i).Attrs;
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+#define HANDLE_ATTR(X) \
+ if (attrs & Attribute::X) \
+ Out << " | Attribute::" #X; \
+ attrs &= ~Attribute::X;
+
+ HANDLE_ATTR(SExt);
+ HANDLE_ATTR(ZExt);
+ HANDLE_ATTR(NoReturn);
+ HANDLE_ATTR(InReg);
+ HANDLE_ATTR(StructRet);
+ HANDLE_ATTR(NoUnwind);
+ HANDLE_ATTR(NoAlias);
+ HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(Nest);
+ HANDLE_ATTR(ReadNone);
+ HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(NoInline);
+ HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeForSize);
+ HANDLE_ATTR(StackProtect);
+ HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(NoCapture);
+#undef HANDLE_ATTR
+ assert(attrs == 0 && "Unhandled attribute!");
+ Out << ";";
+ nl(Out);
+ Out << "Attrs.push_back(PAWI);";
+ nl(Out);
+ }
+ Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ nl(Out);
+ out(); nl(Out);
+ Out << '}'; nl(Out);
+ }
+ }
+
+ bool CppWriter::printTypeInternal(const Type* Ty) {
+ // We don't print definitions for primitive types
+ if (Ty->isPrimitiveType() || Ty->isInteger())
+ return false;
+
+ // If we already defined this type, we don't need to define it again.
+ if (DefinedTypes.find(Ty) != DefinedTypes.end())
+ return false;
+
+ // Everything below needs the name for the type so get it now.
+ std::string typeName(getCppName(Ty));
+
+ // Search the type stack for recursion. If we find it, then generate this
+ // as an OpaqueType, but make sure not to do this multiple times because
+ // the type could appear in multiple places on the stack. Once the opaque
+ // definition is issued, it must not be re-issued. Consequently we have to
+ // check the UnresolvedTypes list as well.
+ TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
+ Ty);
+ if (TI != TypeStack.end()) {
+ TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
+ if (I == UnresolvedTypes.end()) {
+ Out << "PATypeHolder " << typeName << "_fwd = OpaqueType::get();";
+ nl(Out);
+ UnresolvedTypes[Ty] = typeName;
+ }
+ return true;
+ }
+
+ // We're going to print a derived type which, by definition, contains other
+ // types. So, push this one we're printing onto the type stack to assist with
+ // recursive definitions.
+ TypeStack.push_back(Ty);
+
+ // Print the type definition
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType* FT = cast<FunctionType>(Ty);
+ Out << "std::vector<const Type*>" << typeName << "_args;";
+ nl(Out);
+ FunctionType::param_iterator PI = FT->param_begin();
+ FunctionType::param_iterator PE = FT->param_end();
+ for (; PI != PE; ++PI) {
+ const Type* argTy = static_cast<const Type*>(*PI);
+ bool isForward = printTypeInternal(argTy);
+ std::string argName(getCppName(argTy));
+ Out << typeName << "_args.push_back(" << argName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ");";
+ nl(Out);
+ }
+ bool isForward = printTypeInternal(FT->getReturnType());
+ std::string retTypeName(getCppName(FT->getReturnType()));
+ Out << "FunctionType* " << typeName << " = FunctionType::get(";
+ in(); nl(Out) << "/*Result=*/" << retTypeName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ",";
+ nl(Out) << "/*Params=*/" << typeName << "_args,";
+ nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+ out();
+ nl(Out);
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType* ST = cast<StructType>(Ty);
+ Out << "std::vector<const Type*>" << typeName << "_fields;";
+ nl(Out);
+ StructType::element_iterator EI = ST->element_begin();
+ StructType::element_iterator EE = ST->element_end();
+ for (; EI != EE; ++EI) {
+ const Type* fieldTy = static_cast<const Type*>(*EI);
+ bool isForward = printTypeInternal(fieldTy);
+ std::string fieldName(getCppName(fieldTy));
+ Out << typeName << "_fields.push_back(" << fieldName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ");";
+ nl(Out);
+ }
+ Out << "StructType* " << typeName << " = StructType::get("
+ << typeName << "_fields, /*isPacked=*/"
+ << (ST->isPacked() ? "true" : "false") << ");";
+ nl(Out);
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType* AT = cast<ArrayType>(Ty);
+ const Type* ET = AT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::PointerTyID: {
+ const PointerType* PT = cast<PointerType>(Ty);
+ const Type* ET = PT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType* PT = cast<VectorType>(Ty);
+ const Type* ET = PT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::OpaqueTyID: {
+ Out << "OpaqueType* " << typeName << " = OpaqueType::get();";
+ nl(Out);
+ break;
+ }
+ default:
+ error("Invalid TypeID");
+ }
+
+ // If the type had a name, make sure we recreate it.
+ const std::string* progTypeName =
+ findTypeName(TheModule->getTypeSymbolTable(),Ty);
+ if (progTypeName) {
+ Out << "mod->addTypeName(\"" << *progTypeName << "\", "
+ << typeName << ");";
+ nl(Out);
+ }
+
+ // Pop us off the type stack
+ TypeStack.pop_back();
+
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+
+ // Early resolve as many unresolved types as possible. Search the unresolved
+ // types map for the type we just printed. Now that its definition is complete
+ // we can resolve any previous references to it. This prevents a cascade of
+ // unresolved types.
+ TypeMap::iterator I = UnresolvedTypes.find(Ty);
+ if (I != UnresolvedTypes.end()) {
+ Out << "cast<OpaqueType>(" << I->second
+ << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
+ nl(Out);
+ Out << I->second << " = cast<";
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: Out << "FunctionType"; break;
+ case Type::ArrayTyID: Out << "ArrayType"; break;
+ case Type::StructTyID: Out << "StructType"; break;
+ case Type::VectorTyID: Out << "VectorType"; break;
+ case Type::PointerTyID: Out << "PointerType"; break;
+ case Type::OpaqueTyID: Out << "OpaqueType"; break;
+ default: Out << "NoSuchDerivedType"; break;
+ }
+ Out << ">(" << I->second << "_fwd.get());";
+ nl(Out); nl(Out);
+ UnresolvedTypes.erase(I);
+ }
+
+    // Finally, separate the type definition from the others with a newline.
+ nl(Out);
+
+ // We weren't a recursive type
+ return false;
+ }
+
+  // Prints a top-level type definition. printTypeInternal returns true when
+  // it could not resolve a type and had to use a forward reference, but by
+  // the time control returns here the type stack must again be empty.
+ void CppWriter::printType(const Type* Ty) {
+ assert(TypeStack.empty());
+ TypeStack.clear();
+ printTypeInternal(Ty);
+ assert(TypeStack.empty());
+ }
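+  // For a self-referential type such as "%T = type { %T* }" (name assumed),
+  // the emitted code first declares "PATypeHolder StructTy_T_fwd =
+  // OpaqueType::get();", builds the struct through the forward holder, and
+  // finally patches it up with refineAbstractTypeTo as shown above.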
+
+ void CppWriter::printTypes(const Module* M) {
+ // Walk the symbol table and print out all its types
+ const TypeSymbolTable& symtab = M->getTypeSymbolTable();
+ for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
+ TI != TE; ++TI) {
+
+ // For primitive types and types already defined, just add a name
+ TypeMap::const_iterator TNI = TypeNames.find(TI->second);
+ if (TI->second->isInteger() || TI->second->isPrimitiveType() ||
+ TNI != TypeNames.end()) {
+ Out << "mod->addTypeName(\"";
+ printEscapedString(TI->first);
+ Out << "\", " << getCppName(TI->second) << ");";
+ nl(Out);
+ // For everything else, define the type
+ } else {
+ printType(TI->second);
+ }
+ }
+
+ // Add all of the global variables to the value table...
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
+
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
+ }
+ }
+ }
+ }
+
+
+ // printConstant - Print out a constant pool entry...
+ void CppWriter::printConstant(const Constant *CV) {
+    // First, if the constant is actually a GlobalValue (variable or function,
+    // emitted elsewhere) or it's already in the constant list, then we've
+    // printed it already and we can just return.
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+ return;
+
+ std::string constName(getCppName(CV));
+ std::string typeName(getCppName(CV->getType()));
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ std::string constValue = CI->getValue().toString(10, true);
+ Out << "ConstantInt* " << constName << " = ConstantInt::get(APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth() << ", \""
+ << constValue << "\", " << constValue.length() << ", 10));";
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ Out << "ConstantAggregateZero* " << constName
+ << " = ConstantAggregateZero::get(" << typeName << ");";
+ } else if (isa<ConstantPointerNull>(CV)) {
+ Out << "ConstantPointerNull* " << constName
+ << " = ConstantPointerNull::get(" << typeName << ");";
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ Out << "ConstantFP* " << constName << " = ";
+ printCFP(CFP);
+ Out << ";";
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ if (CA->isString() && CA->getType()->getElementType() == Type::Int8Ty) {
+ Out << "Constant* " << constName << " = ConstantArray::get(\"";
+ std::string tmp = CA->getAsString();
+ bool nullTerminate = false;
+ if (tmp[tmp.length()-1] == 0) {
+ tmp.erase(tmp.length()-1);
+ nullTerminate = true;
+ }
+ printEscapedString(tmp);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true"; // Indicate that the null terminator should be
+ // added.
+ else
+ Out << "\", false";// No null terminator
+ Out << ");";
+ } else {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CA->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CA->getOperand(i)); // recurse to print operands
+ Out << constName << "_elems.push_back("
+ << getCppName(CA->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantArray::get("
+ << typeName << ", " << constName << "_elems);";
+ }
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_fields;";
+ nl(Out);
+ unsigned N = CS->getNumOperands();
+ for (unsigned i = 0; i < N; i++) {
+ printConstant(CS->getOperand(i));
+ Out << constName << "_fields.push_back("
+ << getCppName(CS->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantStruct::get("
+ << typeName << ", " << constName << "_fields);";
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CP->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CP->getOperand(i));
+ Out << constName << "_elems.push_back("
+ << getCppName(CP->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantVector::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (isa<UndefValue>(CV)) {
+ Out << "UndefValue* " << constName << " = UndefValue::get("
+ << typeName << ");";
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Out << "std::vector<Constant*> " << constName << "_indices;";
+ nl(Out);
+ printConstant(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+ printConstant(CE->getOperand(i));
+ Out << constName << "_indices.push_back("
+ << getCppName(CE->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName
+ << " = ConstantExpr::getGetElementPtr("
+ << getCppName(CE->getOperand(0)) << ", "
+ << "&" << constName << "_indices[0], "
+ << constName << "_indices.size()"
+ << " );";
+ } else if (CE->isCast()) {
+ printConstant(CE->getOperand(0));
+ Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+ switch (CE->getOpcode()) {
+ default: assert(0 && "Invalid cast opcode");
+ case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+ case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+ case Instruction::SExt: Out << "Instruction::SExt"; break;
+ case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+ case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+ case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+ case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+ case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+ case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+ case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+ case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+ case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+ }
+ Out << ", " << getCppName(CE->getOperand(0)) << ", "
+ << getCppName(CE->getType()) << ");";
+ } else {
+ unsigned N = CE->getNumOperands();
+ for (unsigned i = 0; i < N; ++i ) {
+ printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName << " = ConstantExpr::";
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::UDiv: Out << "getUDiv("; break;
+ case Instruction::SDiv: Out << "getSDiv("; break;
+ case Instruction::FDiv: Out << "getFDiv("; break;
+ case Instruction::URem: Out << "getURem("; break;
+ case Instruction::SRem: Out << "getSRem("; break;
+ case Instruction::FRem: Out << "getFRem("; break;
+ case Instruction::And: Out << "getAnd("; break;
+ case Instruction::Or: Out << "getOr("; break;
+ case Instruction::Xor: Out << "getXor("; break;
+ case Instruction::ICmp:
+ Out << "getICmp(ICmpInst::ICMP_";
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "NE"; break;
+ case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+ case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+ case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+ case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+ case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+ case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+ case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+ default: error("Invalid ICmp Predicate");
+ }
+ break;
+ case Instruction::FCmp:
+ Out << "getFCmp(FCmpInst::FCMP_";
+ switch (CE->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+ case FCmpInst::FCMP_ORD: Out << "ORD"; break;
+ case FCmpInst::FCMP_UNO: Out << "UNO"; break;
+ case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
+ case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
+ case FCmpInst::FCMP_ONE: Out << "ONE"; break;
+ case FCmpInst::FCMP_UNE: Out << "UNE"; break;
+ case FCmpInst::FCMP_OLT: Out << "OLT"; break;
+ case FCmpInst::FCMP_ULT: Out << "ULT"; break;
+ case FCmpInst::FCMP_OGT: Out << "OGT"; break;
+ case FCmpInst::FCMP_UGT: Out << "UGT"; break;
+ case FCmpInst::FCMP_OLE: Out << "OLE"; break;
+ case FCmpInst::FCMP_ULE: Out << "ULE"; break;
+ case FCmpInst::FCMP_OGE: Out << "OGE"; break;
+ case FCmpInst::FCMP_UGE: Out << "UGE"; break;
+ case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
+ default: error("Invalid FCmp Predicate");
+ }
+ break;
+ case Instruction::Shl: Out << "getShl("; break;
+ case Instruction::LShr: Out << "getLShr("; break;
+ case Instruction::AShr: Out << "getAShr("; break;
+ case Instruction::Select: Out << "getSelect("; break;
+ case Instruction::ExtractElement: Out << "getExtractElement("; break;
+ case Instruction::InsertElement: Out << "getInsertElement("; break;
+ case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
+ default:
+ error("Invalid constant expression");
+ break;
+ }
+ Out << getCppName(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+ Out << ", " << getCppName(CE->getOperand(i));
+ Out << ");";
+ }
+ } else {
+ error("Bad Constant");
+ Out << "Constant* " << constName << " = 0; ";
+ }
+ nl(Out);
+ }
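+  // Example of the emitted code for an unnamed 32-bit integer constant 42
+  // (the variable name is illustrative):
+  //   ConstantInt* const_int32_0 = ConstantInt::get(APInt(32, "42", 2, 10));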
+
+ void CppWriter::printConstants(const Module* M) {
+ // Traverse all the global variables looking for constant initializers
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ printConstant(I->getInitializer());
+
+ // Traverse the LLVM functions looking for constants
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
+ printConstant(C);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ void CppWriter::printVariableUses(const GlobalVariable *GV) {
+ nl(Out) << "// Type Definitions";
+ nl(Out);
+ printType(GV->getType());
+ if (GV->hasInitializer()) {
+ Constant* Init = GV->getInitializer();
+ printType(Init->getType());
+ if (Function* F = dyn_cast<Function>(Init)) {
+        nl(Out) << "// Function Declarations"; nl(Out);
+ printFunctionHead(F);
+ } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ printVariableHead(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+        printConstant(Init);
+ }
+ if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ printVariableBody(gv);
+ }
+ }
+ }
+
+ void CppWriter::printVariableHead(const GlobalVariable *GV) {
+ nl(Out) << "GlobalVariable* " << getCppName(GV);
+ if (is_inline) {
+ Out << " = mod->getGlobalVariable(";
+ printEscapedString(GV->getName());
+ Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+ nl(Out) << "if (!" << getCppName(GV) << ") {";
+ in(); nl(Out) << getCppName(GV);
+ }
+ Out << " = new GlobalVariable(";
+ nl(Out) << "/*Type=*/";
+ printCppName(GV->getType()->getElementType());
+ Out << ",";
+ nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+ Out << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(GV->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Initializer=*/0, ";
+ if (GV->hasInitializer()) {
+ Out << "// has initializer, specified below";
+ }
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(GV->getName());
+ Out << "\",";
+ nl(Out) << "mod);";
+ nl(Out);
+
+ if (GV->hasSection()) {
+ printCppName(GV);
+ Out << "->setSection(\"";
+ printEscapedString(GV->getSection());
+ Out << "\");";
+ nl(Out);
+ }
+ if (GV->getAlignment()) {
+ printCppName(GV);
+ Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ nl(Out);
+ }
+ if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(GV);
+ Out << "->setVisibility(";
+ printVisibilityType(GV->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (is_inline) {
+ out(); Out << "}"; nl(Out);
+ }
+ }
+
+ void CppWriter::printVariableBody(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ printCppName(GV);
+ Out << "->setInitializer(";
+ Out << getCppName(GV->getInitializer()) << ");";
+ nl(Out);
+ }
+ }
+
+ std::string CppWriter::getOpName(Value* V) {
+ if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+ return getCppName(V);
+
+ // See if it's already in the map of forward references; if so, just return
+ // the name we already set up for it
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
+
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
+
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
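+ // For illustration (names hypothetical): a use of a not-yet-defined value
+ // emits, say, "Argument* fwdref_7 = new Argument(ty);" here, and
+ // printFunctionBody() later resolves it with
+ // "fwdref_7->replaceAllUsesWith(<value>); delete fwdref_7;".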
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+ }
+
+ // printInstruction - This member is called for each Instruction in a function.
+ void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ std::string* opNames = new std::string[I->getNumOperands()];
+ for (unsigned i = 0; i < I->getNumOperands(); i++) {
+ opNames[i] = getOpName(I->getOperand(i));
+ }
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create("
+ << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+ break;
+ }
+ case Instruction::Br: {
+ const BranchInst* br = cast<BranchInst>(I);
+ Out << "BranchInst::Create(" ;
+ if (br->getNumOperands() == 3 ) {
+ Out << opNames[2] << ", "
+ << opNames[1] << ", "
+ << opNames[0] << ", ";
+
+ } else if (br->getNumOperands() == 1) {
+ Out << opNames[0] << ", ";
+ } else {
+ error("Branch with 2 operands?");
+ }
+ Out << bbname << ");";
+ break;
+ }
+ case Instruction::Switch: {
+ const SwitchInst* sw = cast<SwitchInst>(I);
+ Out << "SwitchInst* " << iName << " = SwitchInst::Create("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << sw->getNumCases() << ", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 2; i < sw->getNumOperands(); i += 2 ) {
+ Out << iName << "->addCase("
+ << opNames[i] << ", "
+ << opNames[i+1] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Invoke: {
+ const InvokeInst* inv = cast<InvokeInst>(I);
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 3; i < inv->getNumOperands(); ++i) {
+ Out << iName << "_params.push_back("
+ << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << opNames[2] << ", "
+ << iName << "_params.begin(), " << iName << "_params.end(), \"";
+ printEscapedString(inv->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(inv->getCallingConv());
+ Out << ");";
+ printAttributes(inv->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Unwind: {
+ Out << "new UnwindInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Unreachable:{
+ Out << "new UnreachableInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:{
+ Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+ switch (I->getOpcode()) {
+ case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::UDiv:Out << "Instruction::UDiv"; break;
+ case Instruction::SDiv:Out << "Instruction::SDiv"; break;
+ case Instruction::FDiv:Out << "Instruction::FDiv"; break;
+ case Instruction::URem:Out << "Instruction::URem"; break;
+ case Instruction::SRem:Out << "Instruction::SRem"; break;
+ case Instruction::FRem:Out << "Instruction::FRem"; break;
+ case Instruction::And: Out << "Instruction::And"; break;
+ case Instruction::Or: Out << "Instruction::Or"; break;
+ case Instruction::Xor: Out << "Instruction::Xor"; break;
+ case Instruction::Shl: Out << "Instruction::Shl"; break;
+ case Instruction::LShr:Out << "Instruction::LShr"; break;
+ case Instruction::AShr:Out << "Instruction::AShr"; break;
+ default: Out << "Instruction::BadOpCode"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::FCmp: {
+ Out << "FCmpInst* " << iName << " = new FCmpInst(";
+ switch (cast<FCmpInst>(I)->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
+ case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
+ case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
+ case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
+ case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
+ case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
+ case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
+ case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
+ case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
+ case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
+ case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
+ case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
+ case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
+ case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+ case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+ case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+ default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ICmp: {
+ Out << "ICmpInst* " << iName << " = new ICmpInst(";
+ switch (cast<ICmpInst>(I)->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+ case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+ case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+ case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+ case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+ case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+ case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+ case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+ default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Malloc: {
+ const MallocInst* mallocI = cast<MallocInst>(I);
+ Out << "MallocInst* " << iName << " = new MallocInst("
+ << getCppName(mallocI->getAllocatedType()) << ", ";
+ if (mallocI->isArrayAllocation())
+ Out << opNames[0] << ", " ;
+ Out << "\"";
+ printEscapedString(mallocI->getName());
+ Out << "\", " << bbname << ");";
+ if (mallocI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << mallocI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Free: {
+ Out << "FreeInst* " << iName << " = new FreeInst("
+ << getCppName(I->getOperand(0)) << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst* allocaI = cast<AllocaInst>(I);
+ Out << "AllocaInst* " << iName << " = new AllocaInst("
+ << getCppName(allocaI->getAllocatedType()) << ", ";
+ if (allocaI->isArrayAllocation())
+ Out << opNames[0] << ", ";
+ Out << "\"";
+ printEscapedString(allocaI->getName());
+ Out << "\", " << bbname << ");";
+ if (allocaI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << allocaI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Load:{
+ const LoadInst* load = cast<LoadInst>(I);
+ Out << "LoadInst* " << iName << " = new LoadInst("
+ << opNames[0] << ", \"";
+ printEscapedString(load->getName());
+ Out << "\", " << (load->isVolatile() ? "true" : "false" )
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Store: {
+ const StoreInst* store = cast<StoreInst>(I);
+ Out << " new StoreInst("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << (store->isVolatile() ? "true" : "false")
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
+ if (gep->getNumOperands() <= 2) {
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0];
+ if (gep->getNumOperands() == 2)
+ Out << ", " << opNames[1];
+ } else {
+ Out << "std::vector<Value*> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ Out << iName << "_indices.push_back("
+ << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0] << ", " << iName << "_indices.begin(), "
+ << iName << "_indices.end()";
+ }
+ Out << ", \"";
+ printEscapedString(gep->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode* phi = cast<PHINode>(I);
+
+ Out << "PHINode* " << iName << " = PHINode::Create("
+ << getCppName(phi->getType()) << ", \"";
+ printEscapedString(phi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->reserveOperandSpace("
+ << phi->getNumIncomingValues()
+ << ");";
+ nl(Out);
+ for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
+ Out << iName << "->addIncoming("
+ << opNames[i] << ", " << opNames[i+1] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ const CastInst* cst = cast<CastInst>(I);
+ Out << "CastInst* " << iName << " = new ";
+ switch (I->getOpcode()) {
+ case Instruction::Trunc: Out << "TruncInst"; break;
+ case Instruction::ZExt: Out << "ZExtInst"; break;
+ case Instruction::SExt: Out << "SExtInst"; break;
+ case Instruction::FPTrunc: Out << "FPTruncInst"; break;
+ case Instruction::FPExt: Out << "FPExtInst"; break;
+ case Instruction::FPToUI: Out << "FPToUIInst"; break;
+ case Instruction::FPToSI: Out << "FPToSIInst"; break;
+ case Instruction::UIToFP: Out << "UIToFPInst"; break;
+ case Instruction::SIToFP: Out << "SIToFPInst"; break;
+ case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
+ case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
+ case Instruction::BitCast: Out << "BitCastInst"; break;
+ default: assert(!"Unreachable"); break;
+ }
+ Out << "(" << opNames[0] << ", "
+ << getCppName(cst->getType()) << ", \"";
+ printEscapedString(cst->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Call:{
+ const CallInst* call = cast<CallInst>(I);
+ if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
+ Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
+ << getCppName(ila->getFunctionType()) << ", \""
+ << ila->getAsmString() << "\", \""
+ << ila->getConstraintString() << "\","
+ << (ila->hasSideEffects() ? "true" : "false") << ");";
+ nl(Out);
+ }
+ if (call->getNumOperands() > 2) {
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 1; i < call->getNumOperands(); ++i) {
+ Out << iName << "_params.push_back(" << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[0] << ", " << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ } else if (call->getNumOperands() == 2) {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[0] << ", " << opNames[1] << ", \"";
+ } else {
+ Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[0]
+ << ", \"";
+ }
+ printEscapedString(call->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(call->getCallingConv());
+ Out << ");";
+ nl(Out) << iName << "->setTailCall("
+ << (call->isTailCall() ? "true":"false");
+ Out << ");";
+ printAttributes(call->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Select: {
+ const SelectInst* sel = cast<SelectInst>(I);
+ Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
+ Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(sel->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::UserOp1:
+ /// FALL THROUGH
+ case Instruction::UserOp2: {
+ /// FIXME: What should be done here?
+ break;
+ }
+ case Instruction::VAArg: {
+ const VAArgInst* va = cast<VAArgInst>(I);
+ Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+ << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+ printEscapedString(va->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+ Out << "ExtractElementInst* " << getCppName(eei)
+ << " = new ExtractElementInst(" << opNames[0]
+ << ", " << opNames[1] << ", \"";
+ printEscapedString(eei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst* iei = cast<InsertElementInst>(I);
+ Out << "InsertElementInst* " << getCppName(iei)
+ << " = InsertElementInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(iei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+ Out << "ShuffleVectorInst* " << getCppName(svi)
+ << " = new ShuffleVectorInst(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(svi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << evi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "ExtractValueInst* " << getCppName(evi)
+ << " = ExtractValueInst::Create(" << opNames[0]
+ << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(evi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertValue: {
+ const InsertValueInst *ivi = cast<InsertValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << ivi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "InsertValueInst* " << getCppName(ivi)
+ << " = InsertValueInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(ivi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ }
+ DefinedValues.insert(I);
+ nl(Out);
+ delete [] opNames;
+ }
+
+ // Print out the types, constants and declarations needed by one function
+ void CppWriter::printFunctionUses(const Function* F) {
+ nl(Out) << "// Type Definitions"; nl(Out);
+ if (!is_inline) {
+ // Print the function's return type
+ printType(F->getReturnType());
+
+ // Print the function's function type
+ printType(F->getFunctionType());
+
+ // Print the types of each of the function's arguments
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+ }
+
+ // Print type definitions for every type referenced by an instruction and
+ // make a note of any global values or constants that are referenced
+ SmallPtrSet<GlobalValue*,64> gvs;
+ SmallPtrSet<Constant*,64> consts;
+ for (Function::const_iterator BB = F->begin(), BE = F->end();
+ BB != BE; ++BB){
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Print the type of the instruction itself
+ printType(I->getType());
+
+ // Print the type of each of the instruction's operands
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value* operand = I->getOperand(i);
+ printType(operand->getType());
+
+ // If the operand references a GVal or Constant, make a note of it
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand))
+ consts.insert(C);
+ }
+ }
+ }
+
+ // Print the function declarations for any functions encountered
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (Function* Fun = dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
+ }
+ }
+
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }
+
+ // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }
+
+ // Process the global variables definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
+ }
+ }
+
+ void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ if (is_inline) {
+ Out << " = mod->getFunction(\"";
+ printEscapedString(F->getName());
+ Out << "\", " << getCppName(F->getFunctionType()) << ");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+ }
+ Out<< " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
+ nl(Out,-1);
+ printCppName(F);
+ Out << "->setCallingConv(";
+ printCallingConv(F->getCallingConv());
+ Out << ");";
+ nl(Out);
+ if (F->hasSection()) {
+ printCppName(F);
+ Out << "->setSection(\"" << F->getSection() << "\");";
+ nl(Out);
+ }
+ if (F->getAlignment()) {
+ printCppName(F);
+ Out << "->setAlignment(" << F->getAlignment() << ");";
+ nl(Out);
+ }
+ if (F->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(F);
+ Out << "->setVisibility(";
+ printVisibilityType(F->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (F->hasGC()) {
+ printCppName(F);
+ Out << "->setGC(\"" << F->getGC() << "\");";
+ nl(Out);
+ }
+ if (is_inline) {
+ Out << "}";
+ nl(Out);
+ }
+ printAttributes(F->getAttributes(), getCppName(F));
+ printCppName(F);
+ Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ nl(Out);
+ }
+
+ void CppWriter::printFunctionBody(const Function *F) {
+ if (F->isDeclaration())
+ return; // external functions have no bodies.
+
+ // Clear the DefinedValues and ForwardRefs maps because we can't have
+ // cross-function forward refs
+ ForwardRefs.clear();
+ DefinedValues.clear();
+
+ // Create all the argument values
+ if (!is_inline) {
+ if (!F->arg_empty()) {
+ Out << "Function::arg_iterator args = " << getCppName(F)
+ << "->arg_begin();";
+ nl(Out);
+ }
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << "Value* " << getCppName(AI) << " = args++;";
+ nl(Out);
+ if (AI->hasName()) {
+ Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
+ nl(Out);
+ }
+ }
+ }
+
+ // Create all the basic blocks
+ nl(Out);
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ Out << "BasicBlock* " << bbname << " = BasicBlock::Create(\"";
+ if (BI->hasName())
+ printEscapedString(BI->getName());
+ Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ nl(Out);
+ }
+
+ // Output all of the function's basic blocks.
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ nl(Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+ I != E; ++I) {
+ printInstruction(I,bbname);
+ }
+ }
+
+ // Loop over the ForwardRefs and resolve them now that all instructions
+ // are generated.
+ if (!ForwardRefs.empty()) {
+ nl(Out) << "// Resolve Forward References";
+ nl(Out);
+ }
+
+ while (!ForwardRefs.empty()) {
+ ForwardRefMap::iterator I = ForwardRefs.begin();
+ Out << I->second << "->replaceAllUsesWith("
+ << getCppName(I->first) << "); delete " << I->second << ";";
+ nl(Out);
+ ForwardRefs.erase(I);
+ }
+ }
+
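+ // printInline - Emit a C++ function that inlines the body of the named LLVM
+ // function into a caller-supplied Module; the generated function takes the
+ // inlined function's arguments as parameters and returns its entry block.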
+ void CppWriter::printInline(const std::string& fname,
+ const std::string& func) {
+ const Function* F = TheModule->getFunction(func);
+ if (!F) {
+ error(std::string("Function '") + func + "' not found in input module");
+ return;
+ }
+ if (F->isDeclaration()) {
+ error(std::string("Function '") + func + "' is external!");
+ return;
+ }
+ nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+ << getCppName(F);
+ unsigned arg_count = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << ", Value* arg_" << arg_count;
+ }
+ Out << ") {";
+ nl(Out);
+ is_inline = true;
+ printFunctionUses(F);
+ printFunctionBody(F);
+ is_inline = false;
+ Out << "return " << getCppName(F->begin()) << ";";
+ nl(Out) << "}";
+ nl(Out);
+ }
+
+ void CppWriter::printModuleBody() {
+ // Print out all the type definitions
+ nl(Out) << "// Type Definitions"; nl(Out);
+ printTypes(TheModule);
+
+ // Functions can call each other and global variables can reference them so
+ // define all the functions first before emitting their function bodies.
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ printFunctionHead(I);
+
+ // Process the global variable declarations. We can't initialize them until
+ // after the constants are printed, so just print a header for each global.
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableHead(I);
+ }
+
+ // Print out all the constant definitions. Constants don't recurse except
+ // through GlobalValues. All GlobalValues have been declared at this point
+ // so we can proceed to generate the constants.
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstants(TheModule);
+
+ // Process the global variables definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableBody(I);
+ }
+
+ // Finally, we can safely put out all of the function bodies.
+ nl(Out) << "// Function Definitions"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ << ")";
+ nl(Out) << "{";
+ nl(Out,1);
+ printFunctionBody(I);
+ nl(Out,-1) << "}";
+ nl(Out);
+ }
+ }
+ }
+
+ void CppWriter::printProgram(const std::string& fname,
+ const std::string& mName) {
+ Out << "#include <llvm/Module.h>\n";
+ Out << "#include <llvm/DerivedTypes.h>\n";
+ Out << "#include <llvm/Constants.h>\n";
+ Out << "#include <llvm/GlobalVariable.h>\n";
+ Out << "#include <llvm/Function.h>\n";
+ Out << "#include <llvm/CallingConv.h>\n";
+ Out << "#include <llvm/BasicBlock.h>\n";
+ Out << "#include <llvm/Instructions.h>\n";
+ Out << "#include <llvm/InlineAsm.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
+ Out << "#include <llvm/Support/raw_ostream.h>\n";
+ Out << "#include <llvm/Pass.h>\n";
+ Out << "#include <llvm/PassManager.h>\n";
+ Out << "#include <llvm/ADT/SmallVector.h>\n";
+ Out << "#include <llvm/Analysis/Verifier.h>\n";
+ Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <algorithm>\n";
+ Out << "using namespace llvm;\n\n";
+ Out << "Module* " << fname << "();\n\n";
+ Out << "int main(int argc, char**argv) {\n";
+ Out << " Module* Mod = " << fname << "();\n";
+ Out << " verifyModule(*Mod, PrintMessageAction);\n";
+ Out << " outs().flush();\n";
+ Out << " PassManager PM;\n";
+ Out << " PM.add(createPrintModulePass(&outs()));\n";
+ Out << " PM.run(*Mod);\n";
+ Out << " return 0;\n";
+ Out << "}\n\n";
+ printModule(fname,mName);
+ }
+
+ void CppWriter::printModule(const std::string& fname,
+ const std::string& mName) {
+ nl(Out) << "Module* " << fname << "() {";
+ nl(Out,1) << "// Module Construction";
+ nl(Out) << "Module* mod = new Module(\"" << mName << "\");";
+ if (!TheModule->getDataLayout().empty()) {
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ }
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+ << "\");";
+ }
+
+ if (!TheModule->getModuleInlineAsm().empty()) {
+ nl(Out) << "mod->setModuleInlineAsm(\"";
+ printEscapedString(TheModule->getModuleInlineAsm());
+ Out << "\");";
+ }
+ nl(Out);
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = TheModule->lib_begin();
+ Module::lib_iterator LE = TheModule->lib_end();
+ while (LI != LE) {
+ Out << "mod->addLibrary(\"" << *LI << "\");";
+ nl(Out);
+ ++LI;
+ }
+ printModuleBody();
+ nl(Out) << "return mod;";
+ nl(Out,-1) << "}";
+ nl(Out);
+ }
+
+ void CppWriter::printContents(const std::string& fname,
+ const std::string& mName) {
+ Out << "\nModule* " << fname << "(Module *mod) {\n";
+ Out << "\nmod->setModuleIdentifier(\"" << mName << "\");\n";
+ printModuleBody();
+ Out << "\nreturn mod;\n";
+ Out << "\n}\n";
+ }
+
+ void CppWriter::printFunction(const std::string& fname,
+ const std::string& funcName) {
+ const Function* F = TheModule->getFunction(funcName);
+ if (!F) {
+ error(std::string("Function '") + funcName + "' not found in input module");
+ return;
+ }
+ Out << "\nFunction* " << fname << "(Module *mod) {\n";
+ printFunctionUses(F);
+ printFunctionHead(F);
+ printFunctionBody(F);
+ Out << "return " << getCppName(F) << ";\n";
+ Out << "}\n";
+ }
+
+ void CppWriter::printFunctions() {
+ const Module::FunctionListType &funcs = TheModule->getFunctionList();
+ Module::const_iterator I = funcs.begin();
+ Module::const_iterator IE = funcs.end();
+
+ for (; I != IE; ++I) {
+ const Function &func = *I;
+ if (!func.isDeclaration()) {
+ std::string name("define_");
+ name += func.getName();
+ printFunction(name, func.getName());
+ }
+ }
+ }
+
+ void CppWriter::printVariable(const std::string& fname,
+ const std::string& varName) {
+ const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+
+ if (!GV) {
+ error(std::string("Variable '") + varName + "' not found in input module");
+ return;
+ }
+ Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+ printVariableUses(GV);
+ printVariableHead(GV);
+ printVariableBody(GV);
+ Out << "return " << getCppName(GV) << ";\n";
+ Out << "}\n";
+ }
+
+ void CppWriter::printType(const std::string& fname,
+ const std::string& typeName) {
+ const Type* Ty = TheModule->getTypeByName(typeName);
+ if (!Ty) {
+ error(std::string("Type '") + typeName + "' not found in input module");
+ return;
+ }
+ Out << "\nType* " << fname << "(Module *mod) {\n";
+ printType(Ty);
+ Out << "return " << getCppName(Ty) << ";\n";
+ Out << "}\n";
+ }
+
+ bool CppWriter::runOnModule(Module &M) {
+ TheModule = &M;
+
+ // Emit a header
+ Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+ // Get the name of the function we're supposed to generate
+ std::string fname = FuncName.getValue();
+
+ // Get the name of the thing we are to generate
+ std::string tgtname = NameToGenerate.getValue();
+ if (GenerationType == GenModule ||
+ GenerationType == GenContents ||
+ GenerationType == GenProgram ||
+ GenerationType == GenFunctions) {
+ if (tgtname == "!bad!") {
+ if (M.getModuleIdentifier() == "-")
+ tgtname = "<stdin>";
+ else
+ tgtname = M.getModuleIdentifier();
+ }
+ } else if (tgtname == "!bad!")
+ error("You must use the -for option with -gen-{function,variable,type}");
+
+ switch (WhatToGenerate(GenerationType)) {
+ case GenProgram:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printProgram(fname,tgtname);
+ break;
+ case GenModule:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printModule(fname,tgtname);
+ break;
+ case GenContents:
+ if (fname.empty())
+ fname = "makeLLVMModuleContents";
+ printContents(fname,tgtname);
+ break;
+ case GenFunction:
+ if (fname.empty())
+ fname = "makeLLVMFunction";
+ printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ default:
+ error("Invalid generation option");
+ }
+
+ return false;
+ }
+}
+
+char CppWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
+ raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel) {
+ if (FileType != TargetMachine::AssemblyFile) return true;
+ PM.add(new CppWriter(o));
+ return false;
+}
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
new file mode 100644
index 0000000..db4bc0e
--- /dev/null
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -0,0 +1,44 @@
+//===-- CPPTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C++ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CPPTARGETMACHINE_H
+#define CPPTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+struct CPPTargetMachine : public TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+
+ CPPTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout(&M) {}
+
+ virtual bool WantsWholeFile() const { return true; }
+ virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel);
+
+ // This class always works, but shouldn't be the default in most cases.
+ static unsigned getModuleMatchQuality(const Module &M) { return 1; }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+};
+
+} // End llvm namespace
+
+
+#endif
diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile
new file mode 100644
index 0000000..ca7e1a8
--- /dev/null
+++ b/lib/Target/CppBackend/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Target/CppBackend/Makefile --- ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMCppBackend
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts += -Wno-format
diff --git a/lib/Target/DarwinTargetAsmInfo.cpp b/lib/Target/DarwinTargetAsmInfo.cpp
new file mode 100644
index 0000000..05d2351
--- /dev/null
+++ b/lib/Target/DarwinTargetAsmInfo.cpp
@@ -0,0 +1,169 @@
+//===-- DarwinTargetAsmInfo.cpp - Darwin asm properties ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm
+// statements should take in general on Darwin-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+DarwinTargetAsmInfo::DarwinTargetAsmInfo(const TargetMachine &TM)
+ : TargetAsmInfo(TM) {
+
+ CStringSection_ = getUnnamedSection("\t.cstring",
+ SectionFlags::Mergeable | SectionFlags::Strings);
+ FourByteConstantSection = getUnnamedSection("\t.literal4\n",
+ SectionFlags::Mergeable);
+ EightByteConstantSection = getUnnamedSection("\t.literal8\n",
+ SectionFlags::Mergeable);
+
+ // Note: 16-byte constant section is subtarget specific and should be provided
+ // there, if needed.
+ SixteenByteConstantSection = 0;
+
+ ReadOnlySection = getUnnamedSection("\t.const\n", SectionFlags::None);
+
+ TextCoalSection =
+ getNamedSection("\t__TEXT,__textcoal_nt,coalesced,pure_instructions",
+ SectionFlags::Code);
+ ConstTextCoalSection = getNamedSection("\t__TEXT,__const_coal,coalesced",
+ SectionFlags::None);
+ ConstDataCoalSection = getNamedSection("\t__DATA,__const_coal,coalesced",
+ SectionFlags::None);
+ ConstDataSection = getUnnamedSection(".const_data", SectionFlags::None);
+ DataCoalSection = getNamedSection("\t__DATA,__datacoal_nt,coalesced",
+ SectionFlags::Writeable);
+}
+
+/// emitUsedDirectiveFor - On Darwin, internally linked data beginning with
+/// the PrivateGlobalPrefix or the LessPrivateGlobalPrefix does not have the
+/// directive emitted (this occurs in ObjC metadata).
+
+bool
+DarwinTargetAsmInfo::emitUsedDirectiveFor(const GlobalValue* GV,
+ Mangler *Mang) const {
+ if (GV==0)
+ return false;
+ if (GV->hasLocalLinkage() && !isa<Function>(GV) &&
+ ((strlen(getPrivateGlobalPrefix()) != 0 &&
+ Mang->getValueName(GV).substr(0,strlen(getPrivateGlobalPrefix())) ==
+ getPrivateGlobalPrefix()) ||
+ (strlen(getLessPrivateGlobalPrefix()) != 0 &&
+ Mang->getValueName(GV).substr(0,strlen(getLessPrivateGlobalPrefix())) ==
+ getLessPrivateGlobalPrefix())))
+ return false;
+ return true;
+}
+
+const Section*
+DarwinTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+ bool isWeak = GV->isWeakForLinker();
+ bool isNonStatic = TM.getRelocationModel() != Reloc::Static;
+
+ switch (Kind) {
+ case SectionKind::Text:
+ if (isWeak)
+ return TextCoalSection;
+ else
+ return TextSection;
+ case SectionKind::Data:
+ case SectionKind::ThreadData:
+ case SectionKind::BSS:
+ case SectionKind::ThreadBSS:
+ if (cast<GlobalVariable>(GV)->isConstant())
+ return (isWeak ? ConstDataCoalSection : ConstDataSection);
+ else
+ return (isWeak ? DataCoalSection : DataSection);
+ case SectionKind::ROData:
+ return (isWeak ? ConstDataCoalSection :
+ (isNonStatic ? ConstDataSection : getReadOnlySection()));
+ case SectionKind::RODataMergeStr:
+ return (isWeak ?
+ ConstTextCoalSection :
+ MergeableStringSection(cast<GlobalVariable>(GV)));
+ case SectionKind::RODataMergeConst:
+ return (isWeak ?
+ ConstDataCoalSection:
+ MergeableConstSection(cast<GlobalVariable>(GV)));
+ default:
+ assert(0 && "Unsuported section kind for global");
+ }
+
+ // FIXME: Do we have any extra special weird cases?
+ return NULL;
+}
+
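+/// MergeableStringSection - Strings whose preferred alignment is at most 32
+/// go into the .cstring section; anything else stays in the read-only
+/// section.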
+const Section*
+DarwinTargetAsmInfo::MergeableStringSection(const GlobalVariable *GV) const {
+ const TargetData *TD = TM.getTargetData();
+ Constant *C = cast<GlobalVariable>(GV)->getInitializer();
+ const Type *Ty = cast<ArrayType>(C->getType())->getElementType();
+
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size) {
+ unsigned Align = TD->getPreferredAlignment(GV);
+ if (Align <= 32)
+ return getCStringSection_();
+ }
+
+ return getReadOnlySection();
+}
+
+const Section*
+DarwinTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const {
+ Constant *C = GV->getInitializer();
+
+ return MergeableConstSection(C->getType());
+}
+
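+/// MergeableConstSection - Place 4- and 8-byte mergeable constants in the
+/// .literal4/.literal8 sections (and 16-byte ones in the subtarget-provided
+/// section, when set); anything else falls back to the read-only section.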
+inline const Section*
+DarwinTargetAsmInfo::MergeableConstSection(const Type *Ty) const {
+ const TargetData *TD = TM.getTargetData();
+
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size == 4)
+ return FourByteConstantSection;
+ else if (Size == 8)
+ return EightByteConstantSection;
+ else if (Size == 16 && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+
+ return getReadOnlySection();
+}
+
+const Section*
+DarwinTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+ const Section* S = MergeableConstSection(Ty);
+
+ // Handle a weird special case when compiling PIC code.
+ if (S == getReadOnlySection() &&
+ TM.getRelocationModel() != Reloc::Static)
+ return ConstDataSection;
+
+ return S;
+}
+
+std::string
+DarwinTargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV,
+ SectionKind::Kind kind) const {
+ assert(0 && "Darwin does not use unique sections");
+ return "";
+}
diff --git a/lib/Target/ELFTargetAsmInfo.cpp b/lib/Target/ELFTargetAsmInfo.cpp
new file mode 100644
index 0000000..8f6e96e
--- /dev/null
+++ b/lib/Target/ELFTargetAsmInfo.cpp
@@ -0,0 +1,227 @@
+//===-- ELFTargetAsmInfo.cpp - ELF asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm
+// statements should take in general on ELF-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+ELFTargetAsmInfo::ELFTargetAsmInfo(const TargetMachine &TM)
+ : TargetAsmInfo(TM) {
+
+ BSSSection_ = getUnnamedSection("\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS);
+ ReadOnlySection = getNamedSection("\t.rodata", SectionFlags::None);
+ TLSDataSection = getNamedSection("\t.tdata",
+ SectionFlags::Writeable | SectionFlags::TLS);
+ TLSBSSSection = getNamedSection("\t.tbss",
+ SectionFlags::Writeable | SectionFlags::TLS | SectionFlags::BSS);
+
+ DataRelSection = getNamedSection("\t.data.rel", SectionFlags::Writeable);
+ DataRelLocalSection = getNamedSection("\t.data.rel.local",
+ SectionFlags::Writeable);
+ DataRelROSection = getNamedSection("\t.data.rel.ro",
+ SectionFlags::Writeable);
+ DataRelROLocalSection = getNamedSection("\t.data.rel.ro.local",
+ SectionFlags::Writeable);
+}
+
+SectionKind::Kind
+ELFTargetAsmInfo::SectionKindForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = TargetAsmInfo::SectionKindForGlobal(GV);
+
+ if (Kind != SectionKind::Data)
+ return Kind;
+
+ // Decide whether we need data.rel stuff
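+ // (For example, a constant initializer referencing global symbols lands in
+ // DataRelRO, while one with only local relocations lands in
+ // DataRelROLocal.)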
+ const GlobalVariable* GVar = dyn_cast<GlobalVariable>(GV);
+ if (GVar->hasInitializer()) {
+ Constant *C = GVar->getInitializer();
+ bool isConstant = GVar->isConstant();
+ unsigned Reloc = RelocBehaviour();
+ if (Reloc != Reloc::None && C->ContainsRelocations(Reloc))
+ return (C->ContainsRelocations(Reloc::Global) ?
+ (isConstant ?
+ SectionKind::DataRelRO : SectionKind::DataRel) :
+ (isConstant ?
+ SectionKind::DataRelROLocal : SectionKind::DataRelLocal));
+ }
+
+ return Kind;
+}
+
+const Section*
+ELFTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage:
+ case Function::DLLExportLinkage:
+ case Function::ExternalLinkage:
+ return TextSection;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ std::string Name = UniqueSectionForGlobal(GV, Kind);
+ unsigned Flags = SectionFlagsForGlobal(GV, Name.c_str());
+ return getNamedSection(Name.c_str(), Flags);
+ }
+ } else if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (GVar->isWeakForLinker()) {
+ std::string Name = UniqueSectionForGlobal(GVar, Kind);
+ unsigned Flags = SectionFlagsForGlobal(GVar, Name.c_str());
+ return getNamedSection(Name.c_str(), Flags);
+ } else {
+ switch (Kind) {
+ case SectionKind::Data:
+ case SectionKind::SmallData:
+ return DataSection;
+ case SectionKind::DataRel:
+ return DataRelSection;
+ case SectionKind::DataRelLocal:
+ return DataRelLocalSection;
+ case SectionKind::DataRelRO:
+ return DataRelROSection;
+ case SectionKind::DataRelROLocal:
+ return DataRelROLocalSection;
+ case SectionKind::BSS:
+ case SectionKind::SmallBSS:
+ // ELF targets usually have BSS sections
+ return getBSSSection_();
+ case SectionKind::ROData:
+ case SectionKind::SmallROData:
+ return getReadOnlySection();
+ case SectionKind::RODataMergeStr:
+ return MergeableStringSection(GVar);
+ case SectionKind::RODataMergeConst:
+ return MergeableConstSection(GVar);
+ case SectionKind::ThreadData:
+ // ELF targets usually support TLS stuff
+ return TLSDataSection;
+ case SectionKind::ThreadBSS:
+ return TLSBSSSection;
+ default:
+ assert(0 && "Unsuported section kind for global");
+ }
+ }
+ } else
+ assert(0 && "Unsupported global");
+
+ return NULL;
+}
+
+const Section*
+ELFTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+ // FIXME: Support data.rel stuff someday
+ return MergeableConstSection(Ty);
+}
+
+const Section*
+ELFTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const {
+ Constant *C = GV->getInitializer();
+ return MergeableConstSection(C->getType());
+}
+
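+/// MergeableConstSection - 4-, 8-, and 16-byte mergeable constants go into
+/// sized .rodata.cstN sections with a matching entity size (e.g.
+/// .rodata.cst8); anything else falls back to the read-only section.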
+inline const Section*
+ELFTargetAsmInfo::MergeableConstSection(const Type *Ty) const {
+ const TargetData *TD = TM.getTargetData();
+
+ // FIXME: the string here is temporary until this fully lands.
+ // We cannot use {Four,Eight,Sixteen}ByteConstantSection here, since it's
+ // currently directly used by asmprinter.
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size == 4 || Size == 8 || Size == 16) {
+ std::string Name = ".rodata.cst" + utostr(Size);
+
+ return getNamedSection(Name.c_str(),
+ SectionFlags::setEntitySize(SectionFlags::Mergeable,
+ Size));
+ }
+
+ return getReadOnlySection();
+}
+
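+/// MergeableStringSection - Small mergeable strings go into a section whose
+/// name encodes entity size and alignment; with a typical ".rodata.str"
+/// prefix, an i8 array with 1-byte alignment would land in ".rodata.str1.1".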
+const Section*
+ELFTargetAsmInfo::MergeableStringSection(const GlobalVariable *GV) const {
+ const TargetData *TD = TM.getTargetData();
+ Constant *C = cast<GlobalVariable>(GV)->getInitializer();
+ const Type *Ty = cast<ArrayType>(C->getType())->getElementType();
+
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size <= 16) {
+ assert(getCStringSection() && "Should have string section prefix");
+
+ // We also need alignment here
+ unsigned Align = TD->getPrefTypeAlignment(Ty);
+ if (Align < Size)
+ Align = Size;
+
+ std::string Name = getCStringSection() + utostr(Size) + '.' + utostr(Align);
+ unsigned Flags = SectionFlags::setEntitySize(SectionFlags::Mergeable |
+ SectionFlags::Strings,
+ Size);
+ return getNamedSection(Name.c_str(), Flags);
+ }
+
+ return getReadOnlySection();
+}
+
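+/// printSectionFlags - Render a SectionFlags bitmask as the assembler's
+/// section flag string; e.g. a mergeable 8-byte constant section would yield
+/// ',"aM",@progbits,8' on targets whose comment string is not '@'.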
+std::string ELFTargetAsmInfo::printSectionFlags(unsigned flags) const {
+ std::string Flags = ",\"";
+
+ if (!(flags & SectionFlags::Debug))
+ Flags += 'a';
+ if (flags & SectionFlags::Code)
+ Flags += 'x';
+ if (flags & SectionFlags::Writeable)
+ Flags += 'w';
+ if (flags & SectionFlags::Mergeable)
+ Flags += 'M';
+ if (flags & SectionFlags::Strings)
+ Flags += 'S';
+ if (flags & SectionFlags::TLS)
+ Flags += 'T';
+ if (flags & SectionFlags::Small)
+ Flags += 's';
+
+ Flags += "\",";
+
+ // If the comment string is '@' (e.g. on ARM), use '%' instead
+ if (strcmp(CommentString, "@") == 0)
+ Flags += '%';
+ else
+ Flags += '@';
+
+ // FIXME: There can be exceptions here
+ if (flags & SectionFlags::BSS)
+ Flags += "nobits";
+ else
+ Flags += "progbits";
+
+ if (unsigned entitySize = SectionFlags::getEntitySize(flags))
+ Flags += "," + utostr(entitySize);
+
+ return Flags;
+}
diff --git a/lib/Target/IA64/AsmPrinter/CMakeLists.txt b/lib/Target/IA64/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..1d552bd
--- /dev/null
+++ b/lib/Target/IA64/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_partially_linked_object(LLVMIA64AsmPrinter
+ IA64AsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMIA64CodeGen n)
+
+add_dependencies(LLVMIA64AsmPrinter ${n})
diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
new file mode 100644
index 0000000..fc54e23
--- /dev/null
+++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
@@ -0,0 +1,376 @@
+//===-- IA64AsmPrinter.cpp - Print out IA64 LLVM as assembly --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to assembly accepted by the GNU binutils 'gas'
+// assembler. The Intel 'ias' and HP-UX 'as' assemblers *may* choke on this
+// output, but if so that's a bug I'd like to hear about: please file a bug
+// report in bugzilla. FYI, the not too bad 'ias' assembler is bundled with
+// the Intel C/C++ compiler for Itanium Linux.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "IA64.h"
+#include "IA64TargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class IA64AsmPrinter : public AsmPrinter {
+ std::set<std::string> ExternalFunctionNames, ExternalObjectNames;
+ public:
+ explicit IA64AsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "IA64 Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // This method is used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo){
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.getType() == MachineOperand::MO_Register) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physref??");
+ //XXX Bug Workaround: See note in Printer::doInitialization about %.
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ } else {
+ printOp(MO);
+ }
+ }
+
+ void printS8ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImm();
+ if(val>=128) val=val-256; // sign-extend the 8-bit immediate
+ O << val;
+ }
+ void printS14ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImm();
+ if(val>=8192) val=val-16384; // sign-extend the 14-bit immediate
+ O << val;
+ }
+ void printS22ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImm();
+ if(val>=2097152) val=val-4194304; // sign-extend the 22-bit immediate
+ O << val;
+ }
+ void printU64ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (uint64_t)MI->getOperand(OpNo).getImm();
+ }
+ void printS64ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+// XXX : nasty hack to avoid GPREL22 "relocation truncated to fit" linker
+// errors - instead of add rX = @gprel(CPI<whatever>), r1;; we now
+// emit movl rX = @gprel(CPI<whatever);;
+// add rX = rX, r1;
+// this gives us 64 bits instead of 22 (for the add long imm) to play
+// with, which shuts up the linker. The problem is that the constant
+// pool entries aren't immediates at this stage, so we check here.
+// If it's an immediate, print it the old fashioned way. If it's
+// not, we print it as a constant pool index.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << (int64_t)MI->getOperand(OpNo).getImm();
+ } else { // this is a constant pool reference: FIXME: assert this
+ printOp(MI->getOperand(OpNo));
+ }
+ }
+
+ void printGlobalOperand(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo), false); // this is NOT a br.call instruction
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo), true); // this is a br.call instruction
+ }
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, bool isBRCALLinsn= false);
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ };
+} // end of anonymous namespace
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "IA64GenAsmWriter.inc"
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool IA64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ // Print out labels for the function.
+ EmitAlignment(5);
+ O << "\t.global\t" << CurrentFnName << '\n';
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ", @function\n";
+ O << CurrentFnName << ":\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block if there are any predecessors.
+ if (!I->pred_empty()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // We didn't modify anything.
+ return false;
+}
+
+void IA64AsmPrinter::printOp(const MachineOperand &MO,
+ bool isBRCALLinsn /* = false */) {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << RI.get(MO.getReg()).AsmName;
+ return;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_ConstantPoolIndex: {
+ O << "@gprel(" << TAI->getPrivateGlobalPrefix()
+ << "CPI" << getFunctionNumber() << "_" << MO.getIndex() << ")";
+ return;
+ }
+
+ case MachineOperand::MO_GlobalAddress: {
+
+ // functions need @ltoff(@fptr(fn_name)) form
+ GlobalValue *GV = MO.getGlobal();
+ Function *F = dyn_cast<Function>(GV);
+
+ bool Needfptr=false; // if we're computing an address @ltoff(X), do
+ // we need to decorate it so it becomes
+ // @ltoff(@fptr(X)) ?
+ if (F && !isBRCALLinsn /*&& F->isDeclaration()*/)
+ Needfptr=true;
+
+ // if this is the target of a call instruction, we should define
+ // the function somewhere (GNU gas has no problem without this, but
+ // Intel ias rightly complains of an 'undefined symbol')
+
+ if (F /*&& isBRCALLinsn*/ && F->isDeclaration())
+ ExternalFunctionNames.insert(Mang->getValueName(MO.getGlobal()));
+ else
+ if (GV->isDeclaration()) // e.g. stuff like 'stdin'
+ ExternalObjectNames.insert(Mang->getValueName(MO.getGlobal()));
+
+ if (!isBRCALLinsn)
+ O << "@ltoff(";
+ if (Needfptr)
+ O << "@fptr(";
+ O << Mang->getValueName(MO.getGlobal());
+
+ if (Needfptr && !isBRCALLinsn)
+ O << "#))"; // close both fptr( and ltoff(
+ else {
+ if (Needfptr)
+ O << "#)"; // close only fptr(
+ if (!isBRCALLinsn)
+ O << "#)"; // close only ltoff(
+ }
+
+ int Offset = MO.getOffset();
+ if (Offset > 0)
+ O << " + " << Offset;
+ else if (Offset < 0)
+ O << " - " << -Offset;
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ ExternalFunctionNames.insert(MO.getSymbolName());
+ return;
+ default:
+ O << "<AsmPrinter: unknown operand type: " << MO.getType() << " >"; return;
+ }
+}
+
+/// printMachineInstruction -- Print out a single IA64 LLVM instruction
+/// MI to the current output stream.
+///
+void IA64AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool IA64AsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ O << "\n.ident \"LLVM-ia64\"\n\n"
+ << "\t.psr lsb\n" // should be "msb" on HP-UX, for starters
+ << "\t.radix C\n"
+ << "\t.psr abi64\n"; // we only support 64 bits for now
+ return Result;
+}
+
+void IA64AsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && !GVar->hasSection()) {
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage()) {
+ O << "\t.lcomm " << name << "#," << Size
+ << ',' << (1 << Align);
+ O << '\n';
+ } else {
+ O << "\t.common " << name << "#," << Size
+ << ',' << (1 << Align);
+ O << '\n';
+ }
+
+ return;
+ }
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their own, or something similar. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << TAI->getGlobalDirective() << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "GhostLinkage cannot appear in IA64AsmPrinter!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << ',' << Size << '\n';
+ }
+
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+}
+
+
+bool IA64AsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ // we print out ".global X \n .type X, @function" for each external function
+ O << "\n\n// br.call targets referenced (and not defined) above: \n";
+ for (std::set<std::string>::iterator i = ExternalFunctionNames.begin(),
+ e = ExternalFunctionNames.end(); i!=e; ++i) {
+ O << "\t.global " << *i << "\n\t.type " << *i << ", @function\n";
+ }
+ O << "\n\n";
+
+ // we print out ".global X \n .type X, @object" for each external object
+ O << "\n\n// (external) symbols referenced (and not defined) above: \n";
+ for (std::set<std::string>::iterator i = ExternalObjectNames.begin(),
+ e = ExternalObjectNames.end(); i!=e; ++i) {
+ O << "\t.global " << *i << "\n\t.type " << *i << ", @object\n";
+ }
+ O << "\n\n";
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createIA64CodePrinterPass - Returns a pass that prints the IA64
+/// assembly code for a MachineFunction to the given output stream, using
+/// the given target machine description.
+///
+FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o,
+ IA64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
diff --git a/lib/Target/IA64/AsmPrinter/Makefile b/lib/Target/IA64/AsmPrinter/Makefile
new file mode 100644
index 0000000..12880f3
--- /dev/null
+++ b/lib/Target/IA64/AsmPrinter/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Target/IA64/AsmPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMIA64AsmPrinter
+
+# Hack: we need to include 'main' IA64 target directory to grab
+# private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/IA64/CMakeLists.txt b/lib/Target/IA64/CMakeLists.txt
new file mode 100644
index 0000000..26f86ca
--- /dev/null
+++ b/lib/Target/IA64/CMakeLists.txt
@@ -0,0 +1,20 @@
+set(LLVM_TARGET_DEFINITIONS IA64.td)
+
+tablegen(IA64GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(IA64GenRegisterNames.inc -gen-register-enums)
+tablegen(IA64GenRegisterInfo.inc -gen-register-desc)
+tablegen(IA64GenInstrNames.inc -gen-instr-enums)
+tablegen(IA64GenInstrInfo.inc -gen-instr-desc)
+tablegen(IA64GenAsmWriter.inc -gen-asm-writer)
+tablegen(IA64GenDAGISel.inc -gen-dag-isel)
+
+add_llvm_target(IA64CodeGen
+ IA64Bundling.cpp
+ IA64InstrInfo.cpp
+ IA64ISelDAGToDAG.cpp
+ IA64ISelLowering.cpp
+ IA64RegisterInfo.cpp
+ IA64Subtarget.cpp
+ IA64TargetAsmInfo.cpp
+ IA64TargetMachine.cpp
+ )
diff --git a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h
new file mode 100644
index 0000000..ec8e3d6
--- /dev/null
+++ b/lib/Target/IA64/IA64.h
@@ -0,0 +1,58 @@
+//===-- IA64.h - Top-level interface for IA64 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the IA64
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_IA64_H
+#define TARGET_IA64_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class IA64TargetMachine;
+class FunctionPass;
+class raw_ostream;
+
+/// createIA64DAGToDAGInstructionSelector - This pass converts an LLVM
+/// function into IA64 machine code in a sane, DAG->DAG transform.
+///
+FunctionPass *createIA64DAGToDAGInstructionSelector(IA64TargetMachine &TM);
+
+/// createIA64BundlingPass - This pass adds stop bits and bundles
+/// instructions.
+///
+FunctionPass *createIA64BundlingPass(IA64TargetMachine &TM);
+
+/// createIA64CodePrinterPass - Returns a pass that prints the IA64
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *createIA64CodePrinterPass(raw_ostream &o,
+ IA64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+
+} // End llvm namespace
+
+// Defines symbolic names for IA64 registers. This defines a mapping from
+// register name to register number.
+//
+#include "IA64GenRegisterNames.inc"
+
+// Defines symbolic names for the IA64 instructions.
+//
+#include "IA64GenInstrNames.inc"
+
+#endif
+
+
diff --git a/lib/Target/IA64/IA64.td b/lib/Target/IA64/IA64.td
new file mode 100644
index 0000000..c469281
--- /dev/null
+++ b/lib/Target/IA64/IA64.td
@@ -0,0 +1,39 @@
+//===-- IA64.td - Target definition file for Intel IA64 -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel IA64 architecture,
+// also known variously as ia64, IA-64, IPF, "the Itanium architecture", etc.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "IA64RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "IA64InstrInfo.td"
+
+def IA64InstrInfo : InstrInfo { }
+
+def IA64 : Target {
+ // Our instruction set
+ let InstructionSet = IA64InstrInfo;
+
+}
+
+
diff --git a/lib/Target/IA64/IA64Bundling.cpp b/lib/Target/IA64/IA64Bundling.cpp
new file mode 100644
index 0000000..3a9ba6c
--- /dev/null
+++ b/lib/Target/IA64/IA64Bundling.cpp
@@ -0,0 +1,118 @@
+//===-- IA64Bundling.cpp - IA-64 instruction bundling pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Add stops where required to prevent read-after-write and write-after-write
+// dependencies, for both registers and memory addresses. There are exceptions:
+//
+// - Compare instructions (cmp*, tbit, tnat, fcmp, frcpa) are OK with
+// WAW dependencies so long as they all target p0, or are of parallel
+// type (.and*/.or*)
+//
+// FIXME: bundling, for now, is left to the assembler.
+// FIXME: this might be an appropriate place to translate between different
+// instructions that do the same thing, if this helps bundling.
+//
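+// An illustrative sketch (not from the original sources): given the
+// read-after-write dependence below, this pass inserts a stop (";;") so
+// that the two adds end up in different instruction groups:
+//
+//   add r2 = r3, r4 ;;
+//   add r5 = r2, r6
+//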
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ia64-codegen"
+#include "IA64.h"
+#include "IA64InstrInfo.h"
+#include "IA64TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(StopBitsAdded, "Number of stop bits added");
+
+namespace {
+ struct IA64BundlingPass : public MachineFunctionPass {
+ static char ID;
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ IA64TargetMachine &TM;
+
+ IA64BundlingPass(IA64TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "IA64 (Itanium) Bundling Pass";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ // XXX: ugly global, but pending writes can cross basic blocks. Note that
+ // taken branches end instruction groups. So we only need to worry about
+ // 'fallthrough' code
+ std::set<unsigned> PendingRegWrites;
+ };
+ char IA64BundlingPass::ID = 0;
+} // end of anonymous namespace
+
+/// createIA64BundlingPass - Returns a pass that adds STOP (;;) instructions
+/// where needed; actual bundle formation is currently left to the assembler.
+///
+FunctionPass *llvm::createIA64BundlingPass(IA64TargetMachine &tm) {
+ return new IA64BundlingPass(tm);
+}
+
+/// runOnMachineBasicBlock - add stops and bundle this MBB.
+///
+bool IA64BundlingPass::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *CurrentInsn = I++;
+ std::set<unsigned> CurrentReads, CurrentWrites, OrigWrites;
+
+ for(unsigned i=0; i < CurrentInsn->getNumOperands(); i++) {
+ MachineOperand &MO=CurrentInsn->getOperand(i);
+ if (MO.isReg()) {
+ if(MO.isUse()) { // TODO: exclude p0
+ CurrentReads.insert(MO.getReg());
+ }
+ if(MO.isDef()) { // TODO: exclude p0
+ CurrentWrites.insert(MO.getReg());
+ OrigWrites.insert(MO.getReg()); // FIXME: use a nondestructive
+ // set_intersect instead?
+ }
+ }
+ }
+
+ // CurrentReads/CurrentWrites contain info for the current instruction.
+ // Does it read or write any registers that are pending a write?
+ // (i.e. not separated by a stop)
+ set_intersect(CurrentReads, PendingRegWrites);
+ set_intersect(CurrentWrites, PendingRegWrites);
+
+ if(! (CurrentReads.empty() && CurrentWrites.empty()) ) {
+ // there is a conflict, insert a stop and reset PendingRegWrites
+ CurrentInsn = BuildMI(MBB, CurrentInsn, CurrentInsn->getDebugLoc(),
+ TM.getInstrInfo()->get(IA64::STOP), 0);
+ PendingRegWrites=OrigWrites; // carry over current writes to next insn
+ Changed=true; StopBitsAdded++; // update stats
+ } else { // otherwise, track additional pending writes
+ set_union(PendingRegWrites, OrigWrites);
+ }
+ } // onto the next insn in the MBB
+
+ return Changed;
+}
+
diff --git a/lib/Target/IA64/IA64ISelDAGToDAG.cpp b/lib/Target/IA64/IA64ISelDAGToDAG.cpp
new file mode 100644
index 0000000..9800c50
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelDAGToDAG.cpp
@@ -0,0 +1,575 @@
+//===---- IA64ISelDAGToDAG.cpp - IA64 pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for IA64,
+// converting a legalized dag to an IA64 dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ia64-codegen"
+#include "IA64.h"
+#include "IA64TargetMachine.h"
+#include "IA64ISelLowering.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// IA64DAGToDAGISel - IA64 specific code to select IA64 machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class IA64DAGToDAGISel : public SelectionDAGISel {
+ unsigned GlobalBaseReg;
+ public:
+ explicit IA64DAGToDAGISel(IA64TargetMachine &TM)
+ : SelectionDAGISel(TM) {}
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ return SelectionDAGISel::runOnFunction(Fn);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ // SDValue getGlobalBaseReg(); TODO: hmm
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue N);
+
+ SDNode *SelectIntImmediateExpr(SDValue LHS, SDValue RHS,
+ unsigned OCHi, unsigned OCLo,
+ bool IsArithmetic = false,
+ bool Negate = false);
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC);
+
+ /// SelectAddr - Given the specified address, return the two operands for a
+ /// load/store instruction, and return true if it should be an indexed [r+r]
+ /// operation.
+ bool SelectAddr(SDValue Addr, SDValue &Op1, SDValue &Op2);
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "IA64 (Itanium) DAG->DAG Instruction Selector";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "IA64GenDAGISel.inc"
+
+private:
+ SDNode *SelectDIV(SDValue Op);
+ };
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void IA64DAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+SDNode *IA64DAGToDAGISel::SelectDIV(SDValue Op) {
+ SDNode *N = Op.getNode();
+ SDValue Chain = N->getOperand(0);
+ SDValue Tmp1 = N->getOperand(0);
+ SDValue Tmp2 = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool isFP=false;
+
+ if(Tmp1.getValueType().isFloatingPoint())
+ isFP=true;
+
+ bool isModulus=false; // is it a division or a modulus?
+ bool isSigned=false;
+
+ switch(N->getOpcode()) {
+ case ISD::FDIV:
+ case ISD::SDIV: isModulus=false; isSigned=true; break;
+ case ISD::UDIV: isModulus=false; isSigned=false; break;
+ case ISD::FREM:
+ case ISD::SREM: isModulus=true; isSigned=true; break;
+ case ISD::UREM: isModulus=true; isSigned=false; break;
+ }
+
+ // TODO: check for integer divides by powers of 2 (or other simple patterns?)
+
+ SDValue TmpPR, TmpPR2;
+ SDValue TmpF1, TmpF2, TmpF3, TmpF4, TmpF5, TmpF6, TmpF7, TmpF8;
+ SDValue TmpF9, TmpF10,TmpF11,TmpF12,TmpF13,TmpF14,TmpF15;
+ SDNode *Result;
+
+ // we'll need copies of F0 and F1
+ SDValue F0 = CurDAG->getRegister(IA64::F0, MVT::f64);
+ SDValue F1 = CurDAG->getRegister(IA64::F1, MVT::f64);
+
+ // OK, emit some code:
+
+ if(!isFP) {
+ // first, load the inputs into FP regs.
+ TmpF1 =
+ SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, Tmp1), 0);
+ Chain = TmpF1.getValue(1);
+ TmpF2 =
+ SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, Tmp2), 0);
+ Chain = TmpF2.getValue(1);
+
+ // next, convert the inputs to FP
+ if(isSigned) {
+ TmpF3 =
+ SDValue(CurDAG->getTargetNode(IA64::FCVTXF, dl, MVT::f64, TmpF1), 0);
+ Chain = TmpF3.getValue(1);
+ TmpF4 =
+ SDValue(CurDAG->getTargetNode(IA64::FCVTXF, dl, MVT::f64, TmpF2), 0);
+ Chain = TmpF4.getValue(1);
+ } else { // is unsigned
+ TmpF3 =
+ SDValue(CurDAG->getTargetNode(IA64::FCVTXUFS1, dl, MVT::f64, TmpF1),
+ 0);
+ Chain = TmpF3.getValue(1);
+ TmpF4 =
+ SDValue(CurDAG->getTargetNode(IA64::FCVTXUFS1, dl, MVT::f64, TmpF2),
+ 0);
+ Chain = TmpF4.getValue(1);
+ }
+
+ } else { // this is an FP divide/remainder, so we 'leak' some temp
+ // regs and assign TmpF3=Tmp1, TmpF4=Tmp2
+ TmpF3=Tmp1;
+ TmpF4=Tmp2;
+ }
+
+ // we start by computing an approximate reciprocal with frcpa (accurate
+ // to roughly 8.9 bits)
+ // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate)
+ if(isFP)
+ TmpF5 = SDValue(CurDAG->getTargetNode(IA64::FRCPAS0, dl, MVT::f64,
+ MVT::i1, TmpF3, TmpF4), 0);
+ else
+ TmpF5 = SDValue(CurDAG->getTargetNode(IA64::FRCPAS1, dl, MVT::f64,
+ MVT::i1, TmpF3, TmpF4), 0);
+
+ TmpPR = TmpF5.getValue(1);
+ Chain = TmpF5.getValue(2);
+
+ SDValue minusB;
+ if(isModulus) { // for remainders, it'll be handy to have
+ // copies of -input_b
+ minusB = SDValue(CurDAG->getTargetNode(IA64::SUB, dl, MVT::i64,
+ CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2), 0);
+ Chain = minusB.getValue(1);
+ }
+
+ SDValue TmpE0, TmpY1, TmpE1, TmpY2;
+
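+ // The FMA chain below is the Newton-Raphson refinement of the frcpa
+ // seed from Intel application note #245415; as an editorial sketch (not
+ // part of the original comments), with b the divisor and y0 = frcpa(b):
+ //   e0 = 1 - b*y0
+ //   y1 = y0 + e0*y0
+ //   e1 = e0*e0
+ //   y2 = y1 + e1*y1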
+ SDValue OpsE0[] = { TmpF4, TmpF5, F1, TmpPR };
+ TmpE0 = SDValue(CurDAG->getTargetNode(IA64::CFNMAS1, dl, MVT::f64,
+ OpsE0, 4), 0);
+ Chain = TmpE0.getValue(1);
+ SDValue OpsY1[] = { TmpF5, TmpE0, TmpF5, TmpPR };
+ TmpY1 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsY1, 4), 0);
+ Chain = TmpY1.getValue(1);
+ SDValue OpsE1[] = { TmpE0, TmpE0, F0, TmpPR };
+ TmpE1 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsE1, 4), 0);
+ Chain = TmpE1.getValue(1);
+ SDValue OpsY2[] = { TmpY1, TmpE1, TmpY1, TmpPR };
+ TmpY2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsY2, 4), 0);
+ Chain = TmpY2.getValue(1);
+
+ if(isFP) { // if this is an FP divide, we finish up here and exit early
+ if(isModulus)
+ assert(0 && "Sorry, try another FORTRAN compiler.");
+
+ SDValue TmpE2, TmpY3, TmpQ0, TmpR0;
+
+ SDValue OpsE2[] = { TmpE1, TmpE1, F0, TmpPR };
+ TmpE2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsE2, 4), 0);
+ Chain = TmpE2.getValue(1);
+ SDValue OpsY3[] = { TmpY2, TmpE2, TmpY2, TmpPR };
+ TmpY3 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsY3, 4), 0);
+ Chain = TmpY3.getValue(1);
+ SDValue OpsQ0[] = { Tmp1, TmpY3, F0, TmpPR };
+ TmpQ0 =
+ SDValue(CurDAG->getTargetNode(IA64::CFMADS1, dl, // double prec!
+ MVT::f64, OpsQ0, 4), 0);
+ Chain = TmpQ0.getValue(1);
+ SDValue OpsR0[] = { Tmp2, TmpQ0, Tmp1, TmpPR };
+ TmpR0 =
+ SDValue(CurDAG->getTargetNode(IA64::CFNMADS1, dl, // double prec!
+ MVT::f64, OpsR0, 4), 0);
+ Chain = TmpR0.getValue(1);
+
+// we want Result to have the same target register as the frcpa, so
+// we two-address hack it. See the comment "for this to work..." on
+// page 48 of Intel application note #245415
+ SDValue Ops[] = { TmpF5, TmpY3, TmpR0, TmpQ0, TmpPR };
+ Result = CurDAG->getTargetNode(IA64::TCFMADS0, dl, // d.p. s0 rndg!
+ MVT::f64, Ops, 5);
+ Chain = SDValue(Result, 1);
+ return Result; // XXX: early exit!
+ } else { // this is *not* an FP divide, so there's a bit left to do:
+
+ SDValue TmpQ2, TmpR2, TmpQ3, TmpQ;
+
+ SDValue OpsQ2[] = { TmpF3, TmpY2, F0, TmpPR };
+ TmpQ2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsQ2, 4), 0);
+ Chain = TmpQ2.getValue(1);
+ SDValue OpsR2[] = { TmpF4, TmpQ2, TmpF3, TmpPR };
+ TmpR2 = SDValue(CurDAG->getTargetNode(IA64::CFNMAS1, dl, MVT::f64,
+ OpsR2, 4), 0);
+ Chain = TmpR2.getValue(1);
+
+// we want TmpQ3 to have the same target register as the frcpa? maybe we
+// should two-address hack it. See the comment "for this to work..." on page
+// 48 of Intel application note #245415
+ SDValue OpsQ3[] = { TmpF5, TmpR2, TmpY2, TmpQ2, TmpPR };
+ TmpQ3 = SDValue(CurDAG->getTargetNode(IA64::TCFMAS1, dl, MVT::f64,
+ OpsQ3, 5), 0);
+ Chain = TmpQ3.getValue(1);
+
+ // STORY: without these two-address instructions (TCFMAS1 and TCFMADS0)
+ // the FPSWA won't be able to help out in the case of large/tiny
+ // arguments. Other fun bugs may also appear, e.g. 0/x = x, not 0.
+
+ if(isSigned)
+ TmpQ = SDValue(CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, dl,
+ MVT::f64, TmpQ3), 0);
+ else
+ TmpQ = SDValue(CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, dl,
+ MVT::f64, TmpQ3), 0);
+
+ Chain = TmpQ.getValue(1);
+
+ if(isModulus) {
+ SDValue FPminusB =
+ SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, minusB),
+ 0);
+ Chain = FPminusB.getValue(1);
+ SDValue Remainder =
+ SDValue(CurDAG->getTargetNode(IA64::XMAL, dl, MVT::f64,
+ TmpQ, FPminusB, TmpF1), 0);
+ Chain = Remainder.getValue(1);
+ Result = CurDAG->getTargetNode(IA64::GETFSIG, dl, MVT::i64, Remainder);
+ Chain = SDValue(Result, 1);
+ } else { // just an integer divide
+ Result = CurDAG->getTargetNode(IA64::GETFSIG, dl, MVT::i64, TmpQ);
+ Chain = SDValue(Result, 1);
+ }
+
+ return Result;
+ } // wasn't an FP divide
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *IA64DAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case IA64ISD::BRCALL: { // XXX: this is also a hack!
+ SDValue Chain = N->getOperand(0);
+ SDValue InFlag; // Null incoming flag value.
+
+ if(N->getNumOperands()==3) { // we have an incoming chain, callee and flag
+ InFlag = N->getOperand(2);
+ }
+
+ unsigned CallOpcode;
+ SDValue CallOperand;
+
+ // if we can call directly, do so
+ if (GlobalAddressSDNode *GASD =
+ dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) {
+ CallOpcode = IA64::BRCALL_IPREL_GA;
+ CallOperand = CurDAG->getTargetGlobalAddress(GASD->getGlobal(), MVT::i64);
+ } else if (isa<ExternalSymbolSDNode>(N->getOperand(1))) {
+ // FIXME: we currently NEED this case for correctness, to avoid
+ // "non-pic code with imm reloc.n against dynamic symbol" errors
+ CallOpcode = IA64::BRCALL_IPREL_ES;
+ CallOperand = N->getOperand(1);
+ } else {
+ // otherwise we need to load the function descriptor,
+ // load the branch target (function)'s entry point and GP,
+ // branch (call) then restore the GP
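+ // (an IA64 function descriptor is a pair of 64-bit words: the entry
+ // point at [fd] and the callee's GP at [fd+8], which is exactly what
+ // the two LD8s below pick apart)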
+ SDValue FnDescriptor = N->getOperand(1);
+
+ // load the branch target's entry point [mem] and
+ // GP value [mem+8]
+ SDValue targetEntryPoint=
+ SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other,
+ FnDescriptor, CurDAG->getEntryNode()), 0);
+ Chain = targetEntryPoint.getValue(1);
+ SDValue targetGPAddr=
+ SDValue(CurDAG->getTargetNode(IA64::ADDS, dl, MVT::i64,
+ FnDescriptor,
+ CurDAG->getConstant(8, MVT::i64)), 0);
+ Chain = targetGPAddr.getValue(1);
+ SDValue targetGP =
+ SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64,MVT::Other,
+ targetGPAddr, CurDAG->getEntryNode()), 0);
+ Chain = targetGP.getValue(1);
+
+ Chain = CurDAG->getCopyToReg(Chain, dl, IA64::r1, targetGP, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = CurDAG->getCopyToReg(Chain, dl, IA64::B6,
+ targetEntryPoint, InFlag); // FLAG these?
+ InFlag = Chain.getValue(1);
+
+ CallOperand = CurDAG->getRegister(IA64::B6, MVT::i64);
+ CallOpcode = IA64::BRCALL_INDIRECT;
+ }
+
+ // Finally, once everything is setup, emit the call itself
+ if (InFlag.getNode())
+ Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
+ MVT::Flag, CallOperand, InFlag), 0);
+ else // there might be no arguments
+ Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
+ MVT::Flag, CallOperand, Chain), 0);
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDValue> CallResults;
+
+ CallResults.push_back(Chain);
+ CallResults.push_back(InFlag);
+
+ for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
+ ReplaceUses(Op.getValue(i), CallResults[i]);
+ return NULL;
+ }
+
+ case IA64ISD::GETFD: {
+ SDValue Input = N->getOperand(0);
+ return CurDAG->getTargetNode(IA64::GETFD, dl, MVT::i64, Input);
+ }
+
+ case ISD::FDIV:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return SelectDIV(Op);
+
+ case ISD::TargetConstantFP: {
+ SDValue Chain = CurDAG->getEntryNode(); // this is a constant, so..
+
+ SDValue V;
+ ConstantFPSDNode* N2 = cast<ConstantFPSDNode>(N);
+ if (N2->getValueAPF().isPosZero()) {
+ V = CurDAG->getCopyFromReg(Chain, dl, IA64::F0, MVT::f64);
+ } else if (N2->isExactlyValue(N2->getValueType(0) == MVT::f32 ?
+ APFloat(+1.0f) : APFloat(+1.0))) {
+ V = CurDAG->getCopyFromReg(Chain, dl, IA64::F1, MVT::f64);
+ } else
+ assert(0 && "Unexpected FP constant!");
+
+ ReplaceUses(SDValue(N, 0), V);
+ return 0;
+ }
+
+ case ISD::FrameIndex: { // TODO: reduce creepiness
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i64));
+ else
+ return CurDAG->getTargetNode(IA64::MOV, dl, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i64));
+ }
+
+ case ISD::ConstantPool: { // TODO: nuke the constant pool
+ // (ia64 doesn't need one)
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ Constant *C = CP->getConstVal();
+ SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment());
+ return CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64, // ?
+ CurDAG->getRegister(IA64::r1, MVT::i64), CPI);
+ }
+
+ case ISD::GlobalAddress: {
+ GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
+ SDValue GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64);
+ SDValue Tmp =
+ SDValue(CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64,
+ CurDAG->getRegister(IA64::r1,
+ MVT::i64), GA), 0);
+ return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other, Tmp,
+ CurDAG->getEntryNode());
+ }
+
+/* XXX
+ case ISD::ExternalSymbol: {
+ SDValue EA = CurDAG->getTargetExternalSymbol(
+ cast<ExternalSymbolSDNode>(N)->getSymbol(),
+ MVT::i64);
+ SDValue Tmp = CurDAG->getTargetNode(IA64::ADDL_EA, dl, MVT::i64,
+ CurDAG->getRegister(IA64::r1,
+ MVT::i64),
+ EA);
+ return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, Tmp);
+ }
+*/
+
+ case ISD::LOAD: { // FIXME: load -1, not 1, for bools?
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Address = LD->getBasePtr();
+
+ MVT TypeBeingLoaded = LD->getMemoryVT();
+ unsigned Opc;
+ switch (TypeBeingLoaded.getSimpleVT()) {
+ default:
+#ifndef NDEBUG
+ N->dump(CurDAG);
+#endif
+ assert(0 && "Cannot load this type!");
+ case MVT::i1: { // this is a bool
+ Opc = IA64::LD1; // first we load a byte, then compare for != 0
+ if(N->getValueType(0) == MVT::i1) { // XXX: early exit!
+ return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other,
+ SDValue(CurDAG->getTargetNode(Opc, dl,
+ MVT::i64,
+ Address), 0),
+ CurDAG->getRegister(IA64::r0, MVT::i64),
+ Chain);
+ }
+ /* otherwise, we want to load a bool into something bigger: LD1
+ will do that for us, so we just fall through */
+ }
+ case MVT::i8: Opc = IA64::LD1; break;
+ case MVT::i16: Opc = IA64::LD2; break;
+ case MVT::i32: Opc = IA64::LD4; break;
+ case MVT::i64: Opc = IA64::LD8; break;
+
+ case MVT::f32: Opc = IA64::LDF4; break;
+ case MVT::f64: Opc = IA64::LDF8; break;
+ }
+
+ // TODO: comment this
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other,
+ Address, Chain);
+ }
+
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Address = ST->getBasePtr();
+ SDValue Chain = ST->getChain();
+
+ unsigned Opc;
+ if (ISD::isNON_TRUNCStore(N)) {
+ switch (N->getOperand(1).getValueType().getSimpleVT()) {
+ default: assert(0 && "unknown type in store");
+ case MVT::i1: { // this is a bool
+ Opc = IA64::ST1; // we store either 0 or 1 as a byte
+ // first load zero!
+ SDValue Initial = CurDAG->getCopyFromReg(Chain, dl, IA64::r0, MVT::i64);
+ Chain = Initial.getValue(1);
+ // then load 1 into the same reg iff the predicate to store is 1
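+ // roughly, in assembly (illustrative, with a hypothetical scratch
+ // register rX and predicate p):
+ //   mov  rX = r0         // rX = 0
+ //   (p)  adds rX = 1, r0 // rX = 1 iff p is set
+ //   st1  [addr] = rX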
+ SDValue Tmp = ST->getValue();
+ Tmp =
+ SDValue(CurDAG->getTargetNode(IA64::TPCADDS, dl, MVT::i64, Initial,
+ CurDAG->getTargetConstant(1,
+ MVT::i64),
+ Tmp), 0);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain);
+ }
+ case MVT::i64: Opc = IA64::ST8; break;
+ case MVT::f64: Opc = IA64::STF8; break;
+ }
+ } else { // Truncating store
+ switch(ST->getMemoryVT().getSimpleVT()) {
+ default: assert(0 && "unknown type in truncstore");
+ case MVT::i8: Opc = IA64::ST1; break;
+ case MVT::i16: Opc = IA64::ST2; break;
+ case MVT::i32: Opc = IA64::ST4; break;
+ case MVT::f32: Opc = IA64::STF4; break;
+ }
+ }
+
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain);
+ }
+
+ case ISD::BRCOND: {
+ SDValue Chain = N->getOperand(0);
+ SDValue CC = N->getOperand(1);
+ MachineBasicBlock *Dest =
+ cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock();
+ //FIXME - we do NOT need long branches all the time
+ return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC,
+ CurDAG->getBasicBlock(Dest), Chain);
+ }
+
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END: {
+ int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ?
+ IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP;
+ SDValue N0 = N->getOperand(0);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0);
+ }
+
+ case ISD::BR:
+ // FIXME: we don't need long branches all the time!
+ SDValue N0 = N->getOperand(0);
+ return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other,
+ N->getOperand(1), N0);
+ }
+
+ return SelectCode(Op);
+}
+
+
+/// createIA64DAGToDAGInstructionSelector - This pass converts a legalized DAG
+/// into an IA64-specific DAG, ready for instruction scheduling.
+///
+FunctionPass
+*llvm::createIA64DAGToDAGInstructionSelector(IA64TargetMachine &TM) {
+ return new IA64DAGToDAGISel(TM);
+}
+
diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp
new file mode 100644
index 0000000..34a0686
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelLowering.cpp
@@ -0,0 +1,622 @@
+//===-- IA64ISelLowering.cpp - IA64 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IA64ISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64ISelLowering.h"
+#include "IA64MachineFunctionInfo.h"
+#include "IA64TargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
+ : TargetLowering(TM) {
+
+ // register class for general registers
+ addRegisterClass(MVT::i64, IA64::GRRegisterClass);
+
+ // register class for FP registers
+ addRegisterClass(MVT::f64, IA64::FPRegisterClass);
+
+ // register class for predicate registers
+ addRegisterClass(MVT::i1, IA64::PRRegisterClass);
+
+ setLoadExtAction(ISD::EXTLOAD , MVT::i1 , Promote);
+
+ setLoadExtAction(ISD::ZEXTLOAD , MVT::i1 , Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD , MVT::i1 , Promote);
+ setLoadExtAction(ISD::SEXTLOAD , MVT::i8 , Expand);
+ setLoadExtAction(ISD::SEXTLOAD , MVT::i16 , Expand);
+ setLoadExtAction(ISD::SEXTLOAD , MVT::i32 , Expand);
+
+ setOperationAction(ISD::BRIND , MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+
+ // ia64 uses SELECT not SELECT_CC
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+
+ // We need to handle ISD::RET for void functions ourselves,
+ // so we get a chance to restore ar.pfs before adding a
+ // br.ret insn
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setShiftAmountType(MVT::i64);
+
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+
+ setOperationAction(ISD::UREM , MVT::f32 , Expand);
+ setOperationAction(ISD::UREM , MVT::f64 , Expand);
+
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+
+ // We don't support sin/cos/sqrt/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // FIXME: IA64 supports fcopysign natively!
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // IA64 has ctlz in the form of the 'fnorm' instruction. The Legalizer
+ // expansion for ctlz/cttz in terms of ctpop is much larger, but lower
+ // latency.
+ // FIXME: Custom lower CTLZ when compiling for size?
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::ROTL , MVT::i64 , Expand);
+ setOperationAction(ISD::ROTR , MVT::i64 , Expand);
+
+ // FIXME: IA64 has this, but is not implemented. should be mux @rev
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setStackPointerRegisterToSaveRestore(IA64::r12);
+
+ setJumpBufSize(704); // on ia64-linux, jmp_bufs are 704 bytes..
+ setJumpBufAlignment(16); // ...and must be 16-byte aligned
+
+ computeRegisterProperties();
+
+ addLegalFPImmediate(APFloat(+0.0));
+ addLegalFPImmediate(APFloat(-0.0));
+ addLegalFPImmediate(APFloat(+1.0));
+ addLegalFPImmediate(APFloat(-1.0));
+}
+
+const char *IA64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case IA64ISD::GETFD: return "IA64ISD::GETFD";
+ case IA64ISD::BRCALL: return "IA64ISD::BRCALL";
+ case IA64ISD::RET_FLAG: return "IA64ISD::RET_FLAG";
+ }
+}
+
+MVT IA64TargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i1;
+}
+
+void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl) {
+ //
+ // add beautiful description of IA64 stack frame format
+ // here (from intel 24535803.pdf most likely)
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ GP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ SP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ RP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+
+ MachineBasicBlock& BB = MF.front();
+
+ unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35,
+ IA64::r36, IA64::r37, IA64::r38, IA64::r39};
+
+ unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11,
+ IA64::F12,IA64::F13,IA64::F14, IA64::F15};
+
+ unsigned argVreg[8];
+ unsigned argPreg[8];
+ unsigned argOpc[8];
+
+ unsigned used_FPArgs = 0; // how many FP args have been used so far?
+
+ unsigned ArgOffset = 0;
+ int count = 0;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ {
+ SDValue newroot, argt;
+ if(count < 8) { // need to fix this logic? maybe.
+
+ switch (getValueType(I->getType()).getSimpleVT()) {
+ default:
+ assert(0 && "ERROR in LowerArgs: can't lower this type of arg.\n");
+ case MVT::f32:
+ // fixme? (well, will need to for weird FP structy stuff,
+ // see intel ABI docs)
+ case MVT::f64:
+//XXX BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]);
+ MF.getRegInfo().addLiveIn(args_FP[used_FPArgs]);
+ // mark this reg as liveIn
+ // floating point args go into f8..f15 as needed (hence the
+ // used_FPArgs++ below):
+ argVreg[count] =
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::f64));
+ argPreg[count] = args_FP[used_FPArgs++];
+ argOpc[count] = IA64::FMOV;
+ argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), dl,
+ argVreg[count], MVT::f64);
+ if (I->getType() == Type::FloatTy)
+ argt = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, argt,
+ DAG.getIntPtrConstant(0));
+ break;
+ case MVT::i1: // NOTE: as far as C abi stuff goes,
+ // bools are just boring old ints
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+//XXX BuildMI(&BB, IA64::IDEF, 0, args_int[count]);
+ MF.getRegInfo().addLiveIn(args_int[count]);
+ // mark this register as liveIn
+ argVreg[count] =
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ argPreg[count] = args_int[count];
+ argOpc[count] = IA64::MOV;
+ argt = newroot =
+ DAG.getCopyFromReg(DAG.getRoot(), dl, argVreg[count], MVT::i64);
+ if ( getValueType(I->getType()) != MVT::i64)
+ argt = DAG.getNode(ISD::TRUNCATE, dl, getValueType(I->getType()),
+ newroot);
+ break;
+ }
+ } else { // more than 8 args go into the frame
+ // Create the frame index object for this incoming parameter...
+ ArgOffset = 16 + 8 * (count - 8);
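+ // (e.g. the 9th argument, count == 8, lands at offset 16; the 10th at 24)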
+ int FI = MFI->CreateFixedObject(8, ArgOffset);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+ argt = newroot = DAG.getLoad(getValueType(I->getType()), dl,
+ DAG.getEntryNode(), FIN, NULL, 0);
+ }
+ ++count;
+ DAG.setRoot(newroot.getValue(1));
+ ArgValues.push_back(argt);
+ }
+
+
+ // Create a vreg to hold the output of (what will become)
+ // the "alloc" instruction
+ VirtGPR = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ BuildMI(&BB, dl, TII->get(IA64::PSEUDO_ALLOC), VirtGPR);
+ // we create a PSEUDO_ALLOC (pseudo)instruction for now
+/*
+ BuildMI(&BB, IA64::IDEF, 0, IA64::r1);
+
+ // hmm:
+ BuildMI(&BB, IA64::IDEF, 0, IA64::r12);
+ BuildMI(&BB, IA64::IDEF, 0, IA64::rp);
+ // ..hmm.
+
+ BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1);
+
+ // hmm:
+ BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12);
+ BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp);
+ // ..hmm.
+*/
+
+ unsigned tempOffset=0;
+
+ // if this is a varargs function, we simply lower llvm.va_start by
+ // pointing to the first entry
+ if(F.isVarArg()) {
+ tempOffset=0;
+ VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset);
+ }
+
+ // here we actually do the moving of args, and store them to the stack
+ // too if this is a varargs function:
+ for (int i = 0; i < count && i < 8; ++i) {
+ BuildMI(&BB, dl, TII->get(argOpc[i]), argVreg[i]).addReg(argPreg[i]);
+ if(F.isVarArg()) {
+ // if this is a varargs function, we copy the input registers to the stack
+ int FI = MFI->CreateFixedObject(8, tempOffset);
+ tempOffset+=8; //XXX: is it safe to use r22 like this?
+ BuildMI(&BB, dl, TII->get(IA64::MOV), IA64::r22).addFrameIndex(FI);
+ // FIXME: we should use st8.spill here, one day
+ BuildMI(&BB, dl, TII->get(IA64::ST8), IA64::r22).addReg(argPreg[i]);
+ }
+ }
+
+ // Finally, inform the code generator which regs we return values in.
+ // (see the ISD::RET: case in the instruction selector)
+ switch (getValueType(F.getReturnType()).getSimpleVT()) {
+ default: assert(0 && "i have no idea where to return this type!");
+ case MVT::isVoid: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ MF.getRegInfo().addLiveOut(IA64::r8);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ MF.getRegInfo().addLiveOut(IA64::F8);
+ break;
+ }
+}
+
+std::pair<SDValue, SDValue>
+IA64TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg, unsigned CallingConv,
+ bool isTailCall, SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl) {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ unsigned NumBytes = 16;
+ unsigned outRegsUsed = 0;
+
+ if (Args.size() > 8) {
+ NumBytes += (Args.size() - 8) * 8;
+ outRegsUsed = 8;
+ } else {
+ outRegsUsed = Args.size();
+ }
+
+ // FIXME? this WILL fail if we ever try to pass around an arg that
+ // consumes more than a single output slot (a 'real' double, an int128,
+ // some sort of aggregate, etc.), as we'll underestimate how many 'outX'
+ // registers we use. Hopefully, the assembler will notice.
+ MF.getInfo<IA64FunctionInfo>()->outRegsUsed=
+ std::max(outRegsUsed, MF.getInfo<IA64FunctionInfo>()->outRegsUsed);
+
+ // keep stack frame 16-byte aligned
+ // assert(NumBytes==((NumBytes+15) & ~15) &&
+ // "stack frame not 16-byte aligned!");
+ NumBytes = (NumBytes+15) & ~15;
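+ // (e.g. a NumBytes of 24 rounds up to 32; multiples of 16 are unchanged)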
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr;
+ std::vector<SDValue> Stores;
+ std::vector<SDValue> Converts;
+ std::vector<SDValue> RegValuesToPass;
+ unsigned ArgOffset = 16;
+
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ {
+ SDValue Val = Args[i].Node;
+ MVT ObjectVT = Val.getValueType();
+ SDValue ValToStore(0, 0), ValToConvert(0, 0);
+ unsigned ObjSize=8;
+ switch (ObjectVT.getSimpleVT()) {
+ default: assert(0 && "unexpected argument type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32: {
+ //promote to 64-bits, sign/zero extending based on type
+ //of the argument
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+ Val = DAG.getNode(ExtendKind, dl, MVT::i64, Val);
+ // XXX: fall through
+ }
+ case MVT::i64:
+ //ObjSize = 8;
+ if(RegValuesToPass.size() >= 8) {
+ ValToStore = Val;
+ } else {
+ RegValuesToPass.push_back(Val);
+ }
+ break;
+ case MVT::f32:
+ //promote to 64-bits
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ // XXX: fall through
+ case MVT::f64:
+ if(RegValuesToPass.size() >= 8) {
+ ValToStore = Val;
+ } else {
+ RegValuesToPass.push_back(Val);
+ if(1 /* TODO: if(calling external or variadic function)*/ ) {
+ ValToConvert = Val; // additionally pass this FP value as an int
+ }
+ }
+ break;
+ }
+
+ if(ValToStore.getNode()) {
+ if(!StackPtr.getNode()) {
+ StackPtr = DAG.getRegister(IA64::r12, MVT::i64);
+ }
+ SDValue PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, PtrOff);
+ Stores.push_back(DAG.getStore(Chain, dl, ValToStore, PtrOff, NULL, 0));
+ ArgOffset += ObjSize;
+ }
+
+ if(ValToConvert.getNode()) {
+ Converts.push_back(DAG.getNode(IA64ISD::GETFD, dl,
+ MVT::i64, ValToConvert));
+ }
+ }
+
+ // Emit all stores, make sure they occur before any copies into physregs.
+ if (!Stores.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &Stores[0],Stores.size());
+
+ static const unsigned IntArgRegs[] = {
+ IA64::out0, IA64::out1, IA64::out2, IA64::out3,
+ IA64::out4, IA64::out5, IA64::out6, IA64::out7
+ };
+
+ static const unsigned FPArgRegs[] = {
+ IA64::F8, IA64::F9, IA64::F10, IA64::F11,
+ IA64::F12, IA64::F13, IA64::F14, IA64::F15
+ };
+
+ SDValue InFlag;
+
+ // save the current GP, SP and RP : FIXME: do we need to do all 3 always?
+ SDValue GPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::r1,
+ MVT::i64, InFlag);
+ Chain = GPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+ SDValue SPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::r12,
+ MVT::i64, InFlag);
+ Chain = SPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+ SDValue RPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::rp,
+ MVT::i64, InFlag);
+ Chain = RPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing integer args into regs out[0-7]
+ // mapped 1:1 and the FP args into regs F8-F15 "lazily"
+ // TODO: for performance, we should only copy FP args into int regs when we
+ // know this is required (i.e. for variadic or external (unknown) functions)
+
+ // first do the FP->(integer representation) conversions; these are
+ // flagged for now, but shouldn't have to be (TODO)
+ unsigned seenConverts = 0;
+ for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
+ if(RegValuesToPass[i].getValueType().isFloatingPoint()) {
+ Chain = DAG.getCopyToReg(Chain, dl, IntArgRegs[i],
+ Converts[seenConverts++], InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // next copy args into the usual places, these are flagged
+ unsigned usedFPArgs = 0;
+ for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl,
+ RegValuesToPass[i].getValueType().isInteger() ?
+ IntArgRegs[i] : FPArgRegs[usedFPArgs++], RegValuesToPass[i], InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+/*
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i64);
+ }
+*/
+
+ std::vector<MVT> NodeTys;
+ std::vector<SDValue> CallOperands;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ CallOperands.push_back(Chain);
+ CallOperands.push_back(Callee);
+
+ // emit the call itself
+ if (InFlag.getNode())
+ CallOperands.push_back(InFlag);
+ else
+ assert(0 && "this should never happen!\n");
+
+ // to make way for a hack:
+ Chain = DAG.getNode(IA64ISD::BRCALL, dl, NodeTys,
+ &CallOperands[0], CallOperands.size());
+ InFlag = Chain.getValue(1);
+
+ // restore the GP, SP and RP after the call
+ Chain = DAG.getCopyToReg(Chain, dl, IA64::r1, GPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, IA64::r12, SPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, IA64::rp, RPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+
+ std::vector<MVT> RetVals;
+ RetVals.push_back(MVT::Other);
+ RetVals.push_back(MVT::Flag);
+
+ MVT RetTyVT = getValueType(RetTy);
+ SDValue RetVal;
+ if (RetTyVT != MVT::isVoid) {
+ switch (RetTyVT.getSimpleVT()) {
+ default: assert(0 && "Unknown value type to return!");
+ case MVT::i1: { // bools are just like other integers (returned in r8)
+ // we *could* fall through to the truncate below, but this saves a
+ // few redundant predicate ops
+ SDValue boolInR8 = DAG.getCopyFromReg(Chain, dl, IA64::r8,
+ MVT::i64,InFlag);
+ InFlag = boolInR8.getValue(2);
+ Chain = boolInR8.getValue(1);
+ SDValue zeroReg = DAG.getCopyFromReg(Chain, dl, IA64::r0,
+ MVT::i64, InFlag);
+ InFlag = zeroReg.getValue(2);
+ Chain = zeroReg.getValue(1);
+
+ RetVal = DAG.getSetCC(dl, MVT::i1, boolInR8, zeroReg, ISD::SETNE);
+ break;
+ }
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ RetVal = DAG.getCopyFromReg(Chain, dl, IA64::r8, MVT::i64, InFlag);
+ Chain = RetVal.getValue(1);
+
+ // keep track of whether it is sign or zero extended (todo: bools?)
+/* XXX
+ RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext :ISD::AssertZext,
+ dl, MVT::i64, RetVal, DAG.getValueType(RetTyVT));
+*/
+ RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal);
+ break;
+ case MVT::i64:
+ RetVal = DAG.getCopyFromReg(Chain, dl, IA64::r8, MVT::i64, InFlag);
+ Chain = RetVal.getValue(1);
+ InFlag = RetVal.getValue(2); // XXX dead
+ break;
+ case MVT::f32:
+ RetVal = DAG.getCopyFromReg(Chain, dl, IA64::F8, MVT::f64, InFlag);
+ Chain = RetVal.getValue(1);
+ RetVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, RetVal,
+ DAG.getIntPtrConstant(0));
+ break;
+ case MVT::f64:
+ RetVal = DAG.getCopyFromReg(Chain, dl, IA64::F8, MVT::f64, InFlag);
+ Chain = RetVal.getValue(1);
+ InFlag = RetVal.getValue(2); // XXX dead
+ break;
+ }
+ }
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+ return std::make_pair(RetVal, Chain);
+}
+
+SDValue IA64TargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for IA64.");
+ case ISD::RET: {
+ SDValue AR_PFSVal, Copy;
+
+ switch(Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1:
+ AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), dl, VirtGPR, MVT::i64);
+ AR_PFSVal = DAG.getCopyToReg(AR_PFSVal.getValue(1), dl, IA64::AR_PFS,
+ AR_PFSVal);
+ return DAG.getNode(IA64ISD::RET_FLAG, dl, MVT::Other, AR_PFSVal);
+ case 3: {
+ // Copy the result into the output register & restore ar.pfs
+ MVT ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg = ArgVT.isInteger() ? IA64::r8 : IA64::F8;
+
+ AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), dl, VirtGPR, MVT::i64);
+ Copy = DAG.getCopyToReg(AR_PFSVal.getValue(1), dl, ArgReg,
+ Op.getOperand(1), SDValue());
+ AR_PFSVal = DAG.getCopyToReg(Copy.getValue(0), dl,
+ IA64::AR_PFS, AR_PFSVal, Copy.getValue(1));
+ return DAG.getNode(IA64ISD::RET_FLAG, dl, MVT::Other,
+ AR_PFSVal, AR_PFSVal.getValue(1));
+ }
+ }
+ return SDValue();
+ }
+ case ISD::VAARG: {
+ MVT VT = getPointerTy();
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ SDValue VAList = DAG.getLoad(VT, dl, Op.getOperand(0), Op.getOperand(1),
+ SV, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue VAIncr = DAG.getNode(ISD::ADD, dl, VT, VAList,
+ DAG.getConstant(VT.getSizeInBits()/8,
+ VT));
+ // Store the incremented VAList to the legalized pointer
+ VAIncr = DAG.getStore(VAList.getValue(1), dl, VAIncr,
+ Op.getOperand(1), SV, 0);
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(Op.getValueType(), dl, VAIncr, VAList, NULL, 0);
+ }
+ case ISD::VASTART: {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i64);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ }
+ // Frame & Return address. Currently unimplemented
+ case ISD::RETURNADDR: break;
+ case ISD::FRAMEADDR: break;
+ }
+ return SDValue();
+}
diff --git a/lib/Target/IA64/IA64ISelLowering.h b/lib/Target/IA64/IA64ISelLowering.h
new file mode 100644
index 0000000..edf7eb8
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelLowering.h
@@ -0,0 +1,76 @@
+//===-- IA64ISelLowering.h - IA64 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that IA64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IA64_IA64ISELLOWERING_H
+#define LLVM_TARGET_IA64_IA64ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "IA64.h"
+
+namespace llvm {
+ namespace IA64ISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// GETFD - the getf.d instruction takes a floating point operand and
+ /// returns its 64-bit memory representation as an i64
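+ /// (in assembly, e.g. "getf.d r8 = f8" copies the raw bits of f8 into
+ /// r8)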
+ GETFD,
+
+ // TODO: explain this hack
+ BRCALL,
+
+ // RET_FLAG - Return with a flag operand
+ RET_FLAG
+ };
+ }
+
+ class IA64TargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ //int ReturnAddrIndex; // FrameIndex for return slot.
+ unsigned GP, SP, RP; // FIXME - clean this mess up
+ public:
+ explicit IA64TargetLowering(TargetMachine &TM);
+
+ unsigned VirtGPR; // this is public so it can be accessed in the selector
+ // for ISD::RET. add an accessor instead? FIXME
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return ISD::SETCC's result type.
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ /// LowerArguments - This hook must be implemented to indicate how we should
+ /// lower the arguments for the specified function, into the specified DAG.
+ virtual void LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl);
+
+ /// LowerCallTo - This hook lowers an abstract call to a function into an
+ /// actual call.
+ virtual std::pair<SDValue, SDValue>
+ LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg,
+ unsigned CC, bool isTailCall,
+ SDValue Callee, ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl);
+
+ /// LowerOperation - for custom lowering specific ops
+ /// (currently, only "ret void")
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ };
+}
+
+#endif // LLVM_TARGET_IA64_IA64ISELLOWERING_H
diff --git a/lib/Target/IA64/IA64InstrBuilder.h b/lib/Target/IA64/IA64InstrBuilder.h
new file mode 100644
index 0000000..a5d4dca
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrBuilder.h
@@ -0,0 +1,40 @@
+//===-- IA64InstrBuilder.h - Aids for building IA64 insts -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64_INSTRBUILDER_H
+#define IA64_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - Add a reference to the base of an abstract object on
+/// the stack frame of the current function.  The reference carries the
+/// FrameIndex as a stand-in base register until frame indices are resolved,
+/// and an additional constant offset may be specified as well.
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
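+
+// A hypothetical call site (the opcode name here is a placeholder, purely
+// for illustration):
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(SomeMemOpcode), Reg), FI);
+// appends "imm 0, frameindex FI" to the instruction; the frame index is
+// later rewritten to a real base register by eliminateFrameIndex().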
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrFormats.td b/lib/Target/IA64/IA64InstrFormats.td
new file mode 100644
index 0000000..c465880
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrFormats.td
@@ -0,0 +1,80 @@
+//===- IA64InstrFormats.td - IA64 Instruction Formats ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// - Warning: the stuff in here isn't really being used, so it's mostly
+//   junk.  It'll get fixed as the JIT gets built.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstIA64<bits<4> op, dag OOL, dag IOL, string asmstr> : Instruction {
+ // IA64 instruction baseline
+ field bits<41> Inst;
+ let Namespace = "IA64";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+
+ let Inst{40-37} = op;
+}
+
+//"Each Itanium instruction is categorized into one of six types."
+//We should have:
+// A, I, M, F, B, L+X
+
+class AForm<bits<4> opcode, bits<6> qpReg, dag OOL, dag IOL, string asmstr> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+
+ let Inst{5-0} = qpReg;
+}
+
+class AForm_DAG<bits<4> opcode, bits<6> qpReg, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+
+ let Pattern = pattern;
+ let Inst{5-0} = qpReg;
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BForm<bits<4> opcode, bits<6> x6, bits<3> btype, dag OOL, dag IOL, string asmstr> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+
+ let Inst{32-27} = x6;
+ let Inst{8-6} = btype;
+}
+
+class MForm<bits<4> opcode, bits<6> x6, dag OOL, dag IOL, string asmstr> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+ bits<7> Ra;
+ bits<7> Rb;
+ bits<16> disp;
+
+ let Inst{35-30} = x6;
+// let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
+
+class RawForm<bits<4> opcode, bits<26> rest, dag OOL, dag IOL, string asmstr> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+ let Inst{25-0} = rest;
+}
+
+// Pseudo instructions.
+class PseudoInstIA64<dag OOL, dag IOL, string nm> : InstIA64<0, OOL, IOL, nm> {
+}
+
+class PseudoInstIA64_DAG<dag OOL, dag IOL, string nm, list<dag> pattern>
+ : InstIA64<0, OOL, IOL, nm> {
+ let Pattern = pattern;
+}
+
diff --git a/lib/Target/IA64/IA64InstrInfo.cpp b/lib/Target/IA64/IA64InstrInfo.cpp
new file mode 100644
index 0000000..5f89d4f
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.cpp
@@ -0,0 +1,193 @@
+//===- IA64InstrInfo.cpp - IA64 Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64InstrInfo.h"
+#include "IA64.h"
+#include "IA64InstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "IA64GenInstrInfo.inc"
+using namespace llvm;
+
+IA64InstrInfo::IA64InstrInfo()
+ : TargetInstrInfoImpl(IA64Insts, sizeof(IA64Insts)/sizeof(IA64Insts[0])),
+ RI(*this) {
+}
+
+
+bool IA64InstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg,
+ unsigned& SrcSR, unsigned& DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+
+ unsigned oc = MI.getOpcode();
+ if (oc == IA64::MOV || oc == IA64::FMOV) {
+ // TODO: this doesn't detect predicate moves
+ assert(MI.getNumOperands() >= 2 &&
+ /* MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() && */
+ "invalid register-register move instruction");
+ if (MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg()) {
+ // if both operands of the MOV/FMOV are registers, then
+ // yes, this is a move instruction
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ }
+ return false; // we don't consider e.g. %regN = MOV <FrameIndex #x> a
+ // move instruction
+}
+
+unsigned
+IA64InstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond)const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Can only insert uncond branches so far.
+ assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+ BuildMI(&MBB, dl, get(IA64::BRL_NOTCALL)).addMBB(TBB);
+ return 1;
+}
+
+bool IA64InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  if (DestRC == IA64::PRRegisterClass) // if a bool, we use pseudocode
+ // (SrcReg) DestReg = cmp.eq.unc(r0, r0)
+ BuildMI(MBB, MI, DL, get(IA64::PCMPEQUNC), DestReg)
+ .addReg(IA64::r0).addReg(IA64::r0).addReg(SrcReg);
+ else // otherwise, MOV works (for both gen. regs and FP regs)
+ BuildMI(MBB, MI, DL, get(IA64::MOV), DestReg).addReg(SrcReg);
+
+ return true;
+}
+
+void IA64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC) const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == IA64::FPRegisterClass) {
+ BuildMI(MBB, MI, DL, get(IA64::STF_SPILL)).addFrameIndex(FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else if (RC == IA64::GRRegisterClass) {
+ BuildMI(MBB, MI, DL, get(IA64::ST8)).addFrameIndex(FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else if (RC == IA64::PRRegisterClass) {
+ /* we use IA64::r2 as a temporary register for doing this hackery. */
+ // first we load 0:
+ BuildMI(MBB, MI, DL, get(IA64::MOV), IA64::r2).addReg(IA64::r0);
+ // then conditionally add 1:
+ BuildMI(MBB, MI, DL, get(IA64::CADDIMM22), IA64::r2).addReg(IA64::r2)
+ .addImm(1).addReg(SrcReg, getKillRegState(isKill));
+ // and then store it to the stack
+ BuildMI(MBB, MI, DL, get(IA64::ST8))
+ .addFrameIndex(FrameIdx)
+ .addReg(IA64::r2);
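+    // Net effect of the three instructions above (illustrative; pN stands
+    // for the predicate register being spilled):
+    //        mov  r2 = r0              // r2 = 0
+    //   (pN) add  r2 = 1, r2           // r2 = 1 iff pN is set
+    //        st8  [<frame slot>] = r2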
+ } else assert(0 &&
+ "sorry, I don't know how to store this sort of reg in the stack\n");
+}
+
+void IA64InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == IA64::FPRegisterClass) {
+ Opc = IA64::STF8;
+ } else if (RC == IA64::GRRegisterClass) {
+ Opc = IA64::ST8;
+ } else if (RC == IA64::PRRegisterClass) {
+ Opc = IA64::ST1;
+ } else {
+ assert(0 &&
+ "sorry, I don't know how to store this sort of reg\n");
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ NewMIs.push_back(MIB);
+}
+
+void IA64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC)const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == IA64::FPRegisterClass) {
+ BuildMI(MBB, MI, DL, get(IA64::LDF_FILL), DestReg).addFrameIndex(FrameIdx);
+ } else if (RC == IA64::GRRegisterClass) {
+ BuildMI(MBB, MI, DL, get(IA64::LD8), DestReg).addFrameIndex(FrameIdx);
+ } else if (RC == IA64::PRRegisterClass) {
+ // first we load a byte from the stack into r2, our 'predicate hackery'
+ // scratch reg
+ BuildMI(MBB, MI, DL, get(IA64::LD8), IA64::r2).addFrameIndex(FrameIdx);
+ // then we compare it to zero. If it _is_ zero, compare-not-equal to
+ // r0 gives us 0, which is what we want, so that's nice.
+ BuildMI(MBB, MI, DL, get(IA64::CMPNE), DestReg)
+ .addReg(IA64::r2)
+ .addReg(IA64::r0);
+ } else {
+ assert(0 &&
+ "sorry, I don't know how to load this sort of reg from the stack\n");
+ }
+}
+
+void IA64InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == IA64::FPRegisterClass) {
+ Opc = IA64::LDF8;
+ } else if (RC == IA64::GRRegisterClass) {
+ Opc = IA64::LD8;
+ } else if (RC == IA64::PRRegisterClass) {
+ Opc = IA64::LD1;
+ } else {
+ assert(0 &&
+ "sorry, I don't know how to load this sort of reg\n");
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+}
diff --git a/lib/Target/IA64/IA64InstrInfo.h b/lib/Target/IA64/IA64InstrInfo.h
new file mode 100644
index 0000000..79236c2
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.h
@@ -0,0 +1,70 @@
+//===- IA64InstrInfo.h - IA64 Instruction Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64INSTRUCTIONINFO_H
+#define IA64INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "IA64RegisterInfo.h"
+
+namespace llvm {
+
+class IA64InstrInfo : public TargetInstrInfoImpl {
+ const IA64RegisterInfo RI;
+public:
+ IA64InstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const IA64RegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td
new file mode 100644
index 0000000..2ab9897
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.td
@@ -0,0 +1,751 @@
+//===- IA64InstrInfo.td - Describe the IA64 Instruction Set -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the IA64 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+include "IA64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// IA-64 specific DAG Nodes.
+//
+
+def IA64getfd : SDNode<"IA64ISD::GETFD", SDTFPToIntOp, []>;
+
+def retflag : SDNode<"IA64ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+//===---------
+// Instruction types
+
+class isA { bit A=1; } // I or M unit
+class isM { bit M=1; } // M unit
+class isI { bit I=1; } // I unit
+class isB { bit B=1; } // B unit
+class isF { bit F=1; } // F unit
+class isLX { bit LX=1; } // I/B
+
+//===---------
+
+def u2imm : Operand<i8>;
+def u6imm : Operand<i8>;
+def s8imm : Operand<i8> {
+ let PrintMethod = "printS8ImmOperand";
+}
+def s14imm : Operand<i64> {
+ let PrintMethod = "printS14ImmOperand";
+}
+def s22imm : Operand<i64> {
+ let PrintMethod = "printS22ImmOperand";
+}
+def u64imm : Operand<i64> {
+ let PrintMethod = "printU64ImmOperand";
+}
+def s64imm : Operand<i64> {
+ let PrintMethod = "printS64ImmOperand";
+}
+
+let PrintMethod = "printGlobalOperand" in
+ def globaladdress : Operand<i64>;
+
+// the asmprinter needs to know about calls
+let PrintMethod = "printCallOperand" in
+ def calltarget : Operand<i64>;
+
+/* new daggy action!!! */
+
+def is32ones : PatLeaf<(i64 imm), [{
+ // is32ones predicate - True if the immediate is 0x00000000FFFFFFFF
+ // Used to create ZXT4s appropriately
+ uint64_t v = (uint64_t)N->getZExtValue();
+ return (v == 0x00000000FFFFFFFFLL);
+}]>;
+
+// isMIXable predicates - True if the immediate is
+// 0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF
+// etc, through 0x00000000FFFFFFFF
+// Used to test for the suitability of mix*
+def isMIX1Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0xFF00FF00FF00FF00LL);
+}]>;
+def isMIX1Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0x00FF00FF00FF00FFLL);
+}]>;
+def isMIX2Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0xFFFF0000FFFF0000LL);
+}]>;
+def isMIX2Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0x0000FFFF0000FFFFLL);
+}]>;
+def isMIX4Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0xFFFFFFFF00000000LL);
+}]>;
+def isMIX4Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0x00000000FFFFFFFFLL);
+}]>;
+
+def isSHLADDimm: PatLeaf<(i64 imm), [{
+ // isSHLADDimm predicate - True if the immediate is exactly 1, 2, 3 or 4
+ // - 0 is *not* okay.
+ // Used to create shladd instructions appropriately
+ int64_t v = (int64_t)N->getZExtValue();
+ return (v >= 1 && v <= 4);
+}]>;
+
+def immSExt14 : PatLeaf<(i64 imm), [{
+ // immSExt14 predicate - True if the immediate fits in a 14-bit sign extended
+ // field. Used by instructions like 'adds'.
+ int64_t v = (int64_t)N->getZExtValue();
+ return (v <= 8191 && v >= -8192);
+}]>;
+
+// imm64 predicate - True if the immediate fits in a 64-bit
+// field - i.e., always true.  Used to keep movl happy.
+def imm64 : PatLeaf<(i64 imm)>;
+
+def ADD : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "add $dst = $src1, $src2",
+ [(set GR:$dst, (add GR:$src1, GR:$src2))]>, isA;
+
+def ADD1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "add $dst = $src1, $src2, 1",
+ [(set GR:$dst, (add (add GR:$src1, GR:$src2), 1))]>, isA;
+
+def ADDS : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+ "adds $dst = $imm, $src1",
+ [(set GR:$dst, (add GR:$src1, immSExt14:$imm))]>, isA;
+
+def MOVL : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins s64imm:$imm),
+ "movl $dst = $imm",
+ [(set GR:$dst, imm64:$imm)]>, isLX;
+
+def ADDL_GA : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, globaladdress:$imm),
+ "addl $dst = $imm, $src1",
+ []>, isA;
+
+// hmm
+def ADDL_EA : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, calltarget:$imm),
+ "addl $dst = $imm, $src1",
+ []>, isA;
+
+def SUB : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "sub $dst = $src1, $src2",
+ [(set GR:$dst, (sub GR:$src1, GR:$src2))]>, isA;
+
+def SUB1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "sub $dst = $src1, $src2, 1",
+ [(set GR:$dst, (add (sub GR: $src1, GR:$src2), -1))]>, isA;
+
+let isTwoAddress = 1 in {
+def TPCADDIMM22 : AForm<0x03, 0x0b,
+ (outs GR:$dst), (ins GR:$src1, s22imm:$imm, PR:$qp),
+ "($qp) add $dst = $imm, $dst">, isA;
+def TPCADDS : AForm_DAG<0x03, 0x0b,
+ (outs GR:$dst), (ins GR:$src1, s14imm:$imm, PR:$qp),
+ "($qp) adds $dst = $imm, $dst",
+ []>, isA;
+def TPCMPIMM8NE : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src1, s22imm:$imm, GR:$src2, PR:$qp),
+ "($qp) cmp.ne $dst , p0 = $imm, $src2">, isA;
+}
+
+// zero extend a bool (predicate reg) into an integer reg
+def ZXTb : Pat<(zext PR:$src),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+def AXTb : Pat<(anyext PR:$src),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+
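+// Roughly, the two patterns above expand to (register names illustrative):
+//        adds rX = 0, r0      // rX = 0
+//   (pN) add  rX = 1, rX      // rX = 1 iff the predicate pN is set
+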
+// normal sign/zero-extends
+def SXT1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "sxt1 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i8))]>, isI;
+def ZXT1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "zxt1 $dst = $src",
+ [(set GR:$dst, (and GR:$src, 255))]>, isI;
+def SXT2 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "sxt2 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i16))]>, isI;
+def ZXT2 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "zxt2 $dst = $src",
+ [(set GR:$dst, (and GR:$src, 65535))]>, isI;
+def SXT4 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "sxt4 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i32))]>, isI;
+def ZXT4 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "zxt4 $dst = $src",
+ [(set GR:$dst, (and GR:$src, is32ones))]>, isI;
+
+// fixme: shrs vs shru?
+def MIX1L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix1.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX1Lable),
+ (and (srl GR:$src2, (i64 8)), isMIX1Lable)))]>, isI;
+
+def MIX2L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix2.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX2Lable),
+ (and (srl GR:$src2, (i64 16)), isMIX2Lable)))]>, isI;
+
+def MIX4L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix4.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX4Lable),
+ (and (srl GR:$src2, (i64 32)), isMIX4Lable)))]>, isI;
+
+def MIX1R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix1.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 8)), isMIX1Rable),
+ (and GR:$src2, isMIX1Rable)))]>, isI;
+
+def MIX2R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix2.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 16)), isMIX2Rable),
+ (and GR:$src2, isMIX2Rable)))]>, isI;
+
+def MIX4R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix4.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 32)), isMIX4Rable),
+ (and GR:$src2, isMIX4Rable)))]>, isI;
+
+def GETFSIGD : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+ "getf.sig $dst = $src",
+ []>, isM;
+
+def SETFSIGD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+ "setf.sig $dst = $src",
+ []>, isM;
+
+def XMALD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "xma.l $dst = $src1, $src2, $src3",
+ []>, isF;
+def XMAHD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "xma.h $dst = $src1, $src2, $src3",
+ []>, isF;
+def XMAHUD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "xma.hu $dst = $src1, $src2, $src3",
+ []>, isF;
+
+// pseudocode for integer multiplication
+def : Pat<(mul GR:$src1, GR:$src2),
+ (GETFSIGD (XMALD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhs GR:$src1, GR:$src2),
+ (GETFSIGD (XMAHD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhu GR:$src1, GR:$src2),
+ (GETFSIGD (XMAHUD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
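+
+// IA-64 has no integer multiply in the ALU, so "rD = rA * rB" round-trips
+// through the FP unit; schematically (registers illustrative):
+//   setf.sig fX = rA
+//   setf.sig fY = rB
+//   xma.l    fX = fX, fY, f0     // fX = rA * rB + 0
+//   getf.sig rD = fX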
+
+// TODO: addp4 (addp4 dst = src, r0 is a 32-bit add)
+// has imm form, too
+
+// def ADDS : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+// "adds $dst = $imm, $src1">;
+
+def AND : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "and $dst = $src1, $src2",
+ [(set GR:$dst, (and GR:$src1, GR:$src2))]>, isA;
+def ANDCM : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "andcm $dst = $src1, $src2",
+ [(set GR:$dst, (and GR:$src1, (not GR:$src2)))]>, isA;
+// TODO: and/andcm/or/xor/add/sub/shift immediate forms
+def OR : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "or $dst = $src1, $src2",
+ [(set GR:$dst, (or GR:$src1, GR:$src2))]>, isA;
+
+def pOR : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+ "($qp) or $dst = $src1, $src2">, isA;
+
+// the following are all a bit unfortunate: we throw away the complement
+// of the compare!
+def CMPEQ : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.eq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (seteq GR:$src1, GR:$src2))]>, isA;
+def CMPGT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setgt GR:$src1, GR:$src2))]>, isA;
+def CMPGE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setge GR:$src1, GR:$src2))]>, isA;
+def CMPLT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setlt GR:$src1, GR:$src2))]>, isA;
+def CMPLE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setle GR:$src1, GR:$src2))]>, isA;
+def CMPNE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.ne $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setne GR:$src1, GR:$src2))]>, isA;
+def CMPLTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.ltu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setult GR:$src1, GR:$src2))]>, isA;
+def CMPGTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.gtu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setugt GR:$src1, GR:$src2))]>, isA;
+def CMPLEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.leu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setule GR:$src1, GR:$src2))]>, isA;
+def CMPGEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.geu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setuge GR:$src1, GR:$src2))]>, isA;
+
+// and we do the whole thing again for FP compares!
+def FCMPEQ : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.eq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (seteq FP:$src1, FP:$src2))]>, isF;
+def FCMPGT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setgt FP:$src1, FP:$src2))]>, isF;
+def FCMPGE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setge FP:$src1, FP:$src2))]>, isF;
+def FCMPLT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setlt FP:$src1, FP:$src2))]>, isF;
+def FCMPLE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setle FP:$src1, FP:$src2))]>, isF;
+def FCMPNE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.neq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setne FP:$src1, FP:$src2))]>, isF;
+def FCMPLTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setult FP:$src1, FP:$src2))]>, isF;
+def FCMPGTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setugt FP:$src1, FP:$src2))]>, isF;
+def FCMPLEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setule FP:$src1, FP:$src2))]>, isF;
+def FCMPGEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setuge FP:$src1, FP:$src2))]>, isF;
+
+def PCMPEQUNCR0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$qp),
+ "($qp) cmp.eq.unc $dst, p0 = r0, r0">, isA;
+
+def : Pat<(trunc GR:$src), // truncate i64 to i1
+ (CMPNE GR:$src, r0)>; // $src!=0? If so, PR:$dst=true
+
+let isTwoAddress=1 in {
+ def TPCMPEQR0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$bogus, PR:$qp),
+ "($qp) cmp.eq $dst, p0 = r0, r0">, isA;
+ def TPCMPNER0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$bogus, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = r0, r0">, isA;
+}
+
+/* our pseudocode for OR on predicates is:
+pC = pA OR pB
+-------------
+(pA) cmp.eq.unc pC,p0 = r0,r0 // pC = pA
+ ;;
+(pB) cmp.eq pC,p0 = r0,r0 // if (pB) pC = 1 */
+
+def bOR : Pat<(or PR:$src1, PR:$src2),
+ (TPCMPEQR0R0 (PCMPEQUNCR0R0 PR:$src1), PR:$src2)>;
+
+/* our pseudocode for AND on predicates is:
+ *
+(pA) cmp.eq.unc pC,p0 = r0,r0 // pC = pA
+ cmp.eq pTemp,p0 = r0,r0 // pTemp = NOT pB
+ ;;
+(pB) cmp.ne pTemp,p0 = r0,r0
+ ;;
+(pTemp)cmp.ne pC,p0 = r0,r0 // if (NOT pB) pC = 0 */
+
+def bAND : Pat<(and PR:$src1, PR:$src2),
+ ( TPCMPNER0R0 (PCMPEQUNCR0R0 PR:$src1),
+ (TPCMPNER0R0 (CMPEQ r0, r0), PR:$src2) )>;
+
+/* one possible routine for XOR on predicates is:
+
+ // Compute px = py ^ pz
+ // using sum of products: px = (py & !pz) | (pz & !py)
+ // Uses 5 instructions in 3 cycles.
+ // cycle 1
+(pz) cmp.eq.unc px = r0, r0 // px = pz
+(py) cmp.eq.unc pt = r0, r0 // pt = py
+ ;;
+ // cycle 2
+(pt) cmp.ne.and px = r0, r0 // px = px & !pt (px = pz & !pt)
+(pz) cmp.ne.and pt = r0, r0 // pt = pt & !pz
+ ;;
+ // cycle 3
+(pt) cmp.eq.or px = r0, r0 // px = px | pt
+
+*** Another, which we use here, requires one scratch GR.  It is:
+
+ mov rt = 0 // initialize rt off critical path
+ ;;
+
+ // cycle 1
+(pz) cmp.eq.unc px = r0, r0 // px = pz
+(pz) mov rt = 1 // rt = pz
+ ;;
+ // cycle 2
+(py) cmp.ne px = 1, rt // if (py) px = !pz
+
+.. these routines kindly provided by Jim Hull
+*/
+
+def bXOR : Pat<(xor PR:$src1, PR:$src2),
+ (TPCMPIMM8NE (PCMPEQUNCR0R0 PR:$src2), 1,
+ (TPCADDS (ADDS r0, 0), 1, PR:$src2),
+ PR:$src1)>;
+
+def XOR : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "xor $dst = $src1, $src2",
+ [(set GR:$dst, (xor GR:$src1, GR:$src2))]>, isA;
+
+def SHLADD: AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1,s64imm:$imm,GR:$src2),
+ "shladd $dst = $src1, $imm, $src2",
+ [(set GR:$dst, (add GR:$src2, (shl GR:$src1, isSHLADDimm:$imm)))]>, isA;
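+
+// Example of what the SHLADD pattern matches (values illustrative): an
+// address computation like base + index*8, i.e. (add rB, (shl rI, 3)),
+// selects the single instruction "shladd rD = rI, 3, rB".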
+
+def SHL : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "shl $dst = $src1, $src2",
+ [(set GR:$dst, (shl GR:$src1, GR:$src2))]>, isI;
+
+def SHRU : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "shr.u $dst = $src1, $src2",
+ [(set GR:$dst, (srl GR:$src1, GR:$src2))]>, isI;
+
+def SHRS : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "shr $dst = $src1, $src2",
+ [(set GR:$dst, (sra GR:$src1, GR:$src2))]>, isI;
+
+def MOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "mov $dst = $src">, isA;
+def FMOV : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "mov $dst = $src">, isF; // XXX: there _is_ no fmov
+def PMOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isA;
+
+def SPILL_ALL_PREDICATES_TO_GR : AForm<0x03, 0x0b, (outs GR:$dst), (ins),
+ "mov $dst = pr">, isI;
+def FILL_ALL_PREDICATES_FROM_GR : AForm<0x03, 0x0b, (outs), (ins GR:$src),
+ "mov pr = $src">, isI;
+
+let isTwoAddress = 1 in {
+ def CMOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src2, GR:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isA;
+}
+
+def PFMOV : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isF;
+
+let isTwoAddress = 1 in {
+ def CFMOV : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src2, FP:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isF;
+}
+
+def SELECTINT : Pat<(select PR:$which, GR:$src1, GR:$src2),
+ (CMOV (MOV GR:$src2), GR:$src1, PR:$which)>; // note order!
+def SELECTFP : Pat<(select PR:$which, FP:$src1, FP:$src2),
+ (CFMOV (FMOV FP:$src2), FP:$src1, PR:$which)>; // note order!
+// TODO: can do this faster, w/o using any integer regs (see pattern isel)
+def SELECTBOOL : Pat<(select PR:$which, PR:$src1, PR:$src2), // note order!
+ (CMPNE (CMOV
+ (MOV (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src2)),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src1), PR:$which), r0)>;
+
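+// Sketch of the select expansion (registers and predicate are illustrative):
+// for "rD = select pW, rT, rF" the SELECTINT pattern above emits
+//        mov rD = rF          // start from the false value...
+//   (pW) mov rD = rT          // ...conditionally overwritten by the true one
+// which is why the operand order is swapped in the patterns.
+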
+// load constants of various sizes // FIXME: prettyprint -ve constants
+def : Pat<(i64 immSExt14:$imm), (ADDS r0, immSExt14:$imm)>;
+def : Pat<(i1 -1), (CMPEQ r0, r0)>; // TODO: this should just be a ref to p0
+def : Pat<(i1 0), (CMPNE r0, r0)>; // TODO: any instruction actually *using*
+ // this predicate should be killed!
+
+// TODO: support postincrement (reg, imm9) loads+stores - this needs more
+// tablegen support
+
+def IUSE : PseudoInstIA64<(outs), (ins variable_ops), "// IUSE">;
+def ADJUSTCALLSTACKUP : PseudoInstIA64<(outs), (ins variable_ops),
+ "// ADJUSTCALLSTACKUP">;
+def ADJUSTCALLSTACKDOWN : PseudoInstIA64<(outs), (ins variable_ops),
+ "// ADJUSTCALLSTACKDOWN">;
+def PSEUDO_ALLOC : PseudoInstIA64<(outs), (ins GR:$foo), "// PSEUDO_ALLOC">;
+
+def ALLOC : AForm<0x03, 0x0b,
+ (outs GR:$dst), (ins i8imm:$inputs, i8imm:$locals, i8imm:$outputs, i8imm:$rotating),
+ "alloc $dst = ar.pfs,$inputs,$locals,$outputs,$rotating">, isM;
+
+let isTwoAddress = 1 in {
+ def TCMPNE : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4),
+ "cmp.ne $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPEQOR : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.eq.or $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPNE : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPEQ : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.eq $dst, p0 = $src3, $src4">, isA;
+}
+
+def MOVSIMM14 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s14imm:$imm),
+ "mov $dst = $imm">, isA;
+def MOVSIMM22 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s22imm:$imm),
+ "mov $dst = $imm">, isA;
+def MOVLIMM64 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s64imm:$imm),
+ "movl $dst = $imm">, isLX;
+
+def SHLI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+ "shl $dst = $src1, $imm">, isI;
+def SHRUI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+ "shr.u $dst = $src1, $imm">, isI;
+def SHRSI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+ "shr $dst = $src1, $imm">, isI;
+
+def EXTRU : AForm<0x03, 0x0b,
+ (outs GR:$dst), (ins GR:$src1, u6imm:$imm1, u6imm:$imm2),
+ "extr.u $dst = $src1, $imm1, $imm2">, isI;
+
+def DEPZ : AForm<0x03, 0x0b,
+ (outs GR:$dst), (ins GR:$src1, u6imm:$imm1, u6imm:$imm2),
+ "dep.z $dst = $src1, $imm1, $imm2">, isI;
+
+def PCMPEQOR : AForm<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.eq.or $dst, p0 = $src1, $src2">, isA;
+def PCMPEQUNC : AForm<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.eq.unc $dst, p0 = $src1, $src2">, isA;
+def PCMPNE : AForm<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = $src1, $src2">, isA;
+
+// two destinations!
+def BCMPEQ : AForm<0x03, 0x0b, (outs PR:$dst1, PR:$dst2), (ins GR:$src1, GR:$src2),
+ "cmp.eq $dst1, dst2 = $src1, $src2">, isA;
+
+def ADDIMM14 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+ "adds $dst = $imm, $src1">, isA;
+
+def ADDIMM22 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s22imm:$imm),
+ "add $dst = $imm, $src1">, isA;
+def CADDIMM22 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s22imm:$imm, PR:$qp),
+ "($qp) add $dst = $imm, $src1">, isA;
+
+def SUBIMM8 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s8imm:$imm, GR:$src2),
+ "sub $dst = $imm, $src2">, isA;
+
+let mayStore = 1 in {
+ def ST1 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+ "st1 [$dstPtr] = $value">, isM;
+ def ST2 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+ "st2 [$dstPtr] = $value">, isM;
+ def ST4 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+ "st4 [$dstPtr] = $value">, isM;
+ def ST8 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+ "st8 [$dstPtr] = $value">, isM;
+ def STF4 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+ "stfs [$dstPtr] = $value">, isM;
+ def STF8 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+ "stfd [$dstPtr] = $value">, isM;
+ def STF_SPILL : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+ "stf.spill [$dstPtr] = $value">, isM;
+}
+
+let canFoldAsLoad = 1 in {
+ def LD1 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+ "ld1 $dst = [$srcPtr]">, isM;
+ def LD2 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+ "ld2 $dst = [$srcPtr]">, isM;
+ def LD4 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+ "ld4 $dst = [$srcPtr]">, isM;
+ def LD8 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+ "ld8 $dst = [$srcPtr]">, isM;
+ def LDF4 : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+ "ldfs $dst = [$srcPtr]">, isM;
+ def LDF8 : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+ "ldfd $dst = [$srcPtr]">, isM;
+ def LDF_FILL : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+ "ldf.fill $dst = [$srcPtr]">, isM;
+}
+
+def POPCNT : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+ "popcnt $dst = $src",
+ [(set GR:$dst, (ctpop GR:$src))]>, isI;
+
+// some FP stuff: // TODO: single-precision stuff?
+def FADD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+ "fadd $dst = $src1, $src2",
+ [(set FP:$dst, (fadd FP:$src1, FP:$src2))]>, isF;
+def FADDS: AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+ "fadd.s $dst = $src1, $src2">, isF;
+def FSUB : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+ "fsub $dst = $src1, $src2",
+ [(set FP:$dst, (fsub FP:$src1, FP:$src2))]>, isF;
+def FMPY : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+ "fmpy $dst = $src1, $src2",
+ [(set FP:$dst, (fmul FP:$src1, FP:$src2))]>, isF;
+def FMA : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "fma $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fadd (fmul FP:$src1, FP:$src2), FP:$src3))]>, isF;
+def FMS : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "fms $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fsub (fmul FP:$src1, FP:$src2), FP:$src3))]>, isF;
+def FNMA : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "fnma $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fneg (fadd (fmul FP:$src1, FP:$src2), FP:$src3)))]>, isF;
+def FABS : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fabs $dst = $src",
+ [(set FP:$dst, (fabs FP:$src))]>, isF;
+def FNEG : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fneg $dst = $src",
+ [(set FP:$dst, (fneg FP:$src))]>, isF;
+def FNEGABS : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fnegabs $dst = $src",
+ [(set FP:$dst, (fneg (fabs FP:$src)))]>, isF;
+
+let isTwoAddress=1 in {
+def TCFMAS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.s1 $dst = $src1, $src2, $src3">, isF;
+def TCFMADS0 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s0 $dst = $src1, $src2, $src3">, isF;
+}
+
+def CFMAS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.s1 $dst = $src1, $src2, $src3">, isF;
+def CFNMAS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fnma.s1 $dst = $src1, $src2, $src3">, isF;
+
+def CFMADS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s1 $dst = $src1, $src2, $src3">, isF;
+def CFMADS0 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s0 $dst = $src1, $src2, $src3">, isF;
+def CFNMADS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fnma.d.s1 $dst = $src1, $src2, $src3">, isF;
+
+def FRCPAS0 : AForm<0x03, 0x0b, (outs FP:$dstFR, PR:$dstPR), (ins FP:$src1, FP:$src2),
+ "frcpa.s0 $dstFR, $dstPR = $src1, $src2">, isF;
+def FRCPAS1 : AForm<0x03, 0x0b, (outs FP:$dstFR, PR:$dstPR), (ins FP:$src1, FP:$src2),
+ "frcpa.s1 $dstFR, $dstPR = $src1, $src2">, isF;
+
+def XMAL : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "xma.l $dst = $src1, $src2, $src3">, isF;
+
+def FCVTXF : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.xf $dst = $src">, isF;
+def FCVTXUF : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.xuf $dst = $src">, isF;
+def FCVTXUFS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.xuf.s1 $dst = $src">, isF;
+def FCVTFX : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fx $dst = $src">, isF;
+def FCVTFXU : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fxu $dst = $src">, isF;
+
+def FCVTFXTRUNC : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fx.trunc $dst = $src">, isF;
+def FCVTFXUTRUNC : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fxu.trunc $dst = $src">, isF;
+
+def FCVTFXTRUNCS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fx.trunc.s1 $dst = $src">, isF;
+def FCVTFXUTRUNCS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fxu.trunc.s1 $dst = $src">, isF;
+
+def FNORMD : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fnorm.d $dst = $src">, isF;
+
+def GETFD : AForm<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+ "getf.d $dst = $src">, isM;
+def SETFD : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+ "setf.d $dst = $src">, isM;
+
+def GETFSIG : AForm<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+ "getf.sig $dst = $src">, isM;
+def SETFSIG : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+ "setf.sig $dst = $src">, isM;
+
+// these four FP<->int conversion patterns need checking/cleaning
+def SINT_TO_FP : Pat<(sint_to_fp GR:$src),
+ (FNORMD (FCVTXF (SETFSIG GR:$src)))>;
+def UINT_TO_FP : Pat<(uint_to_fp GR:$src),
+ (FNORMD (FCVTXUF (SETFSIG GR:$src)))>;
+def FP_TO_SINT : Pat<(i64 (fp_to_sint FP:$src)),
+ (GETFSIG (FCVTFXTRUNC FP:$src))>;
+def FP_TO_UINT : Pat<(i64 (fp_to_uint FP:$src)),
+ (GETFSIG (FCVTFXUTRUNC FP:$src))>;
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+def fpimmn0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+def fpimmn1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+def : Pat<(f64 fpimm0), (FMOV F0)>;
+def : Pat<(f64 fpimm1), (FMOV F1)>;
+def : Pat<(f64 fpimmn0), (FNEG F0)>;
+def : Pat<(f64 fpimmn1), (FNEG F1)>;
+
+let isTerminator = 1, isBranch = 1 in {
+ def BRL_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins i64imm:$dst),
+ "(p0) brl.cond.sptk $dst">, isB;
+ def BRLCOND_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, i64imm:$dst),
+ "($qp) brl.cond.sptk $dst">, isB;
+ def BRCOND_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, GR:$dst),
+ "($qp) br.cond.sptk $dst">, isB;
+}
+
+let isCall = 1, /* isTerminator = 1, isBranch = 1, */
+ Uses = [out0,out1,out2,out3,out4,out5,out6,out7],
+// all calls clobber non-callee-saved registers, and for now, they are these:
+ Defs = [r2,r3,r8,r9,r10,r11,r14,r15,r16,r17,r18,r19,r20,r21,r22,r23,r24,
+ r25,r26,r27,r28,r29,r30,r31,
+ p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,
+ F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,
+ F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49,
+ F50,F51,F52,F53,F54,F55,F56,
+ F57,F58,F59,F60,F61,F62,F63,F64,F65,F66,F67,F68,F69,F70,F71,F72,F73,F74,
+ F75,F76,F77,F78,F79,F80,F81,
+ F82,F83,F84,F85,F86,F87,F88,F89,F90,F91,F92,F93,F94,F95,F96,F97,F98,F99,
+ F100,F101,F102,F103,F104,F105,
+ F106,F107,F108,F109,F110,F111,F112,F113,F114,F115,F116,F117,F118,F119,
+ F120,F121,F122,F123,F124,F125,F126,F127,
+ out0,out1,out2,out3,out4,out5,out6,out7] in {
+// old pattern call
+ def BRCALL: RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// new daggy stuff!
+
+// calls a globaladdress
+ def BRCALL_IPREL_GA : RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// calls an externalsymbol
+ def BRCALL_IPREL_ES : RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// calls through a function descriptor
+ def BRCALL_INDIRECT : RawForm<0x03, 0xb0, (outs), (ins GR:$branchreg),
+ "br.call.sptk rp = $branchreg">, isB; // FIXME: teach llvm about branch regs?
+ def BRLCOND_CALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, i64imm:$dst),
+ "($qp) brl.cond.call.sptk $dst">, isB;
+ def BRCOND_CALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, GR:$dst),
+ "($qp) br.cond.call.sptk $dst">, isB;
+}
+
+// Return branch:
+let isTerminator = 1, isReturn = 1 in
+ def RET : AForm_DAG<0x03, 0x0b, (outs), (ins),
+ "br.ret.sptk.many rp",
+ [(retflag)]>, isB; // return
+def : Pat<(ret), (RET)>;
+
+// the evil stop bit of despair
+def STOP : PseudoInstIA64<(outs), (ins variable_ops), ";;">;
+
diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h
new file mode 100644
index 0000000..fb93056
--- /dev/null
+++ b/lib/Target/IA64/IA64MachineFunctionInfo.h
@@ -0,0 +1,34 @@
+//===-- IA64MachineFunctionInfo.h - IA64 machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares IA64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64MACHINEFUNCTIONINFO_H
+#define IA64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+//#include "IA64JITInfo.h"
+
+namespace llvm {
+
+class IA64FunctionInfo : public MachineFunctionInfo {
+
+public:
+ unsigned outRegsUsed; // how many 'out' registers are used
+ // by this machinefunction? (used to compute the appropriate
+ // entry in the 'alloc' instruction at the top of the
+ // machinefunction)
+ IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; };
+
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.cpp b/lib/Target/IA64/IA64RegisterInfo.cpp
new file mode 100644
index 0000000..7ad6f51
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.cpp
@@ -0,0 +1,319 @@
+//===- IA64RegisterInfo.cpp - IA64 Register Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on IA64.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64.h"
+#include "IA64RegisterInfo.h"
+#include "IA64InstrBuilder.h"
+#include "IA64MachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+IA64RegisterInfo::IA64RegisterInfo(const TargetInstrInfo &tii)
+ : IA64GenRegisterInfo(IA64::ADJUSTCALLSTACKDOWN, IA64::ADJUSTCALLSTACKUP),
+ TII(tii) {}
+
+const unsigned* IA64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ IA64::r5, 0
+ };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+IA64RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &IA64::GRRegClass, 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector IA64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(IA64::r0);
+ Reserved.set(IA64::r1);
+ Reserved.set(IA64::r2);
+ Reserved.set(IA64::r5);
+ Reserved.set(IA64::r12);
+ Reserved.set(IA64::r13);
+ Reserved.set(IA64::r22);
+ Reserved.set(IA64::rp);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool IA64RegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+void IA64RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (hasFP(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'sub SP, <amt>' and the adjcallstackdown instruction into 'add SP,
+ // <amt>'
+ MachineInstr *Old = I;
+ unsigned Amount = Old->getOperand(0).getImm();
+ DebugLoc dl = Old->getDebugLoc();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
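+      // e.g. with Align = 16, an Amount of 40 rounds up to
+      // (40+15)/16*16 = 48.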
+
+ // Replace the pseudo instruction with a new instruction...
+ if (Old->getOpcode() == IA64::ADJUSTCALLSTACKDOWN) {
+ BuildMI(MBB, I, dl, TII.get(IA64::ADDIMM22), IA64::r12)
+ .addReg(IA64::r12).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == IA64::ADJUSTCALLSTACKUP);
+ BuildMI(MBB, I, dl, TII.get(IA64::ADDIMM22), IA64::r12)
+ .addReg(IA64::r12).addImm(Amount);
+ }
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void IA64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS)const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // choose a base register: ( hasFP? framepointer : stack pointer )
+ unsigned BaseRegister = FP ? IA64::r5 : IA64::r12;
+ // Add the base register
+ MI.getOperand(i).ChangeToRegister(BaseRegister, false);
+
+  // Now add the frame object offset to the offset from the base register.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ Offset += MF.getFrameInfo()->getStackSize();
+
+ // XXX: we use 'r22' as another hack+slash temporary register here :(
+ if (Offset <= 8191 && Offset >= -8192) { // smallish offset
+ // Fix up the old:
+ MI.getOperand(i).ChangeToRegister(IA64::r22, false);
+    // insert the new:
+ BuildMI(MBB, II, dl, TII.get(IA64::ADDIMM22), IA64::r22)
+ .addReg(BaseRegister).addImm(Offset);
+ } else { // it's big
+    // fix up the old:
+ MI.getOperand(i).ChangeToRegister(IA64::r22, false);
+ BuildMI(MBB, II, dl, TII.get(IA64::MOVLIMM64), IA64::r22).addImm(Offset);
+ BuildMI(MBB, II, dl, TII.get(IA64::ADD), IA64::r22).addReg(BaseRegister)
+ .addReg(IA64::r22);
+ }
+
+}
+
+void IA64RegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool FP = hasFP(MF);
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // first, we handle the 'alloc' instruction, that should be right up the
+ // top of any function
+ static const unsigned RegsInOrder[96] = { // there are 96 GPRs the
+ // RSE worries about
+ IA64::r32, IA64::r33, IA64::r34, IA64::r35,
+ IA64::r36, IA64::r37, IA64::r38, IA64::r39, IA64::r40, IA64::r41,
+ IA64::r42, IA64::r43, IA64::r44, IA64::r45, IA64::r46, IA64::r47,
+ IA64::r48, IA64::r49, IA64::r50, IA64::r51, IA64::r52, IA64::r53,
+ IA64::r54, IA64::r55, IA64::r56, IA64::r57, IA64::r58, IA64::r59,
+ IA64::r60, IA64::r61, IA64::r62, IA64::r63, IA64::r64, IA64::r65,
+ IA64::r66, IA64::r67, IA64::r68, IA64::r69, IA64::r70, IA64::r71,
+ IA64::r72, IA64::r73, IA64::r74, IA64::r75, IA64::r76, IA64::r77,
+ IA64::r78, IA64::r79, IA64::r80, IA64::r81, IA64::r82, IA64::r83,
+ IA64::r84, IA64::r85, IA64::r86, IA64::r87, IA64::r88, IA64::r89,
+ IA64::r90, IA64::r91, IA64::r92, IA64::r93, IA64::r94, IA64::r95,
+ IA64::r96, IA64::r97, IA64::r98, IA64::r99, IA64::r100, IA64::r101,
+ IA64::r102, IA64::r103, IA64::r104, IA64::r105, IA64::r106, IA64::r107,
+ IA64::r108, IA64::r109, IA64::r110, IA64::r111, IA64::r112, IA64::r113,
+ IA64::r114, IA64::r115, IA64::r116, IA64::r117, IA64::r118, IA64::r119,
+ IA64::r120, IA64::r121, IA64::r122, IA64::r123, IA64::r124, IA64::r125,
+ IA64::r126, IA64::r127 };
+
+ unsigned numStackedGPRsUsed=0;
+ for (int i=0; i != 96; i++) {
+ if (MF.getRegInfo().isPhysRegUsed(RegsInOrder[i]))
+      numStackedGPRsUsed=i+1; // (i+1, not ++: consider fn(fp, fp, int))
+ }
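+  // Note the highest-index semantics: if only r34 is marked used (say, the
+  // integer argument of fn(fp, fp, int), whose fp args live in FP regs),
+  // we still need 3 stacked GPRs, since the RSE frame is contiguous
+  // from r32.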
+
+ unsigned numOutRegsUsed=MF.getInfo<IA64FunctionInfo>()->outRegsUsed;
+
+ // XXX FIXME : this code should be a bit more reliable (in case there _isn't_
+ // a pseudo_alloc in the MBB)
+ unsigned dstRegOfPseudoAlloc;
+ for(MBBI = MBB.begin(); /*MBBI->getOpcode() != IA64::PSEUDO_ALLOC*/; ++MBBI) {
+ assert(MBBI != MBB.end());
+ if(MBBI->getOpcode() == IA64::PSEUDO_ALLOC) {
+ dstRegOfPseudoAlloc=MBBI->getOperand(0).getReg();
+ break;
+ }
+ }
+
+ if (MBBI != MBB.end()) dl = MBBI->getDebugLoc();
+
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ALLOC)).
+ addReg(dstRegOfPseudoAlloc).addImm(0).
+ addImm(numStackedGPRsUsed).addImm(numOutRegsUsed).addImm(0);
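+
+  // Schematically this emits
+  //   alloc <dst> = ar.pfs, 0, <numStackedGPRsUsed>, <numOutRegsUsed>, 0
+  // matching the "alloc $dst = ar.pfs,$inputs,$locals,$outputs,$rotating"
+  // form defined in IA64InstrInfo.td.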
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned NumBytes = MFI->getStackSize();
+
+ if(FP)
+ NumBytes += 8; // reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0)
+ return;
+
+ // Add 16 bytes at the bottom of the stack (scratch area)
+ // and round the size to a multiple of the alignment.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Size = 16 + (FP ? 8 : 0);
+ NumBytes = (NumBytes+Size+Align-1)/Align*Align;
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r12 -= numbytes
+ if (NumBytes <= 8191) {
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ADDIMM22),IA64::r12).addReg(IA64::r12).
+ addImm(-NumBytes);
+ } else { // we use r22 as a scratch register here
+ // first load the decrement into r22
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::MOVLIMM64), IA64::r22).
+ addImm(-NumBytes);
+ // FIXME: MOVLSI32 expects a _u_32imm
+ // then add (subtract) it to r12 (stack ptr)
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ADD), IA64::r12)
+ .addReg(IA64::r12).addReg(IA64::r22);
+
+ }
+
+ // now if we need to, save the old FP and set the new
+ if (FP) {
+ BuildMI(MBB, MBBI,dl,TII.get(IA64::ST8)).addReg(IA64::r12).addReg(IA64::r5);
+ // this must be the last instr in the prolog ? (XXX: why??)
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::MOV), IA64::r5).addReg(IA64::r12);
+ }
+
+}
+
+void IA64RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert(MBBI->getOpcode() == IA64::RET &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ unsigned NumBytes = MFI->getStackSize();
+
+  // now, if we need to, restore the old FP
+  if (FP) {
+    // copy the FP into the SP (discards allocas)
+    BuildMI(MBB, MBBI, dl, TII.get(IA64::MOV), IA64::r12).addReg(IA64::r5);
+    // restore the FP
+    BuildMI(MBB, MBBI, dl, TII.get(IA64::LD8), IA64::r5).addReg(IA64::r5);
+ }
+
+ if (NumBytes != 0) {
+ if (NumBytes <= 8191) {
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ADDIMM22),IA64::r12).
+ addReg(IA64::r12).addImm(NumBytes);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::MOVLIMM64), IA64::r22).
+ addImm(NumBytes);
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ADD), IA64::r12).addReg(IA64::r12).
+ addReg(IA64::r22);
+ }
+ }
+}
+
+unsigned IA64RegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned IA64RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? IA64::r5 : IA64::r12;
+}
+
+unsigned IA64RegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned IA64RegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int IA64RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "IA64GenRegisterInfo.inc"
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.h b/lib/Target/IA64/IA64RegisterInfo.h
new file mode 100644
index 0000000..0c5083e
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.h
@@ -0,0 +1,63 @@
+//===- IA64RegisterInfo.h - IA64 Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64REGISTERINFO_H
+#define IA64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "IA64GenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct IA64RegisterInfo : public IA64GenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ IA64RegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.td b/lib/Target/IA64/IA64RegisterInfo.td
new file mode 100644
index 0000000..dd72dc3
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.td
@@ -0,0 +1,509 @@
+//===- IA64RegisterInfo.td - Describe the IA64 Register File ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the IA64 register file, defining the registers
+// themselves, aliases between the registers, and the register classes built
+// out of the registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+
+class IA64Register<string n> : Register<n> {
+ let Namespace = "IA64";
+}
+
+// GR - One of 128 64-bit general registers
+class GR<bits<7> num, string n> : IA64Register<n> {
+ field bits<7> Num = num;
+}
+
+// FP - One of 128 82-bit floating-point registers
+class FP<bits<7> num, string n> : IA64Register<n> {
+ field bits<7> Num = num;
+}
+
+// PR - One of 64 1-bit predicate registers
+class PR<bits<6> num, string n> : IA64Register<n> {
+ field bits<6> Num = num;
+}
+
+/* general registers */
+def r0 : GR< 0, "r0">, DwarfRegNum<[0]>;
+def r1 : GR< 1, "r1">, DwarfRegNum<[1]>;
+def r2 : GR< 2, "r2">, DwarfRegNum<[2]>;
+def r3 : GR< 3, "r3">, DwarfRegNum<[3]>;
+def r4 : GR< 4, "r4">, DwarfRegNum<[4]>;
+def r5 : GR< 5, "r5">, DwarfRegNum<[5]>;
+def r6 : GR< 6, "r6">, DwarfRegNum<[6]>;
+def r7 : GR< 7, "r7">, DwarfRegNum<[7]>;
+def r8 : GR< 8, "r8">, DwarfRegNum<[8]>;
+def r9 : GR< 9, "r9">, DwarfRegNum<[9]>;
+def r10 : GR< 10, "r10">, DwarfRegNum<[10]>;
+def r11 : GR< 11, "r11">, DwarfRegNum<[11]>;
+def r12 : GR< 12, "r12">, DwarfRegNum<[12]>;
+def r13 : GR< 13, "r13">, DwarfRegNum<[13]>;
+def r14 : GR< 14, "r14">, DwarfRegNum<[14]>;
+def r15 : GR< 15, "r15">, DwarfRegNum<[15]>;
+def r16 : GR< 16, "r16">, DwarfRegNum<[16]>;
+def r17 : GR< 17, "r17">, DwarfRegNum<[17]>;
+def r18 : GR< 18, "r18">, DwarfRegNum<[18]>;
+def r19 : GR< 19, "r19">, DwarfRegNum<[19]>;
+def r20 : GR< 20, "r20">, DwarfRegNum<[20]>;
+def r21 : GR< 21, "r21">, DwarfRegNum<[21]>;
+def r22 : GR< 22, "r22">, DwarfRegNum<[22]>;
+def r23 : GR< 23, "r23">, DwarfRegNum<[23]>;
+def r24 : GR< 24, "r24">, DwarfRegNum<[24]>;
+def r25 : GR< 25, "r25">, DwarfRegNum<[25]>;
+def r26 : GR< 26, "r26">, DwarfRegNum<[26]>;
+def r27 : GR< 27, "r27">, DwarfRegNum<[27]>;
+def r28 : GR< 28, "r28">, DwarfRegNum<[28]>;
+def r29 : GR< 29, "r29">, DwarfRegNum<[29]>;
+def r30 : GR< 30, "r30">, DwarfRegNum<[30]>;
+def r31 : GR< 31, "r31">, DwarfRegNum<[31]>;
+def r32 : GR< 32, "r32">, DwarfRegNum<[32]>;
+def r33 : GR< 33, "r33">, DwarfRegNum<[33]>;
+def r34 : GR< 34, "r34">, DwarfRegNum<[34]>;
+def r35 : GR< 35, "r35">, DwarfRegNum<[35]>;
+def r36 : GR< 36, "r36">, DwarfRegNum<[36]>;
+def r37 : GR< 37, "r37">, DwarfRegNum<[37]>;
+def r38 : GR< 38, "r38">, DwarfRegNum<[38]>;
+def r39 : GR< 39, "r39">, DwarfRegNum<[39]>;
+def r40 : GR< 40, "r40">, DwarfRegNum<[40]>;
+def r41 : GR< 41, "r41">, DwarfRegNum<[41]>;
+def r42 : GR< 42, "r42">, DwarfRegNum<[42]>;
+def r43 : GR< 43, "r43">, DwarfRegNum<[43]>;
+def r44 : GR< 44, "r44">, DwarfRegNum<[44]>;
+def r45 : GR< 45, "r45">, DwarfRegNum<[45]>;
+def r46 : GR< 46, "r46">, DwarfRegNum<[46]>;
+def r47 : GR< 47, "r47">, DwarfRegNum<[47]>;
+def r48 : GR< 48, "r48">, DwarfRegNum<[48]>;
+def r49 : GR< 49, "r49">, DwarfRegNum<[49]>;
+def r50 : GR< 50, "r50">, DwarfRegNum<[50]>;
+def r51 : GR< 51, "r51">, DwarfRegNum<[51]>;
+def r52 : GR< 52, "r52">, DwarfRegNum<[52]>;
+def r53 : GR< 53, "r53">, DwarfRegNum<[53]>;
+def r54 : GR< 54, "r54">, DwarfRegNum<[54]>;
+def r55 : GR< 55, "r55">, DwarfRegNum<[55]>;
+def r56 : GR< 56, "r56">, DwarfRegNum<[56]>;
+def r57 : GR< 57, "r57">, DwarfRegNum<[57]>;
+def r58 : GR< 58, "r58">, DwarfRegNum<[58]>;
+def r59 : GR< 59, "r59">, DwarfRegNum<[59]>;
+def r60 : GR< 60, "r60">, DwarfRegNum<[60]>;
+def r61 : GR< 61, "r61">, DwarfRegNum<[61]>;
+def r62 : GR< 62, "r62">, DwarfRegNum<[62]>;
+def r63 : GR< 63, "r63">, DwarfRegNum<[63]>;
+def r64 : GR< 64, "r64">, DwarfRegNum<[64]>;
+def r65 : GR< 65, "r65">, DwarfRegNum<[65]>;
+def r66 : GR< 66, "r66">, DwarfRegNum<[66]>;
+def r67 : GR< 67, "r67">, DwarfRegNum<[67]>;
+def r68 : GR< 68, "r68">, DwarfRegNum<[68]>;
+def r69 : GR< 69, "r69">, DwarfRegNum<[69]>;
+def r70 : GR< 70, "r70">, DwarfRegNum<[70]>;
+def r71 : GR< 71, "r71">, DwarfRegNum<[71]>;
+def r72 : GR< 72, "r72">, DwarfRegNum<[72]>;
+def r73 : GR< 73, "r73">, DwarfRegNum<[73]>;
+def r74 : GR< 74, "r74">, DwarfRegNum<[74]>;
+def r75 : GR< 75, "r75">, DwarfRegNum<[75]>;
+def r76 : GR< 76, "r76">, DwarfRegNum<[76]>;
+def r77 : GR< 77, "r77">, DwarfRegNum<[77]>;
+def r78 : GR< 78, "r78">, DwarfRegNum<[78]>;
+def r79 : GR< 79, "r79">, DwarfRegNum<[79]>;
+def r80 : GR< 80, "r80">, DwarfRegNum<[80]>;
+def r81 : GR< 81, "r81">, DwarfRegNum<[81]>;
+def r82 : GR< 82, "r82">, DwarfRegNum<[82]>;
+def r83 : GR< 83, "r83">, DwarfRegNum<[83]>;
+def r84 : GR< 84, "r84">, DwarfRegNum<[84]>;
+def r85 : GR< 85, "r85">, DwarfRegNum<[85]>;
+def r86 : GR< 86, "r86">, DwarfRegNum<[86]>;
+def r87 : GR< 87, "r87">, DwarfRegNum<[87]>;
+def r88 : GR< 88, "r88">, DwarfRegNum<[88]>;
+def r89 : GR< 89, "r89">, DwarfRegNum<[89]>;
+def r90 : GR< 90, "r90">, DwarfRegNum<[90]>;
+def r91 : GR< 91, "r91">, DwarfRegNum<[91]>;
+def r92 : GR< 92, "r92">, DwarfRegNum<[92]>;
+def r93 : GR< 93, "r93">, DwarfRegNum<[93]>;
+def r94 : GR< 94, "r94">, DwarfRegNum<[94]>;
+def r95 : GR< 95, "r95">, DwarfRegNum<[95]>;
+def r96 : GR< 96, "r96">, DwarfRegNum<[96]>;
+def r97 : GR< 97, "r97">, DwarfRegNum<[97]>;
+def r98 : GR< 98, "r98">, DwarfRegNum<[98]>;
+def r99 : GR< 99, "r99">, DwarfRegNum<[99]>;
+def r100 : GR< 100, "r100">, DwarfRegNum<[100]>;
+def r101 : GR< 101, "r101">, DwarfRegNum<[101]>;
+def r102 : GR< 102, "r102">, DwarfRegNum<[102]>;
+def r103 : GR< 103, "r103">, DwarfRegNum<[103]>;
+def r104 : GR< 104, "r104">, DwarfRegNum<[104]>;
+def r105 : GR< 105, "r105">, DwarfRegNum<[105]>;
+def r106 : GR< 106, "r106">, DwarfRegNum<[106]>;
+def r107 : GR< 107, "r107">, DwarfRegNum<[107]>;
+def r108 : GR< 108, "r108">, DwarfRegNum<[108]>;
+def r109 : GR< 109, "r109">, DwarfRegNum<[109]>;
+def r110 : GR< 110, "r110">, DwarfRegNum<[110]>;
+def r111 : GR< 111, "r111">, DwarfRegNum<[111]>;
+def r112 : GR< 112, "r112">, DwarfRegNum<[112]>;
+def r113 : GR< 113, "r113">, DwarfRegNum<[113]>;
+def r114 : GR< 114, "r114">, DwarfRegNum<[114]>;
+def r115 : GR< 115, "r115">, DwarfRegNum<[115]>;
+def r116 : GR< 116, "r116">, DwarfRegNum<[116]>;
+def r117 : GR< 117, "r117">, DwarfRegNum<[117]>;
+def r118 : GR< 118, "r118">, DwarfRegNum<[118]>;
+def r119 : GR< 119, "r119">, DwarfRegNum<[119]>;
+def r120 : GR< 120, "r120">, DwarfRegNum<[120]>;
+def r121 : GR< 121, "r121">, DwarfRegNum<[121]>;
+def r122 : GR< 122, "r122">, DwarfRegNum<[122]>;
+def r123 : GR< 123, "r123">, DwarfRegNum<[123]>;
+def r124 : GR< 124, "r124">, DwarfRegNum<[124]>;
+def r125 : GR< 125, "r125">, DwarfRegNum<[125]>;
+def r126 : GR< 126, "r126">, DwarfRegNum<[126]>;
+def r127 : GR< 127, "r127">, DwarfRegNum<[127]>;
+
+/* floating-point registers */
+def F0 : FP< 0, "f0">, DwarfRegNum<[128]>;
+def F1 : FP< 1, "f1">, DwarfRegNum<[129]>;
+def F2 : FP< 2, "f2">, DwarfRegNum<[130]>;
+def F3 : FP< 3, "f3">, DwarfRegNum<[131]>;
+def F4 : FP< 4, "f4">, DwarfRegNum<[132]>;
+def F5 : FP< 5, "f5">, DwarfRegNum<[133]>;
+def F6 : FP< 6, "f6">, DwarfRegNum<[134]>;
+def F7 : FP< 7, "f7">, DwarfRegNum<[135]>;
+def F8 : FP< 8, "f8">, DwarfRegNum<[136]>;
+def F9 : FP< 9, "f9">, DwarfRegNum<[137]>;
+def F10 : FP< 10, "f10">, DwarfRegNum<[138]>;
+def F11 : FP< 11, "f11">, DwarfRegNum<[139]>;
+def F12 : FP< 12, "f12">, DwarfRegNum<[140]>;
+def F13 : FP< 13, "f13">, DwarfRegNum<[141]>;
+def F14 : FP< 14, "f14">, DwarfRegNum<[142]>;
+def F15 : FP< 15, "f15">, DwarfRegNum<[143]>;
+def F16 : FP< 16, "f16">, DwarfRegNum<[144]>;
+def F17 : FP< 17, "f17">, DwarfRegNum<[145]>;
+def F18 : FP< 18, "f18">, DwarfRegNum<[146]>;
+def F19 : FP< 19, "f19">, DwarfRegNum<[147]>;
+def F20 : FP< 20, "f20">, DwarfRegNum<[148]>;
+def F21 : FP< 21, "f21">, DwarfRegNum<[149]>;
+def F22 : FP< 22, "f22">, DwarfRegNum<[150]>;
+def F23 : FP< 23, "f23">, DwarfRegNum<[151]>;
+def F24 : FP< 24, "f24">, DwarfRegNum<[152]>;
+def F25 : FP< 25, "f25">, DwarfRegNum<[153]>;
+def F26 : FP< 26, "f26">, DwarfRegNum<[154]>;
+def F27 : FP< 27, "f27">, DwarfRegNum<[155]>;
+def F28 : FP< 28, "f28">, DwarfRegNum<[156]>;
+def F29 : FP< 29, "f29">, DwarfRegNum<[157]>;
+def F30 : FP< 30, "f30">, DwarfRegNum<[158]>;
+def F31 : FP< 31, "f31">, DwarfRegNum<[159]>;
+def F32 : FP< 32, "f32">, DwarfRegNum<[160]>;
+def F33 : FP< 33, "f33">, DwarfRegNum<[161]>;
+def F34 : FP< 34, "f34">, DwarfRegNum<[162]>;
+def F35 : FP< 35, "f35">, DwarfRegNum<[163]>;
+def F36 : FP< 36, "f36">, DwarfRegNum<[164]>;
+def F37 : FP< 37, "f37">, DwarfRegNum<[165]>;
+def F38 : FP< 38, "f38">, DwarfRegNum<[166]>;
+def F39 : FP< 39, "f39">, DwarfRegNum<[167]>;
+def F40 : FP< 40, "f40">, DwarfRegNum<[168]>;
+def F41 : FP< 41, "f41">, DwarfRegNum<[169]>;
+def F42 : FP< 42, "f42">, DwarfRegNum<[170]>;
+def F43 : FP< 43, "f43">, DwarfRegNum<[171]>;
+def F44 : FP< 44, "f44">, DwarfRegNum<[172]>;
+def F45 : FP< 45, "f45">, DwarfRegNum<[173]>;
+def F46 : FP< 46, "f46">, DwarfRegNum<[174]>;
+def F47 : FP< 47, "f47">, DwarfRegNum<[175]>;
+def F48 : FP< 48, "f48">, DwarfRegNum<[176]>;
+def F49 : FP< 49, "f49">, DwarfRegNum<[177]>;
+def F50 : FP< 50, "f50">, DwarfRegNum<[178]>;
+def F51 : FP< 51, "f51">, DwarfRegNum<[179]>;
+def F52 : FP< 52, "f52">, DwarfRegNum<[180]>;
+def F53 : FP< 53, "f53">, DwarfRegNum<[181]>;
+def F54 : FP< 54, "f54">, DwarfRegNum<[182]>;
+def F55 : FP< 55, "f55">, DwarfRegNum<[183]>;
+def F56 : FP< 56, "f56">, DwarfRegNum<[184]>;
+def F57 : FP< 57, "f57">, DwarfRegNum<[185]>;
+def F58 : FP< 58, "f58">, DwarfRegNum<[186]>;
+def F59 : FP< 59, "f59">, DwarfRegNum<[187]>;
+def F60 : FP< 60, "f60">, DwarfRegNum<[188]>;
+def F61 : FP< 61, "f61">, DwarfRegNum<[189]>;
+def F62 : FP< 62, "f62">, DwarfRegNum<[190]>;
+def F63 : FP< 63, "f63">, DwarfRegNum<[191]>;
+def F64 : FP< 64, "f64">, DwarfRegNum<[192]>;
+def F65 : FP< 65, "f65">, DwarfRegNum<[193]>;
+def F66 : FP< 66, "f66">, DwarfRegNum<[194]>;
+def F67 : FP< 67, "f67">, DwarfRegNum<[195]>;
+def F68 : FP< 68, "f68">, DwarfRegNum<[196]>;
+def F69 : FP< 69, "f69">, DwarfRegNum<[197]>;
+def F70 : FP< 70, "f70">, DwarfRegNum<[198]>;
+def F71 : FP< 71, "f71">, DwarfRegNum<[199]>;
+def F72 : FP< 72, "f72">, DwarfRegNum<[200]>;
+def F73 : FP< 73, "f73">, DwarfRegNum<[201]>;
+def F74 : FP< 74, "f74">, DwarfRegNum<[202]>;
+def F75 : FP< 75, "f75">, DwarfRegNum<[203]>;
+def F76 : FP< 76, "f76">, DwarfRegNum<[204]>;
+def F77 : FP< 77, "f77">, DwarfRegNum<[205]>;
+def F78 : FP< 78, "f78">, DwarfRegNum<[206]>;
+def F79 : FP< 79, "f79">, DwarfRegNum<[207]>;
+def F80 : FP< 80, "f80">, DwarfRegNum<[208]>;
+def F81 : FP< 81, "f81">, DwarfRegNum<[209]>;
+def F82 : FP< 82, "f82">, DwarfRegNum<[210]>;
+def F83 : FP< 83, "f83">, DwarfRegNum<[211]>;
+def F84 : FP< 84, "f84">, DwarfRegNum<[212]>;
+def F85 : FP< 85, "f85">, DwarfRegNum<[213]>;
+def F86 : FP< 86, "f86">, DwarfRegNum<[214]>;
+def F87 : FP< 87, "f87">, DwarfRegNum<[215]>;
+def F88 : FP< 88, "f88">, DwarfRegNum<[216]>;
+def F89 : FP< 89, "f89">, DwarfRegNum<[217]>;
+def F90 : FP< 90, "f90">, DwarfRegNum<[218]>;
+def F91 : FP< 91, "f91">, DwarfRegNum<[219]>;
+def F92 : FP< 92, "f92">, DwarfRegNum<[220]>;
+def F93 : FP< 93, "f93">, DwarfRegNum<[221]>;
+def F94 : FP< 94, "f94">, DwarfRegNum<[222]>;
+def F95 : FP< 95, "f95">, DwarfRegNum<[223]>;
+def F96 : FP< 96, "f96">, DwarfRegNum<[224]>;
+def F97 : FP< 97, "f97">, DwarfRegNum<[225]>;
+def F98 : FP< 98, "f98">, DwarfRegNum<[226]>;
+def F99 : FP< 99, "f99">, DwarfRegNum<[227]>;
+def F100 : FP< 100, "f100">, DwarfRegNum<[228]>;
+def F101 : FP< 101, "f101">, DwarfRegNum<[229]>;
+def F102 : FP< 102, "f102">, DwarfRegNum<[230]>;
+def F103 : FP< 103, "f103">, DwarfRegNum<[231]>;
+def F104 : FP< 104, "f104">, DwarfRegNum<[232]>;
+def F105 : FP< 105, "f105">, DwarfRegNum<[233]>;
+def F106 : FP< 106, "f106">, DwarfRegNum<[234]>;
+def F107 : FP< 107, "f107">, DwarfRegNum<[235]>;
+def F108 : FP< 108, "f108">, DwarfRegNum<[236]>;
+def F109 : FP< 109, "f109">, DwarfRegNum<[237]>;
+def F110 : FP< 110, "f110">, DwarfRegNum<[238]>;
+def F111 : FP< 111, "f111">, DwarfRegNum<[239]>;
+def F112 : FP< 112, "f112">, DwarfRegNum<[240]>;
+def F113 : FP< 113, "f113">, DwarfRegNum<[241]>;
+def F114 : FP< 114, "f114">, DwarfRegNum<[242]>;
+def F115 : FP< 115, "f115">, DwarfRegNum<[243]>;
+def F116 : FP< 116, "f116">, DwarfRegNum<[244]>;
+def F117 : FP< 117, "f117">, DwarfRegNum<[245]>;
+def F118 : FP< 118, "f118">, DwarfRegNum<[246]>;
+def F119 : FP< 119, "f119">, DwarfRegNum<[247]>;
+def F120 : FP< 120, "f120">, DwarfRegNum<[248]>;
+def F121 : FP< 121, "f121">, DwarfRegNum<[249]>;
+def F122 : FP< 122, "f122">, DwarfRegNum<[250]>;
+def F123 : FP< 123, "f123">, DwarfRegNum<[251]>;
+def F124 : FP< 124, "f124">, DwarfRegNum<[252]>;
+def F125 : FP< 125, "f125">, DwarfRegNum<[253]>;
+def F126 : FP< 126, "f126">, DwarfRegNum<[254]>;
+def F127 : FP< 127, "f127">, DwarfRegNum<[255]>;
+
+/* predicate registers */
+def p0 : PR< 0, "p0">, DwarfRegNum<[256]>;
+def p1 : PR< 1, "p1">, DwarfRegNum<[257]>;
+def p2 : PR< 2, "p2">, DwarfRegNum<[258]>;
+def p3 : PR< 3, "p3">, DwarfRegNum<[259]>;
+def p4 : PR< 4, "p4">, DwarfRegNum<[260]>;
+def p5 : PR< 5, "p5">, DwarfRegNum<[261]>;
+def p6 : PR< 6, "p6">, DwarfRegNum<[262]>;
+def p7 : PR< 7, "p7">, DwarfRegNum<[263]>;
+def p8 : PR< 8, "p8">, DwarfRegNum<[264]>;
+def p9 : PR< 9, "p9">, DwarfRegNum<[265]>;
+def p10 : PR< 10, "p10">, DwarfRegNum<[266]>;
+def p11 : PR< 11, "p11">, DwarfRegNum<[267]>;
+def p12 : PR< 12, "p12">, DwarfRegNum<[268]>;
+def p13 : PR< 13, "p13">, DwarfRegNum<[269]>;
+def p14 : PR< 14, "p14">, DwarfRegNum<[270]>;
+def p15 : PR< 15, "p15">, DwarfRegNum<[271]>;
+def p16 : PR< 16, "p16">, DwarfRegNum<[272]>;
+def p17 : PR< 17, "p17">, DwarfRegNum<[273]>;
+def p18 : PR< 18, "p18">, DwarfRegNum<[274]>;
+def p19 : PR< 19, "p19">, DwarfRegNum<[275]>;
+def p20 : PR< 20, "p20">, DwarfRegNum<[276]>;
+def p21 : PR< 21, "p21">, DwarfRegNum<[277]>;
+def p22 : PR< 22, "p22">, DwarfRegNum<[278]>;
+def p23 : PR< 23, "p23">, DwarfRegNum<[279]>;
+def p24 : PR< 24, "p24">, DwarfRegNum<[280]>;
+def p25 : PR< 25, "p25">, DwarfRegNum<[281]>;
+def p26 : PR< 26, "p26">, DwarfRegNum<[282]>;
+def p27 : PR< 27, "p27">, DwarfRegNum<[283]>;
+def p28 : PR< 28, "p28">, DwarfRegNum<[284]>;
+def p29 : PR< 29, "p29">, DwarfRegNum<[285]>;
+def p30 : PR< 30, "p30">, DwarfRegNum<[286]>;
+def p31 : PR< 31, "p31">, DwarfRegNum<[287]>;
+def p32 : PR< 32, "p32">, DwarfRegNum<[288]>;
+def p33 : PR< 33, "p33">, DwarfRegNum<[289]>;
+def p34 : PR< 34, "p34">, DwarfRegNum<[290]>;
+def p35 : PR< 35, "p35">, DwarfRegNum<[291]>;
+def p36 : PR< 36, "p36">, DwarfRegNum<[292]>;
+def p37 : PR< 37, "p37">, DwarfRegNum<[293]>;
+def p38 : PR< 38, "p38">, DwarfRegNum<[294]>;
+def p39 : PR< 39, "p39">, DwarfRegNum<[295]>;
+def p40 : PR< 40, "p40">, DwarfRegNum<[296]>;
+def p41 : PR< 41, "p41">, DwarfRegNum<[297]>;
+def p42 : PR< 42, "p42">, DwarfRegNum<[298]>;
+def p43 : PR< 43, "p43">, DwarfRegNum<[299]>;
+def p44 : PR< 44, "p44">, DwarfRegNum<[300]>;
+def p45 : PR< 45, "p45">, DwarfRegNum<[301]>;
+def p46 : PR< 46, "p46">, DwarfRegNum<[302]>;
+def p47 : PR< 47, "p47">, DwarfRegNum<[303]>;
+def p48 : PR< 48, "p48">, DwarfRegNum<[304]>;
+def p49 : PR< 49, "p49">, DwarfRegNum<[305]>;
+def p50 : PR< 50, "p50">, DwarfRegNum<[306]>;
+def p51 : PR< 51, "p51">, DwarfRegNum<[307]>;
+def p52 : PR< 52, "p52">, DwarfRegNum<[308]>;
+def p53 : PR< 53, "p53">, DwarfRegNum<[309]>;
+def p54 : PR< 54, "p54">, DwarfRegNum<[310]>;
+def p55 : PR< 55, "p55">, DwarfRegNum<[311]>;
+def p56 : PR< 56, "p56">, DwarfRegNum<[312]>;
+def p57 : PR< 57, "p57">, DwarfRegNum<[313]>;
+def p58 : PR< 58, "p58">, DwarfRegNum<[314]>;
+def p59 : PR< 59, "p59">, DwarfRegNum<[315]>;
+def p60 : PR< 60, "p60">, DwarfRegNum<[316]>;
+def p61 : PR< 61, "p61">, DwarfRegNum<[317]>;
+def p62 : PR< 62, "p62">, DwarfRegNum<[318]>;
+def p63 : PR< 63, "p63">, DwarfRegNum<[319]>;
+
+// XXX: this is temporary; we'll eventually have the output registers
+// in the general-purpose register class too(?)
+def out0 : GR<0, "out0">, DwarfRegNum<[120]>;
+def out1 : GR<1, "out1">, DwarfRegNum<[121]>;
+def out2 : GR<2, "out2">, DwarfRegNum<[122]>;
+def out3 : GR<3, "out3">, DwarfRegNum<[123]>;
+def out4 : GR<4, "out4">, DwarfRegNum<[124]>;
+def out5 : GR<5, "out5">, DwarfRegNum<[125]>;
+def out6 : GR<6, "out6">, DwarfRegNum<[126]>;
+def out7 : GR<7, "out7">, DwarfRegNum<[127]>;
+
+// application (special) registers:
+
+// "previous function state" application register
+def AR_PFS : GR<0, "ar.pfs">, DwarfRegNum<[331]>;
+
+// "return pointer" (this is really branch register b0)
+def rp : GR<0, "rp">, DwarfRegNum<[-1]>;
+
+// branch reg 6
+def B6 : GR<0, "b6">, DwarfRegNum<[326]>;
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// these are the scratch (+stacked) general registers
+// FIXME/XXX we also reserve a frame pointer (r5)
+// FIXME/XXX we also reserve r2 for spilling/filling predicates
+// in IA64RegisterInfo.cpp
+// FIXME/XXX we also reserve r22 for calculating addresses
+// in IA64RegisterInfo.cpp
+
+def GR : RegisterClass<"IA64", [i64], 64,
+ [
+
+//FIXME!: for both readability and performance, we don't want the out
+// registers to be the first ones allocated
+
+ out7, out6, out5, out4, out3, out2, out1, out0,
+ r3, r8, r9, r10, r11, r14, r15,
+ r16, r17, r18, r19, r20, r21, r23,
+ r24, r25, r26, r27, r28, r29, r30, r31,
+ r32, r33, r34, r35, r36, r37, r38, r39,
+ r40, r41, r42, r43, r44, r45, r46, r47,
+ r48, r49, r50, r51, r52, r53, r54, r55,
+ r56, r57, r58, r59, r60, r61, r62, r63,
+ r64, r65, r66, r67, r68, r69, r70, r71,
+ r72, r73, r74, r75, r76, r77, r78, r79,
+ r80, r81, r82, r83, r84, r85, r86, r87,
+ r88, r89, r90, r91, r92, r93, r94, r95,
+ r96, r97, r98, r99, r100, r101, r102, r103,
+ r104, r105, r106, r107, r108, r109, r110, r111,
+ r112, r113, r114, r115, r116, r117, r118, r119,
+ // last 17 are special (look down)
+ r120, r121, r122, r123, r124, r125, r126, r127,
+ r0, r1, r2, r5, r12, r13, r22, rp, AR_PFS]>
+ {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GRClass::iterator
+ GRClass::allocation_order_begin(const MachineFunction &MF) const {
+ // hide the 8 out? registers appropriately:
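+      // (e.g. with outRegsUsed==3 we skip out7..out3, so allocation starts
+      //  at out2)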
+ return begin()+(8-(MF.getInfo<IA64FunctionInfo>()->outRegsUsed));
+ }
+
+ GRClass::iterator
+ GRClass::allocation_order_end(const MachineFunction &MF) const {
+      // the 9 special registers: r0, r1, r2, r5, r12, r13, r22, rp and AR_PFS
+ int numReservedRegs=9;
+
+ // we also can't allocate registers for use as locals if they're already
+ // required as 'out' registers
+ numReservedRegs+=MF.getInfo<IA64FunctionInfo>()->outRegsUsed;
+ return end()-numReservedRegs; // hide registers appropriately
+ }
+ }];
+}
+
+
+// these are the scratch (+stacked) FP registers
+
+def FP : RegisterClass<"IA64", [f64], 64,
+ [F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15,
+ F32, F33, F34, F35, F36, F37, F38, F39,
+ F40, F41, F42, F43, F44, F45, F46, F47,
+ F48, F49, F50, F51, F52, F53, F54, F55,
+ F56, F57, F58, F59, F60, F61, F62, F63,
+ F64, F65, F66, F67, F68, F69, F70, F71,
+ F72, F73, F74, F75, F76, F77, F78, F79,
+ F80, F81, F82, F83, F84, F85, F86, F87,
+ F88, F89, F90, F91, F92, F93, F94, F95,
+ F96, F97, F98, F99, F100, F101, F102, F103,
+ F104, F105, F106, F107, F108, F109, F110, F111,
+ F112, F113, F114, F115, F116, F117, F118, F119,
+ F120, F121, F122, F123, F124, F125, F126, F127,
+ F0, F1]> // these last two are hidden
+ {
+// The Size/Alignment of 128 here keeps stf.spill/ldf.fill happy:
+// when storing full (82-bit) FP regs to stack slots,
+// we need them 16-byte aligned.
+ let Size=128;
+ let Alignment=128;
+
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FPClass::iterator
+ FPClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin(); // we don't hide any FP regs from the start
+ }
+
+ FPClass::iterator
+ FPClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-2; // we hide regs F0, F1 from the end
+ }
+ }];
+}
+
+// these are the predicate registers, p0 (1/TRUE) is not here
+def PR : RegisterClass<"IA64", [i1], 64,
+
+// for now, let's be wimps and only have the scratch predicate regs
+ [p6, p7, p8, p9, p10, p11, p12, p13, p14, p15]> {
+ let Size = 64;
+ }
+
+/*
+ [p1, p2, p3, p4, p5, p6, p7,
+ p8, p9, p10, p11, p12, p13, p14, p15,
+ p16, p17, p18, p19, p20, p21, p22, p23,
+ p24, p25, p26, p27, p28, p29, p30, p31,
+ p32, p33, p34, p35, p36, p37, p38, p39,
+ p40, p41, p42, p43, p44, p45, p46, p47,
+ p48, p49, p50, p51, p52, p53, p54, p55,
+ p56, p57, p58, p59, p60, p61, p62, p63]>;
+ */
diff --git a/lib/Target/IA64/IA64Subtarget.cpp b/lib/Target/IA64/IA64Subtarget.cpp
new file mode 100644
index 0000000..4eca50b
--- /dev/null
+++ b/lib/Target/IA64/IA64Subtarget.cpp
@@ -0,0 +1,18 @@
+//===-- IA64Subtarget.cpp - IA64 Subtarget Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IA64 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "IA64Subtarget.h"
+using namespace llvm;
+
+IA64Subtarget::IA64Subtarget() {}
diff --git a/lib/Target/IA64/IA64Subtarget.h b/lib/Target/IA64/IA64Subtarget.h
new file mode 100644
index 0000000..0387af5
--- /dev/null
+++ b/lib/Target/IA64/IA64Subtarget.h
@@ -0,0 +1,28 @@
+//====---- IA64Subtarget.h - Define Subtarget for the IA64 -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IA64 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64SUBTARGET_H
+#define IA64SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+namespace llvm {
+
+class IA64Subtarget : public TargetSubtarget {
+public:
+ IA64Subtarget();
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/IA64/IA64TargetAsmInfo.cpp b/lib/Target/IA64/IA64TargetAsmInfo.cpp
new file mode 100644
index 0000000..2ae8beb
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetAsmInfo.cpp
@@ -0,0 +1,44 @@
+//===-- IA64TargetAsmInfo.cpp - IA64 asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the IA64TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64TargetAsmInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+IA64TargetAsmInfo::IA64TargetAsmInfo(const TargetMachine &TM):
+ ELFTargetAsmInfo(TM) {
+ CommentString = "//";
+ Data8bitsDirective = "\tdata1\t"; // FIXME: check that we are
+ Data16bitsDirective = "\tdata2.ua\t"; // disabling auto-alignment
+ Data32bitsDirective = "\tdata4.ua\t"; // properly
+ Data64bitsDirective = "\tdata8.ua\t";
+ ZeroDirective = "\t.skip\t";
+ AsciiDirective = "\tstring\t";
+
+ GlobalVarAddrPrefix="";
+ GlobalVarAddrSuffix="";
+ FunctionAddrPrefix="@fptr(";
+ FunctionAddrSuffix=")";
+
+ // FIXME: would be nice to have rodata (no 'w') when appropriate?
+ ConstantPoolSection = "\n\t.section .data, \"aw\", \"progbits\"\n";
+}
+
+unsigned IA64TargetAsmInfo::RelocBehaviour() const {
+ return (TM.getRelocationModel() != Reloc::Static ?
+ Reloc::LocalOrGlobal : Reloc::Global);
+}
+
+// FIXME: Support small data/bss/rodata sections someday.
diff --git a/lib/Target/IA64/IA64TargetAsmInfo.h b/lib/Target/IA64/IA64TargetAsmInfo.h
new file mode 100644
index 0000000..130822e
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetAsmInfo.h
@@ -0,0 +1,33 @@
+//=====-- IA64TargetAsmInfo.h - IA64 asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the IA64TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64TARGETASMINFO_H
+#define IA64TARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class TargetMachine;
+
+ struct IA64TargetAsmInfo : public ELFTargetAsmInfo {
+ explicit IA64TargetAsmInfo(const TargetMachine &TM);
+ virtual unsigned RelocBehaviour() const;
+ };
+
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
new file mode 100644
index 0000000..878a00a
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -0,0 +1,94 @@
+//===-- IA64TargetMachine.cpp - Define TargetMachine for IA64 -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IA64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64TargetAsmInfo.h"
+#include "IA64TargetMachine.h"
+#include "IA64.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// IA64TargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int IA64TargetMachineModule;
+int IA64TargetMachineModule = 0;
+
+static RegisterTarget<IA64TargetMachine> X("ia64",
+ "IA-64 (Itanium) [experimental]");
+
+const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const {
+ return new IA64TargetAsmInfo(*this);
+}
+
+unsigned IA64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // we match [iI][aA]*64
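+  // (e.g. "ia64-unknown-linux-gnu" and "IA64" both match)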
+ bool seenIA64=false;
+ std::string TT = M.getTargetTriple();
+
+ if (TT.size() >= 4) {
+ if( (TT[0]=='i' || TT[0]=='I') &&
+ (TT[1]=='a' || TT[1]=='A') ) {
+ for(unsigned int i=2; i<(TT.size()-1); i++)
+ if(TT[i]=='6' && TT[i+1]=='4')
+ seenIA64=true;
+ }
+
+ if (seenIA64)
+ return 20; // strong match
+ }
+ // If the target triple is something non-ia64, we don't match.
+ if (!TT.empty()) return 0;
+
+#if defined(__ia64__) || defined(__IA64__)
+ return 5;
+#else
+ return 0;
+#endif
+}
+
+/// IA64TargetMachine ctor - Create an LP64 architecture model
+///
+IA64TargetMachine::IA64TargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("e-f80:128:128"),
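+    // little-endian; f80 values are 128-bit aligned (presumably matching
+    // the 16-byte stf.spill/ldf.fill slots for the 82-bit FP regs)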
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+ TLInfo(*this) { // FIXME? check this stuff
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool IA64TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel){
+ PM.add(createIA64DAGToDAGInstructionSelector(*this));
+ return false;
+}
+
+bool IA64TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Make sure everything is bundled happily
+ PM.add(createIA64BundlingPass(*this));
+ return true;
+}
+
+bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ PM.add(createIA64CodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
+
diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h
new file mode 100644
index 0000000..29d625c
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetMachine.h
@@ -0,0 +1,64 @@
+//===-- IA64TargetMachine.h - Define TargetMachine for IA64 ---*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IA64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IA64TARGETMACHINE_H
+#define LLVM_TARGET_IA64TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "IA64InstrInfo.h"
+#include "IA64ISelLowering.h"
+#include "IA64Subtarget.h"
+
+namespace llvm {
+
+class IA64TargetMachine : public LLVMTargetMachine {
+ IA64Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ IA64InstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ //IA64JITInfo JITInfo;
+ IA64TargetLowering TLInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ IA64TargetMachine(const Module &M, const std::string &FS);
+
+ virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const IA64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual IA64TargetLowering *getTargetLowering() const {
+ return const_cast<IA64TargetLowering*>(&TLInfo);
+ }
+ virtual const IA64RegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+};
+} // End llvm namespace
+
+#endif
+
+
diff --git a/lib/Target/IA64/Makefile b/lib/Target/IA64/Makefile
new file mode 100644
index 0000000..d383254
--- /dev/null
+++ b/lib/Target/IA64/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Target/IA64/Makefile -----------------------------*- Makefile -*-===##
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMIA64CodeGen
+TARGET = IA64
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = IA64GenRegisterInfo.h.inc IA64GenRegisterNames.inc \
+ IA64GenRegisterInfo.inc IA64GenInstrNames.inc \
+ IA64GenInstrInfo.inc IA64GenAsmWriter.inc \
+ IA64GenDAGISel.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/IA64/README b/lib/Target/IA64/README
new file mode 100644
index 0000000..60761ac
--- /dev/null
+++ b/lib/Target/IA64/README
@@ -0,0 +1,48 @@
+TODO:
+ - Un-bitrot ISel
+ - Hook up If-Conversion a la ARM target
+ - Hook up all branch analysis functions
+ - Instruction scheduling
+ - Bundling
+ - Dynamic Optimization
+ - Testing and bugfixing
+ - stop passing FP args in both FP *and* integer regs when not required
+ - allocate low (nonstacked) registers more aggressively
+ - clean up and thoroughly test the isel patterns.
+ - fix stacked register allocation order: (for readability) we don't want
+   the out? registers to be the first ones used
+ - fix up floating point
+ (nb http://gcc.gnu.org/wiki?pagename=ia64%20floating%20point )
+ - bundling!
+ (we will avoid the mess that is:
+ http://gcc.gnu.org/ml/gcc/2003-12/msg00832.html )
+ - instruction scheduling (hmmmm! ;)
+ - counted loop support
+ - make integer + FP mul/div more clever (we have fixed pseudocode atm)
+ - track and use comparison complements
+
+INFO:
+ - we are strictly LP64 here, no support for ILP32 on HP-UX. Linux users
+ don't need to worry about this.
+ - I have instruction scheduling/bundling pseudocode that really works
+   (it has been tested, albeit at the perl-script level),
+   so before you go write your own, send me an email!
+
+KNOWN DEFECTS AT THE CURRENT TIME:
+ - C++ vtables contain naked function pointers, not function descriptors,
+ which is bad. see http://llvm.cs.uiuc.edu/bugs/show_bug.cgi?id=406
+ - varargs are broken
+ - alloca doesn't work (indeed, stack frame layout is bogus)
+ - no support for big-endian environments
+ - (not really the backend, but...) the CFE has some issues on IA64.
+ these will probably be fixed soon.
+
+ACKNOWLEDGEMENTS:
+ - Chris Lattner (x100)
+ - Other LLVM developers ("hey, that looks familiar")
+
+CONTACT:
+ - You can email me at duraid@octopus.com.au. If you find a small bug,
+ just email me. If you find a big bug, please file a bug report
+ in bugzilla! http://llvm.cs.uiuc.edu is your one stop shop for all
+ things LLVM.
diff --git a/lib/Target/MSIL/CMakeLists.txt b/lib/Target/MSIL/CMakeLists.txt
new file mode 100644
index 0000000..b1d47ef
--- /dev/null
+++ b/lib/Target/MSIL/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_target(MSIL
+ MSILWriter.cpp
+ )
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
new file mode 100644
index 0000000..ada851d
--- /dev/null
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -0,0 +1,1680 @@
+//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to MSIL code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSILWriter.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Passes.h"
+
+namespace {
+  // TargetMachine for the MSIL backend
+ struct VISIBILITY_HIDDEN MSILTarget : public TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+
+ MSILTarget(const Module &M, const std::string &FS)
+ : DataLayout(&M) {}
+
+ virtual bool WantsWholeFile() const { return true; }
+ virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel);
+
+ // This class always works, but shouldn't be the default in most cases.
+ static unsigned getModuleMatchQuality(const Module &M) { return 1; }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ };
+}
+
+/// MSILTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int MSILTargetMachineModule;
+int MSILTargetMachineModule = 0;
+
+static RegisterTarget<MSILTarget> X("msil", "MSIL backend");
+
+bool MSILModule::runOnModule(Module &M) {
+ ModulePtr = &M;
+ TD = &getAnalysis<TargetData>();
+ bool Changed = false;
+ // Find named types.
+ TypeSymbolTable& Table = M.getTypeSymbolTable();
+ std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes();
+ for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) {
+ if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second))
+ Table.remove(I++);
+ else {
+ std::set<const Type *>::iterator T = Types.find(I->second);
+ if (T==Types.end())
+ Table.remove(I++);
+ else {
+ Types.erase(T);
+ ++I;
+ }
+ }
+ }
+ // Find unnamed types.
+ unsigned RenameCounter = 0;
+ for (std::set<const Type *>::const_iterator I = Types.begin(),
+ E = Types.end(); I!=E; ++I)
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy))
+ ++RenameCounter;
+ Changed = true;
+ }
+  // Keep a pointer to the used types for the FunctionPass.
+ UsedTypes = &getAnalysis<FindUsedTypes>().getTypes();
+ return Changed;
+}
+
+char MSILModule::ID = 0;
+char MSILWriter::ID = 0;
+
+bool MSILWriter::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ LInfo = &getAnalysis<LoopInfo>();
+ printFunction(F);
+ return false;
+}
+
+
+bool MSILWriter::doInitialization(Module &M) {
+ ModulePtr = &M;
+ Mang = new Mangler(M);
+ Out << ".assembly extern mscorlib {}\n";
+ Out << ".assembly MSIL {}\n\n";
+ Out << "// External\n";
+ printExternals();
+ Out << "// Declarations\n";
+ printDeclarations(M.getTypeSymbolTable());
+ Out << "// Definitions\n";
+ printGlobalVariables();
+ Out << "// Startup code\n";
+ printModuleStartup();
+ return false;
+}
+
+
+bool MSILWriter::doFinalization(Module &M) {
+ delete Mang;
+ return false;
+}
+
+
+void MSILWriter::printModuleStartup() {
+ Out <<
+ ".method static public int32 $MSIL_Startup() {\n"
+ "\t.entrypoint\n"
+ "\t.locals (native int i)\n"
+ "\t.locals (native int argc)\n"
+ "\t.locals (native int ptr)\n"
+ "\t.locals (void* argv)\n"
+ "\t.locals (string[] args)\n"
+ "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n"
+ "\tdup\n"
+ "\tstloc\targs\n"
+ "\tldlen\n"
+ "\tconv.i4\n"
+ "\tdup\n"
+ "\tstloc\targc\n";
+ printPtrLoad(TD->getPointerSize());
+ Out <<
+ "\tmul\n"
+ "\tlocalloc\n"
+ "\tstloc\targv\n"
+ "\tldc.i4.0\n"
+ "\tstloc\ti\n"
+ "L_01:\n"
+ "\tldloc\ti\n"
+ "\tldloc\targc\n"
+ "\tceq\n"
+ "\tbrtrue\tL_02\n"
+ "\tldloc\targs\n"
+ "\tldloc\ti\n"
+ "\tldelem.ref\n"
+ "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::"
+ "StringToHGlobalAnsi(string)\n"
+ "\tstloc\tptr\n"
+ "\tldloc\targv\n"
+ "\tldloc\ti\n";
+ printPtrLoad(TD->getPointerSize());
+ Out <<
+ "\tmul\n"
+ "\tadd\n"
+ "\tldloc\tptr\n"
+ "\tstind.i\n"
+ "\tldloc\ti\n"
+ "\tldc.i4.1\n"
+ "\tadd\n"
+ "\tstloc\ti\n"
+ "\tbr\tL_01\n"
+ "L_02:\n"
+ "\tcall void $MSIL_Init()\n";
+
+ // Call user 'main' function.
+ const Function* F = ModulePtr->getFunction("main");
+ if (!F || F->isDeclaration()) {
+ Out << "\tldc.i4.0\n\tret\n}\n";
+ return;
+ }
+ bool BadSig = true;
+ std::string Args("");
+ Function::const_arg_iterator Arg1,Arg2;
+
+ switch (F->arg_size()) {
+ case 0:
+ BadSig = false;
+ break;
+ case 1:
+ Arg1 = F->arg_begin();
+ if (Arg1->getType()->isInteger()) {
+ Out << "\tldloc\targc\n";
+ Args = getTypeName(Arg1->getType());
+ BadSig = false;
+ }
+ break;
+ case 2:
+ Arg1 = Arg2 = F->arg_begin(); ++Arg2;
+ if (Arg1->getType()->isInteger() &&
+ Arg2->getType()->getTypeID() == Type::PointerTyID) {
+ Out << "\tldloc\targc\n\tldloc\targv\n";
+ Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType());
+ BadSig = false;
+ }
+ break;
+ default:
+ BadSig = true;
+ }
+
+ bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID);
+ if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) {
+ Out << "\tldc.i4.0\n";
+ } else {
+ Out << "\tcall\t" << getTypeName(F->getReturnType()) <<
+ getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n";
+ if (RetVoid)
+ Out << "\tldc.i4.0\n";
+ else
+ Out << "\tconv.i4\n";
+ }
+ Out << "\tret\n}\n";
+}
+
+bool MSILWriter::isZeroValue(const Value* V) {
+ if (const Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+
+std::string MSILWriter::getValueName(const Value* V) {
+  // Quoting the name allows control and space characters in it.
+ return "'"+Mang->getValueName(V)+"'";
+}
+
+
+std::string MSILWriter::getLabelName(const std::string& Name) {
+ if (Name.find('.')!=std::string::npos) {
+ std::string Tmp(Name);
+    // Replace unacceptable characters in the label name.
+ for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I)
+ if (*I=='.') *I = '@';
+ return Tmp;
+ }
+ return Name;
+}
+
+
+std::string MSILWriter::getLabelName(const Value* V) {
+ return getLabelName(Mang->getValueName(V));
+}
+
+
+std::string MSILWriter::getConvModopt(unsigned CallingConvID) {
+ switch (CallingConvID) {
+ case CallingConv::C:
+ case CallingConv::Cold:
+ case CallingConv::Fast:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) ";
+ case CallingConv::X86_FastCall:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
+ case CallingConv::X86_StdCall:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
+ default:
+ cerr << "CallingConvID = " << CallingConvID << '\n';
+ assert(0 && "Unsupported calling convention");
+ }
+ return ""; // Not reached
+}
+
+
+std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) {
+ std::string Tmp = "";
+ const Type* ElemTy = Ty;
+ assert(Ty->getTypeID()==TyID && "Invalid type passed");
+  // Walk through the array element types.
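+  // (e.g. the LLVM type [2 x [3 x i32]] should come out roughly as
+  //  "int32[2,3]": both dimensions are collected before the element type)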
+ for (;;) {
+ // Multidimensional array.
+ if (ElemTy->getTypeID()==TyID) {
+ if (const ArrayType* ATy = dyn_cast<ArrayType>(ElemTy))
+ Tmp += utostr(ATy->getNumElements());
+ else if (const VectorType* VTy = dyn_cast<VectorType>(ElemTy))
+ Tmp += utostr(VTy->getNumElements());
+ ElemTy = cast<SequentialType>(ElemTy)->getElementType();
+ }
+ // Base element type found.
+ if (ElemTy->getTypeID()!=TyID) break;
+ Tmp += ",";
+ }
+ return getTypeName(ElemTy, false, true)+"["+Tmp+"]";
+}
+
+
+std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) {
+ unsigned NumBits = 0;
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID:
+ return "void ";
+ case Type::IntegerTyID:
+ NumBits = getBitWidth(Ty);
+ if(NumBits==1)
+ return "bool ";
+ if (!isSigned)
+ return "unsigned int"+utostr(NumBits)+" ";
+ return "int"+utostr(NumBits)+" ";
+ case Type::FloatTyID:
+ return "float32 ";
+ case Type::DoubleTyID:
+ return "float64 ";
+ default:
+ cerr << "Type = " << *Ty << '\n';
+ assert(0 && "Invalid primitive type");
+ }
+ return ""; // Not reached
+}
+
+
+std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned,
+ bool isNested) {
+ if (Ty->isPrimitiveType() || Ty->isInteger())
+ return getPrimitiveTypeName(Ty,isSigned);
+ // FIXME: "OpaqueType" support
+ switch (Ty->getTypeID()) {
+ case Type::PointerTyID:
+ return "void* ";
+ case Type::StructTyID:
+ if (isNested)
+ return ModulePtr->getTypeName(Ty);
+ return "valuetype '"+ModulePtr->getTypeName(Ty)+"' ";
+ case Type::ArrayTyID:
+ if (isNested)
+ return getArrayTypeName(Ty->getTypeID(),Ty);
+ return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
+ case Type::VectorTyID:
+ if (isNested)
+ return getArrayTypeName(Ty->getTypeID(),Ty);
+ return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
+ default:
+ cerr << "Type = " << *Ty << '\n';
+ assert(0 && "Invalid type in getTypeName()");
+ }
+ return ""; // Not reached
+}
+
+
+MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) {
+ // Function argument
+ if (isa<Argument>(V))
+ return ArgumentVT;
+ // Function
+ else if (const Function* F = dyn_cast<Function>(V))
+ return F->hasLocalLinkage() ? InternalVT : GlobalVT;
+ // Variable
+ else if (const GlobalVariable* G = dyn_cast<GlobalVariable>(V))
+ return G->hasLocalLinkage() ? InternalVT : GlobalVT;
+ // Constant
+ else if (isa<Constant>(V))
+ return isa<ConstantExpr>(V) ? ConstExprVT : ConstVT;
+ // Local variable
+ return LocalVT;
+}
+
+
+std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand,
+ bool isSigned) {
+ unsigned NumBits = 0;
+ switch (Ty->getTypeID()) {
+ // Integer constant, expanding for stack operations.
+ case Type::IntegerTyID:
+ NumBits = getBitWidth(Ty);
+ // Expand integer value to "int32" or "int64".
+ if (Expand) return (NumBits<=32 ? "i4" : "i8");
+ if (NumBits==1) return "i1";
+ return (isSigned ? "i" : "u")+utostr(NumBits/8);
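+    // (e.g. i32 yields "i4" when signed, "u4" when unsigned, and "i4" when
+    //  expanded for the stack)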
+ // Float constant.
+ case Type::FloatTyID:
+ return "r4";
+ case Type::DoubleTyID:
+ return "r8";
+ case Type::PointerTyID:
+ return "i"+utostr(TD->getTypeAllocSize(Ty));
+ default:
+ cerr << "TypeID = " << Ty->getTypeID() << '\n';
+ assert(0 && "Invalid type in TypeToPostfix()");
+ }
+ return ""; // Not reached
+}
+
+
+void MSILWriter::printConvToPtr() {
+ switch (ModulePtr->getPointerSize()) {
+ case Module::Pointer32:
+ printSimpleInstruction("conv.u4");
+ break;
+ case Module::Pointer64:
+ printSimpleInstruction("conv.u8");
+ break;
+ default:
+ assert(0 && "Module use not supporting pointer size");
+ }
+}
+
+
+void MSILWriter::printPtrLoad(uint64_t N) {
+ switch (ModulePtr->getPointerSize()) {
+ case Module::Pointer32:
+ printSimpleInstruction("ldc.i4",utostr(N).c_str());
+ // FIXME: Need overflow test?
+ if (!isUInt32(N)) {
+ cerr << "Value = " << utostr(N) << '\n';
+ assert(0 && "32-bit pointer overflowed");
+ }
+ break;
+ case Module::Pointer64:
+ printSimpleInstruction("ldc.i8",utostr(N).c_str());
+ break;
+ default:
+ assert(0 && "Module use not supporting pointer size");
+ }
+}
+
+
+void MSILWriter::printValuePtrLoad(const Value* V) {
+ printValueLoad(V);
+ printConvToPtr();
+}
+
+
+void MSILWriter::printConstLoad(const Constant* C) {
+ if (const ConstantInt* CInt = dyn_cast<ConstantInt>(C)) {
+ // Integer constant
+ Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t';
+ if (CInt->isMinValue(true))
+ Out << CInt->getSExtValue();
+ else
+ Out << CInt->getZExtValue();
+ } else if (const ConstantFP* FP = dyn_cast<ConstantFP>(C)) {
+ // Float constant
+ uint64_t X;
+ unsigned Size;
+ if (FP->getType()->getTypeID()==Type::FloatTyID) {
+ X = (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue();
+ Size = 4;
+ } else {
+ X = FP->getValueAPF().bitcastToAPInt().getZExtValue();
+ Size = 8;
+ }
+ Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')';
+ } else if (isa<UndefValue>(C)) {
+ // Undefined constant value = NULL.
+ printPtrLoad(0);
+ } else {
+ cerr << "Constant = " << *C << '\n';
+ assert(0 && "Invalid constant value");
+ }
+ Out << '\n';
+}
+
+
+void MSILWriter::printValueLoad(const Value* V) {
+ MSILWriter::ValueType Location = getValueLocation(V);
+ switch (Location) {
+ // Global variable or function address.
+ case GlobalVT:
+ case InternalVT:
+ if (const Function* F = dyn_cast<Function>(V)) {
+ std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
+ printSimpleInstruction("ldftn",
+ getCallSignature(F->getFunctionType(),NULL,Name).c_str());
+ } else {
+ std::string Tmp;
+ const Type* ElemTy = cast<PointerType>(V->getType())->getElementType();
+ if (Location==GlobalVT && cast<GlobalVariable>(V)->hasDLLImportLinkage()) {
+ Tmp = "void* "+getValueName(V);
+ printSimpleInstruction("ldsfld",Tmp.c_str());
+ } else {
+ Tmp = getTypeName(ElemTy)+getValueName(V);
+ printSimpleInstruction("ldsflda",Tmp.c_str());
+ }
+ }
+ break;
+ // Function argument.
+ case ArgumentVT:
+ printSimpleInstruction("ldarg",getValueName(V).c_str());
+ break;
+ // Local function variable.
+ case LocalVT:
+ printSimpleInstruction("ldloc",getValueName(V).c_str());
+ break;
+ // Constant value.
+ case ConstVT:
+ if (isa<ConstantPointerNull>(V))
+ printPtrLoad(0);
+ else
+ printConstLoad(cast<Constant>(V));
+ break;
+ // Constant expression.
+ case ConstExprVT:
+ printConstantExpr(cast<ConstantExpr>(V));
+ break;
+ default:
+ cerr << "Value = " << *V << '\n';
+ assert(0 && "Invalid value location");
+ }
+}
+
+
+void MSILWriter::printValueSave(const Value* V) {
+ switch (getValueLocation(V)) {
+ case ArgumentVT:
+ printSimpleInstruction("starg",getValueName(V).c_str());
+ break;
+ case LocalVT:
+ printSimpleInstruction("stloc",getValueName(V).c_str());
+ break;
+ default:
+ cerr << "Value = " << *V << '\n';
+ assert(0 && "Invalid value location");
+ }
+}
+
+
+void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left,
+ const Value* Right) {
+ printValueLoad(Left);
+ printValueLoad(Right);
+ Out << '\t' << Name << '\n';
+}
+
+
+void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) {
+ if(Operand)
+ Out << '\t' << Inst << '\t' << Operand << '\n';
+ else
+ Out << '\t' << Inst << '\n';
+}
+
+
+void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) {
+ for (BasicBlock::const_iterator I = Dst->begin(), E = Dst->end();
+ isa<PHINode>(I); ++I) {
+ const PHINode* Phi = cast<PHINode>(I);
+ const Value* Val = Phi->getIncomingValueForBlock(Src);
+ if (isa<UndefValue>(Val)) continue;
+ printValueLoad(Val);
+ printValueSave(Phi);
+ }
+}
+
+
+void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB,
+ const BasicBlock* TrueBB,
+ const BasicBlock* FalseBB) {
+ if (TrueBB==FalseBB) {
+ // "TrueBB" and "FalseBB" destination equals
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("pop");
+ printSimpleInstruction("br",getLabelName(TrueBB).c_str());
+ } else if (FalseBB==NULL) {
+ // If "FalseBB" not used the jump have condition
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
+ } else if (TrueBB==NULL) {
+ // If "TrueBB" not used the jump is unconditional
+ printPHICopy(CurrBB,FalseBB);
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ } else {
+ // Copy PHI instructions for each block
+ std::string TmpLabel;
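+    // When a successor starts with PHI nodes, we cannot branch straight to
+    // it: the PHI copies must happen on this edge only, so we branch to a
+    // fresh local label, do the copies there, and only then jump on.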
+ // Print PHI instructions for "TrueBB"
+ if (isa<PHINode>(TrueBB->begin())) {
+ TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID());
+ printSimpleInstruction("brtrue",TmpLabel.c_str());
+ } else {
+ printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
+ }
+ // Print PHI instructions for "FalseBB"
+ if (isa<PHINode>(FalseBB->begin())) {
+ printPHICopy(CurrBB,FalseBB);
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ } else {
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ }
+ if (isa<PHINode>(TrueBB->begin())) {
+ // Handle "TrueBB" PHI Copy
+ Out << TmpLabel << ":\n";
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("br",getLabelName(TrueBB).c_str());
+ }
+ }
+}
+
+
+void MSILWriter::printBranchInstruction(const BranchInst* Inst) {
+ if (Inst->isUnconditional()) {
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0));
+ } else {
+ printValueLoad(Inst->getCondition());
+ printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0),
+ Inst->getSuccessor(1));
+ }
+}
+
+
+void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue,
+ const Value* VFalse) {
+ std::string TmpLabel = std::string("select$true_")+utostr(getUniqID());
+ printValueLoad(VTrue);
+ printValueLoad(Cond);
+ printSimpleInstruction("brtrue",TmpLabel.c_str());
+ printSimpleInstruction("pop");
+ printValueLoad(VFalse);
+ Out << TmpLabel << ":\n";
+}
+
+
+void MSILWriter::printIndirectLoad(const Value* V) {
+ const Type* Ty = V->getType();
+ printValueLoad(V);
+ if (const PointerType* P = dyn_cast<PointerType>(Ty))
+ Ty = P->getElementType();
+ std::string Tmp = "ldind."+getTypePostfix(Ty, false);
+ printSimpleInstruction(Tmp.c_str());
+}
+
+
+void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) {
+ printValueLoad(Ptr);
+ printValueLoad(Val);
+ printIndirectSave(Val->getType());
+}
+
+
+void MSILWriter::printIndirectSave(const Type* Ty) {
+  // The store instruction needs a signed postfix for every type.
+ std::string postfix = getTypePostfix(Ty, false);
+ if (*postfix.begin()=='u') *postfix.begin() = 'i';
+ postfix = "stind."+postfix;
+ printSimpleInstruction(postfix.c_str());
+}
+
+
+void MSILWriter::printCastInstruction(unsigned int Op, const Value* V,
+ const Type* Ty) {
+ std::string Tmp("");
+ printValueLoad(V);
+ switch (Op) {
+ // Signed
+ case Instruction::SExt:
+ case Instruction::SIToFP:
+ case Instruction::FPToSI:
+ Tmp = "conv."+getTypePostfix(Ty,false,true);
+ printSimpleInstruction(Tmp.c_str());
+ break;
+ // Unsigned
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::FPToUI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ Tmp = "conv."+getTypePostfix(Ty,false);
+ printSimpleInstruction(Tmp.c_str());
+ break;
+ // Do nothing
+ case Instruction::BitCast:
+    // FIXME: meaning that ld*/st* instructions do not change the data format.
+ break;
+ default:
+ cerr << "Opcode = " << Op << '\n';
+ assert(0 && "Invalid conversion instruction");
+ }
+}
+
+
+void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I,
+ gep_type_iterator E) {
+ unsigned Size;
+ // Load address
+ printValuePtrLoad(V);
+ // Calculate element offset.
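+  // A struct index adds a constant byte offset (the sum of the sizes of all
+  // preceding fields); a sequential index adds index*elementsize, with fast
+  // paths below for constant and negative indices.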
+ for (; I!=E; ++I){
+ Size = 0;
+ const Value* IndexValue = I.getOperand();
+ if (const StructType* StrucTy = dyn_cast<StructType>(*I)) {
+ uint64_t FieldIndex = cast<ConstantInt>(IndexValue)->getZExtValue();
+ // Offset is the sum of all previous structure fields.
+ for (uint64_t F = 0; F<FieldIndex; ++F)
+ Size += TD->getTypeAllocSize(StrucTy->getContainedType((unsigned)F));
+ printPtrLoad(Size);
+ printSimpleInstruction("add");
+ continue;
+ } else if (const SequentialType* SeqTy = dyn_cast<SequentialType>(*I)) {
+ Size = TD->getTypeAllocSize(SeqTy->getElementType());
+ } else {
+ Size = TD->getTypeAllocSize(*I);
+ }
+ // Add offset of current element to stack top.
+ if (!isZeroValue(IndexValue)) {
+ // Constant optimization.
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(IndexValue)) {
+ if (C->getValue().isNegative()) {
+ printPtrLoad(C->getValue().abs().getZExtValue()*Size);
+ printSimpleInstruction("sub");
+ continue;
+ } else
+ printPtrLoad(C->getZExtValue()*Size);
+ } else {
+ printPtrLoad(Size);
+ printValuePtrLoad(IndexValue);
+ printSimpleInstruction("mul");
+ }
+ printSimpleInstruction("add");
+ }
+ }
+}
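+// The address is built on the evaluation stack: a struct index adds the sizes
+// of all preceding fields, while an array/pointer index is scaled by the
+// element size, with a constant-folded fast path for ConstantInt indices.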
+
+
+std::string MSILWriter::getCallSignature(const FunctionType* Ty,
+ const Instruction* Inst,
+ std::string Name) {
+ std::string Tmp("");
+ if (Ty->isVarArg()) Tmp += "vararg ";
+ // Name and return type.
+ Tmp += getTypeName(Ty->getReturnType())+Name+"(";
+ // Function argument type list.
+ unsigned NumParams = Ty->getNumParams();
+ for (unsigned I = 0; I!=NumParams; ++I) {
+ if (I!=0) Tmp += ",";
+ Tmp += getTypeName(Ty->getParamType(I));
+ }
+ // CLR needs to know the exact number of parameters a vararg function
+ // receives, because the caller cleans up the stack.
+ if (Ty->isVarArg() && Inst) {
+ // Offset of the first call argument in a "CallInst" or "InvokeInst".
+ unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1;
+ // Print variable argument types.
+ unsigned NumOperands = Inst->getNumOperands()-Org;
+ if (NumParams<NumOperands) {
+ if (NumParams!=0) Tmp += ", ";
+ Tmp += "... , ";
+ for (unsigned J = NumParams; J!=NumOperands; ++J) {
+ if (J!=NumParams) Tmp += ", ";
+ Tmp += getTypeName(Inst->getOperand(J+Org)->getType());
+ }
+ }
+ }
+ return Tmp+")";
+}
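+// Illustrative result, assuming a vararg callee named "printf" that returns
+// i32, takes one fixed i8* parameter, and gets one extra i32 at the call site:
+//   vararg int32 printf(int8*, ... , int32)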
+
+
+void MSILWriter::printFunctionCall(const Value* FnVal,
+ const Instruction* Inst) {
+ // Get function calling convention.
+ std::string Name = "";
+ if (const CallInst* Call = dyn_cast<CallInst>(Inst))
+ Name = getConvModopt(Call->getCallingConv());
+ else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst))
+ Name = getConvModopt(Invoke->getCallingConv());
+ else {
+ cerr << "Instruction = " << Inst->getName() << '\n';
+ assert(0 && "Need \"Invoke\" or \"Call\" instruction only");
+ }
+ if (const Function* F = dyn_cast<Function>(FnVal)) {
+ // Direct call.
+ Name += getValueName(F);
+ printSimpleInstruction("call",
+ getCallSignature(F->getFunctionType(),Inst,Name).c_str());
+ } else {
+ // Indirect function call.
+ const PointerType* PTy = cast<PointerType>(FnVal->getType());
+ const FunctionType* FTy = cast<FunctionType>(PTy->getElementType());
+ // Load function address.
+ printValueLoad(FnVal);
+ printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str());
+ }
+}
+
+
+void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
+ std::string Name;
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::vastart:
+ Name = getValueName(Inst->getOperand(1));
+ Name.insert(Name.length()-1,"$valist");
+ // Obtain the argument handle.
+ printSimpleInstruction("ldloca",Name.c_str());
+ printSimpleInstruction("arglist");
+ printSimpleInstruction("call",
+ "instance void [mscorlib]System.ArgIterator::.ctor"
+ "(valuetype [mscorlib]System.RuntimeArgumentHandle)");
+ // Save as pointer type "void*"
+ printValueLoad(Inst->getOperand(1));
+ printSimpleInstruction("ldloca",Name.c_str());
+ printIndirectSave(PointerType::getUnqual(IntegerType::get(8)));
+ break;
+ case Intrinsic::vaend:
+ // Close argument list handle.
+ printIndirectLoad(Inst->getOperand(1));
+ printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
+ break;
+ case Intrinsic::vacopy:
+ // Copy "ArgIterator" valuetype.
+ printIndirectLoad(Inst->getOperand(1));
+ printIndirectLoad(Inst->getOperand(2));
+ printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
+ break;
+ default:
+ cerr << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
+ assert(0 && "Invalid intrinsic function");
+ }
+}
+
+
+void MSILWriter::printCallInstruction(const Instruction* Inst) {
+ if (isa<IntrinsicInst>(Inst)) {
+ // Handle intrinsic function.
+ printIntrinsicCall(cast<IntrinsicInst>(Inst));
+ } else {
+ // Load the arguments onto the stack and call the function.
+ for (unsigned I = 1, E = Inst->getNumOperands(); I!=E; ++I)
+ printValueLoad(Inst->getOperand(I));
+ printFunctionCall(Inst->getOperand(0),Inst);
+ }
+}
+
+
+void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right) {
+ switch (Predicate) {
+ case ICmpInst::ICMP_EQ:
+ printBinaryInstruction("ceq",Left,Right);
+ break;
+ case ICmpInst::ICMP_NE:
+ // Emulate = not neg (Op1 eq Op2)
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("neg");
+ printSimpleInstruction("not");
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ // Emulate = (Op1 eq Op2) or (Op1 lt Op2)
+ printBinaryInstruction("ceq",Left,Right);
+ if (Predicate==ICmpInst::ICMP_ULE)
+ printBinaryInstruction("clt.un",Left,Right);
+ else
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ // Emulate = (Op1 eq Op2) or (Op1 gt Op2)
+ printBinaryInstruction("ceq",Left,Right);
+ if (Predicate==ICmpInst::ICMP_UGE)
+ printBinaryInstruction("cgt.un",Left,Right);
+ else
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case ICmpInst::ICMP_ULT:
+ printBinaryInstruction("clt.un",Left,Right);
+ break;
+ case ICmpInst::ICMP_SLT:
+ printBinaryInstruction("clt",Left,Right);
+ break;
+ case ICmpInst::ICMP_UGT:
+ printBinaryInstruction("cgt.un",Left,Right);
+ break;
+ case ICmpInst::ICMP_SGT:
+ printBinaryInstruction("cgt",Left,Right);
+ break;
+ default:
+ cerr << "Predicate = " << Predicate << '\n';
+ assert(0 && "Invalid icmp predicate");
+ }
+}
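+// For example, ICMP_ULE lowers to "ceq; clt.un; or": true when the operands
+// are equal or the left one is below the right one in the unsigned order.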
+
+
+void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right) {
+ // FIXME: Make the NaN handling exact, e.g. by calling
+ // "bool [mscorlib]System.Double::IsNaN(float64)".
+ switch (Predicate) {
+ case FCmpInst::FCMP_UGT:
+ // X > Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("cgt",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OGT:
+ // X > Y
+ printBinaryInstruction("cgt",Left,Right);
+ break;
+ case FCmpInst::FCMP_UGE:
+ // X >= Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OGE:
+ // X >= Y
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_ULT:
+ // X < Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("clt",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OLT:
+ // X < Y
+ printBinaryInstruction("clt",Left,Right);
+ break;
+ case FCmpInst::FCMP_ULE:
+ // X <= Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OLE:
+ // X <= Y
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_UEQ:
+ // X == Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OEQ:
+ // X == Y
+ printBinaryInstruction("ceq",Left,Right);
+ break;
+ case FCmpInst::FCMP_UNE:
+ // X != Y
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("neg");
+ printSimpleInstruction("not");
+ break;
+ case FCmpInst::FCMP_ONE:
+ // X != Y && llvm_fcmp_ord(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("not");
+ break;
+ case FCmpInst::FCMP_ORD:
+ // return X == X && Y == Y
+ printBinaryInstruction("ceq",Left,Left);
+ printBinaryInstruction("ceq",Right,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_UNO:
+ // X != X || Y != Y
+ printBinaryInstruction("ceq",Left,Left);
+ printSimpleInstruction("not");
+ printBinaryInstruction("ceq",Right,Right);
+ printSimpleInstruction("not");
+ printSimpleInstruction("or");
+ break;
+ default:
+ assert(0 && "Illegal FCmp predicate");
+ }
+}
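+// Note: most unordered predicates (UGT, UGE, ULT, ULE, UEQ) OR the ordered
+// test with the FCMP_UNO expansion so they also hold for NaN operands, while
+// UNE falls out of not(ceq) directly, since ceq is false whenever one side
+// is NaN.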
+
+
+void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
+ std::string Label = "leave$normal_"+utostr(getUniqID());
+ Out << ".try {\n";
+ // Load arguments
+ for (unsigned I = 3, E = Inst->getNumOperands(); I!=E; ++I)
+ printValueLoad(Inst->getOperand(I));
+ // Print call instruction
+ printFunctionCall(Inst->getOperand(0),Inst);
+ // Save function result and leave "try" block
+ printValueSave(Inst);
+ printSimpleInstruction("leave",Label.c_str());
+ Out << "}\n";
+ Out << "catch [mscorlib]System.Exception {\n";
+ // Redirect to unwind block
+ printSimpleInstruction("pop");
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest());
+ Out << "}\n" << Label << ":\n";
+ // Redirect to continue block
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest());
+}
+
+
+void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) {
+ // FIXME: Emulate with IL "switch" instruction
+ // Emulate = if () else if () else if () else ...
+ for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) {
+ printValueLoad(Inst->getCondition());
+ printValueLoad(Inst->getCaseValue(I));
+ printSimpleInstruction("ceq");
+ // Conditional jump to the successor block
+ printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL);
+ }
+ // Jump to default block
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest());
+}
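+// Illustrative expansion: each case loads the condition and its case value,
+// compares them with "ceq", and conditionally branches to the case's block;
+// control finally branches to the default block.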
+
+
+void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) {
+ printIndirectLoad(Inst->getOperand(0));
+ printSimpleInstruction("call",
+ "instance typedref [mscorlib]System.ArgIterator::GetNextArg()");
+ printSimpleInstruction("refanyval","void*");
+ std::string Name =
+ "ldind."+getTypePostfix(PointerType::getUnqual(IntegerType::get(8)),false);
+ printSimpleInstruction(Name.c_str());
+}
+
+
+void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) {
+ uint64_t Size = TD->getTypeAllocSize(Inst->getAllocatedType());
+ // Constant optimization.
+ if (const ConstantInt* CInt = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
+ printPtrLoad(CInt->getZExtValue()*Size);
+ } else {
+ printPtrLoad(Size);
+ printValueLoad(Inst->getOperand(0));
+ printSimpleInstruction("mul");
+ }
+ printSimpleInstruction("localloc");
+}
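+// For example, "alloca i32, i32 %n" loads the element size (4), loads %n,
+// multiplies them, and emits "localloc" with the resulting byte count.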
+
+
+void MSILWriter::printInstruction(const Instruction* Inst) {
+ const Value *Left = 0, *Right = 0;
+ if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0);
+ if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1);
+ // Print instruction
+ // FIXME: "ShuffleVector","ExtractElement","InsertElement" support.
+ switch (Inst->getOpcode()) {
+ // Terminator
+ case Instruction::Ret:
+ if (Inst->getNumOperands())
+ printValueLoad(Left);
+ printSimpleInstruction("ret");
+ break;
+ case Instruction::Br:
+ printBranchInstruction(cast<BranchInst>(Inst));
+ break;
+ // Binary
+ case Instruction::Add:
+ printBinaryInstruction("add",Left,Right);
+ break;
+ case Instruction::Sub:
+ printBinaryInstruction("sub",Left,Right);
+ break;
+ case Instruction::Mul:
+ printBinaryInstruction("mul",Left,Right);
+ break;
+ case Instruction::UDiv:
+ printBinaryInstruction("div.un",Left,Right);
+ break;
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ printBinaryInstruction("div",Left,Right);
+ break;
+ case Instruction::URem:
+ printBinaryInstruction("rem.un",Left,Right);
+ break;
+ case Instruction::SRem:
+ case Instruction::FRem:
+ printBinaryInstruction("rem",Left,Right);
+ break;
+ // Binary Condition
+ case Instruction::ICmp:
+ printICmpInstruction(cast<ICmpInst>(Inst)->getPredicate(),Left,Right);
+ break;
+ case Instruction::FCmp:
+ printFCmpInstruction(cast<FCmpInst>(Inst)->getPredicate(),Left,Right);
+ break;
+ // Bitwise Binary
+ case Instruction::And:
+ printBinaryInstruction("and",Left,Right);
+ break;
+ case Instruction::Or:
+ printBinaryInstruction("or",Left,Right);
+ break;
+ case Instruction::Xor:
+ printBinaryInstruction("xor",Left,Right);
+ break;
+ case Instruction::Shl:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shl");
+ break;
+ case Instruction::LShr:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shr.un");
+ break;
+ case Instruction::AShr:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shr");
+ break;
+ case Instruction::Select:
+ printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),Inst->getOperand(2));
+ break;
+ case Instruction::Load:
+ printIndirectLoad(Inst->getOperand(0));
+ break;
+ case Instruction::Store:
+ printIndirectSave(Inst->getOperand(1), Inst->getOperand(0));
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ printCastInstruction(Inst->getOpcode(),Left,
+ cast<CastInst>(Inst)->getDestTy());
+ break;
+ case Instruction::GetElementPtr:
+ printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst),
+ gep_type_end(Inst));
+ break;
+ case Instruction::Call:
+ printCallInstruction(cast<CallInst>(Inst));
+ break;
+ case Instruction::Invoke:
+ printInvokeInstruction(cast<InvokeInst>(Inst));
+ break;
+ case Instruction::Unwind:
+ printSimpleInstruction("newobj",
+ "instance void [mscorlib]System.Exception::.ctor()");
+ printSimpleInstruction("throw");
+ break;
+ case Instruction::Switch:
+ printSwitchInstruction(cast<SwitchInst>(Inst));
+ break;
+ case Instruction::Alloca:
+ printAllocaInstruction(cast<AllocaInst>(Inst));
+ break;
+ case Instruction::Malloc:
+ assert(0 && "LowerAllocationsPass used");
+ break;
+ case Instruction::Free:
+ assert(0 && "LowerAllocationsPass used");
+ break;
+ case Instruction::Unreachable:
+ printSimpleInstruction("ldstr", "\"Unreachable instruction\"");
+ printSimpleInstruction("newobj",
+ "instance void [mscorlib]System.Exception::.ctor(string)");
+ printSimpleInstruction("throw");
+ break;
+ case Instruction::VAArg:
+ printVAArgInstruction(cast<VAArgInst>(Inst));
+ break;
+ default:
+ cerr << "Instruction = " << Inst->getName() << '\n';
+ assert(0 && "Unsupported instruction");
+ }
+}
+
+
+void MSILWriter::printLoop(const Loop* L) {
+ Out << getLabelName(L->getHeader()->getName()) << ":\n";
+ const std::vector<BasicBlock*>& blocks = L->getBlocks();
+ for (unsigned I = 0, E = blocks.size(); I!=E; I++) {
+ BasicBlock* BB = blocks[I];
+ Loop* BBLoop = LInfo->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L)
+ printLoop(BBLoop);
+ }
+ printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str());
+}
+
+
+void MSILWriter::printBasicBlock(const BasicBlock* BB) {
+ Out << getLabelName(BB) << ":\n";
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+ const Instruction* Inst = I;
+ // Uncomment to emit the original LLVM instruction as a comment:
+ // Out << "\n//" << *Inst << "\n";
+ // PHI nodes are not printed here; their copies are emitted at branches.
+ if (Inst->getOpcode()==Instruction::PHI) continue;
+ // Print instruction
+ printInstruction(Inst);
+ // Save result
+ if (Inst->getType()!=Type::VoidTy) {
+ // Do not save the value after an invoke; that is done inside the "try" block.
+ if (Inst->getOpcode()==Instruction::Invoke) continue;
+ printValueSave(Inst);
+ }
+ }
+}
+
+
+void MSILWriter::printLocalVariables(const Function& F) {
+ std::string Name;
+ const Type* Ty = NULL;
+ std::set<const Value*> Printed;
+ const Value* VaList = NULL;
+ unsigned StackDepth = 8;
+ // Find local variables
+ for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) {
+ if (I->getOpcode()==Instruction::Call ||
+ I->getOpcode()==Instruction::Invoke) {
+ // Test stack depth.
+ if (StackDepth<I->getNumOperands())
+ StackDepth = I->getNumOperands();
+ }
+ const AllocaInst* AI = dyn_cast<AllocaInst>(&*I);
+ if (AI && !isa<GlobalVariable>(AI)) {
+ // Local variable allocation.
+ Ty = PointerType::getUnqual(AI->getAllocatedType());
+ Name = getValueName(AI);
+ Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
+ } else if (I->getType()!=Type::VoidTy) {
+ // Operation result.
+ Ty = I->getType();
+ Name = getValueName(&*I);
+ Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
+ }
+ // Test for a 'va_list' variable.
+ bool isVaList = false;
+ if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) {
+ // "va_list" as "va_arg" instruction operand.
+ isVaList = true;
+ VaList = VaInst->getOperand(0);
+ } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) {
+ // "va_list" as intrinsic function operand.
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::vastart:
+ case Intrinsic::vaend:
+ case Intrinsic::vacopy:
+ isVaList = true;
+ VaList = Inst->getOperand(1);
+ break;
+ default:
+ isVaList = false;
+ }
+ }
+ // Print "va_list" variable.
+ if (isVaList && Printed.insert(VaList).second) {
+ Name = getValueName(VaList);
+ Name.insert(Name.length()-1,"$valist");
+ Out << "\t.locals (valuetype [mscorlib]System.ArgIterator "
+ << Name << ")\n";
+ }
+ }
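+ // Emit a conservative .maxstack: twice the widest call operand count seen
+ // above (and at least 16), rather than the exact evaluation-stack depth.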
+ printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str());
+}
+
+
+void MSILWriter::printFunctionBody(const Function& F) {
+ // Print body
+ for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) {
+ if (Loop *L = LInfo->getLoopFor(I)) {
+ if (L->getHeader()==I && L->getParentLoop()==0)
+ printLoop(L);
+ } else {
+ printBasicBlock(I);
+ }
+ }
+}
+
+
+void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
+ const Value *left = 0, *right = 0;
+ if (CE->getNumOperands()>=1) left = CE->getOperand(0);
+ if (CE->getNumOperands()>=2) right = CE->getOperand(1);
+ // Print instruction
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ printCastInstruction(CE->getOpcode(),left,CE->getType());
+ break;
+ case Instruction::GetElementPtr:
+ printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE));
+ break;
+ case Instruction::ICmp:
+ printICmpInstruction(CE->getPredicate(),left,right);
+ break;
+ case Instruction::FCmp:
+ printFCmpInstruction(CE->getPredicate(),left,right);
+ break;
+ case Instruction::Select:
+ printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2));
+ break;
+ case Instruction::Add:
+ printBinaryInstruction("add",left,right);
+ break;
+ case Instruction::Sub:
+ printBinaryInstruction("sub",left,right);
+ break;
+ case Instruction::Mul:
+ printBinaryInstruction("mul",left,right);
+ break;
+ case Instruction::UDiv:
+ printBinaryInstruction("div.un",left,right);
+ break;
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ printBinaryInstruction("div",left,right);
+ break;
+ case Instruction::URem:
+ printBinaryInstruction("rem.un",left,right);
+ break;
+ case Instruction::SRem:
+ case Instruction::FRem:
+ printBinaryInstruction("rem",left,right);
+ break;
+ case Instruction::And:
+ printBinaryInstruction("and",left,right);
+ break;
+ case Instruction::Or:
+ printBinaryInstruction("or",left,right);
+ break;
+ case Instruction::Xor:
+ printBinaryInstruction("xor",left,right);
+ break;
+ case Instruction::Shl:
+ printBinaryInstruction("shl",left,right);
+ break;
+ case Instruction::LShr:
+ printBinaryInstruction("shr.un",left,right);
+ break;
+ case Instruction::AShr:
+ printBinaryInstruction("shr",left,right);
+ break;
+ default:
+ cerr << "Expression = " << *CE << "\n";
+ assert(0 && "Invalid constant expression");
+ }
+}
+
+
+void MSILWriter::printStaticInitializerList() {
+ // Global variables with fields that need runtime initialization.
+ for (std::map<const GlobalVariable*,std::vector<StaticInitializer> >::iterator
+ VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE;
+ ++VarI) {
+ const std::vector<StaticInitializer>& InitList = VarI->second;
+ if (InitList.empty()) continue;
+ // For each such field.
+ for (std::vector<StaticInitializer>::const_iterator I = InitList.begin(),
+ E = InitList.end(); I!=E; ++I) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(I->constant)) {
+ // Out << "\n// Init " << getValueName(VarI->first) << ", offset " <<
+ // utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n";
+ // Load variable address
+ printValueLoad(VarI->first);
+ // Add offset
+ if (I->offset!=0) {
+ printPtrLoad(I->offset);
+ printSimpleInstruction("add");
+ }
+ // Load value
+ printConstantExpr(CE);
+ // Save result at offset
+ std::string postfix = getTypePostfix(CE->getType(),true);
+ if (*postfix.begin()=='u') *postfix.begin() = 'i';
+ postfix = "stind."+postfix;
+ printSimpleInstruction(postfix.c_str());
+ } else {
+ cerr << "Constant = " << *I->constant << '\n';
+ assert(0 && "Invalid static initializer");
+ }
+ }
+ }
+}
+
+
+void MSILWriter::printFunction(const Function& F) {
+ bool isSigned = F.paramHasAttr(0, Attribute::SExt);
+ Out << "\n.method static ";
+ Out << (F.hasLocalLinkage() ? "private " : "public ");
+ if (F.isVarArg()) Out << "vararg ";
+ Out << getTypeName(F.getReturnType(),isSigned) <<
+ getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n';
+ // Arguments
+ Out << "\t(";
+ unsigned ArgIdx = 1;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E;
+ ++I, ++ArgIdx) {
+ isSigned = F.paramHasAttr(ArgIdx, Attribute::SExt);
+ if (I!=F.arg_begin()) Out << ", ";
+ Out << getTypeName(I->getType(),isSigned) << getValueName(I);
+ }
+ Out << ") cil managed\n";
+ // Body
+ Out << "{\n";
+ printLocalVariables(F);
+ printFunctionBody(F);
+ Out << "}\n";
+}
+
+
+void MSILWriter::printDeclarations(const TypeSymbolTable& ST) {
+ std::string Name;
+ std::set<const Type*> Printed;
+ for (std::set<const Type*>::const_iterator
+ UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) {
+ const Type* Ty = *UI;
+ if (isa<ArrayType>(Ty) || isa<VectorType>(Ty) || isa<StructType>(Ty))
+ Name = getTypeName(Ty, false, true);
+ // Other types need no declaration.
+ else continue;
+ // Print each type only once.
+ if (Printed.insert(Ty).second) {
+ Out << ".class value explicit ansi sealed '" << Name << "'";
+ Out << " { .pack " << 1 << " .size " << TD->getTypeAllocSize(Ty);
+ Out << " }\n\n";
+ }
+ }
+}
+
+
+unsigned int MSILWriter::getBitWidth(const Type* Ty) {
+ unsigned int N = Ty->getPrimitiveSizeInBits();
+ assert(N!=0 && "Invalid type in getBitWidth()");
+ switch (N) {
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return N;
+ default:
+ cerr << "Bits = " << N << '\n';
+ assert(0 && "Unsupported integer width");
+ }
+ return 0; // Not reached
+}
+
+
+void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) {
+ uint64_t TySize = 0;
+ const Type* Ty = C->getType();
+ // Print a zero-initialized constant.
+ if (isa<ConstantAggregateZero>(C) || C->isNullValue()) {
+ TySize = TD->getTypeAllocSize(C->getType());
+ Offset += TySize;
+ Out << "int8 (0) [" << TySize << "]";
+ return;
+ }
+ // Print constant initializer
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ TySize = TD->getTypeAllocSize(Ty);
+ const ConstantInt* Int = cast<ConstantInt>(C);
+ Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")";
+ break;
+ }
+ case Type::FloatTyID:
+ case Type::DoubleTyID: {
+ TySize = TD->getTypeAllocSize(Ty);
+ const ConstantFP* FP = cast<ConstantFP>(C);
+ if (Ty->getTypeID() == Type::FloatTyID)
+ Out << "int32 (" <<
+ (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
+ else
+ Out << "int64 (" <<
+ FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
+ break;
+ }
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID:
+ for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) {
+ if (I!=0) Out << ",\n";
+ printStaticConstant(C->getOperand(I),Offset);
+ }
+ break;
+ case Type::PointerTyID:
+ TySize = TD->getTypeAllocSize(C->getType());
+ // Initialize with global variable address
+ if (const GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
+ std::string name = getValueName(G);
+ Out << "&(" << name.insert(name.length()-1,"$data") << ")";
+ } else {
+ // Dynamic initialization
+ if (!isa<ConstantPointerNull>(C) && !C->isNullValue())
+ InitListPtr->push_back(StaticInitializer(C,Offset));
+ // Null pointer initialization
+ if (TySize==4) Out << "int32 (0)";
+ else if (TySize==8) Out << "int64 (0)";
+ else assert(0 && "Invalid pointer size");
+ }
+ break;
+ default:
+ cerr << "TypeID = " << Ty->getTypeID() << '\n';
+ assert(0 && "Invalid type in printStaticConstant()");
+ }
+ // Increase offset.
+ Offset += TySize;
+}
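+// Floating-point initializers are emitted bit-exactly as same-width integers;
+// for example, a float 1.0 is printed as "int32 (1065353216)" (0x3F800000).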
+
+
+void MSILWriter::printStaticInitializer(const Constant* C,
+ const std::string& Name) {
+ switch (C->getType()->getTypeID()) {
+ case Type::IntegerTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ Out << getPrimitiveTypeName(C->getType(), false);
+ break;
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID:
+ case Type::PointerTyID:
+ Out << getTypeName(C->getType());
+ break;
+ default:
+ cerr << "Type = " << *C << "\n";
+ assert(0 && "Invalid constant type");
+ }
+ // Print initializer
+ std::string label = Name;
+ label.insert(label.length()-1,"$data");
+ Out << Name << " at " << label << '\n';
+ Out << ".data " << label << " = {\n";
+ uint64_t offset = 0;
+ printStaticConstant(C,offset);
+ Out << "\n}\n\n";
+}
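+// Illustrative emitted pair, assuming a global named 'x' of type int32:
+//   int32 'x' at 'x$data'
+//   .data 'x$data' = {
+//   int32 (42)
+//   }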
+
+
+void MSILWriter::printVariableDefinition(const GlobalVariable* G) {
+ const Constant* C = G->getInitializer();
+ if (C->isNullValue() || isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
+ InitListPtr = 0;
+ else
+ InitListPtr = &StaticInitList[G];
+ printStaticInitializer(C,getValueName(G));
+}
+
+
+void MSILWriter::printGlobalVariables() {
+ if (ModulePtr->global_empty()) return;
+ Module::global_iterator I,E;
+ for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) {
+ // Variable definition
+ Out << ".field static " << (I->isDeclaration() ? "public " :
+ "private ");
+ if (I->isDeclaration()) {
+ Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n";
+ } else
+ printVariableDefinition(&*I);
+ }
+}
+
+
+const char* MSILWriter::getLibraryName(const Function* F) {
+ return getLibraryForSymbol(F->getName().c_str(), true, F->getCallingConv());
+}
+
+
+const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
+ return getLibraryForSymbol(Mang->getValueName(GV).c_str(), false, 0);
+}
+
+
+const char* MSILWriter::getLibraryForSymbol(const char* Name, bool isFunction,
+ unsigned CallingConv) {
+ // TODO: Read a *.def file with function and library definitions.
+ return "MSVCRT.DLL";
+}
+
+
+void MSILWriter::printExternals() {
+ Module::const_iterator I,E;
+ // Functions.
+ for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) {
+ // Skip intrinsics.
+ if (I->isIntrinsic()) continue;
+ if (I->isDeclaration()) {
+ const Function* F = I;
+ std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
+ std::string Sig =
+ getCallSignature(cast<FunctionType>(F->getFunctionType()), NULL, Name);
+ Out << ".method static hidebysig pinvokeimpl(\""
+ << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n";
+ }
+ }
+ // External variables and static initialization.
+ Out <<
+ ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
+ " native int LoadLibrary(string) preservesig {}\n"
+ ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
+ " native int GetProcAddress(native int, string) preservesig {}\n";
+ Out <<
+ ".method private static void* $MSIL_Import(string lib,string sym)\n"
+ " managed cil\n{\n"
+ "\tldarg\tlib\n"
+ "\tcall\tnative int LoadLibrary(string)\n"
+ "\tldarg\tsym\n"
+ "\tcall\tnative int GetProcAddress(native int,string)\n"
+ "\tdup\n"
+ "\tbrtrue\tL_01\n"
+ "\tldstr\t\"Can no import variable\"\n"
+ "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n"
+ "\tthrow\n"
+ "L_01:\n"
+ "\tret\n"
+ "}\n\n"
+ ".method static private void $MSIL_Init() managed cil\n{\n";
+ printStaticInitializerList();
+ // For each global variable.
+ for (Module::global_iterator I = ModulePtr->global_begin(),
+ E = ModulePtr->global_end(); I!=E; ++I) {
+ if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue;
+ // Use "LoadLibrary"/"GetProcAddress" to recive variable address.
+ std::string Label = "not_null$_"+utostr(getUniqID());
+ std::string Tmp = getTypeName(I->getType())+getValueName(&*I);
+ printSimpleInstruction("ldsflda",Tmp.c_str());
+ Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n";
+ Out << "\tldstr\t\"" << Mang->getValueName(&*I) << "\"\n";
+ printSimpleInstruction("call","void* $MSIL_Import(string,string)");
+ printIndirectSave(I->getType());
+ }
+ printSimpleInstruction("ret");
+ Out << "}\n\n";
+}
+
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel)
+{
+ if (FileType != TargetMachine::AssemblyFile) return true;
+ MSILWriter* Writer = new MSILWriter(o);
+ PM.add(createGCLoweringPass());
+ PM.add(createLowerAllocationsPass(true));
+ // FIXME: Handle switch through the native IL "switch" instruction.
+ PM.add(createLowerSwitchPass());
+ PM.add(createCFGSimplificationPass());
+ PM.add(new MSILModule(Writer->UsedTypes,Writer->TD));
+ PM.add(Writer);
+ PM.add(createGCInfoDeleter());
+ return false;
+}
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
new file mode 100644
index 0000000..45f5579
--- /dev/null
+++ b/lib/Target/MSIL/MSILWriter.h
@@ -0,0 +1,255 @@
+//===-- MSILWriter.h - TargetMachine for the MSIL ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSILWriter pass used by the MSIL backend.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MSILWRITER_H
+#define MSILWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Support/Mangler.h"
+#include <ios>
+using namespace llvm;
+
+namespace {
+
+ class MSILModule : public ModulePass {
+ Module *ModulePtr;
+ const std::set<const Type *>*& UsedTypes;
+ const TargetData*& TD;
+
+ public:
+ static char ID;
+ MSILModule(const std::set<const Type *>*& _UsedTypes,
+ const TargetData*& _TD)
+ : ModulePass(&ID), UsedTypes(_UsedTypes), TD(_TD) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FindUsedTypes>();
+ AU.addRequired<TargetData>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MSIL backend definitions";
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ };
+
+ class MSILWriter : public FunctionPass {
+ struct StaticInitializer {
+ const Constant* constant;
+ uint64_t offset;
+
+ StaticInitializer()
+ : constant(0), offset(0) {}
+
+ StaticInitializer(const Constant* _constant, uint64_t _offset)
+ : constant(_constant), offset(_offset) {}
+ };
+
+ uint64_t UniqID;
+
+ uint64_t getUniqID() {
+ return ++UniqID;
+ }
+
+ public:
+ raw_ostream &Out;
+ Module* ModulePtr;
+ const TargetData* TD;
+ Mangler* Mang;
+ LoopInfo *LInfo;
+ std::vector<StaticInitializer>* InitListPtr;
+ std::map<const GlobalVariable*,std::vector<StaticInitializer> >
+ StaticInitList;
+ const std::set<const Type *>* UsedTypes;
+ static char ID;
+ MSILWriter(raw_ostream &o) : FunctionPass(&ID), Out(o) {
+ UniqID = 0;
+ }
+
+ enum ValueType {
+ UndefVT,
+ GlobalVT,
+ InternalVT,
+ ArgumentVT,
+ LocalVT,
+ ConstVT,
+ ConstExprVT
+ };
+
+ bool isVariable(ValueType V) {
+ return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT;
+ }
+
+ bool isConstValue(ValueType V) {
+ return V==ConstVT || V==ConstExprVT;
+ }
+
+ virtual const char *getPassName() const { return "MSIL backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual bool doInitialization(Module &M);
+
+ virtual bool doFinalization(Module &M);
+
+ void printModuleStartup();
+
+ bool isZeroValue(const Value* V);
+
+ std::string getValueName(const Value* V);
+
+ std::string getLabelName(const Value* V);
+
+ std::string getLabelName(const std::string& Name);
+
+ std::string getConvModopt(unsigned CallingConvID);
+
+ std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty);
+
+ std::string getPrimitiveTypeName(const Type* Ty, bool isSigned);
+
+ std::string getFunctionTypeName(const Type* Ty);
+
+ std::string getPointerTypeName(const Type* Ty);
+
+ std::string getTypeName(const Type* Ty, bool isSigned = false,
+ bool isNested = false);
+
+ ValueType getValueLocation(const Value* V);
+
+ std::string getTypePostfix(const Type* Ty, bool Expand,
+ bool isSigned = false);
+
+ void printConvToPtr();
+
+ void printPtrLoad(uint64_t N);
+
+ void printValuePtrLoad(const Value* V);
+
+ void printConstLoad(const Constant* C);
+
+ void printValueLoad(const Value* V);
+
+ void printValueSave(const Value* V);
+
+ void printBinaryInstruction(const char* Name, const Value* Left,
+ const Value* Right);
+
+ void printSimpleInstruction(const char* Inst, const char* Operand = NULL);
+
+ void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst);
+
+ void printBranchToBlock(const BasicBlock* CurrBB,
+ const BasicBlock* TrueBB,
+ const BasicBlock* FalseBB);
+
+ void printBranchInstruction(const BranchInst* Inst);
+
+ void printSelectInstruction(const Value* Cond, const Value* VTrue,
+ const Value* VFalse);
+
+ void printIndirectLoad(const Value* V);
+
+ void printIndirectSave(const Value* Ptr, const Value* Val);
+
+ void printIndirectSave(const Type* Ty);
+
+ void printCastInstruction(unsigned int Op, const Value* V,
+ const Type* Ty);
+
+ void printGepInstruction(const Value* V, gep_type_iterator I,
+ gep_type_iterator E);
+
+ std::string getCallSignature(const FunctionType* Ty,
+ const Instruction* Inst,
+ std::string Name);
+
+ void printFunctionCall(const Value* FnVal, const Instruction* Inst);
+
+ void printIntrinsicCall(const IntrinsicInst* Inst);
+
+ void printCallInstruction(const Instruction* Inst);
+
+ void printICmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right);
+
+ void printFCmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right);
+
+ void printInvokeInstruction(const InvokeInst* Inst);
+
+ void printSwitchInstruction(const SwitchInst* Inst);
+
+ void printVAArgInstruction(const VAArgInst* Inst);
+
+ void printAllocaInstruction(const AllocaInst* Inst);
+
+ void printInstruction(const Instruction* Inst);
+
+ void printLoop(const Loop* L);
+
+ void printBasicBlock(const BasicBlock* BB);
+
+ void printLocalVariables(const Function& F);
+
+ void printFunctionBody(const Function& F);
+
+ void printConstantExpr(const ConstantExpr* CE);
+
+ void printStaticInitializerList();
+
+ void printFunction(const Function& F);
+
+ void printDeclarations(const TypeSymbolTable& ST);
+
+ unsigned int getBitWidth(const Type* Ty);
+
+ void printStaticConstant(const Constant* C, uint64_t& Offset);
+
+ void printStaticInitializer(const Constant* C, const std::string& Name);
+
+ void printVariableDefinition(const GlobalVariable* G);
+
+ void printGlobalVariables();
+
+ const char* getLibraryName(const Function* F);
+
+ const char* getLibraryName(const GlobalVariable* GV);
+
+ const char* getLibraryForSymbol(const char* Name, bool isFunction,
+ unsigned CallingConv);
+
+ void printExternals();
+ };
+}
+
+#endif
+
diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile
new file mode 100644
index 0000000..94265ed
--- /dev/null
+++ b/lib/Target/MSIL/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMSIL
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts := $(CompileCommonOpts) -Wno-format
diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT
new file mode 100644
index 0000000..d797c71
--- /dev/null
+++ b/lib/Target/MSIL/README.TXT
@@ -0,0 +1,26 @@
+//===---------------------------------------------------------------------===//
+
+Vector instructions support.
+
+ShuffleVector
+ExtractElement
+InsertElement
+
+//===---------------------------------------------------------------------===//
+
+Add "OpaqueType" type.
+
+//===---------------------------------------------------------------------===//
+
+"switch" instruction emulation with CLI "switch" instruction.
+
+//===---------------------------------------------------------------------===//
+
+Write a linker for external functions, because exporting a function requires
+knowing which dynamic library the function is located in.
+
+.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl)
+ void free(void*) preservesig {}
+
+
+
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
new file mode 100644
index 0000000..6701773
--- /dev/null
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_TARGET_DEFINITIONS MSP430.td)
+
+tablegen(MSP430GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(MSP430GenRegisterNames.inc -gen-register-enums)
+tablegen(MSP430GenRegisterInfo.inc -gen-register-desc)
+tablegen(MSP430GenInstrNames.inc -gen-instr-enums)
+tablegen(MSP430GenInstrInfo.inc -gen-instr-desc)
+tablegen(MSP430GenAsmWriter.inc -gen-asm-writer)
+tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
+tablegen(MSP430GenCallingConv.inc -gen-callingconv)
+tablegen(MSP430GenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(MSP430
+ MSP430AsmPrinter.cpp
+ MSP430FrameInfo.cpp
+ MSP430InstrInfo.cpp
+ MSP430ISelDAGToDAG.cpp
+ MSP430ISelLowering.cpp
+ MSP430RegisterInfo.cpp
+ MSP430Subtarget.cpp
+ MSP430TargetAsmInfo.cpp
+ MSP430TargetMachine.cpp
+ )
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
new file mode 100644
index 0000000..ed0cd04
--- /dev/null
+++ b/lib/Target/MSP430/MSP430.h
@@ -0,0 +1,40 @@
+//==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MSP430 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_H
+#define LLVM_TARGET_MSP430_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MSP430TargetMachine;
+ class FunctionPass;
+ class raw_ostream;
+
+ FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+ FunctionPass *createMSP430CodePrinterPass(raw_ostream &o,
+ MSP430TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+} // end namespace llvm
+
+// Defines symbolic names for MSP430 registers.
+// This defines a mapping from register name to register number.
+#include "MSP430GenRegisterNames.inc"
+
+// Defines symbolic names for the MSP430 instructions.
+#include "MSP430GenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
new file mode 100644
index 0000000..89313ab
--- /dev/null
+++ b/lib/Target/MSP430/MSP430.td
@@ -0,0 +1,60 @@
+//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MSP430 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureX
+ : SubtargetFeature<"ext", "ExtendedInsts", "true",
+ "Enable MSP430-X extensions">;
+
+//===----------------------------------------------------------------------===//
+// MSP430 supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrInfo.td"
+
+def MSP430InstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MSP430 : Target {
+ let InstructionSet = MSP430InstrInfo;
+}
+
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 0000000..71b785b
--- /dev/null
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,267 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter {
+ public:
+ MSP430AsmPrinter(raw_ostream &O, MSP430TargetMachine &TM,
+ const TargetAsmInfo *TAI,
+ CodeGenOpt::Level OL, bool V)
+ : AsmPrinter(O, TM, TAI, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "MSP430 Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int OpNum);
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printMachineInstruction(const MachineInstr * MI);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ }
+ };
+} // end of anonymous namespace
+
+#include "MSP430GenAsmWriter.inc"
+
+/// createMSP430CodePrinterPass - Returns a pass that prints the MSP430
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createMSP430CodePrinterPass(raw_ostream &o,
+ MSP430TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+bool MSP430AsmPrinter::doInitialization(Module &M) {
+ Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix());
+ return false; // success
+}
+
+
+bool MSP430AsmPrinter::doFinalization(Module &M) {
+ return AsmPrinter::doFinalization(M);
+}
+
+void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ unsigned FnAlign = 4;
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ FnAlign = 1;
+
+ EmitAlignment(FnAlign, F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ",@function\n"
+ << CurrentFnName << ":\n";
+}
+
+bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print the 'header' of the function.
+ emitFunctionHeader(MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) {
+ // This is an entry block or a block that's only reachable via a
+ // fallthrough edge. In non-VerboseAsm mode, don't print the label.
+ } else {
+ printBasicBlockLabel(I, true, true, VerboseAsm);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II)
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+ O.flush();
+
+ // We didn't modify anything
+ return false;
+}
+
+void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ if (printInstruction(MI))
+ return;
+
+ assert(0 && "Should not happen");
+}
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ return;
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << '#';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ std::string Name = Mang->getValueName(MO.getGlobal());
+ assert(MO.getOffset() == 0 && "No offsets allowed!");
+
+ if (isCallOp)
+ O << '#';
+ else if (isMemOp)
+ O << '&';
+
+ O << Name;
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ if (isCallOp)
+ O << '#';
+ O << Name;
+ return;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+}
+
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+ if (Base.isGlobal())
+ printOperand(MI, OpNum, "mem");
+ else if (Disp.isImm() && !Base.getReg())
+ printOperand(MI, OpNum);
+ else if (Base.getReg()) {
+ if (Disp.getImm()) {
+ printOperand(MI, OpNum + 1, "nohash");
+ O << '(';
+ printOperand(MI, OpNum);
+ O << ')';
+ } else {
+ O << '@';
+ printOperand(MI, OpNum);
+ }
+ } else
+ assert(0 && "Unsupported memory operand");
+}
+
+void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) {
+ unsigned CC = MI->getOperand(OpNum).getImm();
+
+ switch (CC) {
+ default:
+ assert(0 && "Unsupported CC code");
+ break;
+ case MSP430::COND_E:
+ O << "eq";
+ break;
+ case MSP430::COND_NE:
+ O << "ne";
+ break;
+ case MSP430::COND_HS:
+ O << "hs";
+ break;
+ case MSP430::COND_LO:
+ O << "lo";
+ break;
+ case MSP430::COND_GE:
+ O << "ge";
+ break;
+ case MSP430::COND_L:
+ O << 'l';
+ break;
+ }
+}
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
new file mode 100644
index 0000000..ad27cc9
--- /dev/null
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -0,0 +1,37 @@
+//==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MSP430 architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MSP430 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_MSP430 : CallingConv<[
+ // i8 are returned in registers R15B, R14B, R13B, R12B
+ CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+
+ // i16 are returned in registers R15, R14, R13, R12
+ CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_MSP430 : CallingConv<[
+ // Promote i8 arguments to i16.
+ CCIfType<[i8], CCPromoteToType<i16>>,
+
+ // The first 4 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
+
+ // Integer values get stored in stack slots that are 2 bytes in
+ // size and 2-byte aligned.
+ CCIfType<[i16], CCAssignToStack<2, 2>>
+]>;
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
new file mode 100644
index 0000000..bf49ec0
--- /dev/null
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -0,0 +1,194 @@
+//===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class MSP430DAGToDAGISel : public SelectionDAGISel {
+ MSP430TargetLowering &Lowering;
+ const MSP430Subtarget &Subtarget;
+
+ public:
+ MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "MSP430 DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "MSP430GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDValue Op);
+ bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp);
+
+ #ifndef NDEBUG
+ unsigned Indent;
+ #endif
+ };
+} // end anonymous namespace
+
+/// createMSP430ISelDag - This pass converts a legalized DAG into a
+/// MSP430-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new MSP430DAGToDAGISel(TM, OptLevel);
+}
+
+// FIXME: This is a placeholder routine and needs to be rewritten in the future.
+bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Disp) {
+ // Try to match frame address first.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16);
+ Disp = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+ }
+
+ switch (Addr.getOpcode()) {
+ case ISD::ADD:
+ // The operand is the result of an ADD whose constant operand fits into i16.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ uint64_t CVal = CN->getZExtValue();
+ // The offset must survive sign-extension from 16 bits, i.e. fit in i16.
+ if (((CVal << 48) >> 48) == CVal) {
+ SDValue N0 = Addr.getOperand(0);
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N0))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16);
+ else
+ Base = N0;
+
+ Disp = CurDAG->getTargetConstant(CVal, MVT::i16);
+ return true;
+ }
+ }
+ break;
+ case MSP430ISD::Wrapper:
+ SDValue N0 = Addr.getOperand(0);
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ Base = CurDAG->getTargetGlobalAddress(G->getGlobal(),
+ MVT::i16, G->getOffset());
+ Disp = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+    } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(N0)) {
+      Base = CurDAG->getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+      Disp = CurDAG->getTargetConstant(0, MVT::i16);
+      return true;
+    }
+ break;
+  }
+
+ Base = Addr;
+ Disp = CurDAG->getTargetConstant(0, MVT::i16);
+
+ return true;
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void MSP430DAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Codegen the basic block.
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection begins:\n";
+ Indent = 0;
+#endif
+ SelectRoot(*CurDAG);
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection ends:\n";
+#endif
+
+ CurDAG->RemoveDeadNodes();
+}
+
+SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Dump information about the Node being selected
+ #ifndef NDEBUG
+ DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent += 2;
+ #endif
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "== ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+ return NULL;
+ }
+
+  // A few custom selection cases.
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
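+    // Materialize the frame address as an ADD16ri of the target frame
+    // index plus 0; the frame index is later rewritten to a stack/frame
+    // pointer plus a concrete offset during prolog/epilog insertion.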
+ assert(Op.getValueType() == MVT::i16);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ return CurDAG->getTargetNode(MSP430::ADD16ri, dl, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Op);
+
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == Op.getNode())
+ DEBUG(Op.getNode()->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+
+ return ResNode;
+}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
new file mode 100644
index 0000000..14db20e
--- /dev/null
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -0,0 +1,670 @@
+//===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-lower"
+
+#include "MSP430ISelLowering.h"
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "MSP430Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
+ TargetLowering(tm), Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
+ addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+  // Even though we only have single-bit shift instructions, we can emulate
+  // shifts of any width one bit per step.
+ setShiftAmountType(MVT::i8);
+
+ setStackPointerRegisterToSaveRestore(MSP430::SPW);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setSchedulingPreference(SchedulingForLatency);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // We don't have any truncstores
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i16, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Expand);
+ setOperationAction(ISD::SETCC, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
+
+  // FIXME: Implement multiplication by a constant efficiently
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+}
+
+SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::SHL: // FALLTHROUGH
+ case ISD::SRL:
+ case ISD::SRA: return LowerShifts(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ default:
+ assert(0 && "unimplemented operand");
+ return SDValue();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MSP430GenCallingConv.inc"
+
+SDValue MSP430TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
+ SelectionDAG &DAG) {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (CC) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Op, DAG);
+ }
+}
+
+SDValue MSP430TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ unsigned CallingConv = TheCall->getCallingConv();
+ switch (CallingConv) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Op, DAG, CallingConv);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into virtual registers and
+/// generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op,
+ SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MSP430);
+
+ assert(!isVarArg && "Varargs not supported yet");
+
+ SmallVector<SDValue, 16> ArgValues;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ MVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT()) {
+ default:
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << RegVT.getSimpleVT()
+ << "\n";
+ abort();
+ case MVT::i16:
+ unsigned VReg =
+ RegInfo.createVirtualRegister(MSP430::GR16RegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, VReg, RegVT);
+
+ // If this is an 8-bit value, it is really passed promoted to 16
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ ArgValues.push_back(ArgValue);
+ }
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << VA.getLocVT().getSimpleVT()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset());
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0));
+ }
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+ // CCValAssign - represent the assignment of the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+  // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_MSP430);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ // The chain is always operand #0
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // ISD::RET => ret chain, (regnum1,val1), ...
+ // So i*2+1 index only the regnums
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+    // Glue the copies together so that they are emitted back-to-back and
+    // nothing can be scheduled in between them.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+/// LowerCCCCallTo - functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: sret.
+SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
+ unsigned CC) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ CCInfo.AnalyzeCallOperands(TheCall, CC_MSP430);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+    // Arguments start after the first 5 operands of ISD::CALL
+ SDValue Arg = TheCall->getArg(i);
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that are passed in registers are collected in the RegsToPass
+    // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ PseudoSourceValue::getStack(),
+ VA.getLocMemOffset()));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store nodes are
+ // independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i16);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+ Op.getResNo());
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode*
+MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallSDNode *TheCall,
+ unsigned CallingConv,
+ SelectionDAG &DAG) {
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_MSP430);
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
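+    // getCopyFromReg yields (value, chain, flag); taking getValue(1) leaves
+    // Chain pointing at that node, so getValue(0) below is the register
+    // value and getValue(2) the glue consumed by the next copy.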
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ ResultVals.push_back(Chain);
+
+ // Merge everything together with a MERGE_VALUES node.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
+ SelectionDAG &DAG) {
+ unsigned Opc = Op.getOpcode();
+ SDNode* N = Op.getNode();
+ MVT VT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+  // We currently only lower shifts by a constant amount.
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+
+  // Expand into a sequence of single-bit shifts.
+ // FIXME: for some shift amounts this might be done better!
+ // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
+ SDValue Victim = N->getOperand(0);
+
+ if (Opc == ISD::SRL && ShiftAmount) {
+    // Handle the first step of a logical right shift specially:
+ // srl A, 1 => clrc; rrc A
+ Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ ShiftAmount -= 1;
+ }
+
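+  // For illustration, srl A, 3 expands to:
+  //   clrc; rrc A   ; rotate a zero carry into the MSB
+  //   rra A; rra A  ; the MSB is now zero, so arithmetic right shifts
+  //                 ; behave as logical ones for the remaining steps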
+ while (ShiftAmount--)
+ Victim = DAG.getNode((Opc == ISD::SHL ? MSP430ISD::RLA : MSP430ISD::RRA),
+ dl, VT, Victim);
+
+ return Victim;
+}
+
+SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ // Create the TargetGlobalAddress node, folding in the constant offset.
+ SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
+ getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+
+  return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
+static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, unsigned &TargetCC,
+ ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ // FIXME: Handle bittests someday
+ assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet");
+
+ // FIXME: Handle jump negative someday
+ TargetCC = MSP430::COND_INVALID;
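+  // MSP430 has no jump-if-less-or-equal / jump-if-greater condition codes,
+  // so those orderings are obtained by swapping the CMP operands below;
+  // e.g. (a <= b) is evaluated as (b >= a).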
+ switch (CC) {
+ default: assert(0 && "Invalid integer condition!");
+ case ISD::SETEQ:
+ TargetCC = MSP430::COND_E; // aka COND_Z
+ break;
+ case ISD::SETNE:
+ TargetCC = MSP430::COND_NE; // aka COND_NZ
+ break;
+ case ISD::SETULE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETUGE:
+ TargetCC = MSP430::COND_HS; // aka COND_C
+ break;
+ case ISD::SETUGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETULT:
+ TargetCC = MSP430::COND_LO; // aka COND_NC
+ break;
+ case ISD::SETLE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETGE:
+ TargetCC = MSP430::COND_GE;
+ break;
+ case ISD::SETGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETLT:
+ TargetCC = MSP430::COND_L;
+ break;
+ }
+
+ return DAG.getNode(MSP430ISD::CMP, dl, MVT::Flag, LHS, RHS);
+}
+
+SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned TargetCC = MSP430::COND_INVALID;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ return DAG.getNode(MSP430ISD::BR_CC, dl, Op.getValueType(),
+ Chain,
+ Dest, DAG.getConstant(TargetCC, MVT::i8),
+ Flag);
+}
+
+SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned TargetCC = MSP430::COND_INVALID;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(DAG.getConstant(TargetCC, MVT::i8));
+ Ops.push_back(Flag);
+
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) {
+ SDValue Val = Op.getOperand(0);
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert(VT == MVT::i16 && "Only support i16 for now!");
+
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Val),
+ DAG.getValueType(Val.getValueType()));
+}
+
+const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
+ case MSP430ISD::RRA: return "MSP430ISD::RRA";
+ case MSP430ISD::RLA: return "MSP430ISD::RLA";
+ case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::CALL: return "MSP430ISD::CALL";
+ case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
+ case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
+ case MSP430ISD::CMP: return "MSP430ISD::CMP";
+ case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == MSP430::Select16 ||
+ MI->getOpcode() == MSP430::Select8) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(copy1MBB)
+ .addImm(MI->getOperand(3).getImm());
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(BB, dl, TII.get(MSP430::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
new file mode 100644
index 0000000..404534d
--- /dev/null
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -0,0 +1,103 @@
+//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MSP430 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H
+#define LLVM_TARGET_MSP430_ISELLOWERING_H
+
+#include "MSP430.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace MSP430ISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// Y = R{R,L}A X, rotate right (left) arithmetically
+ RRA, RLA,
+
+ /// Y = RRC X, rotate right via carry
+ RRC,
+
+    /// CALL/TAILCALL - These operations represent an abstract call
+    /// instruction, which carries the callee, the argument registers, and
+    /// the chain and flag operands.
+ CALL,
+
+ /// Wrapper - A wrapper node for TargetConstantPool, TargetExternalSymbol,
+ /// and TargetGlobalAddress.
+ Wrapper,
+
+ /// CMP - Compare instruction.
+ CMP,
+
+ /// SetCC. Operand 0 is condition code, and operand 1 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// MSP430 conditional branches. Operand 0 is the chain operand, operand 1
+    /// is the block to branch to if the condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// instruction.
+ BR_CC,
+
+    /// SELECT_CC. Operands 0 and 1 are the values to select between, operand 2
+    /// is the condition code, and operand 3 is the flag operand produced by a
+    /// CMP instruction.
+ SELECT_CC
+ };
+ }
+
+ class MSP430Subtarget;
+ class MSP430TargetMachine;
+
+ class MSP430TargetLowering : public TargetLowering {
+ public:
+ explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShifts(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG);
+
+ SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
+ unsigned CC);
+ SDNode* LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ private:
+ const MSP430Subtarget &Subtarget;
+ const MSP430TargetMachine &TM;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
diff --git a/lib/Target/MSP430/MSP430InstrFormats.td b/lib/Target/MSP430/MSP430InstrFormats.td
new file mode 100644
index 0000000..61b3399
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrFormats.td
@@ -0,0 +1,67 @@
+//===- MSP430InstrFormats.td - MSP430 Instruction Formats ----*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe the MSP430 instruction formats here
+//
+
+// Generic MSP430 Format
+class MSP430Inst<dag outs, dag ins, string asmstr> : Instruction {
+ field bits<16> Inst;
+
+ let Namespace = "MSP430";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+}
+
+// FIXME: Create different classes for different addressing modes.
+
+// MSP430 Double Operand (Format I) Instructions
+class IForm<bits<4> opcode, bit ad, bit bw, bits<2> as,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{12-15} = opcode;
+ let Inst{7} = ad;
+ let Inst{6} = bw;
+ let Inst{4-5} = as;
+}
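+
+// Note: in the actual MSP430 Format I encoding the remaining fields are the
+// source register in Inst{8-11} and the destination register in Inst{0-3};
+// they are not modelled here yet, since all instructions are still emitted
+// as pseudos.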
+
+// MSP430 Single Operand (Format II) Instructions
+class IIForm<bits<9> opcode, bit bw, bits<2> ad,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{7-15} = opcode;
+ let Inst{6} = bw;
+ let Inst{4-5} = ad;
+}
+
+// MSP430 Conditional Jumps Instructions
+class CJForm<bits<3> opcode, bits<3> cond, bit s,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{13-15} = opcode;
+ let Inst{10-12} = cond;
+ let Inst{9} = s;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, asmstr> {
+ let Pattern = pattern;
+ let Inst{15-0} = 0;
+}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
new file mode 100644
index 0000000..91112c3
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -0,0 +1,177 @@
+//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "MSP430GenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+using namespace llvm;
+
+MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+ : TargetInstrInfoImpl(MSP430Insts, array_lengthof(MSP430Insts)),
+ RI(tm, *this), TM(tm) {}
+
+void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else
+ assert(0 && "Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+ else
+ assert(0 && "Cannot store this register to stack slot!");
+}
+
+bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC == SrcRC) {
+ unsigned Opc;
+ if (DestRC == &MSP430::GR16RegClass) {
+ Opc = MSP430::MOV16rr;
+ } else if (DestRC == &MSP430::GR8RegClass) {
+ Opc = MSP430::MOV8rr;
+ } else {
+ return false;
+ }
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ return false;
+}
+
+bool
+MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers yet.
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case MSP430::MOV8rr:
+ case MSP430::MOV16rr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid register-register move instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+}
+
+bool
+MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, get(MSP430::PUSH16r))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool
+MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ BuildMI(MBB, MI, DL, get(MSP430::POP16r), CSI[i].getReg());
+
+ return true;
+}
+
+unsigned
+MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+  // FIXME: this should probably have a DebugLoc operand
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "MSP430 branch conditions have one component!");
+
+ if (Cond.empty()) {
+    // Unconditional branch.
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, dl, get(MSP430::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ assert(0 && "Implement conditional branches!");
+
+ return Count;
+}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
new file mode 100644
index 0000000..e07aaca
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -0,0 +1,84 @@
+//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430INSTRINFO_H
+#define LLVM_TARGET_MSP430INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MSP430RegisterInfo.h"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+namespace MSP430 {
+ // MSP430 specific condition code.
+ enum CondCode {
+ COND_E = 0, // aka COND_Z
+ COND_NE = 1, // aka COND_NZ
+ COND_HS = 2, // aka COND_C
+ COND_LO = 3, // aka COND_NC
+ COND_GE = 4,
+ COND_L = 5,
+
+ COND_INVALID
+ };
+}
+
+class MSP430InstrInfo : public TargetInstrInfoImpl {
+ const MSP430RegisterInfo RI;
+ MSP430TargetMachine &TM;
+public:
+ explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
new file mode 100644
index 0000000..39c08e4
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -0,0 +1,901 @@
+//===- MSP430InstrInfo.td - MSP430 Instruction defs ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MSP430 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
+def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>]>;
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
+def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
+def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+
+def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+def MSP430callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def MSP430callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_MSP430CallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
+def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+def memsrc : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+def memdst : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+// Operand for printing out a condition code.
+def cc : Operand<i8> {
+ let PrintMethod = "printCCOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Complex Pattern Definitions.
+//===----------------------------------------------------------------------===//
+
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+// ADJCALLSTACKDOWN/UP implicitly use/def SP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber SRW.
+let Defs = [SPW, SRW], Uses = [SPW] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(MSP430callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let usesCustomDAGSchedInserter = 1 in {
+ def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc),
+ "# Select8 PSEUDO",
+ [(set GR8:$dst,
+ (MSP430selectcc GR8:$src1, GR8:$src2, imm:$cc))]>;
+ def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cc),
+ "# Select16 PSEUDO",
+ [(set GR16:$dst,
+ (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>;
+}
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "nop", []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1 in {
+ def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+
+// Direct branch
+let isBarrier = 1 in
+ def JMP : Pseudo<(outs), (ins brtarget:$dst),
+ "jmp\t$dst",
+ [(br bb:$dst)]>;
+
+// Conditional branches
+let Uses = [SRW] in
+ def JCC : Pseudo<(outs), (ins brtarget:$dst, cc:$cc),
+ "j$cc $dst",
+ [(MSP430brcc bb:$dst, imm:$cc)]>;
+} // isBranch, isTerminator
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. SPW is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [R12W, R13W, R14W, R15W, SRW],
+ Uses = [SPW] in {
+ def CALLi : Pseudo<(outs), (ins i16imm:$dst, variable_ops),
+ "call\t${dst:call}", [(MSP430call imm:$dst)]>;
+ def CALLr : Pseudo<(outs), (ins GR16:$dst, variable_ops),
+ "call\t$dst", [(MSP430call GR16:$dst)]>;
+ def CALLm : Pseudo<(outs), (ins memsrc:$dst, variable_ops),
+ "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
+ }
+
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP16r : Pseudo<(outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+
+let mayStore = 1 in
+def PUSH16r : Pseudo<(outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// FIXME: Provide proper encoding!
+let neverHasSideEffects = 1 in {
+def MOV8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ []>;
+def MOV16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "mov.w\t{$src, $dst}",
+ []>;
+}
+
+// FIXME: Provide proper encoding!
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : Pseudo<(outs GR8:$dst), (ins i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Pseudo<(outs GR16:$dst), (ins i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, imm:$src)]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+def MOV8rm : Pseudo<(outs GR8:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, (load addr:$src))]>;
+def MOV16rm : Pseudo<(outs GR16:$dst), (ins memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, (load addr:$src))]>;
+}
+
+def MOVZX16rr8 : Pseudo<(outs GR16:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext GR8:$src))]>;
+def MOVZX16rm8 : Pseudo<(outs GR16:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
+
+// Any instruction that defines an 8-bit result clears the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
+// be copying from a truncate, but any other 8-bit operation will zero-extend
+// up to 16 bits.
+def def8 : PatLeaf<(i8 GR8:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of an 8-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i16 (zext def8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
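+
+// For illustration: (i16 (zext (add GR8:$a, GR8:$b))) can be selected as a
+// SUBREG_TO_REG of the 8-bit add, while (i16 (zext (trunc GR16:$x))) cannot,
+// since the truncate gives no guarantee about the high byte.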
+
+def MOV8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 imm:$src), addr:$dst)]>;
+
+def MOV8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "mov.w\t{$src, $dst}",
+ [(store GR16:$src, addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let isTwoAddress = 1 in {
+
+let Defs = [SRW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+// FIXME: Provide proper encoding!
+def ADD8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def ADD16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def ADD8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADD16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+def ADD8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def ADD16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def ADD8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+
+let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
+def ADC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def ADC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+} // isCommutable
+
+def ADC8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def ADC16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def ADC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def ADC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def AND16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def AND8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def AND16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def AND8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def AND16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def AND8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def AND16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def AND16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def AND16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def XOR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def XOR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def XOR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def XOR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def XOR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def XOR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+
+def SUB8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def SUB16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SUB8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def SUB16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def SUB8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def SUB16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def SUB8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+def SBC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def SBC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SBC8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def SBC16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def SBC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def SBC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def SBC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+// FIXME: Provide proper encoding!
+def SAR8r1 : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "rra.b\t$dst",
+ [(set GR8:$dst, (MSP430rra GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1 : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "rra.w\t$dst",
+ [(set GR16:$dst, (MSP430rra GR16:$src)),
+ (implicit SRW)]>;
+
+def SHL8r1 : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "rla.b\t$dst",
+ [(set GR8:$dst, (MSP430rla GR8:$src)),
+ (implicit SRW)]>;
+def SHL16r1 : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "rla.w\t$dst",
+ [(set GR16:$dst, (MSP430rla GR16:$src)),
+ (implicit SRW)]>;
+
+def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "clrc\n\t"
+ "rrc.b\t$dst",
+ [(set GR8:$dst, (MSP430rrc GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "clrc\n\t"
+ "rrc.w\t$dst",
+ [(set GR16:$dst, (MSP430rrc GR16:$src)),
+ (implicit SRW)]>;
+
+def SEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "sxt\t$dst",
+ [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+ (implicit SRW)]>;
+
+} // Defs = [SRW]
+
+def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "swpb\t$dst",
+ [(set GR16:$dst, (bswap GR16:$src))]>;
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
+def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
+}
+
+def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
+def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
+
+def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
+def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
+
+let isTwoAddress = 0 in {
+def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // isTwoAddress = 1
+
+// Integer comparisons
+let Defs = [SRW] in {
+def CMP8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>;
+def CMP16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>;
+
+def CMP8ir : Pseudo<(outs), (ins i8imm:$src1, GR8:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp imm:$src1, GR8:$src2), (implicit SRW)]>;
+def CMP16ir : Pseudo<(outs), (ins i16imm:$src1, GR16:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp imm:$src1, GR16:$src2), (implicit SRW)]>;
+
+def CMP8im : Pseudo<(outs), (ins i8imm:$src1, memsrc:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp (i8 imm:$src1), (load addr:$src2)), (implicit SRW)]>;
+def CMP16im : Pseudo<(outs), (ins i16imm:$src1, memsrc:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp (i16 imm:$src1), (load addr:$src2)), (implicit SRW)]>;
+
+// FIXME: imm is allowed only on src operand, not on dst.
+
+//def CMP8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2),
+// "cmp.b\t{$src1, $src2}",
+// [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>;
+//def CMP16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2),
+// "cmp.w\t{$src1, $src2}",
+// [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>;
+
+//def CMP8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2),
+// "cmp.b\t{$src1, $src2}",
+// [(MSP430cmp (load addr:$src1), (i8 imm:$src2)), (implicit SRW)]>;
+//def CMP16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2),
+// "cmp.w\t{$src1, $src2}",
+// [(MSP430cmp (load addr:$src1), (i16 imm:$src2)), (implicit SRW)]>;
+
+
+// Imm 0, +1, +2, +4, +8 are encoded via constant generator registers.
+// That's why we can use them as dest operands.
+// We don't define a new operand class for them, since they would need special
+// encoding in the future anyway.
+
+def CMP8ri0 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #0}",
+ [(MSP430cmp GR8:$src1, 0), (implicit SRW)]>;
+def CMP16ri0: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #0}",
+ [(MSP430cmp GR16:$src1, 0), (implicit SRW)]>;
+def CMP8ri1 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #1}",
+ [(MSP430cmp GR8:$src1, 1), (implicit SRW)]>;
+def CMP16ri1: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #1}",
+ [(MSP430cmp GR16:$src1, 1), (implicit SRW)]>;
+def CMP8ri2 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #2}",
+ [(MSP430cmp GR8:$src1, 2), (implicit SRW)]>;
+def CMP16ri2: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #2}",
+ [(MSP430cmp GR16:$src1, 2), (implicit SRW)]>;
+def CMP8ri4 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #4}",
+ [(MSP430cmp GR8:$src1, 4), (implicit SRW)]>;
+def CMP16ri4: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #4}",
+ [(MSP430cmp GR16:$src1, 4), (implicit SRW)]>;
+def CMP8ri8 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #8}",
+ [(MSP430cmp GR8:$src1, 8), (implicit SRW)]>;
+def CMP16ri8: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #8}",
+ [(MSP430cmp GR16:$src1, 8), (implicit SRW)]>;
+
+def CMP8rm : Pseudo<(outs), (ins GR8:$src1, memsrc:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp GR8:$src1, (load addr:$src2)), (implicit SRW)]>;
+def CMP16rm : Pseudo<(outs), (ins GR16:$src1, memsrc:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp GR16:$src1, (load addr:$src2)), (implicit SRW)]>;
+
+def CMP8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp (load addr:$src1), GR8:$src2), (implicit SRW)]>;
+def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>;
+
+def CMP8mi0 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #0}",
+ [(MSP430cmp (load addr:$src1), (i8 0)), (implicit SRW)]>;
+def CMP16mi0: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #0}",
+ [(MSP430cmp (load addr:$src1), (i16 0)), (implicit SRW)]>;
+def CMP8mi1 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #1}",
+ [(MSP430cmp (load addr:$src1), (i8 1)), (implicit SRW)]>;
+def CMP16mi1: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #1}",
+ [(MSP430cmp (load addr:$src1), (i16 1)), (implicit SRW)]>;
+def CMP8mi2 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #2}",
+ [(MSP430cmp (load addr:$src1), (i8 2)), (implicit SRW)]>;
+def CMP16mi2: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #2}",
+ [(MSP430cmp (load addr:$src1), (i16 2)), (implicit SRW)]>;
+def CMP8mi4 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #4}",
+ [(MSP430cmp (load addr:$src1), (i8 4)), (implicit SRW)]>;
+def CMP16mi4: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #4}",
+ [(MSP430cmp (load addr:$src1), (i16 4)), (implicit SRW)]>;
+def CMP8mi8 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #8}",
+ [(MSP430cmp (load addr:$src1), (i8 8)), (implicit SRW)]>;
+def CMP16mi8: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #8}",
+ [(MSP430cmp (load addr:$src1), (i16 8)), (implicit SRW)]>;
+
+} // Defs = [SRW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+
+// extload
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+
+// anyext
+def : Pat<(i16 (anyext GR8:$src)), (MOVZX16rr8 GR8:$src)>;
+
+// truncs
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, subreg_8bit)>;
+
+// GlobalAddress, ExternalSymbol
+def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
+def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
+
+def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)),
+ (ADD16ri GR16:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)),
+ (ADD16ri GR16:$src1, texternalsym:$src2)>;
+
+def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst),
+ (MOV16mi addr:$dst, texternalsym:$src)>;
+
+// calls
+def : Pat<(MSP430call (i16 tglobaladdr:$dst)),
+ (CALLi tglobaladdr:$dst)>;
+def : Pat<(MSP430call (i16 texternalsym:$dst)),
+ (CALLi texternalsym:$dst)>;
+
+// add and sub always produce carry
+def : Pat<(addc GR16:$src1, GR16:$src2),
+ (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(addc GR16:$src1, (load addr:$src2)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(addc GR16:$src1, imm:$src2),
+ (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
+ (ADD16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (ADD16mm addr:$dst, addr:$src)>;
+
+def : Pat<(addc GR8:$src1, GR8:$src2),
+ (ADD8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(addc GR8:$src1, (load addr:$src2)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(addc GR8:$src1, imm:$src2),
+ (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
+ (ADD8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (ADD8mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR16:$src1, GR16:$src2),
+ (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(subc GR16:$src1, (load addr:$src2)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(subc GR16:$src1, imm:$src2),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
+ (SUB16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (SUB16mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR8:$src1, GR8:$src2),
+ (SUB8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(subc GR8:$src1, (load addr:$src2)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(subc GR8:$src1, imm:$src2),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
+ (SUB8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (SUB8mm addr:$dst, addr:$src)>;
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
new file mode 100644
index 0000000..b94d7e4
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -0,0 +1,39 @@
+//===- MSP430MachineFunctionInfo.h - MSP430 machine function info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares MSP430-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MACHINEFUNCTIONINFO_H
+#define MSP430MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// MSP430MachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private MSP430 target-specific information for each
+/// MachineFunction.
+class MSP430MachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+public:
+ MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+
+ MSP430MachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
new file mode 100644
index 0000000..ef6f997
--- /dev/null
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -0,0 +1,355 @@
+//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-reg-info"
+
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+
+using namespace llvm;
+
+// FIXME: Provide proper call frame setup / destroy opcodes.
+MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
+ TM(tm), TII(tii) {
+ StackAlign = TM.getFrameInfo()->getStackAlignment();
+}
+
+const unsigned*
+MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ 0
+ };
+
+ return CalleeSavedRegClasses;
+}
+
+BitVector
+MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+
+ // Mark 4 special registers as reserved.
+ Reserved.set(MSP430::PCW);
+ Reserved.set(MSP430::SPW);
+ Reserved.set(MSP430::SRW);
+ Reserved.set(MSP430::CGW);
+
+ // Mark frame pointer as reserved if needed.
+ if (hasFP(MF))
+ Reserved.set(MSP430::FPW);
+
+ return Reserved;
+}
+
+const TargetRegisterClass* MSP430RegisterInfo::getPointerRegClass() const {
+ return &MSP430::GR16RegClass;
+}
+
+
+bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
+ return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+bool MSP430RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+    // If the stack pointer can be changed after prologue, turn the
+    // adjcallstackdown instruction into a 'sub SPW, <amt>' and the
+    // adjcallstackup instruction into 'add SPW, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
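+      // For example, with StackAlign == 2 this rounds Amount == 5 up to
+      // ((5 + 2 - 1) / 2) * 2 == 6, the next 2-byte boundary.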
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == getCallFrameDestroyOpcode());
+        // Factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // Skip the saved PC
+ Offset += 2;
+
+ if (!hasFP(MF))
+ Offset += MF.getFrameInfo()->getStackSize();
+ else
+ Offset += 2; // Skip the saved FPW
+
+ // Fold imm into offset
+ Offset += MI.getOperand(i+1).getImm();
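+  // Illustrative arithmetic: in a frameless function with StackSize == 8, a
+  // slot at object offset -2 and a zero immediate give Offset == -2+2+8 == 8.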
+
+ if (MI.getOpcode() == MSP430::ADD16ri) {
+    // This ADD16ri is actually a "load effective address" of the stack slot.
+    // Since we only have two-address instructions, we need to expand it into
+    // a mov + add sequence.
+
+ MI.setDesc(TII.get(MSP430::MOV16rr));
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ if (Offset == 0)
+ return;
+
+ // We need to materialize the offset via add instruction.
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (Offset < 0)
+ BuildMI(MBB, next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+ .addReg(DstReg).addImm(-Offset);
+ else
+ BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+ .addReg(DstReg).addImm(Offset);
+
+ return;
+ }
+
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+void
+MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ // Create a frame entry for the FPW register that must be saved.
+ if (hasFP(MF)) {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4);
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for FPW register must be last in order to be found!");
+ FrameIdx = 0;
+ }
+}
+
+
+void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+ DebugLoc::getUnknownLoc());
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
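+    // e.g. StackSize == 10 with a 4-byte callee-saved area gives
+    // FrameSize == 8 and NumBytes == 4.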
+
+    // Get the offset of the stack slot for the FPW register, which is
+    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save FPW into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(MSP430::FPW, RegState::Kill);
+
+ // Update FPW with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+ .addReg(MSP430::SPW);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(MSP430::FPW);
+
+ } else
+ NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+ // If there is an SUB16ri of SPW immediately before this instruction, merge
+ // the two.
+ //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+ // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // instruction, merge the two instructions.
+ // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case MSP430::RET: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - CSSize;
+
+ // pop FPW.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ } else
+ NumBytes = StackSize - CSSize;
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // instruction, merge the two instructions.
+ //if (NumBytes || MFI->hasVarSizedObjects())
+ // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (MFI->hasVarSizedObjects()) {
+ assert(0 && "Not implemented yet!");
+ } else {
+ // adjust stack pointer back: SPW += numbytes
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+unsigned MSP430RegisterInfo::getRARegister() const {
+ return MSP430::PCW;
+}
+
+unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+}
+
+int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "Not implemented yet!");
+}
+
+#include "MSP430GenRegisterInfo.inc"
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
new file mode 100644
index 0000000..a210e36
--- /dev/null
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -0,0 +1,70 @@
+//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430REGISTERINFO_H
+#define LLVM_TARGET_MSP430REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "MSP430GenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class MSP430TargetMachine;
+
+struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
+private:
+ MSP430TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ /// StackAlign - Default stack alignment.
+ ///
+ unsigned StackAlign;
+public:
+ MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ const TargetRegisterClass* getPointerRegClass() const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430REGISTERINFO_H
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
new file mode 100644
index 0000000..4078626
--- /dev/null
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -0,0 +1,122 @@
+//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MSP430 register file
+//===----------------------------------------------------------------------===//
+
+class MSP430Reg<bits<4> num, string n> : Register<n> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+def PCB : MSP430Reg<0, "r0">;
+def SPB : MSP430Reg<1, "r1">;
+def SRB : MSP430Reg<2, "r2">;
+def CGB : MSP430Reg<3, "r3">;
+def FPB : MSP430Reg<4, "r4">;
+def R5B : MSP430Reg<5, "r5">;
+def R6B : MSP430Reg<6, "r6">;
+def R7B : MSP430Reg<7, "r7">;
+def R8B : MSP430Reg<8, "r8">;
+def R9B : MSP430Reg<9, "r9">;
+def R10B : MSP430Reg<10, "r10">;
+def R11B : MSP430Reg<11, "r11">;
+def R12B : MSP430Reg<12, "r12">;
+def R13B : MSP430Reg<13, "r13">;
+def R14B : MSP430Reg<14, "r14">;
+def R15B : MSP430Reg<15, "r15">;
+
+def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+
+def : SubRegSet<1, [PCW, SPW, SRW, CGW, FPW,
+ R5W, R6W, R7W, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
+ [PCB, SPB, SRB, CGB, FPB,
+ R5B, R6B, R7B, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def subreg_8bit : PatLeaf<(i32 1)>;
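+// subreg_8bit names subregister index 1 (defined by the SubRegSet above), so
+// EXTRACT_SUBREG patterns can select the low 8-bit half of a 16-bit register.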
+
+def GR8 : RegisterClass<"MSP430", [i8], 8,
+ // Volatile registers
+ [R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+ // Frame pointer, sometimes allocable
+ FPB,
+ // Volatile, but not allocable
+ PCB, SPB, SRB, CGB]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR8Class::iterator
+ GR8Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      // Depending on whether the function uses a frame pointer or not, the
+      // last 5 or 4 registers on the list above are reserved.
+ if (RI->hasFP(MF))
+ return end()-5;
+ else
+ return end()-4;
+ }
+ }];
+}
+
+def GR16 : RegisterClass<"MSP430", [i16], 16,
+ // Volatile registers
+ [R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ // Frame pointer, sometimes allocable
+ FPW,
+ // Volatile, but not allocable
+ PCW, SPW, SRW, CGW]>
+{
+ let SubRegClassList = [GR8];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR16Class::iterator
+ GR16Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      // Depending on whether the function uses a frame pointer or not, the
+      // last 5 or 4 registers on the list above are reserved.
+ if (RI->hasFP(MF))
+ return end()-5;
+ else
+ return end()-4;
+ }
+ }];
+}
+
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
new file mode 100644
index 0000000..ef9e103
--- /dev/null
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -0,0 +1,27 @@
+//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430Subtarget.h"
+#include "MSP430.h"
+#include "MSP430GenSubtarget.inc"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+MSP430Subtarget::MSP430Subtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS) {
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
new file mode 100644
index 0000000..96c8108
--- /dev/null
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -0,0 +1,41 @@
+//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
+#define LLVM_TARGET_MSP430_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+class TargetMachine;
+
+class MSP430Subtarget : public TargetSubtarget {
+ bool ExtendedInsts;
+public:
+  /// This constructor initializes the data members to match those
+  /// of the specified module.
+ ///
+ MSP430Subtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/lib/Target/MSP430/MSP430TargetAsmInfo.cpp b/lib/Target/MSP430/MSP430TargetAsmInfo.cpp
new file mode 100644
index 0000000..ab181de
--- /dev/null
+++ b/lib/Target/MSP430/MSP430TargetAsmInfo.cpp
@@ -0,0 +1,22 @@
+//===-- MSP430TargetAsmInfo.cpp - MSP430 asm properties -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MSP430TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430TargetAsmInfo.h"
+#include "MSP430TargetMachine.h"
+
+using namespace llvm;
+
+MSP430TargetAsmInfo::MSP430TargetAsmInfo(const MSP430TargetMachine &TM)
+ : ELFTargetAsmInfo(TM) {
+ AlignmentIsInBytes = false;
+}
diff --git a/lib/Target/MSP430/MSP430TargetAsmInfo.h b/lib/Target/MSP430/MSP430TargetAsmInfo.h
new file mode 100644
index 0000000..b58d5c9
--- /dev/null
+++ b/lib/Target/MSP430/MSP430TargetAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- MSP430TargetAsmInfo.h - MSP430 asm properties -------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MSP430TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430TARGETASMINFO_H
+#define MSP430TARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class MSP430TargetMachine;
+
+ struct MSP430TargetAsmInfo : public ELFTargetAsmInfo {
+ explicit MSP430TargetAsmInfo(const MSP430TargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
new file mode 100644
index 0000000..7886946
--- /dev/null
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -0,0 +1,76 @@
+//===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetAsmInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+
+using namespace llvm;
+
+/// MSP430TargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int MSP430TargetMachineModule;
+int MSP430TargetMachineModule = 0;
+
+
+// Register the targets
+static RegisterTarget<MSP430TargetMachine>
+X("msp430", "MSP430 [experimental]");
+
+MSP430TargetMachine::MSP430TargetMachine(const Module &M,
+ const std::string &FS) :
+ Subtarget(*this, M, FS),
+ // FIXME: Check TargetData string.
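+  // (Reading the string: "e" = little-endian; "p:16:8:8" = 16-bit pointers
+  // with 8-bit ABI and preferred alignment; the "iN:8:8" entries give the
+  // same alignments for the integer types.)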
+ DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
+ InstrInfo(*this), TLInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
+
+const TargetAsmInfo *MSP430TargetMachine::createTargetAsmInfo() const {
+ return new MSP430TargetAsmInfo(*this);
+}
+
+bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createMSP430ISelDag(*this, OptLevel));
+ return false;
+}
+
+bool MSP430TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ // Output assembly language.
+ PM.add(createMSP430CodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
+
+unsigned MSP430TargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+
+ // We strongly match msp430
+ if (TT.size() >= 6 && TT[0] == 'm' && TT[1] == 's' && TT[2] == 'p' &&
+ TT[3] == '4' && TT[4] == '3' && TT[5] == '0')
+ return 20;
+
+ return 0;
+}
+
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
new file mode 100644
index 0000000..d9ffa2b
--- /dev/null
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -0,0 +1,68 @@
+//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
+#define LLVM_TARGET_MSP430_TARGETMACHINE_H
+
+#include "MSP430InstrInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// MSP430TargetMachine
+///
+class MSP430TargetMachine : public LLVMTargetMachine {
+ MSP430Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MSP430InstrInfo InstrInfo;
+ MSP430TargetLowering TLInfo;
+
+  // MSP430 does not have any call stack frame, so there is no need for an
+  // MSP430-specific FrameInfo subclass.
+ TargetFrameInfo FrameInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ MSP430TargetMachine(const Module &M, const std::string &FS);
+
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual MSP430TargetLowering *getTargetLowering() const {
+ return const_cast<MSP430TargetLowering*>(&TLInfo);
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel, bool Verbose,
+ raw_ostream &Out);
+ static unsigned getModuleMatchQuality(const Module &M);
+}; // MSP430TargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H
diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile
new file mode 100644
index 0000000..45cb3aa
--- /dev/null
+++ b/lib/Target/MSP430/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/MSP430/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMMSP430
+TARGET = MSP430
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \
+ MSP430GenRegisterInfo.inc MSP430GenInstrNames.inc \
+ MSP430GenInstrInfo.inc MSP430GenAsmWriter.inc \
+ MSP430GenDAGISel.inc MSP430GenCallingConv.inc \
+ MSP430GenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt
new file mode 100644
index 0000000..b14e93d
--- /dev/null
+++ b/lib/Target/MSP430/README.txt
@@ -0,0 +1,42 @@
+//===---------------------------------------------------------------------===//
+// MSP430 backend.
+//===---------------------------------------------------------------------===//
+
+DISCLAIMER: This backend should be considered highly experimental. I have
+never seen nor worked with this MCU; all information was gathered from the
+datasheet only. The original intention behind this backend was to write
+documentation of the form "How to write a backend for dummies" :) These notes
+will hopefully be available pretty soon.
+
+Some things are incomplete / not implemented yet (and this list itself is
+surely incomplete):
+
+0. Implement asm printing for variables :)
+
+1. Verify how implicit zext with 8-bit operands is handled (this might
+currently be modelled in an improper way - do we need to mark the superreg as
+a def for every 8-bit instruction?).
+
+2. Libcalls: multiplication, division, remainder. Note that the calling
+convention for libcalls is incompatible with the libcall calling convention of
+msp430-gcc (which cannot be used anyway due to license restrictions).
+
+3. Implement multiplication / division by constant (dag combiner hook?).
+
+4. Implement non-constant shifts.
+
+5. Implement varargs stuff.
+
+6. Verify and fix (if needed) how i32 / i64 values are handled.
+
+7. Implement floating point support (softfp?).
+
+8. Implement instruction encoding for (possible) direct code emission in the
+future.
+
+9. Since almost all instructions set flags, implement brcond / select in a
+better way (currently they emit an explicit comparison).
+
+10. Handle imm in comparisons in a better way (see comment in MSP430InstrInfo.td).
+
+11. Implement hooks for better memory op folding, etc.
diff --git a/lib/Target/Makefile b/lib/Target/Makefile
new file mode 100644
index 0000000..50a360f
--- /dev/null
+++ b/lib/Target/Makefile
@@ -0,0 +1,20 @@
+#===- lib/Target/Makefile ----------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMTarget
+BUILD_ARCHIVE = 1
+
+# We include this early so we can access the value of TARGETS_TO_BUILD as the
+# value for PARALLEL_DIRS which must be set before Makefile.rules is included
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS := $(TARGETS_TO_BUILD)
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..6a868c2
--- /dev/null
+++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_partially_linked_object(LLVMMipsAsmPrinter
+ MipsAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMMipsCodeGen n)
+
+add_dependencies(LLVMMipsAsmPrinter ${n})
diff --git a/lib/Target/Mips/AsmPrinter/Makefile b/lib/Target/Mips/AsmPrinter/Makefile
new file mode 100644
index 0000000..a2fecf4
--- /dev/null
+++ b/lib/Target/Mips/AsmPrinter/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Target/Mips/AsmPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsAsmPrinter
+
+# Hack: we need to include 'main' Mips target directory to grab
+# private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
new file mode 100644
index 0000000..dfb6238
--- /dev/null
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -0,0 +1,580 @@
+//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MIPS assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-asm-printer"
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN MipsAsmPrinter : public AsmPrinter {
+ const MipsSubtarget *Subtarget;
+ public:
+ explicit MipsAsmPrinter(raw_ostream &O, MipsTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Mips Assembly Printer";
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printUnsignedImm(const MachineInstr *MI, int opNum);
+ void printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printFCCOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ void printSavedRegsBitmask(MachineFunction &MF);
+ void printHex32(unsigned int Value);
+
+ const char *emitCurrentABIString(void);
+ void emitFunctionStart(MachineFunction &MF);
+ void emitFunctionEnd(MachineFunction &MF);
+ void emitFrameDirective(MachineFunction &MF);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ };
+} // end of anonymous namespace
+
+#include "MipsGenAsmWriter.inc"
+
+/// createMipsCodePrinterPass - Returns a pass that prints the MIPS
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+FunctionPass *llvm::createMipsCodePrinterPass(raw_ostream &o,
+ MipsTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Mips Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "(f)mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contains a little endian bitset indicating which registers are
+// saved in the function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows that register 31 (RA) is saved at prologue).
+// offset - the position before the stack pointer subtraction indicating
+// where the first register saved in the prologue is located
+// (e.g. the -8 offset in the example below).
+//
+// Consider the following function prologue:
+//
+// .frame $fp,48,$ra
+// .mask 0xc0000000,-8
+// addiu $sp, $sp, -48
+// sw $ra, 40($sp)
+// sw $fp, 36($sp)
+//
+// With a 0xc0000000 mask, the assembler knows that registers 31 (RA) and
+// 30 (FP) are saved at prologue. As the save order on prologue is from
+// left to right, RA is saved first. A -8 offset means that after the
+// stack pointer subtraction, the first register in the mask (RA) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MipsAsmPrinter::
+printSavedRegsBitmask(MachineFunction &MF)
+{
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ // CPU and FPU Saved Registers Bitmasks
+ unsigned int CPUBitmask = 0;
+ unsigned int FPUBitmask = 0;
+
+ // Set the CPU and FPU Bitmasks
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg());
+ if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+ CPUBitmask |= (1 << RegNum);
+ else
+ FPUBitmask |= (1 << RegNum);
+ }
+
+ // Return Address and Frame registers must also be set in CPUBitmask.
+ if (RI.hasFP(MF))
+ CPUBitmask |= (1 << MipsRegisterInfo::
+ getRegisterNumbering(RI.getFrameRegister(MF)));
+
+ if (MF.getFrameInfo()->hasCalls())
+ CPUBitmask |= (1 << MipsRegisterInfo::
+ getRegisterNumbering(RI.getRARegister()));
+
+ // Print CPUBitmask
+ O << "\t.mask \t"; printHex32(CPUBitmask); O << ','
+ << MipsFI->getCPUTopSavedRegOff() << '\n';
+
+ // Print FPUBitmask
+ O << "\t.fmask\t"; printHex32(FPUBitmask); O << ","
+ << MipsFI->getFPUTopSavedRegOff() << '\n';
+}
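+
+// For illustration: a function that saves only $ra and $fp (registers 31
+// and 30) and no FPU registers would get, assuming a -8 CPU save offset:
+//   .mask  0xC0000000,-8
+//   .fmask 0x00000000,0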
+
+// Print a 32 bit hex number, always emitting all eight digits.
+void MipsAsmPrinter::
+printHex32(unsigned int Value)
+{
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr( (Value & (0xF << (i*4))) >> (i*4) );
+}
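+
+// For illustration: printHex32(0x80) emits "0x00000080"; the nibble loop
+// above keeps the leading zeros that a plain utohexstr(Value) would drop.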
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MipsAsmPrinter::
+emitFrameDirective(MachineFunction &MF)
+{
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF.getFrameInfo()->getStackSize();
+
+
+ O << "\t.frame\t" << '$' << LowercaseString(RI.get(stackReg).AsmName)
+ << ',' << stackSize << ','
+ << '$' << LowercaseString(RI.get(returnReg).AsmName)
+ << '\n';
+}
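+
+// For illustration: with a 48-byte frame, $fp as the frame register and $ra
+// as the return register, this emits:
+//   .frame  $fp,48,$ra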
+
+/// Return the ABI string used in the .mdebug section directive.
+const char * MipsAsmPrinter::
+emitCurrentABIString(void)
+{
+ switch(Subtarget->getTargetABI()) {
+ case MipsSubtarget::O32: return "abi32";
+ case MipsSubtarget::O64: return "abiO64";
+ case MipsSubtarget::N32: return "abiN32";
+ case MipsSubtarget::N64: return "abi64";
+ case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+ default: break;
+ }
+
+ assert(0 && "Unknown Mips ABI");
+ return NULL;
+}
+
+/// Emit the directives used by GAS at the start of functions
+void MipsAsmPrinter::
+emitFunctionStart(MachineFunction &MF)
+{
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ // 2 bits aligned
+ EmitAlignment(2, F);
+
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ O << "\t.ent\t" << CurrentFnName << '\n';
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ if ((TAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux())
+ O << "\t.type\t" << CurrentFnName << ", @function\n";
+
+ O << CurrentFnName << ":\n";
+
+ emitFrameDirective(MF);
+ printSavedRegsBitmask(MF);
+
+ O << '\n';
+}
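+
+// For illustration: for a hypothetical function "main" on Linux, the
+// prologue directives come out roughly as:
+//   .globl  main
+//   .ent    main
+//   .type   main, @function
+// main:
+//   .frame  $sp,24,$ra   (plus the .mask/.fmask pair from above)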
+
+/// Emit the directives used by GAS at the end of functions
+void MipsAsmPrinter::
+emitFunctionEnd(MachineFunction &MF)
+{
+  // There are instructions for these macros, but they must
+  // always be at the function end, and we can't emit them
+  // earlier without breaking the BB logic.
+ O << "\t.set\tmacro\n";
+ O << "\t.set\treorder\n";
+
+ O << "\t.end\t" << CurrentFnName << '\n';
+ if (TAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+}
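+
+// For illustration: the matching epilogue for "main" (when .size is
+// supported and the target is not Linux) would be:
+//   .set    macro
+//   .set    reorder
+//   .end    main
+//   .size   main, .-main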
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+bool MipsAsmPrinter::
+runOnMachineFunction(MachineFunction &MF)
+{
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out jump tables referenced by the function
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ O << "\n\n";
+
+ // Emit the function start directives
+ emitFunctionStart(MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printInstruction(II);
+ ++EmittedInsts;
+ }
+
+ // Each Basic Block is separated by a newline
+ O << '\n';
+ }
+
+ // Emit function end directives
+ emitFunctionEnd(MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+// Print out an operand for an inline asm expression.
+bool MipsAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode)
+{
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void MipsAsmPrinter::
+printOperand(const MachineInstr *MI, int opNum)
+{
+ const MachineOperand &MO = MI->getOperand(opNum);
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ bool closeP = false;
+ bool isPIC = (TM.getRelocationModel() == Reloc::PIC_);
+ bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large);
+
+  // %hi and %lo are used by mips gas to load global addresses in
+  // static code. %got is used to load global addresses when
+  // using PIC_. %call16 is used to load direct call targets
+  // with PIC_ and small code size. %call_lo and %call_hi load
+  // direct call targets with PIC_ and large code size.
+ if (MI->getOpcode() == Mips::LUi && !MO.isReg() && !MO.isImm()) {
+ if ((isPIC) && (isCodeLarge))
+ O << "%call_hi(";
+ else
+ O << "%hi(";
+ closeP = true;
+ } else if ((MI->getOpcode() == Mips::ADDiu) && !MO.isReg() && !MO.isImm()) {
+ const MachineOperand &firstMO = MI->getOperand(opNum-1);
+ if (firstMO.getReg() == Mips::GP)
+ O << "%gp_rel(";
+ else
+ O << "%lo(";
+ closeP = true;
+ } else if ((isPIC) && (MI->getOpcode() == Mips::LW) &&
+ (!MO.isReg()) && (!MO.isImm())) {
+ const MachineOperand &firstMO = MI->getOperand(opNum-1);
+ const MachineOperand &lastMO = MI->getOperand(opNum+1);
+ if ((firstMO.isReg()) && (lastMO.isReg())) {
+ if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() == Mips::GP)
+ && (!isCodeLarge))
+ O << "%call16(";
+ else if ((firstMO.getReg() != Mips::T9) && (lastMO.getReg() == Mips::GP))
+ O << "%got(";
+ else if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() != Mips::GP)
+ && (isCodeLarge))
+ O << "%call_lo(";
+ closeP = true;
+ }
+ }
+
+ switch (MO.getType())
+ {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << '$' << LowercaseString (RI.get(MO.getReg()).AsmName);
+ else
+ O << '$' << MO.getReg();
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (short int)MO.getImm();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ {
+ const GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ }
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ break;
+
+ default:
+ O << "<unknown operand type>"; abort (); break;
+ }
+
+ if (closeP) O << ")";
+}
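+
+// For illustration: in static code, a global "foo" materialized via
+// LUi/ADDiu is printed with the relocation operators described above:
+//   lui   $2, %hi(foo)
+//   addiu $2, $2, %lo(foo)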
+
+void MipsAsmPrinter::
+printUnsignedImm(const MachineInstr *MI, int opNum)
+{
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Immediate)
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum);
+}
+
+void MipsAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier)
+{
+  // When stack locations are used for instructions that are not loads or
+  // stores, print the operands the same way as for normal 3-operand
+  // instructions.
+ if (Modifier && !strcmp(Modifier, "stackloc")) {
+ printOperand(MI, opNum+1);
+ O << ", ";
+ printOperand(MI, opNum);
+ return;
+ }
+
+  // Load/Store memory operands -- imm($reg)
+  // On PIC targets, a direct call target is loaded with the
+  // pattern "lw $25,%call16($28)".
+ printOperand(MI, opNum);
+ O << "(";
+ printOperand(MI, opNum+1);
+ O << ")";
+}
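+
+// For illustration: the memory operand of "lw $2, 8($sp)" prints as
+// "8($sp)"; with the "stackloc" modifier the same operands print as
+// "$sp, 8", matching the normal 3-operand form.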
+
+void MipsAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier)
+{
+ const MachineOperand& MO = MI->getOperand(opNum);
+ O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
+}
+
+bool MipsAsmPrinter::
+doInitialization(Module &M)
+{
+ Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix());
+
+ // Tell the assembler which ABI we are using
+ O << "\t.section .mdebug." << emitCurrentABIString() << '\n';
+
+ // TODO: handle O64 ABI
+ if (Subtarget->isABI_EABI())
+ O << "\t.section .gcc_compiled_long" <<
+ (Subtarget->isGP32bit() ? "32" : "64") << '\n';
+
+ // return to previous section
+ O << "\t.previous" << '\n';
+
+ return false; // success
+}
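+
+// For illustration: on an O32 target the module header emitted above is:
+//   .section .mdebug.abi32
+//   .previous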
+
+void MipsAsmPrinter::
+printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+    return; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ const Type *CTy = C->getType();
+ unsigned Size = TD->getTypeAllocSize(CTy);
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ bool printSizeAndType = true;
+
+ // A data structure or array is aligned in memory to the largest
+ // alignment boundary required by any data type inside it (this matches
+ // the Preferred Type Alignment). For integral types, the alignment is
+ // the type size.
+ unsigned Align;
+ if (CTy->getTypeID() == Type::IntegerTyID ||
+ CTy->getTypeID() == Type::VoidTyID) {
+ assert(!(Size & (Size-1)) && "Alignment is not a power of two!");
+ Align = Log2_32(Size);
+ } else
+ Align = TD->getPreferredTypeAlignmentShift(CTy);
+
+ printVisibility(name, GVar->getVisibility());
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && !GVar->hasSection()) {
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+
+ O << TAI->getCOMMDirective() << name << ',' << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (1 << Align);
+
+ O << '\n';
+ return;
+ }
+ }
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ // FIXME: Verify correct for weak.
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+  // FIXME: appending linkage variables should go into a section named after
+  // them, or something similar. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << TAI->getGlobalDirective() << name << '\n';
+ // Fall Through
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ if (CVA && CVA->isCString())
+ printSizeAndType = false;
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "Should not have any unmaterialized functions!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+
+ if (TAI->hasDotTypeDotSizeDirective() && printSizeAndType) {
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << ',' << Size << '\n';
+ }
+
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+}
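+
+// For illustration: a hypothetical external global "int g = 1;" would come
+// out roughly as (directive spellings vary with the target asm info):
+//   .globl g
+//   .align 2
+//   .type  g,@object
+//   .size  g,4
+// g:
+//   .4byte 1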
+
+bool MipsAsmPrinter::
+doFinalization(Module &M)
+{
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ printModuleLevelGV(I);
+
+ O << '\n';
+
+ return AsmPrinter::doFinalization(M);
+}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
new file mode 100644
index 0000000..70c7a51
--- /dev/null
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(LLVM_TARGET_DEFINITIONS Mips.td)
+
+tablegen(MipsGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(MipsGenRegisterNames.inc -gen-register-enums)
+tablegen(MipsGenRegisterInfo.inc -gen-register-desc)
+tablegen(MipsGenInstrNames.inc -gen-instr-enums)
+tablegen(MipsGenInstrInfo.inc -gen-instr-desc)
+tablegen(MipsGenAsmWriter.inc -gen-asm-writer)
+tablegen(MipsGenDAGISel.inc -gen-dag-isel)
+tablegen(MipsGenCallingConv.inc -gen-callingconv)
+tablegen(MipsGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(MipsCodeGen
+ MipsDelaySlotFiller.cpp
+ MipsInstrInfo.cpp
+ MipsISelDAGToDAG.cpp
+ MipsISelLowering.cpp
+ MipsRegisterInfo.cpp
+ MipsSubtarget.cpp
+ MipsTargetAsmInfo.cpp
+ MipsTargetMachine.cpp
+ )
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
new file mode 100644
index 0000000..48ab5f9
--- /dev/null
+++ b/lib/Target/Mips/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Mips/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMMipsCodeGen
+TARGET = Mips
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \
+ MipsGenRegisterInfo.inc MipsGenInstrNames.inc \
+ MipsGenInstrInfo.inc MipsGenAsmWriter.inc \
+ MipsGenDAGISel.inc MipsGenCallingConv.inc \
+ MipsGenSubtarget.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
new file mode 100644
index 0000000..0accb4e
--- /dev/null
+++ b/lib/Target/Mips/Mips.h
@@ -0,0 +1,41 @@
+//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM Mips back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPS_H
+#define TARGET_MIPS_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MipsTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class raw_ostream;
+
+ FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+ FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsCodePrinterPass(raw_ostream &OS,
+ MipsTargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+} // end namespace llvm;
+
+// Defines symbolic names for Mips registers. This defines a mapping from
+// register name to register number.
+#include "MipsGenRegisterNames.inc"
+
+// Defines symbolic names for the Mips instructions.
+#include "MipsGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
new file mode 100644
index 0000000..79a78d8
--- /dev/null
+++ b/lib/Target/Mips/Mips.td
@@ -0,0 +1,88 @@
+//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the Mips target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MipsRegisterInfo.td"
+include "MipsSchedule.td"
+include "MipsInstrInfo.td"
+include "MipsCallingConv.td"
+
+def MipsInstrInfo : InstrInfo {
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
+ "General Purpose Registers are 64-bit wide.">;
+def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
+ "Support 64-bit FP registers.">;
+def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
+ "true", "Only supports single precision float">;
+def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
+ "Mips1 ISA Support">;
+def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
+ "Mips2 ISA Support">;
+def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
+ "Enable o32 ABI">;
+def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
+ "Enable eabi ABI">;
+def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
+ "true", "Enable vector FPU instructions.">;
+def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
+ "Enable 'signext in register' instructions.">;
+def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
+ "Enable 'conditional move' instructions.">;
+def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
+ "Enable 'multiply add/sub' instructions.">;
+def FeatureMinMax : SubtargetFeature<"minmax", "HasMinMax", "true",
+ "Enable 'min/max' instructions.">;
+def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
+ "Enable 'byte/half swap' instructions.">;
+def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
+ "Enable 'count leading bits' instructions.">;
+
+//===----------------------------------------------------------------------===//
+// Mips processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, MipsGenericItineraries, Features>;
+
+def : Proc<"mips1", [FeatureMips1]>;
+def : Proc<"r2000", [FeatureMips1]>;
+def : Proc<"r3000", [FeatureMips1]>;
+
+def : Proc<"mips2", [FeatureMips2]>;
+def : Proc<"r6000", [FeatureMips2]>;
+
+// Allegrex is a 32bit subset of r4000, both for integer and fp registers,
+// but much more similar to Mips2 than Mips3. It also contains some of the
+// Mips32/Mips32r2 instructions and a custom vector fpu processor.
+def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
+ FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
+ FeatureMinMax, FeatureSwap, FeatureBitCount]>;
+
+def Mips : Target {
+ let InstructionSet = MipsInstrInfo;
+}
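+
+// For illustration (hypothetical invocation): the processor and feature
+// names defined above are what llc's -mcpu/-mattr flags accept, e.g.
+//   llc -march=mips -mcpu=allegrex foo.bc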
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
new file mode 100644
index 0000000..01fe92e
--- /dev/null
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -0,0 +1,86 @@
+//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Mips architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Mips O32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Only the return rules are defined here for O32. The rules for argument
+// passing are defined in MipsISelLowering.cpp.
+def RetCC_MipsO32 : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // f32 are returned in registers F0, F1
+ CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+ // f64 are returned in register D0
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips EABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsEABI : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+  // In single-float mode, single fp arguments are passed in any of the
+  // consecutive f32 registers below
+ CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
+ CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
+
+ CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[F12, F14, F16, F18]>>>,
+
+  // The first 4 double fp arguments are passed in single fp registers.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[D6, D7, D8, D9]>>>,
+
+  // Integer and single fp values get stored in stack slots that are
+  // 4 bytes in size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+  // Double fp values get stored in stack slots that are 8 bytes in
+  // size and 8-byte aligned.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+]>;
+
+def RetCC_MipsEABI : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // f32 are returned in registers F0, F1
+ CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+ // f64 are returned in register D0
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>
+]>;
+
+def RetCC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+ CCDelegateTo<RetCC_MipsO32>
+]>;
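+
+// For illustration: under RetCC_Mips above, an i32 return value lands in
+// V0, an i64 value (split into two i32 halves) uses V0 and V1, and an f64
+// value uses D0 unless the subtarget is single-float only.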
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
new file mode 100644
index 0000000..a2b615d
--- /dev/null
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -0,0 +1,77 @@
+//===-- DelaySlotFiller.cpp - Mips delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple pass that fills delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ return Changed;
+}
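+
+// For illustration: given a delayed instruction such as "jal foo", the pass
+// rewrites the block to
+//   jal foo
+//   nop          # inserted delay-slot filler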
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+ return new Filler(tm);
+}
+
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
new file mode 100644
index 0000000..f05ac70
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -0,0 +1,392 @@
+//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips.h"
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class VISIBILITY_HIDDEN MipsDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MipsTargetMachine.
+ MipsTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
+
+ virtual void InstructionSelect();
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ SDValue getGlobalBaseReg();
+ SDNode *Select(SDValue N);
+
+ // Complex Pattern.
+ bool SelectAddr(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset);
+
+
+ // getI32Imm - Return a target constant with the specified
+ // value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+
+ #ifndef NDEBUG
+ unsigned Indent;
+ #endif
+};
+
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void MipsDAGToDAGISel::
+InstructionSelect()
+{
+ DEBUG(BB->dump());
+ // Codegen the basic block.
+ #ifndef NDEBUG
+ DOUT << "===== Instruction selection begins:\n";
+ Indent = 0;
+ #endif
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+
+ #ifndef NDEBUG
+ DOUT << "===== Instruction selection ends:\n";
+ #endif
+
+ CurDAG->RemoveDeadNodes();
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDValue MipsDAGToDAGISel::getGlobalBaseReg() {
+ MachineFunction* MF = BB->getParent();
+ unsigned GP = 0;
+ for(MachineRegisterInfo::livein_iterator ii = MF->getRegInfo().livein_begin(),
+ ee = MF->getRegInfo().livein_end(); ii != ee; ++ii)
+ if (ii->first == Mips::GP) {
+ GP = ii->second;
+ break;
+ }
+ assert(GP && "GOT PTR not in liveins");
+ // FIXME is there a sensible place to get debug info for this?
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ DebugLoc::getUnknownLoc(), GP, MVT::i32);
+}
+
+/// ComplexPattern used in MipsInstrInfo.td
+/// Matches addresses for Mips load/store instructions
+bool MipsDAGToDAGISel::
+SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
+{
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+  // On PIC code, global and jump table addresses are loaded relative to GP
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+ (Addr.getOpcode() == ISD::TargetJumpTable)){
+ Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Offset = Addr;
+ return true;
+ }
+ } else {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (Predicate_immSExt16(CN)) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
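+
+// For illustration: an address of the form (add $reg, 8), where 8 fits in a
+// signed 16-bit immediate, is matched as Base=$reg and Offset=8, which the
+// asm printer later renders as "8($reg)".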
+
+/// Select instructions that are not custom lowered. Used for
+/// expanded, promoted and normal instructions
+SDNode* MipsDAGToDAGISel::
+Select(SDValue N)
+{
+ SDNode *Node = N.getNode();
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ #ifndef NDEBUG
+ DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent += 2;
+ #endif
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "== ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+ return NULL;
+ }
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch(Opcode) {
+
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); Opc=Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::ADDu;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SUBu;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ MVT VT = LHS.getValueType();
+ SDNode *Carry = CurDAG->getTargetNode(Mips::SLTu, dl, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getTargetNode(Mips::ADDu, dl, VT,
+ SDValue(Carry,0), RHS);
+
+ return CurDAG->SelectNodeTo(N.getNode(), MOp, VT, MVT::Flag,
+ LHS, SDValue(AddCarry,0));
+ }
+
+ /// Mul/Div with two results
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+
+ unsigned Op;
+ if (Opcode == ISD::UMUL_LOHI || Opcode == ISD::SMUL_LOHI)
+ Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV);
+
+ SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2);
+
+ SDValue InFlag = SDValue(Node, 0);
+ SDNode *Lo = CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32,
+ MVT::Flag, InFlag);
+ InFlag = SDValue(Lo,1);
+ SDNode *Hi = CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag);
+
+ if (!N.getValue(0).use_empty())
+ ReplaceUses(N.getValue(0), SDValue(Lo,0));
+
+ if (!N.getValue(1).use_empty())
+ ReplaceUses(N.getValue(1), SDValue(Hi,0));
+
+ return NULL;
+ }
+
+ /// Special Muls
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ SDValue MulOp1 = Node->getOperand(0);
+ SDValue MulOp2 = Node->getOperand(1);
+
+ unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ SDNode *MulNode = CurDAG->getTargetNode(MulOp, dl,
+ MVT::Flag, MulOp1, MulOp2);
+
+ SDValue InFlag = SDValue(MulNode, 0);
+
+    if (Opcode == ISD::MUL)
+ return CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32, InFlag);
+ else
+ return CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ }
+
+ /// Div/Rem operations
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+
+ unsigned Op, MOp;
+ if (Opcode == ISD::SDIV || Opcode == ISD::UDIV) {
+ Op = (Opcode == ISD::SDIV ? Mips::DIV : Mips::DIVu);
+ MOp = Mips::MFLO;
+ } else {
+ Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
+ MOp = Mips::MFHI;
+ }
+ SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2);
+
+ SDValue InFlag = SDValue(Node, 0);
+ return CurDAG->getTargetNode(MOp, dl, MVT::i32, InFlag);
+ }
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE: {
+ SDValue Result = getGlobalBaseReg();
+ ReplaceUses(N, Result);
+ return NULL;
+ }
+
+  /// Handle direct and indirect calls when using PIC. On PIC, when the
+  /// GOT is smaller than about 64k (small code) the GA target is
+  /// loaded with only one instruction. Otherwise the GA target must
+  /// be loaded with 3 instructions.
+ case MipsISD::JmpLink: {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ //bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large);
+ SDValue Chain = Node->getOperand(0);
+ SDValue Callee = Node->getOperand(1);
+ SDValue T9Reg = CurDAG->getRegister(Mips::T9, MVT::i32);
+ SDValue InFlag(0, 0);
+
+ if ( (isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)) )
+ {
+ /// Direct call for global addresses and external symbols
+ SDValue GPReg = CurDAG->getRegister(Mips::GP, MVT::i32);
+
+ // Use load to get GOT target
+ SDValue Ops[] = { Callee, GPReg, Chain };
+ SDValue Load = SDValue(CurDAG->getTargetNode(Mips::LW, dl, MVT::i32,
+ MVT::Other, Ops, 3), 0);
+ Chain = Load.getValue(1);
+
+ // Call target must be on T9
+ Chain = CurDAG->getCopyToReg(Chain, dl, T9Reg, Load, InFlag);
+ } else
+ /// Indirect call
+ Chain = CurDAG->getCopyToReg(Chain, dl, T9Reg, Callee, InFlag);
+
+ // Emit Jump and Link Register
+ SDNode *ResNode = CurDAG->getTargetNode(Mips::JALR, dl, MVT::Other,
+ MVT::Flag, T9Reg, Chain);
+ Chain = SDValue(ResNode, 0);
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Node, 0), Chain);
+ ReplaceUses(SDValue(Node, 1), InFlag);
+ return ResNode;
+ }
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(N);
+
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == N.getNode())
+ DEBUG(N.getNode()->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+
+ return ResNode;
+}
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
+ return new MipsDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
new file mode 100644
index 0000000..9281940
--- /dev/null
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -0,0 +1,1254 @@
+//===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-lower"
+
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+const char *MipsTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case MipsISD::JmpLink : return "MipsISD::JmpLink";
+ case MipsISD::Hi : return "MipsISD::Hi";
+ case MipsISD::Lo : return "MipsISD::Lo";
+ case MipsISD::GPRel : return "MipsISD::GPRel";
+ case MipsISD::Ret : return "MipsISD::Ret";
+ case MipsISD::CMov : return "MipsISD::CMov";
+ case MipsISD::SelectCC : return "MipsISD::SelectCC";
+ case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC";
+ case MipsISD::FPBrcond : return "MipsISD::FPBrcond";
+ case MipsISD::FPCmp : return "MipsISD::FPCmp";
+ case MipsISD::FPRound : return "MipsISD::FPRound";
+ default : return NULL;
+ }
+}
+
+MipsTargetLowering::
+MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
+{
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
+  // Mips does not have an i1 type, so use i32 for the
+  // results of setcc operations (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // JumpTable targets must use GOT when using PIC_
+ setUsesGlobalOffsetTable(true);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
+ addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat())
+ if (!Subtarget->isFP64bit())
+ addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+
+ // Legal fp constants
+ addLegalFPImmediate(APFloat(+0.0f));
+
+  // Load extended operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc would come with an AND/OR of the result;
+  // we don't want that, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+
+ // Mips Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+  // We custom lower AND/OR to handle the case where the DAG contains 'ands/ors'
+  // with operands coming from setcc fp comparisons. This is necessary since
+  // the results from these setcc nodes are in a flag register (FCR31).
+ setOperationAction(ISD::AND, MVT::i32, Custom);
+ setOperationAction(ISD::OR, MVT::i32, Custom);
+
+ // Operations not directly supported by Mips.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ if (Subtarget->isSingleFloat())
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ if (!Subtarget->hasSEInReg()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ }
+
+ if (!Subtarget->hasBitCount())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ if (!Subtarget->hasSwap())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setStackPointerRegisterToSaveRestore(Mips::SP);
+ computeRegisterProperties();
+}
+
+
+MVT MipsTargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i32;
+}
+
+
+SDValue MipsTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG)
+{
+ switch (Op.getOpcode())
+ {
+ case ISD::AND: return LowerANDOR(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::OR: return LowerANDOR(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+
+// AddLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+{
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+// An address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// the gp_rel operator.
+bool MipsTargetLowering::IsInSmallSection(unsigned Size) {
+ return (Size > 0 && (Size <= Subtarget->getSSectionThreshold()));
+}
+
+// Discover if this global address can be placed into small data/bss section.
+bool MipsTargetLowering::IsGlobalInSmallSection(GlobalValue *GV)
+{
+ const TargetData *TD = getTargetData();
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+
+ if (!GVA)
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+
+  // if this is an internal constant string, there is a special
+  // section for it, but not in small data/bss.
+ if (GVA->hasInitializer() && GV->hasLocalLinkage()) {
+ Constant *C = GVA->getInitializer();
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (CVA && CVA->isCString())
+ return false;
+ }
+
+ return IsInSmallSection(Size);
+}
+
+// Get fp branch code (not opcode) from condition code.
+static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return Mips::BRANCH_T;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return Mips::BRANCH_F;
+
+ return Mips::BRANCH_INVALID;
+}
+
+static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
+ switch(BC) {
+ default:
+ assert(0 && "Unknown branch code");
+ case Mips::BRANCH_T : return Mips::BC1T;
+ case Mips::BRANCH_F : return Mips::BC1F;
+ case Mips::BRANCH_TL : return Mips::BC1TL;
+ case Mips::BRANCH_FL : return Mips::BC1FL;
+ }
+}
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return Mips::FCOND_EQ;
+ case ISD::SETUNE: return Mips::FCOND_OGL;
+ case ISD::SETLT:
+ case ISD::SETOLT: return Mips::FCOND_OLT;
+ case ISD::SETGT:
+ case ISD::SETOGT: return Mips::FCOND_OGT;
+ case ISD::SETLE:
+ case ISD::SETOLE: return Mips::FCOND_OLE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return Mips::FCOND_OGE;
+ case ISD::SETULT: return Mips::FCOND_ULT;
+ case ISD::SETULE: return Mips::FCOND_ULE;
+ case ISD::SETUGT: return Mips::FCOND_UGT;
+ case ISD::SETUGE: return Mips::FCOND_UGE;
+ case ISD::SETUO: return Mips::FCOND_UN;
+ case ISD::SETO: return Mips::FCOND_OR;
+ case ISD::SETNE:
+ case ISD::SETONE: return Mips::FCOND_NEQ;
+ case ISD::SETUEQ: return Mips::FCOND_UEQ;
+ }
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ bool isFPCmp = false;
+ DebugLoc dl = MI->getDebugLoc();
+
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case Mips::Select_FCC:
+ case Mips::Select_FCC_S32:
+ case Mips::Select_FCC_D32:
+ isFPCmp = true; // FALL THROUGH
+ case Mips::Select_CC:
+ case Mips::Select_CC_S32:
+ case Mips::Select_CC_D32: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ // Emit the right instruction according to the type of the operands compared
+ if (isFPCmp) {
+      // Find the condition code present in the setcc operation.
+ Mips::CondCode CC = (Mips::CondCode)MI->getOperand(4).getImm();
+ // Get the branch opcode from the branch code.
+ unsigned Opc = FPBranchCodeToOpc(GetFPBranchCodeFromCond(CC));
+ BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+ } else
+ BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
+ .addReg(Mips::ZERO).addMBB(sinkMBB);
+
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MipsTargetLowering::
+LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG)
+{
+ if (!Subtarget->isMips1())
+ return Op;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CCReg = AddLiveIn(MF, Mips::FCR31, Mips::CCRRegisterClass);
+
+ SDValue Chain = DAG.getEntryNode();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Src = Op.getOperand(0);
+
+ // Set the condition register
+ SDValue CondReg = DAG.getCopyFromReg(Chain, dl, CCReg, MVT::i32);
+ CondReg = DAG.getCopyToReg(Chain, dl, Mips::AT, CondReg);
+ CondReg = DAG.getCopyFromReg(CondReg, dl, Mips::AT, MVT::i32);
+
+ SDValue Cst = DAG.getConstant(3, MVT::i32);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, CondReg, Cst);
+ Cst = DAG.getConstant(2, MVT::i32);
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i32, Or, Cst);
+
+ SDValue InFlag(0, 0);
+ CondReg = DAG.getCopyToReg(Chain, dl, Mips::FCR31, Xor, InFlag);
+
+ // Emit the round instruction and bit convert to integer
+ SDValue Trunc = DAG.getNode(MipsISD::FPRound, dl, MVT::f32,
+ Src, CondReg.getValue(1));
+ SDValue BitCvt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Trunc);
+ return BitCvt;
+}
+
+SDValue MipsTargetLowering::
+LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get a reference from Mips stack pointer
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub);
+
+ // This node always has two return values: a new stack pointer
+ // value and a chain
+ SDValue Ops[2] = { Sub, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
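+
+// For illustration: a dynamic alloca of Size bytes is lowered, in effect, to
+//   subu $sp, $sp, Size
+// and the decremented $sp is returned as the allocated address.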
+
+SDValue MipsTargetLowering::
+LowerANDOR(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getOpcode() != MipsISD::FPCmp || RHS.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ LHS, True, False, LHS.getOperand(2));
+ SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ RHS, True, False, RHS.getOperand(2));
+
+ return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL);
+}
+
+SDValue MipsTargetLowering::
+LowerBRCOND(SDValue Op, SelectionDAG &DAG)
+{
+ // The first operand is the chain, the second is the condition, the third is
+ // the block to branch to if the condition is true.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ SDValue CondRes = Op.getOperand(1);
+ SDValue CCNode = CondRes.getOperand(2);
+ Mips::CondCode CC =
+ (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
+ SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
+
+ return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ Dest, CondRes);
+}
+
+SDValue MipsTargetLowering::
+LowerSETCC(SDValue Op, SelectionDAG &DAG)
+{
+ // The operands to this are the left and right operands to compare (ops #0,
+ // and #1) and the condition code to compare them with (op #2) as a
+ // CondCodeSDNode.
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS,
+ DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+SDValue MipsTargetLowering::
+LowerSELECT(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue Cond = Op.getOperand(0);
+ SDValue True = Op.getOperand(1);
+ SDValue False = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+  // If the incoming condition comes from an integer compare, the select
+  // operation must be SelectCC, or a conditional move if the subtarget
+  // supports it.
+ if (Cond.getOpcode() != MipsISD::FPCmp) {
+ if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint())
+ return Op;
+ return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(),
+ Cond, True, False);
+ }
+
+  // If the incoming condition comes from fpcmp, the select
+  // operation must use FPSelectCC.
+ SDValue CCNode = Cond.getOperand(2);
+ return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ Cond, True, False, CCNode);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
+{
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+
+ if (!Subtarget->hasABICall()) {
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue Ops[] = { GA };
+ // %gp_rel relocation
+ if (!isa<Function>(GV) && IsGlobalInSmallSection(GV)) {
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, Ops, 1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
+ }
+ // %hi/%lo relocation
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+ } else { // Abicall relocations, TODO: make this cleaner.
+ SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), GA, NULL, 0);
+    // For functions, and for global targets that are not internally linked,
+    // only a load from the GOT/GP is necessary for PIC to work.
+ if (!GV->hasLocalLinkage() || isa<Function>(GV))
+ return ResNode;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
+ }
+
+ assert(0 && "Dont know how to handle GlobalAddress");
+ return SDValue(0,0);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
+{
+ assert(0 && "TLS not implemented for MIPS.");
+ return SDValue(); // Not reached
+}
+
+SDValue MipsTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue Ops[] = { JTI };
+ HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1);
+ } else // Emit Load from Global Pointer
+ HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0);
+
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+ return ResNode;
+}
+
+SDValue MipsTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ Constant *C = N->getConstVal();
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ // gp_rel relocation
+  // FIXME: we should reference the constant pool using small data sections,
+  // but the asm printer currently doesn't support this feature without
+  // hacking it. This feature should come soon so we can uncomment the
+  // code below.
+ //if (!Subtarget->hasABICall() &&
+ // IsInSmallSection(getTargetData()->getTypeAllocSize(C->getType()))) {
+ // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
+ // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+ //} else { // %hi/%lo relocation
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+ //}
+
+ return ResNode;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//
+//  The calling convention lowering operations work in this order:
+// LowerCALL (virt regs --> phys regs, virt regs --> stack)
+// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
+// LowerRET (virt regs --> phys regs)
+// LowerCALL (phys regs --> virt regs)
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TODO: Implement generic logic using tblgen that can support this.
+// Mips O32 ABI rules:
+// ---
+// i32 - Passed in A0, A1, A2, A3 and stack
+// f32 - Only passed in f32 registers if no int reg has been used yet to hold
+// an argument. Otherwise, passed in A1, A2, A3 and stack.
+// f64 - Only passed in two aliased f32 registers if no int reg has been used
+// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
+//        not used, it must be shadowed. If only A3 is available, shadow it
+//        and go to the stack.
+//===----------------------------------------------------------------------===//
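+// For illustration only (a hypothetical signature, not taken from this
+// code): a call f(i32 a, f64 b, i32 c) would be assigned under these rules
+// as: a -> A0; b -> A2/A3 (A1 shadowed, since an int reg was already used);
+// c -> stack.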
+
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const unsigned IntRegsSize=4, FloatRegsSize=2;
+
+ static const unsigned IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned F32Regs[] = {
+ Mips::F12, Mips::F14
+ };
+ static const unsigned F64Regs[] = {
+ Mips::D6, Mips::D7
+ };
+
+  unsigned Reg = 0;
+  unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize);
+  // Note: when all int regs are allocated, getFirstUnallocated returns
+  // IntRegsSize; test the index itself to avoid reading past IntRegs.
+  bool IntRegUsed = (UnallocIntReg != 0);
+
+ // Promote i8 and i16
+ if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (ValVT == MVT::i32 || (ValVT == MVT::f32 && IntRegUsed)) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ IntRegUsed = true;
+ LocVT = MVT::i32;
+ }
+
+ if (ValVT.isFloatingPoint() && !IntRegUsed) {
+ if (ValVT == MVT::f32)
+ Reg = State.AllocateReg(F32Regs, FloatRegsSize);
+ else
+ Reg = State.AllocateReg(F64Regs, FloatRegsSize);
+ }
+
+ if (ValVT == MVT::f64 && IntRegUsed) {
+ if (UnallocIntReg != IntRegsSize) {
+      // If the first unallocated register is A3, we must mark it as
+      // allocated (shadowed) and use the stack instead.
+ if (IntRegs[UnallocIntReg] != (unsigned (Mips::A3)))
+ Reg = Mips::A2;
+      for (; UnallocIntReg < IntRegsSize; ++UnallocIntReg)
+        State.AllocateReg(IntRegs[UnallocIntReg]);
+ }
+ LocVT = MVT::i32;
+ }
+
+ if (!Reg) {
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ } else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return false; // CC must always match
+}
+
+//===----------------------------------------------------------------------===//
+// CALL Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCALL - function arguments are copied from virtual regs to
+/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall.
+SDValue MipsTargetLowering::
+LowerCALL(SDValue Op, SelectionDAG &DAG)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ bool isVarArg = TheCall->isVarArg();
+ unsigned CC = TheCall->getCallingConv();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+  // To meet the O32 ABI, Mips must always allocate 16 bytes on the stack
+  // (even if fewer than four argument registers are used); this area
+  // corresponds to the A0-A3 argument registers.
+ if (Subtarget->isABI_O32()) {
+ int VTsize = MVT(MVT::i32).getSizeInBits()/8;
+ MFI->CreateFixedObject(VTsize, (VTsize*3));
+ CCInfo.AnalyzeCallOperands(TheCall, CC_MipsO32);
+ } else
+ CCInfo.AnalyzeCallOperands(TheCall, CC_Mips);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  // With EABI it is possible to pass 16 arguments in registers.
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // First/LastArgStackLoc contains the first/last
+ // "at stack" argument location.
+ int LastArgStackLoc = 0;
+ unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+ CCValAssign &VA = ArgLocs[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ if (Subtarget->isABI_O32() && VA.isRegLoc()) {
+ if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg);
+ if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
+ DAG.getConstant(0, getPointerTy()));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
+ DAG.getConstant(1, getPointerTy()));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
+ continue;
+ }
+ }
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register must be kept in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+    // Register arguments can't reach this point...
+ assert(VA.isMemLoc());
+
+    // Create the frame index object for this incoming parameter. This
+    // guarantees that when allocating the Local Area the first 16 bytes,
+    // which are always reserved, won't be overwritten if the O32 ABI is
+    // used. For EABI the first address is zero.
+ LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ LastArgStackLoc);
+
+    SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+
+    // Emit an ISD::STORE which stores the
+    // parameter value to a stack location.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+
+ // MipsJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+  // Create a stack location to hold GP when PIC is used. This stack
+  // location is used in the function prologue to save GP and also after all
+  // emitted CALLs to restore GP.
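+  // (Sketch of the mechanism implemented below: in PIC code a call may
+  // clobber $gp, so a fixed stack slot is created once per function; the
+  // prologue stores $gp there and it is reloaded after each call.)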
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    // A function can have an arbitrary number of calls, so
+    // keep the LastArgStackLoc with the biggest offset.
+ int FI;
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ if (LastArgStackLoc >= MipsFI->getGPStackOffset()) {
+ LastArgStackLoc = (!LastArgStackLoc) ? (16) : (LastArgStackLoc+4);
+      // Create the frame index only once. SPOffset here can be anything
+      // (it will be fixed in processFunctionBeforeFrameFinalized).
+ if (MipsFI->getGPStackOffset() == -1) {
+ FI = MFI->CreateFixedObject(4, 0);
+ MipsFI->setGPFI(FI);
+ }
+ MipsFI->setGPStackOffset(LastArgStackLoc);
+ }
+
+ // Reload GP value.
+ FI = MipsFI->getGPFI();
+    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0);
+ Chain = GPLoad.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32),
+ GPLoad, SDValue(0,0));
+ InFlag = Chain.getValue(1);
+ }
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+                 Op.getResNo());
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns a SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *MipsTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_Mips);
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ ResultVals.push_back(Chain);
+
+ // Merge everything together with a MERGE_VALUES node.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+//===----------------------------------------------------------------------===//
+// FORMAL_ARGUMENTS Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFORMAL_ARGUMENTS - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: isVarArg
+SDValue MipsTargetLowering::
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue Root = Op.getOperand(0);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+
+  // GP must be live-in for both PIC and non-PIC call targets.
+ AddLiveIn(MF, Mips::GP, Mips::CPURegsRegisterClass);
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ if (Subtarget->isABI_O32())
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MipsO32);
+ else
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_Mips);
+
+ SmallVector<SDValue, 16> ArgValues;
+ SDValue StackPtr;
+
+ unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = Mips::CPURegsRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = Mips::FGR32RegisterClass;
+ else if (RegVT == MVT::f64) {
+ if (!Subtarget->isSingleFloat())
+ RC = Mips::AFGR64RegisterClass;
+ } else
+ assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+      // Transform the arguments stored in
+      // physical registers into virtual ones.
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
+ if (Subtarget->isABI_O32()) {
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
+ unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ VA.getLocReg()+1, RC);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg2, RegVT);
+ SDValue Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
+ SDValue Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue2);
+ ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi);
+ }
+ }
+
+ ArgValues.push_back(ArgValue);
+
+      // To meet the ABI, when VARARGS are passed in registers, the registers
+      // must have their values written to the caller's stack frame.
+ if ((isVarArg) && (Subtarget->isABI_O32())) {
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+        // The stack pointer offset is relative to the caller's stack frame.
+        // Since the real stack size is unknown here, a negative SPOffset
+        // is used so there's a way to adjust these offsets when the stack
+        // size gets known (in EliminateFrameIndex). A dummy SPOffset is
+        // used instead of a direct negative address (which is recorded to
+        // be used in emitPrologue) to avoid miscalculation of the first
+        // stack offset in PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ int FI = MFI->CreateFixedObject(4, 0);
+ MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4)));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+
+        // Emit an ISD::STORE which stores the
+        // parameter value to a stack location.
+ ArgValues.push_back(DAG.getStore(Root, dl, ArgValue, PtrOff, NULL, 0));
+ }
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+
+      // The stack pointer offset is relative to the caller's stack frame.
+      // Since the real stack size is unknown here, a negative SPOffset
+      // is used so there's a way to adjust these offsets when the stack
+      // size gets known (in EliminateFrameIndex). A dummy SPOffset is
+      // used instead of a direct negative address (which is recorded to
+      // be used in emitPrologue) to avoid miscalculation of the first
+      // stack offset in PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, 0);
+ MipsFI->recordLoadArgsFI(FI, -(ArgSize+
+ (FirstStackArgLoc + VA.getLocMemOffset())));
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+ }
+ }
+
+  // The Mips ABIs for returning structs by value require that we copy
+  // the sret argument into $v0 for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ unsigned Reg = MipsFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
+ MipsFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MipsTargetLowering::
+LowerRET(SDValue Op, SelectionDAG &DAG)
+{
+  // CCValAssign - represents the assignment of
+  // the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+  // Analyze return values of ISD::RET.
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Mips);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ // The chain is always operand #0
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // ISD::RET => ret chain, (regnum1,val1), ...
+    // so i*2+1 indexes only the regnums.
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+    // Guarantee that all emitted copies are glued together,
+    // so the scheduler cannot reorder them.
+ Flag = Chain.getValue(1);
+ }
+
+  // The Mips ABIs for returning structs by value require that we copy
+  // the sret argument into $v0 for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into $v0.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned Reg = MipsFI->getSRetReturnReg();
+
+    assert(Reg && "sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on Mips is always a "jr $ra"
+ if (Flag.getNode())
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MipsTargetLowering::ConstraintType MipsTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+  // Mips-specific constraints
+ // GCC config/mips/constraints.md
+ //
+ // 'd' : An address register. Equivalent to r
+ // unless generating MIPS16 code.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+        return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// getRegForInlineAsmConstraint - Given a physical register constraint
+/// letter (e.g. "r"), if it corresponds directly to an LLVM register class,
+/// return a register of 0 and the register class pointer.
+std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
+{
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, Mips::FGR32RegisterClass);
+ if (VT == MVT::f64)
+ if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ return std::make_pair(0U, Mips::AFGR64RegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
+/// return a list of registers that can be used to satisfy the constraint.
+/// This should only be used for C_RegisterClass constraints.
+std::vector<unsigned> MipsTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const
+{
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ // GCC Mips Constraint Letters
+ case 'd':
+ case 'y':
+ return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3,
+ Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1,
+ Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+ Mips::T8, 0);
+
+ case 'f':
+ if (VT == MVT::f32) {
+ if (Subtarget->isSingleFloat())
+ return make_vector<unsigned>(Mips::F2, Mips::F3, Mips::F4, Mips::F5,
+ Mips::F6, Mips::F7, Mips::F8, Mips::F9, Mips::F10, Mips::F11,
+ Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24,
+ Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
+ Mips::F30, Mips::F31, 0);
+ else
+ return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8,
+ Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26,
+ Mips::F28, Mips::F30, 0);
+ }
+
+ if (VT == MVT::f64)
+ if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4,
+ Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13,
+ Mips::D14, Mips::D15, 0);
+ }
+ return std::vector<unsigned>();
+}
+
+bool
+MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Mips target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
new file mode 100644
index 0000000..55cd6ea
--- /dev/null
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -0,0 +1,130 @@
+//===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsISELLOWERING_H
+#define MipsISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "Mips.h"
+#include "MipsSubtarget.h"
+
+namespace llvm {
+ namespace MipsISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Get the Higher 16 bits from a 32-bit immediate
+      // No relation to the Mips Hi register
+ Hi,
+
+ // Get the Lower 16 bits from a 32-bit immediate
+      // No relation to the Mips Lo register
+ Lo,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Conditional Move
+ CMov,
+
+ // Select CC Pseudo Instruction
+ SelectCC,
+
+ // Floating Point Select CC Pseudo Instruction
+ FPSelectCC,
+
+ // Floating Point Branch Conditional
+ FPBrcond,
+
+ // Floating Point Compare
+ FPCmp,
+
+ // Floating Point Rounding
+ FPRound,
+
+ // Return
+ Ret
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class MipsTargetLowering : public TargetLowering
+ {
+ // FrameIndex for return slot.
+ int ReturnAddrIndex;
+ public:
+
+ explicit MipsTargetLowering(MipsTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+    /// getTargetNodeName - This method returns the name of a target-specific
+    /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ MVT getSetCCResultType(MVT VT) const;
+
+ private:
+ // Subtarget Info
+ const MipsSubtarget *Subtarget;
+
+ // Lower Operand helpers
+ SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+ bool IsGlobalInSmallSection(GlobalValue *GV);
+ bool IsInSmallSection(unsigned Size);
+
+ // Lower Operand specifics
+ SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ };
+}
+
+#endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
new file mode 100644
index 0000000..b6a6d2f
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -0,0 +1,304 @@
+//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Mips FPU instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+// ------------------------
+// * 64bit fp:
+// - 32 64-bit registers (default mode)
+// - 16 even 32-bit registers (32-bit compatible mode) for
+// single and double access.
+// * 32bit fp:
+// - 16 even 32-bit registers - single and double (aliased)
+// - 32 32-bit registers (within single-only mode)
+//===----------------------------------------------------------------------===//
+
+// Floating Point Compare and Branch
+def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>,
+ SDTCisVT<1, OtherVT>]>;
+def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<0>,
+ SDTCisInt<2>]>;
+def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>,
+ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
+
+def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInFlag]>;
+def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
+ [SDNPHasChain]>;
+def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>;
+def MipsFPSelectCC : SDNode<"MipsISD::FPSelectCC", SDT_MipsFPSelectCC>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printFCCOperand" in
+ def condcode : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+def In32BitMode : Predicate<"!Subtarget.isFP64bit()">;
+def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
+def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//
+// A set of multiclasses is used to address the register usage.
+//
+// S32 - single precision in 16 32bit even fp registers
+// single precision in 32 32bit fp registers in SingleOnly mode
+// S64 - single precision in 32 64bit fp registers (In64BitMode)
+// D32 - double precision in 16 32bit even fp registers
+// D64 - double precision in 32 64bit fp registers (In64BitMode)
+//
+// Only S32 and D32 are supported right now.
+//===----------------------------------------------------------------------===//
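+
+// As an illustration of how these templates expand (derived from the
+// definitions below, so treat it as a sketch): `defm FADD : FFR1_4<0x10,
+// "add", fadd>;` produces FADD_S32 ("add.s $fd, $fs, $ft" on FGR32) and
+// FADD_D32 ("add.d $fd, $fs, $ft" on AFGR64, In32BitMode only).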
+
+multiclass FFR1_1<bits<6> funct, string asmstr>
+{
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ !strconcat(asmstr, ".s $fd, $fs"), []>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs),
+ !strconcat(asmstr, ".d $fd, $fs"), []>, Requires<[In32BitMode]>;
+}
+
+multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
+{
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ !strconcat(asmstr, ".s $fd, $fs"),
+ [(set FGR32:$fd, (FOp FGR32:$fs))]>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
+ !strconcat(asmstr, ".d $fd, $fs"),
+ [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>;
+}
+
+class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
+ RegisterClass RcDst, string asmstr>:
+ FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
+ !strconcat(asmstr, " $fd, $fs"), []>;
+
+
+multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp> {
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
+ (ins FGR32:$fs, FGR32:$ft),
+ !strconcat(asmstr, ".s $fd, $fs, $ft"),
+ [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
+ (ins AFGR64:$fs, AFGR64:$ft),
+ !strconcat(asmstr, ".d $fd, $fs, $ft"),
+ [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>,
+ Requires<[In32BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+//===----------------------------------------------------------------------===//
+
+let ft = 0 in {
+ defm FLOOR_W : FFR1_1<0b001111, "floor.w">;
+ defm CEIL_W : FFR1_1<0b001110, "ceil.w">;
+ defm ROUND_W : FFR1_1<0b001100, "round.w">;
+ defm TRUNC_W : FFR1_1<0b001101, "trunc.w">;
+ defm CVTW : FFR1_1<0b100100, "cvt.w">;
+ defm FMOV : FFR1_1<0b000110, "mov">;
+
+ defm FABS : FFR1_2<0b000101, "abs", fabs>;
+ defm FNEG : FFR1_2<0b000111, "neg", fneg>;
+ defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>;
+
+  /// Convert to Single Precision
+ def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">;
+
+ let Predicates = [IsNotSingleFloat] in {
+ /// Ceil to long signed integer
+ def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">;
+ def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">;
+
+ /// Round to long signed integer
+ def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">;
+ def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">;
+
+ /// Floor to long signed integer
+ def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">;
+ def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">;
+
+ /// Trunc to long signed integer
+ def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">;
+ def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">;
+
+ /// Convert to long signed integer
+ def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
+ def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
+
+    /// Convert to Double Precision
+ def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
+ def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
+ def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
+
+    /// Convert to Single Precision
+ def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">;
+ def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
+ }
+}
+
+// The odd-numbered registers are only referenced when doing loads,
+// stores, and moves between floating-point and integer registers.
+// When defining instructions, we reference all 32-bit registers,
+// regardless of register aliasing.
+let fd = 0 in {
+ /// Move Control Registers From/To CPU Registers
+ def CFC1 : FFR<0x11, 0x0, 0x2, (outs CPURegs:$rt), (ins CCR:$fs),
+ "cfc1 $rt, $fs", []>;
+
+ def CTC1 : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs),
+ "ctc1 $fs, $rt", []>;
+
+ def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
+ "mfc1 $rt, $fs", []>;
+
+ def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
+ "mtc1 $rt, $fs", []>;
+}
+
+/// Floating Point Memory Instructions
+let Predicates = [IsNotSingleFloat] in {
+ def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
+ "ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
+
+ def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
+ "sdc1 $ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
+}
+
+// LWC1 and SWC1 can always be emitted with odd registers.
+def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1 $ft, $addr",
+ [(set FGR32:$ft, (load addr:$addr))]>;
+def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), "swc1 $ft, $addr",
+ [(store FGR32:$ft, addr:$addr)]>;
+
+/// Floating-point Arithmetic
+defm FADD : FFR1_4<0x10, "add", fadd>;
+defm FDIV : FFR1_4<0x03, "div", fdiv>;
+defm FMUL : FFR1_4<0x02, "mul", fmul>;
+defm FSUB : FFR1_4<0x01, "sub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Branch Codes
+//===----------------------------------------------------------------------===//
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
+// They must be kept in sync.
+def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
+def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
+def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
+def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
+
+/// Floating Point Branch of False/True (Likely)
+let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in {
+ class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
+ (ins brtarget:$dst), !strconcat(asmstr, " $dst"),
+ [(MipsFPBrcond op, bb:$dst, FCR31)]>;
+}
+def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">;
+def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">;
+def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
+def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Flag Conditions
+//===----------------------------------------------------------------------===//
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
+// They must be kept in sync.
+def MIPS_FCOND_F : PatLeaf<(i32 0)>;
+def MIPS_FCOND_UN : PatLeaf<(i32 1)>;
+def MIPS_FCOND_EQ : PatLeaf<(i32 2)>;
+def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>;
+def MIPS_FCOND_OLT : PatLeaf<(i32 4)>;
+def MIPS_FCOND_ULT : PatLeaf<(i32 5)>;
+def MIPS_FCOND_OLE : PatLeaf<(i32 6)>;
+def MIPS_FCOND_ULE : PatLeaf<(i32 7)>;
+def MIPS_FCOND_SF : PatLeaf<(i32 8)>;
+def MIPS_FCOND_NGLE : PatLeaf<(i32 9)>;
+def MIPS_FCOND_SEQ : PatLeaf<(i32 10)>;
+def MIPS_FCOND_NGL : PatLeaf<(i32 11)>;
+def MIPS_FCOND_LT : PatLeaf<(i32 12)>;
+def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
+def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
+def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
+
+/// Floating Point Compare
+let hasDelaySlot = 1, Defs=[FCR31] in {
+ def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
+ "c.$cc.s $fs, $ft", [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc),
+ (implicit FCR31)]>;
+
+ def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
+ "c.$cc.d $fs, $ft", [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc),
+ (implicit FCR31)]>, Requires<[In32BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+
+// For some explanation, see Select_CC in MipsInstrInfo.td. We also embed a
+// condition code to enable easy handling by the Custom Inserter.
+let usesCustomDAGSchedInserter = 1, Uses=[FCR31] in {
+ class PseudoFPSelCC<RegisterClass RC, string asmstr> :
+ MipsPseudo<(outs RC:$dst),
+ (ins CPURegs:$CmpRes, RC:$T, RC:$F, condcode:$cc), asmstr,
+ [(set RC:$dst, (MipsFPSelectCC CPURegs:$CmpRes, RC:$T, RC:$F,
+ imm:$cc))]>;
+}
+
+// The values to be selected are fp but the condition test is with integers.
+def Select_CC_S32 : PseudoSelCC<FGR32, "# MipsSelect_CC_S32_f32">;
+def Select_CC_D32 : PseudoSelCC<AFGR64, "# MipsSelect_CC_D32_f32">,
+ Requires<[In32BitMode]>;
+
+// The values to be selected are int but the condition test is done with fp.
+def Select_FCC : PseudoFPSelCC<CPURegs, "# MipsSelect_FCC">;
+
+// Both the values to be selected and the condition test are done with fp.
+def Select_FCC_S32 : PseudoFPSelCC<FGR32, "# MipsSelect_FCC_S32_f32">;
+def Select_FCC_D32 : PseudoFPSelCC<AFGR64, "# MipsSelect_FCC_D32_f32">,
+ Requires<[In32BitMode]>;
+
+def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Patterns
+//===----------------------------------------------------------------------===//
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
+
+def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
+def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
+
+def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
+
+def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>;
+def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
+
+let Predicates = [In32BitMode] in {
+ def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
+}
+
+// MipsFPRound is only emitted for MipsI targets.
+def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>;
+
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
new file mode 100644
index 0000000..0853272
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -0,0 +1,182 @@
+//===- MipsInstrFormats.td - Mips Instruction Formats -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describes the MIPS instruction formats.
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rs - src reg.
+// rt - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+// rd - dst reg, only used on 3 regs instr.
+// shamt - only used on shift instructions, contains the shift amount.
+//  funct - combined with the opcode field gives us an operation code.
+//
+//===----------------------------------------------------------------------===//
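+
+// For instance (a sketch; encodings per the MIPS32 manual): `addu $rd, $rs,
+// $rt` is an R-format instruction with opcode=0 and funct=0x21, while
+// `addiu $rt, $rs, imm16` is an I-format instruction with opcode=0x09.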
+
+// Generic Mips Format
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: Instruction
+{
+ field bits<32> Inst;
+
+ let Namespace = "Mips";
+
+ bits<6> opcode;
+
+  // Top 6 bits are the 'opcode' field
+ let Inst{31-26} = opcode;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+}
+
+// Mips Pseudo Instructions Format
+class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo>;
+
+//===----------------------------------------------------------------------===//
+// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
+//===----------------------------------------------------------------------===//
+
+class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> shamt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Mips : <|opcode|address|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<26> addr;
+
+ let opcode = op;
+
+ let Inst{25-0} = addr;
+}
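+
+// For example (illustrative; per the MIPS32 manual): `j target` is a
+// J-format instruction with opcode=0x02, carrying the 26-bit word address
+// of the target in `addr`.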
+
+//===----------------------------------------------------------------------===//
+//
+// FLOATING POINT INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// fs - src reg.
+// ft - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+// fd - dst reg, only used on 3 regs instr.
+// fmt - double or single precision.
+//  funct - combined with the opcode field gives us an operation code.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
+//===----------------------------------------------------------------------===//
+
+class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
+ bits<5> fmt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> ft;
+ bits<5> base;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = ft;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
+//===----------------------------------------------------------------------===//
+
+class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cc;
+ bits<5> fmt;
+
+ let opcode = 0x11;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-4} = 0b11;
+ let Inst{3-0} = cc;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
new file mode 100644
index 0000000..6225fa9
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -0,0 +1,623 @@
+//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "MipsGenInstrInfo.inc"
+
+using namespace llvm;
+
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+ : TargetInstrInfoImpl(MipsInsts, array_lengthof(MipsInsts)),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register-to-register move and
+/// leave the source and dest operands in the passed parameters.
+bool MipsInstrInfo::
+isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const
+{
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+ // addu $dst, $src, $zero || addu $dst, $zero, $src
+ // or $dst, $src, $zero || or $dst, $zero, $src
+ if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) {
+ if (MI.getOperand(1).getReg() == Mips::ZERO) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ return true;
+ } else if (MI.getOperand(2).getReg() == Mips::ZERO) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ // mov $fpDst, $fpSrc
+ // mfc $gpDst, $fpSrc
+ // mtc $fpDst, $gpSrc
+ if (MI.getOpcode() == Mips::FMOV_S32 ||
+ MI.getOpcode() == Mips::FMOV_D32 ||
+ MI.getOpcode() == Mips::MFC1 ||
+ MI.getOpcode() == Mips::MTC1 ||
+ MI.getOpcode() == Mips::MOVCCRToCCR) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+
+ // addiu $dst, $src, 0
+ if (MI.getOpcode() == Mips::ADDiu) {
+ if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
+ (MI->getOpcode() == Mips::LDC1)) {
+ if ((MI->getOperand(2).isFI()) && // is a stack slot
+ (MI->getOperand(1).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(1)))) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
+ (MI->getOpcode() == Mips::SDC1)) {
+ if ((MI->getOperand(2).isFI()) && // is a stack slot
+ (MI->getOperand(1).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(1)))) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If a data hazard condition is found, insert the target
+/// nop instruction.
+void MipsInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
+{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(Mips::NOP));
+}
+
+bool MipsInstrInfo::
+copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC != SrcRC) {
+
+ // Copy to/from FCR31 condition register
+ if ((DestRC == Mips::CPURegsRegisterClass) &&
+ (SrcRC == Mips::CCRRegisterClass))
+ BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg);
+ else if ((DestRC == Mips::CCRRegisterClass) &&
+ (SrcRC == Mips::CPURegsRegisterClass))
+ BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg);
+
+ // Moves between coprocessors and cpu
+ else if ((DestRC == Mips::CPURegsRegisterClass) &&
+ (SrcRC == Mips::FGR32RegisterClass))
+ BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg);
+ else if ((DestRC == Mips::FGR32RegisterClass) &&
+ (SrcRC == Mips::CPURegsRegisterClass))
+ BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg);
+
+ // Move from/to Hi/Lo registers
+ else if ((DestRC == Mips::HILORegisterClass) &&
+ (SrcRC == Mips::CPURegsRegisterClass)) {
+ unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO;
+ BuildMI(MBB, I, DL, get(Opc), DestReg);
+ } else if ((SrcRC == Mips::HILORegisterClass) &&
+ (DestRC == Mips::CPURegsRegisterClass)) {
+ unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO;
+ BuildMI(MBB, I, DL, get(Opc), DestReg);
+
+ // Can't copy this register
+ } else
+ return false;
+
+ return true;
+ }
+
+ if (DestRC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
+ .addReg(SrcReg);
+ else if (DestRC == Mips::FGR32RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg);
+ else if (DestRC == Mips::AFGR64RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg);
+ else if (DestRC == Mips::CCRRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg);
+ else
+ // Can't copy this register
+ return false;
+
+ return true;
+}
+
+void MipsInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ unsigned Opc;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = Mips::SW;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = Mips::SWC1;
+ else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ Opc = Mips::SDC1;
+ }
+
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+}
+
+void MipsInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill, SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ unsigned Opc;
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = Mips::SW;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = Mips::SWC1;
+ else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ Opc = Mips::SDC1;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+void MipsInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const
+{
+ unsigned Opc;
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = Mips::LW;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = Mips::LWC1;
+ else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ Opc = Mips::LDC1;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI);
+}
+
+void MipsInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc;
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = Mips::LW;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = Mips::LWC1;
+ else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ Opc = Mips::LDC1;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+MachineInstr *MipsInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops, int FI) const
+{
+ if (Ops.size() != 1) return NULL;
+
+ MachineInstr *NewMI = NULL;
+
+ switch (MI->getOpcode()) {
+ case Mips::ADDu:
+ if ((MI->getOperand(0).isReg()) &&
+ (MI->getOperand(1).isReg()) &&
+ (MI->getOperand(1).getReg() == Mips::ZERO) &&
+ (MI->getOperand(2).isReg())) {
+ if (Ops[0] == 0) { // COPY -> STORE
+ unsigned SrcReg = MI->getOperand(2).getReg();
+ bool isKill = MI->getOperand(2).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+ } else { // COPY -> LOAD
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addImm(0).addFrameIndex(FI);
+ }
+ }
+ break;
+ case Mips::FMOV_S32:
+ case Mips::FMOV_D32:
+ if ((MI->getOperand(0).isReg()) &&
+ (MI->getOperand(1).isReg())) {
+ const TargetRegisterClass
+ *RC = RI.getRegClass(MI->getOperand(0).getReg());
+ unsigned StoreOpc, LoadOpc;
+
+ if (RC == Mips::FGR32RegisterClass) {
+ LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1;
+ } else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1;
+ }
+
+ if (Ops[0] == 0) { // COPY -> STORE
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI) ;
+ } else { // COPY -> LOAD
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addImm(0).addFrameIndex(FI);
+ }
+ }
+ break;
+ }
+
+ return NewMI;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+/// GetCondFromBranchOpc - Return the Mips CC that matches
+/// the corresponding branch instruction opcode.
+static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+ switch (BrOpc) {
+ default: return Mips::COND_INVALID;
+ case Mips::BEQ : return Mips::COND_E;
+ case Mips::BNE : return Mips::COND_NE;
+ case Mips::BGTZ : return Mips::COND_GZ;
+ case Mips::BGEZ : return Mips::COND_GEZ;
+ case Mips::BLTZ : return Mips::COND_LZ;
+ case Mips::BLEZ : return Mips::COND_LEZ;
+
+  // We don't do FP branch analysis yet!
+ case Mips::BC1T :
+ case Mips::BC1F : return Mips::COND_INVALID;
+ }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC)
+{
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case Mips::COND_E : return Mips::BEQ;
+ case Mips::COND_NE : return Mips::BNE;
+ case Mips::COND_GZ : return Mips::BGTZ;
+ case Mips::COND_GEZ : return Mips::BGEZ;
+ case Mips::COND_LZ : return Mips::BLTZ;
+ case Mips::COND_LEZ : return Mips::BLEZ;
+
+ case Mips::FCOND_F:
+ case Mips::FCOND_UN:
+ case Mips::FCOND_EQ:
+ case Mips::FCOND_UEQ:
+ case Mips::FCOND_OLT:
+ case Mips::FCOND_ULT:
+ case Mips::FCOND_OLE:
+ case Mips::FCOND_ULE:
+ case Mips::FCOND_SF:
+ case Mips::FCOND_NGLE:
+ case Mips::FCOND_SEQ:
+ case Mips::FCOND_NGL:
+ case Mips::FCOND_LT:
+ case Mips::FCOND_NGE:
+ case Mips::FCOND_LE:
+ case Mips::FCOND_NGT: return Mips::BC1T;
+
+ case Mips::FCOND_T:
+ case Mips::FCOND_OR:
+ case Mips::FCOND_NEQ:
+ case Mips::FCOND_OGL:
+ case Mips::FCOND_UGE:
+ case Mips::FCOND_OGE:
+ case Mips::FCOND_UGT:
+ case Mips::FCOND_OGT:
+ case Mips::FCOND_ST:
+ case Mips::FCOND_GLE:
+ case Mips::FCOND_SNE:
+ case Mips::FCOND_GL:
+ case Mips::FCOND_NLT:
+ case Mips::FCOND_GE:
+ case Mips::FCOND_NLE:
+ case Mips::FCOND_GT: return Mips::BC1F;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_E to COND_NE.
+Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC)
+{
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case Mips::COND_E : return Mips::COND_NE;
+ case Mips::COND_NE : return Mips::COND_E;
+ case Mips::COND_GZ : return Mips::COND_LEZ;
+ case Mips::COND_GEZ : return Mips::COND_LZ;
+ case Mips::COND_LZ : return Mips::COND_GEZ;
+ case Mips::COND_LEZ : return Mips::COND_GZ;
+ case Mips::FCOND_F : return Mips::FCOND_T;
+ case Mips::FCOND_UN : return Mips::FCOND_OR;
+ case Mips::FCOND_EQ : return Mips::FCOND_NEQ;
+ case Mips::FCOND_UEQ: return Mips::FCOND_OGL;
+ case Mips::FCOND_OLT: return Mips::FCOND_UGE;
+ case Mips::FCOND_ULT: return Mips::FCOND_OGE;
+ case Mips::FCOND_OLE: return Mips::FCOND_UGT;
+ case Mips::FCOND_ULE: return Mips::FCOND_OGT;
+ case Mips::FCOND_SF: return Mips::FCOND_ST;
+ case Mips::FCOND_NGLE:return Mips::FCOND_GLE;
+ case Mips::FCOND_SEQ: return Mips::FCOND_SNE;
+ case Mips::FCOND_NGL: return Mips::FCOND_GL;
+ case Mips::FCOND_LT: return Mips::FCOND_NLT;
+ case Mips::FCOND_NGE: return Mips::FCOND_GE;
+ case Mips::FCOND_LE: return Mips::FCOND_NLE;
+ case Mips::FCOND_NGT: return Mips::FCOND_GT;
+ }
+}
+
+bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (!LastInst->getDesc().isBranch())
+ return true;
+
+ // Unconditional branch
+ if (LastOpc == Mips::J) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+    Mips::CondCode BranchCode = GetCondFromBranchOpc(LastOpc);
+    if (BranchCode == Mips::COND_INVALID)
+      return true; // Can't handle indirect branch.
+
+    // Conditional branch
+    // Block ends with fall-through condbranch.
+    if (BranchCode != Mips::COND_INVALID) {
+ int LastNumOp = LastInst->getNumOperands();
+
+ TBB = LastInst->getOperand(LastNumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+
+ for (int i=0; i<LastNumOp-1; i++) {
+ Cond.push_back(LastInst->getOperand(i));
+ }
+
+ return false;
+ }
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it.
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+ if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) {
+ int SecondNumOp = SecondLastInst->getNumOperands();
+
+ TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+
+ for (int i=0; i<SecondNumOp-1; i++) {
+ Cond.push_back(SecondLastInst->getOperand(i));
+ }
+
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The last
+ // one is not executed, so remove it.
+ if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned MipsInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) &&
+ "Mips branch conditions can have two|three components!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch?
+ BuildMI(&MBB, dl, get(Mips::J)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
+ const TargetInstrDesc &TID = get(Opc);
+
+ if (TID.getNumOperands() == 3)
+ BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+ .addReg(Cond[2].getReg())
+ .addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
+ const TargetInstrDesc &TID = get(Opc);
+
+ if (TID.getNumOperands() == 3)
+ BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
+ .addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addMBB(TBB);
+
+ BuildMI(&MBB, dl, get(Mips::J)).addMBB(FBB);
+ return 2;
+}
+
+unsigned MipsInstrInfo::
+RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != Mips::J &&
+ GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+/// BlockHasNoFallThrough - Return true if the MachineBasicBlock cannot
+/// fall through into its successor block.
+bool MipsInstrInfo::
+BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
+{
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case Mips::RET: // Return.
+ case Mips::JR: // Indirect branch.
+ case Mips::J: // Uncond branch.
+ return true;
+ default: return false;
+ }
+}
+
+/// ReverseBranchCondition - Invert the branch condition in Cond in place,
+/// turning it into the condition of the opposite branch.
+bool MipsInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert( (Cond.size() == 3 || Cond.size() == 2) &&
+ "Invalid Mips branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((Mips::CondCode)Cond[0].getImm()));
+ return false;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
new file mode 100644
index 0000000..334244e
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -0,0 +1,223 @@
+//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTRUCTIONINFO_H
+#define MIPSINSTRUCTIONINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+namespace Mips {
+
+ // Mips Branch Codes
+ enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+ };
+
+ // Mips Condition Codes
+ enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_EQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+    // These conditions have the same mnemonics as the
+    // ones above, but are used with a branch on False;
+ FCOND_T,
+ FCOND_OR,
+ FCOND_NEQ,
+ FCOND_OGL,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT,
+
+ // Only integer conditions
+ COND_E,
+ COND_GZ,
+ COND_GEZ,
+ COND_LZ,
+ COND_LEZ,
+ COND_NE,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(Mips::CondCode CC);
+
+  /// MipsFCCToString - Map each FP condition code to its assembly mnemonic.
+ inline static const char *MipsFCCToString(Mips::CondCode CC)
+ {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+    case FCOND_NGE:
+    case FCOND_GE:   return "nge";
+    case FCOND_LE:
+    case FCOND_NLE:  return "le";
+    case FCOND_NGT:
+    case FCOND_GT:   return "ngt";
+ }
+ }
+}
+
+class MipsInstrInfo : public TargetInstrInfoImpl {
+ MipsTargetMachine &TM;
+ const MipsRegisterInfo RI;
+public:
+ explicit MipsInstrInfo(MipsTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  /// Insert a nop instruction when a hazard condition is found.
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
new file mode 100644
index 0000000..b9276fe
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -0,0 +1,707 @@
+//===- MipsInstrInfo.td - Mips Instruction defs ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Mips profiles and nodes
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<2, 3>, SDTCisInt<1>]>;
+def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>,
+ SDTCisInt<4>]>;
+def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// Call
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, [SDNPHasChain,
+ SDNPOutFlag]>;
+
+// Hi and Lo nodes are used to handle global addresses. Used in
+// MipsISelLowering to lower symbols such as GlobalAddress and
+// ExternalSymbol in the static model. (Nothing to do with the Mips
+// HI and LO registers.)
+def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
+def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+
+// Return
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+ SDNPOptInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// Select Condition Code
+def MipsSelectCC : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>;
+
+// Conditional Move
+def MipsCMov : SDNode<"MipsISD::CMov", SDT_MipsCMov>;
+
+//===----------------------------------------------------------------------===//
+// Mips Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
+def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
+def HasSwap : Predicate<"Subtarget.hasSwap()">;
+def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def shamt : Operand<i32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Address operand
+def mem : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops simm16, CPURegs);
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, andi
+def immSExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+ else
+ return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 transform means that only the lower 16 bits of the node
+// immediate are used.
+// e.g. addiu, sltiu
+def immZExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (uint32_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+ else
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
+
+// Mips addressing mode. The SDNode frameindex could possibly be a match,
+// since load and store instructions that access the stack use it.
+def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic 3 register operands
+let isCommutable = 1 in
+class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
+ InstrItinClass itin>:
+ FR< op,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+
+let isCommutable = 1 in
+class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [], IIAlu>;
+
+// Arithmetic 2 register operands
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+
+class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [], IIAlu>;
+
+// Arithmetic Multiply ADD/SUB
+let rd=0 in
+class MArithR<bits<6> func, string instr_asm> :
+ FR< 0x1c,
+ func,
+ (outs CPURegs:$rs),
+ (ins CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"),
+ [], IIImul>;
+
+// Logical
+class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, uimm16:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
+
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+
+// Shifts
+let rt = 0 in
+class LogicR_shift_imm<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, shamt:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu>;
+
+class LogicR_shift_reg<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+// Load Upper Immediate
+class LoadUpper<bits<6> op, string instr_asm>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins uimm16:$imm),
+ !strconcat(instr_asm, "\t$dst, $imm"),
+ [], IIAlu>;
+
+// Memory Load/Store
+let canFoldAsLoad = 1, hasDelaySlot = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI< op,
+ (outs),
+ (ins CPURegs:$dst, mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+
+// Conditional Branch
+let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI< op,
+ (outs),
+ (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+ !strconcat(instr_asm, "\t$a, $b, $offset"),
+ [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+ IIBranch>;
+
+
+class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI< op,
+ (outs),
+ (ins CPURegs:$src, brtarget:$offset),
+ !strconcat(instr_asm, "\t$src, $offset"),
+ [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
+ IIBranch>;
+}
+
+// SetCC
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
+ PatFrag cond_op>:
+ FR< op,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
+ IIAlu>;
+
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
+ Operand Od, PatLeaf imm_type>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+ IIAlu>;
+
+// Unconditional branch
+let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+class JumpFJ<bits<6> op, string instr_asm>:
+ FJ< op,
+ (outs),
+ (ins brtarget:$target),
+ !strconcat(instr_asm, "\t$target"),
+ [(br bb:$target)], IIBranch>;
+
+let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (outs),
+ (ins CPURegs:$target),
+ !strconcat(instr_asm, "\t$target"),
+ [(brind CPURegs:$target)], IIBranch>;
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=1,
+ // All calls clobber the non-callee saved registers...
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ K0, K1, F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13,
+ F14, F15, F16, F17, F18, F19], Uses = [GP] in {
+ class JumpLink<bits<6> op, string instr_asm>:
+ FJ< op,
+ (outs),
+ (ins calltarget:$target),
+ !strconcat(instr_asm, "\t$target"),
+ [(MipsJmpLink imm:$target)], IIBranch>;
+
+ let rd=31 in
+ class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (outs),
+ (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rs"),
+ [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+
+ class BranchLink<string instr_asm>:
+ FI< 0x1,
+ (outs),
+ (ins CPURegs:$rs, brtarget:$target),
+ !strconcat(instr_asm, "\t$rs, $target"),
+ [], IIBranch>;
+}
+
+// Mul, Div
+class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR< 0x00,
+ func,
+ (outs),
+ (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$a, $b"),
+ [], itin>;
+
+// Move from Hi/Lo
+class MoveFromLOHI<bits<6> func, string instr_asm>:
+ FR< 0x00,
+ func,
+ (outs CPURegs:$dst),
+ (ins),
+ !strconcat(instr_asm, "\t$dst"),
+ [], IIHiLo>;
+
+class MoveToLOHI<bits<6> func, string instr_asm>:
+ FR< 0x00,
+ func,
+ (outs),
+ (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$src"),
+ [], IIHiLo>;
+
+class EffectiveAddress<string instr_asm> :
+ FI<0x09,
+ (outs CPURegs:$dst),
+ (ins mem:$addr),
+ instr_asm,
+ [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+
+// Count Leading Ones/Zeros in Word
+class CountLeading<bits<6> func, string instr_asm, SDNode CountOp>:
+ FR< 0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (CountOp CPURegs:$src))], IIAlu>;
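+
+// For example, clz of 0x0000FFFF yields 16, since the upper 16 bits are zero.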
+
+// Sign Extend in Register.
+class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
+ FR< 0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+
+// Byte Swap
+class ByteSwap<bits<6> func, string instr_asm>:
+ FR< 0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
+
+// Conditional Move
+class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
+ FR< 0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
+ CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
+ [(set CPURegs:$dst, (MipsCMov CPURegs:$F, CPURegs:$T,
+ CPURegs:$cond, MovCode))], NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addiu, we need a 16-bit immediate
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// Some assembly macros need to avoid pseudoinstructions and automatic
+// assembler reordering; we should reorder ourselves.
+def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>;
+def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>;
+def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>;
+def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
+
+// When handling PIC code the assembler needs .cpload and .cprestore
+// directives. If the real instructions corresponding to these directives
+// are used, we get the same behavior, but also a bunch of warnings
+// from the assembler.
+def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
+def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>;
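+
+// For reference, the assembler typically expands ".cpload $25" to roughly:
+//   lui   $gp, %hi(_gp_disp)
+//   addiu $gp, $gp, %lo(_gp_disp)
+//   addu  $gp, $gp, $25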
+
+// The supported Mips ISAs don't have any instruction close to the SELECT_CC
+// operation. The solution is to create a Mips pseudo SELECT_CC instruction
+// (MipsSelectCC), use LowerSELECT_CC to generate this instruction, and
+// finally replace it with real supported nodes in EmitInstrWithCustomInserter.
+let usesCustomDAGSchedInserter = 1 in {
+ class PseudoSelCC<RegisterClass RC, string asmstr>:
+ MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
+ [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>;
+}
+
+def Select_CC : PseudoSelCC<CPURegs, "# MipsSelect_CC_i32">;
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsI Instructions
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>;
+def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>;
+def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>;
+def ANDi : LogicI<0x0c, "andi", and>;
+def ORi : LogicI<0x0d, "ori", or>;
+def XORi : LogicI<0x0e, "xori", xor>;
+def LUi : LoadUpper<0x0f, "lui">;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu>;
+def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>;
+def ADD : ArithOverflowR<0x00, 0x20, "add">;
+def SUB : ArithOverflowR<0x00, 0x22, "sub">;
+def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>;
+def AND : LogicR<0x24, "and", and>;
+def OR : LogicR<0x25, "or", or>;
+def XOR : LogicR<0x26, "xor", xor>;
+def NOR : LogicNOR<0x00, 0x27, "nor">;
+
+/// Shift Instructions
+def SLL : LogicR_shift_imm<0x00, "sll", shl>;
+def SRL : LogicR_shift_imm<0x02, "srl", srl>;
+def SRA : LogicR_shift_imm<0x03, "sra", sra>;
+def SLLV : LogicR_shift_reg<0x04, "sllv", shl>;
+def SRLV : LogicR_shift_reg<0x06, "srlv", srl>;
+def SRAV : LogicR_shift_reg<0x07, "srav", sra>;
+
+/// Load and Store Instructions
+def LB : LoadM<0x20, "lb", sextloadi8>;
+def LBu : LoadM<0x24, "lbu", zextloadi8>;
+def LH : LoadM<0x21, "lh", sextloadi16>;
+def LHu : LoadM<0x25, "lhu", zextloadi16>;
+def LW : LoadM<0x23, "lw", load>;
+def SB : StoreM<0x28, "sb", truncstorei8>;
+def SH : StoreM<0x29, "sh", truncstorei16>;
+def SW : StoreM<0x2b, "sw", store>;
+
+/// Jump and Branch Instructions
+def J : JumpFJ<0x02, "j">;
+def JR : JumpFR<0x00, 0x08, "jr">;
+def JAL : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def BEQ : CBranch<0x04, "beq", seteq>;
+def BNE : CBranch<0x05, "bne", setne>;
+
+let rt=1 in
+ def BGEZ : CBranchZero<0x01, "bgez", setge>;
+
+let rt=0 in {
+ def BGTZ : CBranchZero<0x07, "bgtz", setgt>;
+ def BLEZ : CBranchZero<0x07, "blez", setle>;
+ def BLTZ : CBranchZero<0x01, "bltz", setlt>;
+}
+
+def BGEZAL : BranchLink<"bgezal">;
+def BLTZAL : BranchLink<"bltzal">;
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1,
+ isBarrier=1, hasCtrlDep=1, rs=0, rt=0, shamt=0 in
+ def RET : FR <0x00, 0x02, (outs), (ins CPURegs:$target),
+ "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
+
+/// Multiply and Divide Instructions.
+let Defs = [HI, LO] in {
+ def MULT : MulDiv<0x18, "mult", IIImul>;
+ def MULTu : MulDiv<0x19, "multu", IIImul>;
+ def DIV : MulDiv<0x1a, "div", IIIdiv>;
+ def DIVu : MulDiv<0x1b, "divu", IIIdiv>;
+}
+
+let Defs = [HI] in
+ def MTHI : MoveToLOHI<0x11, "mthi">;
+let Defs = [LO] in
+ def MTLO : MoveToLOHI<0x13, "mtlo">;
+
+let Uses = [HI] in
+ def MFHI : MoveFromLOHI<0x10, "mfhi">;
+let Uses = [LO] in
+ def MFLO : MoveFromLOHI<0x12, "mflo">;
+
+/// Sign Ext In Register Instructions.
+let Predicates = [HasSEInReg] in {
+ let shamt = 0x10, rs = 0 in
+ def SEB : SignExtInReg<0x21, "seb", i8>;
+
+ let shamt = 0x18, rs = 0 in
+ def SEH : SignExtInReg<0x20, "seh", i16>;
+}
+
+/// Count Leading
+let Predicates = [HasBitCount] in {
+ let rt = 0 in
+ def CLZ : CountLeading<0b010110, "clz", ctlz>;
+}
+
+/// Byte Swap
+let Predicates = [HasSwap] in {
+ let shamt = 0x3, rs = 0 in
+ def WSBW : ByteSwap<0x20, "wsbw">;
+}
+
+/// Conditional Move
+def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
+def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
+
+let Predicates = [HasCondMov], isTwoAddress = 1 in {
+ def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>;
+ def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>;
+}
+
+/// No operation
+let addr=0 in
+ def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+
+// FrameIndexes are legalized when they are operands of load/store
+// instructions. The same does not happen for stack address copies, so an
+// add op with a mem ComplexPattern is used so that the stack address copy
+// can be matched. It's similar to Sparc's LEA_ADDRi.
+def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
+
+// MADD*/MSUB* are not part of MipsI either.
+//def MADD : MArithR<0x00, "madd">;
+//def MADDU : MArithR<0x01, "maddu">;
+//def MSUB : MArithR<0x04, "msub">;
+//def MSUBU : MArithR<0x05, "msubu">;
+
+// MUL is an assembly macro in the currently used ISAs. In recent ISAs
+// it is a real instruction.
+//def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul>;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : Pat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm),
+ (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
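+
+// e.g. materializing 0x12345678 becomes:
+//   lui $dst, 0x1234
+//   ori $dst, $dst, 0x5678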
+
+// Carry patterns
+def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
+ (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$src, imm:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
+
+// Call
+def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+def : Pat<(MipsJmpLink CPURegs:$dst),
+ (JALR CPURegs:$dst)>;
+
+// hi/lo relocs
+def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+ (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+ (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+
+// gp_rel relocs
+def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+ (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+ (ADDiu CPURegs:$gp, tconstpool:$in)>;
+
+// Mips does not have a "not" instruction, so we expand it ourselves.
+def : Pat<(not CPURegs:$in),
+ (NOR CPURegs:$in, ZERO)>;
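+
+// i.e. not $a is emitted as: nor $dst, $a, $zero.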
+
+// extended load and stores
+def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>;
+
+// peepholes
+def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+
+// brcond patterns
+def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst),
+ (BNE CPURegs:$lhs, ZERO, bb:$dst)>;
+def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst),
+ (BEQ CPURegs:$lhs, ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
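+
+// As a concrete illustration of the setge rules above, a branch on
+// (a >= b) is emitted roughly as:
+//   slt $tmp, $a, $b
+//   beq $tmp, $zero, L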
+
+def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond CPURegs:$cond, bb:$dst),
+ (BNE CPURegs:$cond, ZERO, bb:$dst)>;
+
+// select patterns
+def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs))>;
+def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLTiu CPURegs:$lh, immSExt16:$rh))>;
+
+def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$rhs, CPURegs:$lhs))>;
+def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs))>;
+
+def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+
+def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F),
+ (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>;
+
+// setcc patterns
+def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
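+
+// i.e. a == b iff (a ^ b) <u 1, and a != b iff 0 <u (a ^ b).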
+
+def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>;
+def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>;
+
+def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs),
+ (SLT CPURegs:$rhs, CPURegs:$lhs)>;
+def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu CPURegs:$rhs, CPURegs:$lhs)>;
+
+def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>;
+
+def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
+ (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
+ (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFPU.td"
+
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
new file mode 100644
index 0000000..b95394e
--- /dev/null
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -0,0 +1,131 @@
+//===-- MipsMachineFunction.h - Private data used for Mips --------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_MACHINE_FUNCTION_INFO_H
+#define MIPS_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private Mips target-specific information for each
+/// MachineFunction.
+class MipsFunctionInfo : public MachineFunctionInfo {
+
+private:
+  /// Holds, for each function, where on the stack the Frame Pointer must be
+  /// saved. This is used in the prologue and epilogue to emit FP save/restore.
+  int FPStackOffset;
+
+  /// Holds, for each function, where on the stack the Return Address must be
+  /// saved. This is used in the prologue and epilogue to emit RA save/restore.
+  int RAStackOffset;
+
+  /// At each function entry, two special bitmask directives must be emitted
+  /// to help debugging, one for the CPU and one for the FPU callee-saved
+  /// registers. Both need the negative offset from the final stack size
+  /// and the location of the highest saved register on the stack.
+ int CPUTopSavedRegOff;
+ int FPUTopSavedRegOff;
+
+  /// MipsFIHolder - Holds a FrameIndex and its Stack Pointer Offset.
+ struct MipsFIHolder {
+
+ int FI;
+ int SPOffset;
+
+ MipsFIHolder(int FrameIndex, int StackPointerOffset)
+ : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+ };
+
+ /// When PIC is used the GP must be saved on the stack on the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
+ /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
+ MipsFIHolder GPHolder;
+
+  /// In LowerFORMAL_ARGUMENTS the stack size is unknown, so the Stack
+  /// Pointer Offset calculation for arguments not passed in registers
+  /// must be postponed to emitPrologue.
+ SmallVector<MipsFIHolder, 16> FnLoadArgs;
+ bool HasLoadArgs;
+
+  // With VarArgs, we must write the argument registers back to the caller's
+  // stack, preserving the arguments passed in registers. Since the stack
+  // size is unknown in LowerFORMAL_ARGUMENTS, the Stack Pointer Offset
+  // calculation must be postponed to emitPrologue.
+ SmallVector<MipsFIHolder, 4> FnStoreVarArgs;
+ bool HasStoreVarArgs;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+public:
+ MipsFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
+ FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false),
+ HasStoreVarArgs(false), SRetReturnReg(0)
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; }
+ void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; }
+
+ int getFPUTopSavedRegOff() const { return FPUTopSavedRegOff; }
+ void setFPUTopSavedRegOff(int Off) { FPUTopSavedRegOff = Off; }
+
+ int getGPStackOffset() const { return GPHolder.SPOffset; }
+ int getGPFI() const { return GPHolder.FI; }
+ void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+ void setGPFI(int FI) { GPHolder.FI = FI; }
+
+ bool hasLoadArgs() const { return HasLoadArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLoadArgsFI(int FI, int SPOffset) {
+ if (!HasLoadArgs) HasLoadArgs=true;
+ FnLoadArgs.push_back(MipsFIHolder(FI, SPOffset));
+ }
+ void recordStoreVarArgsFI(int FI, int SPOffset) {
+ if (!HasStoreVarArgs) HasStoreVarArgs=true;
+ FnStoreVarArgs.push_back(MipsFIHolder(FI, SPOffset));
+ }
+
+ void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasLoadArgs()) return;
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
+ }
+ void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
+ }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+};
+
+} // end of namespace llvm
+
+#endif // MIPS_MACHINE_FUNCTION_INFO_H
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
new file mode 100644
index 0000000..579d4db
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -0,0 +1,535 @@
+//===- MipsRegisterInfo.cpp - MIPS Register Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-reg-info"
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsRegisterInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+ Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// Mips::RA, return the number that it corresponds to (e.g. 31).
+unsigned MipsRegisterInfo::
+getRegisterNumbering(unsigned RegEnum)
+{
+ switch (RegEnum) {
+ case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0;
+ case Mips::AT : case Mips::F1 : return 1;
+ case Mips::V0 : case Mips::F2 : case Mips::D1 : return 2;
+ case Mips::V1 : case Mips::F3 : return 3;
+ case Mips::A0 : case Mips::F4 : case Mips::D2 : return 4;
+ case Mips::A1 : case Mips::F5 : return 5;
+ case Mips::A2 : case Mips::F6 : case Mips::D3 : return 6;
+ case Mips::A3 : case Mips::F7 : return 7;
+ case Mips::T0 : case Mips::F8 : case Mips::D4 : return 8;
+ case Mips::T1 : case Mips::F9 : return 9;
+ case Mips::T2 : case Mips::F10: case Mips::D5: return 10;
+ case Mips::T3 : case Mips::F11: return 11;
+ case Mips::T4 : case Mips::F12: case Mips::D6: return 12;
+ case Mips::T5 : case Mips::F13: return 13;
+ case Mips::T6 : case Mips::F14: case Mips::D7: return 14;
+ case Mips::T7 : case Mips::F15: return 15;
+ case Mips::T8 : case Mips::F16: case Mips::D8: return 16;
+ case Mips::T9 : case Mips::F17: return 17;
+ case Mips::S0 : case Mips::F18: case Mips::D9: return 18;
+ case Mips::S1 : case Mips::F19: return 19;
+ case Mips::S2 : case Mips::F20: case Mips::D10: return 20;
+ case Mips::S3 : case Mips::F21: return 21;
+ case Mips::S4 : case Mips::F22: case Mips::D11: return 22;
+ case Mips::S5 : case Mips::F23: return 23;
+ case Mips::S6 : case Mips::F24: case Mips::D12: return 24;
+ case Mips::S7 : case Mips::F25: return 25;
+ case Mips::K0 : case Mips::F26: case Mips::D13: return 26;
+ case Mips::K1 : case Mips::F27: return 27;
+ case Mips::GP : case Mips::F28: case Mips::D14: return 28;
+ case Mips::SP : case Mips::F29: return 29;
+ case Mips::FP : case Mips::F30: case Mips::D15: return 30;
+ case Mips::RA : case Mips::F31: return 31;
+ default: assert(0 && "Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+unsigned MipsRegisterInfo::getPICCallReg(void) { return Mips::T9; }
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// Mips Callee Saved Registers
+const unsigned* MipsRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Mips callee-save register range is $16-$23, $f20-$f30
+ static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
+ Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+ Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+ Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25,
+ Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30, 0
+ };
+
+ static const unsigned BitMode32CalleeSavedRegs[] = {
+ Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+ Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+ Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
+ Mips::D10, Mips::D11, Mips::D12, Mips::D13, Mips::D14, Mips::D15,0
+ };
+
+ if (Subtarget.isSingleFloat())
+ return SingleFloatOnlyCalleeSavedRegs;
+ else
+ return BitMode32CalleeSavedRegs;
+}
+
+/// Mips Callee Saved Register Classes
+const TargetRegisterClass* const*
+MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
+{
+ static const TargetRegisterClass * const SingleFloatOnlyCalleeSavedRC[] = {
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
+ };
+
+ static const TargetRegisterClass * const BitMode32CalleeSavedRC[] = {
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass,
+ &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, 0
+ };
+
+ if (Subtarget.isSingleFloat())
+ return SingleFloatOnlyCalleeSavedRC;
+ else
+ return BitMode32CalleeSavedRC;
+}
+
+BitVector MipsRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Mips::ZERO);
+ Reserved.set(Mips::AT);
+ Reserved.set(Mips::K0);
+ Reserved.set(Mips::K1);
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::SP);
+ Reserved.set(Mips::FP);
+ Reserved.set(Mips::RA);
+
+  // SVR4 requires that the odd-numbered FP registers not be used.
+ if (!Subtarget.isSingleFloat())
+ for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2)
+ Reserved.set(FReg);
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer on the
+// first instruction of a function prologue. Once decremented, all
+// stack references are done through a positive offset from the
+// stack/frame pointer, so the stack is considered to grow up!
+// Otherwise terrible hacks would have to be made to get this stack
+// ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0 ----------
+// 4 Args to pass
+// . saved $GP (used in PIC)
+// . Alloca allocations
+// . Local Area
+// . CPU "Callee Saved" Registers
+// . saved FP
+// . saved RA
+// . FPU "Callee Saved" Registers
+// StackSize -----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The sp is the stack pointer; the stack size is subtracted from/added
+// to it in the prologue/epilogue.
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceed the stack size: (stacksize + (4 * (num_arg-1)))
+//
+// Examples:
+// - reference to the current stack frame
+//   for any local area variable there is something like: FI >= 0, StackOffset: 4
+//     sw REGX, 4(SP)
+//
+// - reference to the previous stack frame
+//   suppose there's a load of the 5th argument: FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown in LowerFORMAL_ARGUMENTS, all
+// stack references (ObjectOffset) created to reference the function
+// arguments are negative numbers. This way, in eliminateFrameIndex it's
+// possible to detect those references and adjust the offsets to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
+{
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+
+ // Min and Max CSI FrameIndex.
+ int MinCSFI = -1, MaxCSFI = -1;
+
+ // See the description at MipsMachineFunction.h
+ int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;
+
+  // Replace the dummy '0' SPOffset with the negative offsets, as explained
+  // in LowerFORMAL_ARGUMENTS. Leaving '0' for a while is necessary to keep
+  // calculateFrameObjectOffsets from laying out these objects itself.
+ MipsFI->adjustLoadArgsFI(MFI);
+ MipsFI->adjustStoreVarArgsFI(MFI);
+
+  // It happens that the default stack frame allocation order does not directly
+  // map to the convention used for Mips, so we must fix it. We move the
+  // callee-saved register slots after the local variables area, as described
+  // in the stack frame above.
+ unsigned CalleeSavedAreaSize = 0;
+ if (!CSI.empty()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
+ }
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+
+ // Adjust local variables. They should come on the stack right
+ // after the arguments.
+ int LastOffsetFI = -1;
+ for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (i >= MinCSFI && i <= MaxCSFI)
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ unsigned Offset = MFI->getObjectOffset(i) - CalleeSavedAreaSize;
+ if (LastOffsetFI == -1)
+ LastOffsetFI = i;
+ if (Offset > MFI->getObjectOffset(LastOffsetFI))
+ LastOffsetFI = i;
+ MFI->setObjectOffset(i, Offset);
+ }
+
+  // Adjust the CPU Callee Saved Registers Area. Registers RA and FP must
+  // be saved in this CPU Area when they are needed. This whole area must
+  // be aligned to the default Stack Alignment requirements.
+ unsigned StackOffset = 0;
+ unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8;
+
+ if (LastOffsetFI >= 0)
+ StackOffset = MFI->getObjectOffset(LastOffsetFI)+
+ MFI->getObjectSize(LastOffsetFI);
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
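+  // For example (illustrative numbers): with StackOffset = 20 and the usual
+  // StackAlign = 8, the expression above rounds the offset up to 24.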
+
+ for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
+ if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
+ break;
+ MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+ }
+
+ if (hasFP(MF)) {
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+ StackOffset);
+ MipsFI->setFPStackOffset(StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+ }
+
+ if (MFI->hasCalls()) {
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+ StackOffset);
+ MipsFI->setRAStackOffset(StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+ }
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+ // Adjust FPU Callee Saved Registers Area. This Area must be
+ // aligned to the default Stack Alignment requirements.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+ continue;
+ MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+ TopFPUSavedRegOff = StackOffset;
+ StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+ }
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+ // Update frame info
+ MFI->setStackSize(StackOffset);
+
+  // Recalculate the final top offsets. The final values must be '0'
+  // if there isn't a callee saved register for CPU or FPU, otherwise
+  // a negative offset is needed.
+ if (TopCPUSavedRegOff >= 0)
+ MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
+
+ if (TopFPUSavedRegOff >= 0)
+ MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsRegisterInfo::
+hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+// This function eliminates the ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MipsRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndexes represent objects inside an abstract stack.
+// We must replace each FrameIndex with a direct stack/frame
+// pointer reference.
+void MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ #ifndef NDEBUG
+ DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ DOUT << "<--------->\n";
+ MI.print(DOUT);
+ #endif
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MF.getFrameInfo()->getStackSize();
+ int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ #ifndef NDEBUG
+ DOUT << "FrameIndex : " << FrameIndex << "\n";
+ DOUT << "spOffset : " << spOffset << "\n";
+ DOUT << "stackSize : " << stackSize << "\n";
+ #endif
+
+  // As explained in LowerFORMAL_ARGUMENTS, detect negative offsets
+  // and adjust SPOffsets considering the final stack size.
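+  // Illustrative example: if the 5th argument was given the dummy
+  // spOffset = -20 and the final stackSize is 40, the line below computes
+  // 40 + (-(-20 + 4)) = 56, i.e. 16 + StackSize, as in the 'lw' example
+  // in the stack frame description above.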
+ int Offset = ((spOffset < 0) ? (stackSize + (-(spOffset+4))) : (spOffset));
+ Offset += MI.getOperand(i-1).getImm();
+
+ #ifndef NDEBUG
+ DOUT << "Offset : " << Offset << "\n";
+ DOUT << "<--------->\n";
+ #endif
+
+ MI.getOperand(i-1).ChangeToImmediate(Offset);
+ MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+}
+
+void MipsRegisterInfo::
+emitPrologue(MachineFunction &MF) const
+{
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+ bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+
+ // Get the right frame order for Mips.
+ adjustMipsStackFrame(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->hasCalls()) return;
+
+ int FPOffset = MipsFI->getFPStackOffset();
+ int RAOffset = MipsFI->getRAStackOffset();
+
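+  // A rough sketch of what follows for a non-leaf function with a 24-byte
+  // frame in PIC O32 mode (offsets and the PIC call register are
+  // illustrative assumptions):
+  //   .set noreorder
+  //   .cpload $25
+  //   .set nomacro
+  //   addiu $sp, $sp, -24
+  //   sw    $ra, 20($sp)
+  //   .cprestore 16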
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+
+  // TODO: check whether GP is needed here.
+ if (isPIC && Subtarget.isABI_O32())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)).addReg(getPICCallReg());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+
+  // Adjust stack: addiu $sp, $sp, (-imm)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(-StackSize);
+
+  // Save the return address only if the function isn't a leaf one.
+ // sw $ra, stack_loc($sp)
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+ .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+ }
+
+  // If the frame pointer is enabled, save it and set it
+  // to point to the stack pointer.
+ if (hasFP(MF)) {
+ // sw $fp,stack_loc($sp)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+ .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+
+ // move $fp, $sp
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
+ .addReg(Mips::SP).addReg(Mips::ZERO);
+ }
+
+  // PIC specific function prologue.
+ if ((isPIC) && (MFI->hasCalls())) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
+ .addImm(MipsFI->getGPStackOffset());
+ }
+}
+
+void MipsRegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ // Get the number of bytes from FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MipsFI->getFPStackOffset();
+ int RAOffset = MipsFI->getRAStackOffset();
+
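+  // Roughly, the epilogue emitted below mirrors the prologue sketch in
+  // emitPrologue: restore $fp if it was used, reload $ra for non-leaf
+  // functions, then 'addiu $sp, $sp, StackSize'.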
+  // If the frame pointer is enabled, restore it and the
+  // stack pointer.
+ if (hasFP(MF)) {
+ // move $sp, $fp
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP)
+ .addReg(Mips::FP).addReg(Mips::ZERO);
+
+ // lw $fp,stack_loc($sp)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
+ .addImm(FPOffset).addReg(Mips::SP);
+ }
+
+  // Restore the return address only if the function isn't a leaf one.
+ // lw $ra, stack_loc($sp)
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
+ .addImm(RAOffset).addReg(Mips::SP);
+ }
+
+  // Adjust stack: insert addiu $sp, $sp, (imm)
+ if (NumBytes) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(NumBytes);
+ }
+}
+
+
+void MipsRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ // Set the SPOffset on the FI where GP must be saved/loaded.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+ if (MFI->hasCalls() && isPIC) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MFI->setObjectOffset(MipsFI->getGPFI(), MipsFI->getGPStackOffset());
+ }
+}
+
+unsigned MipsRegisterInfo::
+getRARegister() const {
+ return Mips::RA;
+}
+
+unsigned MipsRegisterInfo::
+getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? Mips::FP : Mips::SP;
+}
+
+unsigned MipsRegisterInfo::
+getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned MipsRegisterInfo::
+getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int MipsRegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "MipsGenRegisterInfo.inc"
+
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
new file mode 100644
index 0000000..808e995
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -0,0 +1,78 @@
+//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGISTERINFO_H
+#define MIPSREGISTERINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "MipsGenRegisterInfo.h.inc"
+
+namespace llvm {
+class MipsSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct MipsRegisterInfo : public MipsGenRegisterInfo {
+ const MipsSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// Mips::RA, return the number that it corresponds to (e.g. 31).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg(void);
+
+ /// Adjust the Mips stack frame.
+ void adjustMipsStackFrame(MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
new file mode 100644
index 0000000..bbb275c
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -0,0 +1,252 @@
+//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MIPS register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MipsReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+// Mips CPU Registers
+class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 32-bit FPU Registers
+class FPR<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 64-bit (aliased) FPU Registers
+class AFPR<bits<5> num, string n, list<Register> aliases> : MipsReg<n> {
+ let Num = num;
+ let Aliases = aliases;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Mips" in {
+
+ // General Purpose Registers
+ def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
+ def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>;
+ def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>;
+ def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>;
+  def A0   : MipsGPRReg< 4,  "4">,    DwarfRegNum<[4]>;
+ def A1 : MipsGPRReg< 5, "5">, DwarfRegNum<[5]>;
+ def A2 : MipsGPRReg< 6, "6">, DwarfRegNum<[6]>;
+ def A3 : MipsGPRReg< 7, "7">, DwarfRegNum<[7]>;
+ def T0 : MipsGPRReg< 8, "8">, DwarfRegNum<[8]>;
+ def T1 : MipsGPRReg< 9, "9">, DwarfRegNum<[9]>;
+ def T2 : MipsGPRReg< 10, "10">, DwarfRegNum<[10]>;
+ def T3 : MipsGPRReg< 11, "11">, DwarfRegNum<[11]>;
+ def T4 : MipsGPRReg< 12, "12">, DwarfRegNum<[12]>;
+ def T5 : MipsGPRReg< 13, "13">, DwarfRegNum<[13]>;
+ def T6 : MipsGPRReg< 14, "14">, DwarfRegNum<[14]>;
+ def T7 : MipsGPRReg< 15, "15">, DwarfRegNum<[15]>;
+ def S0 : MipsGPRReg< 16, "16">, DwarfRegNum<[16]>;
+ def S1 : MipsGPRReg< 17, "17">, DwarfRegNum<[17]>;
+ def S2 : MipsGPRReg< 18, "18">, DwarfRegNum<[18]>;
+ def S3 : MipsGPRReg< 19, "19">, DwarfRegNum<[19]>;
+ def S4 : MipsGPRReg< 20, "20">, DwarfRegNum<[20]>;
+ def S5 : MipsGPRReg< 21, "21">, DwarfRegNum<[21]>;
+ def S6 : MipsGPRReg< 22, "22">, DwarfRegNum<[22]>;
+ def S7 : MipsGPRReg< 23, "23">, DwarfRegNum<[23]>;
+ def T8 : MipsGPRReg< 24, "24">, DwarfRegNum<[24]>;
+ def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<[25]>;
+ def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<[26]>;
+ def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<[27]>;
+ def GP : MipsGPRReg< 28, "GP">, DwarfRegNum<[28]>;
+ def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>;
+ def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
+ def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
+
+  /// Mips single precision FPU registers.
+ def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "F2">, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "F3">, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "F4">, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "F5">, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "F6">, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "F7">, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "F8">, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "F9">, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "F10">, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "F11">, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "F12">, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "F13">, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "F14">, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "F15">, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "F16">, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "F17">, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "F18">, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "F19">, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "F20">, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "F21">, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "F22">, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "F23">, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "F24">, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "F25">, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "F26">, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "F27">, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "F28">, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "F29">, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "F30">, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "F31">, DwarfRegNum<[63]>;
+
+  /// Mips double precision FPU registers (aliased with the
+  /// single precision registers to hold 64-bit values).
+ def D0 : AFPR< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
+ def D1 : AFPR< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>;
+ def D2 : AFPR< 4, "F4", [F4, F5]>, DwarfRegNum<[36]>;
+ def D3 : AFPR< 6, "F6", [F6, F7]>, DwarfRegNum<[38]>;
+ def D4 : AFPR< 8, "F8", [F8, F9]>, DwarfRegNum<[40]>;
+ def D5 : AFPR<10, "F10", [F10, F11]>, DwarfRegNum<[42]>;
+ def D6 : AFPR<12, "F12", [F12, F13]>, DwarfRegNum<[44]>;
+ def D7 : AFPR<14, "F14", [F14, F15]>, DwarfRegNum<[46]>;
+ def D8 : AFPR<16, "F16", [F16, F17]>, DwarfRegNum<[48]>;
+ def D9 : AFPR<18, "F18", [F18, F19]>, DwarfRegNum<[50]>;
+ def D10 : AFPR<20, "F20", [F20, F21]>, DwarfRegNum<[52]>;
+ def D11 : AFPR<22, "F22", [F22, F23]>, DwarfRegNum<[54]>;
+ def D12 : AFPR<24, "F24", [F24, F25]>, DwarfRegNum<[56]>;
+ def D13 : AFPR<26, "F26", [F26, F27]>, DwarfRegNum<[58]>;
+ def D14 : AFPR<28, "F28", [F28, F29]>, DwarfRegNum<[60]>;
+ def D15 : AFPR<30, "F30", [F30, F31]>, DwarfRegNum<[62]>;
+
+ // Hi/Lo registers
+ def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def LO : Register<"lo">, DwarfRegNum<[65]>;
+
+ // Status flags register
+ def FCR31 : Register<"31">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def CPURegs : RegisterClass<"Mips", [i32], 32,
+ // Return Values and Arguments
+ [V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Reserved
+ ZERO, AT, K0, K1, GP, SP, FP, RA]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CPURegsClass::iterator
+ CPURegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // The last 8 registers on the list above are reserved
+ return end()-8;
+ }
+ }];
+}
+
+// 64bit fp:
+// * FGR64 - 32 64-bit registers
+// * AFGR64 - 16 32-bit even registers (32-bit FP Mode)
+//
+// 32bit fp:
+// * FGR32 - 16 32-bit even registers
+// * FGR32 - 32 32-bit registers (single float only mode)
+def FGR32 : RegisterClass<"Mips", [f32], 32,
+ // Return Values and Arguments
+ [F0, F1, F2, F3, F12, F13, F14, F15,
+ // Not preserved across procedure calls
+ F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19,
+ // Callee save
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Reserved
+ F31]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+
+ static const unsigned MIPS_FGR32[] = {
+ Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13,
+ Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7,
+ Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17,
+ Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23,
+ Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
+ Mips::F30
+ };
+
+ static const unsigned MIPS_SVR4_FGR32[] = {
+ Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4,
+ Mips::F6, Mips::F8, Mips::F10, Mips::F16, Mips::F18,
+ Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
+ };
+
+ FGR32Class::iterator
+ FGR32Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+
+ if (Subtarget.isSingleFloat())
+ return MIPS_FGR32;
+ else
+ return MIPS_SVR4_FGR32;
+ }
+
+ FGR32Class::iterator
+ FGR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+
+ if (Subtarget.isSingleFloat())
+ return MIPS_FGR32 + (sizeof(MIPS_FGR32) / sizeof(unsigned));
+ else
+ return MIPS_SVR4_FGR32 + (sizeof(MIPS_SVR4_FGR32) / sizeof(unsigned));
+ }
+ }];
+}
+
+def AFGR64 : RegisterClass<"Mips", [f64], 64,
+ // Return Values and Arguments
+ [D0, D1, D6, D7,
+ // Not preserved across procedure calls
+ D2, D3, D4, D5, D8, D9,
+ // Callee save
+ D10, D11, D12, D13, D14,
+ // Reserved
+ D15]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ AFGR64Class::iterator
+ AFGR64Class::allocation_order_end(const MachineFunction &MF) const {
+ // The last register on the list above is reserved
+ return end()-1;
+ }
+ }];
+}
+
+// Condition Register for floating point operations
+def CCR : RegisterClass<"Mips", [i32], 32, [FCR31]>;
+
+// Hi/Lo Registers
+def HILO : RegisterClass<"Mips", [i32], 32, [HI, LO]>;
+
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
new file mode 100644
index 0000000..0c3ca573
--- /dev/null
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -0,0 +1,63 @@
+//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across Mips chip sets. Based on GCC/Mips backend files.
+//===----------------------------------------------------------------------===//
+def ALU : FuncUnit;
+def IMULDIV : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Mips
+//===----------------------------------------------------------------------===//
+def IIAlu : InstrItinClass;
+def IILoad : InstrItinClass;
+def IIStore : InstrItinClass;
+def IIXfer : InstrItinClass;
+def IIBranch : InstrItinClass;
+def IIHiLo : InstrItinClass;
+def IIImul : InstrItinClass;
+def IIIdiv : InstrItinClass;
+def IIFcvt : InstrItinClass;
+def IIFmove : InstrItinClass;
+def IIFcmp : InstrItinClass;
+def IIFadd : InstrItinClass;
+def IIFmulSingle : InstrItinClass;
+def IIFmulDouble : InstrItinClass;
+def IIFdivSingle : InstrItinClass;
+def IIFdivDouble : InstrItinClass;
+def IIFsqrtSingle : InstrItinClass;
+def IIFsqrtDouble : InstrItinClass;
+def IIFrecipFsqrtStep : InstrItinClass;
+def IIPseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Mips Generic instruction itineraries.
+//===----------------------------------------------------------------------===//
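+// Reading the entries below: e.g. IIImul occupies the IMULDIV unit for 17
+// cycles, while IILoad takes 3 cycles on the ALU.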
+def MipsGenericItineraries : ProcessorItineraries<[
+ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
+ InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
+ InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
+ InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
+ InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
+ InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
+ InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
+ InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
+ InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
+]>;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
new file mode 100644
index 0000000..4245f27
--- /dev/null
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -0,0 +1,77 @@
+//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mips specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSubtarget.h"
+#include "Mips.h"
+#include "MipsGenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+NotABICall("disable-mips-abicall", cl::Hidden,
+ cl::desc("Disable code for SVR4-style dynamic objects"));
+static cl::opt<bool>
+AbsoluteCall("enable-mips-absolute-call", cl::Hidden,
+ cl::desc("Enable absolute call within abicall"));
+static cl::opt<unsigned>
+SSThreshold("mips-ssection-threshold", cl::Hidden,
+ cl::desc("Small data and bss section threshold size (default=8)"),
+ cl::init(8));
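+// For example, 'llc -mips-ssection-threshold=16 ...' would place globals of
+// up to 16 bytes into the small data/bss sections (illustrative invocation).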
+
+MipsSubtarget::MipsSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS, bool little) :
+ MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
+ IsFP64bit(false), IsGP64bit(false), HasVFPU(false), HasABICall(true),
+ HasAbsoluteCall(false), IsLinux(true), HasSEInReg(false), HasCondMov(false),
+ HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false)
+{
+ std::string CPU = "mips1";
+ MipsArchVersion = Mips1;
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+ const std::string& TT = M.getTargetTriple();
+
+ // Small section size threshold
+ SSectionThreshold = SSThreshold;
+
+  // Is the target system Linux?
+ if (TT.find("linux") == std::string::npos)
+ IsLinux = false;
+
+  // When only the target triple is specified and it is
+  // an Allegrex target, set the features. We also match
+  // big and little endian Allegrex cores (we don't really
+  // know if a big endian one exists).
+ if (TT.find("mipsallegrex") != std::string::npos ||
+ TT.find("psp") != std::string::npos) {
+ MipsABI = EABI;
+ IsSingleFloat = true;
+ MipsArchVersion = Mips2;
+ HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet)
+ HasSEInReg = true;
+ HasBitCount = true;
+ HasSwap = true;
+ HasCondMov = true;
+ }
+
+ // Abicall is the default for O32 ABI, but is disabled within EABI and in
+ // static code.
+ if (NotABICall || isABI_EABI() || (TM.getRelocationModel() == Reloc::Static))
+ HasABICall = false;
+
+ // TODO: disable when handling 64 bit symbols in the future.
+ if (HasABICall && AbsoluteCall)
+ HasAbsoluteCall = true;
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
new file mode 100644
index 0000000..61c37c1
--- /dev/null
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -0,0 +1,139 @@
+//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSUBTARGET_H
+#define MIPSSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class MipsSubtarget : public TargetSubtarget {
+
+public:
+ enum MipsABIEnum {
+ O32, O64, N32, N64, EABI
+ };
+
+protected:
+
+ enum MipsArchEnum {
+ Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2, Mips64, Mips64r2
+ };
+
+ // Mips architecture version
+ MipsArchEnum MipsArchVersion;
+
+ // Mips supported ABIs
+ MipsABIEnum MipsABI;
+
+ // IsLittle - The target is Little Endian
+ bool IsLittle;
+
+  // IsSingleFloat - The target only supports single precision floating
+  // point operations. This enables the target to use all 32 of the 32-bit
+  // floating point registers instead of only the even-numbered ones.
+ bool IsSingleFloat;
+
+ // IsFP64bit - The target processor has 64-bit floating point registers.
+ bool IsFP64bit;
+
+  // IsGP64bit - General-purpose registers are 64 bits wide.
+ bool IsGP64bit;
+
+ // HasVFPU - Processor has a vector floating point unit.
+ bool HasVFPU;
+
+  // HasABICall - Enable SVR4 code for SVR4-style dynamic objects.
+ bool HasABICall;
+
+ // HasAbsoluteCall - Enable code that is not fully position-independent.
+ // Only works with HasABICall enabled.
+ bool HasAbsoluteCall;
+
+  // IsLinux - Target system is Linux. If false, we assume a generic
+  // ELF OS for now.
+ bool IsLinux;
+
+ // Put global and static items less than or equal to SSectionThreshold
+ // bytes into the small data or bss section. The default is 8.
+ unsigned SSectionThreshold;
+
+ /// Features related to the presence of specific instructions.
+
+ // HasSEInReg - SEB and SEH (signext in register) instructions.
+ bool HasSEInReg;
+
+ // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
+ bool HasCondMov;
+
+ // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
+ // instructions.
+ bool HasMulDivAdd;
+
+ // HasMinMax - MIN and MAX instructions.
+ bool HasMinMax;
+
+ // HasSwap - Byte and half swap instructions.
+ bool HasSwap;
+
+ // HasBitCount - Count leading '1' and '0' bits.
+ bool HasBitCount;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// Only O32 and EABI supported right now.
+ bool isABI_EABI() const { return MipsABI == EABI; }
+ bool isABI_O32() const { return MipsABI == O32; }
+ unsigned getTargetABI() const { return MipsABI; }
+
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ MipsSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS, bool little);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool isMips1() const { return MipsArchVersion == Mips1; }
+
+ bool isLittle() const { return IsLittle; }
+  bool isFP64bit() const { return IsFP64bit; }
+  bool isGP64bit() const { return IsGP64bit; }
+  bool isGP32bit() const { return !IsGP64bit; }
+  bool isSingleFloat() const { return IsSingleFloat; }
+  bool isNotSingleFloat() const { return !IsSingleFloat; }
+  bool hasVFPU() const { return HasVFPU; }
+  bool hasABICall() const { return HasABICall; }
+  bool hasAbsoluteCall() const { return HasAbsoluteCall; }
+  bool isLinux() const { return IsLinux; }
+ unsigned getSSectionThreshold() const { return SSectionThreshold; }
+
+ /// Features related to the presence of specific instructions.
+  bool hasSEInReg() const { return HasSEInReg; }
+  bool hasCondMov() const { return HasCondMov; }
+  bool hasMulDivAdd() const { return HasMulDivAdd; }
+  bool hasMinMax() const { return HasMinMax; }
+  bool hasSwap() const { return HasSwap; }
+  bool hasBitCount() const { return HasBitCount; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsTargetAsmInfo.cpp b/lib/Target/Mips/MipsTargetAsmInfo.cpp
new file mode 100644
index 0000000..c197b0c
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetAsmInfo.cpp
@@ -0,0 +1,98 @@
+//===-- MipsTargetAsmInfo.cpp - Mips asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MipsTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetAsmInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalVariable.h"
+
+using namespace llvm;
+
+MipsTargetAsmInfo::MipsTargetAsmInfo(const MipsTargetMachine &TM):
+ ELFTargetAsmInfo(TM) {
+
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
+ AlignmentIsInBytes = false;
+ COMMDirectiveTakesAlignment = true;
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = NULL;
+ PrivateGlobalPrefix = "$";
+ JumpTableDataSection = "\t.rdata";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ BSSSection = "\t.section\t.bss";
+ CStringSection = ".rodata.str";
+
+ if (!Subtarget->hasABICall()) {
+ JumpTableDirective = "\t.word\t";
+ SmallDataSection = getNamedSection("\t.sdata", SectionFlags::Writeable);
+ SmallBSSSection = getNamedSection("\t.sbss",
+ SectionFlags::Writeable |
+ SectionFlags::BSS);
+ } else
+ JumpTableDirective = "\t.gpword\t";
+
+}
+
+unsigned MipsTargetAsmInfo::
+SectionFlagsForGlobal(const GlobalValue *GV, const char* Name) const {
+ unsigned Flags = ELFTargetAsmInfo::SectionFlagsForGlobal(GV, Name);
+  // Mask out the Small Section flag bit; Mips doesn't support the 's' section
+  // symbol for its small sections.
+ return (Flags & (~SectionFlags::Small));
+}
+
+SectionKind::Kind MipsTargetAsmInfo::
+SectionKindForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind K = ELFTargetAsmInfo::SectionKindForGlobal(GV);
+
+ if (Subtarget->hasABICall())
+ return K;
+
+ if (K != SectionKind::Data && K != SectionKind::BSS &&
+ K != SectionKind::RODataMergeConst)
+ return K;
+
+ if (isa<GlobalVariable>(GV)) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ unsigned Threshold = Subtarget->getSSectionThreshold();
+
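+    // For example, with the default threshold of 8, a 4-byte global 'int'
+    // would be placed in the small BSS/data section (illustrative case).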
+ if (Size > 0 && Size <= Threshold) {
+ if (K == SectionKind::BSS)
+ return SectionKind::SmallBSS;
+ else
+ return SectionKind::SmallData;
+ }
+ }
+
+ return K;
+}
+
+const Section* MipsTargetAsmInfo::
+SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind K = SectionKindForGlobal(GV);
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+
+ if (GVA && (!GVA->isWeakForLinker()))
+ switch (K) {
+ case SectionKind::SmallData:
+ return getSmallDataSection();
+ case SectionKind::SmallBSS:
+ return getSmallBSSSection();
+ default: break;
+ }
+
+ return ELFTargetAsmInfo::SelectSectionForGlobal(GV);
+}
diff --git a/lib/Target/Mips/MipsTargetAsmInfo.h b/lib/Target/Mips/MipsTargetAsmInfo.h
new file mode 100644
index 0000000..2b5a739
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetAsmInfo.h
@@ -0,0 +1,51 @@
+//=====-- MipsTargetAsmInfo.h - Mips asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETASMINFO_H
+#define MIPSTARGETASMINFO_H
+
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class GlobalValue;
+ class MipsTargetMachine;
+
+ struct MipsTargetAsmInfo : public ELFTargetAsmInfo {
+ explicit MipsTargetAsmInfo(const MipsTargetMachine &TM);
+
+ /// SectionKindForGlobal - This hook allows the target to select proper
+ /// section kind used for global emission.
+ virtual SectionKind::Kind
+ SectionKindForGlobal(const GlobalValue *GV) const;
+
+ /// SectionFlagsForGlobal - This hook allows the target to select proper
+ /// section flags either for given global or for section.
+ virtual unsigned
+ SectionFlagsForGlobal(const GlobalValue *GV = NULL,
+ const char* name = NULL) const;
+
+ virtual const Section* SelectSectionForGlobal(const GlobalValue *GV) const;
+
+ private:
+ const MipsSubtarget *Subtarget;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
new file mode 100644
index 0000000..ef524e3
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -0,0 +1,133 @@
+//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Mips target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsTargetAsmInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// MipsTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int MipsTargetMachineModule;
+int MipsTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<MipsTargetMachine> X("mips", "Mips");
+static RegisterTarget<MipselTargetMachine> Y("mipsel", "Mipsel");
+
+const TargetAsmInfo *MipsTargetMachine::
+createTargetAsmInfo() const
+{
+ return new MipsTargetAsmInfo(*this);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment.
+// The stack is always 8 byte aligned.
+// On function prologue, the stack is created by decrementing
+// its pointer. Once decremented, all references are done with a positive
+// offset from the stack/frame pointer, so using StackGrowsUp enables
+// easier handling.
+// Using CodeModel::Large enables different CALL behavior.
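+// As a reading aid: in "E-p:32:32:32-i8:8:32-i16:16:32", 'E' means big-endian,
+// 'p:32:32:32' means 32-bit pointers with 32-bit ABI/preferred alignment, and
+// 'i8:8:32'/'i16:16:32' give i8/i16 an ABI alignment of 8/16 bits with a
+// preferred alignment of 32 bits.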
+MipsTargetMachine::
+MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle=false):
+ Subtarget(*this, M, FS, isLittle),
+ DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32") :
+ std::string("E-p:32:32:32-i8:8:32-i16:16:32")),
+ InstrInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
+ TLInfo(*this)
+{
+ // Abicall enables PIC by default
+ if (Subtarget.hasABICall())
+ setRelocationModel(Reloc::PIC_);
+
+ // TODO: create an option to enable long calls, like -mlong-calls,
+ // that would be our CodeModel::Large. It must not work with Abicall.
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+}
+
+MipselTargetMachine::
+MipselTargetMachine(const Module &M, const std::string &FS) :
+ MipsTargetMachine(M, FS, true) {}
+
+// Return a nonzero match for "mips*-*" triples; otherwise return 0, in which
+// case -march must be specified to generate MIPS code.
+unsigned MipsTargetMachine::
+getModuleMatchQuality(const Module &M)
+{
+ // We strongly match "mips*-*".
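+  // e.g. a triple of "mips-unknown-linux-gnu" scores 20 here, while an
+  // unrelated triple such as "i386-pc-linux-gnu" falls through and scores 0.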
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && std::string(TT.begin(), TT.begin()+5) == "mips-")
+ return 20;
+
+ if (TT.size() >= 13 && std::string(TT.begin(),
+ TT.begin()+13) == "mipsallegrex-")
+ return 20;
+
+ return 0;
+}
+
+// Return a nonzero match for "mips*el-*" triples; otherwise return 0, in
+// which case -march must be specified to generate MIPSEL code.
+unsigned MipselTargetMachine::
+getModuleMatchQuality(const Module &M)
+{
+ // We strongly match "mips*el-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 7 && std::string(TT.begin(), TT.begin()+7) == "mipsel-")
+ return 20;
+
+ if (TT.size() >= 15 && std::string(TT.begin(),
+ TT.begin()+15) == "mipsallegrexel-")
+ return 20;
+
+ if (TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "psp")
+ return 20;
+
+ return 0;
+}
+
+// Install an instruction selector pass using
+// the ISelDag to generate Mips code.
+bool MipsTargetMachine::
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MipsTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsDelaySlotFillerPass(*this));
+ return true;
+}
+
+// Implements the AssemblyEmitter for the target. Must return
+// true if AssemblyEmitter is supported
+bool MipsTargetMachine::
+addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out)
+{
+ // Output assembly language.
+ PM.add(createMipsCodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
new file mode 100644
index 0000000..a9e1df2
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -0,0 +1,80 @@
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETMACHINE_H
+#define MIPSTARGETMACHINE_H
+
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsISelLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+namespace llvm {
+ class raw_ostream;
+
+ class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MipsInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ MipsTargetLowering TLInfo;
+
+ protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ public:
+ MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle);
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const
+ { return &FrameInfo; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual MipsTargetLowering *getTargetLowering() const {
+ return const_cast<MipsTargetLowering*>(&TLInfo);
+ }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ };
+
+/// MipselTargetMachine - Mipsel target machine.
+///
+class MipselTargetMachine : public MipsTargetMachine {
+public:
+ MipselTargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt
new file mode 100644
index 0000000..00d737a
--- /dev/null
+++ b/lib/Target/PIC16/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_TARGET_DEFINITIONS PIC16.td)
+
+tablegen(PIC16GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(PIC16GenRegisterNames.inc -gen-register-enums)
+tablegen(PIC16GenRegisterInfo.inc -gen-register-desc)
+tablegen(PIC16GenInstrNames.inc -gen-instr-enums)
+tablegen(PIC16GenInstrInfo.inc -gen-instr-desc)
+tablegen(PIC16GenAsmWriter.inc -gen-asm-writer)
+tablegen(PIC16GenDAGISel.inc -gen-dag-isel)
+tablegen(PIC16GenCallingConv.inc -gen-callingconv)
+tablegen(PIC16GenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(PIC16
+ PIC16AsmPrinter.cpp
+ PIC16DebugInfo.cpp
+ PIC16InstrInfo.cpp
+ PIC16ISelDAGToDAG.cpp
+ PIC16ISelLowering.cpp
+ PIC16MemSelOpt.cpp
+ PIC16RegisterInfo.cpp
+ PIC16Subtarget.cpp
+ PIC16TargetAsmInfo.cpp
+ PIC16TargetMachine.cpp
+ )
diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile
new file mode 100644
index 0000000..c429324
--- /dev/null
+++ b/lib/Target/PIC16/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/PIC16/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMPIC16
+TARGET = PIC16
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \
+ PIC16GenRegisterInfo.inc PIC16GenInstrNames.inc \
+ PIC16GenInstrInfo.inc PIC16GenAsmWriter.inc \
+ PIC16GenDAGISel.inc PIC16GenCallingConv.inc \
+ PIC16GenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
new file mode 100644
index 0000000..40bed2f
--- /dev/null
+++ b/lib/Target/PIC16/PIC16.h
@@ -0,0 +1,345 @@
+//===-- PIC16.h - Top-level interface for PIC16 representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM PIC16 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_PIC16_H
+#define LLVM_TARGET_PIC16_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <iosfwd>
+#include <cassert>
+#include <sstream>
+#include <cstring>
+#include <string>
+
+namespace llvm {
+ class PIC16TargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class raw_ostream;
+
+namespace PIC16CC {
+ enum CondCodes {
+ EQ,
+ NE,
+ LT,
+ LE,
+ GT,
+ GE,
+ ULT,
+ UGT,
+ ULE,
+ UGE
+ };
+}
+ // A central class to manage all ABI naming conventions.
+ // PAN - [P]ic16 [A]BI [N]ames
+ class PAN {
+ public:
+ // Map the name of the symbol to its section name.
+ // Current ABI:
+ // -----------------------------------------------------
+  //  ALL Names are prefixed with the symbol '@'.
+  //  ------------------------------------------------------
+  //  Global variables do not have any '.' in their names.
+  //  These are mainly function names and global variable names.
+ // Example - @foo, @i
+ // -------------------------------------------------------
+ // Functions and auto variables.
+ // Names are mangled as <prefix><funcname>.<tag>.<varname>
+ // Where <prefix> is '@' and <tag> is any one of
+ // the following
+ // .auto. - an automatic var of a function.
+  //  .temp. - temporary data of a function.
+ // .ret. - return value label for a function.
+ // .frame. - Frame label for a function where retval, args
+ // and temps are stored.
+ // .args. - Label used to pass arguments to a direct call.
+ // Example - Function name: @foo
+ // Its frame: @foo.frame.
+ // Its retval: @foo.ret.
+ // Its local vars: @foo.auto.a
+ // Its temp data: @foo.temp.
+ // Its arg passing: @foo.args.
+ //----------------------------------------------
+ // Libcall - compiler generated libcall names must start with .lib.
+ // This id will be used to emit extern decls for libcalls.
+ // Example - libcall name: @.lib.sra.i8
+ // To pass args: @.lib.sra.i8.args.
+ // To return val: @.lib.sra.i8.ret.
+ //----------------------------------------------
+ // SECTION Names
+ // uninitialized globals - @udata.<num>.#
+ // initialized globals - @idata.<num>.#
+ // Function frame - @<func>.frame_section.
+ // Function autos - @<func>.autos_section.
+ // Declarations - @section.0
+ //----------------------------------------------------------
+
+ // Tags used to mangle different names.
+ enum TAGS {
+ PREFIX_SYMBOL,
+ GLOBAL,
+ STATIC_LOCAL,
+ AUTOS_LABEL,
+ FRAME_LABEL,
+ RET_LABEL,
+ ARGS_LABEL,
+ TEMPS_LABEL,
+
+ LIBCALL,
+
+ FRAME_SECTION,
+ AUTOS_SECTION,
+ CODE_SECTION
+ };
+
+ // Textual names of the tags.
+ inline static const char *getTagName(TAGS tag) {
+ switch (tag) {
+ default: return "";
+ case PREFIX_SYMBOL: return "@";
+ case AUTOS_LABEL: return ".auto.";
+ case FRAME_LABEL: return ".frame.";
+ case TEMPS_LABEL: return ".temp.";
+ case ARGS_LABEL: return ".args.";
+ case RET_LABEL: return ".ret.";
+ case LIBCALL: return ".lib.";
+ case FRAME_SECTION: return ".frame_section.";
+ case AUTOS_SECTION: return ".autos_section.";
+ case CODE_SECTION: return ".code_section.";
+ }
+ }
+
+ // Get tag type for the Symbol.
+ inline static TAGS getSymbolTag(const std::string &Sym) {
+ if (Sym.find(getTagName(TEMPS_LABEL)) != std::string::npos)
+ return TEMPS_LABEL;
+
+ if (Sym.find(getTagName(FRAME_LABEL)) != std::string::npos)
+ return FRAME_LABEL;
+
+ if (Sym.find(getTagName(RET_LABEL)) != std::string::npos)
+ return RET_LABEL;
+
+ if (Sym.find(getTagName(ARGS_LABEL)) != std::string::npos)
+ return ARGS_LABEL;
+
+ if (Sym.find(getTagName(AUTOS_LABEL)) != std::string::npos)
+ return AUTOS_LABEL;
+
+ if (Sym.find(getTagName(LIBCALL)) != std::string::npos)
+ return LIBCALL;
+
+    // It does not have any Tag, so it's a true global or static local.
+ if (Sym.find(".") == std::string::npos)
+ return GLOBAL;
+
+ // If a . is there, then it may be static local.
+ // We should mangle these as well in clang.
+ if (Sym.find(".") != std::string::npos)
+ return STATIC_LOCAL;
+
+    assert (0 && "Could not determine Symbol's tag");
+    return GLOBAL; // Not reached; keeps all return paths explicit.
+  }
+
+ // addPrefix - add prefix symbol to a name if there isn't one already.
+ inline static std::string addPrefix (const std::string &Name) {
+ std::string prefix = getTagName (PREFIX_SYMBOL);
+
+ // If this name already has a prefix, nothing to do.
+ if (Name.compare(0, prefix.size(), prefix) == 0)
+ return Name;
+
+ return prefix + Name;
+ }
+
+ // Get mangled func name from a mangled sym name.
+ // In all cases func name is the first component before a '.'.
+ static inline std::string getFuncNameForSym(const std::string &Sym1) {
+    assert (getSymbolTag(Sym1) != GLOBAL && "does not belong to a function");
+
+ std::string Sym = addPrefix(Sym1);
+
+ // Position of the . after func name. That's where func name ends.
+ size_t func_name_end = Sym.find ('.');
+
+ return Sym.substr (0, func_name_end);
+ }
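+  // e.g. getFuncNameForSym("@foo.auto.a") returns "@foo".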
+
+ // Get Frame start label for a func.
+ static std::string getFrameLabel(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(FRAME_LABEL);
+ return Func1 + tag;
+ }
+
+ static std::string getRetvalLabel(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(RET_LABEL);
+ return Func1 + tag;
+ }
+
+ static std::string getArgsLabel(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(ARGS_LABEL);
+ return Func1 + tag;
+ }
+
+ static std::string getTempdataLabel(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(TEMPS_LABEL);
+ return Func1 + tag;
+ }
+
+ static std::string getFrameSectionName(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(FRAME_SECTION);
+ return Func1 + tag + "# UDATA_OVR";
+ }
+
+ static std::string getAutosSectionName(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(AUTOS_SECTION);
+ return Func1 + tag + "# UDATA_OVR";
+ }
+
+ static std::string getCodeSectionName(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(CODE_SECTION);
+ return Func1 + tag + "# CODE";
+ }
+
+  // udata and idata section names are generated from a given number.
+ // @udata.<num>.#
+ static std::string getUdataSectionName(unsigned num) {
+ std::ostringstream o;
+ o << getTagName(PREFIX_SYMBOL) << "udata." << num << ".# UDATA";
+ return o.str();
+ }
+
+ static std::string getIdataSectionName(unsigned num) {
+ std::ostringstream o;
+ o << getTagName(PREFIX_SYMBOL) << "idata." << num << ".# IDATA";
+ return o.str();
+ }
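+  // e.g. getUdataSectionName(2) returns "@udata.2.# UDATA" and
+  // getIdataSectionName(2) returns "@idata.2.# IDATA".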
+
+ inline static bool isLocalName (const std::string &Name) {
+ if (getSymbolTag(Name) == AUTOS_LABEL)
+ return true;
+
+ return false;
+ }
+
+ inline static bool isLocalToFunc (std::string &Func, std::string &Var) {
+ if (! isLocalName(Var)) return false;
+
+ std::string Func1 = addPrefix(Func);
+    // Extract the func name of the variable.
+ const std::string &fname = getFuncNameForSym(Var);
+
+ if (fname.compare(Func1) == 0)
+ return true;
+
+ return false;
+ }
+
+
+  // Get the section for the given external symbol name.
+  // This tries to find the type (Tag) of the symbol from its mangled name
+  // and returns the appropriate section name for it.
+ static inline std::string getSectionNameForSym(const std::string &Sym1) {
+ std::string Sym = addPrefix(Sym1);
+
+ std::string SectionName;
+
+ std::string Fname = getFuncNameForSym (Sym);
+ TAGS id = getSymbolTag (Sym);
+
+ switch (id) {
+ default : assert (0 && "Could not determine external symbol type");
+ case FRAME_LABEL:
+ case RET_LABEL:
+ case TEMPS_LABEL:
+ case ARGS_LABEL: {
+ return getFrameSectionName(Fname);
+ }
+ case AUTOS_LABEL: {
+ return getAutosSectionName(Fname);
+ }
+ }
+ }
+ }; // class PAN.
+
+
+  // External symbol names require memory that lives until program end,
+  // so we allocate it here and deliberately never free it.
+ inline static const char *createESName (const std::string &name) {
+ char *tmpName = new char[name.size() + 1];
+ strcpy (tmpName, name.c_str());
+ return tmpName;
+ }
+
+
+
+ inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case PIC16CC::NE: return "ne";
+ case PIC16CC::EQ: return "eq";
+ case PIC16CC::LT: return "lt";
+ case PIC16CC::ULT: return "lt";
+    case PIC16CC::LE: return "le";
+    case PIC16CC::GT: return "gt";
+    case PIC16CC::UGT: return "gt";
+    case PIC16CC::GE: return "ge";
+    // Unsigned le/ge share mnemonics with their signed counterparts,
+    // matching the ULT/UGT cases above.
+    case PIC16CC::ULE: return "le";
+    case PIC16CC::UGE: return "ge";
+ }
+ }
+
+ inline static bool isSignedComparison(PIC16CC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case PIC16CC::NE:
+ case PIC16CC::EQ:
+ case PIC16CC::LT:
+ case PIC16CC::LE:
+ case PIC16CC::GE:
+ case PIC16CC::GT:
+ return true;
+ case PIC16CC::ULT:
+ case PIC16CC::UGT:
+ case PIC16CC::ULE:
+ case PIC16CC::UGE:
+ return false; // condition codes for unsigned comparison.
+ }
+ }
+
+
+
+ FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM);
+ FunctionPass *createPIC16CodePrinterPass(raw_ostream &OS,
+ PIC16TargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+  // Banksel optimizer pass.
+ FunctionPass *createPIC16MemSelOptimizerPass();
+} // end namespace llvm;
+
+// Defines symbolic names for PIC16 registers. This defines a mapping from
+// register name to register number.
+#include "PIC16GenRegisterNames.inc"
+
+// Defines symbolic names for the PIC16 instructions.
+#include "PIC16GenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/PIC16/PIC16.td b/lib/Target/PIC16/PIC16.td
new file mode 100644
index 0000000..b2b9b1c
--- /dev/null
+++ b/lib/Target/PIC16/PIC16.td
@@ -0,0 +1,40 @@
+//===- PIC16.td - Describe the PIC16 Target Machine -----------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the PIC16 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "PIC16RegisterInfo.td"
+include "PIC16InstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureCooper : SubtargetFeature<"cooper", "IsCooper", "true",
+ "PIC16 Cooper ISA Support">;
+
+//===----------------------------------------------------------------------===//
+// PIC16 supported processors.
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", NoItineraries, []>;
+def : Processor<"cooper", NoItineraries, [FeatureCooper]>;
+
+
+def PIC16InstrInfo : InstrInfo {}
+
+def PIC16 : Target {
+ let InstructionSet = PIC16InstrInfo;
+}
+
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp
new file mode 100644
index 0000000..ef3bc4b
--- /dev/null
+++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp
@@ -0,0 +1,404 @@
+//===-- PIC16AsmPrinter.cpp - PIC16 LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PIC16 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16AsmPrinter.h"
+#include "PIC16TargetAsmInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+#include "PIC16GenAsmWriter.inc"
+
+bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ printInstruction(MI);
+ return true;
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ // This calls the base class function required to be called at beginning
+ // of runOnMachineFunction.
+ SetupMachineFunction(MF);
+
+ // Get the mangled name.
+ const Function *F = MF.getFunction();
+ CurrentFnName = Mang->getValueName(F);
+
+ // Emit the function variables.
+ EmitFunctionFrame(MF);
+
+ // Emit function begin debug directives
+ DbgInfo.EmitFunctBeginDI(F);
+
+ EmitAutos(CurrentFnName);
+  // Keep the section name string alive: taking c_str() of the temporary
+  // returned by getCodeSectionName() would leave a dangling pointer.
+  std::string codeSection = PAN::getCodeSectionName(CurrentFnName);
+
+  const Section *fCodeSection = TAI->getNamedSection(codeSection.c_str(),
+                                                     SectionFlags::Code);
+ O << "\n";
+ // Start the Code Section.
+ SwitchToSection (fCodeSection);
+
+ // Emit the frame address of the function at the beginning of code.
+ O << "\tretlw low(" << PAN::getFrameLabel(CurrentFnName) << ")\n";
+ O << "\tretlw high(" << PAN::getFrameLabel(CurrentFnName) << ")\n";
+
+ // Emit function start label.
+ O << CurrentFnName << ":\n";
+
+ // For emitting line directives, we need to keep track of the current
+ // source line. When it changes then only emit the line directive.
+ unsigned CurLine = 0;
+ O << "\n";
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Emit the line directive if source line changed.
+ const DebugLoc DL = II->getDebugLoc();
+ if (!DL.isUnknown()) {
+ unsigned line = MF.getDebugLocTuple(DL).Line;
+ if (line != CurLine) {
+ O << "\t.line " << line << "\n";
+ CurLine = line;
+ }
+ }
+
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // Emit function end debug directives.
+ DbgInfo.EmitFunctEndDI(F, CurLine);
+ return false; // we didn't modify anything.
+}
+
+/// createPIC16CodePrinterPass - Returns a pass that prints the PIC16
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createPIC16CodePrinterPass(raw_ostream &o,
+ PIC16TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+
+// printOperand - print operand of insn.
+void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ else
+ assert(0 && "not implemented");
+ return;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ O << Mang->getValueName(MO.getGlobal());
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *Sname = MO.getSymbolName();
+
+      // If it's a libcall name, record it for the decls section.
+ if (PAN::getSymbolTag(Sname) == PAN::LIBCALL) {
+ LibcallDecls.push_back(Sname);
+ }
+
+ O << Sname;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+
+ default:
+ assert(0 && " Operand type not supported.");
+ }
+}
+
+void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
+}
+
+void PIC16AsmPrinter::printLibcallDecls(void) {
+ // If no libcalls used, return.
+ if (LibcallDecls.empty()) return;
+
+ O << TAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n";
+ // Remove duplicate entries.
+ LibcallDecls.sort();
+ LibcallDecls.unique();
+ for (std::list<const char*>::const_iterator I = LibcallDecls.begin();
+ I != LibcallDecls.end(); I++) {
+ O << TAI->getExternDirective() << *I << "\n";
+ O << TAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n";
+ O << TAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n";
+ }
+ O << TAI->getCommentString() << "External decls for libcalls - END." <<"\n";
+}
+
+bool PIC16AsmPrinter::doInitialization (Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+ DbgInfo.EmitFileDirective(M);
+
+  // FIXME: This is a temporary solution to generate the include file.
+  // The processor should be passed to llc as an input and the header file
+  // should be generated accordingly.
+ O << "\n\t#include P16F1937.INC\n";
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI);
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "Dwarf Writer is not available");
+ DW->BeginModule(&M, MMI, O, this, TAI);
+
+ // Set the section names for all globals.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ I->setSection(TAI->SectionForGlobal(I)->getName());
+ }
+
+ EmitFunctionDecls(M);
+ EmitUndefinedVars(M);
+ EmitDefinedVars(M);
+ EmitIData(M);
+ EmitUData(M);
+ EmitRomData(M);
+ DbgInfo.PopulateFunctsDI(M);
+ return Result;
+}
+
+// Emit extern decls for functions imported from other modules, and emit
+// global declarations for functions defined in this module which are
+// available to other modules.
+void PIC16AsmPrinter::EmitFunctionDecls (Module &M) {
+ // Emit declarations for external functions.
+ O << TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) {
+ std::string Name = Mang->getValueName(I);
+ if (Name.compare("@abort") == 0)
+ continue;
+
+    // If it is an LLVM intrinsic call then don't emit it.
+ if (Name.find("llvm.") != std::string::npos)
+ continue;
+
+ if (! (I->isDeclaration() || I->hasExternalLinkage()))
+ continue;
+
+ const char *directive = I->isDeclaration() ? TAI->getExternDirective() :
+ TAI->getGlobalDirective();
+
+ O << directive << Name << "\n";
+ O << directive << PAN::getRetvalLabel(Name) << "\n";
+ O << directive << PAN::getArgsLabel(Name) << "\n";
+ }
+
+ O << TAI->getCommentString() << "Function Declarations - END." <<"\n";
+}
+
+// Emit variables imported from other Modules.
+void PIC16AsmPrinter::EmitUndefinedVars (Module &M)
+{
+ std::vector<const GlobalVariable*> Items = PTAI->ExternalVarDecls->Items;
+ if (! Items.size()) return;
+
+ O << "\n" << TAI->getCommentString() << "Imported Variables - BEGIN" << "\n";
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << TAI->getExternDirective() << Mang->getValueName(Items[j]) << "\n";
+ }
+ O << TAI->getCommentString() << "Imported Variables - END" << "\n";
+}
+
+// Emit variables defined in this module that are available to other modules.
+void PIC16AsmPrinter::EmitDefinedVars (Module &M)
+{
+ std::vector<const GlobalVariable*> Items = PTAI->ExternalVarDefs->Items;
+ if (! Items.size()) return;
+
+ O << "\n" << TAI->getCommentString() << "Exported Variables - BEGIN" << "\n";
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << TAI->getGlobalDirective() << Mang->getValueName(Items[j]) << "\n";
+ }
+ O << TAI->getCommentString() << "Exported Variables - END" << "\n";
+}
+
+// Emit initialized data placed in ROM.
+void PIC16AsmPrinter::EmitRomData (Module &M)
+{
+
+ std::vector<const GlobalVariable*> Items = PTAI->ROSection->Items;
+ if (! Items.size()) return;
+
+  // Print the ROData section.
+ O << "\n";
+ SwitchToSection(PTAI->ROSection->S_);
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ int AddrSpace = Items[j]->getType()->getAddressSpace();
+ EmitGlobalConstant(C, AddrSpace);
+ }
+}
+
+bool PIC16AsmPrinter::doFinalization(Module &M) {
+ printLibcallDecls();
+ DbgInfo.EmitVarDebugInfo(M);
+ O << "\n\t" << ".EOF";
+ O << "\n\t" << "END\n";
+ bool Result = AsmPrinter::doFinalization(M);
+ return Result;
+}
+
+void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ std::string FuncName = Mang->getValueName(F);
+ const TargetData *TD = TM.getTargetData();
+ // Emit the data section name.
+ O << "\n";
+  // Keep the section name string alive: taking c_str() of the temporary
+  // returned by getFrameSectionName() would leave a dangling pointer.
+  std::string SectionName = PAN::getFrameSectionName(CurrentFnName);
+
+  const Section *fPDataSection = TAI->getNamedSection(SectionName.c_str(),
+                                       SectionFlags::Writeable);
+ SwitchToSection(fPDataSection);
+
+ // Emit function frame label
+ O << PAN::getFrameLabel(CurrentFnName) << ":\n";
+
+ const Type *RetType = F->getReturnType();
+ unsigned RetSize = 0;
+ if (RetType->getTypeID() != Type::VoidTyID)
+ RetSize = TD->getTypeAllocSize(RetType);
+
+  // Emit function return value space.
+  // FIXME: Do not emit the retval label when RetSize is zero. To do this
+  // we will need to avoid printing a global directive for the retval
+  // label in the extern/global emission.
+  if (RetSize > 0)
+ O << PAN::getRetvalLabel(CurrentFnName) << " RES " << RetSize << "\n";
+ else
+ O << PAN::getRetvalLabel(CurrentFnName) << ": \n";
+
+ // Emit variable to hold the space for function arguments
+ unsigned ArgSize = 0;
+ for (Function::const_arg_iterator argi = F->arg_begin(),
+ arge = F->arg_end(); argi != arge ; ++argi) {
+ const Type *Ty = argi->getType();
+ ArgSize += TD->getTypeAllocSize(Ty);
+ }
+
+ O << PAN::getArgsLabel(CurrentFnName) << " RES " << ArgSize << "\n";
+
+ // Emit temporary space
+ int TempSize = PTLI->GetTmpSize();
+ if (TempSize > 0 )
+ O << PAN::getTempdataLabel(CurrentFnName) << " RES " << TempSize <<"\n";
+}
+
+void PIC16AsmPrinter::EmitIData (Module &M) {
+
+ // Print all IDATA sections.
+ std::vector <PIC16Section *>IDATASections = PTAI->IDATASections;
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ O << "\n";
+ SwitchToSection(IDATASections[i]->S_);
+ std::vector<const GlobalVariable*> Items = IDATASections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string Name = Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ int AddrSpace = Items[j]->getType()->getAddressSpace();
+ O << Name;
+ EmitGlobalConstant(C, AddrSpace);
+ }
+ }
+}
+
+void PIC16AsmPrinter::EmitUData (Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print all BSS sections.
+ std::vector <PIC16Section *>BSSSections = PTAI->BSSSections;
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ O << "\n";
+ SwitchToSection(BSSSections[i]->S_);
+ std::vector<const GlobalVariable*> Items = BSSSections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string Name = Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ const Type *Ty = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+
+ O << Name << " " <<"RES"<< " " << Size ;
+ O << "\n";
+ }
+ }
+}
+
+void PIC16AsmPrinter::EmitAutos (std::string FunctName)
+{
+ // Section names for all globals are already set.
+
+ const TargetData *TD = TM.getTargetData();
+
+ // Now print Autos section for this function.
+ std::string SectionName = PAN::getAutosSectionName(FunctName);
+ std::vector <PIC16Section *>AutosSections = PTAI->AutosSections;
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+ O << "\n";
+ if (AutosSections[i]->S_->getName() == SectionName) {
+ SwitchToSection(AutosSections[i]->S_);
+ std::vector<const GlobalVariable*> Items = AutosSections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string VarName = Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ const Type *Ty = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ // Emit memory reserve directive.
+ O << VarName << " RES " << Size << "\n";
+ }
+ break;
+ }
+ }
+}
+
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h
new file mode 100644
index 0000000..2545dfd
--- /dev/null
+++ b/lib/Target/PIC16/PIC16AsmPrinter.h
@@ -0,0 +1,70 @@
+//===-- PIC16AsmPrinter.h - PIC16 LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PIC16 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16ASMPRINTER_H
+#define PIC16ASMPRINTER_H
+
+#include "PIC16.h"
+#include "PIC16TargetMachine.h"
+#include "PIC16DebugInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "PIC16TargetAsmInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <list>
+#include <string>
+
+namespace llvm {
+ struct VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter {
+ explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V), DbgInfo(O,T) {
+ PTLI = TM.getTargetLowering();
+ PTAI = static_cast<const PIC16TargetAsmInfo *> (T);
+ }
+  private:
+ virtual const char *getPassName() const {
+ return "PIC16 Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printCCOperand(const MachineInstr *MI, int opNum);
+ bool printInstruction(const MachineInstr *MI); // definition autogenerated.
+ bool printMachineInstruction(const MachineInstr *MI);
+ void EmitFunctionDecls (Module &M);
+ void EmitUndefinedVars (Module &M);
+ void EmitDefinedVars (Module &M);
+ void EmitIData (Module &M);
+ void EmitUData (Module &M);
+ void EmitAutos (std::string FunctName);
+ void EmitRomData (Module &M);
+ void EmitFunctionFrame(MachineFunction &MF);
+ void printLibcallDecls(void);
+ protected:
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ private:
+ PIC16TargetLowering *PTLI;
+ PIC16DbgInfo DbgInfo;
+ const PIC16TargetAsmInfo *PTAI;
+ std::list<const char *> LibcallDecls; // List of extern decls.
+ };
+} // end of namespace
+
+#endif
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
new file mode 100644
index 0000000..4d43811
--- /dev/null
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -0,0 +1,270 @@
+//===-- PIC16DebugInfo.cpp - Implementation for PIC16 Debug Information ======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the helper functions for representing debug information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "PIC16DebugInfo.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+PIC16DbgInfo::~PIC16DbgInfo() {
+ for(std::map<std::string, DISubprogram *>::iterator i = FunctNameMap.begin();
+ i!=FunctNameMap.end(); i++)
+ delete i->second;
+ FunctNameMap.clear();
+}
+
+void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName) {
+ if (Ty.isBasicType(Ty.getTag())) {
+ std::string Name = "";
+ Ty.getName(Name);
+ unsigned short BaseTy = GetTypeDebugNumber(Name);
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ TypeNo = TypeNo | (0xffff & BaseTy);
+ }
+ else if (Ty.isDerivedType(Ty.getTag())) {
+    switch (Ty.getTag()) {
+ case dwarf::DW_TAG_pointer_type:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ TypeNo = TypeNo | PIC16Dbg::DT_PTR;
+ break;
+ default:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ }
+ DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
+ PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
+ }
+ else if (Ty.isCompositeType(Ty.getTag())) {
+ switch (Ty.getTag()) {
+ case dwarf::DW_TAG_array_type: {
+ DICompositeType CTy = DICompositeType(Ty.getGV());
+ DIArray Elements = CTy.getTypeArray();
+ unsigned short size = 1;
+ unsigned short Dimension[4]={0,0,0,0};
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ TypeNo = TypeNo | PIC16Dbg::DT_ARY;
+ DISubrange SubRange = DISubrange(Element.getGV());
+ Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
+ // Each dimension is represented by 2 bytes starting at byte 9.
+ Aux[8+i*2+0] = Dimension[i];
+ Aux[8+i*2+1] = Dimension[i] >> 8;
+ size = size * Dimension[i];
+ }
+ }
+ HasAux = true;
+        // In the auxiliary entry for an array, the 7th and 8th bytes
+        // hold the array size.
+ Aux[6] = size;
+ Aux[7] = size >> 8;
+ DIType BaseType = CTy.getTypeDerivedFrom();
+ PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
+
+ break;
+ }
+      case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_structure_type: {
+ DICompositeType CTy = DICompositeType(Ty.getGV());
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ if (Ty.getTag() == dwarf::DW_TAG_structure_type)
+ TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
+ else
+ TypeNo = TypeNo | PIC16Dbg::T_UNION;
+ CTy.getName(TypeName);
+ unsigned size = CTy.getSizeInBits()/8;
+        // The 7th and 8th bytes hold the size.
+ HasAux = true;
+ Aux[6] = size;
+ Aux[7] = size >> 8;
+ break;
+ }
+ case dwarf::DW_TAG_enumeration_type: {
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ TypeNo = TypeNo | PIC16Dbg::T_ENUM;
+ break;
+ }
+ default:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ }
+ }
+ else {
+ TypeNo = PIC16Dbg::T_NULL;
+ HasAux = false;
+ }
+ return;
+}
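+
+// For illustration, how the packed TypeNo is built: for a pointer to
+// unsigned char, the derived step shifts TypeNo left by S_DERIVED (3)
+// and ORs in DT_PTR (1); the recursion on the base type then shifts
+// left by S_BASIC (5) and ORs in T_UCHAR (12), giving
+// ((0 << 3 | 1) << 5) | 12 = 44.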
+
+
+unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) {
+ if (type == "char")
+ return PIC16Dbg::T_CHAR;
+ else if (type == "short")
+ return PIC16Dbg::T_SHORT;
+ else if (type == "int")
+ return PIC16Dbg::T_INT;
+ else if (type == "long")
+ return PIC16Dbg::T_LONG;
+ else if (type == "unsigned char")
+ return PIC16Dbg::T_UCHAR;
+ else if (type == "unsigned short")
+ return PIC16Dbg::T_USHORT;
+ else if (type == "unsigned int")
+ return PIC16Dbg::T_UINT;
+ else if (type == "unsigned long")
+ return PIC16Dbg::T_ULONG;
+ else
+ return 0;
+}
+
+short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
+ short ClassNo;
+ if (PAN::isLocalName(DIGV.getGlobal()->getName())) {
+ // Generating C_AUTO here fails due to error in linker. Change it once
+ // linker is fixed.
+ ClassNo = PIC16Dbg::C_STAT;
+ }
+ else if (DIGV.isLocalToUnit())
+ ClassNo = PIC16Dbg::C_STAT;
+ else
+ ClassNo = PIC16Dbg::C_EXT;
+ return ClassNo;
+}
+
+void PIC16DbgInfo::PopulateFunctsDI(Module &M) {
+ GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.subprograms");
+ if (!Root)
+ return;
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+
+  // Iterate over RootC's own use list; mixing Root's end iterator with
+  // RootC's begin iterator would compare iterators of different lists.
+  for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI)
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI) {
+ GlobalVariable *GVSP = cast<GlobalVariable>(*UUI);
+ DISubprogram *SP = new DISubprogram(GVSP);
+ std::string Name;
+ SP->getLinkageName(Name);
+ FunctNameMap[Name] = SP;
+ }
+ return;
+}
+
+DISubprogram* PIC16DbgInfo::getFunctDI(std::string FunctName) {
+ return FunctNameMap[FunctName];
+}
+
+void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
+ std::string FunctName = F->getName();
+ DISubprogram *SP = getFunctDI(FunctName);
+ if (SP) {
+ std::string FunctBeginSym = ".bf." + FunctName;
+ std::string BlockBeginSym = ".bb." + FunctName;
+
+ int FunctBeginLine = SP->getLineNumber();
+ int BFAux[PIC16Dbg::AuxSize] = {0};
+ BFAux[4] = FunctBeginLine;
+ BFAux[5] = FunctBeginLine >> 8;
+ // Emit debug directives for beginning of function.
+ EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN);
+ EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize);
+ EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK);
+ EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize);
+ }
+}
+
+void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
+ std::string FunctName = F->getName();
+ DISubprogram *SP = getFunctDI(FunctName);
+ if (SP) {
+ std::string FunctEndSym = ".ef." + FunctName;
+ std::string BlockEndSym = ".eb." + FunctName;
+
+ // Emit debug directives for end of function.
+ EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK);
+ int EFAux[PIC16Dbg::AuxSize] = {0};
+    // The 5th and 6th bytes hold the line number.
+ EFAux[4] = Line;
+ EFAux[5] = Line >> 8;
+ EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize);
+ EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN);
+ EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize);
+ }
+}
+
+/// EmitAuxEntry - Emit Auxiliary debug information.
+///
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num) {
+ O << "\n\t.dim " << VarName << ", 1" ;
+ for (int i = 0; i<num; i++)
+ O << "," << Aux[i];
+}
+
+void PIC16DbgInfo::EmitSymbol(std::string Name, int Class) {
+ O << "\n\t" << ".def "<< Name << ", debug, class = " << Class;
+}
+
+void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
+ GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables");
+ if (!Root)
+ return;
+
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+  for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI) {
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI) {
+ DIGlobalVariable DIGV(cast<GlobalVariable>(*UUI));
+ DIType Ty = DIGV.getType();
+ unsigned short TypeNo = 0;
+ bool HasAux = false;
+ int Aux[PIC16Dbg::AuxSize] = { 0 };
+ std::string TypeName = "";
+ std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName();
+ PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TypeName);
+        // Emit debug info only if type information is available.
+ if (TypeNo != PIC16Dbg::T_NULL) {
+ O << "\n\t.type " << VarName << ", " << TypeNo;
+ short ClassNo = getClass(DIGV);
+ O << "\n\t.class " << VarName << ", " << ClassNo;
+ if (HasAux) {
+ if (TypeName != "") {
+            // Emit debug info for structure and union objects once the
+            // .dim directive supports structure/union tag names in the
+            // aux entry.
+ /* O << "\n\t.dim " << VarName << ", 1," << TypeName;
+ for (int i = 0; i<PIC16Dbg::AuxSize; i++)
+ O << "," << Aux[i];*/
+ }
+ else {
+ EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize);
+ }
+ }
+ }
+ }
+ }
+ O << "\n";
+}
+
+void PIC16DbgInfo::EmitFileDirective(Module &M) {
+ GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
+ if (CU) {
+ DICompileUnit DIUnit(CU);
+ std::string Dir, FN;
+ O << "\n\t.file\t\"" << DIUnit.getDirectory(Dir) <<"/"
+ << DIUnit.getFilename(FN) << "\"" ;
+ }
+}
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
new file mode 100644
index 0000000..96b23da
--- /dev/null
+++ b/lib/Target/PIC16/PIC16DebugInfo.h
@@ -0,0 +1,114 @@
+//===-- PIC16DebugInfo.h - Interfaces for PIC16 Debug Information ============//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the helper functions for representing debug information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16DBG_H
+#define PIC16DBG_H
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include <map>
+
+namespace llvm {
+ namespace PIC16Dbg {
+ enum VarType {
+ T_NULL,
+ T_VOID,
+ T_CHAR,
+ T_SHORT,
+ T_INT,
+ T_LONG,
+ T_FLOAT,
+ T_DOUBLE,
+ T_STRUCT,
+ T_UNION,
+ T_ENUM,
+ T_MOE,
+ T_UCHAR,
+ T_USHORT,
+ T_UINT,
+ T_ULONG
+ };
+ enum DerivedType {
+ DT_NONE,
+ DT_PTR,
+ DT_FCN,
+ DT_ARY
+ };
+ enum TypeSize {
+ S_BASIC = 5,
+ S_DERIVED = 3
+ };
+ enum DbgClass {
+ C_NULL,
+ C_AUTO,
+ C_EXT,
+ C_STAT,
+ C_REG,
+ C_EXTDEF,
+ C_LABEL,
+ C_ULABEL,
+ C_MOS,
+ C_ARG,
+ C_STRTAG,
+ C_MOU,
+ C_UNTAG,
+ C_TPDEF,
+ C_USTATIC,
+ C_ENTAG,
+ C_MOE,
+ C_REGPARM,
+ C_FIELD,
+ C_AUTOARG,
+ C_LASTENT,
+ C_BLOCK = 100,
+ C_FCN,
+ C_EOS,
+ C_FILE,
+ C_LINE,
+ C_ALIAS,
+ C_HIDDEN,
+ C_EOF,
+ C_LIST,
+ C_SECTION,
+ C_EFCN = 255
+ };
+ enum SymbolSize {
+      AuxSize = 20
+ };
+ }
+
+ class raw_ostream;
+
+ class PIC16DbgInfo {
+ std::map <std::string, DISubprogram *> FunctNameMap;
+ raw_ostream &O;
+ const TargetAsmInfo *TAI;
+ public:
+ PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {}
+ ~PIC16DbgInfo();
+ void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux,
+ int Aux[], std::string &TypeName);
+ unsigned GetTypeDebugNumber(std::string &type);
+ short getClass(DIGlobalVariable DIGV);
+ void PopulateFunctsDI(Module &M);
+ DISubprogram *getFunctDI(std::string FunctName);
+ void EmitFunctBeginDI(const Function *F);
+ void EmitFunctEndDI(const Function *F, unsigned Line);
+ void EmitAuxEntry(const std::string VarName, int Aux[], int num);
+ inline void EmitSymbol(std::string Name, int Class);
+ void EmitVarDebugInfo(Module &M);
+ void EmitFileDirective(Module &M);
+ };
+} // end namespace llvm;
+#endif
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
new file mode 100644
index 0000000..6c2b8ec
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
@@ -0,0 +1,59 @@
+//===-- PIC16ISelDAGToDAG.cpp - A dag to dag inst selector for PIC16 ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PIC16 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-isel"
+
+#include "PIC16ISelDAGToDAG.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+/// createPIC16ISelDag - This pass converts a legalized DAG into a
+/// PIC16-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
+ return new PIC16DAGToDAGISel(TM);
+}
+
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void PIC16DAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+/// Select - Select instructions that are not custom-lowered. Used for
+/// expanded, promoted and normal instructions.
+SDNode* PIC16DAGToDAGISel::Select(SDValue N) {
+
+ // Select the default instruction.
+ SDNode *ResNode = SelectCode(N);
+
+ return ResNode;
+}
+
+
+// SelectDirectAddr - Match a direct address for DAG.
+// A direct address could be a globaladdress or externalsymbol.
+bool PIC16DAGToDAGISel::SelectDirectAddr(SDValue Op, SDValue N,
+ SDValue &Address) {
+ // Return true if TGA or ES.
+ if (N.getOpcode() == ISD::TargetGlobalAddress
+ || N.getOpcode() == ISD::TargetExternalSymbol) {
+ Address = N;
+ return true;
+ }
+
+ return false;
+}
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
new file mode 100644
index 0000000..83abed3
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -0,0 +1,60 @@
+//===-- PIC16ISelDAGToDAG.h - A dag to dag inst selector for PIC16 --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PIC16 target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16ISELDAGTODAG_H
+#define PIC16ISELDAGTODAG_H
+
+#define DEBUG_TYPE "pic16-isel"
+
+#include "PIC16.h"
+#include "PIC16ISelLowering.h"
+#include "PIC16RegisterInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Intrinsics.h"
+using namespace llvm;
+
+namespace {
+
+class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to PIC16TargetMachine.
+ PIC16TargetMachine &TM;
+
+  /// PIC16Lowering - This object fully describes how to lower LLVM code to a
+ /// PIC16-specific SelectionDAG.
+ PIC16TargetLowering PIC16Lowering;
+
+public:
+ explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), PIC16Lowering(*TM.getTargetLowering()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "PIC16 DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual void InstructionSelect();
+
+private:
+ // Include the pieces autogenerated from the target description.
+#include "PIC16GenDAGISel.inc"
+
+ SDNode *Select(SDValue N);
+
+ // Match direct address complex pattern.
+ bool SelectDirectAddr(SDValue Op, SDValue N, SDValue &Address);
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
new file mode 100644
index 0000000..92fdcb2
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -0,0 +1,1756 @@
+//===-- PIC16ISelLowering.cpp - PIC16 DAG Lowering Implementation --------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PIC16 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-lower"
+
+#include "PIC16ISelLowering.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Function.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+
+using namespace llvm;
+
+static const char *getIntrinsicName(unsigned opcode) {
+ std::string Basename;
+ switch(opcode) {
+ default: assert (0 && "do not know intrinsic name");
+ case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
+ case RTLIB::SRA_I16: Basename = "sra.i16"; break;
+ case RTLIB::SRA_I32: Basename = "sra.i32"; break;
+
+ case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
+ case RTLIB::SHL_I16: Basename = "sll.i16"; break;
+ case RTLIB::SHL_I32: Basename = "sll.i32"; break;
+
+ case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
+ case RTLIB::SRL_I16: Basename = "srl.i16"; break;
+ case RTLIB::SRL_I32: Basename = "srl.i32"; break;
+
+ case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
+ case RTLIB::MUL_I16: Basename = "mul.i16"; break;
+ case RTLIB::MUL_I32: Basename = "mul.i32"; break;
+ }
+
+ std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
+ std::string tagname = PAN::getTagName(PAN::LIBCALL);
+ std::string Fullname = prefix + tagname + Basename;
+
+  // The name has to live for the whole life of the program.
+ char *tmp = new char[Fullname.size() + 1];
+ strcpy (tmp, Fullname.c_str());
+
+ return tmp;
+}
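+
+// For illustration: getIntrinsicName(PIC16ISD::MUL_I8) returns a
+// heap-allocated name of the form <prefix><libcall-tag>"mul.i8", where
+// both tags come from PAN::getTagName. The buffer is deliberately never
+// freed, since libcall names must outlive lowering.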
+
+// PIC16TargetLowering Constructor.
+PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
+ : TargetLowering(TM), TmpSize(0) {
+
+ Subtarget = &TM.getSubtarget<PIC16Subtarget>();
+
+ addRegisterClass(MVT::i8, PIC16::GPRRegisterClass);
+
+ setShiftAmountType(MVT::i8);
+ setShiftAmountFlavor(Extend);
+
+ // SRA library call names
+ setPIC16LibcallName(PIC16ISD::SRA_I8, getIntrinsicName(PIC16ISD::SRA_I8));
+ setLibcallName(RTLIB::SRA_I16, getIntrinsicName(RTLIB::SRA_I16));
+ setLibcallName(RTLIB::SRA_I32, getIntrinsicName(RTLIB::SRA_I32));
+
+ // SHL library call names
+ setPIC16LibcallName(PIC16ISD::SLL_I8, getIntrinsicName(PIC16ISD::SLL_I8));
+ setLibcallName(RTLIB::SHL_I16, getIntrinsicName(RTLIB::SHL_I16));
+ setLibcallName(RTLIB::SHL_I32, getIntrinsicName(RTLIB::SHL_I32));
+
+ // SRL library call names
+ setPIC16LibcallName(PIC16ISD::SRL_I8, getIntrinsicName(PIC16ISD::SRL_I8));
+ setLibcallName(RTLIB::SRL_I16, getIntrinsicName(RTLIB::SRL_I16));
+ setLibcallName(RTLIB::SRL_I32, getIntrinsicName(RTLIB::SRL_I32));
+
+ // MUL Library call names
+ setPIC16LibcallName(PIC16ISD::MUL_I8, getIntrinsicName(PIC16ISD::MUL_I8));
+ setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16));
+ setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32));
+
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::i8, Legal);
+ setOperationAction(ISD::LOAD, MVT::i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::i8, Legal);
+ setOperationAction(ISD::STORE, MVT::i16, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ setOperationAction(ISD::ADDE, MVT::i8, Custom);
+ setOperationAction(ISD::ADDC, MVT::i8, Custom);
+ setOperationAction(ISD::SUBE, MVT::i8, Custom);
+ setOperationAction(ISD::SUBC, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i16, Custom);
+
+ setOperationAction(ISD::OR, MVT::i8, Custom);
+ setOperationAction(ISD::AND, MVT::i8, Custom);
+ setOperationAction(ISD::XOR, MVT::i8, Custom);
+
+ setOperationAction(ISD::FrameIndex, MVT::i16, Custom);
+ setOperationAction(ISD::CALL, MVT::i16, Custom);
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Expand);
+ setOperationAction(ISD::SRA, MVT::i32, Expand);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Expand);
+ setOperationAction(ISD::SHL, MVT::i32, Expand);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Expand);
+ setOperationAction(ISD::SRL, MVT::i32, Expand);
+
+ // PIC16 does not support shift parts
+ setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+
+ // PIC16 does not have a SETCC, expand it to SELECT_CC.
+ setOperationAction(ISD::SETCC, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+
+ //setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
+ setTruncStoreAction(MVT::i16, MVT::i8, Custom);
+
+  // Now deduce the information based on the above-mentioned actions.
+ computeRegisterProperties();
+}
+
+// getOutFlag - Extract the flag result if the Op has it.
+static SDValue getOutFlag(SDValue &Op) {
+ // Flag is the last value of the node.
+ SDValue Flag = Op.getValue(Op.getNode()->getNumValues() - 1);
+
+ assert (Flag.getValueType() == MVT::Flag
+ && "Node does not have an out Flag");
+
+ return Flag;
+}
+// Get the TmpOffset for FrameIndex
+unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size) {
+ std::map<unsigned, unsigned>::iterator
+ MapIt = FiTmpOffsetMap.find(FI);
+ if (MapIt != FiTmpOffsetMap.end())
+ return MapIt->second;
+
+ // This FI (FrameIndex) is not yet mapped, so map it
+ FiTmpOffsetMap[FI] = TmpSize;
+ TmpSize += size;
+ return FiTmpOffsetMap[FI];
+}
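+
+// For illustration: starting with an empty map and TmpSize == 0, a call
+// GetTmpOffsetForFI(5, 2) returns 0 and bumps TmpSize to 2; a later call
+// GetTmpOffsetForFI(7, 1) then returns 2 (the FI numbers are arbitrary).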
+
+// Extract the chain value from an SDValue node.
+// Keeping the chain-extraction code in one place makes it easier to
+// maintain if anything changes in the future.
+static SDValue getChain(SDValue &Op) {
+ SDValue Chain = Op.getValue(Op.getNode()->getNumValues() - 1);
+
+  // If the last value returned is a Flag, then the chain is the
+  // second-to-last value returned.
+ if (Chain.getValueType() == MVT::Flag)
+ Chain = Op.getValue(Op.getNode()->getNumValues() - 2);
+
+  // Not all nodes produce a chain, so the following assert verifies
+  // that this node actually returns a chain.
+ assert (Chain.getValueType() == MVT::Other
+ && "Node does not have a chain");
+
+ return Chain;
+}
+
+/// PopulateResults - Helper function to LowerOperation.
+/// If a node wants to return multiple results after lowering,
+/// it stuffs them into an array of SDValue called Results.
+
+static void PopulateResults(SDValue N, SmallVectorImpl<SDValue>&Results) {
+ if (N.getOpcode() == ISD::MERGE_VALUES) {
+ int NumResults = N.getNumOperands();
+ for( int i = 0; i < NumResults; i++)
+ Results.push_back(N.getOperand(i));
+ }
+ else
+ Results.push_back(N);
+}
+
+MVT PIC16TargetLowering::getSetCCResultType(MVT ValType) const {
+ return MVT::i8;
+}
+
+/// The type legalizer framework can generate libcalls only when the
+/// operand/result types are illegal.
+/// PIC16 needs to generate libcalls even for legal types (i8) for some ops,
+/// for example an arithmetic right shift. These functions are used to lower
+/// such operations, generating libcalls for legal types.
+
+void
+PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call,
+ const char *Name) {
+ PIC16LibcallNames[Call] = Name;
+}
+
+const char *
+PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) {
+ return PIC16LibcallNames[Call];
+}
+
+SDValue
+PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
+ MVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned,
+ SelectionDAG &DAG, DebugLoc dl) {
+
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForMVT();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i8);
+
+ const Type *RetTy = RetVT.getTypeForMVT();
+ std::pair<SDValue,SDValue> CallInfo =
+ LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, CallingConv::C, false, Callee, Args, DAG, dl);
+
+ return CallInfo.first;
+}
+
+const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case PIC16ISD::Lo: return "PIC16ISD::Lo";
+ case PIC16ISD::Hi: return "PIC16ISD::Hi";
+ case PIC16ISD::MTLO: return "PIC16ISD::MTLO";
+ case PIC16ISD::MTHI: return "PIC16ISD::MTHI";
+ case PIC16ISD::MTPCLATH: return "PIC16ISD::MTPCLATH";
+ case PIC16ISD::PIC16Connect: return "PIC16ISD::PIC16Connect";
+ case PIC16ISD::Banksel: return "PIC16ISD::Banksel";
+ case PIC16ISD::PIC16Load: return "PIC16ISD::PIC16Load";
+ case PIC16ISD::PIC16LdArg: return "PIC16ISD::PIC16LdArg";
+ case PIC16ISD::PIC16LdWF: return "PIC16ISD::PIC16LdWF";
+ case PIC16ISD::PIC16Store: return "PIC16ISD::PIC16Store";
+ case PIC16ISD::PIC16StWF: return "PIC16ISD::PIC16StWF";
+ case PIC16ISD::BCF: return "PIC16ISD::BCF";
+ case PIC16ISD::LSLF: return "PIC16ISD::LSLF";
+ case PIC16ISD::LRLF: return "PIC16ISD::LRLF";
+ case PIC16ISD::RLF: return "PIC16ISD::RLF";
+ case PIC16ISD::RRF: return "PIC16ISD::RRF";
+ case PIC16ISD::CALL: return "PIC16ISD::CALL";
+ case PIC16ISD::CALLW: return "PIC16ISD::CALLW";
+ case PIC16ISD::SUBCC: return "PIC16ISD::SUBCC";
+ case PIC16ISD::SELECT_ICC: return "PIC16ISD::SELECT_ICC";
+ case PIC16ISD::BRCOND: return "PIC16ISD::BRCOND";
+ case PIC16ISD::Dummy: return "PIC16ISD::Dummy";
+ }
+}
+
+void PIC16TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+
+ switch (N->getOpcode()) {
+ case ISD::GlobalAddress:
+ Results.push_back(ExpandGlobalAddress(N, DAG));
+ return;
+ case ISD::ExternalSymbol:
+ Results.push_back(ExpandExternalSymbol(N, DAG));
+ return;
+ case ISD::STORE:
+ Results.push_back(ExpandStore(N, DAG));
+ return;
+ case ISD::LOAD:
+ PopulateResults(ExpandLoad(N, DAG), Results);
+ return;
+ case ISD::ADD:
+ // Results.push_back(ExpandAdd(N, DAG));
+ return;
+ case ISD::FrameIndex:
+ Results.push_back(ExpandFrameIndex(N, DAG));
+ return;
+ default:
+ assert (0 && "not implemented");
+ return;
+ }
+}
+
+SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) {
+
+ // Currently handling FrameIndex of size MVT::i16 only
+ // One example of this scenario is when return value is written on
+ // FrameIndex#0
+
+ if (N->getValueType(0) != MVT::i16)
+ return SDValue();
+
+  // Expand the FrameIndex into an ExternalSymbol and a Constant node;
+  // the constant will represent the frame index number.
+
+ FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0));
+ // FIXME there isn't really debug info here
+ DebugLoc dl = FR->getDebugLoc();
+ int Index = FR->getIndex();
+
+ // Expand FrameIndex like GlobalAddress and ExternalSymbol
+ // Also use Offset field for lo and hi parts. The default
+ // offset is zero.
+ SDValue Offset = DAG.getConstant(0, MVT::i8);
+ SDValue FI = DAG.getTargetFrameIndex(Index, MVT::i8);
+ SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, FI, Offset);
+ SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, FI, Offset);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi);
+}
+
+
+SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ SDValue Chain = St->getChain();
+ SDValue Src = St->getValue();
+ SDValue Ptr = St->getBasePtr();
+ MVT ValueType = Src.getValueType();
+ unsigned StoreOffset = 0;
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue PtrLo, PtrHi;
+ LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, StoreOffset, dl);
+
+ if (ValueType == MVT::i8) {
+ return DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, Src,
+ PtrLo, PtrHi,
+ DAG.getConstant (0 + StoreOffset, MVT::i8));
+ }
+ else if (ValueType == MVT::i16) {
+ // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
+ SDValue SrcLo, SrcHi;
+ GetExpandedParts(Src, DAG, SrcLo, SrcHi);
+ SDValue ChainLo = Chain, ChainHi = Chain;
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ ChainLo = Chain.getOperand(0);
+ ChainHi = Chain.getOperand(1);
+ }
+ SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
+ ChainLo,
+ SrcLo, PtrLo, PtrHi,
+ DAG.getConstant (0 + StoreOffset, MVT::i8));
+
+ SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi,
+ SrcHi, PtrLo, PtrHi,
+ DAG.getConstant (1 + StoreOffset, MVT::i8));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, getChain(Store1),
+ getChain(Store2));
+ }
+ else if (ValueType == MVT::i32) {
+ // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
+ SDValue SrcLo, SrcHi;
+ GetExpandedParts(Src, DAG, SrcLo, SrcHi);
+
+ // Get the expanded parts of each of SrcLo and SrcHi.
+ SDValue SrcLo1, SrcLo2, SrcHi1, SrcHi2;
+ GetExpandedParts(SrcLo, DAG, SrcLo1, SrcLo2);
+ GetExpandedParts(SrcHi, DAG, SrcHi1, SrcHi2);
+
+ SDValue ChainLo = Chain, ChainHi = Chain;
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ ChainLo = Chain.getOperand(0);
+ ChainHi = Chain.getOperand(1);
+ }
+ SDValue ChainLo1 = ChainLo, ChainLo2 = ChainLo, ChainHi1 = ChainHi,
+ ChainHi2 = ChainHi;
+ if (ChainLo.getOpcode() == ISD::TokenFactor) {
+ ChainLo1 = ChainLo.getOperand(0);
+ ChainLo2 = ChainLo.getOperand(1);
+ }
+ if (ChainHi.getOpcode() == ISD::TokenFactor) {
+ ChainHi1 = ChainHi.getOperand(0);
+ ChainHi2 = ChainHi.getOperand(1);
+ }
+ SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
+ ChainLo1,
+ SrcLo1, PtrLo, PtrHi,
+ DAG.getConstant (0 + StoreOffset, MVT::i8));
+
+ SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainLo2,
+ SrcLo2, PtrLo, PtrHi,
+ DAG.getConstant (1 + StoreOffset, MVT::i8));
+
+ SDValue Store3 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi1,
+ SrcHi1, PtrLo, PtrHi,
+ DAG.getConstant (2 + StoreOffset, MVT::i8));
+
+ SDValue Store4 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi2,
+ SrcHi2, PtrLo, PtrHi,
+ DAG.getConstant (3 + StoreOffset, MVT::i8));
+
+ SDValue RetLo = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(Store1), getChain(Store2));
+ SDValue RetHi = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(Store3), getChain(Store4));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetLo, RetHi);
+
+ }
+ else {
+ assert (0 && "value type not supported");
+ return SDValue();
+ }
+}
+
+SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG)
+{
+ ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(SDValue(N, 0));
+ // FIXME there isn't really debug info here
+ DebugLoc dl = ES->getDebugLoc();
+
+ SDValue TES = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
+ SDValue Offset = DAG.getConstant(0, MVT::i8);
+ SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TES, Offset);
+ SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TES, Offset);
+
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
+}
+
+// ExpandGlobalAddress -
+SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) {
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(SDValue(N, 0));
+ // FIXME there isn't really debug info here
+ DebugLoc dl = G->getDebugLoc();
+
+ SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i8,
+ G->getOffset());
+
+ SDValue Offset = DAG.getConstant(0, MVT::i8);
+ SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TGA, Offset);
+ SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TGA, Offset);
+
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
+}
+
+bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) {
+ assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!");
+
+ if (Op.getOpcode() == ISD::BUILD_PAIR) {
+ if (Op.getOperand(0).getOpcode() == PIC16ISD::Lo)
+ return true;
+ }
+ return false;
+}
+
+// Return true if DirectAddress is in ROM_SPACE
+bool PIC16TargetLowering::isRomAddress(const SDValue &Op) {
+
+  // A ROM address is a GlobalAddress in ROM_SPACE.
+  // If the Op is not a GlobalAddress, return false without checking
+  // anything further.
+ if (!isDirectAddress(Op))
+ return false;
+
+  // It's a GlobalAddress.
+  // Op is BUILD_PAIR((PIC16Lo TGA), (PIC16Hi TGA)).
+ SDValue TGA = Op.getOperand(0).getOperand(0);
+  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(TGA);
+
+  // The operand of PIC16ISD::Lo may also be a frame index or external
+  // symbol, so guard against a failed dyn_cast before dereferencing.
+  if (GSDN && GSDN->getAddressSpace() == PIC16ISD::ROM_SPACE)
+ return true;
+
+ // Any other address space return it false
+ return false;
+}
+
+
+// GetExpandedParts - This function works along the same lines as
+// GetExpandedInteger in the type legalizer. It returns the expanded
+// parts of Op in Lo and Hi.
+
+void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
+ SDValue &Lo, SDValue &Hi) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ MVT NewVT = getTypeToTransformTo(N->getValueType(0));
+
+ // Extract the lo component.
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
+ DAG.getConstant(0, MVT::i8));
+
+  // Extract the hi component.
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
+ DAG.getConstant(1, MVT::i8));
+}
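+
+// For illustration: for an i16 operand holding 0x1234, the two
+// EXTRACT_ELEMENT nodes above yield Lo == 0x34 (element 0) and
+// Hi == 0x12 (element 1), both of type i8.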
+
+// Legalize FrameIndex into ExternalSymbol and offset.
+void
+PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
+ SDValue &ES, int &Offset) {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *Func = MF.getFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const std::string Name = Func->getName();
+
+ FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op);
+
+  // FrameIndices are not stack offsets, but requests for space on the
+  // stack, and the space requested may be more than one byte. Therefore,
+  // to calculate the stack offset that a FrameIndex aligns with, we need
+  // to traverse all the FrameIndices that come earlier in the list and
+  // add up their requested sizes.
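+  // For example, if frame objects 0, 1 and 2 request 2, 1 and 4 bytes,
+  // FrameIndex #2 aligns with offset 2 + 1 = 3 from the frame base.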
+ unsigned FIndex = FR->getIndex();
+ const char *tmpName;
+ if (FIndex < ReservedFrameCount) {
+ tmpName = createESName(PAN::getFrameLabel(Name));
+ ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+ Offset = 0;
+ for (unsigned i=0; i<FIndex ; ++i) {
+ Offset += MFI->getObjectSize(i);
+ }
+ } else {
+ // FrameIndex has been made for some temporary storage
+ tmpName = createESName(PAN::getTempdataLabel(Name));
+ ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+ Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex));
+ }
+
+ return;
+}
+
+// This function legalizes PIC16 addresses. If the pointer is
+// -- a direct address variable
+//    --> then a Banksel for that variable will be created.
+// -- a ROM variable
+//    --> then it will be treated as an indirect address.
+// -- an indirect address
+//    --> then the address will be loaded into FSR.
+// -- an ADD with a constant operand
+//    --> then the constant operand of the ADD will be returned as Offset
+//        and the non-constant operand will be treated as the pointer.
+// Returns the hi and lo parts of the address, and the offset (in the case
+// of ADD).
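+// For example, a pointer of the form (ADD %p, 3) is returned with
+// Offset == 3, and %p is then legalized by the remaining rules above.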
+
+void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG,
+ SDValue &Lo, SDValue &Hi,
+ unsigned &Offset, DebugLoc dl) {
+
+ // Offset, by default, should be 0
+ Offset = 0;
+
+ // If the pointer is ADD with constant,
+ // return the constant value as the offset
+ if (Ptr.getOpcode() == ISD::ADD) {
+ SDValue OperLeft = Ptr.getOperand(0);
+ SDValue OperRight = Ptr.getOperand(1);
+ if (OperLeft.getOpcode() == ISD::Constant) {
+ Offset = dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue();
+ Ptr = OperRight;
+ } else if (OperRight.getOpcode() == ISD::Constant) {
+ Offset = dyn_cast<ConstantSDNode>(OperRight)->getZExtValue();
+ Ptr = OperLeft;
+ }
+ }
+
+ // If the pointer is Type i8 and an external symbol
+ // then treat it as direct address.
+ // One example for such case is storing and loading
+ // from function frame during a call
+ if (Ptr.getValueType() == MVT::i8) {
+ switch (Ptr.getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ Lo = Ptr;
+ Hi = DAG.getConstant(1, MVT::i8);
+ return;
+ }
+ }
+
+ // Expansion of FrameIndex has Lo/Hi parts
+ if (isDirectAddress(Ptr)) {
+ SDValue TFI = Ptr.getOperand(0).getOperand(0);
+ if (TFI.getOpcode() == ISD::TargetFrameIndex) {
+ int FrameOffset;
+ LegalizeFrameIndex(TFI, DAG, Lo, FrameOffset);
+ Hi = DAG.getConstant(1, MVT::i8);
+ Offset += FrameOffset;
+ return;
+ }
+ }
+
+ if (isDirectAddress(Ptr) && !isRomAddress(Ptr)) {
+ // Direct addressing case for RAM variables. The Hi part is constant
+ // and the Lo part is the TGA itself.
+ Lo = Ptr.getOperand(0).getOperand(0);
+
+ // For direct addresses Hi is a constant. Value 1 for the constant
+ // signifies that banksel needs to generated for it. Value 0 for
+ // the constant signifies that banksel does not need to be generated
+ // for it. Mark it as 1 now and optimize later.
+ Hi = DAG.getConstant(1, MVT::i8);
+ return;
+ }
+
+ // Indirect addresses. Get the hi and lo parts of ptr.
+ GetExpandedParts(Ptr, DAG, Lo, Hi);
+
+ // Put the hi and lo parts into FSR.
+ Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Lo);
+ Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Hi);
+
+ return;
+}
+
+SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) {
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(SDValue(N, 0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = LD->getDebugLoc();
+
+ SDValue Load, Offset;
+ SDVTList Tys;
+ MVT VT, NewVT;
+ SDValue PtrLo, PtrHi;
+ unsigned LoadOffset;
+
+ // Legalize direct/indirect addresses. This will give the lo and hi parts
+ // of the address and the offset.
+ LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, LoadOffset, dl);
+
+ // Load from the pointer (direct address or FSR)
+ VT = N->getValueType(0);
+ unsigned NumLoads = VT.getSizeInBits() / 8;
+ std::vector<SDValue> PICLoads;
+ unsigned iter;
+ MVT MemVT = LD->getMemoryVT();
+ if(ISD::isNON_EXTLoad(N)) {
+ for (iter=0; iter<NumLoads ; ++iter) {
+ // Add the pointer offset if any
+ Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
+ Tys = DAG.getVTList(MVT::i8, MVT::Other);
+ Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
+ Offset);
+ PICLoads.push_back(Load);
+ }
+ } else {
+    // If it is an extended load then use PIC16Load for the memory bytes,
+    // and for all extended bytes perform an action based on the type of
+    // extension - i.e. SignExtendedLoad or ZeroExtendedLoad.
+
+
+    // For extended loads, MemVT (computed above) is the memory value
+    // type, i.e. without any extension.
+ unsigned MemBytes = MemVT.getSizeInBits() / 8;
+ unsigned ExtdBytes = VT.getSizeInBits() / 8;
+ Offset = DAG.getConstant(LoadOffset, MVT::i8);
+
+ Tys = DAG.getVTList(MVT::i8, MVT::Other);
+ // For MemBytes generate PIC16Load with proper offset
+ for (iter=0; iter<MemBytes; ++iter) {
+ // Add the pointer offset if any
+ Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
+ Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
+ Offset);
+ PICLoads.push_back(Load);
+ }
+
+ // For SignExtendedLoad
+ if (ISD::isSEXTLoad(N)) {
+ // For all ExtdBytes use the Right Shifted(Arithmetic) Value of the
+ // highest MemByte
+ SDValue SRA = DAG.getNode(ISD::SRA, dl, MVT::i8, Load,
+ DAG.getConstant(7, MVT::i8));
+ for (iter=MemBytes; iter<ExtdBytes; ++iter) {
+ PICLoads.push_back(SRA);
+ }
+ } else if (ISD::isZEXTLoad(N)) {
+ // ZeroExtendedLoad -- For all ExtdBytes use constant 0
+ SDValue ConstZero = DAG.getConstant(0, MVT::i8);
+ for (iter=MemBytes; iter<ExtdBytes; ++iter) {
+ PICLoads.push_back(ConstZero);
+ }
+ }
+ }
+ SDValue BP;
+
+ if (VT == MVT::i8) {
+ // Operand of Load is illegal -- Load itself is legal
+ return PICLoads[0];
+ }
+ else if (VT == MVT::i16) {
+ BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, PICLoads[0], PICLoads[1]);
+ if (MemVT == MVT::i8)
+ Chain = getChain(PICLoads[0]);
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(PICLoads[0]), getChain(PICLoads[1]));
+ } else if (VT == MVT::i32) {
+ SDValue BPs[2];
+ BPs[0] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
+ PICLoads[0], PICLoads[1]);
+ BPs[1] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
+ PICLoads[2], PICLoads[3]);
+ BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, BPs[0], BPs[1]);
+ if (MemVT == MVT::i8)
+ Chain = getChain(PICLoads[0]);
+ else if (MemVT == MVT::i16)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(PICLoads[0]), getChain(PICLoads[1]));
+ else {
+ SDValue Chains[2];
+ Chains[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(PICLoads[0]), getChain(PICLoads[1]));
+ Chains[1] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(PICLoads[2]), getChain(PICLoads[3]));
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chains[0], Chains[1]);
+ }
+ }
+ Tys = DAG.getVTList(VT, MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain);
+}
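+
+// Worked example (sketch): a non-extending i16 load is expanded by the code
+// above into two PIC16Load nodes at offsets LoadOffset and LoadOffset + 1,
+// recombined with BUILD_PAIR; their chains merge through a TokenFactor.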
+
+SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal shift to lower");
+
+ SDNode *N = Op.getNode();
+ SDValue Value = N->getOperand(0);
+ SDValue Amt = N->getOperand(1);
+ PIC16ISD::PIC16Libcall CallCode;
+ switch (N->getOpcode()) {
+ case ISD::SRA:
+ CallCode = PIC16ISD::SRA_I8;
+ break;
+ case ISD::SHL:
+ CallCode = PIC16ISD::SLL_I8;
+ break;
+ case ISD::SRL:
+ CallCode = PIC16ISD::SRL_I8;
+ break;
+ default:
+ assert ( 0 && "This shift is not implemented yet.");
+ return SDValue();
+ }
+ SmallVector<SDValue, 2> Ops(2);
+ Ops[0] = Value;
+ Ops[1] = Amt;
+ SDValue Call = MakePIC16Libcall(CallCode, N->getValueType(0), &Ops[0], 2,
+ true, DAG, N->getDebugLoc());
+ return Call;
+}
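+
+// For example (sketch): an i8 (shl %val, %amt) is not matched to a native
+// shift here; it becomes a call to the SLL_I8 PIC16 libcall with %val and
+// %amt as the two arguments.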
+
+void
+PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ SDValue Op = SDValue(N, 0);
+ SDValue Res;
+ unsigned i;
+ switch (Op.getOpcode()) {
+ case ISD::FORMAL_ARGUMENTS:
+ Res = LowerFORMAL_ARGUMENTS(Op, DAG); break;
+ case ISD::LOAD:
+ Res = ExpandLoad(Op.getNode(), DAG); break;
+ case ISD::CALL:
+ Res = LowerCALL(Op, DAG); break;
+ default: {
+ // All other operations are handled in LowerOperation.
+ Res = LowerOperation(Op, DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+
+ return;
+ }
+ }
+
+ N = Res.getNode();
+ unsigned NumValues = N->getNumValues();
+ for (i = 0; i < NumValues ; i++) {
+ Results.push_back(SDValue(N, i));
+ }
+}
+
+SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ return LowerADD(Op, DAG);
+ case ISD::SUB:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ return LowerSUB(Op, DAG);
+ case ISD::LOAD:
+ return ExpandLoad(Op.getNode(), DAG);
+ case ISD::STORE:
+ return ExpandStore(Op.getNode(), DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return LowerShift(Op, DAG);
+ case ISD::OR:
+ case ISD::AND:
+ case ISD::XOR:
+ return LowerBinOp(Op, DAG);
+ case ISD::CALL:
+ return LowerCALL(Op, DAG);
+ case ISD::RET:
+ return LowerRET(Op, DAG);
+ case ISD::BR_CC:
+ return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG);
+ }
+ return SDValue();
+}
+
+SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert (Op.getValueType() == MVT::i8
+ && "illegal value type to store on stack.");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *Func = MF.getFunction();
+ const std::string FuncName = Func->getName();
+
+
+ // Put the value on stack.
+ // Get a stack slot index and convert to es.
+ int FI = MF.getFrameInfo()->CreateStackObject(1, 1);
+ const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+ SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+
+ // Store the value to ES.
+ SDValue Store = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other,
+ DAG.getEntryNode(),
+ Op, ES,
+ DAG.getConstant (1, MVT::i8), // Banksel.
+ DAG.getConstant (GetTmpOffsetForFI(FI, 1),
+ MVT::i8));
+
+ // Load the value from ES.
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other);
+ SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store,
+ ES, DAG.getConstant (1, MVT::i8),
+ DAG.getConstant (GetTmpOffsetForFI(FI, 1),
+ MVT::i8));
+
+ return Load.getValue(0);
+}
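+
+// Note: the above is a store/load round-trip through the function's temp
+// data section (the PAN::getTempdataLabel symbol), after which the value
+// can serve as the memory operand of insns like addwf.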
+
+SDValue PIC16TargetLowering::
+LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ SelectionDAG &DAG) {
+ CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
+ unsigned NumOps = TheCall->getNumArgs();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // If call has no arguments then do nothing and return.
+ if (NumOps == 0)
+ return Chain;
+
+ std::vector<SDValue> Ops;
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Arg, StoreRet;
+
+ // For PIC16 ABI the arguments come after the return value.
+ unsigned RetVals = TheCall->getNumRetVals();
+ for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) {
+ // Get the arguments
+ Arg = TheCall->getArg(i);
+
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(Arg);
+ Ops.push_back(DataAddr_Lo);
+ Ops.push_back(DataAddr_Hi);
+ Ops.push_back(DAG.getConstant(ArgOffset, MVT::i8));
+ Ops.push_back(InFlag);
+
+ StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
+
+ Chain = getChain(StoreRet);
+ InFlag = getOutFlag(StoreRet);
+ ArgOffset++;
+ }
+ return Chain;
+}
+
+SDValue PIC16TargetLowering::
+LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel,
+ SDValue InFlag, SelectionDAG &DAG) {
+ CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
+ unsigned NumOps = TheCall->getNumArgs();
+ DebugLoc dl = TheCall->getDebugLoc();
+ std::string Name;
+ SDValue Arg, StoreAt;
+ MVT ArgVT;
+ unsigned Size=0;
+ unsigned ArgCount=0;
+
+ // If call has no arguments then do nothing and return.
+ if (NumOps == 0)
+ return Chain;
+
+ // FIXME: This portion of code currently assumes only
+ // primitive types being passed as arguments.
+
+ // Legalize the address before use
+ SDValue PtrLo, PtrHi;
+ unsigned AddressOffset;
+ int StoreOffset = 0;
+ LegalizeAddress(ArgLabel, DAG, PtrLo, PtrHi, AddressOffset, dl);
+ SDValue StoreRet;
+
+ std::vector<SDValue> Ops;
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ for (unsigned i=ArgCount, Offset = 0; i<NumOps; i++) {
+ // Get the argument
+ Arg = TheCall->getArg(i);
+ StoreOffset = (Offset + AddressOffset);
+
+ // Store the argument on frame
+
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(Arg);
+ Ops.push_back(PtrLo);
+ Ops.push_back(PtrHi);
+ Ops.push_back(DAG.getConstant(StoreOffset, MVT::i8));
+ Ops.push_back(InFlag);
+
+ StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
+
+ Chain = getChain(StoreRet);
+ InFlag = getOutFlag(StoreRet);
+
+ // Update the frame offset to be used for next argument
+ ArgVT = Arg.getValueType();
+ Size = ArgVT.getSizeInBits();
+ Size = Size/8; // Calculate size in bytes
+ Offset += Size; // Increase the frame offset
+ }
+ return Chain;
+}
+
+SDValue PIC16TargetLowering::
+LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ SelectionDAG &DAG) {
+ CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
+ DebugLoc dl = TheCall->getDebugLoc();
+ unsigned RetVals = TheCall->getNumRetVals();
+
+ // If call does not have anything to return
+ // then do nothing and go back.
+ if (RetVals == 0)
+ return Chain;
+
+ // Call has something to return
+ std::vector<SDValue> ResultVals;
+ SDValue LoadRet;
+
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
+ for(unsigned i=0;i<RetVals;i++) {
+ LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, DataAddr_Lo,
+ DataAddr_Hi, DAG.getConstant(i, MVT::i8),
+ InFlag);
+ InFlag = getOutFlag(LoadRet);
+ Chain = getChain(LoadRet);
+ ResultVals.push_back(LoadRet);
+ }
+ ResultVals.push_back(Chain);
+ SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl);
+ return Res;
+}
+
+SDValue PIC16TargetLowering::
+LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue RetLabel,
+ SDValue InFlag, SelectionDAG &DAG) {
+ CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
+ DebugLoc dl = TheCall->getDebugLoc();
+ // Currently handling primitive types only. They will come in
+ // i8 parts
+ unsigned RetVals = TheCall->getNumRetVals();
+
+ std::vector<SDValue> ResultVals;
+
+ // Return immediately if the return type is void
+ if (RetVals == 0)
+ return Chain;
+
+ // Call has something to return
+
+ // Legalize the address before use
+ SDValue LdLo, LdHi;
+ unsigned LdOffset;
+ LegalizeAddress(RetLabel, DAG, LdLo, LdHi, LdOffset, dl);
+
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
+ SDValue LoadRet;
+
+ for(unsigned i=0, Offset=0;i<RetVals;i++) {
+
+ LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, LdLo, LdHi,
+ DAG.getConstant(LdOffset + Offset, MVT::i8),
+ InFlag);
+
+ InFlag = getOutFlag(LoadRet);
+
+ Chain = getChain(LoadRet);
+ Offset++;
+ ResultVals.push_back(LoadRet);
+ }
+
+ // To return use MERGE_VALUES
+ ResultVals.push_back(Chain);
+ SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl);
+ return Res;
+}
+
+SDValue PIC16TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getNumOperands() == 1) // return void
+ return Op;
+
+ // A return should have an odd number of operands.
+ if ((Op.getNumOperands() % 2) == 0 ) {
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ }
+
+ // Number of values to return
+ unsigned NumRet = (Op.getNumOperands() / 2);
+
+ // A function always returns its value on the stack, with the offset
+ // starting from 0.
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+ std::string FuncName = F->getName();
+
+ const char *tmpName = createESName(PAN::getFrameLabel(FuncName));
+ SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Other);
+ SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+ SDValue BS = DAG.getConstant(1, MVT::i8);
+ SDValue RetVal;
+ for(unsigned i=0;i<NumRet; ++i) {
+ RetVal = Op.getNode()->getOperand(2*i + 1);
+ Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal,
+ ES, BS,
+ DAG.getConstant (i, MVT::i8));
+
+ }
+ return DAG.getNode(ISD::RET, dl, MVT::Other, Chain);
+}
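+
+// Example (sketch): for a function returning i16, NumRet is 2 and the two
+// i8 parts are stored at offsets 0 and 1 of the PAN::getFrameLabel symbol
+// before the bare ISD::RET is emitted.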
+
+// A CALL node may have some operands that are non-legal for PIC16. Generate
+// a new CALL node with all the operands legal.
+// Currently only the Callee operand of the CALL node is non-legal. This
+// function legalizes the Callee operand and uses all other operands as they
+// are to generate the new CALL node.
+
+SDValue PIC16TargetLowering::LegalizeCALL(SDValue Op, SelectionDAG &DAG) {
+ CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ DebugLoc dl = TheCall->getDebugLoc();
+ unsigned i =0;
+
+ assert(Callee.getValueType() == MVT::i16 &&
+ "Don't know how to legalize this call node!!!");
+ assert(Callee.getOpcode() == ISD::BUILD_PAIR &&
+ "Don't know how to legalize this call node!!!");
+
+ if (isDirectAddress(Callee)) {
+ // Come here for direct calls
+ Callee = Callee.getOperand(0).getOperand(0);
+ } else {
+ // Come here for indirect calls
+ SDValue Lo, Hi;
+ // Indirect addresses. Get the hi and lo parts of ptr.
+ GetExpandedParts(Callee, DAG, Lo, Hi);
+ // Connect Lo and Hi parts of the callee with the PIC16Connect
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi);
+ }
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add the call arguments and their flags
+ unsigned NumArgs = TheCall->getNumArgs();
+ for(i=0;i<NumArgs;i++) {
+ Ops.push_back(TheCall->getArg(i));
+ Ops.push_back(TheCall->getArgFlagsVal(i));
+ }
+ std::vector<MVT> NodeTys;
+ unsigned NumRets = TheCall->getNumRetVals();
+ for(i=0;i<NumRets;i++)
+ NodeTys.push_back(TheCall->getRetValType(i));
+
+ // Return a Chain as well
+ NodeTys.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(&NodeTys[0], NodeTys.size());
+ // Generate new call with all the operands legal
+ return DAG.getCall(TheCall->getCallingConv(), dl,
+ TheCall->isVarArg(), TheCall->isTailCall(),
+ TheCall->isInreg(), VTs, &Ops[0], Ops.size());
+}
+
+void PIC16TargetLowering::
+GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
+ SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
+ SelectionDAG &DAG) {
+ assert (Callee.getOpcode() == PIC16ISD::PIC16Connect
+ && "Don't know what to do with such a callee!!");
+ SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
+ SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
+ Chain = getChain(SeqStart);
+ SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency
+
+ // Get the Lo and Hi part of code address
+ SDValue Lo = Callee.getOperand(0);
+ SDValue Hi = Callee.getOperand(1);
+
+ SDValue Data_Lo, Data_Hi;
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
+ // Subtract 2 from Address to get the Lower part of DataAddress.
+ SDVTList VTList = DAG.getVTList(MVT::i8, MVT::Flag);
+ Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
+ DAG.getConstant(2, MVT::i8));
+ SDValue Ops[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)};
+ Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, Ops, 3);
+ SDValue PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
+ SDValue Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee,
+ OperFlag);
+ Chain = getChain(Call);
+ OperFlag = getOutFlag(Call);
+ SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
+ OperFlag);
+ Chain = getChain(SeqEnd);
+ OperFlag = getOutFlag(SeqEnd);
+
+ // Low part of Data Address
+ DataAddr_Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Call, OperFlag);
+
+ // Make the second call.
+ SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
+ Chain = getChain(SeqStart);
+ OperFlag = getOutFlag(SeqStart); // To manage the data dependency
+
+ // Subtract 1 from Address to get high part of data address.
+ Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
+ DAG.getConstant(1, MVT::i8));
+ SDValue HiOps[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)};
+ Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
+
+ // Use new Lo to make another CALLW
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
+ Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee, OperFlag);
+ Chain = getChain(Call);
+ OperFlag = getOutFlag(Call);
+ SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
+ OperFlag);
+ Chain = getChain(SeqEnd);
+ OperFlag = getOutFlag(SeqEnd);
+ // Hi part of Data Address
+ DataAddr_Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Call, OperFlag);
+}
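+
+// Summary of the above (interpretation): the two CALLW sequences call at
+// Callee - 2 and Callee - 1, each returning one byte in W; those bytes
+// become the Lo and Hi parts of the callee's data frame address.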
+
+
+SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ CallSDNode *TheCall = dyn_cast<CallSDNode>(Op);
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ DebugLoc dl = TheCall->getDebugLoc();
+ if (Callee.getValueType() == MVT::i16 &&
+ Callee.getOpcode() == ISD::BUILD_PAIR) {
+ // Control should come here only from TypeLegalizer for lowering
+
+ // Legalize the non-legal arguments of call and return the
+ // new call with legal arguments.
+ return LegalizeCALL(Op, DAG);
+ }
+ // Control should come here from Legalize DAG.
+ // Here all the operands of CALL node should be legal.
+
+ // If this is an indirect call then to pass the arguments
+ // and read the return value back, we need the data address
+ // of the function being called.
+ // To get the data address two more calls need to be made.
+
+ // The flag to track if this is a direct or indirect call.
+ bool IsDirectCall = true;
+ unsigned RetVals = TheCall->getNumRetVals();
+ unsigned NumArgs = TheCall->getNumArgs();
+
+ SDValue DataAddr_Lo, DataAddr_Hi;
+ if (Callee.getOpcode() == PIC16ISD::PIC16Connect) {
+ IsDirectCall = false; // This is indirect call
+ // Read DataAddress only if we have to pass arguments or
+ // read return value.
+ if ((RetVals > 0) || (NumArgs > 0))
+ GetDataAddress(dl, Callee, Chain, DataAddr_Lo, DataAddr_Hi, DAG);
+ }
+
+ SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
+
+ // Start the call sequence.
+ // Carrying the constant 0 along the CALLSEQ_START
+ // because there is nothing else to carry.
+ SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
+ Chain = getChain(SeqStart);
+ SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency
+ std::string Name;
+
+ // For any direct call - callee will be GlobalAddressNode or
+ // ExternalSymbol
+ SDValue ArgLabel, RetLabel;
+ if (IsDirectCall) {
+ // Considering the GlobalAddressNode case here.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, MVT::i8);
+ Name = G->getGlobal()->getName();
+ } else {// Considering the ExternalSymbol case here
+ ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee);
+ Callee = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
+ Name = ES->getSymbol();
+ }
+
+ // Label for argument passing
+ const char *argFrame = createESName(PAN::getArgsLabel(Name));
+ ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8);
+
+ // Label for reading return value
+ const char *retName = createESName(PAN::getRetvalLabel(Name));
+ RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8);
+ } else {
+ // if indirect call
+ SDValue CodeAddr_Lo = Callee.getOperand(0);
+ SDValue CodeAddr_Hi = Callee.getOperand(1);
+
+ /*CodeAddr_Lo = DAG.getNode(ISD::ADD, dl, MVT::i8, CodeAddr_Lo,
+ DAG.getConstant(2, MVT::i8));*/
+
+ // move Hi part in PCLATH
+ CodeAddr_Hi = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, CodeAddr_Hi);
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, CodeAddr_Lo,
+ CodeAddr_Hi);
+ }
+
+ // Pass the argument to function before making the call.
+ SDValue CallArgs;
+ if (IsDirectCall) {
+ CallArgs = LowerDirectCallArguments(Op, Chain, ArgLabel, OperFlag, DAG);
+ Chain = getChain(CallArgs);
+ OperFlag = getOutFlag(CallArgs);
+ } else {
+ CallArgs = LowerIndirectCallArguments(Op, Chain, OperFlag, DataAddr_Lo,
+ DataAddr_Hi, DAG);
+ Chain = getChain(CallArgs);
+ OperFlag = getOutFlag(CallArgs);
+ }
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue PICCall = DAG.getNode(PIC16ISD::CALL, dl, Tys, Chain, Callee,
+ OperFlag);
+ Chain = getChain(PICCall);
+ OperFlag = getOutFlag(PICCall);
+
+
+ // Carrying the constant 0 along the CALLSEQ_END
+ // because there is nothing else to carry.
+ SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
+ OperFlag);
+ Chain = getChain(SeqEnd);
+ OperFlag = getOutFlag(SeqEnd);
+
+ // Lower the return value reading after the call.
+ if (IsDirectCall)
+ return LowerDirectCallReturn(Op, Chain, RetLabel, OperFlag, DAG);
+ else
+ return LowerIndirectCallReturn(Op, Chain, OperFlag, DataAddr_Lo,
+ DataAddr_Hi, DAG);
+}
+
+bool PIC16TargetLowering::isDirectLoad(const SDValue Op) {
+ if (Op.getOpcode() == PIC16ISD::PIC16Load)
+ if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress
+ || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol)
+ return true;
+ return false;
+}
+
+// NeedToConvertToMemOp - Returns true if one of the operands of the
+// operation 'Op' needs to be put into memory. Also returns the
+// operand number of the operand to be converted in 'MemOp'. Remember, PIC16
+// has no instruction that can operate on two registers. Most insns take
+// one register and one memory operand (addwf) or a constant (addlw).
+bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp) {
+ // If one of the operands is a constant, return false.
+ if (Op.getOperand(0).getOpcode() == ISD::Constant ||
+ Op.getOperand(1).getOpcode() == ISD::Constant)
+ return false;
+
+ // Return false if one of the operands is already a direct
+ // load and that operand has only one use.
+ if (isDirectLoad(Op.getOperand(0))) {
+ if (Op.getOperand(0).hasOneUse())
+ return false;
+ else
+ MemOp = 0;
+ }
+ if (isDirectLoad(Op.getOperand(1))) {
+ if (Op.getOperand(1).hasOneUse())
+ return false;
+ else
+ MemOp = 1;
+ }
+ return true;
+}
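+
+// Example (hypothetical): for (add %x, %y) with both operands in registers,
+// this returns true, and the callers spill operand MemOp through
+// ConvertToMemOperand so the insn takes one register and one memory operand.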
+
+// LowerBinOp - Lower a commutative binary operation that does not
+// affect the carry status flag.
+SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal Op to lower");
+
+ unsigned MemOp = 1;
+ if (NeedToConvertToMemOp(Op, MemOp)) {
+ // Put one value on stack.
+ SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
+
+ return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
+ NewVal);
+ }
+ else {
+ return Op;
+ }
+}
+
+// LowerADD - Lower all types of ADD operations including the ones
+// that affect carry.
+SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) {
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal add to lower");
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned MemOp = 1;
+ if (NeedToConvertToMemOp(Op, MemOp)) {
+ // Put one value on stack.
+ SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
+
+ // ADDC and ADDE produce two results.
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
+
+ // ADDE has three operands, the last one is the carry bit.
+ if (Op.getOpcode() == ISD::ADDE)
+ return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
+ NewVal, Op.getOperand(2));
+ // ADDC has two operands.
+ else if (Op.getOpcode() == ISD::ADDC)
+ return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
+ NewVal);
+ // ADD it is. It produces only one result.
+ else
+ return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
+ NewVal);
+ }
+ else
+ return Op;
+}
+
+SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal sub to lower");
+
+ // Nothing to do if the first operand is already a direct load and it has
+ // only one use.
+ if (isDirectLoad(Op.getOperand(0)) && Op.getOperand(0).hasOneUse())
+ return Op;
+
+ // Put first operand on stack.
+ SDValue NewVal = ConvertToMemOperand (Op.getOperand(0), DAG, dl);
+
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
+ if (Op.getOpcode() == ISD::SUBE)
+ return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1),
+ Op.getOperand(2));
+ else
+ return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1));
+}
+
+void PIC16TargetLowering::InitReservedFrameCount(const Function *F) {
+ unsigned NumArgs = F->arg_size();
+
+ bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID);
+
+ if (isVoidFunc)
+ ReservedFrameCount = NumArgs;
+ else
+ ReservedFrameCount = NumArgs + 1;
+}
+
+// LowerFORMAL_ARGUMENTS - Argument values are loaded from
+// <fname>.args + offset. All arguments are already broken into legalized
+// types, so the offset just runs from 0 to NumArgVals - 1.
+
+SDValue PIC16TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
+ SelectionDAG &DAG) {
+ SmallVector<SDValue, 8> ArgValues;
+ unsigned NumArgVals = Op.getNode()->getNumValues() - 1;
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0); // Formal arguments' chain
+
+
+ // Get the callee's name to create the <fname>.args label to pass args.
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+ std::string FuncName = F->getName();
+
+ // Reset the map of FI and TmpOffset
+ ResetTmpOffsetMap();
+ // Initialize the ReservedFrameCount.
+ InitReservedFrameCount(F);
+
+ // Create the <fname>.args external symbol.
+ const char *tmpName = createESName(PAN::getArgsLabel(FuncName));
+ SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+
+ // Load arg values from the label + offset.
+ SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Other);
+ SDValue BS = DAG.getConstant(1, MVT::i8);
+ for (unsigned i = 0; i < NumArgVals ; ++i) {
+ SDValue Offset = DAG.getConstant(i, MVT::i8);
+ SDValue PICLoad = DAG.getNode(PIC16ISD::PIC16LdArg, dl, VTs, Chain, ES, BS,
+ Offset);
+ Chain = getChain(PICLoad);
+ ArgValues.push_back(PICLoad);
+ }
+
+ // Return a MERGE_VALUES node.
+ ArgValues.push_back(Op.getOperand(0));
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
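+
+// Example (sketch, assuming 16-bit int): for void f(char a, int b) there are
+// three legalized i8 argument values, loaded from <fname>.args + 0, 1 and 2
+// via PIC16LdArg with banksel 1.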
+
+// Perform DAGCombine of PIC16Load.
+// FIXME - Need a more elaborate comment here.
+SDValue PIC16TargetLowering::
+PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Chain = N->getOperand(0);
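+ // If the value produced by this load (result 0) has no uses, forward the
+ // input chain to all users of the load's chain (result 1) so the dead
+ // load can be removed.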
+ if (N->hasNUsesOfValue(0, 0)) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), Chain);
+ }
+ return SDValue();
+}
+
+// For all functions with arguments, some STORE nodes are generated that
+// store the arguments on the frameindex. However in PIC16 the arguments
+// are passed on the stack only. Therefore these STORE nodes are redundant.
+// These STORE nodes will be removed in PerformStoreCombine.
+//
+// Currently this function does nothing and will be updated to remove the
+// unwanted store operations.
+SDValue PIC16TargetLowering::
+PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ return SDValue(N, 0);
+ /*
+ // Storing an undef value is of no use, so remove it
+ if (isStoringUndef(N, Chain, DAG)) {
+ return Chain; // remove the store and return the chain
+ }
+ //else everything is ok.
+ return SDValue(N, 0);
+ */
+}
+
+SDValue PIC16TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ case ISD::STORE:
+ return PerformStoreCombine(N, DCI);
+ case PIC16ISD::PIC16Load:
+ return PerformPIC16LoadCombine(N, DCI);
+ }
+ return SDValue();
+}
+
+static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code!");
+ case ISD::SETNE: return PIC16CC::NE;
+ case ISD::SETEQ: return PIC16CC::EQ;
+ case ISD::SETGT: return PIC16CC::GT;
+ case ISD::SETGE: return PIC16CC::GE;
+ case ISD::SETLT: return PIC16CC::LT;
+ case ISD::SETLE: return PIC16CC::LE;
+ case ISD::SETULT: return PIC16CC::ULT;
+ case ISD::SETULE: return PIC16CC::ULE;
+ case ISD::SETUGE: return PIC16CC::UGE;
+ case ISD::SETUGT: return PIC16CC::UGT;
+ }
+}
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so
+// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ CC == ISD::SETNE &&
+ (LHS.getOpcode() == PIC16ISD::SELECT_ICC &&
+ LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+ SDValue CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
+// Returns appropriate CMP insn and corresponding condition code in PIC16CC
+SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS,
+ unsigned CC, SDValue &PIC16CC,
+ SelectionDAG &DAG, DebugLoc dl) {
+ PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC;
+
+ // PIC16 sub is literal - W. So swap the operands and condition if needed.
+ // i.e. a < 12 can be rewritten as 12 > a.
+ if (RHS.getOpcode() == ISD::Constant) {
+
+ SDValue Tmp = LHS;
+ LHS = RHS;
+ RHS = Tmp;
+
+ switch (CondCode) {
+ default: break;
+ case PIC16CC::LT:
+ CondCode = PIC16CC::GT;
+ break;
+ case PIC16CC::GT:
+ CondCode = PIC16CC::LT;
+ break;
+ case PIC16CC::ULT:
+ CondCode = PIC16CC::UGT;
+ break;
+ case PIC16CC::UGT:
+ CondCode = PIC16CC::ULT;
+ break;
+ case PIC16CC::GE:
+ CondCode = PIC16CC::LE;
+ break;
+ case PIC16CC::LE:
+ CondCode = PIC16CC::GE;
+ break;
+ case PIC16CC::ULE:
+ CondCode = PIC16CC::UGE;
+ break;
+ case PIC16CC::UGE:
+ CondCode = PIC16CC::ULE;
+ break;
+ }
+ }
+
+ PIC16CC = DAG.getConstant(CondCode, MVT::i8);
+
+ // For signed comparisons, flip the sign bit of both operands so that
+ // an unsigned comparison yields the signed result.
+ SDValue Mask = DAG.getConstant(128, MVT::i8);
+ if (isSignedComparison(CondCode)) {
+ LHS = DAG.getNode (ISD::XOR, dl, MVT::i8, LHS, Mask);
+ RHS = DAG.getNode (ISD::XOR, dl, MVT::i8, RHS, Mask);
+ }
+
+ SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Flag);
+ // We can use a subtract operation to set the condition codes. But
+ // we need to put one operand in memory if required.
+ // Nothing to do if the first operand is already a valid type (direct load
+ // for subwf and literal for sublw) and it is used by this operation only.
+ if ((LHS.getOpcode() == ISD::Constant || isDirectLoad(LHS))
+ && LHS.hasOneUse())
+ return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
+
+ // else convert the first operand to mem.
+ LHS = ConvertToMemOperand (LHS, DAG, dl);
+ return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
+}
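+
+// The sign-bit flip in getPIC16Cmp turns a signed compare into an unsigned
+// one: e.g. -1 (0xFF) ^ 0x80 = 0x7F and 1 (0x01) ^ 0x80 = 0x81, and
+// 0x7F < 0x81 unsigned, matching -1 < 1 signed.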
+
+
+SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ unsigned ORIGCC = ~0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If this is a select_cc of a "setcc", and if the setcc got lowered into
+ // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ // i.e.
+ // A setcc: lhs, rhs, cc is expanded by llvm to
+ // select_cc: result of setcc, 0, 1, 0, setne
+ // We can think of it as:
+ // select_cc: lhs, rhs, 1, 0, cc
+ LookThroughSetCC(LHS, RHS, CC, ORIGCC);
+ if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
+
+ SDValue PIC16CC;
+ SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
+
+ return DAG.getNode (PIC16ISD::SELECT_ICC, dl, TrueVal.getValueType(), TrueVal,
+ FalseVal, PIC16CC, Cmp.getValue(1));
+}
+
+MachineBasicBlock *
+PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII.get(PIC16::pic16brcond)).addMBB(sinkMBB).addImm(CC);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII.get(PIC16::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
+
+
+SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2); // LHS of the condition.
+ SDValue RHS = Op.getOperand(3); // RHS of the condition.
+ SDValue Dest = Op.getOperand(4); // BB to jump to
+ unsigned ORIGCC = ~0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If this is a br_cc of a "setcc", and if the setcc got lowered into
+ // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, ORIGCC);
+ if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
+
+ // Get the Compare insn and condition code.
+ SDValue PIC16CC;
+ SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
+
+ return DAG.getNode(PIC16ISD::BRCOND, dl, MVT::Other, Chain, Dest, PIC16CC,
+ Cmp.getValue(1));
+}
+
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
new file mode 100644
index 0000000..ca9650d
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -0,0 +1,227 @@
+//===-- PIC16ISelLowering.h - PIC16 DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PIC16 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16ISELLOWERING_H
+#define PIC16ISELLOWERING_H
+
+#include "PIC16.h"
+#include "PIC16Subtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include <map>
+
+namespace llvm {
+ namespace PIC16ISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Lo, // Low 8-bits of GlobalAddress.
+ Hi, // High 8-bits of GlobalAddress.
+ PIC16Load,
+ PIC16LdArg, // This is a replica of PIC16Load, but used to load function
+ // arguments; it facilitates some store-removal
+ // optimizations.
+
+ PIC16LdWF,
+ PIC16Store,
+ PIC16StWF,
+ Banksel,
+ MTLO, // Move to low part of FSR
+ MTHI, // Move to high part of FSR
+ MTPCLATH, // Move to PCLATH
+ PIC16Connect, // General connector for PIC16 nodes
+ BCF,
+ LSLF, // PIC16 Logical shift left
+ LRLF, // PIC16 Logical shift right
+ RLF, // Rotate left through carry
+ RRF, // Rotate right through carry
+ CALL, // PIC16 Call instruction
+ CALLW, // PIC16 CALLW instruction
+ SUBCC, // Compare for equality or inequality.
+ SELECT_ICC, // Pseudo to be caught in the scheduler and expanded to brcond.
+ BRCOND, // Conditional branch.
+ Dummy
+ };
+
+ // Keep track of different address spaces.
+ enum AddressSpace {
+ RAM_SPACE = 0, // RAM address space
+ ROM_SPACE = 1 // ROM address space number is 1
+ };
+ enum PIC16Libcall {
+ MUL_I8 = RTLIB::UNKNOWN_LIBCALL + 1,
+ SRA_I8,
+ SLL_I8,
+ SRL_I8,
+ PIC16UnknownCall
+ };
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class PIC16TargetLowering : public TargetLowering {
+ public:
+ explicit PIC16TargetLowering(PIC16TargetMachine &TM);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT getSetCCResultType(MVT ValType) const;
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerADD(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSUB(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ // Call returns
+ SDValue
+ LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue FrameAddress,
+ SDValue InFlag, SelectionDAG &DAG);
+ SDValue
+ LowerIndirectCallReturn(SDValue Op, SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ SelectionDAG &DAG);
+
+ // Call arguments
+ SDValue
+ LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue FrameAddress,
+ SDValue InFlag, SelectionDAG &DAG);
+
+ SDValue
+ LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ SelectionDAG &DAG);
+
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC,
+ SelectionDAG &DAG, DebugLoc dl);
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG);
+ virtual void LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG);
+
+ SDValue ExpandStore(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG);
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ // This function returns the temp offset for a FrameIndex. If a TmpOffset
+ // already exists for the FI then it returns the same; else it creates a
+ // new offset and returns that.
+ unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size);
+ void ResetTmpOffsetMap() { FiTmpOffsetMap.clear(); SetTmpSize(0); }
+ void InitReservedFrameCount(const Function *F);
+
+ // Return the size of Tmp variable
+ unsigned GetTmpSize() { return TmpSize; }
+ void SetTmpSize(unsigned Size) { TmpSize = Size; }
+
+ private:
+ // If the Node is a BUILD_PAIR representing a direct Address,
+ // then this function will return true.
+ bool isDirectAddress(const SDValue &Op);
+
+ // If the Node is a DirectAddress in ROM_SPACE then this
+ // function will return true
+ bool isRomAddress(const SDValue &Op);
+
+ // Extract the Lo and Hi component of Op.
+ void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo,
+ SDValue &Hi);
+
+
+ // Load pointer can be a direct or indirect address. In PIC16 direct
+ // addresses need Banksel and Indirect addresses need to be loaded to
+ // FSR first. Handle address specific cases here.
+ void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Lo,
+ SDValue &Hi, unsigned &Offset, DebugLoc dl);
+
+ // FrameIndex should be broken down into ExternalSymbol and FrameOffset.
+ void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES,
+ int &Offset);
+
+
+ // A CALL node should have all legal operands only. Legalize all non-legal
+ // operands of the CALL node and then return the new call with all operands
+ // legal.
+ SDValue LegalizeCALL(SDValue Op, SelectionDAG &DAG);
+
+ // For indirect calls the data address of the callee's frame needs to be
+ // extracted. This function fills the arguments DataAddr_Lo and
+ // DataAddr_Hi with the address of the callee's frame.
+ void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
+ SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
+ SelectionDAG &DAG);
+
+ // We cannot have both operands of a binary operation in W.
+ // This function is used to put one operand on stack and generate a load.
+ SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+
+ // This function checks if we need to put an operand of an operation on
+ // stack and generate a load or not.
+ bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp);
+
+ /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const PIC16Subtarget *Subtarget;
+
+
+ // Extending the LIB Call framework of LLVM
+ // to hold the names of PIC16Libcalls.
+ const char *PIC16LibcallNames[PIC16ISD::PIC16UnknownCall];
+
+ // To set and retrieve the lib call names.
+ void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name);
+ const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call);
+
+ // Make PIC16 Libcall.
+ SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, MVT RetVT,
+ const SDValue *Ops, unsigned NumOps, bool isSigned,
+ SelectionDAG &DAG, DebugLoc dl);
+
+ // Check if operation has a direct load operand.
+ inline bool isDirectLoad(const SDValue Op);
+
+ private:
+ // The frameindexes generated for spill/reload are stack based.
+ // This map maintains zero-based indexes for these FIs.
+ std::map<unsigned, unsigned> FiTmpOffsetMap;
+ unsigned TmpSize;
+
+ // These are the frames for return value and argument passing.
+ // These FrameIndices will be expanded to the foo.frame external symbol
+ // and all others will be expanded to the foo.tmp external symbol.
+ unsigned ReservedFrameCount;
+ };
+} // namespace llvm
+
+#endif // PIC16ISELLOWERING_H
diff --git a/lib/Target/PIC16/PIC16InstrFormats.td b/lib/Target/PIC16/PIC16InstrFormats.td
new file mode 100644
index 0000000..e213ea8
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrFormats.td
@@ -0,0 +1,117 @@
+//===- PIC16InstrFormats.td - PIC16 Instruction Formats-------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe PIC16 instructions format
+//
+// All the possible PIC16 fields are:
+//
+// opcode - operation code.
+// f - 7-bit register file address.
+// d - 1-bit direction specifier.
+// k - 8/11-bit literals.
+// b - 3-bit bit-number specifier.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic PIC16 Format
+// PIC16 instructions are 14 bits wide.
+
+// FIXME: Add Cooper Specific Formats if any.
+
+class PIC16Inst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<14> Inst;
+
+ let Namespace = "PIC16";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Byte Oriented instruction class in PIC16 : <|opcode|d|f|>
+// opcode = 6 bits.
+// d = direction = 1 bit.
+// f = file register address = 7 bits.
+//===----------------------------------------------------------------------===//
+
+class ByteFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ :PIC16Inst<outs, ins, asmstr, pattern> {
+ bits<1> d;
+ bits<7> f;
+
+ let Inst{13-8} = opcode;
+
+ let Inst{7} = d;
+ let Inst{6-0} = f;
+}
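+
+// For instance (illustrative, assuming the standard PIC16 mid-range
+// encoding): "addwf f, d" would be a ByteFormat instruction with opcode
+// 0b000111, giving Inst{13-8} = 000111, Inst{7} = d, Inst{6-0} = f.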
+
+//===----------------------------------------------------------------------===//
+// Bit Oriented instruction class in PIC16 : <|opcode|b|f|>
+// opcode = 4 bits.
+// b = bit specifier = 3 bits.
+// f = file register address = 7 bits.
+//===----------------------------------------------------------------------===//
+
+class BitFormat<bits<4> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : PIC16Inst<outs, ins, asmstr, pattern> {
+ bits<3> b;
+ bits<7> f;
+
+ let Inst{13-10} = opcode;
+
+ let Inst{9-7} = b;
+ let Inst{6-0} = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Literal Format instruction class in PIC16 : <|opcode|k|>
+// opcode = 6 bits
+// k = literal = 8 bits
+//===----------------------------------------------------------------------===//
+
+class LiteralFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : PIC16Inst<outs, ins, asmstr, pattern> {
+ bits<8> k;
+
+ let Inst{13-8} = opcode;
+
+ let Inst{7-0} = k;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Format instruction class in PIC16 : <|opcode|k|>
+// opcode = 3 bits.
+// k = jump address = 11 bits.
+//===----------------------------------------------------------------------===//
+
+class ControlFormat<bits<3> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : PIC16Inst<outs, ins, asmstr, pattern> {
+ bits<11> k;
+
+ let Inst{13-11} = opcode;
+
+ let Inst{10-0} = k;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class in PIC16
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : PIC16Inst<outs, ins, asmstr, pattern> {
+ let Inst{13-6} = 0;
+}
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
new file mode 100644
index 0000000..2a769e8
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -0,0 +1,186 @@
+//===- PIC16InstrInfo.cpp - PIC16 Instruction Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "PIC16InstrInfo.h"
+#include "PIC16TargetMachine.h"
+#include "PIC16GenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include <cstdio>
+
+
+using namespace llvm;
+
+// FIXME: Add the subtarget support on this constructor.
+PIC16InstrInfo::PIC16InstrInfo(PIC16TargetMachine &tm)
+ : TargetInstrInfoImpl(PIC16Insts, array_lengthof(PIC16Insts)),
+ TM(tm),
+ RegInfo(*this, *TM.getSubtargetImpl()) {}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot.
+/// If not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned PIC16InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == PIC16::movwf
+ && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isSymbol()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ return 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the dest reg along with the FrameIndex of the stack slot.
+/// If not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == PIC16::movf
+ && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isSymbol()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ return 0;
+}
+
+
+void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ PIC16TargetLowering *PTLI = TM.getTargetLowering();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ const Function *Func = MBB.getParent()->getFunction();
+ const std::string FuncName = Func->getName();
+
+ const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+
+ // On the order of operands here: think "movwf SrcReg, tmp_slot, offset".
+ if (RC == PIC16::GPRRegisterClass) {
+ //MachineFunction &MF = *MBB.getParent();
+ //MachineRegisterInfo &RI = MF.getRegInfo();
+ BuildMI(MBB, I, DL, get(PIC16::movwf))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 1))
+ .addExternalSymbol(tmpName)
+ .addImm(1); // Emit banksel for it.
+ }
+ else if (RC == PIC16::FSR16RegisterClass) {
+ // This is a 16-bit register and the frameindex given by llvm is of
+ // size two here. Break this index N into two zero-based indexes and
+ // put one into the map. The second one is always obtained by adding 1
+ // to the first zero-based index. In fact it is going to use 3 slots,
+ // as saving an FSR corrupts W as well, so W needs to be saved/restored too.
+
+ unsigned opcode = (SrcReg == PIC16::FSR0) ? PIC16::save_fsr0
+ : PIC16::save_fsr1;
+ BuildMI(MBB, I, DL, get(opcode))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 3))
+ .addExternalSymbol(tmpName)
+ .addImm(1); // Emit banksel for it.
+ }
+ else
+ assert(0 && "Can't store this register to stack slot");
+}
+
+void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ PIC16TargetLowering *PTLI = TM.getTargetLowering();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ const Function *Func = MBB.getParent()->getFunction();
+ const std::string FuncName = Func->getName();
+
+ const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+
+ // On the order of operands here: think "movf FrameIndex, W".
+ if (RC == PIC16::GPRRegisterClass) {
+ //MachineFunction &MF = *MBB.getParent();
+ //MachineRegisterInfo &RI = MF.getRegInfo();
+ BuildMI(MBB, I, DL, get(PIC16::movf), DestReg)
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 1))
+ .addExternalSymbol(tmpName)
+ .addImm(1); // Emit banksel for it.
+ }
+ else if (RC == PIC16::FSR16RegisterClass) {
+ // This is a 16-bit register and the frameindex given by llvm is of
+ // size two here. Break this index N into two zero-based indexes and
+ // put one into the map. The second one is always obtained by adding 1
+ // to the first zero-based index. In fact it is going to use 3 slots,
+ // as saving an FSR corrupts W as well, so W needs to be saved/restored too.
+
+ unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0
+ : PIC16::restore_fsr1;
+ BuildMI(MBB, I, DL, get(opcode), DestReg)
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 3))
+ .addExternalSymbol(tmpName)
+ .addImm(1); // Emit banksel for it.
+ }
+ else
+ assert(0 && "Can't load this register from stack slot");
+}
+
+bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC == PIC16::FSR16RegisterClass) {
+ BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ if (DestRC == PIC16::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(PIC16::copy_w), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ // Not yet supported.
+ return false;
+}
+
+bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DestReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+ if (MI.getOpcode() == PIC16::copy_fsr
+ || MI.getOpcode() == PIC16::copy_w) {
+ DestReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
new file mode 100644
index 0000000..0b67679
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -0,0 +1,70 @@
+//===- PIC16InstrInfo.h - PIC16 Instruction Information----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16INSTRUCTIONINFO_H
+#define PIC16INSTRUCTIONINFO_H
+
+#include "PIC16.h"
+#include "PIC16RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+
+class PIC16InstrInfo : public TargetInstrInfoImpl
+{
+ PIC16TargetMachine &TM;
+ const PIC16RegisterInfo RegInfo;
+public:
+ explicit PIC16InstrInfo(PIC16TargetMachine &TM);
+
+ virtual const PIC16RegisterInfo &getRegisterInfo() const { return RegInfo; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ };
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
new file mode 100644
index 0000000..c572188
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.td
@@ -0,0 +1,522 @@
+//===- PIC16InstrInfo.td - PIC16 Instruction defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PIC16 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Type Profiles.
+//===----------------------------------------------------------------------===//
+
+// Generic type profiles for i8/i16 unary/binary operations.
+// Taking one i8 or i16 and producing void.
+def SDTI8VoidOp : SDTypeProfile<0, 1, [SDTCisI8<0>]>;
+def SDTI16VoidOp : SDTypeProfile<0, 1, [SDTCisI16<0>]>;
+
+// Taking one value and producing an output of same type.
+def SDTI8UnaryOp : SDTypeProfile<1, 1, [SDTCisI8<0>, SDTCisI8<1>]>;
+def SDTI16UnaryOp : SDTypeProfile<1, 1, [SDTCisI16<0>, SDTCisI16<1>]>;
+
+// Taking two values and producing an output of same type.
+def SDTI8BinOp : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>]>;
+def SDTI16BinOp : SDTypeProfile<1, 2, [SDTCisI16<0>, SDTCisI16<1>,
+ SDTCisI16<2>]>;
+
+// Node specific type profiles.
+def SDT_PIC16Load : SDTypeProfile<1, 3, [SDTCisI8<0>, SDTCisI8<1>,
+ SDTCisI8<2>, SDTCisI8<3>]>;
+
+def SDT_PIC16Store : SDTypeProfile<0, 4, [SDTCisI8<0>, SDTCisI8<1>,
+ SDTCisI8<2>, SDTCisI8<3>]>;
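+
+// For PIC16Load the operands are (ptrlo, ptrhi, offset) and the result is
+// the loaded value; for PIC16Store they are (val, ptrlo, ptrhi, offset).
+// This matches the "val = W, ptrlo = GA, ptrhi = banksel" convention used
+// by the movf/movwf instruction definitions below.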
+
+def SDT_PIC16Connect : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>,
+ SDTCisI8<2>]>;
+
+// PIC16ISD::CALL type profile
+def SDT_PIC16call : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def SDT_PIC16callw : SDTypeProfile<1, -1, [SDTCisInt<0>]>;
+
+// PIC16ISD::BRCOND
+def SDT_PIC16Brcond: SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisI8<1>]>;
+
+// PIC16ISD::SELECT_ICC
+def SDT_PIC16Selecticc: SDTypeProfile<1, 3,
+ [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>,
+ SDTCisI8<3>]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 addressing modes matching via DAG.
+//===----------------------------------------------------------------------===//
+def diraddr : ComplexPattern<i8, 1, "SelectDirectAddr", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def PIC16callseq_start : SDNode<"ISD::CALLSEQ_START", SDTI8VoidOp,
+ [SDNPHasChain, SDNPOutFlag]>;
+def PIC16callseq_end : SDNode<"ISD::CALLSEQ_END", SDTI8VoidOp,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// Low 8-bits of GlobalAddress.
+def PIC16Lo : SDNode<"PIC16ISD::Lo", SDTI8BinOp>;
+
+// High 8-bits of GlobalAddress.
+def PIC16Hi : SDNode<"PIC16ISD::Hi", SDTI8BinOp>;
+
+// The MTHI and MTLO nodes are used only to match them in the incoming
+// DAG for replacement by corresponding set_fsrhi, set_fsrlo instructions.
+// These nodes are not used for defining any instructions.
+def MTLO : SDNode<"PIC16ISD::MTLO", SDTI8UnaryOp>;
+def MTHI : SDNode<"PIC16ISD::MTHI", SDTI8UnaryOp>;
+def MTPCLATH : SDNode<"PIC16ISD::MTPCLATH", SDTI8UnaryOp>;
+
+// Node to generate Bank Select for a GlobalAddress.
+def Banksel : SDNode<"PIC16ISD::Banksel", SDTI8UnaryOp>;
+
+// Node to match a direct store operation.
+def PIC16Store : SDNode<"PIC16ISD::PIC16Store", SDT_PIC16Store, [SDNPHasChain]>;
+def PIC16StWF : SDNode<"PIC16ISD::PIC16StWF", SDT_PIC16Store,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+// Node to match a direct load operation.
+def PIC16Load : SDNode<"PIC16ISD::PIC16Load", SDT_PIC16Load, [SDNPHasChain]>;
+def PIC16LdArg : SDNode<"PIC16ISD::PIC16LdArg", SDT_PIC16Load, [SDNPHasChain]>;
+def PIC16LdWF : SDNode<"PIC16ISD::PIC16LdWF", SDT_PIC16Load,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+def PIC16Connect: SDNode<"PIC16ISD::PIC16Connect", SDT_PIC16Connect, []>;
+
+// Node to match PIC16 call
+def PIC16call : SDNode<"PIC16ISD::CALL", SDT_PIC16call,
+ [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
+def PIC16callw : SDNode<"PIC16ISD::CALLW", SDT_PIC16callw,
+ [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
+
+// Node to match a comparison instruction.
+def PIC16Subcc : SDNode<"PIC16ISD::SUBCC", SDTI8BinOp, [SDNPOutFlag]>;
+
+// Node to match a conditional branch.
+def PIC16Brcond : SDNode<"PIC16ISD::BRCOND", SDT_PIC16Brcond,
+ [SDNPHasChain, SDNPInFlag]>;
+
+def PIC16Selecticc : SDNode<"PIC16ISD::SELECT_ICC", SDT_PIC16Selecticc,
+ [SDNPInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Operand Definitions.
+//===----------------------------------------------------------------------===//
+def i8mem : Operand<i8>;
+def brtarget: Operand<OtherVT>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i8>;
+
+include "PIC16InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PIC16 Common Classes.
+//===----------------------------------------------------------------------===//
+
+// W = W Op F : Load the value from F and do Op to W.
+let isTwoAddress = 1, mayLoad = 1 in
+class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+ ByteFormat<OpCode, (outs GPR:$dst),
+ (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo + $offset, W"),
+ [(set GPR:$dst, (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
+ (i8 imm:$ptrhi),
+ (i8 imm:$offset))))]>;
+
+// F = F Op W : Load the value from F, do op with W and store in F.
+// This insn class is not marked as TwoAddress because the reg is
+// being used as a source operand only. (Remember a TwoAddress insn
+// needs a copyRegToReg.)
+let mayStore = 1 in
+class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+ ByteFormat<OpCode, (outs),
+ (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo + $offset"),
+ [(PIC16Store (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
+ (i8 imm:$ptrhi),
+ (i8 imm:$offset))),
+ diraddr:$ptrlo,
+ (i8 imm:$ptrhi), (i8 imm:$offset)
+ )]>;
+
+// W = W Op L : Do Op of L with W and place result in W.
+let isTwoAddress = 1 in
+class BinOpLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
+ LiteralFormat<opcode, (outs GPR:$dst),
+ (ins GPR:$src, i8imm:$literal),
+ !strconcat(OpcStr, " $literal"),
+ [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>;
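+
+// For example, the "addlw_1" definition below instantiates this class as
+// BinOpLW<0, "addlw", add>, giving an insn that prints as "addlw $literal"
+// and adds an 8-bit literal into W via the two-address $src/$dst pair.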
+
+//===----------------------------------------------------------------------===//
+// PIC16 Instructions.
+//===----------------------------------------------------------------------===//
+
+// Pseudo-instructions.
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i8imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(PIC16callseq_start imm:$amt)]>;
+
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i8imm:$amt),
+ "!ADJCALLSTACKUP $amt",
+ [(PIC16callseq_end imm:$amt)]>;
+
+//-----------------------------------
+// Various movlw insn patterns.
+//-----------------------------------
+let isReMaterializable = 1 in {
+// Move 8-bit literal to W.
+def movlw : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src),
+ "movlw $src",
+ [(set GPR:$dst, (i8 imm:$src))]>;
+
+// Move a Lo(TGA) to W.
+def movlw_lo_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+ "movlw LOW(${src}) + ${src2}",
+ [(set GPR:$dst, (PIC16Lo tglobaladdr:$src, imm:$src2 ))]>;
+
+// Move a Lo(TES) to W.
+def movlw_lo_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+ "movlw LOW(${src}) + ${src2}",
+ [(set GPR:$dst, (PIC16Lo texternalsym:$src, imm:$src2 ))]>;
+
+// Move a Hi(TGA) to W.
+def movlw_hi_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+ "movlw HIGH(${src}) + ${src2}",
+ [(set GPR:$dst, (PIC16Hi tglobaladdr:$src, imm:$src2))]>;
+
+// Move a Hi(TES) to W.
+def movlw_hi_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+ "movlw HIGH(${src}) + ${src2}",
+ [(set GPR:$dst, (PIC16Hi texternalsym:$src, imm:$src2))]>;
+}
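+
+// Illustrative use of the patterns above: a 16-bit address is materialized
+// one byte at a time into W, e.g.
+//   movlw LOW(@var) + 0     ; PIC16Lo  -> movlw_lo_1
+//   movlw HIGH(@var) + 0    ; PIC16Hi  -> movlw_hi_1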
+
+//-------------------
+// FSR setting insns.
+//-------------------
+// These insns are matched via a DAG replacement pattern.
+def set_fsrlo:
+ ByteFormat<0, (outs FSR16:$fsr),
+ (ins GPR:$val),
+ "movwf ${fsr}L",
+ []>;
+
+let isTwoAddress = 1 in
+def set_fsrhi:
+ ByteFormat<0, (outs FSR16:$dst),
+ (ins FSR16:$src, GPR:$val),
+ "movwf ${dst}H",
+ []>;
+
+def set_pclath:
+ ByteFormat<0, (outs PCLATHR:$dst),
+ (ins GPR:$val),
+ "movwf ${dst}",
+ [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>;
+
+//----------------------------
+// copyRegToReg
+// copyRegToReg insns. These are dummies; they should always be deleted
+// by the optimizer and never be present in the final generated code.
+// If they are, then we have to write correct macros for these insns.
+//----------------------------
+def copy_fsr:
+ Pseudo<(outs FSR16:$dst), (ins FSR16:$src), "copy_fsr $dst, $src", []>;
+
+def copy_w:
+ Pseudo<(outs GPR:$dst), (ins GPR:$src), "copy_w $dst, $src", []>;
+
+class SAVE_FSR<string OpcStr>:
+ Pseudo<(outs),
+ (ins FSR16:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo, $offset"),
+ []>;
+
+def save_fsr0: SAVE_FSR<"save_fsr0">;
+def save_fsr1: SAVE_FSR<"save_fsr1">;
+
+class RESTORE_FSR<string OpcStr>:
+ Pseudo<(outs FSR16:$dst),
+ (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo, $offset"),
+ []>;
+
+def restore_fsr0: RESTORE_FSR<"restore_fsr0">;
+def restore_fsr1: RESTORE_FSR<"restore_fsr1">;
+
+//--------------------------
+// Store to memory
+//-------------------------
+
+// Direct store.
+// Input operands are: val = W, ptrlo = GA, offset = offset, ptrhi = banksel.
+let mayStore = 1 in
+class MOVWF_INSN<bits<6> OpCode, SDNode OpNodeDest, SDNode Op>:
+ ByteFormat<0, (outs),
+ (ins GPR:$val, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ "movwf ${ptrlo} + ${offset}",
+ [(Op GPR:$val, OpNodeDest:$ptrlo, (i8 imm:$ptrhi),
+ (i8 imm:$offset))]>;
+
+// Store W to a Global Address.
+def movwf : MOVWF_INSN<0, tglobaladdr, PIC16Store>;
+
+// Store W to an External Symbol.
+def movwf_1 : MOVWF_INSN<0, texternalsym, PIC16Store>;
+
+// Store with InFlag and OutFlag.
+// This is the same as movwf_1 but has a flag. The flag is required to
+// order the stores while passing the params to a function.
+def movwf_2 : MOVWF_INSN<0, texternalsym, PIC16StWF>;
+
+// Indirect store. Matched via a DAG replacement pattern.
+def store_indirect :
+ ByteFormat<0, (outs),
+ (ins GPR:$val, FSR16:$fsr, i8imm:$offset),
+ "movwi $offset[$fsr]",
+ []>;
+
+//----------------------------
+// Load from memory
+//----------------------------
+// Direct load.
+// Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel.
+// Output: dst = W
+let mayLoad = 1 in
+class MOVF_INSN<bits<6> OpCode, SDNode OpNodeSrc, SDNode Op>:
+ ByteFormat<0, (outs GPR:$dst),
+ (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ "movf ${ptrlo} + ${offset}, W",
+ [(set GPR:$dst,
+ (Op OpNodeSrc:$ptrlo, (i8 imm:$ptrhi),
+ (i8 imm:$offset)))]>;
+
+// Load from a GA.
+def movf : MOVF_INSN<0, tglobaladdr, PIC16Load>;
+
+// Load from an ES.
+def movf_1 : MOVF_INSN<0, texternalsym, PIC16Load>;
+def movf_1_1 : MOVF_INSN<0, texternalsym, PIC16LdArg>;
+
+// Load with InFlag and OutFlag.
+// This is the same as movf_1 but has a flag. The flag is required to
+// order the loads while copying the return value of a function.
+def movf_2 : MOVF_INSN<0, texternalsym, PIC16LdWF>;
+
+// Indirect load. Matched via a DAG replacement pattern.
+def load_indirect :
+ ByteFormat<0, (outs GPR:$dst),
+ (ins FSR16:$fsr, i8imm:$offset),
+ "moviw $offset[$fsr]",
+ []>;
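+
+// Note: load_indirect and store_indirect carry no patterns themselves; they
+// are selected by the DAG replacement patterns at the end of this file,
+// which also materialize the FSR from its lo/hi address bytes.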
+
+//-------------------------
+// Bitwise operations patterns
+//--------------------------
+// W = W op [F]
+let Defs = [STATUS] in {
+def OrFW : BinOpFW<0, "iorwf", or>;
+def XOrFW : BinOpFW<0, "xorwf", xor>;
+def AndFW : BinOpFW<0, "andwf", and>;
+
+// F = W op [F]
+def OrWF : BinOpWF<0, "iorwf", or>;
+def XOrWF : BinOpWF<0, "xorwf", xor>;
+def AndWF : BinOpWF<0, "andwf", and>;
+
+//-------------------------
+// Various add/sub patterns.
+//-------------------------
+
+// W = W + [F]
+def addfw_1: BinOpFW<0, "addwf", add>;
+def addfw_2: BinOpFW<0, "addwf", addc>;
+
+let Uses = [STATUS] in
+def addfwc: BinOpFW<0, "addwfc", adde>; // With Carry.
+
+// F = W + [F]
+def addwf_1: BinOpWF<0, "addwf", add>;
+def addwf_2: BinOpWF<0, "addwf", addc>;
+let Uses = [STATUS] in
+def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry.
+}
+
+// W -= [F] ; load from F and sub the value from W.
+let isTwoAddress = 1, mayLoad = 1 in
+class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+ ByteFormat<OpCode, (outs GPR:$dst),
+ (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo + $offset, W"),
+ [(set GPR:$dst, (OpNode (PIC16Load diraddr:$ptrlo,
+ (i8 imm:$ptrhi), (i8 imm:$offset)),
+ GPR:$src))]>;
+let Defs = [STATUS] in {
+def subfw_1: SUBFW<0, "subwf", sub>;
+def subfw_2: SUBFW<0, "subwf", subc>;
+
+let Uses = [STATUS] in
+def subfwb: SUBFW<0, "subwfb", sube>; // With Borrow.
+
+def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>;
+}
+
+// [F] -= W ;
+let mayStore = 1 in
+class SUBWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+ ByteFormat<OpCode, (outs),
+ (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo + $offset"),
+ [(PIC16Store (OpNode (PIC16Load diraddr:$ptrlo,
+ (i8 imm:$ptrhi), (i8 imm:$offset)),
+ GPR:$src), diraddr:$ptrlo,
+ (i8 imm:$ptrhi), (i8 imm:$offset))]>;
+
+let Defs = [STATUS] in {
+def subwf_1: SUBWF<0, "subwf", sub>;
+def subwf_2: SUBWF<0, "subwf", subc>;
+
+let Uses = [STATUS] in
+ def subwfb: SUBWF<0, "subwfb", sube>; // With Borrow.
+
+def subwf_cc: SUBWF<0, "subwf", PIC16Subcc>;
+}
+
+// addlw
+let Defs = [STATUS] in {
+def addlw_1 : BinOpLW<0, "addlw", add>;
+def addlw_2 : BinOpLW<0, "addlw", addc>;
+
+let Uses = [STATUS] in
+def addlwc : BinOpLW<0, "addlwc", adde>; // With Carry. (Assembler macro).
+
+// bitwise operations involving a literal and w.
+def andlw : BinOpLW<0, "andlw", and>;
+def xorlw : BinOpLW<0, "xorlw", xor>;
+def orlw : BinOpLW<0, "iorlw", or>;
+}
+
+// sublw
+// W = C - W ; sub W from literal. (Without borrow).
+let isTwoAddress = 1 in
+class SUBLW<bits<6> opcode, SDNode OpNode> :
+ LiteralFormat<opcode, (outs GPR:$dst),
+ (ins GPR:$src, i8imm:$literal),
+ "sublw $literal",
+ [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>;
+
+let Defs = [STATUS] in {
+def sublw_1 : SUBLW<0, sub>;
+def sublw_2 : SUBLW<0, subc>;
+def sublw_cc : SUBLW<0, PIC16Subcc>;
+}
+
+// Call instruction.
+let isCall = 1,
+ Defs = [W, FSR0, FSR1] in {
+ def CALL: LiteralFormat<0x1, (outs), (ins i8imm:$func),
+ //"call ${func} + 2",
+ "call ${func}",
+ [(PIC16call diraddr:$func)]>;
+}
+
+let isCall = 1,
+ Defs = [W, FSR0, FSR1] in {
+ def CALL_1: LiteralFormat<0x1, (outs), (ins GPR:$func, PCLATHR:$pc),
+ "callw",
+ [(PIC16call (PIC16Connect GPR:$func, PCLATHR:$pc))]>;
+}
+
+let isCall = 1,
+ Defs = [FSR0, FSR1] in {
+ def CALLW: LiteralFormat<0x1, (outs GPR:$dest),
+ (ins GPR:$func, PCLATHR:$pc),
+ "callw",
+ [(set GPR:$dest, (PIC16callw (PIC16Connect GPR:$func, PCLATHR:$pc)))]>;
+}
+
+let Uses = [STATUS], isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
+def pic16brcond: ControlFormat<0x0, (outs), (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(PIC16Brcond bb:$dst, imm:$cc)]>;
+
+// Unconditional branch.
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
+def br_uncond: ControlFormat<0x0, (outs), (ins brtarget:$dst),
+ "goto $dst",
+ [(br bb:$dst)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def SELECT_CC_Int_ICC
+ : Pseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, i8imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set GPR:$dst, (PIC16Selecticc GPR:$T, GPR:$F,
+ imm:$Cond))]>;
+}
+
+
+// Banksel.
+def banksel :
+ Pseudo<(outs),
+ (ins i8mem:$ptr),
+ "banksel $ptr",
+ []>;
+
+def pagesel :
+ Pseudo<(outs),
+ (ins i8mem:$ptr),
+ "movlp $ptr",
+ []>;
+
+
+// Return insn.
+def Return :
+ ControlFormat<0, (outs), (ins), "return", [(ret)]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Replacement Patterns.
+//===----------------------------------------------------------------------===//
+
+// Identify an indirect store and select insns for it.
+def : Pat<(PIC16Store GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+ imm:$offset),
+ (store_indirect GPR:$val,
+ (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+ imm:$offset)>;
+
+def : Pat<(PIC16StWF GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+ imm:$offset),
+ (store_indirect GPR:$val,
+ (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+ imm:$offset)>;
+
+// Identify an indirect load and select insns for it.
+def : Pat<(PIC16Load (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+ imm:$offset),
+ (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+ imm:$offset)>;
+
+def : Pat<(PIC16LdWF (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+ imm:$offset),
+ (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+ imm:$offset)>;
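+
+// Illustrative expansion (not emitted code): storing W through a 16-bit
+// pointer whose bytes are in $loaddr/$hiaddr selects roughly:
+//   movwf FSR0L            ; set_fsrlo
+//   movwf FSR0H            ; set_fsrhi
+//   movwi $offset[FSR0]    ; store_indirect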
+
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
new file mode 100644
index 0000000..20f926d
--- /dev/null
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -0,0 +1,169 @@
+//===-- PIC16MemSelOpt.cpp - PIC16 banksel optimizer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass that optimizes the emission of banksel
+// instructions before data memory accesses. It currently works within
+// a basic block only and keeps track of the last accessed memory bank.
+// If a memory access stays in the same bank, it changes the banksel
+// immediate, which is a part of the insn accessing the data memory, from 1
+// to zero. The asm printer emits a banksel only if that immediate is 1.
+//
+// FIXME: this is not implemented yet. The banksel pass only works on local
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-codegen"
+#include "PIC16.h"
+#include "PIC16InstrInfo.h"
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN MemSelOpt : public MachineFunctionPass {
+ static char ID;
+ MemSelOpt() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PIC16 Memsel Optimizer";
+ }
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+ bool processInstruction(MachineInstr *MI);
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+ MachineBasicBlock *MBB; // Current basic block
+ std::string CurBank;
+
+ };
+ char MemSelOpt::ID = 0;
+}
+
+FunctionPass *llvm::createPIC16MemSelOptimizerPass() {
+ return new MemSelOpt();
+}
+
+
+/// runOnMachineFunction - Loop over all of the basic blocks, optimizing
+/// banksel emission for the instructions that access data memory.
+///
+bool MemSelOpt::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ Changed |= processBasicBlock(MF, *I);
+ }
+
+ return Changed;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// eliding redundant banksels for accesses that stay in the same bank.
+///
+bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+
+  // Assume that no bank is selected when entering a basic block.
+  // Ideally we should look at the predecessors for this information.
+ CurBank="";
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ Changed |= processInstruction(I);
+ }
+ return Changed;
+}
+
+bool MemSelOpt::processInstruction(MachineInstr *MI) {
+ bool Changed = false;
+
+ unsigned NumOperands = MI->getNumOperands();
+ if (NumOperands == 0) return false;
+
+
+ // If this insn is not going to access any memory, return.
+ const TargetInstrDesc &TID = TII->get(MI->getOpcode());
+ if (! (TID.isCall() || TID.mayLoad() || TID.mayStore()))
+ return false;
+
+ // Scan for the memory address operand.
+ // FIXME: Should we use standard interfaces like memoperands_iterator,
+ // hasMemOperand() etc ?
+ int MemOpPos = -1;
+ for (unsigned i = 0; i < NumOperands; i++) {
+ MachineOperand Op = MI->getOperand(i);
+ if (Op.getType() == MachineOperand::MO_GlobalAddress ||
+ Op.getType() == MachineOperand::MO_ExternalSymbol) {
+ // We found one mem operand. Next one should be BS.
+ MemOpPos = i;
+ break;
+ }
+ }
+
+  // If we did not find a memory operand, there is nothing to do.
+ if (MemOpPos == -1) return Changed;
+
+ // Get the MemOp.
+ MachineOperand &Op = MI->getOperand(MemOpPos);
+
+  // Calls need a pagesel before them; handle that first.
+ if (MI->getOpcode() == PIC16::CALL) {
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).
+ addOperand(Op);
+ return true;
+ }
+
+  // Get the section name (NewBank) for MemOp.
+  // This assumes that the section names for globals are already set by
+  // AsmPrinter->doInitialization.
+ std::string NewBank = CurBank;
+ if (Op.getType() == MachineOperand::MO_GlobalAddress &&
+ Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) {
+ NewBank = Op.getGlobal()->getSection();
+ } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) {
+    // External Symbols are generated for temp data and arguments; they
+    // live in the fpdata.<functionname>.# section.
+ std::string Sym = Op.getSymbolName();
+ NewBank = PAN::getSectionNameForSym(Sym);
+ }
+
+  // If the previous and new section names are the same, we don't need to
+  // emit a banksel.
+ if (NewBank.compare(CurBank) != 0 ) {
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)).
+ addOperand(Op);
+ Changed = true;
+ CurBank = NewBank;
+ }
+
+ return Changed;
+}
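+
+// Illustrative effect of this pass (not actual output): for two consecutive
+// accesses to globals placed in the same udata section, only the first
+// access keeps its banksel:
+//   banksel @a
+//   movwf   @a
+//   movwf   @b      ; banksel elided, @b is in the same bank as @a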
+
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
new file mode 100644
index 0000000..eb758d8
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -0,0 +1,91 @@
+//===- PIC16RegisterInfo.cpp - PIC16 Register Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-reg-info"
+
+#include "PIC16.h"
+#include "PIC16RegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+
+
+using namespace llvm;
+
+PIC16RegisterInfo::PIC16RegisterInfo(const TargetInstrInfo &tii,
+ const PIC16Subtarget &st)
+ : PIC16GenRegisterInfo(PIC16::ADJCALLSTACKDOWN, PIC16::ADJCALLSTACKUP),
+ TII(tii),
+ ST(st) {}
+
+#include "PIC16GenRegisterInfo.inc"
+
+/// PIC16 Callee Saved Registers
+const unsigned* PIC16RegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+// PIC16 Callee Saved Reg Classes
+const TargetRegisterClass* const*
+PIC16RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ return CalleeSavedRegClasses;
+}
+
+BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
+ return false;
+}
+
+void PIC16RegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{ /* NOT YET IMPLEMENTED */ }
+
+void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
+{ /* NOT YET IMPLEMENTED */ }
+
+void PIC16RegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{ /* NOT YET IMPLEMENTED */ }
+
+int PIC16RegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "Not keeping track of debug information yet!!");
+ return -1;
+}
+
+unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ assert(0 && "PIC16 Does not have any frame register");
+ return 0;
+}
+
+unsigned PIC16RegisterInfo::getRARegister() const {
+ assert(0 && "PIC16 Does not have any return address register");
+ return 0;
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void PIC16RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN,
+ // ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
new file mode 100644
index 0000000..83689d0
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -0,0 +1,68 @@
+//===- PIC16RegisterInfo.h - PIC16 Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16REGISTERINFO_H
+#define PIC16REGISTERINFO_H
+
+#include "PIC16GenRegisterInfo.h.inc"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+// Forward Declarations.
+ class PIC16Subtarget;
+ class TargetInstrInfo;
+
+class PIC16RegisterInfo : public PIC16GenRegisterInfo {
+ private:
+ const TargetInstrInfo &TII;
+ const PIC16Subtarget &ST;
+
+ public:
+ PIC16RegisterInfo(const TargetInstrInfo &tii,
+ const PIC16Subtarget &st);
+
+
+ //------------------------------------------------------
+ // Pure virtual functions from TargetRegisterInfo
+ //------------------------------------------------------
+
+ // PIC16 callee saved registers
+ virtual const unsigned*
+ getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ // PIC16 callee saved register classes
+ virtual const TargetRegisterClass* const *
+ getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS=NULL) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ virtual unsigned getFrameRegister(MachineFunction &MF) const;
+ virtual unsigned getRARegister() const;
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.td b/lib/Target/PIC16/PIC16RegisterInfo.td
new file mode 100644
index 0000000..2959d91
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.td
@@ -0,0 +1,33 @@
+//===- PIC16RegisterInfo.td - PIC16 Register defs ------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PIC16 register file
+//===----------------------------------------------------------------------===//
+
+class PIC16Reg<string n> : Register<n> {
+ let Namespace = "PIC16";
+}
+
+// PIC16 Registers.
+def W : PIC16Reg<"W">;
+def FSR0 : PIC16Reg<"FSR0">;
+def FSR1 : PIC16Reg<"FSR1">;
+def BS : PIC16Reg<"BS">;
+def PCLATH : PIC16Reg<"PCLATH">;
+
+def STATUS : PIC16Reg<"STATUS">;
+
+// PIC16 Register classes.
+def GPR : RegisterClass<"PIC16", [i8], 8, [W]>;
+def FSR16 : RegisterClass<"PIC16", [i16], 8, [FSR0, FSR1]>;
+def BSR : RegisterClass<"PIC16", [i8], 8, [BS]>;
+def PCLATHR : RegisterClass<"PIC16", [i8], 8, [PCLATH]>;
+def STATUSR : RegisterClass<"PIC16", [i8], 8, [STATUS]>;
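+
+// Note that GPR contains only W: every 8-bit computation goes through the
+// single working register, which is why PIC16InstrInfo.td phrases its
+// patterns as "W = W Op F" / "F = F Op W".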
+
diff --git a/lib/Target/PIC16/PIC16Subtarget.cpp b/lib/Target/PIC16/PIC16Subtarget.cpp
new file mode 100644
index 0000000..db8a5d8
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Subtarget.cpp
@@ -0,0 +1,27 @@
+//===- PIC16Subtarget.cpp - PIC16 Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PIC16 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16Subtarget.h"
+#include "PIC16GenSubtarget.inc"
+
+using namespace llvm;
+
+PIC16Subtarget::PIC16Subtarget(const Module &M, const std::string &FS,
+ bool Cooper)
+ :IsCooper(Cooper)
+{
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/PIC16/PIC16Subtarget.h b/lib/Target/PIC16/PIC16Subtarget.h
new file mode 100644
index 0000000..e5147a0
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Subtarget.h
@@ -0,0 +1,45 @@
+//=====-- PIC16Subtarget.h - Define Subtarget for the PIC16 ---*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PIC16 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16SUBTARGET_H
+#define PIC16SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class PIC16Subtarget : public TargetSubtarget {
+
+ // IsCooper - Target ISA is Cooper.
+ bool IsCooper;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ PIC16Subtarget(const Module &M, const std::string &FS, bool Cooper);
+
+ /// isCooper - Returns true if the target ISA is Cooper.
+ bool isCooper() const { return IsCooper; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+};
+} // End llvm namespace
+
+#endif // PIC16SUBTARGET_H
diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.cpp b/lib/Target/PIC16/PIC16TargetAsmInfo.cpp
new file mode 100644
index 0000000..d2657f0
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetAsmInfo.cpp
@@ -0,0 +1,264 @@
+//===-- PIC16TargetAsmInfo.cpp - PIC16 asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PIC16TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+
+using namespace llvm;
+
+PIC16TargetAsmInfo::
+PIC16TargetAsmInfo(const PIC16TargetMachine &TM)
+ : TargetAsmInfo(TM) {
+ CommentString = ";";
+ GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
+ GlobalDirective = "\tglobal\t";
+ ExternDirective = "\textern\t";
+
+ Data8bitsDirective = " db ";
+ Data16bitsDirective = " dw ";
+ Data32bitsDirective = " dl ";
+ RomData8bitsDirective = " dw ";
+ RomData16bitsDirective = " rom_di ";
+ RomData32bitsDirective = " rom_dl ";
+ ZeroDirective = NULL;
+ AsciiDirective = " dt ";
+ AscizDirective = NULL;
+ BSSSection_ = getNamedSection("udata.# UDATA",
+ SectionFlags::Writeable | SectionFlags::BSS);
+ ReadOnlySection = getNamedSection("romdata.# ROMDATA", SectionFlags::None);
+ DataSection = getNamedSection("idata.# IDATA", SectionFlags::Writeable);
+ SwitchToSectionDirective = "";
+  // Needed because otherwise a .text symbol is emitted by DwarfWriter
+  // in BeginModule, and gpasm complains about that .text symbol.
+ TextSection = getUnnamedSection("", SectionFlags::Code);
+ ROSection = new PIC16Section(getReadOnlySection());
+ ExternalVarDecls = new PIC16Section(getNamedSection("ExternalVarDecls"));
+ ExternalVarDefs = new PIC16Section(getNamedSection("ExternalVarDefs"));
+  // Set it to false because we need to generate the .c file name and not
+  // the .bc file name.
+ HasSingleParameterDotFile = false;
+}
+
+const char *PIC16TargetAsmInfo::getRomDirective(unsigned size) const
+{
+ if (size == 8)
+ return RomData8bitsDirective;
+ else if (size == 16)
+ return RomData16bitsDirective;
+ else if (size == 32)
+ return RomData32bitsDirective;
+ else
+ return NULL;
+}
+
+
+const char *PIC16TargetAsmInfo::getASDirective(unsigned size,
+ unsigned AS) const {
+ if (AS == PIC16ISD::ROM_SPACE)
+ return getRomDirective(size);
+ else
+ return NULL;
+}
+
+const Section *
+PIC16TargetAsmInfo::getBSSSectionForGlobal(const GlobalVariable *GV) const {
+ assert (GV->hasInitializer() && "This global doesn't need space");
+ Constant *C = GV->getInitializer();
+  assert (C->isNullValue() && "Uninitialized global has a non-zero initializer");
+
+ // Find how much space this global needs.
+ const TargetData *TD = TM.getTargetData();
+ const Type *Ty = C->getType();
+ unsigned ValSize = TD->getTypeAllocSize(Ty);
+
+ // Go through all BSS Sections and assign this variable
+ // to the first available section having enough space.
+ PIC16Section *FoundBSS = NULL;
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ if (DataBankSize - BSSSections[i]->Size >= ValSize) {
+ FoundBSS = BSSSections[i];
+ break;
+ }
+ }
+
+  // No BSS section spacious enough was found. Create a new one.
+ if (! FoundBSS) {
+ std::string name = PAN::getUdataSectionName(BSSSections.size());
+ const Section *NewSection = getNamedSection (name.c_str());
+
+ FoundBSS = new PIC16Section(NewSection);
+
+ // Add this newly created BSS section to the list of BSSSections.
+ BSSSections.push_back(FoundBSS);
+ }
+
+ // Insert the GV into this BSS.
+ FoundBSS->Items.push_back(GV);
+ FoundBSS->Size += ValSize;
+
+  // We can't do this here because GV is const.
+ // const std::string SName = FoundBSS->S_->getName();
+ // GV->setSection(SName);
+
+ return FoundBSS->S_;
+}
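+
+// Illustrative first-fit behavior: with DataBankSize = 80, a 60-byte global
+// followed by a 30-byte one lands in two different udata sections
+// (80 - 60 < 30), while a later 15-byte global still fits back into the
+// first section.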
+
+const Section *
+PIC16TargetAsmInfo::getIDATASectionForGlobal(const GlobalVariable *GV) const {
+ assert (GV->hasInitializer() && "This global doesn't need space");
+ Constant *C = GV->getInitializer();
+  assert (!C->isNullValue() && "Initialized global has a zero initializer");
+ assert (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
+ "can split initialized RAM data only");
+
+ // Find how much space this global needs.
+ const TargetData *TD = TM.getTargetData();
+ const Type *Ty = C->getType();
+ unsigned ValSize = TD->getTypeAllocSize(Ty);
+
+ // Go through all IDATA Sections and assign this variable
+ // to the first available section having enough space.
+ PIC16Section *FoundIDATA = NULL;
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ if ( DataBankSize - IDATASections[i]->Size >= ValSize) {
+ FoundIDATA = IDATASections[i];
+ break;
+ }
+ }
+
+  // No IDATA section spacious enough was found. Create a new one.
+ if (! FoundIDATA) {
+ std::string name = PAN::getIdataSectionName(IDATASections.size());
+ const Section *NewSection = getNamedSection (name.c_str());
+
+ FoundIDATA = new PIC16Section(NewSection);
+
+ // Add this newly created IDATA section to the list of IDATASections.
+ IDATASections.push_back(FoundIDATA);
+ }
+
+ // Insert the GV into this IDATA.
+ FoundIDATA->Items.push_back(GV);
+ FoundIDATA->Size += ValSize;
+
+  // We can't do this here because GV is const.
+ // GV->setSection(FoundIDATA->S->getName());
+
+ return FoundIDATA->S_;
+}
+
+// Get the section for an automatic variable of a function.
+// For PIC16 they are globals only with mangled names.
+const Section *
+PIC16TargetAsmInfo::getSectionForAuto(const GlobalVariable *GV) const {
+
+ const std::string name = PAN::getSectionNameForSym(GV->getName());
+
+ // Go through all Auto Sections and assign this variable
+ // to the appropriate section.
+ PIC16Section *FoundAutoSec = NULL;
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+ if ( AutosSections[i]->S_->getName() == name) {
+ FoundAutoSec = AutosSections[i];
+ break;
+ }
+ }
+
+  // No Auto section was found. Create a new one.
+ if (! FoundAutoSec) {
+ const Section *NewSection = getNamedSection (name.c_str());
+
+ FoundAutoSec = new PIC16Section(NewSection);
+
+ // Add this newly created autos section to the list of AutosSections.
+ AutosSections.push_back(FoundAutoSec);
+ }
+
+ // Insert the auto into this section.
+ FoundAutoSec->Items.push_back(GV);
+
+ return FoundAutoSec->S_;
+}
+
+
+// Override default implementation to put the true globals into
+// multiple data sections if required.
+const Section*
+PIC16TargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV1) const {
+ // We select the section based on the initializer here, so it really
+ // has to be a GlobalVariable.
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GV1);
+
+ if (!GV)
+ return TargetAsmInfo::SelectSectionForGlobal(GV1);
+
+  // Record External Var Decls.
+ if (GV->isDeclaration()) {
+ ExternalVarDecls->Items.push_back(GV);
+ return ExternalVarDecls->S_;
+ }
+
+ assert (GV->hasInitializer() && "A def without initializer?");
+
+ // First, if this is an automatic variable for a function, get the section
+ // name for it and return.
+ const std::string name = GV->getName();
+ if (PAN::isLocalName(name)) {
+ return getSectionForAuto(GV);
+ }
+
+  // Record External Var Defs.
+ if (GV->hasExternalLinkage() || GV->hasCommonLinkage()) {
+ ExternalVarDefs->Items.push_back(GV);
+ }
+
+ // See if this is an uninitialized global.
+ const Constant *C = GV->getInitializer();
+ if (C->isNullValue())
+ return getBSSSectionForGlobal(GV);
+
+  // If this is initialized data in RAM, put it in the correct IDATA section.
+ if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
+ return getIDATASectionForGlobal(GV);
+
+  // This is initialized data in ROM; put it in the read-only section.
+ if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) {
+ ROSection->Items.push_back(GV);
+ return ROSection->S_;
+ }
+
+ // Else let the default implementation take care of it.
+ return TargetAsmInfo::SelectSectionForGlobal(GV);
+}
+
+PIC16TargetAsmInfo::~PIC16TargetAsmInfo() {
+
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ delete BSSSections[i];
+ }
+
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ delete IDATASections[i];
+ }
+
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+ delete AutosSections[i];
+ }
+
+ delete ROSection;
+ delete ExternalVarDecls;
+ delete ExternalVarDefs;
+}
diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.h b/lib/Target/PIC16/PIC16TargetAsmInfo.h
new file mode 100644
index 0000000..e464e36
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetAsmInfo.h
@@ -0,0 +1,79 @@
+//=====-- PIC16TargetAsmInfo.h - PIC16 asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PIC16TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16TARGETASMINFO_H
+#define PIC16TARGETASMINFO_H
+
+#include "PIC16.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include <vector>
+#include "llvm/Module.h"
+#define DataBankSize 80
+namespace llvm {
+
+ // Forward declaration.
+ class PIC16TargetMachine;
+ class GlobalVariable;
+
+  // PIC16 splits the global data into multiple udata and idata sections.
+  // Each udata and idata section needs to keep a list of the globals it
+  // contains, in order to avoid scanning over all the global values
+  // again and printing only those that match the current section.
+  // Keeping values inside the sections makes printing a section much easier.
+ struct PIC16Section {
+ const Section *S_; // Connection to actual Section.
+ unsigned Size; // Total size of the objects contained.
+ std::vector<const GlobalVariable*> Items;
+
+ PIC16Section (const Section *s) { S_ = s; Size = 0; }
+ };
+
+ struct PIC16TargetAsmInfo : public TargetAsmInfo {
+ std::string getSectionNameForSym(const std::string &Sym) const;
+ PIC16TargetAsmInfo(const PIC16TargetMachine &TM);
+ mutable std::vector<PIC16Section *> BSSSections;
+ mutable std::vector<PIC16Section *> IDATASections;
+ mutable std::vector<PIC16Section *> AutosSections;
+ mutable PIC16Section *ROSection;
+ mutable PIC16Section *ExternalVarDecls;
+ mutable PIC16Section *ExternalVarDefs;
+ virtual ~PIC16TargetAsmInfo();
+
+ private:
+ const char *RomData8bitsDirective;
+ const char *RomData16bitsDirective;
+ const char *RomData32bitsDirective;
+ const char *getRomDirective(unsigned size) const;
+ virtual const char *getASDirective(unsigned size, unsigned AS) const;
+ const Section *getBSSSectionForGlobal(const GlobalVariable *GV) const;
+ const Section *getIDATASectionForGlobal(const GlobalVariable *GV) const;
+ const Section *getSectionForAuto(const GlobalVariable *GV) const;
+ virtual const Section *SelectSectionForGlobal(const GlobalValue *GV) const;
+
+
+ public:
+ void SetSectionForGVs(Module &M);
+ std::vector<PIC16Section *> getBSSSections() const {
+ return BSSSections;
+ }
+ std::vector<PIC16Section *> getIDATASections() const {
+ return IDATASections;
+ }
+ std::vector<PIC16Section *> getAutosSections() const {
+ return AutosSections;
+ }
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
new file mode 100644
index 0000000..bda6326
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -0,0 +1,79 @@
+//===-- PIC16TargetMachine.cpp - Define TargetMachine for PIC16 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PIC16 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+
+using namespace llvm;
+
+/// PIC16TargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int PIC16TargetMachineModule;
+int PIC16TargetMachineModule = 0;
+
+
+// Register the targets
+static RegisterTarget<PIC16TargetMachine>
+X("pic16", "PIC16 14-bit [experimental].");
+static RegisterTarget<CooperTargetMachine>
+Y("cooper", "PIC16 Cooper [experimental].");
+
+// PIC16TargetMachine - Traditional PIC16 Machine.
+PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
+ bool Cooper)
+: Subtarget(M, FS, Cooper),
+ DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
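+  // "e" = little-endian; pointers are 16 bits wide, and pointers and all
+  // integer types have 8-bit ABI and preferred alignment.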
+ InstrInfo(*this), TLInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
+
+// CooperTargetMachine - Uses the same PIC16TargetMachine, but makes IsCooper
+// as true.
+CooperTargetMachine::CooperTargetMachine(const Module &M, const std::string &FS)
+ : PIC16TargetMachine(M, FS, true) {}
+
+
+const TargetAsmInfo *PIC16TargetMachine::createTargetAsmInfo() const {
+ return new PIC16TargetAsmInfo(*this);
+}
+
+bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createPIC16ISelDag(*this));
+ return false;
+}
+
+bool PIC16TargetMachine::
+addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out) {
+ // Output assembly language.
+ PM.add(createPIC16CodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
+
+bool PIC16TargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createPIC16MemSelOptimizerPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+
diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h
new file mode 100644
index 0000000..7f62d5c
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetMachine.h
@@ -0,0 +1,76 @@
+//===-- PIC16TargetMachine.h - Define TargetMachine for PIC16 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PIC16 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef PIC16_TARGETMACHINE_H
+#define PIC16_TARGETMACHINE_H
+
+#include "PIC16InstrInfo.h"
+#include "PIC16ISelLowering.h"
+#include "PIC16RegisterInfo.h"
+#include "PIC16Subtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// PIC16TargetMachine
+///
+class PIC16TargetMachine : public LLVMTargetMachine {
+ PIC16Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ PIC16InstrInfo InstrInfo;
+ PIC16TargetLowering TLInfo;
+
+  // PIC16 does not have any call stack frame, so there is no
+  // PIC16-specific FrameInfo class.
+ TargetFrameInfo FrameInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ PIC16TargetMachine(const Module &M, const std::string &FS,
+ bool Cooper = false);
+
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const PIC16InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const PIC16Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const PIC16RegisterInfo *getRegisterInfo() const {
+ return &(InstrInfo.getRegisterInfo());
+ }
+
+ virtual PIC16TargetLowering *getTargetLowering() const {
+ return const_cast<PIC16TargetLowering*>(&TLInfo);
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // PIC16TargetMachine.
+
+/// CooperTargetMachine
+class CooperTargetMachine : public PIC16TargetMachine {
+public:
+ CooperTargetMachine(const Module &M, const std::string &FS);
+}; // CooperTargetMachine.
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..1ed483a
--- /dev/null
+++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMPowerPCAsmPrinter
+ PPCAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMPowerPCCodeGen n)
+
+add_dependencies(LLVMPowerPCAsmPrinter ${n})
diff --git a/lib/Target/PowerPC/AsmPrinter/Makefile b/lib/Target/PowerPC/AsmPrinter/Makefile
new file mode 100644
index 0000000..269ef92
--- /dev/null
+++ b/lib/Target/PowerPC/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCAsmPrinter
+
+# Hack: we need to include 'main' PowerPC target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
new file mode 100644
index 0000000..7723982
--- /dev/null
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -0,0 +1,1204 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN PPCAsmPrinter : public AsmPrinter {
+ protected:
+ StringSet<> FnStubs, GVStubs, HiddenGVStubs;
+ const PPCSubtarget &Subtarget;
+ public:
+ explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V),
+ Subtarget(TM.getSubtarget<PPCSubtarget>()) {}
+
+ virtual const char *getPassName() const {
+ return "PowerPC Assembly Printer";
+ }
+
+ PPCTargetMachine &getTM() {
+ return static_cast<PPCTargetMachine&>(TM);
+ }
+
+ unsigned enumRegToMachineReg(unsigned enumReg) {
+ switch (enumReg) {
+ default: assert(0 && "Unhandled register!"); break;
+ case PPC::CR0: return 0;
+ case PPC::CR1: return 1;
+ case PPC::CR2: return 2;
+ case PPC::CR3: return 3;
+ case PPC::CR4: return 4;
+ case PPC::CR5: return 5;
+ case PPC::CR6: return 6;
+ case PPC::CR7: return 7;
+ }
+ abort();
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO);
+
+ /// stripRegisterPrefix - This method strips the character prefix from a
+    /// register name so that only the number is left.  Used for Linux asm.
+ const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+ }
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+
+ // If we should use 0 for R0.
+ if (R0AsZero && RegNo == PPC::R0) {
+ O << "0";
+ return;
+ }
+
+ const char *RegName = TM.getRegisterInfo()->get(RegNo).AsmName;
+ // Linux assembler (Others?) does not take register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ }
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ printRegister(MO, false);
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ } else {
+ printOp(MO);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+
+ void printS5ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ char value = MI->getOperand(OpNo).getImm();
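+      // Sign-extend the low 5 bits by shifting them to the top of a 32-bit
+      // int and arithmetic-shifting back down.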
+ value = (value << (32-5)) >> (32-5);
+ O << (int)value;
+ }
+ void printU5ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ unsigned char value = MI->getOperand(OpNo).getImm();
+ assert(value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)value;
+ }
+ void printU6ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ unsigned char value = MI->getOperand(OpNo).getImm();
+ assert(value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)value;
+ }
+ void printS16ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (short)MI->getOperand(OpNo).getImm();
+ }
+ void printU16ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ }
+ void printS16X4ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ O << (short)(MI->getOperand(OpNo).getImm()*4);
+ } else {
+ O << "lo16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\")";
+ else
+ O << ')';
+ }
+ }
+ void printBranchOperand(const MachineInstr *MI, unsigned OpNo) {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print, e.g., "$+8", an eight-byte displacement from
+ // the PC.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << "$+" << MI->getOperand(OpNo).getImm()*4;
+ } else {
+ printOp(MI->getOperand(OpNo));
+ }
+ }
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (MO.getType() == MachineOperand::MO_GlobalAddress) {
+ GlobalValue *GV = MO.getGlobal();
+ if (GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()) {
+ // Dynamically-resolved functions need a stub for the function.
+ std::string Name = Mang->getValueName(GV);
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+ }
+ if (MO.getType() == MachineOperand::MO_ExternalSymbol) {
+ std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ return;
+ }
+ }
+
+ printOp(MI->getOperand(OpNo));
+ }
+ void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (int)MI->getOperand(OpNo).getImm()*4;
+ }
+ void printPICLabel(const MachineInstr *MI, unsigned OpNo) {
+ O << "\"L" << getFunctionNumber() << "$pb\"\n";
+ O << "\"L" << getFunctionNumber() << "$pb\":";
+ }
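+ // printSymbolHi/printSymbolLo print the high-adjusted and low halves of
+ // a symbol address; Darwin syntax is ha16()/lo16(), Linux is @ha/@l.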
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ if (Subtarget.isDarwin()) O << "ha16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\"";
+ if (Subtarget.isDarwin())
+ O << ')';
+ else
+ O << "@ha";
+ }
+ }
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ if (Subtarget.isDarwin()) O << "lo16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\"";
+ if (Subtarget.isDarwin())
+ O << ')';
+ else
+ O << "@l";
+ }
+ }
+ void printcrbitm(const MachineInstr *MI, unsigned OpNo) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo = enumRegToMachineReg(CCReg);
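+ // Form the 8-bit field mask used by mtcrf/mfocrf: bit (7 - RegNo) is
+ // set, so CR0 prints as 128 and CR7 prints as 1.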
+ O << (0x80 >> RegNo);
+ }
+ // The new addressing mode printers.
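+ // printMemRegImm prints a D-form address as "offset(base)"; r0 used as
+ // the base register reads as constant zero, so it is printed as "0".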
+ void printMemRegImm(const MachineInstr *MI, unsigned OpNo) {
+ printSymbolLo(MI, OpNo);
+ O << '(';
+ if (MI->getOperand(OpNo+1).isReg() &&
+ MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1);
+ O << ')';
+ }
+ void printMemRegImmShifted(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm())
+ printS16X4ImmOperand(MI, OpNo);
+ else
+ printSymbolLo(MI, OpNo);
+ O << '(';
+ if (MI->getOperand(OpNo+1).isReg() &&
+ MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1);
+ O << ')';
+ }
+
+ void printMemRegReg(const MachineInstr *MI, unsigned OpNo) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ printRegister(MO, true);
+ O << ", ";
+ printOperand(MI, OpNo+1);
+ }
+
+ void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier);
+
+ virtual bool runOnMachineFunction(MachineFunction &F) = 0;
+ virtual bool doFinalization(Module &M) = 0;
+
+ virtual void EmitExternalGlobal(const GlobalVariable *GV);
+ };
+
+ /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+ public:
+ explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {}
+
+ virtual const char *getPassName() const {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ PPCAsmPrinter::getAnalysisUsage(AU);
+ }
+
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ };
+
+ /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+ /// OS X
+ class VISIBILITY_HIDDEN PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+ raw_ostream &OS;
+ public:
+ explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0), OS(O) {}
+
+ virtual const char *getPassName() const {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ PPCAsmPrinter::getAnalysisUsage(AU);
+ }
+
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "PPCGenAsmWriter.inc"
+
+void PPCAsmPrinter::printOp(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ cerr << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ // FIXME: PIC relocation model
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ return;
+ }
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (GV->isDeclaration() || GV->isWeakForLinker()) {
+ if (GV->hasHiddenVisibility()) {
+ if (!GV->isDeclaration() && !GV->hasCommonLinkage())
+ O << Name;
+ else {
+ HiddenGVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+ }
+ O << Name;
+
+ printOffset(MO.getOffset());
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// EmitExternalGlobal - In this case we need to use the indirect symbol.
+///
+void PPCAsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
+ std::string Name;
+ getGlobalLinkName(GV, Name);
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (GV->hasHiddenVisibility())
+ HiddenGVStubs.insert(Name);
+ else
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ return;
+ }
+ O << Name;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ // PPC never has a prefix.
+ printOperand(MI, OpNo);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ if (MI->getOperand(OpNo).isReg())
+ printMemRegReg(MI, OpNo);
+ else
+ printMemRegImm(MI, OpNo);
+ return false;
+}
+
+void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier) {
+ assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+ unsigned Code = MI->getOperand(OpNo).getImm();
+ if (!strcmp(Modifier, "cc")) {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+ case PPC::PRED_LT: O << "lt"; return;
+ case PPC::PRED_LE: O << "le"; return;
+ case PPC::PRED_EQ: O << "eq"; return;
+ case PPC::PRED_GE: O << "ge"; return;
+ case PPC::PRED_GT: O << "gt"; return;
+ case PPC::PRED_NE: O << "ne"; return;
+ case PPC::PRED_UN: O << "un"; return;
+ case PPC::PRED_NU: O << "nu"; return;
+ }
+
+ } else {
+ assert(!strcmp(Modifier, "reg") &&
+ "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ // Don't print the register for 'always'.
+ if (Code == PPC::PRED_ALWAYS) return;
+ printOperand(MI, OpNo+1);
+ }
+}
+
+
+/// printMachineInstruction - Print out a single PowerPC MI to the current
+/// output stream.
+///
+void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Check for slwi/srwi mnemonics.
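+ // slwi rA,rS,SH is rlwinm rA,rS,SH,0,31-SH, and srwi rA,rS,SH is
+ // rlwinm rA,rS,32-SH,SH,31; recognize those mask patterns here.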
+ if (MI->getOpcode() == PPC::RLWINM) {
+ bool FoundMnemonic = false;
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; FoundMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; FoundMnemonic = true;
+ SH = 32-SH;
+ }
+ if (FoundMnemonic) {
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << ", " << (unsigned int)SH << '\n';
+ return;
+ }
+ } else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) {
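+ // "or rA,rS,rS" is the canonical form of "mr rA,rS".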
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << '\n';
+ return;
+ }
+ } else if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << ", " << (unsigned int)SH << '\n';
+ return;
+ }
+ }
+
+ if (printInstruction(MI))
+ return; // Printer was automatically generated
+
+ assert(0 && "Unhandled instruction in asm writer!");
+ abort();
+ return;
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.global\t" << CurrentFnName << '\n'
+ << "\t.type\t" << CurrentFnName << ", @function\n";
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ O << "\t.global\t" << CurrentFnName << '\n';
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ EmitAlignment(2, F);
+ O << CurrentFnName << ":\n";
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << '\n';
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ O.flush();
+
+ // We didn't modify anything.
+ return false;
+}
+
+bool PPCLinuxAsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ // Emit initial debug information.
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "DwarfWriter is not available");
+ DW->BeginModule(&M, MMI, O, this, TAI);
+
+ // GNU as handles section names wrapped in quotes
+ Mang->setUseQuotes(true);
+
+ SwitchToSection(TAI->getTextSection());
+
+ return Result;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+ for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+ Name != E; ++Name)
+ if (isprint(*Name))
+ OS << *Name;
+}
+
+void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return; // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM; if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ std::string name = Mang->getValueName(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && /* FIXME: Verify correct */
+ !GVar->hasSection() &&
+ (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() ||
+ GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasExternalLinkage()) {
+ O << "\t.global " << name << '\n';
+ O << "\t.type " << name << ", @object\n";
+ O << name << ":\n";
+ O << "\t.zero " << Size << '\n';
+ } else if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << ',' << Size;
+ } else {
+ O << ".comm " << name << ',' << Size;
+ }
+ if (VerboseAsm) {
+ O << "\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'";
+ }
+ O << '\n';
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n"
+ << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section named after
+ // them or something similar. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ default:
+ cerr << "Unknown linkage type!";
+ abort();
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'";
+ }
+ O << '\n';
+
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ // TODO
+
+ // Emit final debug information.
+ DW->EndModule();
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ O << "\t.weak_definition\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
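+ // EmitAlignment takes a log2 value: align to 4 bytes when optimizing for
+ // size, 16 bytes otherwise.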
+ EmitAlignment(F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4, F);
+ O << CurrentFnName << ":\n";
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // If the function is empty, then we need to emit *something*; otherwise the
+ // function's label might end up attached to whatever follows it in the
+ // section. We emit a nop in this situation.
+ MachineFunction::iterator I = MF.begin();
+
+ if (++I == MF.end() && MF.front().empty())
+ O << "\tnop\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true, VerboseAsm);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+
+bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
+ static const char *const CPUDirectives[] = {
+ "",
+ "ppc",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppc64"
+ };
+
+ unsigned Directive = Subtarget.getDarwinDirective();
+ if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_64;
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+ O << "\t.machine " << CPUDirectives[Directive] << '\n';
+
+ bool Result = AsmPrinter::doInitialization(M);
+
+ // Emit initial debug information.
+ // We need this for Personality functions.
+ // AsmPrinter::doInitialization should have done this analysis.
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "DwarfWriter is not available");
+ DW->BeginModule(&M, MMI, O, this, TAI);
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ Mang->setUseQuotes(true);
+
+ // Prime text sections so they are adjacent. This reduces the likelihood
+ // that a large data or debug section causes a branch to exceed the 16M
+ // limit.
+ SwitchToTextSection("\t.section __TEXT,__textcoal_nt,coalesced,"
+ "pure_instructions");
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs,"
+ "pure_instructions,32");
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs,"
+ "pure_instructions,16");
+ }
+ SwitchToSection(TAI->getTextSection());
+
+ return Result;
+}
+
+void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return; // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM; if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar)) {
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (GVar->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (GVar->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ return;
+ }
+
+ std::string name = Mang->getValueName(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && /* FIXME: Verify correct */
+ !GVar->hasSection() &&
+ (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() ||
+ GVar->isWeakForLinker()) &&
+ TAI->SectionKindForGlobal(GVar) != SectionKind::RODataMergeStr) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasExternalLinkage()) {
+ O << "\t.globl " << name << '\n';
+ O << "\t.zerofill __DATA, __common, " << name << ", "
+ << Size << ", " << Align;
+ } else if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << ',' << Size << ',' << Align;
+ } else if (!GVar->hasCommonLinkage()) {
+ O << "\t.globl " << name << '\n'
+ << TAI->getWeakDefDirective() << name << '\n';
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << " ";
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+ return;
+ } else {
+ O << ".comm " << name << ',' << Size;
+ // Darwin 9 and above support aligned common data.
+ if (Subtarget.isDarwin9())
+ O << ',' << Align;
+ }
+ if (VerboseAsm) {
+ O << "\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'";
+ }
+ O << '\n';
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ O << "\t.globl " << name << '\n'
+ << "\t.weak_definition " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section named after
+ // them or something similar. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ default:
+ cerr << "Unknown linkage type!";
+ abort();
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'";
+ }
+ O << '\n';
+
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ // Output stubs for dynamically-linked functions
+ if (TM.getRelocationModel() == Reloc::PIC_) {
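+ // Each PIC stub saves LR in r0, uses "bcl 20,31" (the always-taken form
+ // that does not disturb the link-register prediction stack) to get its
+ // own address into LR, reads it with mflr r11, then loads the lazy
+ // pointer at a PC-relative offset and jumps through CTR.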
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs,"
+ "pure_instructions,32");
+ EmitAlignment(4);
+ const char *p = i->getKeyData();
+ bool hasQuote = p[0]=='\"';
+ printSuffixedName(p, "$stub");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ O << "\tmflr r0\n";
+ O << "\tbcl 20,31,";
+ if (hasQuote)
+ O << "\"L0$" << &p[1];
+ else
+ O << "L0$" << p;
+ O << '\n';
+ if (hasQuote)
+ O << "\"L0$" << &p[1];
+ else
+ O << "L0$" << p;
+ O << ":\n";
+ O << "\tmflr r11\n";
+ O << "\taddis r11,r11,ha16(";
+ printSuffixedName(p, "$lazy_ptr");
+ O << "-";
+ if (hasQuote)
+ O << "\"L0$" << &p[1];
+ else
+ O << "L0$" << p;
+ O << ")\n";
+ O << "\tmtlr r0\n";
+ if (isPPC64)
+ O << "\tldu r12,lo16(";
+ else
+ O << "\tlwzu r12,lo16(";
+ printSuffixedName(p, "$lazy_ptr");
+ O << "-";
+ if (hasQuote)
+ O << "\"L0$" << &p[1];
+ else
+ O << "L0$" << p;
+ O << ")(r11)\n";
+ O << "\tmtctr r12\n";
+ O << "\tbctr\n";
+ SwitchToDataSection(".lazy_symbol_pointer");
+ printSuffixedName(p, "$lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ if (isPPC64)
+ O << "\t.quad dyld_stub_binding_helper\n";
+ else
+ O << "\t.long dyld_stub_binding_helper\n";
+ }
+ } else {
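+ // Without PIC, each stub can address its lazy pointer absolutely via
+ // lis/ha16 + lo16, so no PC-materialization sequence is needed.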
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs,"
+ "pure_instructions,16");
+ EmitAlignment(4);
+ const char *p = i->getKeyData();
+ printSuffixedName(p, "$stub");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ O << "\tlis r11,ha16(";
+ printSuffixedName(p, "$lazy_ptr");
+ O << ")\n";
+ if (isPPC64)
+ O << "\tldu r12,lo16(";
+ else
+ O << "\tlwzu r12,lo16(";
+ printSuffixedName(p, "$lazy_ptr");
+ O << ")(r11)\n";
+ O << "\tmtctr r12\n";
+ O << "\tbctr\n";
+ SwitchToDataSection(".lazy_symbol_pointer");
+ printSuffixedName(p, "$lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ if (isPPC64)
+ O << "\t.quad dyld_stub_binding_helper\n";
+ else
+ O << "\t.long dyld_stub_binding_helper\n";
+ }
+ }
+
+ O << '\n';
+
+ if (TAI->doesSupportExceptionHandling() && MMI) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<Function *>& Personalities = MMI->getPersonalities();
+
+ for (std::vector<Function *>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I)
+ if (*I) GVStubs.insert("_" + (*I)->getName());
+ }
+
+ // Output stubs for external and common global variables.
+ if (!GVStubs.empty()) {
+ SwitchToDataSection(".non_lazy_symbol_pointer");
+ for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end();
+ i != e; ++i) {
+ std::string p = i->getKeyData();
+ printSuffixedName(p, "$non_lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ if (isPPC64)
+ O << "\t.quad\t0\n";
+ else
+ O << "\t.long\t0\n";
+ }
+ }
+
+ if (!HiddenGVStubs.empty()) {
+ SwitchToSection(TAI->getDataSection());
+ for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end();
+ i != e; ++i) {
+ std::string p = i->getKeyData();
+ EmitAlignment(isPPC64 ? 3 : 2);
+ printSuffixedName(p, "$non_lazy_ptr");
+ O << ":\n";
+ if (isPPC64)
+ O << "\t.quad\t";
+ else
+ O << "\t.long\t";
+ O << p << '\n';
+ }
+ }
+
+
+ // Emit final debug information.
+ DW->EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+
+ return AsmPrinter::doFinalization(M);
+}
+
+
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// Darwin assembler can deal with.
+///
+FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o,
+ PPCTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+ if (Subtarget->isDarwin()) {
+ return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
+ } else {
+ return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
+ }
+}
+
+namespace {
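+ // Static constructor that registers the asm printer factory with
+ // PPCTargetMachine when this object file is linked in.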
+ static struct Register {
+ Register() {
+ PPCTargetMachine::registerAsmPrinter(createPPCAsmPrinterPass);
+ }
+ } Registrator;
+}
+
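+// Referenced from elsewhere to force this object file (and the Registrator
+// above) to be linked into the final binary.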
+extern "C" int PowerPCAsmPrinterForceLink;
+int PowerPCAsmPrinterForceLink = 0;
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
new file mode 100644
index 0000000..0b67aff
--- /dev/null
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -0,0 +1,28 @@
+set(LLVM_TARGET_DEFINITIONS PPC.td)
+
+tablegen(PPCGenInstrNames.inc -gen-instr-enums)
+tablegen(PPCGenRegisterNames.inc -gen-register-enums)
+tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
+tablegen(PPCGenCodeEmitter.inc -gen-emitter)
+tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(PPCGenRegisterInfo.inc -gen-register-desc)
+tablegen(PPCGenInstrInfo.inc -gen-instr-desc)
+tablegen(PPCGenDAGISel.inc -gen-dag-isel)
+tablegen(PPCGenCallingConv.inc -gen-callingconv)
+tablegen(PPCGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(PowerPCCodeGen
+ PPCBranchSelector.cpp
+ PPCCodeEmitter.cpp
+ PPCHazardRecognizers.cpp
+ PPCInstrInfo.cpp
+ PPCISelDAGToDAG.cpp
+ PPCISelLowering.cpp
+ PPCJITInfo.cpp
+ PPCMachOWriterInfo.cpp
+ PPCPredicates.cpp
+ PPCRegisterInfo.cpp
+ PPCSubtarget.cpp
+ PPCTargetAsmInfo.cpp
+ PPCTargetMachine.cpp
+ )
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
new file mode 100644
index 0000000..db68897
--- /dev/null
+++ b/lib/Target/PowerPC/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/PowerPC/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMPowerPCCodeGen
+TARGET = PPC
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \
+ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
+ PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \
+ PPCGenInstrInfo.inc PPCGenDAGISel.inc \
+ PPCGenSubtarget.inc PPCGenCallingConv.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
new file mode 100644
index 0000000..c844e21
--- /dev/null
+++ b/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,49 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_H
+#define LLVM_TARGET_POWERPC_H
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class raw_ostream;
+
+FunctionPass *createPPCBranchSelectionPass();
+FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS,
+ PPCTargetMachine &TM,
+ CodeGenOpt::Level OptLevel, bool Verbose);
+FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &MCE);
+} // end namespace llvm
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#include "PPCGenRegisterNames.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#include "PPCGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
new file mode 100644
index 0000000..08f5bb4
--- /dev/null
+++ b/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,114 @@
+//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
+ "Enable GPUL instructions">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"603", G3Itineraries, [Directive603]>;
+def : Processor<"603e", G3Itineraries, [Directive603]>;
+def : Processor<"603ev", G3Itineraries, [Directive603]>;
+def : Processor<"604", G3Itineraries, [Directive604]>;
+def : Processor<"604e", G3Itineraries, [Directive604]>;
+def : Processor<"620", G3Itineraries, [Directive620]>;
+def : Processor<"g3", G3Itineraries, [Directive7400]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"970", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"g5", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : Processor<"ppc64", G5Itineraries,
+ [Directive64, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+ // Define how we want to lay out our target-specific information field...
+ // This should be kept up-to-date with the fields in the PPCInstrInfo.h file.
+ let TSFlagsFields = ["PPC970_First",
+ "PPC970_Single",
+ "PPC970_Cracked",
+ "PPC970_Unit"];
+ let TSFlagsShifts = [0, 1, 2, 3];
+
+ let isLittleEndianEncoding = 1;
+}
+
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+}
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 0000000..b95a502
--- /dev/null
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,174 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes: one that computes basic
+// block positions, and one that converts branch pseudo-ops into machine
+// branch opcodes. This pass should be run last, just before the assembly
+// printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-branch-select"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "PPCPredicates.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct VISIBILITY_HIDDEN PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass(&ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+ // If the entire function is smaller than the displacement of a branch
+ // field, we know no branches in this function need to be expanded. This
+ // is a common case.
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(2).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt16(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // The long branch is 8 bytes where the old branch was 4, so grow the
+ // block size by 4 and remember to iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 0000000..9f916f3
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,66 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ CCIfType<[f32], CCAssignToReg<[F1]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+
+ // Vector types are always returned in V2.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+/*
+def CC_PPC : CallingConv<[
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+
+ // Common sub-targets pass FP values in F1 - F13.
+ CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()",
+ CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>,
+ // The ELF32 sub-target passes FP values in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // The first 12 Vector arguments are passed in altivec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
+
+/*
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>*/
+]>;
+
+*/
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
new file mode 100644
index 0000000..aa3dce1
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -0,0 +1,266 @@
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
+// JIT-compile bitcode to native PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPCRelocations.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+ class PPCCodeEmitter {
+ TargetMachine &TM;
+ MachineCodeEmitter &MCE;
+ public:
+ PPCCodeEmitter(TargetMachine &tm, MachineCodeEmitter &mce):
+ TM(tm), MCE(mce) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO);
+
+ /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
+ /// its address within the function in this pointer.
+
+ void *MovePCtoLROffset;
+ };
+
+ template <class CodeEmitter>
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
+ public PPCCodeEmitter
+ {
+ TargetMachine &TM;
+ CodeEmitter &MCE;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ public:
+ static char ID;
+ Emitter(TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(&ID), PPCCodeEmitter(tm, mce), TM(tm), MCE(mce) {}
+
+ const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+
+ /// runOnMachineFunction - emits the given MachineFunction to memory
+ ///
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// emitBasicBlock - emits the given MachineBasicBlock to memory
+ ///
+ void emitBasicBlock(MachineBasicBlock &MBB);
+
+ /// getValueBit - return the particular bit of Val
+ ///
+ unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; }
+ };
+
+ template <class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
+/// to the specified MCE object.
+FunctionPass *llvm::createPPCCodeEmitterPass(PPCTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template <class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+ // FIXME: As written this condition is a tautology, so the assert can never
+ // fire; the message suggests it was meant to require a static or default
+ // relocation model.
+ assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
+ MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>());
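+ // finishFunction returns true when the emitter runs out of buffer space,
+ // in which case the whole function is re-emitted into a larger buffer.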
+ do {
+ MovePCtoLROffset = 0;
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ emitBasicBlock(*BB);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+template <class CodeEmitter>
+void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
+ const MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default:
+ MCE.emitWordBE(getBinaryCodeForInstr(MI));
+ break;
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getImm());
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ break; // pseudo opcode, no side effects
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8:
+ assert(TM.getRelocationModel() == Reloc::PIC_);
+ MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0x48000005); // bl $+4; leaves the next PC in LR.
+ break;
+ }
+ }
+}
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) {
+
+ unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
+ // or things that get fixed up later by the JIT.
+ if (MO.isReg()) {
+ rv = PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+
+ // Special encoding for MTCRF and MFOCRF, which uses a bit mask for the
+ // register, not the register number directly.
+ if ((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)) {
+ rv = 0x80 >> rv;
+ }
+ } else if (MO.isImm()) {
+ rv = MO.getImm();
+ } else if (MO.isGlobal() || MO.isSymbol() ||
+ MO.isCPI() || MO.isJTI()) {
+ unsigned Reloc = 0;
+ if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho ||
+ MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF ||
+ MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8)
+ Reloc = PPC::reloc_pcrel_bx;
+ else {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ }
+ switch (MI.getOpcode()) {
+ default: MI.dump(); assert(0 && "Unknown instruction for relocation!");
+ case PPC::LIS:
+ case PPC::LIS8:
+ case PPC::ADDIS:
+ case PPC::ADDIS8:
+ Reloc = PPC::reloc_absolute_high; // Pointer to symbol
+ break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::LA:
+ // Loads.
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ case PPC::LFS:
+ case PPC::LFD:
+
+ // Stores.
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ case PPC::STFS:
+ case PPC::STFD:
+ Reloc = PPC::reloc_absolute_low;
+ break;
+
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ Reloc = PPC::reloc_absolute_low_ix;
+ break;
+ }
+ }
+
+ MachineRelocation R;
+ if (MO.isGlobal()) {
+ R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ MO.getGlobal(), 0,
+ isa<Function>(MO.getGlobal()));
+ } else if (MO.isSymbol()) {
+ R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, MO.getSymbolName(), 0);
+ } else if (MO.isCPI()) {
+ R = MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, MO.getIndex(), 0);
+ } else {
+ assert(MO.isJTI());
+ R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, MO.getIndex(), 0);
+ }
+
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
+ if (TM.getRelocationModel() == Reloc::PIC_ && Reloc != PPC::reloc_pcrel_bx){
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4);
+ }
+ MCE.addRelocation(R);
+
+ } else if (MO.isMBB()) {
+ unsigned Reloc = 0;
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::B || Opcode == PPC::BL_Macho ||
+ Opcode == PPC::BLA_Macho || Opcode == PPC::BL_ELF ||
+ Opcode == PPC::BLA_ELF)
+ Reloc = PPC::reloc_pcrel_bx;
+ else // BCC instruction
+ Reloc = PPC::reloc_pcrel_bcx;
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, MO.getMBB()));
+ } else {
+ cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+ abort();
+ }
+
+ return rv;
+}
+
+#include "PPCGenCodeEmitter.inc"
+
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
new file mode 100644
index 0000000..1b5893d
--- /dev/null
+++ b/lib/Target/PowerPC/PPCFrameInfo.h
@@ -0,0 +1,93 @@
+//===-- PPCFrameInfo.h - Define TargetFrameInfo for PowerPC -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class PPCFrameInfo: public TargetFrameInfo {
+ const TargetMachine &TM;
+
+public:
+ PPCFrameInfo(const TargetMachine &tm, bool LP64)
+ : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), TM(tm) {
+ }
+
+ /// getReturnSaveOffset - Return the offset, relative to the previous
+ /// frame, at which the return address is saved.
+ static unsigned getReturnSaveOffset(bool LP64, bool isMacho) {
+ if (isMacho)
+ return LP64 ? 16 : 8;
+ // For ELF 32 ABI:
+ return 4;
+ }
+
+  /// getFramePointerSaveOffset - Return the offset within the previous frame
+  /// at which the frame pointer is saved.
+ static unsigned getFramePointerSaveOffset(bool LP64, bool isMacho) {
+ // For MachO ABI:
+    // Use the TOC save slot in the PowerPC linkage area for saving the frame
+    // pointer (if needed).  LLVM does not generate code that uses the TOC (R2
+    // is treated as a caller-saved register).
+ if (isMacho)
+ return LP64 ? 40 : 20;
+
+ // For ELF 32 ABI:
+ // Save it right before the link register
+ return -4U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
+ static unsigned getLinkageSize(bool LP64, bool isMacho) {
+ if (isMacho)
+ return 6 * (LP64 ? 8 : 4);
+
+ // For ELF 32 ABI:
+ return 8;
+ }
+
+  /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI
+ /// argument area.
+ static unsigned getMinCallArgumentsSize(bool LP64, bool isMacho) {
+    // For MachO ABI:
+    // The prolog code of the callee may store up to 8 GPR argument registers
+    // to the stack, allowing va_start to index over them in memory if the
+    // callee is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ if (isMacho)
+ return 8 * (LP64 ? 8 : 4);
+
+ // For ELF 32 ABI:
+ // There is no default stack allocated for the 8 first GPR arguments.
+ return 0;
+ }
+
+ /// getMinCallFrameSize - Return the minimum size a call frame can be using
+ /// the PowerPC ABI.
+ static unsigned getMinCallFrameSize(bool LP64, bool isMacho) {
+ // The call frame needs to be at least big enough for linkage and 8 args.
+ return getLinkageSize(LP64, isMacho) +
+ getMinCallArgumentsSize(LP64, isMacho);
+ }
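+
+  // Worked examples of the formulas above (illustrative): for 32-bit MachO,
+  // getMinCallFrameSize(false, true) == 6*4 + 8*4 == 56 bytes; for 64-bit
+  // MachO it is 6*8 + 8*8 == 112 bytes; for 32-bit ELF it is just the 8-byte
+  // linkage area, since no GPR argument home space is reserved.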
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 0000000..e7658fc
--- /dev/null
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,304 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions.  The PPC970 can dispatch a peak of four non-branch
+// instructions and one branch instruction per cycle.
+//
+// There are a number of restrictions to dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, and
+// some instructions fill an entire dispatch group.  Additionally, only
+// branches can issue in the fifth (last) slot.
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to the misprediction
+// recovery and replay logic they trigger.  These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
+
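+// As a sketch of how a list scheduler drives this recognizer (the real
+// driver lives in the generic scheduler, so the names here are illustrative):
+//
+//   switch (HazardRec->getHazardType(SU)) {
+//   case ScheduleHazardRecognizer::NoHazard:
+//     HazardRec->EmitInstruction(SU);   // issue into the current group
+//     break;
+//   case ScheduleHazardRecognizer::NoopHazard:
+//     EmitNoop();                       // pad the group to break the hazard
+//     HazardRec->AdvanceCycle();
+//     break;
+//   default:                            // Hazard: close out this group
+//     HazardRec->AdvanceCycle();
+//     break;
+//   }
+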
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
+ : TII(tii) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DOUT << "=== Start of dispatch group\n";
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
+ if ((int)Opcode >= 0) {
+ isFirst = isSingle = isCracked = isLoad = isStore = false;
+ return PPCII::PPC970_Pseudo;
+ }
+ Opcode = ~Opcode;
+
+ const TargetInstrDesc &TID = TII.get(Opcode);
+
+ isLoad = TID.mayLoad();
+ isStore = TID.mayStore();
+
+ unsigned TSFlags = TID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
+/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+ // Handle exact and commuted addresses.
+ if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
+ return true;
+ if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ return true;
+
+    // Okay, we don't have an exact match.  If this is an indexed offset, see
+    // if we have overlap (which happens during fp->int conversion, for
+    // example).
+ if (StorePtr2[i] == Ptr2) {
+ if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
+ if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ int StoreOffs = StoreOffset->getZExtValue();
+ int LoadOffs = LoadOffset->getZExtValue();
+ if (StoreOffs < LoadOffs) {
+ if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
+ } else {
+ if (int(LoadOffs+LoadSize) > StoreOffs) return true;
+ }
+ }
+ }
+ }
+ return false;
+}
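+
+// For example (illustrative): after a 4-byte store recorded with
+// StorePtr1 = 8, StorePtr2 = rY, a load of 4 bytes at [rY + 10] has
+// StoreOffs = 8 < LoadOffs = 10 and 8 + 4 > 10, so the accesses overlap
+// and isLoadOfStoredAddress returns true.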
+
+/// getHazardType - We return Hazard for any non-branch instruction that would
+/// terminate the dispatch group.  We return NoopHazard for any instruction
+/// that wouldn't terminate the dispatch group but would cause a pipeline
+/// flush.
+ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SUnit *SU) {
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: assert(0 && "Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+    // Only a branch can occupy the fifth (last) slot, so any other unit must
+    // wait for the next dispatch group once four instructions have issued.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && (Opcode == PPC::BCTRL_Macho || Opcode == PPC::BCTRL_ELF))
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores) {
+ unsigned LoadSize;
+ switch (Opcode) {
+ default: assert(0 && "Unknown load!");
+ case PPC::LBZ: case PPC::LBZU:
+ case PPC::LBZX:
+ case PPC::LBZ8: case PPC::LBZU8:
+ case PPC::LBZX8:
+ case PPC::LVEBX:
+ LoadSize = 1;
+ break;
+ case PPC::LHA: case PPC::LHAU:
+ case PPC::LHAX:
+ case PPC::LHZ: case PPC::LHZU:
+ case PPC::LHZX:
+ case PPC::LVEHX:
+ case PPC::LHBRX:
+ case PPC::LHA8: case PPC::LHAU8:
+ case PPC::LHAX8:
+ case PPC::LHZ8: case PPC::LHZU8:
+ case PPC::LHZX8:
+ LoadSize = 2;
+ break;
+ case PPC::LFS: case PPC::LFSU:
+ case PPC::LFSX:
+ case PPC::LWZ: case PPC::LWZU:
+ case PPC::LWZX:
+ case PPC::LWA:
+ case PPC::LWAX:
+ case PPC::LVEWX:
+ case PPC::LWBRX:
+ case PPC::LWZ8:
+ case PPC::LWZX8:
+ LoadSize = 4;
+ break;
+ case PPC::LFD: case PPC::LFDU:
+ case PPC::LFDX:
+ case PPC::LD: case PPC::LDU:
+ case PPC::LDX:
+ LoadSize = 8;
+ break;
+ case PPC::LVX:
+ case PPC::LVXL:
+ LoadSize = 16;
+ break;
+ }
+
+ if (isLoadOfStoredAddress(LoadSize,
+ Node->getOperand(0), Node->getOperand(1)))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore) {
+ unsigned ThisStoreSize;
+ switch (Opcode) {
+ default: assert(0 && "Unknown store instruction!");
+ case PPC::STB: case PPC::STB8:
+ case PPC::STBU: case PPC::STBU8:
+ case PPC::STBX: case PPC::STBX8:
+ case PPC::STVEBX:
+ ThisStoreSize = 1;
+ break;
+ case PPC::STH: case PPC::STH8:
+ case PPC::STHU: case PPC::STHU8:
+ case PPC::STHX: case PPC::STHX8:
+ case PPC::STVEHX:
+ case PPC::STHBRX:
+ ThisStoreSize = 2;
+ break;
+ case PPC::STFS:
+ case PPC::STFSU:
+ case PPC::STFSX:
+ case PPC::STWX: case PPC::STWX8:
+ case PPC::STWUX:
+ case PPC::STW: case PPC::STW8:
+ case PPC::STWU: case PPC::STWU8:
+ case PPC::STVEWX:
+ case PPC::STFIWX:
+ case PPC::STWBRX:
+ ThisStoreSize = 4;
+ break;
+ case PPC::STD_32:
+ case PPC::STDX_32:
+ case PPC::STD:
+ case PPC::STDU:
+ case PPC::STFD:
+ case PPC::STFDX:
+ case PPC::STDX:
+ case PPC::STDUX:
+ ThisStoreSize = 8;
+ break;
+ case PPC::STVX:
+ case PPC::STVXL:
+ ThisStoreSize = 16;
+ break;
+ }
+
+ StoreSize[NumStores] = ThisStoreSize;
+ StorePtr1[NumStores] = Node->getOperand(1);
+ StorePtr2[NumStores] = Node->getOperand(2);
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+    NumIssued = 4;  // Terminate the current dispatch group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 0000000..74bf8e5
--- /dev/null
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,73 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCHAZRECS_H
+#define PPCHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "PPCInstrInfo.h"
+
+namespace llvm {
+
+/// PPCHazardRecognizer970 - This class defines a finite state automaton that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties (e.g.
+/// setting the CTR register then branching through it within a dispatch group,
+/// or storing then loading from the same address within a dispatch group).
+class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
+ const TargetInstrInfo &TII;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+  // StorePtr1/StorePtr2 - Keep track of the address of any store.  If we see
+  // a load from the same address (or one that aliases it), disallow the load.
+  // We can have up to four stores in one dispatch group, hence we track up
+  // to 4.
+  //
+  // NumStores is zero if we haven't seen a store yet.  We keep track of both
+  // operands of each store, since we support [r+r] and [r+i] addressing.
+ SDValue StorePtr1[4], StorePtr2[4];
+ unsigned StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+
+private:
+  /// EndDispatchGroup - Called when we finish one dispatch group and start a
+  /// new one.
+ ///
+ void EndDispatchGroup();
+
+  /// GetInstrType - Classify the specified PowerPC opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+                                  bool &isFirst, bool &isSingle, bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(unsigned LoadSize,
+ SDValue Ptr1, SDValue Ptr2) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 0000000..823e316
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,1170 @@
+//===-- PPCISelDAGToDAG.cpp - PPC pattern matching inst selector ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-codegen"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCISelLowering.h"
+#include "PPCHazardRecognizers.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class VISIBILITY_HIDDEN PPCDAGToDAGISel : public SelectionDAGISel {
+ PPCTargetMachine &TM;
+ PPCTargetLowering &PPCLowering;
+ const PPCSubtarget &PPCSubTarget;
+ unsigned GlobalBaseReg;
+ public:
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(*TM.getTargetLowering()),
+ PPCSubTarget(*TM.getSubtargetImpl()) {}
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (Fn.hasAvailableExternallyLinkage())
+ return false;
+
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnFunction(Fn);
+
+ InsertVRSaveCode(Fn);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ }
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+ /// with any number of 0s on either side. The 1s are allowed to wrap from
+    /// LSB to MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
+ /// 0x0F0F0000 is not, since all 1s are not contiguous.
+ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
+
+
+ /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
+ /// rotate and mask opcode and mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue Op);
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Because preinc imms have already been validated, just
+ /// accept it.
+ bool SelectAddrImmOffs(SDValue Op, SDValue N, SDValue &Out) const {
+ Out = N;
+ return true;
+ }
+
+    /// SelectAddrIdx - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation.  Returns false if it can
+    /// be represented by [r+imm], which is preferred.
+ bool SelectAddrIdx(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+    /// SelectAddrIdxOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmShift - Returns true if the address N can be represented by
+ /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
+ /// for use by STD and friends.
+ bool SelectAddrImmShift(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddrIdx(Op, Op, Op0, Op1))
+ SelectAddrImm(Op, Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectAddrImm(Op, Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ SDValue BuildSDIVSequence(SDNode *N);
+ SDValue BuildUDIVSequence(SDNode *N);
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ void InsertVRSaveCode(Function &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+ /// this target when scheduling the DAG.
+ virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
+ // Should use subtarget info to pick the right hazard recognizer. For
+ // now, always return a PPC970 recognizer.
+ const TargetInstrInfo *II = TM.getInstrInfo();
+ assert(II && "No InstrInfo?");
+ return new PPCHazardRecognizer970(*II);
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDValue Op);
+ };
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void PPCDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(Function &F) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+  // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ MachineFunction &Fn = MachineFunction::get(&F);
+ bool HasVectorVReg = false;
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = RegInfo->getLastVirtReg()+1; i != e; ++i)
+ if (RegInfo->getRegClass(i) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
+ UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = BB->getParent()->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ if (PPCLowering.getPointerTy() == MVT::i32) {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR), PPC::LR);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8), PPC::LR8);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg,
+ PPCLowering.getPointerTy()).getNode();
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value.  If so, this returns true and sets Imm
+/// to the immediate value.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
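+
+// For example (illustrative): an i32 constant 0xFFFF8000 is the sign
+// extension of the 16-bit value -32768, so isIntS16Immediate returns true,
+// while 0x00008000 (+32768) does not fit a signed 16-bit immediate and
+// returns false.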
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant.  If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc
+ && isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = CountLeadingZeros_32(Val);
+ // look for the first zero bit after the run of ones
+ ME = CountLeadingZeros_32((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = CountLeadingZeros_32(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
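+
+// Worked example (illustrative): Val = 0x00FFFF00 is a shifted mask, so
+// MB = CountLeadingZeros_32(0x00FFFF00) = 8, and since (Val - 1) ^ Val =
+// 0x000001FF, ME = CountLeadingZeros_32(0x000001FF) = 23; the run occupies
+// big-endian bits 8 through 23.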
+
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool IsShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+  unsigned Indeterminant = ~0;  // bit mask marking indeterminate results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (IsShiftMask) Mask = Mask << Shift;
+    // determine which bits are made indeterminate by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (IsShiftMask) Mask = Mask >> Shift;
+    // determine which bits are made indeterminate by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
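+
+// Worked example (illustrative): for N = (srl x, 16) with Mask = 0x0000FFFF
+// and IsShiftMask = false, the shift leaves bits 0xFFFF0000 indeterminate,
+// which the mask avoids; the left-rotate amount becomes SH = 32 - 16 = 16,
+// and isRunOfOnes(0x0000FFFF, MB, ME) yields MB = 16, ME = 31, i.e. a
+// single "rlwinm dst, x, 16, 16, 31".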
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
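+/// For example (illustrative), (or (and x, 0xFFFFFF00), (and (srl y, 24),
+/// 0x000000FF)) keeps the top 24 bits of x and inserts the top byte of y
+/// into the low byte, collapsing to a single "rlwimi x, y, 8, 24, 31".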
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt LKZ, LKO, RKZ, RKO;
+ CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
+
+ unsigned TargetMask = LKZ.getZExtValue();
+ unsigned InsertMask = RKZ.getZExtValue();
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
+ SDValue Tmp1, Tmp2, Tmp3;
+ bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ } else {
+ Op1 = Op1.getOperand(0);
+ }
+ }
+
+ Tmp3 = (Op0Opc == ISD::AND && DisjointMask) ? Op0.getOperand(0) : Op0;
+ SH &= 31;
+ SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB),
+ getI32Imm(ME) };
+ return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+ return 0;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, DebugLoc dl) {
+ // Always select the LHS.
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt16((int)Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDValue Xor(CurDAG->getTargetNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.getNode(), Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt32(Imm)) {
+ SDValue Xor(CurDAG->getTargetNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPC::FCMPUD;
+ }
+ return SDValue(CurDAG->getTargetNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETUEQ:
+ case ISD::SETONE:
+ case ISD::SETOLE:
+ case ISD::SETOGE:
+ assert(0 && "Should be lowered by legalize!");
+ default: assert(0 && "Unknown condition!"); abort();
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ // These two are invalid for floating point. Assume we have int.
+ case ISD::SETULT: return PPC::PRED_LT;
+ case ISD::SETUGT: return PPC::PRED_GT;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+///
+/// If this returns with Other != -1, then the returned comparison is an or of
+/// two simpler comparisons. In this case, Invert is guaranteed to be false.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
+ Invert = false;
+ Other = -1;
+ switch (CC) {
+ default: assert(0 && "Unknown condition!"); abort();
+ case ISD::SETOLT:
+ case ISD::SETLT: return 0; // Bit #0 = SETOLT
+ case ISD::SETOGT:
+ case ISD::SETGT: return 1; // Bit #1 = SETOGT
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
+ case ISD::SETUO: return 3; // Bit #3 = SETUO
+ case ISD::SETUGE:
+ case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
+ case ISD::SETULE:
+ case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
+ case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ assert(0 && "Invalid branch code: should be expanded by legalize");
+ // These are invalid for floating point. Assume integer.
+ case ISD::SETULT: return 0;
+ case ISD::SETUGT: return 1;
+ }
+ return 0;
+}
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
+ if (Imm == 0) {
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
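+        // Emits (illustrative) "cntlzw Rt, Op; srwi Rt, Rt, 5": cntlzw
+        // yields 32 iff Op is zero, and 32 >> 5 == 1.  The RLWINM below
+        // (SH=27, MB=5, ME=31) is exactly that srwi.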
+ Op = SDValue(CurDAG->getTargetNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETNE: {
+ SDValue AD =
+ SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(~0U)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue T =
+ SDValue(CurDAG->getTargetNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getTargetNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ Op = SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(1)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getTargetNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0)), 0),
+ Op.getValue(1));
+ case ISD::SETNE: {
+ Op = SDValue(CurDAG->getTargetNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
+ Op, SDValue(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDValue AD = SDValue(CurDAG->getTargetNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDValue AN = SDValue(CurDAG->getTargetNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
+ SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ Op = SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1));
+ }
+ }
+ }
+ }
+
+ bool Inv;
+ int OtherCondIdx;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ SDValue IntCR;
+
+ // Force the ccreg into CR7.
+ SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDValue InFlag(0, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
+ IntCR = SDValue(CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
+ else
+ IntCR = SDValue(CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, CCReg), 0);
+
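+  // The rotate amount below moves CR7's bit for this predicate (CR bit
+  // 28 + Idx in IBM numbering) into the low bit.  For SETEQ, for example,
+  // Idx == 2, so we rotate left by (32 - (3 - 2)) & 31 == 31, i.e. right
+  // by one.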
+ SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
+ getI32Imm(31), getI32Imm(31) };
+ if (OtherCondIdx == -1 && !Inv)
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+
+ // Get the specified bit.
+ SDValue Tmp =
+ SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ if (Inv) {
+ assert(OtherCondIdx == -1 && "Can't have split plus negation");
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ }
+
+ // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT.
+ // We already got the bit for the first part of the comparison (e.g. SETULE).
+
+ // Get the other bit of the comparison.
+ Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
+ SDValue OtherCond =
+ SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = Op.getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
+ if (N->getValueType(0) == MVT::i64) {
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
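+
+      // Worked example (illustrative; register name hypothetical):
+      // Imm = 0x5678ABCD00000000 is not a 32-bit value, but it has 32
+      // trailing zeros, so we materialize 0x5678ABCD and shift:
+      //   lis r3, 0x5678 ; ori r3, r3, 0xABCD ; rldicr r3, r3, 32, 31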
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt32(Imm)) {
+ Shift = CountTrailingZeros_64(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt32(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt16(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getTargetNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getTargetNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getTargetNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getTargetNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift), getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getTargetNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ return SelectSETCC(Op);
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
+ unsigned Opc = Op.getValueType() == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI,
+ getSmallIPtrImm(0));
+ return CurDAG->getTargetNode(Opc, dl, Op.getValueType(), TFI,
+ getSmallIPtrImm(0));
+ }
+
+ case PPCISD::MFCR: {
+ SDValue InFlag = N->getOperand(1);
+ // Use MFOCRF if supported.
+ if (PPCSubTarget.isGigaProcessor())
+ return CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ else
+ return CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, InFlag);
+ }
+
+ case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
+ unsigned Imm;
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ SDValue N0 = N->getOperand(0);
+ if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+ SDNode *Op =
+ CurDAG->getTargetNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+ N0, getI32Imm(Log2_32(Imm)));
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
+ SDNode *Op =
+ CurDAG->getTargetNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+ N0, getI32Imm(Log2_32(-Imm)));
+ SDValue PT =
+ SDValue(CurDAG->getTargetNode(PPC::ADDZE, dl, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1)),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ MVT LoadedVT = LD->getMemoryVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDValue Offset = LD->getOffset();
+ if (isa<ConstantSDNode>(Offset) ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT()) {
+ default: assert(0 && "Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT()) {
+ default: assert(0 && "Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ // FIXME: PPC64
+ return CurDAG->getTargetNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
+ } else {
+ assert(0 && "R+R preindex loads not supported yet!");
+ }
+    break;
+  }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Val = N->getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return NULL;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ unsigned MB, ME;
+ Imm = ~(Imm^Imm2);
+ if (isRunOfOnes(Imm, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR:
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
+
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ }
+ case PPCISD::COND_BRANCH: {
+ // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ SDValue Pred =
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ }
+ case ISD::BR_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+ SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDValue Chain = N->getOperand(0);
+ SDValue Target = N->getOperand(1);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain);
+ }
+ case ISD::DECLARE: {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
+
+ // FIXME: We need to handle this for VLAs.
+ if (!FINode) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+
+ if (N2.getOpcode() == ISD::ADD) {
+ if (N2.getOperand(0).getOpcode() == ISD::ADD &&
+ N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg &&
+ N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Hi &&
+ N2.getOperand(1).getOpcode() == PPCISD::Lo)
+ N2 = N2.getOperand(0).getOperand(1).getOperand(0);
+ else if (N2.getOperand(0).getOpcode() == ISD::ADD &&
+ N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg &&
+ N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Lo &&
+ N2.getOperand(1).getOpcode() == PPCISD::Hi)
+ N2 = N2.getOperand(0).getOperand(1).getOperand(0);
+ else if (N2.getOperand(0).getOpcode() == PPCISD::Hi &&
+ N2.getOperand(1).getOpcode() == PPCISD::Lo)
+ N2 = N2.getOperand(0).getOperand(0);
+ }
+
+ // If we don't have a global address here, the debug info is mangled, just
+ // drop it.
+ if (!isa<GlobalAddressSDNode>(N2)) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+ int FI = cast<FrameIndexSDNode>(N1)->getIndex();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(N2)->getGlobal();
+ SDValue Tmp1 = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy());
+ return CurDAG->SelectNodeTo(N, TargetInstrInfo::DECLARE,
+ MVT::Other, Tmp1, Tmp2, Chain);
+ }
+ }
+
+ return SelectCode(Op);
+}
+
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 0000000..a7744b8
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,4878 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCPerfectShuffle.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
+cl::desc("enable preincrement load/store generation on PPC (experimental)"),
+ cl::Hidden);
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+ : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
+
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+ addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+ addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PowerPC has pre-inc loads and stores.
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+ // Expand FSQRT unless the subtarget has a hardware square root instruction.
+ if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ }
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+ setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND which requires SetCC
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Support label based line numbers.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // RET must be custom lowered, to meet ABI requirements.
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+
+ // TRAP is legal.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // VAARG is custom lowered with ELF 32 ABI
+ if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Comparisons that require checking two conditions.
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ // They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ // 64-bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ } else {
+ // 32-bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD , VT, Legal);
+ setOperationAction(ISD::SUB , VT, Legal);
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
+ // with merges, splats, etc.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+
+ addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ }
+
+ setShiftAmountType(MVT::i32);
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setStackPointerRegisterToSaveRestore(PPC::X1);
+ setExceptionPointerRegister(PPC::X3);
+ setExceptionSelectorRegister(PPC::X4);
+ } else {
+ setStackPointerRegisterToSaveRestore(PPC::R1);
+ setExceptionPointerRegister(PPC::R3);
+ setExceptionSelectorRegister(PPC::R4);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine(ISD::BSWAP);
+
+ // Darwin long double math library functions have $LDBL128 appended.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
+ setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
+ setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
+ setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
+ setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
+ }
+
+ computeRegisterProperties();
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ TargetMachine &TM = getTargetMachine();
+ // Darwin passes everything on a 4-byte boundary.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ return 4;
+ // FIXME Elf TBD
+ return 4;
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
+ case PPCISD::STD_32: return "PPCISD::STD_32";
+ case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF";
+ case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho";
+ case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
+ case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::MTFSF: return "PPCISD::MTFSF";
+ case PPCISD::TAILCALL: return "PPCISD::TAILCALL";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ }
+}
+
+
+MVT PPCTargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i32;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isZero();
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Op is a shuffle mask element (negative for undef).
+/// Return true if Op is undef or if it matches the specified value.
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
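+/// For example, a binary vpkuhum takes the odd-numbered bytes of both
+/// inputs, so the qualifying v16i8 mask is
+/// {1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}; undef is accepted in any
+/// position, and in the unary case the second half repeats {1,3,...,15}.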
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
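+/// For example, a binary vpkuwum keeps the low halfword of each word, so
+/// the byte mask is {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}, again
+/// with undef allowed in any slot.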
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
+///
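+/// For example, vmrghb corresponds to UnitSize == 1, LHSStart == 0 and
+/// RHSStart == 16: it interleaves the high halves of the two inputs byte
+/// by byte, matching the mask {0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23}.
+///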
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+}
+
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
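+/// For example, the mask {3,4,5,...,17,18} matches a binary vsldoi with a
+/// shift amount of 3 bytes; in the unary case the element numbers wrap
+/// around modulo 16 instead of running into the second input.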
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
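+/// For example, with EltSize == 4 a splat of word element 1 is the byte
+/// mask {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}; getVSPLTImmediate below then
+/// recovers the word index 1 from such a mask.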
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
+ return false;
+
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+ return false;
+
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getMaskElt(i) < 0) continue;
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
+ return false;
+ }
+ return true;
+}
+
+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+ APInt APVal, APUndef;
+ unsigned BitSize;
+ bool HasAnyUndefs;
+
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ return CFP->getValueAPF().isNegZero();
+
+ return false;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
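+/// Two illustrative cases: a v4i32 build_vector of 0xFFFFFFFE with
+/// ByteSize == 4 yields -2 (vspltisw -2), and a v4i32 build_vector of
+/// 0x01010101 with ByteSize == 1 halves down to the byte 0x01 and yields 1
+/// (vspltisb 1).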
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDValue OpVal(0, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+ // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDValue UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across each chunk.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
+
+
+ if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDValue(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+
+ LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
+ LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
+ if (Val < 16)
+ return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ }
+ if (LeadingOnes) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, MVT::i32);
+ }
+
+ return SDValue();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDValue();
+ }
+
+ if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = EltSize;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getZExtValue();
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+ // this splat. The only value whose replicated bits could fit into our
+ // immediate field would be zero, and we prefer to use vxor for that.
+ if (ValSizeInBytes < ByteSize) return SDValue();
+
+ // If the element value is larger than the splat value, cut it in half and
+ // check to see if the two halves are equal. Continue doing this until we
+ // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
+ while (ValSizeInBytes > ByteSize) {
+ ValSizeInBytes >>= 1;
+
+ // If the top half equals the bottom half, we're still ok.
+ if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
+ (Value & ((1 << (8*ValSizeInBytes))-1)))
+ return SDValue();
+ }
+
+ // Properly sign extend the value.
+ int ShAmt = (4-ByteSize)*8;
+ int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDValue();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
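+/// For example, the i32 constant 0xFFFF8000 is accepted (Imm == -32768),
+/// but 0x00008000 is rejected: its low sixteen bits sign-extend to -32768,
+/// which does not reproduce the original value 32768.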
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
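+/// The OR case below relies on the identity that X | Y == X + Y whenever X
+/// and Y have no set bits in common, e.g. a 16-byte-aligned base OR'd with
+/// a small offset.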
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i: fold the immediate if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N.getOperand(1),
+ APInt::getAllOnesValue(N.getOperand(1)
+ .getValueSizeInBits()),
+ RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
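+/// As a worked example of the LIS + disp split below: the constant address
+/// 0x12348000 becomes Disp == -32768 with Base == lis 0x1235, since
+/// 0x12350000 + (-32768) == 0x12348000.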
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from parent load or store, not from address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified address, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(PPC::R0, N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// SelectAddressRegImmShift - Returns true if the address N can be
+/// represented by a base register plus a signed 14-bit displacement
+/// [r+imm*4]. Suitable for use by STD and friends.
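+/// The returned displacement is pre-shifted: a byte offset of 8 is encoded
+/// as 2, and the DS-form instruction's two implicit low zero bits restore
+/// the byte offset.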
+bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from the parent load or store, not the address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address. Verify low two bits are clear.
+ if ((CN->getZExtValue() & 3) == 0) {
+ // If this address fits entirely in a 14-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
+ Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+ return true;
+ }
+
+ // Fold the low-part of 32-bit absolute addresses into addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
+ Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base),0);
+ return true;
+ }
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+
+/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
+/// offset pointer, and addressing mode by reference, if the node's address
+/// can be legally represented as a pre-indexed load / store address.
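+/// When this succeeds, the selector can use update-form instructions such
+/// as "lwzu r3, 4(r4)", which loads from r4+4 and writes the incremented
+/// address back into r4 in the same instruction.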
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ // Disabled by default for now.
+ if (!EnablePPCPreinc) return false;
+
+ SDValue Ptr;
+ MVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors.
+ if (VT.isVector())
+ return false;
+
+ // TODO: Check reg+reg first.
+
+ // LDU/STU use reg+imm*4, others use reg+imm.
+ if (VT != MVT::i64) {
+ // reg + imm
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ return false;
+ } else {
+ // reg + imm * 4.
+ if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the constant pool entry is just (hi(&cp)+lo(&cp)).
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(), PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ return Lo;
+}
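+
+// In the static case above, the address is materialized by a two
+// instruction sequence, roughly "lis" of the adjusted high half followed
+// by an add of the low half; the ha16/lo16 style relocations compensate
+// for the sign of the low 16 bits.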
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there isn't really any debug loc here
+ DebugLoc dl = Op.getDebugLoc();
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to the jump table.
+ // The address of the jump table is just (hi(&jt)+lo(&jt)).
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(), PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ return Lo;
+}
+
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) {
+ assert(0 && "TLS not implemented for PPC.");
+ return SDValue(); // Not reached
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = GSDN->getDebugLoc();
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to globals.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(), PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+
+ if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
+ return Lo;
+
+ // If the global is weak or external, we have to go through the lazy
+ // resolution stub.
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ MVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDValue();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // usual approach is to use sub here instead of xor; we use xor because it
+ // exposes the result to other bit-twiddling opportunities.
+ MVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ MVT VT = Op.getValueType();
+ SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ }
+ return SDValue();
+}
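+
+// The ctlz/srl trick above: for 32-bit X, cntlzw returns 32 exactly when
+// X == 0, so shifting the count right by log2(32) == 5 produces 1 for
+// X == 0 and 0 otherwise, computing seteq-with-zero without a condition
+// register.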
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ int VarArgsFrameIndex,
+ int VarArgsStackOffset,
+ unsigned VarArgsNumGPR,
+ unsigned VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+
+ assert(0 && "VAARG in ELF32 ABI not implemented yet!");
+ return SDValue(); // Not reached
+}
+
+SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+ const Type *IntPtrTy =
+ DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+
+ // TrampSize == (isPPC64 ? 48 : 40);
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false,
+ false, false, CallingConv::C, false,
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ Args, DAG, dl);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ int VarArgsFrameIndex,
+ int VarArgsStackOffset,
+ unsigned VarArgsNumGPR,
+ unsigned VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Subtarget.isMachoABI()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ }
+
+ // For ELF 32 ABI we follow the layout of the va_list struct.
+ // We suppose the given va_list is already allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
+
+
+ SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
+ SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);
+
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+
+ uint64_t FPROffset = 1;
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store first byte: number of int regs
+ SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR,
+ Op.getOperand(1), SV, 0);
+ uint64_t nextOffset = FPROffset;
+ SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte: number of float regs
+ SDValue secondStore =
+ DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset);
+ nextOffset += StackOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word: arguments given on stack
+ SDValue thirdStore =
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset);
+ nextOffset += FrameOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word: arguments given in registers
+ return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset);
+
+}
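+
+// With 32-bit pointers the layout stored above is: gpr count at byte 0,
+// fpr count at byte 1, two bytes of padding, overflow_arg_area at offset 4
+// and reg_save_area at offset 8, matching the va_list struct sketched
+// above.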
+
+#include "PPCGenCallingConv.inc"
+
+/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// depending on which subtarget is selected.
+static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
+ if (Subtarget.isMachoABI()) {
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+ };
+ return FPR;
+ }
+
+
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ return FPR;
+}
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
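+/// E.g. with 4-byte pointers a byval aggregate of 10 bytes reserves 12
+/// bytes (three pointer-sized slots), while a plain i32 reserves 4.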
+static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
+ bool isVarArg, unsigned PtrByteSize) {
+ MVT ArgVT = Arg.getValueType();
+ unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
+SDValue
+PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
+ SelectionDAG &DAG,
+ int &VarArgsFrameIndex,
+ int &VarArgsStackOffset,
+ unsigned &VarArgsNumGPR,
+ unsigned &VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SmallVector<SDValue, 8> ArgValues;
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ bool isMachoABI = Subtarget.isMachoABI();
+ bool isELF32_ABI = Subtarget.isELF32_ABI();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ unsigned CC = MF.getFunction()->getCallingConv();
+ bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const unsigned *FPR = GetFPR(Subtarget);
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
+ const unsigned Num_VR_Regs = array_lengthof(VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // In 32-bit non-varargs functions, the stack space for vectors is after the
+ // stack space for non-vectors. We do not use this space unless we have
+ // too many vectors to fit in registers, something that only occurs in
+ // constructed examples, but we have to walk the arglist to figure that out
+ // for the pathological case: compute VecArgOffset as the start of the
+ // vector parameter area. Computing VecArgOffset is the entire point of
+ // the following loop.
+ // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
+ // to handle Elf here.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
+ ++ArgNo) {
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of regs.
+ ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += isPPC64 ? 8 : 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Nothing to do, we're only looking at non-vector args here.
+ break;
+ }
+ }
+ }
+ // We've found where the vector parameter area in memory is. Skip the
+ // first 12 parameters; these don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+ //
+ // In the ELF 32 ABI, GPRs and the stack are double-word aligned: an argument
+ // represented with two words (long long or double) must be copied to an
+ // even GPR_idx value or to an even ArgOffset value.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
+ ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+ // See if next argument requires stack alignment in ELF
+ bool Align = Flags.isSplit();
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // In varargs functions or the 64-bit ABI, Altivec parameters are padded to
+ // a 16-byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ Flags,
+ isVarArg,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ Flags,
+ isVarArg,
+ PtrByteSize);
+
+ // FIXME alignment for ELF may not be right
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Double word align in ELF
+ if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+ // Objects of size 1 and 2 are right-justified; everything else is
+ // left-justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
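+ // e.g. a 1-byte byval nominally at offset 24 is addressed at 27 so that,
+ // on big-endian PPC, the byte sits at the low-order end of its word.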
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgValues.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ if (isMachoABI) ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgVal will be address of the beginning of
+ // the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ if (isMachoABI) ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ if (!isPPC64) {
+ // Double word align in ELF
+ if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // Stack align in ELF
+ if (needsLoad && Align && isELF32_ABI)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ // All int arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
+ break;
+ }
+ // FALLTHROUGH
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32) {
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
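+ // e.g. a signext i32 arriving in an i64 GPR becomes
+ // (truncate (AssertSext reg, i32)), letting later passes rely on the
+ // upper 32 bits being a sign extension without re-extending.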
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs && isMachoABI) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+ if (ObjectVT == MVT::f32)
+ VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
+ else
+ VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+ RegInfo.addLiveIn(FPR[FPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // Stack align in ELF
+ if (needsLoad && Align && isELF32_ABI)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ // All FP arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
+ RegInfo.addLiveIn(VR[VR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
+ }
+ ++VR_idx;
+ } else {
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the non-vectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined above
+ // that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ }
+
+ ArgValues.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in the caller of this function. A
+ // tail-call-optimized function's reserved stack space needs to be aligned so
+ // that taking the difference between two stack areas will result in an
+ // aligned stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+
+ // If the function takes a variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+
+ int depth;
+ if (isELF32_ABI) {
+ VarArgsNumGPR = GPR_idx;
+ VarArgsNumFPR = FPR_idx;
+
+ // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
+ // pointer.
+ depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
+ Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
+ PtrVT.getSizeInBits()/8);
+
+ VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ ArgOffset);
+
+ }
+ else
+ depth = ArgOffset;
+
+ VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ depth);
+ SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+ // In ELF 32 ABI, the fixed integer arguments of a variadic function are
+ // stored to the VarArgsFrameIndex on the stack.
+ if (isELF32_ABI) {
+ for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
+ SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
+ SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ else
+ VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ if (isELF32_ABI) {
+ for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
+ SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
+ SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
+ unsigned VReg;
+ VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ RegInfo.addLiveIn(FPR[FPR_idx], VReg);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size());
+}
+
+ /// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+ bool isPPC64,
+ bool isMachoABI,
+ bool isVarArg,
+ unsigned CC,
+ CallSDNode *TheCall,
+ unsigned &nAltivecParamsAtEnd) {
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ unsigned NumOps = TheCall->getNumArgs();
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
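+ // Rough illustrative example (slot sizes come from CalculateStackSlotSize):
+ // a 64-bit call f(i32 a, <4 x i32> v) starts with the 48-byte linkage area,
+ // adds 8 for the i32 slot (56), rounds up to 64 for the vector, and adds
+ // 16, giving NumBytes = 80 before the minimum-frame-size clamp below.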
+ nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ MVT ArgVT = Arg.getValueType();
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+
+ // Tail call needs the stack to be aligned.
+ if (CC==CallingConv::Fast && PerformTailCallOpt) {
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ }
+
+ return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+ /// adjusted to accommodate the arguments for the tail call.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
+ unsigned ParamSize) {
+
+ if (!IsTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+ // Remember only if the new adjustment is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+ /// IsEligibleForTailCallOptimization - Check to see whether the next
+ /// instruction following the call is a return. A function is eligible if the
+ /// caller/callee calling conventions match (currently only fastcc supports
+ /// tail calls) and the function CALL is immediately followed by a RET.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+ SDValue Ret,
+ SelectionDAG& DAG) const {
+ // Variable argument functions are not supported.
+ if (!PerformTailCallOpt || TheCall->isVarArg())
+ return false;
+
+ if (CheckTailCallReturnConstraints(TheCall, Ret)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CallerCC = MF.getFunction()->getCallingConv();
+ unsigned CalleeCC = TheCall->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing by val parameters are not supported.
+ for (unsigned i = 0; i != TheCall->getNumArgs(); i++) {
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ if (Flags.isByVal()) return false;
+ }
+
+ SDValue Callee = TheCall->getCallee();
+ // Non-PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+ }
+
+ return false;
+}
+
+ /// isBLACompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 6 >> 6) != Addr)
+ return 0; // Top 6 bits have to be sext of immediate.
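+ // e.g. Addr = 0x00001000 passes both checks and yields the immediate 0x400,
+ // while Addr = 0x02000000 fails: (0x02000000 << 6) >> 6 sign-extends to
+ // 0xfe000000, so it is not a sign-extended 26-bit field.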
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2,
+ DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+ SDValue Arg;
+ SDValue FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDValue Chain,
+ const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+ SmallVector<SDValue, 8> &MemOpChains,
+ DebugLoc dl) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDValue Arg = TailCallArgs[i].Arg;
+ SDValue FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store relative to frame pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
+ PseudoSourceValue::getFixedStack(FI),
+ 0));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDValue Chain,
+ SDValue OldRetAddr,
+ SDValue OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isMachoABI,
+ DebugLoc dl) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
+ isMachoABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc);
+ int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
+ isMachoABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
+
+ MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ PseudoSourceValue::getFixedStack(NewRetAddr), 0);
+ SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+ PseudoSourceValue::getFixedStack(NewFPIdx), 0);
+ }
+ return Chain;
+}
+
+ /// CalculateTailCallArgDest - Remember the argument for later processing and
+ /// calculate the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDValue Arg, int SPDiff, unsigned ArgOffset,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+ /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and
+ /// return address stack slots. Returns the chain as result and the loaded
+ /// values in LROpOut/FPOpOut. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ DebugLoc dl) {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
+ Chain = SDValue(LROpOut.getNode(), 1);
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
+ Chain = SDValue(FPOpOut.getNode(), 1);
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ unsigned Size, DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ false, NULL, 0, NULL, 0);
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+ SDValue Arg, SDValue PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVector<SDValue, 8> &MemOpChains,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
+ DebugLoc dl) {
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget,
+ TargetMachine &TM) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ bool isVarArg = TheCall->isVarArg();
+ unsigned CC = TheCall->getCallingConv();
+ bool isTailCall = TheCall->isTailCall()
+ && CC == CallingConv::Fast && PerformTailCallOpt;
+ SDValue Callee = TheCall->getCallee();
+ unsigned NumOps = TheCall->getNumArgs();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ bool isMachoABI = Subtarget.isMachoABI();
+ bool isELF32_ABI = Subtarget.isELF32_ABI();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
+ // SelectExpr to use to put the arguments in the appropriate registers.
+ std::vector<SDValue> args_to_use;
+
+ // Mark this function as potentially containing a tail call. As a consequence
+ // the frame pointer will be used for dynamic stack allocation and for
+ // restoring the caller's stack pointer in this function's epilogue. This is
+ // done because a tail-called function might overwrite the value in this
+ // function's (MF) stack pointer save slot, 0(SP).
+ if (PerformTailCallOpt && CC==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
+ TheCall, nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const unsigned *FPR = GetFPR(Subtarget);
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = isMachoABI ? 13 : 8;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ bool inMem = false;
+ SDValue Arg = TheCall->getArg(i);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ // See if next argument requires stack alignment in ELF
+ bool Align = Flags.isSplit();
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ // Stack align in ELF 32
+ if (isELF32_ABI && Align)
+ PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
+ StackPtr.getValueType());
+ else
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
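+ // e.g. a zeroext i32 argument of 0xffffffff is widened here to the i64
+ // 0x00000000ffffffff before being assigned to a 64-bit GPR.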
+
+ // FIXME Elf untested, what are alignment rules?
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
+ if (Size==1 || Size==2) {
+ // Very small objects are passed right-justified.
+ // Everything else is passed left-justified.
+ MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ NULL, 0, VT);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ if (isMachoABI)
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, Size, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, Size, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ // And copy the pieces of it that fit into registers.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ if (isMachoABI)
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ // Double word align in ELF
+ if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ inMem = true;
+ }
+ if (inMem || isMachoABI) {
+ // Stack align in ELF
+ if (isELF32_ABI && Align)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+
+ ArgOffset += PtrByteSize;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
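+ // (Illustrative sketch: a vararg f64 on 32-bit is stored to its stack
+ // slot above, then reloaded word-by-word into the next free GPRs below,
+ // so the callee's va_arg can fetch it from either place.)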
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
+ Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
+ Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (isMachoABI) {
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ }
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ inMem = true;
+ }
+ if (inMem || isMachoABI) {
+ // Stack align in ELF
+ if (isELF32_ABI && Align)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ }
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in V
+ // registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the '...'. We do it for all
+ // arguments, which seems to work.
+ while (ArgOffset % 16 != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers, emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip the first 12 params, which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+ MVT ArgType = Arg.getValueType();
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
+ if (isVarArg && isELF32_ABI) {
+ SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ if (isTailCall) {
+ SmallVector<SDValue, 8> MemOpChains2;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2, dl);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isMachoABI, dl);
+ }
+
+ // Emit callseq_end just before tailcall node.
+ if (isTailCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ std::vector<MVT> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
+ else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ else {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call, we can't use PPCISD::CALL.
+ SDValue MTCTROps[] = {Chain, Callee, InFlag};
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
+ 2 + (InFlag.getNode() != 0));
+ InFlag = Chain.getValue(1);
+
+ // Copy the callee address into R12/X12 on Darwin.
+ if (isMachoABI) {
+ unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Flag);
+ Ops.push_back(Chain);
+ CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
+ Callee.setNode(0);
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ assert(InFlag.getNode() &&
+ "Flag must be set. Depend on flag being set in LowerRET");
+ Chain = DAG.getNode(PPCISD::TAILCALL, dl,
+ TheCall->getVTList(), &Ops[0], Ops.size());
+ return SDValue(Chain.getNode(), Op.getResNo());
+ }
+
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(BytesCalleePops, true),
+ InFlag);
+ if (TheCall->getValueType(0) != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ SmallVector<SDValue, 16> ResultVals;
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ MVT VT = VA.getValVT();
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VT, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ InFlag = Chain.getValue(2);
+ }
+
+ // If the function returns void, just return the chain.
+ if (RVLocs.empty())
+ return Chain;
+
+ // Otherwise, merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size());
+ return Res.getValue(Op.getResNo());
+}
+
+SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
+ TargetMachine &TM) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+ CCState CCInfo(CC, isVarArg, TM, RVLocs);
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Chain = Op.getOperand(0);
+
+ Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
+ if (Chain.getOpcode() == PPCISD::TAILCALL) {
+ SDValue TailCall = Chain;
+ SDValue TargetAddress = TailCall.getOperand(1);
+ SDValue StackAdjustment = TailCall.getOperand(2);
+
+ assert(((TargetAddress.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
+ TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+ TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(TargetAddress)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ assert(StackAdjustment.getOpcode() == ISD::Constant &&
+ "Expecting a const value");
+
+ SmallVector<SDValue,8> Operands;
+ Operands.push_back(Chain.getOperand(0));
+ Operands.push_back(TargetAddress);
+ Operands.push_back(StackAdjustment);
+ // Copy registers used by the call. Last operand is a flag so it is not
+ // copied.
+ for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
+ Operands.push_back(Chain.getOperand(i));
+ }
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0],
+ Operands.size());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ // When we pop the dynamic allocation we need to restore the SP link.
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the correct type for pointers.
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Construct the stack pointer operand.
+ bool IsPPC64 = Subtarget.isPPC64();
+ unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
+ SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDValue Chain = Op.getOperand(0);
+ SDValue SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0);
+}
+
+
+
+SDValue
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isMachoABI = PPCSubTarget.isMachoABI();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get the current return address save index.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the return address save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out the fixed offset of the return address save area.
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
+ // Allocate the frame index for the return address save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isMachoABI = PPCSubTarget.isMachoABI();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI) {
+ // Find out the fixed offset of the frame pointer save area.
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);
+
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the correct type for pointers.
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Negate the size.
+ SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+ DAG.getConstant(0, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNALLOC node.
+ SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+}
+
+ /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
+ /// instruction when possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ // Not FP? Not a fsel.
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
+ return Op;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ // Cannot handle SETEQ/SETNE.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+
+ MVT ResVT = Op.getValueType();
+ MVT CmpVT = Op.getOperand(0).getValueType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+ }
+
+ SDValue Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ }
+ return Op;
+}
+
+// FIXME: Split this code up when LegalizeDAGTypes lands.
+SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+
+ // Emit a store to the stack slot.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0);
+
+ // Result is a load from the stack slot. If loading 4 bytes, make sure to
+ // add in a bias.
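+ // (fctiwz leaves the 32-bit result in the low word of the f64; stored
+ // big-endian, that word sits at byte offset 4 of the stack slot.)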
+ if (Op.getValueType() == MVT::i32)
+ FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, FIPtr.getValueType()));
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into a
+ // 64-bit register with extsw, store the whole 64-bit value to the stack,
+ // then lfd it and fcfid it.
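+ // Roughly, as machine instructions (a sketch of the intended sequence):
+ // extsw rT, rSrc ; std rT, slot(r1) ; lfd fT, slot(r1) ; fcfid fD, fT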
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8);
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 0, 8, 8);
+ SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other,
+ DAG.getEntryNode(), Ext64, FIdx,
+ DAG.getMemOperand(MO));
+ // Load the value as a double.
+ SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);
+
+ // FCFID it and return it.
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ /*
+ The rounding mode is in bits 30:31 of the FPSCR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+ */
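+ // Worked example of the conversion: FPSCR rounding bits 10 (+inf) give
+ // (2 ^ ((~2 & 3) >> 1)) = 2 ^ 0 = 2, which is FLT_ROUNDS "+inf"; bits 00
+ // (nearest) give (0 ^ ((~0 & 3) >> 1)) = 0 ^ 1 = 1, FLT_ROUNDS "nearest".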
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT VT = Op.getValueType();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ std::vector<MVT> NodeTys;
+ SDValue MFFSreg, InFlag;
+
+ // Save FP Control Word to register
+ NodeTys.push_back(MVT::f64); // return register
+ NodeTys.push_back(MVT::Flag); // unused in this context
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+ StackSlot, NULL, 0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::XOR, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
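+ // In effect (a sketch, for BitWidth-bit halves Lo/Hi):
+ // OutLo = Lo << Amt
+ // OutHi = (Hi << Amt) | (Lo >> (BitWidth-Amt)) | (Lo << (Amt-BitWidth))
+ // PPC shifts yield 0 for amounts in [BitWidth, 2*BitWidth), so whichever
+ // Lo term is out of range contributes nothing.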
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ MVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
+ SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ MVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
+ SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
+
+ // Expand into a bunch of logical ops, followed by a select_cc.
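+ // Sketch: OutHi = Hi >>s Amt, and OutLo selects between
+ // (Lo >>u Amt) | (Hi << (BitWidth-Amt)) for Amt <= BitWidth and
+ // Hi >>s (Amt-BitWidth) otherwise, via the select_cc on Amt-BitWidth.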
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ MVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
+ SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
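+
+// A sketch of the select_cc above, again for BitWidth = 32: when Amt <= 32,
+// Tmp5 <= 0 and OutLo is the logical combination (Lo >> Amt) | (Hi << Tmp1);
+// when Amt > 32, the bits entering the low word must be sign-filled from Hi,
+// so OutLo is the arithmetic shift Hi >>s (Amt - 32) instead.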
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT,
+ SelectionDAG &DAG, DebugLoc dl) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const MVT VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ MVT CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
+ &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
+}
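+
+// For example, BuildSplatI(-5, 2, MVT::Other, DAG, dl) yields a v8i16
+// BUILD_VECTOR of eight -5s, which the selector matches as a single
+// 'vspltish -5'; a requested splat of -1 is canonicalized to the byte form,
+// i.e. 'vspltisb -1', whatever SplatSize was passed in.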
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl,
+ MVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+ SDValue Op2, SelectionDAG &DAG,
+ DebugLoc dl, MVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+ MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
+
+ int Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+}
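+
+// Roughly: vsldoi selects 16 consecutive bytes of the 32-byte concatenation
+// LHS:RHS starting at byte Amt, so with LHS == RHS == <a,b,c,d> (v4i32) and
+// Amt == 4 the result is <b,c,d,a>, a rotation of the vector by one word.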
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs) || SplatBitSize > 32)
+ return SDValue();
+
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // tmp = VSPLTI[bhw], result = add tmp, tmp
+ if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
+ Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
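+ // E.g. a v4i32 splat of 28 (too big for one vsplti) becomes 'vspltisw 14'
+ // followed by 'vadduwm', still much cheaper than a constant pool load.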
+
+ // If this is 0x7FFF_FFFF x 4, materialize it as not(0x8000_0000) with
+ // vspltisw + vslw + xor; this is important for fneg/fabs.  (A plain
+ // 0x8000_0000 x 4 splat is caught by the vsplti+shl loop below.)
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make a splat of all ones (vspltisw -1):
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
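+ // Concretely: OnesV is -1 in every lane, and vslw shifts each lane left by
+ // the low 5 bits of the matching lane of OnesV (31), so Res holds
+ // 0x8000_0000 per lane; the final xor with -1 flips that to 0x7FFF_FFFF.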
+
+ // Check to see if this is a wide variety of vsplti*, binop self cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000).
+ int i = SplatCsts[idx];
+
+ // Figure out what shift amount will be used by altivec if shifted by i in
+ // this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (i << (int)TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (i >> (int)TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
+ }
+ }
+
+ // Three instruction sequences.
+
+ // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).  (Every value in [0,16]
+ // and every even value was already handled above, so only odd 17..31 can
+ // actually reach this point.)
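+ // E.g. a splat of 19 becomes 'vsplti 3' minus 'vsplti -16' (3 - (-16) == 19).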
+ if (SextVal >= 0 && SextVal <= 31) {
+ SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ }
+ // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16), by the same reasoning.
+ if (SextVal >= -31 && SextVal <= 0) {
+ SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ }
+
+ return SDValue();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
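+ // A note on the encoding assumed here: PFEntry packs cost(2 bits), opcode
+ // (4 bits), LHSID (13 bits) and RHSID (13 bits). Each ID is four base-9
+ // digits, one per result word; digits 0-7 pick a word of the concatenated
+ // <LHS,RHS> input and 8 means undef. Thus (1*9+2)*9+3 above is the
+ // identity <0,1,2,3> (all of LHS) and ((4*9+5)*9+6)*9+7 is <4,5,6,7>
+ // (all of RHS), the only two operands OP_COPY accepts.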
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+ int ShufIdxs[16];
+ switch (OpNum) {
+ default: assert(0 && "Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+ }
+ MVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ MVT VT = Op.getValueType();
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, true) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ SmallVector<int, 16> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i*4+j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i*4+j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
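+ // For instance, the byte mask <4,5,6,7, 0,1,2,3, u,u,u,u, 12,13,14,15>
+ // passes with PFIndexes = {1, 0, 8, 3}, whereas a mask beginning
+ // <1,2,3,4, ...> fails at once: byte 0 is not byte 0 of a word, since
+ // (1 & 3) != 0.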
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ if (isFourElementShuffle) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
+ // that it is in input element units rather than bytes. Convert now.
+ MVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
+ }
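+ // E.g. for v4i32, a mask entry PermMask[i] == 5 (word 1 of V2) expands to
+ // the four byte selectors <20,21,22,23> in the vperm control vector.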
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+/// altivec comparison.  If it is, return true and fill in CompareOpc/isDot
+/// with information about the intrinsic.
+static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ CompareOpc = -1;
+ isDot = false;
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ DebugLoc dl = Op.getDebugLoc();
+ int CompareOpc;
+ bool isDot;
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ return SDValue(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, MVT::i32));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
+ }
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ std::vector<MVT> VTs;
+ VTs.push_back(Op.getOperand(2).getValueType());
+ VTs.push_back(MVT::Flag);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
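+ // A sketch of the layout assumed below: after the mfcr, CR6 sits in bits
+ // 7..4 of Flags (LT=7, GT=6, EQ=5, SO=4), so the shift amount 8-(3-BitNo)
+ // is 5 for the EQ bit and 7 for the LT bit.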
+
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+ DAG.getConstant(8-(3-BitNo), MVT::i32));
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+ return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16);
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ // Store the input value into Value#0 of the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Op.getOperand(0), FIdx, NULL, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType() == MVT::v4i32) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
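+ // A 32-bit product can be assembled from 16-bit multiplies:
+ //   x*y mod 2^32 == x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 16)
+ // vmulouh supplies the first term below; vmsumuhm against a 16-bit-rotated
+ // RHS supplies the parenthesized sum, which vslw then shifts into place.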
+ SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
+ // vrlw/vslw only use the low 5 bits of each shift amount, so a splat of
+ // -16 acts as a shift amount of +16.
+ SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);
+
+ SDValue RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, dl, MVT::v4i32);
+
+ SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
+ Neg16, DAG, dl);
+ return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG, dl);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
+
+ // Merge the results together: each product is 16 bits wide and, in this
+ // big-endian layout, its low byte sits at the odd byte position, so
+ // interleave the odd bytes of the even and odd products.
+ int Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ } else {
+ assert(0 && "Unknown mul to lower!");
+ abort();
+ }
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
+ VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
+ VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
+ VarArgsStackOffset, VarArgsNumGPR,
+ VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget,
+ getTargetMachine());
+ case ISD::RET: return LowerRET(Op, DAG, getTargetMachine());
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG,
+ Op.getDebugLoc());
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+ return SDValue();
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::FP_ROUND_INREG: {
+ assert(N->getValueType(0) == MVT::ppcf128);
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(1));
+
+ // This sequence changes FPSCR to do round-to-zero, adds the two halves
+ // of the long double, and puts FPSCR back the way it was. We do not
+ // actually model FPSCR.
+ std::vector<MVT> NodeTys;
+ SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
+
+ NodeTys.push_back(MVT::f64); // Return register
+ NodeTys.push_back(MVT::Flag); // Returns a flag for later insns
+ Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ MFFSreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = DAG.getConstant(31, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = DAG.getConstant(30, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64); // result of add
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = Lo;
+ Ops[1] = Hi;
+ Ops[2] = InFlag;
+ Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
+ FPreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64);
+ Ops[0] = DAG.getConstant(1, MVT::i32);
+ Ops[1] = MFFSreg;
+ Ops[2] = FPreg;
+ Ops[3] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
+ FPreg = Result.getValue(0);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ FPreg, FPreg));
+ return;
+ }
+ case ISD::FP_TO_SINT:
+ Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
+ return;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ bool is64bit, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned TmpReg = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // <binop> tmp, incr, dest   (tmp is just incr when BinOpcode is 0)
+ // st[wd]cx. tmp, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool is8bit, // true for an i8, false for an i16 operation
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // In 64 bit mode we have to use 64 bits for addresses, even though the
+ // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
+ // registers without caring whether they're 32 or 64, but here we're
+ // doing actual arithmetic on the addresses.
+ bool is64bit = PPCSubTarget.isPPC64();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw incr2, incr, shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // loopMBB:
+ // lwarx tmpDest, ptr
+ // add tmp, tmpDest, incr2
+ // andc tmp2, tmpDest, mask
+ // and tmp3, tmp, mask
+ // or tmp4, tmp3, tmp2
+ // stwcx. tmp4, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ // exitMBB:
+ // srw dest, tmpDest, shift
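+ //
+ // For example (big-endian): a byte at byte offset 2 within its aligned
+ // word occupies bits 15..8, and indeed shift1 == (ptr1 & 3) << 3 == 16 and
+ // shift == 16 ^ 24 == 8, so incr and mask are slid left by 8 to line up
+ // with bits 15..8.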
+
+ if (ptrA != PPC::R0) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
+ .addReg(incr).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(PPC::R0).addReg(PtrReg);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
+ .addReg(Incr2Reg).addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
+ .addReg(TmpReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
+ .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineFunction *F = BB->getParent();
+
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
+ BB = EmitAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
+ BB = EmitAtomicBinary(MI, BB, true, 0);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // l[wd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- midMBB
+ // loop2MBB:
+ // st[wd]cx. newval, ptr
+ // bne- loopMBB
+ // b exitBB
+ // midMBB:
+ // st[wd]cx. dest, ptr
+ // exitBB:
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(dest).addReg(ptrA).addReg(ptrB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
+ // We must use 64-bit registers for addresses when targeting 64-bit,
+ // since we're actually doing arithmetic on them. Other registers
+ // can be 32-bit.
+ bool is64bit = PPCSubTarget.isPPC64();
+ bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw newval2, newval, shift
+ // slw oldval2, oldval,shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // and newval3, newval2, mask
+ // and oldval3, oldval2, mask
+ // loop1MBB:
+ // lwarx tmpDest, ptr
+ // and tmp, tmpDest, mask
+ // cmpw tmp, oldval3
+ // bne- midMBB
+ // loop2MBB:
+ // andc tmp2, tmpDest, mask
+ // or tmp4, tmp2, newval3
+ // stwcx. tmp4, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // stwcx. tmpDest, ptr
+ // exitBB:
+ // srw dest, tmp, shift
+ if (ptrA != PPC::R0) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+ .addReg(newval).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+ .addReg(oldval).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+ .addReg(NewVal2Reg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+ .addReg(OldVal2Reg).addReg(MaskReg);
+
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(PPC::R0).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+ .addReg(TmpReg).addReg(OldVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
+ .addReg(Tmp2Reg).addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+ .addReg(PPC::R0).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+ .addReg(PPC::R0).addReg(PtrReg);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpReg).addReg(ShiftReg);
+ } else {
+ assert(0 && "Unexpected instr type to insert");
+ }
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getZExtValue() == 0) // 0 << V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getZExtValue() == 0) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getZExtValue() == 0 || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+
+ case ISD::SINT_TO_FP:
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64 &&
+ N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
+ DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(Val.getNode());
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we could avoid the load/store here
+ // too, but that case is not handled yet.
+ }
+ }
+ }
+ break;
+ case ISD::STORE:
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+ if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+ !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
+ N->getOperand(2), N->getOperand(3));
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).getNode()->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16)) {
+ SDValue BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+
+ return DAG.getNode(PPCISD::STBRX, dl, MVT::Other, N->getOperand(0),
+ BSwapOp, N->getOperand(2), N->getOperand(3),
+ DAG.getValueType(N->getOperand(1).getValueType()));
+ }
+ break;
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ std::vector<MVT> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Other);
+ SDValue MO = DAG.getMemOperand(LD->getMemOperand());
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ MO, // MemOperand
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4);
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away, we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6
+ // flag and a normal vector result).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = 0;
+
+ SDNode *LHSN = N->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if (UI->getOpcode() == PPCISD::VCMPo &&
+ UI->getOperand(1) == N->getOperand(1) &&
+ UI->getOperand(2) == N->getOperand(2) &&
+ UI->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if the flag result of the one we found
+ // has no uses, don't transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = 0;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == 0; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is a MFCR instruction, we know this is safe. Otherwise we
+ // give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFCR)
+ return SDValue(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ std::vector<MVT> VTs;
+ SDValue Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ VTs.push_back(LHS.getOperand(2).getValueType());
+ VTs.push_back(MVT::Flag);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
+
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, PPC::G8RCRegisterClass);
+ return std::make_pair(0U, PPC::GPRCRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, PPC::F4RCRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, PPC::F8RCRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, PPC::VRRCRegisterClass);
+ case 'y': // crrc
+ return std::make_pair(0U, PPC::CRRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops. If hasMemory is true
+/// it means one of the asm constraints of the inline asm instruction being
+/// processed is 'm'.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
+ bool hasMemory,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0,0);
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return; // Must be an immediate to match.
+ unsigned Value = CST->getZExtValue();
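+ // Illustrative values: 0x7FFF satisfies 'I' and 'K', 0x12340000 satisfies
+ // 'J' and 'L', 32 satisfies both 'M' and 'N', and 0 satisfies 'O'.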
+ switch (Letter) {
+ default: assert(0 && "Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if ((short)Value == (int)Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if ((short)Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if ((Value >> 16) == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if ((int)Value > 0 && isPowerOf2_32(Value))
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if ((short)-Value == (int)-Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // Handle standard constraint letters.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only supports r+r and r+i forms:
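+ // For example (illustrative): "lwz r3, 8(r4)" is the r+i form handled by
+ // Scale == 0, while "lwzx r3, r4, r5" is the r+r form of Scale == 1.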
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+ // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1);
+}
+
+bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ // Just load the return address off the stack.
+ SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
+
+ // Make sure the function really does not optimize away the store of the RA
+ // to the stack.
+ FuncInfo->setLRStoreRequired();
+ return DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), RetAddrFI, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())
+ && MFI->getStackSize();
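+ // That is, the frame address is R31/X31 only when a frame pointer is
+ // actually maintained (frame pointer elimination disabled or variable
+ // sized objects present) and the frame is non-empty; otherwise the stack
+ // pointer register doubles as the frame address.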
+
+ if (isPPC64)
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1,
+ MVT::i64);
+ else
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1,
+ MVT::i32);
+}
+
+bool
+PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The PowerPC target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 0000000..7946474
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,394 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "PPC.h"
+#include "PPCSubtarget.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// an f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to,
+ /// then a SRCVALUE for the address.
+ STFIWX,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
+ /// registers.
+ EXTSW_32,
+
+ /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32,
+
+ /// CALL - A direct function call.
+ CALL_Macho, CALL_ELF,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL_Macho, BCTRL_ELF,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFCR,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of a better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of a better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, SRCVALUE, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, SRCVALUE, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte-swaps them,
+ /// then puts the result in the bottom bits of the GPRC. Type can be either
+ /// i16 or i32.
+ LBRX,
+
+ // The following 5 instructions are used only as part of the
+ // long double-to-int conversion sequence.
+
+ /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
+ /// register.
+ MFFS,
+
+ /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
+ MTFSB0,
+
+ /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
+ MTFSB1,
+
+ /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
+ /// rounding towards zero. It has flags added so it won't move past the
+ /// FPSCR-setting instructions.
+ FADDRTZ,
+
+ /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
+ MTFSF,
+
+ /// LARX - This corresponds to the PPC l{w|d}arx instruction: load and
+ /// reserve indexed. This is used to implement atomic operations.
+ LARX,
+
+ /// STCX - This corresponds to the PPC stcx. instruction: store
+ /// conditional indexed. This is used to implement atomic operations.
+ STCX,
+
+ /// TAILCALL - Indicates a tail call should be taken.
+ TAILCALL,
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+
+ /// isAllNegativeZeroVector - Returns true if all elements of build_vector
+ /// are -0.0.
+ bool isAllNegativeZeroVector(SDNode *N);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ }
+
+ class PPCTargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int VarArgsStackOffset; // StackOffset for start of stack
+ // arguments.
+ unsigned VarArgsNumGPR; // Index of the first unused integer
+ // register for parameter passing.
+ unsigned VarArgsNumFPR; // Index of the first unused double
+ // register for parameter passing.
+ int ReturnAddrIndex; // FrameIndex for return slot.
+ const PPCSubtarget &PPCSubTarget;
+ public:
+ explicit PPCTargetLowering(PPCTargetMachine &TM);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ /// getPreIndexedAddressParts - Returns true if the node's address can be
+ /// legally represented as a pre-indexed load / store address, returning
+ /// the base pointer, offset, and addressing mode by reference.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegReg - Given the specified address, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg.
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegRegOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImmShift - Returns true if the address N can be
+ /// represented by a base register plus a signed 14-bit displacement
+ /// [r+imm*4]. Suitable for use by STD and friends.
+ bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB, bool is64Bit,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool is8bit, unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. This is the actual
+ /// alignment, not its logarithm.
+ unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraints of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode for load / store of the
+ /// given type.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+
+ /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
+ /// the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets that want to do tail call
+ /// optimization should implement this function.
+ virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+ SDValue Ret,
+ SelectionDAG &DAG) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ private:
+ SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
+ SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
+
+ SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ DebugLoc dl);
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ int VarArgsFrameIndex, int VarArgsStackOffset,
+ unsigned VarArgsNumGPR, unsigned VarArgsNumFPR,
+ const PPCSubtarget &Subtarget);
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex,
+ int VarArgsStackOffset, unsigned VarArgsNumGPR,
+ unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
+ int &VarArgsFrameIndex,
+ int &VarArgsStackOffset,
+ unsigned &VarArgsNumGPR,
+ unsigned &VarArgsNumFPR,
+ const PPCSubtarget &Subtarget);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget, TargetMachine &TM);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM);
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG);
+ };
+}
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 0000000..417c8ed
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,723 @@
+//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def symbolHi64 : Operand<i64> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo64 : Operand<i64> {
+ let PrintMethod = "printSymbolLo";
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getZExtValue());
+}]>;
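+// For example, (shl x, 5) uses SHL64 to compute 63 - 5 = 58, the mask-end
+// of the RLDICR emitted by the SHL pattern at the bottom of this file.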
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+}]>;
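+// For example, (srl x, 5) uses SRL64 to compute 64 - 5 = 59 as the RLDICL
+// rotate amount; the original shift amount becomes the mask-begin.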
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+}]>;
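+// For example, for N = 0x0123456789ABCDEF, HI32_48 yields 0x4567 and
+// HI48_64 yields 0x0123 (bits [47:32] and [63:48] respectively).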
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
+ PPC970_Unit_BRU;
+
+// Macho ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_Macho : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_Macho : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Macho (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_Macho : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Macho)]>, Requires<[In64BitMode]>;
+ }
+}
+
+// ELF 64 ABI calls are handled the same way as the Macho ABI calls above;
+// this block defines BL8_ELF and BLA8_ELF.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_ELF : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_ELF : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_ELF (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_ELF)]>, Requires<[In64BitMode]>;
+ }
+}
+
+
+// Calls
+def : Pat<(PPCcall_Macho (i64 tglobaladdr:$dst)),
+ (BL8_Macho tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Macho (i64 texternalsym:$dst)),
+ (BL8_Macho texternalsym:$dst)>;
+
+def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)),
+ (BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)),
+ (BL8_ELF texternalsym:$dst)>;
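+// Calls to known symbols select the BL8 forms above and calls to absolute
+// immediate addresses select the BLA8 forms; other indirect calls are
+// presumably routed through MTCTR8/BCTRL8 (see the SPR section below).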
+
+// Atomic operations
+let usesCustomDAGSchedInserter = 1 in {
+ let Uses = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_SUB_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_OR_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_XOR_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_AND_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_NAND_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I64 PSEUDO!",
+ [(set G8RC:$dst,
+ (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new),
+ "${:comment} ATOMIC_SWAP_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ }
+}
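+// A sketch of the expansion: each pseudo above is expanded by the custom
+// inserter (EmitAtomicBinary and friends in PPCISelLowering) into a
+// ldarx/stdcx. retry loop built from the instructions defined below.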
+
+// Instructions to support atomic operations
+def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", LdStLDARX,
+ [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", LdStSTDCX,
+ [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi8 :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd8 $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa8 $func $offset",
+ [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr8 $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
+ (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
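+// TC_RETURN carries the callee and the stack adjustment (see the node
+// comment in PPCISelLowering.h); the TAILB8/TAILBA8/TAILBCTR8 branches
+// above are the terminators the pseudos eventually become.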
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+let Uses = [CTR8] in {
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [X1], Uses = [X1] in
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),
+ "${:comment} DYNALLOC8 $result, $negsize, $fpsi",
+ [(set G8RC:$result,
+ (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
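+// DYNALLOC8 is the 64-bit form of the DYNALLOC node documented in
+// PPCISelLowering.h; PPCRegisterInfo::eliminateFrameIndex rewrites it into
+// the actual stack-pointer arithmetic.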
+
+let Defs = [LR8] in {
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR8] in {
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// Copies, extends, truncates.
+def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+
+def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, immSExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+
+// Logical ops.
+def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+
+// Logical ops with immediate.
+def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+
+def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+
+def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+
+def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ PPC970_DGroup_Cracked;
+
+def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>;
+def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>;
+def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+
+
+
+def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhd $rT, $rA, $rB", IntMulHW,
+ [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhdu $rT, $rA, $rB", IntMulHWU,
+ [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+
+def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
+ "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+
+def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "sld $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srd $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srad $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+
+def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+
+def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
+/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
+def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+
+def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IntRotateD,
+ [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
+ "cntlzd $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+
+def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divd $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divdu $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulld $rT, $rA, $rB", IntMulHD,
+ [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+
+
+let isCommutable = 1 in {
+def RLDIMI : MDForm_1<30, 3,
+ (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+
+// Rotate instructions.
+def RLDCL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB),
+ "rldcl $rA, $rS, $rB, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICR : MDForm_1<30, 1,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
+ "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ []>, isPPC64;
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA,
+ [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+
+// Update forms.
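+// The "u" (update) forms also write the effective address back into the
+// base register, modelled here by the extra $ea_result output that
+// RegConstraint ties to the input pointer register.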
+let mayLoad = 1 in
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
+ ptr_rc:$rA),
+ "lhau $rD, $disp($rA)", LdStGeneral,
+ []>, RegConstraint<"$rA = $ea_result">,
+ NoEncode<"$ea_result">;
+// NO LWAU! The ISA provides no lwau instruction.
+
+}
+
+// Zero extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+let mayLoad = 1 in {
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+
+// Full 8-byte loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
+ "ld $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
+ "ldx $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
+
+let mayLoad = 1 in
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+ "ldu $rD, $addr", LdStLD,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+}
+
+let PPC970_Unit = 2 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
+ "std $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdx $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+let PPC970_Unit = 2 in {
+
+def STBU8 : DForm_1<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+
+
+def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ s16immX4:$ptroff, ptr_rc:$ptrreg),
+ "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+let mayStore = 1 in
+def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTD,
+ []>, isPPC64;
+
+// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
+def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
+ "std $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
+def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
+ "stdx $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
+def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfid $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctidz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
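+// The zext below copies the value into a 64-bit register with OR4To8 and
+// clears the high 32 bits with "rldicl x, 0, 32" (the mask keeps PPC bits
+// 32-63, i.e. the low word); anyext and trunc are plain register copies.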
+def : Pat<(i64 (zext GPRC:$in)),
+ (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>;
+def : Pat<(i64 (anyext GPRC:$in)),
+ (OR4To8 GPRC:$in, GPRC:$in)>;
+def : Pat<(i32 (trunc G8RC:$in)),
+ (OR8To4 G8RC:$in, G8RC:$in)>;
+
+// Extending loads with i64 targets.
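+// i1 values are stored in memory as a byte holding 0 or 1, so the plain
+// zero-extending byte loads LBZ8/LBZX8 also cover the i1 cases below.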
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
+// amounts.
+def : Pat<(sra G8RC:$rS, GPRC:$rB),
+ (SRAD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(srl G8RC:$rS, GPRC:$rB),
+ (SRD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(shl G8RC:$rS, GPRC:$rB),
+ (SLD G8RC:$rS, GPRC:$rB)>;
+
+// SHL/SRL
+def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
+ (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
+
+// ROTL
+def : Pat<(rotl G8RC:$in, GPRC:$sh),
+ (RLDCL G8RC:$in, GPRC:$sh, 0)>;
+def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, imm:$imm, 0)>;
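+// e.g. (rotl x, 8) becomes "rldicl x, 8, 0": rotate left by 8 with a mask
+// that keeps all 64 bits.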
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 G8RC:$in, tconstpool:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 G8RC:$in, tjumptable:$g)>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 0000000..9a5be79
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,668 @@
+//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
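+// The "unary" variants above match shuffles of the form (X, undef), which
+// the DAG combiner forms from shuffles whose two vector inputs are the
+// same (compare the VSLDOI_unary comment below).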
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, false) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, true) != -1;
+}], VSLDOI_unary_get_imm>;
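+
+// For example (illustrative): a shuffle of x with itself whose mask selects
+// bytes <3,4,...,15,0,1,2> is canonicalized by the dag combiner to
+// vector_shuffle(x, undef, mask); isVSLDOIShuffleMask(N, true) then
+// recognizes it as vsldoi(x,x,3), i.e. bytes 3..18 of the double-width x||x.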
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
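+
+// For example, a v16i8 vector_shuffle whose mask is all 5s splats byte 5 of
+// the input; vspltb_shuffle matches it, and VSPLTB_get_imm produces the
+// immediate for "vspltb $vD, $vB, 5".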
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
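+
+// Broadly, get_VSPLTI_elt succeeds when the build_vector is a splat of a
+// value representable as a sign-extended 5-bit immediate at the requested
+// element size; e.g. a v4i32 constant of all 12s can be materialized as
+// "vspltisw $vD, 12".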
+
+def V_immneg0 : PatLeaf<(build_vector), [{
+ return PPC::isAllNegativeZeroVector(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int - A VAForm_1a intrinsic definition.
+class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+
+// VX1_Int - A VXForm_1 intrinsic definition.
+class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+
+// VX2_Int - A VXForm_2 intrinsic definition.
+class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def DSS : DSS_Form<822, (outs),
+ (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dss $STRM", LdStGeneral /*FIXME*/, []>;
+def DSSALL : DSS_Form<822, (outs),
+ (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dssall", LdStGeneral /*FIXME*/, []>;
+def DST : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def DST64 : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT64 : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST64 : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT64 : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
+ "mfvscr $vD", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
+ "mtvscr $vB", LdStGeneral,
+ [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
+ "lvebx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
+ "lvehx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
+ "lvewx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
+ "lvx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
+ "lvxl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsr $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvebx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvehx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvewx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvxl $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB))]>,
+ Requires<[FPContractions]>;
+def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fsub V_immneg0,
+ (fsub (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB)))]>,
+ Requires<[FPContractions]>;
+
+def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
+def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
+def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
+def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
+def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ [(set VRRC:$vD,
+ (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddfp $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddubm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
+def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
+def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
+def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
+def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
+def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
+def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+
+
+def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vand $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vandc $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), (vnot VRRC:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfsx $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfux $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctsxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctuxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
+def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
+def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
+def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
+def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
+def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
+
+def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
+def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
+def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
+def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
+def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
+def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
+def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
+def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
+def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
+def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
+def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
+def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
+def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
+def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+
+def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
+
+def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
+def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
+def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
+def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
+def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
+def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
+
+def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
+def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
+def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
+def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
+def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
+def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
+def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
+def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+
+def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+
+def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubfp $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsububm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
+def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
+def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
+def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
+def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
+def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
+def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
+def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
+def VSUM4SBS: VX1_Int<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
+def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
+def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+
+def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vnor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vnot (or (v4i32 VRRC:$vA), VRRC:$vB)))]>;
+def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vxor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
+def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
+def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+
+def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
+def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
+def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
+def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
+def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+
+def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltb $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vsplth $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltw $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+
+def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
+def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
+def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
+def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
+def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
+def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
+def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
+def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+
+
+def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltish $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
+def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
+def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
+def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
+def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuhum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuwum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
+def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
+def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
+def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
+def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
+def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
+
+// f32 element comparisons.
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
+def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// DS* intrinsics
+def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
+def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
+
+// * 32-bit
+def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+
+// * 64-bit
+def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
+ (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+
+// Bit conversions.
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def : Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
+          (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
+def : Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
+          (VPKUWUM VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
+          (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+
+// Match vmrg*(x,x)
+def : Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
+          (VMRGLB VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
+          (VMRGLH VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
+          (VMRGLW VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
+          (VMRGHB VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
+          (VMRGHH VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
+          (VMRGHW VRRC:$vA, VRRC:$vA)>;
+
+// Logical Operations
+def : Pat<(v4i32 (vnot VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(v4i32 (vnot_conv VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+
+def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))),
+ (VNOR VRRC:$A, VRRC:$B)>;
+def : Pat<(v4i32 (and VRRC:$A, (vnot_conv VRRC:$B))),
+ (VANDC VRRC:$A, VRRC:$B)>;
+
+def : Pat<(fmul VRRC:$vA, VRRC:$vB),
+ (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+
+// Fused multiply-add and multiply-subtract for packed float. These are
+// represented separately from the real instructions above for operations that
+// must keep the additional precision, such as Newton-Raphson (used by the
+// divide and sqrt expansions).
+def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+ (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
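+
+// Note: vperm selects each result byte from the 32-byte concatenation vA:vB
+// using the low five bits of the corresponding byte of the control vector vC.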
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 0000000..1de6911
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aids for building PPC insts ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - Add a reference to the base of an abstract object on
+/// the stack frame of the current function. The reference uses the abstract
+/// FrameIndex in place of a base register until it is resolved to a real base
+/// register and displacement. A constant offset may be specified as well.
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
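+
+// Illustrative use (a sketch; the opcode and register names are placeholders):
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(PPC::LWZ), DestReg), FI);
+// builds "lwz DestReg, 0(<FI>)"; the frame-index operand is rewritten to a
+// real base register and displacement when the frame layout is finalized.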
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 0000000..54cebcd
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,875 @@
+//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+
+class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let B = 0;
+ let C = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_6<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<14> DS;
+ bits<5> RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-29} = DS;
+ let Inst{30-31} = xo;
+}
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 31;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+ bits<5> FM;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FM;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<1> T;
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{2-6};
+ let BI{0-1} = BIBO{0-1};
+ let BI{2-4} = CR;
+ let BH = 0;
+}
+
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
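+
+// Note on XFXForm_1: the two 5-bit halves of the SPR number are swapped in
+// the encoding (low half in Inst{11-15}, high half in Inst{16-20}), matching
+// the split SPR field of the mfspr/mtspr instructions in the PowerPC ISA.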
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> ST;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ string cstr, InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FM;
+ bits<5> RT;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+ let Constraints = cstr;
+
+ let Inst{6} = 0;
+ let Inst{7-14} = FM;
+ let Inst{15} = 0;
+ let Inst{16-20} = RT;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
+
+
+class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : I<0, OOL, IOL, asmstr, NoItinerary> {
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 0000000..778f034
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,818 @@
+//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPredicates.h"
+#include "PPCGenInstrInfo.inc"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+using namespace llvm;
+
+extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+
+PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
+ : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
+ RI(*TM.getSubtargetImpl(), *this) {}
+
+bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg,
+ unsigned& sourceSubIdx,
+ unsigned& destSubIdx) const {
+ sourceSubIdx = destSubIdx = 0; // No sub-registers.
+
+ unsigned oc = MI.getOpcode();
+ if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR ||
+ oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isReg() &&
+ "invalid PPC OR instruction!");
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::ADDI) { // addi r1, r2, 0
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(2).isImm() &&
+ "invalid PPC ADDI instruction!");
+ if (MI.getOperand(1).isReg() && MI.getOperand(2).getImm() == 0) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::ORI) { // ori r1, r2, 0
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isImm() &&
+ "invalid PPC ORI instruction!");
+ if (MI.getOperand(2).getImm() == 0) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::FMRS || oc == PPC::FMRD ||
+ oc == PPC::FMRSD) { // fmr r1, r2
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid PPC FMR instruction");
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ } else if (oc == PPC::MCRF) { // mcrf cr1, cr2
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid PPC MCRF instruction");
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ return false;
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+// commuteInstruction - We can commute rlwimi instructions, but only if the
+// rotate amt is zero. We also have to munge the immediates a bit.
+MachineInstr *
+PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI)
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImm() != 0)
+ return 0;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
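+  //
+  // Worked example: if MB=16 and ME=23, M covers bits 16..23. The commuted
+  // form uses mask((23+1)&31, (16-1)&31) = mask(24,15), which wraps around
+  // and is exactly ~M, so swapping the two source operands preserves Op0.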
+
+ // Swap op1/op2
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+  // If the destination is tied to the first source (the two-address form),
+  // the destination register must be updated as well.
+ if (Reg0 == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ // Masks.
+ unsigned MB = MI->getOperand(4).getImm();
+ unsigned ME = MI->getOperand(5).getImm();
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill))
+ .addImm((ME+1) & 31)
+ .addImm((MB-1) & 31);
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+
+ // Swap the mask around.
+ MI->getOperand(4).setImm((ME+1) & 31);
+ MI->getOperand(5).setImm((MB-1) & 31);
+ return MI;
+}
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ BuildMI(MBB, MI, DL, get(PPC::NOP));
+}
+
+
+// Branch analysis.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ if (!LastInst->getOperand(2).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+    // Otherwise, we don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+  // If the block ends with a PPC::BCC followed by a PPC::B, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(2).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+  // If the block ends with two PPC::Bs, handle it.  The second one is never
+  // executed, so remove it if allowed.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+  // FIXME: this should probably have a DebugLoc argument.
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, dl, get(PPC::B)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, dl, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, dl, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, dl, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (DestRC == PPC::GPRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (DestRC == PPC::G8RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (DestRC == PPC::F4RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::FMRS), DestReg).addReg(SrcReg);
+ } else if (DestRC == PPC::F8RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::FMRD), DestReg).addReg(SrcReg);
+ } else if (DestRC == PPC::CRRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg);
+ } else if (DestRC == PPC::VRRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (DestRC == PPC::CRBITRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::CROR), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else {
+    // Copies within other register classes are not yet handled.
+ return false;
+ }
+
+ return true;
+}
+
+bool
+PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (RC == PPC::GPRCRegisterClass) {
+ if (SrcReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+      // FIXME: this spills LR immediately to memory, using R11 as a scratch
+      // register since we know R11 cannot be used in the prolog/epilog.  This
+      // is a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(PPC::R11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (SrcReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+      // FIXME: this spills LR immediately to memory, using R11 as a scratch
+      // register since we know R11 cannot be used in the prolog/epilog.  This
+      // is a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(PPC::X11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (RC == PPC::F4RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (RC == PPC::CRRCRegisterClass) {
+ if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ // FIXME (64-bit): Enable
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else {
+ // FIXME: We use R0 here, because it isn't available for RA. We need to
+ // store the CR in the low 4-bits of the saved value. First, issue a MFCR
+ // to save all of the CRBits.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are
+ // in CR0's slot.
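+      // (For example, spilling CR2: ShiftBits = 2*4 = 8, and rotating the
+      // MFCR result left by 8 moves CR2's four bits into bits 0-3, the slot
+      // CR0 normally occupies.)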
+ if (SrcReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
+ // rlwinm r0, r0, ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31));
+ }
+
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(PPC::R0,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::CRBITRCRegisterClass) {
+    // FIXME: We spill the containing CRi here because there is no mtcrf on a
+    // single bit.  Since the backend currently only uses CR1EQ as an
+    // individual bit, this should not cause any bugs.  If other CR bits come
+    // into use, the following code may be invalid.
+ unsigned Reg = 0;
+    if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+        SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+      Reg = PPC::CR0;
+    else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+             SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+      Reg = PPC::CR1;
+    else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+             SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+      Reg = PPC::CR2;
+    else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+             SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+      Reg = PPC::CR3;
+    else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+             SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+      Reg = PPC::CR4;
+    else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+             SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+      Reg = PPC::CR5;
+    else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+             SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+      Reg = PPC::CR6;
+    else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+             SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+      Reg = PPC::CR7;
+
+ return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (RC == PPC::VRRCRegisterClass) {
+    // Vector stores only have a reg+reg (indexed) form, so materialize the
+    // frame address into R0 first.  Emit:
+    // R0 = ADDI FI#
+    // STVX VAL, 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+}
+
+void PPCInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ if (Addr[0].isFI()) {
+ if (StoreRegToStackSlot(MF, SrcReg, isKill,
+ Addr[0].getIndex(), RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
+
+ return;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ unsigned Opc = 0;
+ if (RC == PPC::GPRCRegisterClass) {
+ Opc = PPC::STW;
+ } else if (RC == PPC::G8RCRegisterClass) {
+ Opc = PPC::STD;
+ } else if (RC == PPC::F8RCRegisterClass) {
+ Opc = PPC::STFD;
+ } else if (RC == PPC::F4RCRegisterClass) {
+ Opc = PPC::STFS;
+ } else if (RC == PPC::VRRCRegisterClass) {
+ Opc = PPC::STVX;
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+void
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ if (RC == PPC::GPRCRegisterClass) {
+ if (DestReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (DestReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
+ } else {
+      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
+                                                 PPC::X11), FrameIdx));
+      NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+ FrameIdx));
+ } else if (RC == PPC::F4RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+ FrameIdx));
+ } else if (RC == PPC::CRRCRegisterClass) {
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0),
+ FrameIdx));
+
+    // If the reloaded register isn't CR0, rotate the bits right so that they
+    // end up in the destination CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
+      // rlwinm r0, r0, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31));
+ }
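+    // (For example, reloading CR2: ShiftBits = 8, so the rotate amount is
+    // 32-8 = 24, i.e. a rotate right by 8, which moves bits 0-3 of the
+    // reloaded word back into CR2's slot before the MTCRF.)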
+
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0));
+ } else if (RC == PPC::CRBITRCRegisterClass) {
+
+ unsigned Reg = 0;
+    if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
+        DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
+      Reg = PPC::CR0;
+    else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
+             DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
+      Reg = PPC::CR1;
+    else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
+             DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
+      Reg = PPC::CR2;
+    else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
+             DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
+      Reg = PPC::CR3;
+    else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
+             DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
+      Reg = PPC::CR4;
+    else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
+             DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
+      Reg = PPC::CR5;
+    else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
+             DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
+      Reg = PPC::CR6;
+    else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
+             DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
+      Reg = PPC::CR7;
+
+ return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (RC == PPC::VRRCRegisterClass) {
+    // Vector loads only have a reg+reg (indexed) form, so materialize the
+    // frame address into R0 first.  Emit:
+    // R0 = ADDI FI#
+    // Dest = LVX 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+}
+
+void
+PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+}
+
+void PPCInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ if (Addr[0].isFI()) {
+ LoadRegFromStackSlot(MF, DebugLoc::getUnknownLoc(),
+ DestReg, Addr[0].getIndex(), RC, NewMIs);
+ return;
+ }
+
+ unsigned Opc = 0;
+ if (RC == PPC::GPRCRegisterClass) {
+ assert(DestReg != PPC::LR && "Can't handle this yet!");
+ Opc = PPC::LWZ;
+ } else if (RC == PPC::G8RCRegisterClass) {
+ assert(DestReg != PPC::LR8 && "Can't handle this yet!");
+ Opc = PPC::LD;
+ } else if (RC == PPC::F8RCRegisterClass) {
+ Opc = PPC::LFD;
+ } else if (RC == PPC::F4RCRegisterClass) {
+ Opc = PPC::LFS;
+ } else if (RC == PPC::VRRCRegisterClass) {
+ Opc = PPC::LVX;
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+/// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
+/// copy instructions, turning them into load/store instructions.
+MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ if (Ops.size() != 1) return NULL;
+
+ // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
+ // it takes more than one instruction to store it.
+ unsigned Opc = MI->getOpcode();
+ unsigned OpNum = Ops[0];
+
+ MachineInstr *NewMI = NULL;
+ if ((Opc == PPC::OR &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW))
+ .addReg(InReg, getKillRegState(isKill)),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ))
+ .addReg(OutReg,
+ RegState::Define |
+ getDeadRegState(isDead)),
+ FrameIndex);
+ }
+ } else if ((Opc == PPC::OR8 &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD))
+ .addReg(InReg, getKillRegState(isKill)),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD))
+ .addReg(OutReg,
+ RegState::Define |
+ getDeadRegState(isDead)),
+ FrameIndex);
+ }
+ } else if (Opc == PPC::FMRD) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD))
+ .addReg(InReg, getKillRegState(isKill)),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD))
+ .addReg(OutReg,
+ RegState::Define |
+ getDeadRegState(isDead)),
+ FrameIndex);
+ }
+ } else if (Opc == PPC::FMRS) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS))
+ .addReg(InReg, getKillRegState(isKill)),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS))
+ .addReg(OutReg,
+ RegState::Define |
+ getDeadRegState(isDead)),
+ FrameIndex);
+ }
+ }
+
+ return NewMI;
+}
+
+bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ if (Ops.size() != 1) return false;
+
+ // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
+ // it takes more than one instruction to store it.
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == PPC::OR &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
+ return true;
+ else if ((Opc == PPC::OR8 &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
+ return true;
+ else if (Opc == PPC::FMRD || Opc == PPC::FMRS)
+ return true;
+
+ return false;
+}
+
+
+bool PPCInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case PPC::BLR: // Return.
+ case PPC::B: // Uncond branch.
+ case PPC::BCTR: // Indirect branch.
+ return true;
+ default: return false;
+ }
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
+
+/// GetInstSizeInBytes - Return the maximum number of bytes of code that the
+/// specified instruction may occupy.
+///
+unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case PPC::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return MF->getTarget().getTargetAsmInfo()->getInlineAsmLength(AsmStr);
+ }
+ case PPC::DBG_LABEL:
+ case PPC::EH_LABEL:
+ case PPC::GC_LABEL:
+ return 0;
+ default:
+ return 4; // PowerPC instructions are all 4 bytes
+ }
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 0000000..492634c
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,168 @@
+//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_INSTRUCTIONINFO_H
+#define POWERPC32_INSTRUCTIONINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "PPCRegisterInfo.h"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
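+// (Illustrative sketch, assuming the flags live in an instruction's
+// TargetInstrDesc::TSFlags:
+//   PPC970_Unit U = (PPC970_Unit)(Desc.TSFlags & PPC970_Mask);
+// recovers the pipeline the instruction issues to.)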
+}
+
+
+class PPCInstrInfo : public TargetInstrInfoImpl {
+ PPCTargetMachine &TM;
+ const PPCRegisterInfo RI;
+
+ bool StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+public:
+ explicit PPCInstrInfo(PPCTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+  // commuteInstruction - We can commute rlwimi instructions, but only if the
+  // rotate amount is zero.  We also have to munge the immediates a bit.
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  /// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
+  /// copy instructions, turning them into load/store instructions.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  /// GetInstSizeInBytes - Return the maximum number of bytes of code that
+  /// the specified instruction may occupy.
+  ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 0000000..772e25a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,1475 @@
+//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 3, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 4, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
+]>;
+
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// These nodes are used for long double->int conversions.  They change the
+// bits in the FPSCR, which is not otherwise modelled.
+def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+ [SDNPOutFlag]>;
+def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInFlag, SDNPOutFlag]>;
+def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInFlag, SDNPOutFlag]>;
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
+ [SDNPInFlag, SDNPOutFlag]>;
+def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
+ [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
+ SDTCisVT<3, f64>]>,
+ [SDNPInFlag]>;
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+
+def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
+def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall_Macho : SDNode<"PPCISD::CALL_Macho", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCcall_ELF : SDNode<"PPCISD::CALL_ELF", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCbctrl_Macho : SDNode<"PPCISD::BCTRL_Macho", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def PPCtailcall : SDNode<"PPCISD::TAILCALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support atomic operations
+def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
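+// (Illustrative: these feed the rlwinm-based shift patterns later in this
+// file; e.g. a left shift by 5 becomes rlwinm rD, rS, 5, 0, 26 with
+// SHL32(5) = 26, and a logical right shift by 8 becomes
+// rlwinm rD, rS, 24, 8, 31 with SRL32(8) = 24.)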
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+  // Transformation function: return the high 16 bits, adjusted for the sign
+  // of the low 16 bits (the "@ha" value that pairs with a low-half add).
+ signed int Val = N->getZExtValue();
+ return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
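+// (Worked example: Val = 0x12348000 has a negative low half, so HA16 yields
+// (0x12348000 - (-32768)) >> 16 = 0x12350000 >> 16 = 0x1235; addis of 0x1235
+// followed by addi of -32768 then reconstructs 0x12348000.)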
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb = 0, me;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me = 0;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(me);
+}]>;
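+// (Worked example: 0x00000FF0 is a run of ones from PPC bit 20 through bit 27
+// (bit 0 being the MSB), so MB returns 20 and ME returns 27.)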
+def maskimm32 : PatLeaf<(imm), [{
+  // maskimm32 predicate - True if the immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ else
+ return false;
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field. Used by instructions like 'addi'.
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+ else
+ return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getZExtValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+  // For 64-bit, make sure the value sign-extends correctly.
+ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
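+// (For example, 0x12340000 satisfies both predicates.  In 64-bit mode,
+// 0x80000000 still satisfies imm16ShiftedZExt but not imm16ShiftedSExt,
+// because sign-extending it from 32 bits gives 0xFFFFFFFF80000000.)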
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT {
+ list<Register> Defs = [CR0];
+ bit RC = 1;
+}
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
+ let PrintMethod = "printS16X4ImmOperand";
+}
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+}
+def aaddr : Operand<iPTR> {
+ let PrintMethod = "printAbsAddrOperand";
+}
+def piclabel: Operand<iPTR> {
+ let PrintMethod = "printPICLabel";
+}
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+}
+// Address operands
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
+ let PrintMethod = "printMemRegImmShifted";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+}
+
+// PowerPC Predicate operand.  The default value 20 = (0<<5)|20 encodes
+// "branch always"; CR0 is then a dummy reg that doesn't matter.
+def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
+ (ops (i32 20), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
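+// (Illustrative: a BCC whose pred operand is (PPC::PRED_EQ, CR0) prints as
+// "beq cr0, <target>" via the ${cond:cc} and ${cond:reg} modifiers used by
+// BCC below.)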
+
+// Define PowerPC specific addressing mode.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi),
+ "${:comment} DYNALLOC $result, $negsize, $fpsi",
+ [(set GPRC:$result,
+ (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1, // Expanded by the scheduler.
+ PPC970_Single = 1 in {
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+}
+
+// SPILL_CR - Indicate that we're spilling the CR register, so we'll need to
+// scavenge a register for it.
+def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+ "${:comment} SPILL_CR $cond $F", []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
+ "b${p:cc}lr ${p:reg}", BrB,
+ [(retflag)]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+}
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (outs), (ins target:$dst),
+ "b $dst", BrB,
+ [(br bb:$dst)]>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, target:$dst),
+ "b${cond:cc} ${cond:reg}, $dst"
+ /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+}
+
+// Macho ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,
+ CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ,
+ CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_Macho : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_Macho : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Macho (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_Macho : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Macho)]>, Requires<[In32BitMode]>;
+ }
+}
+
+// ELF ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,
+ CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ,
+ CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_ELF : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_ELF : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB,
+ [(PPCcall_ELF (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_ELF)]>, Requires<[In32BitMode]>;
+ }
+}
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa $func $offset",
+ [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In32BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
+ "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
+ "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
+ "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
+ "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
+ "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
+ "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
+ "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
+ "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+// Atomic operations
+let usesCustomDAGSchedInserter = 1 in {
+ let Uses = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+
+ def ATOMIC_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
+ "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
+ "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
+ "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", LdStLWARX,
+ [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", LdStSTWCX,
+ [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isBarrier = 1, hasCtrlDep = 1 in
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+ "lfs $rD, $src", LdStLFDU,
+ [(set F4RC:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+ "lfd $rD, $src", LdStLFD,
+ [(set F8RC:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+let mayLoad = 1 in {
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhau $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfs $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfd $rD, $addr", LdStLFD,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+ "lfsx $frD, $src", LdStLFDU,
+ [(set F4RC:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+ "lfdx $frD, $src", LdStLFDU,
+ [(set F8RC:$frD, (load xaddr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(store GPRC:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+ "stfs $rS, $dst", LdStUX,
+ [(store F4RC:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+ "stfd $rS, $dst", LdStUX,
+ [(store F8RC:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let PPC970_Unit = 2 in {
+def STBU : DForm_1<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<53, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<55, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+
+// Indexed (r+r) Stores.
+//
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(store GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+
+let mayStore = 1 in {
+def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
+ "stwux $rS, $rA, $rB", LdStGeneral,
+ []>;
+}
+def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", LdStUX,
+ [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+ "stfsx $frS, $dst", LdStUX,
+ [(store F4RC:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfdx $frS, $dst", LdStUX,
+ [(store F8RC:$frS, xaddr:$dst)]>;
+}
+
+let isBarrier = 1 in
+def SYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "sync", LdStSync,
+ [(int_ppc_sync)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ PPC970_DGroup_Cracked;
+def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IntGeneral,
+ []>;
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ "la $rD, $sym($rA)", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IntMulLI,
+ [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+
+let isReMaterializable = 1 in {
+ def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, immSExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral,
+ []>;
+def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IntCompare>;
+def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "slw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "srw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "sraw $rA, $rS, $rB", IntShift,
+ [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
+ "srawi $rA, $rS, $SH", IntShift,
+ [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
+ "cntlzw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+
+def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmpw $crD, $rA, $rB", IntCompare>;
+def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmplw $crD, $rA, $rB", IntCompare>;
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", FPCompare>;
+def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+
+let Uses = [RM] in {
+ def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
+ "frsp $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fsqrt $frD, $frB", FPSqrt,
+ [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fsqrts $frD, $frB", FPSqrt,
+ [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ }
+}
+
+/// FMR is split into 3 versions: one each for 4- and 8-byte FP, and one for
+/// extending.
+///
+/// Note that these are defined as pseudo-ops on the PPC970 because they are
+/// often coalesced away and we don't want the dispatch group builder to think
+/// that they will fill slots (which could cause the load of a LSU reject to
+/// sneak into a d-group with a store).
+def FMRS : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F4RC:$frD, F4RC:$frB)
+ PPC970_Unit_Pseudo;
+def FMRD : XForm_26<63, 72, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F8RC:$frD, F8RC:$frB)
+ PPC970_Unit_Pseudo;
+def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fextend F4RC:$frB))]>,
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+}
+
+
+// XL-Form instructions. Condition register logical ops.
+//
+def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+ "mcrf $BF, $BFA", BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "creqv $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "cror $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+ "creqv $dst, $dst, $dst", BrCR,
+ []>;
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Move to/from VRSAVE: despite being an SPR, the VRSAVE register is renamed like
+// a GPR on the PPC970. As such, copies in and out have the same performance
+// characteristics as an OR instruction.
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+// FIXME: this Uses all the CR registers. Marking it as such is
+// necessary for DeadMachineInstructionElim to do the right thing.
+// However, marking it also exposes PR 2964, and causes crashes in
+// the Local RA because it doesn't like this sequence:
+// vreg = MCRF CR0
+// MFCR <kill of whatever preg got assigned to vreg>
+// For now DeadMachineInstructionElim is turned off, so don't do the marking.
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "mfcr $rT, $FXM", SprMFCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+// Instructions to manipulate FPSCR. Only long double handling uses these.
+// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+
+let Uses = [RM], Defs = [RM] in {
+ def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IntMTFSB0,
+ [(PPCmtfsb0 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IntMTFSB0,
+ [(PPCmtfsb1 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ // MTFSF does not actually produce an FP result. We pretend it copies
+ // input reg B to the output. If we didn't do this it would look like the
+ // instruction had no outputs (because we aren't modelling the FPSCR) and
+ // it would be deleted.
+ def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
+ (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
+ "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
+ [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
+ F8RC:$rT, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+ def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
+ "mffs $rT", IntMFFS,
+ [(set F8RC:$rT, (PPCmffs))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def FADDrtz: AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// XO-Form instructions. Arithmetic instructions that can set the overflow bit.
+//
+def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divw $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divwu $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhwu $rT, $rA, $rB", IntMulHWU,
+ [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mullw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ PPC970_DGroup_Cracked;
+def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>;
+def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>;
+def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3 in { // FPU Operations.
+let Uses = [RM] in {
+ def FMADD : AForm_1<63, 29,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMADDS : AForm_1<59, 29,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUB : AForm_1<63, 28,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUBS : AForm_1<59, 28,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FNMADD : AForm_1<63, 31,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMADDS : AForm_1<59, 31,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUB : AForm_1<63, 30,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUBS : AForm_1<59, 30,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+}
+// FSEL is artificially split into 4- and 8-byte forms for the result.  To
+// avoid needing four variants, the comparison is forced to always be an
+// 8-byte double (code should use an FMRSD if the input comparison value
+// really wants to be a float); only the result and operand types get
+// 4/8-byte forms.
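+// (For reference: fsel computes FRT = (FRA >= 0.0) ? FRC : FRB, so only the
+// width of FRA matters for the comparison.)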
+def FSELD : AForm_1<63, 23,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+def FSELS : AForm_1<63, 23,
+ (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+let Uses = [RM] in {
+ def FADD : AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ def FADDS : AForm_2<59, 21,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fadds $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ def FDIV : AForm_2<63, 18,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fdiv $FRT, $FRA, $FRB", FPDivD,
+ [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ def FDIVS : AForm_2<59, 18,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fdivs $FRT, $FRA, $FRB", FPDivS,
+ [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ def FMUL : AForm_3<63, 25,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fmul $FRT, $FRA, $FRB", FPFused,
+ [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+ def FMULS : AForm_3<59, 25,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fmuls $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+ def FSUB : AForm_2<63, 20,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fsub $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ def FSUBS : AForm_2<59, 20,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fsubs $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ }
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. Rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+def RLWIMI : MForm_2<20,
+ (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
+ []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+def RLWINM : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+def RLWINMo : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>, isDOT, PPC970_DGroup_Cracked;
+def RLWNM : MForm_2<23,
+ (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
+ []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DWARF Pseudo Instructions
+//
+
+def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
+ "${:comment} .loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
+ (i32 imm:$file))]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
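+// For example (illustrative, destination register arbitrary), loading
+// 0x12345678 expands to:
+//   lis r3, 0x1234        ; r3 = 0x12340000
+//   ori r3, r3, 0x5678    ; r3 = 0x12345678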
+
+// Implement the 'not' operation with the NOR instruction.
+def NOT : Pat<(not GPRC:$in),
+ (NOR GPRC:$in, GPRC:$in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add GPRC:$in, imm:$imm),
+ (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
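+// (HA16 is the "high-adjusted" half: ADDI sign-extends its immediate, so for
+// an immediate such as 0x12348000 this emits addi with -0x8000 followed by
+// addis with 0x1235; i.e. HA16(x) = (x + 0x8000) >> 16.)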
+// OR an arbitrary immediate.
+def : Pat<(or GPRC:$in, imm:$imm),
+ (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor GPRC:$in, imm:$imm),
+ (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// SUBFIC
+def : Pat<(sub immSExt16:$imm, GPRC:$in),
+ (SUBFIC GPRC:$in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
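+// For example, a left shift by 4 becomes rlwinm $rA, $rS, 4, 0, 27 (i.e.
+// slwi), and a logical right shift by 4 becomes rlwinm $rA, $rS, 28, 4, 31
+// (i.e. srwi).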
+
+// ROTL
+def : Pat<(rotl GPRC:$in, GPRC:$sh),
+ (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
+def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
+ (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
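+// e.g. (illustrative mask) ((x rotl sh) & 0x0FFFFFF0) becomes
+//   rlwnm $rA, $rS, $rB, 4, 27
+// where the MB and ME helpers extract the first and last set bit of the
+// contiguous mask, numbered from the MSB.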
+
+// Calls
+def : Pat<(PPCcall_Macho (i32 tglobaladdr:$dst)),
+ (BL_Macho tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Macho (i32 texternalsym:$dst)),
+ (BL_Macho texternalsym:$dst)>;
+def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)),
+ (BL_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)),
+ (BL_ELF texternalsym:$dst)>;
+
+
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+ (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS GPRC:$in, tglobaladdr:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS GPRC:$in, tconstpool:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS GPRC:$in, tjumptable:$g)>;
+
+// Fused negative multiply subtract, alternate pattern
+def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
+ (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
+ Requires<[FPContractions]>;
+def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
+ (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
+ Requires<[FPContractions]>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra GPRC:$rS, GPRC:$rB),
+ (SRAW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(srl GPRC:$rS, GPRC:$rB),
+ (SRW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(shl GPRC:$rS, GPRC:$rB),
+ (SLW GPRC:$rS, GPRC:$rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(extloadf32 iaddr:$src),
+ (FMRSD (LFS iaddr:$src))>;
+def : Pat<(extloadf32 xaddr:$src),
+ (FMRSD (LFSX xaddr:$src))>;
+
+// Memory barriers
+def : Pat<(membarrier (i32 imm:$ll),
+ (i32 imm:$ls),
+ (i32 imm:$sl),
+ (i32 imm:$ss),
+ (i32 imm:$device)),
+ (SYNC)>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstr64Bit.td"
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
new file mode 100644
index 0000000..035647e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -0,0 +1,437 @@
+//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the 32-bit PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "PPCJITInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/System/Memory.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+#define BUILD_ADDIS(RD,RS,IMM16) \
+ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_ORI(RD,RS,UIMM16) \
+ ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_ORIS(RD,RS,UIMM16) \
+ ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_RLDICR(RD,RS,SH,ME) \
+ ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
+ (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
+#define BUILD_MTSPR(RS,SPR) \
+ ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
+#define BUILD_BCCTRx(BO,BI,LINK) \
+ ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
+#define BUILD_B(TARGET, LINK) \
+ ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
+
+// Pseudo-ops
+#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
+#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
+#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
+#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
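+
+// Illustrative sanity check (values computed from the macros above): loading
+// the 32-bit address 0xDEADBEEF into r12 and branching through CTR emits
+//   BUILD_LIS(12, 0xDEAD)     == 0x3D80DEAD   // lis   r12, 0xdead
+//   BUILD_ORI(12, 12, 0xBEEF) == 0x618CBEEF   // ori   r12, r12, 0xbeef
+//   BUILD_MTCTR(12)           == 0x7D8903A6   // mtctr r12
+//   BUILD_BCTR(0)             == 0x4E800420   // bctr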
+
+static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
+ intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
+ unsigned *AtI = (unsigned*)(intptr_t)At;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target
+ } else if (!is64Bit) {
+ AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_MTCTR(12); // mtctr r12
+ AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
+ } else {
+ AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
+ AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
+ AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[5] = BUILD_MTCTR(12); // mtctr r12
+ AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
+ }
+}
+
+extern "C" void PPC32CompilationCallback();
+extern "C" void PPC64CompilationCallback();
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ !(defined(__ppc64__) || defined(__FreeBSD__))
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC32CompilationCallback\n"
+"_PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13], plus the linkage
+ // and parameter areas.
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // PowerPC64 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
+ "mflr r0\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
+ // Save all int arg registers
+ "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
+ "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
+ "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
+ "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
+ "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
+ "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
+ "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
+ "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
+ "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
+ "stfd f1, 72(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr r3, r0\n"
+ "lwz r2, 208(r1)\n" // stub's frame
+ "lwz r4, 8(r2)\n" // stub's lr
+ "li r5, 0\n" // 0 == 32 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
+ "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
+ "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
+ "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
+ "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
+ "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
+ "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
+ "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
+ "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
+ "lfd f1, 72(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz r1, 208(r1)\n"
+ "lwz r2, 8(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+
+#elif defined(__PPC__) && !defined(__ppc64__)
+// Linux & FreeBSD / PPC 32 support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 8 doubles f[1-8], plus the stack frame
+ // linkage.
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl PPCCompilationCallbackC\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
+#else
+void PPC32CompilationCallback() {
+ assert(0 && "This is not a power pc, you can't execute this!");
+ abort();
+}
+#endif
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ defined(__ppc64__)
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC64CompilationCallback\n"
+"_PPC64CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13], plus the linkage
+ // and parameter areas.
+ // FIXME: need to save v[0-19] for altivec?
+ // Set up a proper stack frame
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
+ "mflr r0\n"
+ "std r0, 16(r1)\n"
+ "stdu r1, -280(r1)\n"
+ // Save all int arg registers
+ "std r10, 272(r1)\n" "std r9, 264(r1)\n"
+ "std r8, 256(r1)\n" "std r7, 248(r1)\n"
+ "std r6, 240(r1)\n" "std r5, 232(r1)\n"
+ "std r4, 224(r1)\n" "std r3, 216(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 208(r1)\n" "stfd f12, 200(r1)\n"
+ "stfd f11, 192(r1)\n" "stfd f10, 184(r1)\n"
+ "stfd f9, 176(r1)\n" "stfd f8, 168(r1)\n"
+ "stfd f7, 160(r1)\n" "stfd f6, 152(r1)\n"
+ "stfd f5, 144(r1)\n" "stfd f4, 136(r1)\n"
+ "stfd f3, 128(r1)\n" "stfd f2, 120(r1)\n"
+ "stfd f1, 112(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 1.
+ "mr r3, r0\n"
+ "ld r2, 280(r1)\n" // stub's frame
+ "ld r4, 16(r2)\n" // stub's lr
+ "li r5, 1\n" // 1 == 64 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "ld r10, 272(r1)\n" "ld r9, 264(r1)\n"
+ "ld r8, 256(r1)\n" "ld r7, 248(r1)\n"
+ "ld r6, 240(r1)\n" "ld r5, 232(r1)\n"
+ "ld r4, 224(r1)\n" "ld r3, 216(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 208(r1)\n" "lfd f12, 200(r1)\n"
+ "lfd f11, 192(r1)\n" "lfd f10, 184(r1)\n"
+ "lfd f9, 176(r1)\n" "lfd f8, 168(r1)\n"
+ "lfd f7, 160(r1)\n" "lfd f6, 152(r1)\n"
+ "lfd f5, 144(r1)\n" "lfd f4, 136(r1)\n"
+ "lfd f3, 128(r1)\n" "lfd f2, 120(r1)\n"
+ "lfd f1, 112(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "ld r1, 280(r1)\n"
+ "ld r2, 16(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+#else
+void PPC64CompilationCallback() {
+ assert(0 && "This is not a power pc, you can't execute this!");
+ abort();
+}
+#endif
+
+extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
+ // Adjust the pointer to the address of the call instruction in the stub
+ // emitted by emitFunctionStub, rather than the instruction after it.
+ unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
+ unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
+
+ void *Target = JITCompilerFunction(StubCallAddr);
+
+ // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
+ // it to branch directly to the destination. If so, rewrite it so it does not
+ // need to go through the stub anymore.
+ unsigned OrigCallInst = *OrigCallAddr;
+ if ((OrigCallInst >> 26) == 18) { // Direct call.
+ intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ // Clear the original target out.
+ OrigCallInst &= (63 << 26) | 3;
+ // Fill in the new target.
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
+ // Replace the call.
+ *OrigCallAddr = OrigCallInst;
+ }
+ }
+
+ // Assert that we are coming from a stub that was created with our
+ // emitFunctionStub.
+ if ((*StubCallAddr >> 26) == 18)
+ StubCallAddr -= 3;
+ else {
+ assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
+ StubCallAddr -= is64Bit ? 9 : 6;
+ }
+
+ // Rewrite the stub with an unconditional branch to the target, for any users
+ // who took the address of the stub.
+ EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+
+ // Put the address of the target function to call and the address to return to
+ // after calling the target function in a place that is easy to get on the
+ // stack after we restore all regs.
+ return Target;
+}
+
+
+
+TargetJITInfo::LazyResolverFn
+PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
+ JITCompilerFunction = Fn;
+ return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
+}
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+defined(__APPLE__)
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
+ Fn != (void*)(intptr_t)PPC64CompilationCallback) {
+ JCE.startGVStub(F, 7*4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 7*4);
+ return JCE.finishGVStub(F);
+ }
+
+ JCE.startGVStub(F, 10*4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ if (is64Bit) {
+ JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (TM.getSubtargetImpl()->isMachoABI()){
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ } else {
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
+ }
+ intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 10*4);
+ return JCE.finishGVStub(F);
+}
+
+
+void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((PPC::RelocationType)MR->getRelocationType()) {
+ default: assert(0 && "Unknown relocation type!");
+ case PPC::reloc_pcrel_bx:
+ // PC-relative relocation for b and bl instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
+ break;
+ case PPC::reloc_pcrel_bcx:
+ // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
+ // bcx instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
+ break;
+ case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
+ case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
+ ResultPtr += MR->getConstantVal();
+
+ // If this is a high-part access, get the high-part.
+ if (MR->getRelocationType() == PPC::reloc_absolute_high) {
+ // If the low part will have a carry (really a borrow) from the low
+ // 16-bits into the high 16, add a bit to borrow from.
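+ // e.g. for ResultPtr == 0x1234ABCD the low half 0xABCD will be
+ // sign-extended negative when added back, so the high half must become
+ // 0x1235; equivalently, ha16(x) == (x + 0x8000) >> 16.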
+ if (((int)ResultPtr << 16) < 0)
+ ResultPtr += 1 << 16;
+ ResultPtr >>= 16;
+ }
+
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
+ unsigned HighBits = *RelocPos & ~65535;
+ *RelocPos = LowBits | HighBits; // Slam into low 16-bits
+ break;
+ }
+ case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
+ ResultPtr += MR->getConstantVal();
+ // Do the addition then mask, so the addition does not overflow the 14-bit
+ // immediate field of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
+ unsigned HighBits = *RelocPos & 0xFFFF0003;
+ *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
+ break;
+ }
+ }
+ }
+}
+
+void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
new file mode 100644
index 0000000..2e25b29
--- /dev/null
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -0,0 +1,48 @@
+//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_JITINFO_H
+#define POWERPC_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+
+ class PPCJITInfo : public TargetJITInfo {
+ protected:
+ PPCTargetMachine &TM;
+ bool is64Bit;
+ public:
+ PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
+ useGOT = 0;
+ is64Bit = tmIs64Bit;
+ }
+
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ };
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
new file mode 100644
index 0000000..3bfa6d7
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
@@ -0,0 +1,151 @@
+//===-- PPCMachOWriterInfo.cpp - Mach-O Writer Info for the PowerPC -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Mach-O writer information for the PowerPC backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachOWriterInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachORelocation.h"
+#include "llvm/Support/OutputBuffer.h"
+#include <cstdio>
+using namespace llvm;
+
+PPCMachOWriterInfo::PPCMachOWriterInfo(const PPCTargetMachine &TM)
+ : TargetMachOWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64 ?
+ HDR_CPU_TYPE_POWERPC64 :
+ HDR_CPU_TYPE_POWERPC,
+ HDR_CPU_SUBTYPE_POWERPC_ALL) {}
+PPCMachOWriterInfo::~PPCMachOWriterInfo() {}
+
+/// GetTargetRelocation - For the MachineRelocation MR, convert it to one or
+/// more PowerPC MachORelocation(s), add the new relocations to the
+/// MachOSection, and rewrite the instruction at the section offset if required
+/// by that relocation type.
+unsigned PPCMachOWriterInfo::GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIdx,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered,
+ bool isExtern) const {
+ unsigned NumRelocs = 0;
+ uint64_t Addr = 0;
+
+ // Get the address of whatever it is we're relocating, if possible.
+ if (!isExtern)
+ Addr = (uintptr_t)MR.getResultPointer() + ToAddr;
+
+ switch ((PPC::RelocationType)MR.getRelocationType()) {
+ default: assert(0 && "Unknown PPC relocation type!");
+ case PPC::reloc_absolute_low_ix:
+ assert(0 && "Unhandled PPC relocation type!");
+ break;
+ case PPC::reloc_vanilla:
+ {
+ // FIXME: need to handle 64 bit vanilla relocs
+ MachORelocation VANILLA(MR.getMachineCodeOffset(), ToIdx,
+ false, 2, isExtern,
+ PPC_RELOC_VANILLA,
+ Scattered, (intptr_t)MR.getResultPointer());
+ ++NumRelocs;
+
+ if (Scattered) {
+ RelocOut.outword(VANILLA.getPackedFields());
+ RelocOut.outword(VANILLA.getAddress());
+ } else {
+ RelocOut.outword(VANILLA.getAddress());
+ RelocOut.outword(VANILLA.getPackedFields());
+ }
+
+ intptr_t SymbolOffset;
+
+ if (Scattered)
+ SymbolOffset = Addr + MR.getConstantVal();
+ else
+ SymbolOffset = Addr;
+
+ printf("vanilla fixup: sec_%x[%x] = %x\n", FromIdx,
+ unsigned(MR.getMachineCodeOffset()),
+ unsigned(SymbolOffset));
+ SecOut.fixword(SymbolOffset, MR.getMachineCodeOffset());
+ }
+ break;
+ case PPC::reloc_pcrel_bx:
+ {
+ // FIXME: Presumably someday we will need to branch to other, non-extern
+ // functions too. Need to figure out some way to distinguish between
+ // target is BB and target is function.
+ if (isExtern) {
+ MachORelocation BR24(MR.getMachineCodeOffset(), ToIdx, true, 2,
+ isExtern, PPC_RELOC_BR24, Scattered,
+ (intptr_t)MR.getMachineCodeOffset());
+ RelocOut.outword(BR24.getAddress());
+ RelocOut.outword(BR24.getPackedFields());
+ ++NumRelocs;
+ }
+
+ Addr -= MR.getMachineCodeOffset();
+ Addr >>= 2;
+ Addr &= 0xFFFFFF;
+ Addr <<= 2;
+ Addr |= (SecOut[MR.getMachineCodeOffset()] << 24);
+ Addr |= (SecOut[MR.getMachineCodeOffset()+3] & 0x3);
+ SecOut.fixword(Addr, MR.getMachineCodeOffset());
+ break;
+ }
+ case PPC::reloc_pcrel_bcx:
+ {
+ Addr -= MR.getMachineCodeOffset();
+ Addr &= 0xFFFC;
+
+ SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ case PPC::reloc_absolute_high:
+ {
+ MachORelocation HA16(MR.getMachineCodeOffset(), ToIdx, false, 2,
+ isExtern, PPC_RELOC_HA16);
+ MachORelocation PAIR(Addr & 0xFFFF, 0xFFFFFF, false, 2, isExtern,
+ PPC_RELOC_PAIR);
+ NumRelocs = 2;
+
+ RelocOut.outword(HA16.getRawAddress());
+ RelocOut.outword(HA16.getPackedFields());
+ RelocOut.outword(PAIR.getRawAddress());
+ RelocOut.outword(PAIR.getPackedFields());
+
+ Addr += 0x8000;
+
+ SecOut.fixhalf(Addr >> 16, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ case PPC::reloc_absolute_low:
+ {
+ MachORelocation LO16(MR.getMachineCodeOffset(), ToIdx, false, 2,
+ isExtern, PPC_RELOC_LO16);
+ MachORelocation PAIR(Addr >> 16, 0xFFFFFF, false, 2, isExtern,
+ PPC_RELOC_PAIR);
+ NumRelocs = 2;
+
+ RelocOut.outword(LO16.getRawAddress());
+ RelocOut.outword(LO16.getPackedFields());
+ RelocOut.outword(PAIR.getRawAddress());
+ RelocOut.outword(PAIR.getPackedFields());
+
+ SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ }
+
+ return NumRelocs;
+}
diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.h b/lib/Target/PowerPC/PPCMachOWriterInfo.h
new file mode 100644
index 0000000..d46334d
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachOWriterInfo.h
@@ -0,0 +1,55 @@
+//===-- PPCMachOWriterInfo.h - Mach-O Writer Info for PowerPC ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Mach-O writer information for the PowerPC backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHO_WRITER_INFO_H
+#define PPC_MACHO_WRITER_INFO_H
+
+#include "llvm/Target/TargetMachOWriterInfo.h"
+
+namespace llvm {
+
+ // Forward declarations
+ class MachineRelocation;
+ class OutputBuffer;
+ class PPCTargetMachine;
+
+ class PPCMachOWriterInfo : public TargetMachOWriterInfo {
+ public:
+ PPCMachOWriterInfo(const PPCTargetMachine &TM);
+ virtual ~PPCMachOWriterInfo();
+
+ virtual unsigned GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIdx,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered, bool Extern) const;
+
+ // Constants for the relocation r_type field.
+ // See <mach-o/ppc/reloc.h>
+ enum {
+ PPC_RELOC_VANILLA, // generic relocation
+ PPC_RELOC_PAIR, // the second relocation entry of a pair
+ PPC_RELOC_BR14, // 14 bit branch displacement to word address
+ PPC_RELOC_BR24, // 24 bit branch displacement to word address
+ PPC_RELOC_HI16, // a PAIR follows with the low 16 bits
+ PPC_RELOC_LO16, // a PAIR follows with the high 16 bits
+ PPC_RELOC_HA16, // a PAIR follows, which is sign extended to 32b
+ PPC_RELOC_LO14 // LO16 with low 2 bits implicitly zero
+ };
+ };
+
+} // end llvm namespace
+
+#endif // PPC_MACHO_WRITER_INFO_H
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 0000000..42883d7
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,104 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHINE_FUNCTION_INFO_H
+#define PPC_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private PowerPC target-specific information for each
+/// MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+private:
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// ReturnAddrSaveIndex - Frame index of where the return address is stored.
+ ///
+ int ReturnAddrSaveIndex;
+
+ /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
+ /// function. This is only valid after the initial scan of the function by
+ /// PEI.
+ bool MustSaveLR;
+
+ /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR;
+
+ /// LRStoreRequired - The bool indicates whether there is some explicit use of
+ /// the LR/LR8 stack slot that is not obvious from scanning the code. This
+ /// requires that the code generator produce a store of LR to the stack on
+ /// entry, even though LR may otherwise apparently not be used.
+ bool LRStoreRequired;
+
+ /// MinReservedArea - This is the frame size that is at least reserved in a
+ /// potential caller (parameter+linkage area).
+ unsigned MinReservedArea;
+
+ /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
+ /// amount the stack pointer is adjusted to make the frame bigger for tail
+ /// calls. Used for creating an area before the register spill area.
+ int TailCallSPDelta;
+
+ /// HasFastCall - Does this function contain a fast call. Used to determine
+ /// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
+ bool HasFastCall;
+
+public:
+ PPCFunctionInfo(MachineFunction &MF)
+ : FramePointerSaveIndex(0),
+ ReturnAddrSaveIndex(0),
+ MustSaveLR(false), // not valid until PEI's scan, but start defined
+ SpillsCR(false),
+ LRStoreRequired(false),
+ MinReservedArea(0),
+ TailCallSPDelta(0),
+ HasFastCall(false) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
+ void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+
+ unsigned getMinReservedArea() const { return MinReservedArea; }
+ void setMinReservedArea(unsigned size) { MinReservedArea = size; }
+
+ int getTailCallSPDelta() const { return TailCallSPDelta; }
+ void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
+
+ /// MustSaveLR - This is set when the prolog/epilog inserter does its initial
+ /// scan of the function. It is true if the LR/LR8 register is ever explicitly
+ /// defined/clobbered in the machine function (e.g. by calls and movpctolr,
+ /// which is used in PIC generation), or if the LR stack slot is explicitly
+ /// referenced by builtin_return_address.
+ void setMustSaveLR(bool U) { MustSaveLR = U; }
+ bool mustSaveLR() const { return MustSaveLR; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setLRStoreRequired() { LRStoreRequired = true; }
+ bool isLRStoreRequired() const { return LRStoreRequired; }
+
+ void setHasFastCall() { HasFastCall = true; }
+ bool hasFastCall() const { return HasFastCall;}
+};
+
+} // end of namespace llvm
+
+
+#endif
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 0000000..3164e33
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
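+//
+// There are 9^4 = 6561 entries: one per shuffle mask <a,b,c,d>, where each
+// element is one of eight lane indices or undef, indexed as
+// a*9*9*9 + b*9*9 + c*9 + d.  The unpacking of each 32-bit entry (cost,
+// opcode, and the two operand IDs) is done by the lookup code in
+// PPCISelLowering.cpp (GeneratePerfectShuffle).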
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/lib/Target/PowerPC/PPCPredicates.cpp b/lib/Target/PowerPC/PPCPredicates.cpp
new file mode 100644
index 0000000..08a2812
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPredicates.cpp
@@ -0,0 +1,30 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ default: assert(0 && "Unknown PPC branch opcode!");
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ }
+}
diff --git a/lib/Target/PowerPC/PPCPredicates.h b/lib/Target/PowerPC/PPCPredicates.h
new file mode 100644
index 0000000..b2c8315
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPredicates.h
@@ -0,0 +1,39 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+#include "PPC.h"
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_ALWAYS = (0 << 5) | 20,
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4
+ };
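+
+  // For illustration only (not part of the upstream interface): a predicate
+  // unpacks back into its fields with plain shifts and masks, e.g.
+  //
+  //   unsigned BO = PRED_EQ & 31;  // == 12: branch if the condition bit is set
+  //   unsigned BI = PRED_EQ >> 5;  // ==  2: the EQ bit within the CR field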
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
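+  /// e.g. InvertPredicate(PRED_LT) yields PRED_GE; PRED_ALWAYS has no inverse
+  /// and trips the "Unknown PPC branch opcode" assert in PPCPredicates.cpp.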
+ Predicate InvertPredicate(Predicate Opcode);
+}
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 0000000..5d5beeb
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,1446 @@
+//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFrameInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary
+// bigger than the default (16 bytes on Darwin) when there is a stack local
+// of greater alignment. This does not currently work, because the delta
+// between old and new stack pointers is added to offsets that reference
+// incoming parameters after the prolog is generated, and the code that
+// does that doesn't handle a variable delta. You don't want to do that
+// anyway; a better approach is to reserve another register that holds the
+// incoming stack pointer, and to reference parameters relative to that.
+#define ALIGN_STACK 0
+
+// FIXME (64-bit): Eventually enable by default.
+cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC32 register scavenger"),
+ cl::Hidden);
+cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC64 register scavenger"),
+ cl::Hidden);
+#define EnableRegisterScavenging \
+ ((EnablePPC32RS && !Subtarget.isPPC64()) || \
+ (EnablePPC64RS && Subtarget.isPPC64()))
+
+// FIXME (64-bit): Should be inlined.
+bool
+PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
+ return EnableRegisterScavenging;
+}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace PPC;
+ switch (RegEnum) {
+ case 0: return 0;
+ case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
+ case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
+ case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
+ case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
+ case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
+ case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
+ case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
+ case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
+ case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
+ case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
+ case R10: case X10: case F10: case V10: case CR2EQ: return 10;
+ case R11: case X11: case F11: case V11: case CR2UN: return 11;
+ case R12: case X12: case F12: case V12: case CR3LT: return 12;
+ case R13: case X13: case F13: case V13: case CR3GT: return 13;
+ case R14: case X14: case F14: case V14: case CR3EQ: return 14;
+ case R15: case X15: case F15: case V15: case CR3UN: return 15;
+ case R16: case X16: case F16: case V16: case CR4LT: return 16;
+ case R17: case X17: case F17: case V17: case CR4GT: return 17;
+ case R18: case X18: case F18: case V18: case CR4EQ: return 18;
+ case R19: case X19: case F19: case V19: case CR4UN: return 19;
+ case R20: case X20: case F20: case V20: case CR5LT: return 20;
+ case R21: case X21: case F21: case V21: case CR5GT: return 21;
+ case R22: case X22: case F22: case V22: case CR5EQ: return 22;
+ case R23: case X23: case F23: case V23: case CR5UN: return 23;
+ case R24: case X24: case F24: case V24: case CR6LT: return 24;
+ case R25: case X25: case F25: case V25: case CR6GT: return 25;
+ case R26: case X26: case F26: case V26: case CR6EQ: return 26;
+ case R27: case X27: case F27: case V27: case CR6UN: return 27;
+ case R28: case X28: case F28: case V28: case CR7LT: return 28;
+ case R29: case X29: case F29: case V29: case CR7GT: return 29;
+ case R30: case X30: case F30: case V30: case CR7EQ: return 30;
+ case R31: case X31: case F31: case V31: case CR7UN: return 31;
+ default:
+ cerr << "Unhandled reg in PPCRegisterInfo::getRegisterNumbering!\n";
+ abort();
+ }
+}
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : PPCGenRegisterInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(ST), TII(tii) {
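+  // ImmToIdxMap pairs each D-form (register + 16-bit displacement) opcode with
+  // its X-form (register + register) equivalent; eliminateFrameIndex rewrites
+  // through it when a frame offset does not fit in the 16-bit immediate.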
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+
+ // 64-bit
+ ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RCRegClass;
+ else
+ return &PPC::GPRCRegClass;
+}
+
+const unsigned*
+PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ // 32-bit Darwin calling convention.
+ static const unsigned Macho32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR, 0
+ };
+
+ static const unsigned ELF32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F9,
+ PPC::F10, PPC::F11, PPC::F12, PPC::F13,
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR, 0
+ };
+ // 64-bit Darwin calling convention.
+ static const unsigned Macho64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR8, 0
+ };
+
+ if (Subtarget.isMachoABI())
+ return Subtarget.isPPC64() ? Macho64_CalleeSavedRegs :
+ Macho32_CalleeSavedRegs;
+
+ // ELF 32.
+ return ELF32_CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ // 32-bit Macho calling convention.
+ static const TargetRegisterClass * const Macho32_CalleeSavedRegClasses[] = {
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+
+ &PPC::GPRCRegClass, 0
+ };
+
+ static const TargetRegisterClass * const ELF32_CalleeSavedRegClasses[] = {
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+
+ &PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+
+ &PPC::GPRCRegClass, 0
+ };
+
+ // 64-bit Macho calling convention.
+ static const TargetRegisterClass * const Macho64_CalleeSavedRegClasses[] = {
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+
+ &PPC::G8RCRegClass, 0
+ };
+
+ if (Subtarget.isMachoABI())
+ return Subtarget.isPPC64() ? Macho64_CalleeSavedRegClasses :
+ Macho32_CalleeSavedRegClasses;
+
+ // ELF 32.
+ return ELF32_CalleeSavedRegClasses;
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+static bool needsFP(const MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects() ||
+ (PerformTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(PPC::R0);
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ Reserved.set(PPC::LR8);
+ Reserved.set(PPC::RM);
+
+ // In Linux, r2 is reserved for the OS.
+ if (!Subtarget.isDarwin())
+ Reserved.set(PPC::R2);
+
+ // On PPC64, r13 is the thread pointer. Never allocate this register. Note
+  // that this is overly conservative, as it also prevents allocation of R31 when
+ // the FP is not needed.
+ if (Subtarget.isPPC64()) {
+ Reserved.set(PPC::R13);
+ Reserved.set(PPC::R31);
+
+ if (!EnableRegisterScavenging)
+ Reserved.set(PPC::R0); // FIXME (64-bit): Remove
+
+ Reserved.set(PPC::X0);
+ Reserved.set(PPC::X1);
+ Reserved.set(PPC::X13);
+ Reserved.set(PPC::X31);
+ }
+
+ if (needsFP(MF))
+ Reserved.set(PPC::R31);
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool PPCRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getStackSize() && needsFP(MF);
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+
+
+void PPCRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (PerformTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
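+      // A 16-bit amount folds into one addi; anything wider is materialized
+      // with the usual lis/ori pair -- e.g. (illustrative) 0x12345 becomes
+      // "lis r0, 0x1; ori r0, r0, 0x2345" before the add.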
+ if (isInt16(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
+ addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(StackReg)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
+ const TargetRegisterClass *RC, int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC, true);
+ // FIXME: move ARM callee-saved reg scan to target independent code, then
+ // search for already spilled CS register here.
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code will be in the general form
+///
+///   addi  R0, SP, \#frameSize ; get the address of the previous frame
+///   stwux R0, SP, Rnegsize    ; add and update the SP with the negated size
+///   addi  Rnew, SP, \#maxCallFrameSize ; get the top of the allocation
+///
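+/// As an illustrative instance, a 32-bit function with frameSize = 64 and
+/// maxCallFrameSize = 32 would get roughly:
+///
+///   addi  r0, r1, 64    ; r0 = address of the previous frame
+///   stwux r0, r1, rNeg  ; push the frame, storing the back-chain at 0(r1)
+///   addi  rNew, r1, 32  ; new allocation sits just above the call frame
+///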
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = Subtarget.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert(MaxAlign <= TargetAlign &&
+ "Dynamic alloca with large aligns not supported");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
+
+ // FIXME (64-bit): Use "findScratchRegister"
+ unsigned Reg;
+ if (EnableRegisterScavenging)
+ Reg = findScratchRegister(II, RS, RC, SPAdj);
+ else
+ Reg = PPC::R0;
+
+ if (MaxAlign < TargetAlign && isInt16(FrameSize)) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X0, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
+/// reserving a whole register (R0), we scrounge for one here. This generates
+/// code like this:
+///
+/// mfcr rA ; Move the conditional register into GPR rA.
+/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot.
+/// stw rA, FI ; Store rA to the frame.
+///
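+/// As a worked (hypothetical) case: spilling CR7 gives a shift amount of
+/// getRegisterNumbering(CR7) * 4 == 28, i.e. "rlwinm rA, rA, 28, 0, 31",
+/// rotating CR7's four bits around into CR0's slot.
+///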
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+
+ // We need to store the CR in the low 4-bits of the saved value. First, issue
+ // an MFCR to save all of the CRBits. Add an implicit kill of the CR.
+ if (!MI.getOperand(0).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg);
+ else
+ // Implicitly kill the CR register.
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg)
+ .addReg(MI.getOperand(0).getReg(), RegState::ImplicitKill);
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
+ unsigned SrcReg = MI.getOperand(0).getReg();
+ if (SrcReg != PPC::CR0)
+ // rlwinm rA, rA, ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4)
+ .addImm(0)
+ .addImm(31);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Find out which operand is the frame index.
+ unsigned FIOperandNo = 0;
+ while (!MI.getOperand(FIOperandNo).isFI()) {
+ ++FIOperandNo;
+ assert(FIOperandNo != MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+  // Take into account whether it's an add or a mem instruction.
+ unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ if (MI.getOpcode() == TargetInstrInfo::INLINEASM)
+ OffsetOperandNo = FIOperandNo-1;
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II, SPAdj, RS);
+ return;
+ }
+
+ // Special case for pseudo-op SPILL_CR.
+ if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ }
+
+ // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+ MI.getOperand(FIOperandNo).ChangeToRegister(hasFP(MF) ? PPC::R31 : PPC::R1,
+ false);
+
+  // Figure out if the offset in the instruction is shifted right two bits.
+  // This is true for DS-form instructions like "STD", to which the machine
+  // implicitly appends two low zero bits.
+ bool isIXAddr = false;
+ switch (OpC) {
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ isIXAddr = true;
+ break;
+ }
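+  // e.g. a DS-form "std" with a byte offset of 40 encodes the immediate as
+  // 40 >> 2 == 10; the hardware re-appends the two low zero bits.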
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+ if (!isIXAddr)
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ else
+ Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ Offset += MFI->getStackSize();
+
+ // If we can, encode the offset directly into the instruction. If this is a
+ // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
+ // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
+ // clear can be encoded. This is extremely uncommon, because normally you
+ // only "std" to a stack slot that is at least 4-byte aligned, but it can
+ // happen in invalid code.
+ if (isInt16(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (isIXAddr)
+ Offset >>= 2; // The actual encoded value has the low two bits zero.
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // The offset doesn't fit into a single register, scavenge one to build the
+ // offset in.
+ // FIXME: figure out what SPAdj is doing here.
+
+ // FIXME (64-bit): Use "findScratchRegister".
+ unsigned SReg;
+ if (EnableRegisterScavenging)
+ SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
+ else
+ SReg = PPC::R0;
+
+ // Insert a set of rA with the full offset value before the ld, st, or add
+ BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ .addImm(Offset >> 16);
+ BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ .addReg(SReg, RegState::Kill)
+ .addImm(Offset);
+
+ // Convert into indexed form of the instruction:
+ //
+  //   sth  0:rA, 1:imm, 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+  //   addi 0:rA, 1:rB,  2:imm  ==> add  0:rA, 1:rB, 2:r0
+ unsigned OperandBase;
+
+ if (OpC != TargetInstrInfo::INLINEASM) {
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setDesc(TII.get(NewOpcode));
+ OperandBase = 1;
+ } else {
+ OperandBase = OffsetOperandNo;
+ }
+
+ unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
+}
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const unsigned short VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+    // If the last instruction is a return, this is an epilog block; look for
+    // the MTVRSAVE to remove there.
+ if (!I->empty() && I->back().getDesc().isReturn()) {
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
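+  // (Illustration: if only V0 and V31 are used, UsedRegMask becomes
+  // (1 << 31) | 1 == 0x80000001 -- the most significant bit tracks V0.)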
+
+ // Live in and live out values already must be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+ if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+  // If no registers (beyond live-ins/outs) are used, remove all VRSAVE code.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
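+  // ori/oris each carry a 16-bit immediate, so a mask confined to one half of
+  // the word needs a single instruction; a mask spanning both halves takes an
+  // oris of the high half followed by an ori of the low half.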
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+  unsigned AlignMask = TargetAlign - 1; // Mask for rounding up to TargetAlign.
+
+ // If we are a leaf function, and use up to 224 bytes of stack space,
+ // don't have a frame pointer, calls, or dynamic alloca then we do not need
+ // to adjust the stack pointer (we fit in the Red Zone).
+ if (!DisableRedZone &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->hasCalls() && // No calls.
+ (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+    // No need for a frame.
+ MFI->setStackSize(0);
+ return;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage and 8 args.
+ unsigned minCallFrameSize =
+ PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(),
+ Subtarget.isMachoABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
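+  // e.g. with TargetAlign == 16, a 52-byte frame rounds to (52 + 15) & ~15
+  // == 64.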
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+void
+PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ MF.getRegInfo().setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool IsPPC64 = Subtarget.isPPC64();
+ bool IsELF32_ABI = Subtarget.isELF32_ABI();
+ bool IsMachoABI = Subtarget.isMachoABI();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && (NoFramePointerElim || MFI->hasVarSizedObjects()) &&
+ IsELF32_ABI) {
+ // Find out what the fix offset of the frame pointer save area.
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
+ IsMachoABI);
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+  if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0;
+    MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta,
+                                         AddFPOffsetAmount + TCSPDelta);
+ }
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+ // FIXME: this doesn't actually check stack size, so is a bit pessimistic
+ // FIXME: doesn't detect whether or not we need to spill vXX, which requires
+ // r0 for now.
+
+ if (EnableRegisterScavenging) // FIXME (64-bit): Enable.
+ if (needsFP(MF) || spillsCR(MF)) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+}
+
+void
+PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
+ !MF.getFunction()->doesNotThrow() ||
+ UnwindTablesMandatory;
+
+ // Prepare for frame info.
+ unsigned FrameLabelId = 0;
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ unsigned FrameSize = MFI->getStackSize();
+
+ int NegFrameSize = -FrameSize;
+
+ // Get processor type.
+ bool IsPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool IsMachoABI = Subtarget.isMachoABI();
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF) && FrameSize;
+
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+
+ if (IsPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X31)
+        .addImm(FPOffset / 4)
+ .addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X0)
+ .addImm(LROffset / 4)
+ .addReg(PPC::X1);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R31)
+ .addImm(FPOffset)
+ .addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R0)
+ .addImm(LROffset)
+ .addReg(PPC::R1);
+ }
+
+ // Skip if a leaf routine.
+ if (!FrameSize) return;
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId);
+ }
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
+ if (!IsPPC64) {
+ // PPC32.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+      assert(isPowerOf2_32(MaxAlign) && isInt16(MaxAlign) &&
+             "Invalid alignment!");
+ assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R1)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ } else if (isInt16(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
+ .addReg(PPC::R1)
+ .addImm(NegFrameSize)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ }
+ } else { // PPC64.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+      assert(isPowerOf2_32(MaxAlign) && isInt16(MaxAlign) &&
+             "Invalid alignment!");
+ assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
+ .addReg(PPC::X1)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+ .addReg(PPC::X0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ } else if (isInt16(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
+ .addReg(PPC::X1)
+ .addImm(NegFrameSize / 4)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ }
+ }
+
+ if (needsFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ if (NegFrameSize) {
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ MachineLocation SP(IsPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabelId, SP, SP));
+ }
+
+ if (HasFP) {
+ MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+ MachineLocation FPSrc(IsPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
+ }
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
+
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
+
+ MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
+ (IsPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ if (!IsPPC64) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
+ .addReg(PPC::R1)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
+ .addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+ }
+}
+
+void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ assert( (RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore r1
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool IsPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool IsMachoABI = Subtarget.isMachoABI();
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF) && FrameSize;
+
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+    assert((Delta >= 0) && "Delta must be non-negative");
+    if (MaxTCRetDelta > 0)
+      FrameSize += (StackAdj + Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
+ if (FrameSize) {
+ // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+ // on entry to the function. Add this offset back now.
+ if (!IsPPC64) {
+ // If this function contained a fastcc call and PerformTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall() && isInt16(FrameSize)) {
+        assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+      } else if (FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt16(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R1)
+ .addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (FI->hasFastCall() && isInt16(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if (FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+ .addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (IsPPC64) {
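+ // The 64-bit LD immediate is a DS-form displacement stored pre-scaled
+ // (in units of 4 bytes), hence the /4 on the byte offsets below.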
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+
+ // Callee-pop calling convention: pop the parameter/linkage area. Used for
+ // tail call optimization.
+ if (PerformTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI;
+
+ if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(FPReg)
+ .addReg(TmpReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
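+ // Rewrite the TCRETURN pseudo into the corresponding tail-call branch.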
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = prior(MBB.end());
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = prior(MBB.end());
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+}
+
+unsigned PPCRegisterInfo::getRARegister() const {
+ return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ if (!Subtarget.isPPC64())
+ return hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+void PPCRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is R1.
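+ // A label ID of 0 marks these moves as describing the state at function
+ // entry.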
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+unsigned PPCRegisterInfo::getEHExceptionRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
+}
+
+unsigned PPCRegisterInfo::getEHHandlerRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
+}
+
+int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: The dwarf register numbers most probably differ between Linux
+ // and Darwin.
+ return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "PPCGenRegisterInfo.inc"
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 0000000..9506b65
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,95 @@
+//===- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_REGISTERINFO_H
+#define POWERPC32_REGISTERINFO_H
+
+#include "PPC.h"
+#include "PPCGenRegisterInfo.h.inc"
+#include <map>
+
+namespace llvm {
+class PPCSubtarget;
+class TargetInstrInfo;
+class Type;
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ std::map<unsigned, unsigned> ImmToIdxMap;
+ const PPCSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+public:
+ PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// PPC::F14, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass() const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// requiresRegisterScavenging - We require a register scavenger.
+ /// FIXME (64-bit): Should be inlined.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 0000000..9e15a55
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,360 @@
+//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg, string n> : PPCReg<n> {
+ field bits<5> Num = SubReg.Num;
+ let SubRegs = [SubReg];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ field bits<10> Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n> : PPCReg<n> {
+ field bits<3> Num = num;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+
+// General-purpose registers
+def R0 : GPR< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : GPR< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : GPR< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : GPR< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : GPR< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : GPR< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : GPR< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : GPR< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : GPR< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : GPR< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : GPR<10, "r10">, DwarfRegNum<[10]>;
+def R11 : GPR<11, "r11">, DwarfRegNum<[11]>;
+def R12 : GPR<12, "r12">, DwarfRegNum<[12]>;
+def R13 : GPR<13, "r13">, DwarfRegNum<[13]>;
+def R14 : GPR<14, "r14">, DwarfRegNum<[14]>;
+def R15 : GPR<15, "r15">, DwarfRegNum<[15]>;
+def R16 : GPR<16, "r16">, DwarfRegNum<[16]>;
+def R17 : GPR<17, "r17">, DwarfRegNum<[17]>;
+def R18 : GPR<18, "r18">, DwarfRegNum<[18]>;
+def R19 : GPR<19, "r19">, DwarfRegNum<[19]>;
+def R20 : GPR<20, "r20">, DwarfRegNum<[20]>;
+def R21 : GPR<21, "r21">, DwarfRegNum<[21]>;
+def R22 : GPR<22, "r22">, DwarfRegNum<[22]>;
+def R23 : GPR<23, "r23">, DwarfRegNum<[23]>;
+def R24 : GPR<24, "r24">, DwarfRegNum<[24]>;
+def R25 : GPR<25, "r25">, DwarfRegNum<[25]>;
+def R26 : GPR<26, "r26">, DwarfRegNum<[26]>;
+def R27 : GPR<27, "r27">, DwarfRegNum<[27]>;
+def R28 : GPR<28, "r28">, DwarfRegNum<[28]>;
+def R29 : GPR<29, "r29">, DwarfRegNum<[29]>;
+def R30 : GPR<30, "r30">, DwarfRegNum<[30]>;
+def R31 : GPR<31, "r31">, DwarfRegNum<[31]>;
+
+// 64-bit General-purpose registers
+def X0 : GP8< R0, "r0">, DwarfRegNum<[0]>;
+def X1 : GP8< R1, "r1">, DwarfRegNum<[1]>;
+def X2 : GP8< R2, "r2">, DwarfRegNum<[2]>;
+def X3 : GP8< R3, "r3">, DwarfRegNum<[3]>;
+def X4 : GP8< R4, "r4">, DwarfRegNum<[4]>;
+def X5 : GP8< R5, "r5">, DwarfRegNum<[5]>;
+def X6 : GP8< R6, "r6">, DwarfRegNum<[6]>;
+def X7 : GP8< R7, "r7">, DwarfRegNum<[7]>;
+def X8 : GP8< R8, "r8">, DwarfRegNum<[8]>;
+def X9 : GP8< R9, "r9">, DwarfRegNum<[9]>;
+def X10 : GP8<R10, "r10">, DwarfRegNum<[10]>;
+def X11 : GP8<R11, "r11">, DwarfRegNum<[11]>;
+def X12 : GP8<R12, "r12">, DwarfRegNum<[12]>;
+def X13 : GP8<R13, "r13">, DwarfRegNum<[13]>;
+def X14 : GP8<R14, "r14">, DwarfRegNum<[14]>;
+def X15 : GP8<R15, "r15">, DwarfRegNum<[15]>;
+def X16 : GP8<R16, "r16">, DwarfRegNum<[16]>;
+def X17 : GP8<R17, "r17">, DwarfRegNum<[17]>;
+def X18 : GP8<R18, "r18">, DwarfRegNum<[18]>;
+def X19 : GP8<R19, "r19">, DwarfRegNum<[19]>;
+def X20 : GP8<R20, "r20">, DwarfRegNum<[20]>;
+def X21 : GP8<R21, "r21">, DwarfRegNum<[21]>;
+def X22 : GP8<R22, "r22">, DwarfRegNum<[22]>;
+def X23 : GP8<R23, "r23">, DwarfRegNum<[23]>;
+def X24 : GP8<R24, "r24">, DwarfRegNum<[24]>;
+def X25 : GP8<R25, "r25">, DwarfRegNum<[25]>;
+def X26 : GP8<R26, "r26">, DwarfRegNum<[26]>;
+def X27 : GP8<R27, "r27">, DwarfRegNum<[27]>;
+def X28 : GP8<R28, "r28">, DwarfRegNum<[28]>;
+def X29 : GP8<R29, "r29">, DwarfRegNum<[29]>;
+def X30 : GP8<R30, "r30">, DwarfRegNum<[30]>;
+def X31 : GP8<R31, "r31">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "f0">, DwarfRegNum<[32]>;
+def F1 : FPR< 1, "f1">, DwarfRegNum<[33]>;
+def F2 : FPR< 2, "f2">, DwarfRegNum<[34]>;
+def F3 : FPR< 3, "f3">, DwarfRegNum<[35]>;
+def F4 : FPR< 4, "f4">, DwarfRegNum<[36]>;
+def F5 : FPR< 5, "f5">, DwarfRegNum<[37]>;
+def F6 : FPR< 6, "f6">, DwarfRegNum<[38]>;
+def F7 : FPR< 7, "f7">, DwarfRegNum<[39]>;
+def F8 : FPR< 8, "f8">, DwarfRegNum<[40]>;
+def F9 : FPR< 9, "f9">, DwarfRegNum<[41]>;
+def F10 : FPR<10, "f10">, DwarfRegNum<[42]>;
+def F11 : FPR<11, "f11">, DwarfRegNum<[43]>;
+def F12 : FPR<12, "f12">, DwarfRegNum<[44]>;
+def F13 : FPR<13, "f13">, DwarfRegNum<[45]>;
+def F14 : FPR<14, "f14">, DwarfRegNum<[46]>;
+def F15 : FPR<15, "f15">, DwarfRegNum<[47]>;
+def F16 : FPR<16, "f16">, DwarfRegNum<[48]>;
+def F17 : FPR<17, "f17">, DwarfRegNum<[49]>;
+def F18 : FPR<18, "f18">, DwarfRegNum<[50]>;
+def F19 : FPR<19, "f19">, DwarfRegNum<[51]>;
+def F20 : FPR<20, "f20">, DwarfRegNum<[52]>;
+def F21 : FPR<21, "f21">, DwarfRegNum<[53]>;
+def F22 : FPR<22, "f22">, DwarfRegNum<[54]>;
+def F23 : FPR<23, "f23">, DwarfRegNum<[55]>;
+def F24 : FPR<24, "f24">, DwarfRegNum<[56]>;
+def F25 : FPR<25, "f25">, DwarfRegNum<[57]>;
+def F26 : FPR<26, "f26">, DwarfRegNum<[58]>;
+def F27 : FPR<27, "f27">, DwarfRegNum<[59]>;
+def F28 : FPR<28, "f28">, DwarfRegNum<[60]>;
+def F29 : FPR<29, "f29">, DwarfRegNum<[61]>;
+def F30 : FPR<30, "f30">, DwarfRegNum<[62]>;
+def F31 : FPR<31, "f31">, DwarfRegNum<[63]>;
+
+// Vector registers
+def V0 : VR< 0, "v0">, DwarfRegNum<[77]>;
+def V1 : VR< 1, "v1">, DwarfRegNum<[78]>;
+def V2 : VR< 2, "v2">, DwarfRegNum<[79]>;
+def V3 : VR< 3, "v3">, DwarfRegNum<[80]>;
+def V4 : VR< 4, "v4">, DwarfRegNum<[81]>;
+def V5 : VR< 5, "v5">, DwarfRegNum<[82]>;
+def V6 : VR< 6, "v6">, DwarfRegNum<[83]>;
+def V7 : VR< 7, "v7">, DwarfRegNum<[84]>;
+def V8 : VR< 8, "v8">, DwarfRegNum<[85]>;
+def V9 : VR< 9, "v9">, DwarfRegNum<[86]>;
+def V10 : VR<10, "v10">, DwarfRegNum<[87]>;
+def V11 : VR<11, "v11">, DwarfRegNum<[88]>;
+def V12 : VR<12, "v12">, DwarfRegNum<[89]>;
+def V13 : VR<13, "v13">, DwarfRegNum<[90]>;
+def V14 : VR<14, "v14">, DwarfRegNum<[91]>;
+def V15 : VR<15, "v15">, DwarfRegNum<[92]>;
+def V16 : VR<16, "v16">, DwarfRegNum<[93]>;
+def V17 : VR<17, "v17">, DwarfRegNum<[94]>;
+def V18 : VR<18, "v18">, DwarfRegNum<[95]>;
+def V19 : VR<19, "v19">, DwarfRegNum<[96]>;
+def V20 : VR<20, "v20">, DwarfRegNum<[97]>;
+def V21 : VR<21, "v21">, DwarfRegNum<[98]>;
+def V22 : VR<22, "v22">, DwarfRegNum<[99]>;
+def V23 : VR<23, "v23">, DwarfRegNum<[100]>;
+def V24 : VR<24, "v24">, DwarfRegNum<[101]>;
+def V25 : VR<25, "v25">, DwarfRegNum<[102]>;
+def V26 : VR<26, "v26">, DwarfRegNum<[103]>;
+def V27 : VR<27, "v27">, DwarfRegNum<[104]>;
+def V28 : VR<28, "v28">, DwarfRegNum<[105]>;
+def V29 : VR<29, "v29">, DwarfRegNum<[106]>;
+def V30 : VR<30, "v30">, DwarfRegNum<[107]>;
+def V31 : VR<31, "v31">, DwarfRegNum<[108]>;
+
+// Condition registers
+def CR0 : CR<0, "cr0">, DwarfRegNum<[68]>;
+def CR1 : CR<1, "cr1">, DwarfRegNum<[69]>;
+def CR2 : CR<2, "cr2">, DwarfRegNum<[70]>;
+def CR3 : CR<3, "cr3">, DwarfRegNum<[71]>;
+def CR4 : CR<4, "cr4">, DwarfRegNum<[72]>;
+def CR5 : CR<5, "cr5">, DwarfRegNum<[73]>;
+def CR6 : CR<6, "cr6">, DwarfRegNum<[74]>;
+def CR7 : CR<7, "cr7">, DwarfRegNum<[75]>;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">, DwarfRegNum<[0]>;
+def CR0GT : CRBIT< 1, "1">, DwarfRegNum<[0]>;
+def CR0EQ : CRBIT< 2, "2">, DwarfRegNum<[0]>;
+def CR0UN : CRBIT< 3, "3">, DwarfRegNum<[0]>;
+def CR1LT : CRBIT< 4, "4">, DwarfRegNum<[0]>;
+def CR1GT : CRBIT< 5, "5">, DwarfRegNum<[0]>;
+def CR1EQ : CRBIT< 6, "6">, DwarfRegNum<[0]>;
+def CR1UN : CRBIT< 7, "7">, DwarfRegNum<[0]>;
+def CR2LT : CRBIT< 8, "8">, DwarfRegNum<[0]>;
+def CR2GT : CRBIT< 9, "9">, DwarfRegNum<[0]>;
+def CR2EQ : CRBIT<10, "10">, DwarfRegNum<[0]>;
+def CR2UN : CRBIT<11, "11">, DwarfRegNum<[0]>;
+def CR3LT : CRBIT<12, "12">, DwarfRegNum<[0]>;
+def CR3GT : CRBIT<13, "13">, DwarfRegNum<[0]>;
+def CR3EQ : CRBIT<14, "14">, DwarfRegNum<[0]>;
+def CR3UN : CRBIT<15, "15">, DwarfRegNum<[0]>;
+def CR4LT : CRBIT<16, "16">, DwarfRegNum<[0]>;
+def CR4GT : CRBIT<17, "17">, DwarfRegNum<[0]>;
+def CR4EQ : CRBIT<18, "18">, DwarfRegNum<[0]>;
+def CR4UN : CRBIT<19, "19">, DwarfRegNum<[0]>;
+def CR5LT : CRBIT<20, "20">, DwarfRegNum<[0]>;
+def CR5GT : CRBIT<21, "21">, DwarfRegNum<[0]>;
+def CR5EQ : CRBIT<22, "22">, DwarfRegNum<[0]>;
+def CR5UN : CRBIT<23, "23">, DwarfRegNum<[0]>;
+def CR6LT : CRBIT<24, "24">, DwarfRegNum<[0]>;
+def CR6GT : CRBIT<25, "25">, DwarfRegNum<[0]>;
+def CR6EQ : CRBIT<26, "26">, DwarfRegNum<[0]>;
+def CR6UN : CRBIT<27, "27">, DwarfRegNum<[0]>;
+def CR7LT : CRBIT<28, "28">, DwarfRegNum<[0]>;
+def CR7GT : CRBIT<29, "29">, DwarfRegNum<[0]>;
+def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>;
+def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>;
+
+def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>;
+def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>;
+def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>;
+def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>;
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<[65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65]>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<[66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66]>;
+
+// VRsave register
+def VRSAVE : SPR<256, "VRsave">, DwarfRegNum<[107]>;
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register
+// This is not allocated as a normal register; it appears only in
+// Uses and Defs. The ABI says it needs to be preserved by a function,
+// but this is not achieved by saving and restoring it as with most
+// registers; it has to be done in code. To make this work, all the
+// return and call instructions are described as Uses of RM, so
+// instructions that do nothing but change RM will not get deleted.
+// Also, in the architecture it is not really an SPR; 512 is arbitrary.
+def RM: SPR<512, "**ROUNDING MODE**">, DwarfRegNum<[0]>;
+
+/// Register classes
+// Allocate volatiles first,
+// then non-volatiles in reverse order, since stmw/lmw save from rN to r31.
+def GPRC : RegisterClass<"PPC", [i32], 32,
+ [R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12,
+ R30, R29, R28, R27, R26, R25, R24, R23, R22, R21, R20, R19, R18, R17,
+ R16, R15, R14, R13, R31, R0, R1, LR]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GPRCClass::iterator
+ GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
+ // On non-Darwin targets (e.g. Linux), r2 is reserved for the OS.
+ if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
+ return begin()+1;
+
+ return begin();
+ }
+ GPRCClass::iterator
+ GPRCClass::allocation_order_end(const MachineFunction &MF) const {
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ // Note that this is overconservative, as it also prevents allocation of
+ // R31 when the FP is not needed.
+ if (MF.getTarget().getSubtarget<PPCSubtarget>().isPPC64())
+ return end()-5; // don't allocate R13, R31, R0, R1, LR
+
+ if (needsFP(MF))
+ return end()-4; // don't allocate R31, R0, R1, LR
+ else
+ return end()-3; // don't allocate R0, R1, LR
+ }
+ }];
+}
+def G8RC : RegisterClass<"PPC", [i64], 64,
+ [X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12,
+ X30, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20, X19, X18, X17,
+ X16, X15, X14, X31, X13, X0, X1, LR8]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ G8RCClass::iterator
+ G8RCClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ G8RCClass::iterator
+ G8RCClass::allocation_order_end(const MachineFunction &MF) const {
+ if (needsFP(MF))
+ return end()-5;
+ else
+ return end()-4;
+ }
+ }];
+}
+
+
+
+def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
+ F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
+ F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+ [V2, V3, V4, V5, V0, V1,
+ V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21,
+ V22, V23, V24, V25, V26, V27, V28, V29, V30, V31]>;
+
+def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
+ CR3, CR4]>;
+
+def CRBITRC : RegisterClass<"PPC", [i32], 32,
+ [CR0LT, CR0GT, CR0EQ, CR0UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN
+ ]>
+{
+ let CopyCost = -1;
+}
+
+
+def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
new file mode 100644
index 0000000..a33e7e0
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRelocations.h
@@ -0,0 +1,56 @@
+//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC32RELOCATIONS_H
+#define PPC32RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+// Hack to rid us of a PPC pre-processor symbol which is erroneously
+// defined in a PowerPC header file (bug in Linux/PPC)
+#ifdef PPC
+#undef PPC
+#endif
+
+namespace llvm {
+ namespace PPC {
+ enum RelocationType {
+ // reloc_vanilla - A standard relocation, where the address of the
+ // relocated object completely overwrites the address of the relocation.
+ reloc_vanilla,
+
+ // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
+ reloc_pcrel_bx,
+
+ // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
+ // and other bcx instructions.
+ reloc_pcrel_bcx,
+
+ // reloc_absolute_high - Absolute relocation, for the loadhi instruction
+ // (which is really addis). Add the high 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_high,
+
+ // reloc_absolute_low - Absolute relocation, for the la instruction (which
+ // is really an addi). Add the low 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_low,
+
+ // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
+ // instructions, which have two implicit zero bits.
+ reloc_absolute_low_ix
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 0000000..d589414
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,508 @@
+//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across PowerPC chip sets
+//
+def BPU : FuncUnit; // Branch unit
+def SLU : FuncUnit; // Store/load unit
+def SRU : FuncUnit; // Special register unit
+def IU1 : FuncUnit; // Integer unit 1 (simple)
+def IU2 : FuncUnit; // Integer unit 2 (complex)
+def IU3 : FuncUnit; // Integer unit 3 (7450 simple)
+def IU4 : FuncUnit; // Integer unit 4 (7450 simple)
+def FPU1 : FuncUnit; // Floating-point unit 1
+def FPU2 : FuncUnit; // Floating-point unit 2
+def VPU : FuncUnit; // Vector permutation unit
+def VIU1 : FuncUnit; // Vector integer unit 1 (simple)
+def VIU2 : FuncUnit; // Vector integer unit 2 (complex)
+def VFPU : FuncUnit; // Vector floating-point unit
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IntGeneral : InstrItinClass;
+def IntCompare : InstrItinClass;
+def IntDivD : InstrItinClass;
+def IntDivW : InstrItinClass;
+def IntMFFS : InstrItinClass;
+def IntMFVSCR : InstrItinClass;
+def IntMTFSB0 : InstrItinClass;
+def IntMTSRD : InstrItinClass;
+def IntMulHD : InstrItinClass;
+def IntMulHW : InstrItinClass;
+def IntMulHWU : InstrItinClass;
+def IntMulLI : InstrItinClass;
+def IntRFID : InstrItinClass;
+def IntRotateD : InstrItinClass;
+def IntRotate : InstrItinClass;
+def IntShift : InstrItinClass;
+def IntTrapD : InstrItinClass;
+def IntTrapW : InstrItinClass;
+def BrB : InstrItinClass;
+def BrCR : InstrItinClass;
+def BrMCR : InstrItinClass;
+def BrMCRX : InstrItinClass;
+def LdStDCBA : InstrItinClass;
+def LdStDCBF : InstrItinClass;
+def LdStDCBI : InstrItinClass;
+def LdStGeneral : InstrItinClass;
+def LdStDSS : InstrItinClass;
+def LdStICBI : InstrItinClass;
+def LdStUX : InstrItinClass;
+def LdStLD : InstrItinClass;
+def LdStLDARX : InstrItinClass;
+def LdStLFD : InstrItinClass;
+def LdStLFDU : InstrItinClass;
+def LdStLHA : InstrItinClass;
+def LdStLMW : InstrItinClass;
+def LdStLVecX : InstrItinClass;
+def LdStLWA : InstrItinClass;
+def LdStLWARX : InstrItinClass;
+def LdStSLBIA : InstrItinClass;
+def LdStSLBIE : InstrItinClass;
+def LdStSTD : InstrItinClass;
+def LdStSTDCX : InstrItinClass;
+def LdStSTVEBX : InstrItinClass;
+def LdStSTWCX : InstrItinClass;
+def LdStSync : InstrItinClass;
+def SprISYNC : InstrItinClass;
+def SprMFSR : InstrItinClass;
+def SprMTMSR : InstrItinClass;
+def SprMTSR : InstrItinClass;
+def SprTLBSYNC : InstrItinClass;
+def SprMFCR : InstrItinClass;
+def SprMFMSR : InstrItinClass;
+def SprMFSPR : InstrItinClass;
+def SprMFTB : InstrItinClass;
+def SprMTSPR : InstrItinClass;
+def SprMTSRIN : InstrItinClass;
+def SprRFI : InstrItinClass;
+def SprSC : InstrItinClass;
+def FPGeneral : InstrItinClass;
+def FPCompare : InstrItinClass;
+def FPDivD : InstrItinClass;
+def FPDivS : InstrItinClass;
+def FPFused : InstrItinClass;
+def FPRes : InstrItinClass;
+def FPSqrt : InstrItinClass;
+def VecGeneral : InstrItinClass;
+def VecFP : InstrItinClass;
+def VecFPCompare : InstrItinClass;
+def VecComplex : InstrItinClass;
+def VecPerm : InstrItinClass;
+def VecFPRound : InstrItinClass;
+def VecVSL : InstrItinClass;
+def VecVSR : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction to itinerary class map - When adding new opcodes to the
+// supported set, refer to the following table to determine which itinerary
+// class the opcode belongs to (a usage sketch follows the table).
+//
+// opcode itinerary class
+// ====== ===============
+// add IntGeneral
+// addc IntGeneral
+// adde IntGeneral
+// addi IntGeneral
+// addic IntGeneral
+// addic. IntGeneral
+// addis IntGeneral
+// addme IntGeneral
+// addze IntGeneral
+// and IntGeneral
+// andc IntGeneral
+// andi. IntGeneral
+// andis. IntGeneral
+// b BrB
+// bc BrB
+// bcctr BrB
+// bclr BrB
+// cmp IntCompare
+// cmpi IntCompare
+// cmpl IntCompare
+// cmpli IntCompare
+// cntlzd IntRotateD
+// cntlzw IntGeneral
+// crand BrCR
+// crandc BrCR
+// creqv BrCR
+// crnand BrCR
+// crnor BrCR
+// cror BrCR
+// crorc BrCR
+// crxor BrCR
+// dcba LdStDCBA
+// dcbf LdStDCBF
+// dcbi LdStDCBI
+// dcbst LdStDCBF
+// dcbt LdStGeneral
+// dcbtst LdStGeneral
+// dcbz LdStDCBF
+// divd IntDivD
+// divdu IntDivD
+// divw IntDivW
+// divwu IntDivW
+// dss LdStDSS
+// dst LdStDSS
+// dstst LdStDSS
+// eciwx LdStGeneral
+// ecowx LdStGeneral
+// eieio LdStGeneral
+// eqv IntGeneral
+// extsb IntGeneral
+// extsh IntGeneral
+// extsw IntRotateD
+// fabs FPGeneral
+// fadd FPGeneral
+// fadds FPGeneral
+// fcfid FPGeneral
+// fcmpo FPCompare
+// fcmpu FPCompare
+// fctid FPGeneral
+// fctidz FPGeneral
+// fctiw FPGeneral
+// fctiwz FPGeneral
+// fdiv FPDivD
+// fdivs FPDivS
+// fmadd FPFused
+// fmadds FPGeneral
+// fmr FPGeneral
+// fmsub FPFused
+// fmsubs FPGeneral
+// fmul FPFused
+// fmuls FPGeneral
+// fnabs FPGeneral
+// fneg FPGeneral
+// fnmadd FPFused
+// fnmadds FPGeneral
+// fnmsub FPFused
+// fnmsubs FPGeneral
+// fres FPRes
+// frsp FPGeneral
+// frsqrte FPGeneral
+// fsel FPGeneral
+// fsqrt FPSqrt
+// fsqrts FPSqrt
+// fsub FPGeneral
+// fsubs FPGeneral
+// icbi LdStICBI
+// isync SprISYNC
+// lbz LdStGeneral
+// lbzu LdStGeneral
+// lbzux LdStUX
+// lbzx LdStGeneral
+// ld LdStLD
+// ldarx LdStLDARX
+// ldu LdStLD
+// ldux LdStLD
+// ldx LdStLD
+// lfd LdStLFD
+// lfdu LdStLFDU
+// lfdux LdStLFDU
+// lfdx LdStLFDU
+// lfs LdStLFDU
+// lfsu LdStLFDU
+// lfsux LdStLFDU
+// lfsx LdStLFDU
+// lha LdStLHA
+// lhau LdStLHA
+// lhaux LdStLHA
+// lhax LdStLHA
+// lhbrx LdStGeneral
+// lhz LdStGeneral
+// lhzu LdStGeneral
+// lhzux LdStUX
+// lhzx LdStGeneral
+// lmw LdStLMW
+// lswi LdStLMW
+// lswx LdStLMW
+// lvebx LdStLVecX
+// lvehx LdStLVecX
+// lvewx LdStLVecX
+// lvsl LdStLVecX
+// lvsr LdStLVecX
+// lvx LdStLVecX
+// lvxl LdStLVecX
+// lwa LdStLWA
+// lwarx LdStLWARX
+// lwaux LdStLHA
+// lwax LdStLHA
+// lwbrx LdStGeneral
+// lwz LdStGeneral
+// lwzu LdStGeneral
+// lwzux LdStUX
+// lwzx LdStGeneral
+// mcrf BrMCR
+// mcrfs FPGeneral
+// mcrxr BrMCRX
+// mfcr SprMFCR
+// mffs IntMFFS
+// mfmsr SprMFMSR
+// mfspr SprMFSPR
+// mfsr SprMFSR
+// mfsrin SprMFSR
+// mftb SprMFTB
+// mfvscr IntMFVSCR
+// mtcrf BrMCRX
+// mtfsb0 IntMTFSB0
+// mtfsb1 IntMTFSB0
+// mtfsf IntMTFSB0
+// mtfsfi IntMTFSB0
+// mtmsr SprMTMSR
+// mtmsrd LdStLD
+// mtspr SprMTSPR
+// mtsr SprMTSR
+// mtsrd IntMTSRD
+// mtsrdin IntMTSRD
+// mtsrin SprMTSRIN
+// mtvscr IntMFVSCR
+// mulhd IntMulHD
+// mulhdu IntMulHD
+// mulhw IntMulHW
+// mulhwu IntMulHWU
+// mulld IntMulHD
+// mulli IntMulLI
+// mullw IntMulHW
+// nand IntGeneral
+// neg IntGeneral
+// nor IntGeneral
+// or IntGeneral
+// orc IntGeneral
+// ori IntGeneral
+// oris IntGeneral
+// rfi SprRFI
+// rfid IntRFID
+// rldcl IntRotateD
+// rldcr IntRotateD
+// rldic IntRotateD
+// rldicl IntRotateD
+// rldicr IntRotateD
+// rldimi IntRotateD
+// rlwimi IntRotate
+// rlwinm IntGeneral
+// rlwnm IntGeneral
+// sc SprSC
+// slbia LdStSLBIA
+// slbie LdStSLBIE
+// sld IntRotateD
+// slw IntGeneral
+// srad IntRotateD
+// sradi IntRotateD
+// sraw IntShift
+// srawi IntShift
+// srd IntRotateD
+// srw IntGeneral
+// stb LdStGeneral
+// stbu LdStGeneral
+// stbux LdStGeneral
+// stbx LdStGeneral
+// std LdStSTD
+// stdcx. LdStSTDCX
+// stdu LdStSTD
+// stdux LdStSTD
+// stdx LdStSTD
+// stfd LdStUX
+// stfdu LdStUX
+// stfdux LdStUX
+// stfdx LdStUX
+// stfiwx LdStUX
+// stfs LdStUX
+// stfsu LdStUX
+// stfsux LdStUX
+// stfsx LdStUX
+// sth LdStGeneral
+// sthbrx LdStGeneral
+// sthu LdStGeneral
+// sthux LdStGeneral
+// sthx LdStGeneral
+// stmw LdStLMW
+// stswi LdStLMW
+// stswx LdStLMW
+// stvebx LdStSTVEBX
+// stvehx LdStSTVEBX
+// stvewx LdStSTVEBX
+// stvx LdStSTVEBX
+// stvxl LdStSTVEBX
+// stw LdStGeneral
+// stwbrx LdStGeneral
+// stwcx. LdStSTWCX
+// stwu LdStGeneral
+// stwux LdStGeneral
+// stwx LdStGeneral
+// subf IntGeneral
+// subfc IntGeneral
+// subfe IntGeneral
+// subfic IntGeneral
+// subfme IntGeneral
+// subfze IntGeneral
+// sync LdStSync
+// td IntTrapD
+// tdi IntTrapD
+// tlbia LdStSLBIA
+// tlbie LdStDCBF
+// tlbsync SprTLBSYNC
+// tw IntTrapW
+// twi IntTrapW
+// vaddcuw VecGeneral
+// vaddfp VecFP
+// vaddsbs VecGeneral
+// vaddshs VecGeneral
+// vaddsws VecGeneral
+// vaddubm VecGeneral
+// vaddubs VecGeneral
+// vadduhm VecGeneral
+// vadduhs VecGeneral
+// vadduwm VecGeneral
+// vadduws VecGeneral
+// vand VecGeneral
+// vandc VecGeneral
+// vavgsb VecGeneral
+// vavgsh VecGeneral
+// vavgsw VecGeneral
+// vavgub VecGeneral
+// vavguh VecGeneral
+// vavguw VecGeneral
+// vcfsx VecFP
+// vcfux VecFP
+// vcmpbfp VecFPCompare
+// vcmpeqfp VecFPCompare
+// vcmpequb VecGeneral
+// vcmpequh VecGeneral
+// vcmpequw VecGeneral
+// vcmpgefp VecFPCompare
+// vcmpgtfp VecFPCompare
+// vcmpgtsb VecGeneral
+// vcmpgtsh VecGeneral
+// vcmpgtsw VecGeneral
+// vcmpgtub VecGeneral
+// vcmpgtuh VecGeneral
+// vcmpgtuw VecGeneral
+// vctsxs VecFP
+// vctuxs VecFP
+// vexptefp VecFP
+// vlogefp VecFP
+// vmaddfp VecFP
+// vmaxfp VecFPCompare
+// vmaxsb VecGeneral
+// vmaxsh VecGeneral
+// vmaxsw VecGeneral
+// vmaxub VecGeneral
+// vmaxuh VecGeneral
+// vmaxuw VecGeneral
+// vmhaddshs VecComplex
+// vmhraddshs VecComplex
+// vminfp VecFPCompare
+// vminsb VecGeneral
+// vminsh VecGeneral
+// vminsw VecGeneral
+// vminub VecGeneral
+// vminuh VecGeneral
+// vminuw VecGeneral
+// vmladduhm VecComplex
+// vmrghb VecPerm
+// vmrghh VecPerm
+// vmrghw VecPerm
+// vmrglb VecPerm
+// vmrglh VecPerm
+// vmrglw VecPerm
+// vmsubfp VecFP
+// vmsummbm VecComplex
+// vmsumshm VecComplex
+// vmsumshs VecComplex
+// vmsumubm VecComplex
+// vmsumuhm VecComplex
+// vmsumuhs VecComplex
+// vmulesb VecComplex
+// vmulesh VecComplex
+// vmuleub VecComplex
+// vmuleuh VecComplex
+// vmulosb VecComplex
+// vmulosh VecComplex
+// vmuloub VecComplex
+// vmulouh VecComplex
+// vnor VecGeneral
+// vor VecGeneral
+// vperm VecPerm
+// vpkpx VecPerm
+// vpkshss VecPerm
+// vpkshus VecPerm
+// vpkswss VecPerm
+// vpkswus VecPerm
+// vpkuhum VecPerm
+// vpkuhus VecPerm
+// vpkuwum VecPerm
+// vpkuwus VecPerm
+// vrefp VecFPRound
+// vrfim VecFPRound
+// vrfin VecFPRound
+// vrfip VecFPRound
+// vrfiz VecFPRound
+// vrlb VecGeneral
+// vrlh VecGeneral
+// vrlw VecGeneral
+// vrsqrtefp VecFP
+// vsel VecGeneral
+// vsl VecVSL
+// vslb VecGeneral
+// vsldoi VecPerm
+// vslh VecGeneral
+// vslo VecPerm
+// vslw VecGeneral
+// vspltb VecPerm
+// vsplth VecPerm
+// vspltisb VecPerm
+// vspltish VecPerm
+// vspltisw VecPerm
+// vspltw VecPerm
+// vsr VecVSR
+// vsrab VecGeneral
+// vsrah VecGeneral
+// vsraw VecGeneral
+// vsrb VecGeneral
+// vsrh VecGeneral
+// vsro VecPerm
+// vsrw VecGeneral
+// vsubcuw VecGeneral
+// vsubfp VecFP
+// vsubsbs VecGeneral
+// vsubshs VecGeneral
+// vsubsws VecGeneral
+// vsububm VecGeneral
+// vsububs VecGeneral
+// vsubuhm VecGeneral
+// vsubuhs VecGeneral
+// vsubuwm VecGeneral
+// vsubuws VecGeneral
+// vsum2sws VecComplex
+// vsum4sbs VecComplex
+// vsum4shs VecComplex
+// vsum4ubs VecComplex
+// vsumsws VecComplex
+// vupkhpx VecPerm
+// vupkhsb VecPerm
+// vupkhsh VecPerm
+// vupklpx VecPerm
+// vupklsb VecPerm
+// vupklsh VecPerm
+// vxor VecGeneral
+// xor IntGeneral
+// xori IntGeneral
+// xoris IntGeneral
+//
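+// As a rough sketch of how this table is consumed (the exact instruction
+// formats live in PPCInstrFormats.td and may differ from this), an
+// instruction definition names its itinerary class as a template argument:
+//
+//   def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT),
+//                       (ins GPRC:$rA, GPRC:$rB),
+//                       "add $rT, $rA, $rB", IntGeneral,
+//                       [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;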
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 0000000..f72194d
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,63 @@
+//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def G3Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+]>;
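+
+// A minimal sketch of how these itineraries get attached to a CPU model
+// (the actual Processor definitions live in PPC.td; this one is
+// illustrative):
+//
+//   def : Processor<"g3", G3Itineraries, []>;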
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 0000000..92ed20f
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,73 @@
+//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 0000000..7474ba4
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,76 @@
+//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4PlusItineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
+ InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 0000000..d282147
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,83 @@
+//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
+ InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
+ InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
+ InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
+ InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
+ InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
+ InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 0000000..425d8e6
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,152 @@
+//===- PPCSubtarget.cpp - PPC Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "PPCGenSubtarget.inc"
+#include <cstdlib>
+using namespace llvm;
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <mach/machine.h>
+
+/// GetCurrentPowerPCCPU - Returns the name of the host CPU.
+static const char *GetCurrentPowerPCCPU() {
+ host_basic_info_data_t hostInfo;
+ mach_msg_type_number_t infoCount;
+
+ infoCount = HOST_BASIC_INFO_COUNT;
+ host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ &infoCount);
+
+ if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
+
+ switch(hostInfo.cpu_subtype) {
+ case CPU_SUBTYPE_POWERPC_601: return "601";
+ case CPU_SUBTYPE_POWERPC_602: return "602";
+ case CPU_SUBTYPE_POWERPC_603: return "603";
+ case CPU_SUBTYPE_POWERPC_603e: return "603e";
+ case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
+ case CPU_SUBTYPE_POWERPC_604: return "604";
+ case CPU_SUBTYPE_POWERPC_604e: return "604e";
+ case CPU_SUBTYPE_POWERPC_620: return "620";
+ case CPU_SUBTYPE_POWERPC_750: return "750";
+ case CPU_SUBTYPE_POWERPC_7400: return "7400";
+ case CPU_SUBTYPE_POWERPC_7450: return "7450";
+ case CPU_SUBTYPE_POWERPC_970: return "970";
+ default: ;
+ }
+
+ return "generic";
+}
+#endif
+
+
+PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M,
+ const std::string &FS, bool is64Bit)
+ : TM(tm)
+ , StackAlignment(16)
+ , DarwinDirective(PPC::DIR_NONE)
+ , IsGigaProcessor(false)
+ , Has64BitSupport(false)
+ , Use64BitRegs(false)
+ , IsPPC64(is64Bit)
+ , HasAltivec(false)
+ , HasFSQRT(false)
+ , HasSTFIWX(false)
+ , HasLazyResolverStubs(false)
+ , DarwinVers(0) {
+
+ // Determine default and user specified characteristics
+ std::string CPU = "generic";
+#if defined(__APPLE__)
+ CPU = GetCurrentPowerPCCPU();
+#endif
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+
+ // If we are generating code for ppc64, verify that options make sense.
+ if (is64Bit) {
+ Has64BitSupport = true;
+ // Silently force 64-bit register use on ppc64.
+ Use64BitRegs = true;
+ }
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, ignore.
+ if (use64BitRegs() && !has64BitSupport())
+ Use64BitRegs = false;
+
+ // Determine the Darwin version from the target triple, or fall back to a
+ // default if it cannot be determined.
+ const std::string &TT = M.getTargetTriple();
+ if (TT.length() > 7) {
+ // Determine which version of darwin this is.
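+ // For example, a triple of "powerpc-apple-darwin8" yields DarwinVers == 8.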
+ size_t DarwinPos = TT.find("-darwin");
+ if (DarwinPos != std::string::npos) {
+ if (isdigit(TT[DarwinPos+7]))
+ DarwinVers = atoi(&TT[DarwinPos+7]);
+ else
+ DarwinVers = 8; // Minimum supported darwin is Tiger.
+ }
+ } else if (TT.empty()) {
+ // Try to autosense the subtarget from the host compiler.
+#if defined(__APPLE__)
+#if __APPLE_CC__ > 5400
+ DarwinVers = 9; // GCC 5400+ is Leopard.
+#else
+ DarwinVers = 8; // Minimum supported darwin is Tiger.
+#endif
+#endif
+ }
+
+ // Set up darwin-specific properties.
+ if (isDarwin()) {
+ HasLazyResolverStubs = true;
+ AsmFlavor = NewMnemonic;
+ } else {
+ AsmFlavor = OldMnemonic;
+ }
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+ // JIT mode doesn't want lazy resolver stubs, it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+}
+
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+ if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+ return false;
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() || isDecl;
+}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 0000000..176f3e1
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,160 @@
+//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_64
+ };
+}
+
+class Module;
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public TargetSubtarget {
+public:
+ enum AsmWriterFlavorTy {
+ OldMnemonic, NewMnemonic, Unset
+ };
+protected:
+ const TargetMachine &TM;
+
+ /// stackAlignment - The minimum stack frame alignment known to hold on entry
+ /// to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+ /// AsmFlavor - Which PPC asm dialect to use.
+ AsmWriterFlavorTy AsmFlavor;
+
+ /// Used by the ISel to turn on optimizations for POWER4-derived architectures.
+ bool IsGigaProcessor;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasFSQRT;
+ bool HasSTFIWX;
+ bool HasLazyResolverStubs;
+
+ /// DarwinVers - Zero if this is not a darwin platform; otherwise, the numeric
+ /// version of the darwin platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard),
+ /// etc.
+ unsigned char DarwinVers;
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified module.
+ ///
+ PPCSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS, bool is64Bit);
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The definition of this function is
+ /// auto-generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum stack frame alignment known to
+ /// hold on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+ /// getInstrItineraryData - Return the instruction itineraries based on the
+ /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
+ const char *getTargetDataString() const {
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128"
+ : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128";
+ }
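+ // (Reading the string: "E" = big-endian; "p:32:32" = 32-bit pointers with
+ // 32-bit alignment; "f64:32:64" = f64 has 32-bit ABI alignment but 64-bit
+ // preferred alignment; the i64 and f128 entries read the same way.)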
+
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const { return IsPPC64; }
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+ /// registers in 32-bit mode when possible. This can only be true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV) const;
+
+ // Specific obvious features.
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool isGigaProcessor() const { return IsGigaProcessor; }
+
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return DarwinVers != 0; }
+ /// isDarwin9 - True if this is darwin9 (leopard, 10.5) or above.
+ bool isDarwin9() const { return DarwinVers >= 9; }
+
+ /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
+ unsigned getDarwinVers() const { return DarwinVers; }
+
+ bool isMachoABI() const { return isDarwin() || IsPPC64; }
+ bool isELF32_ABI() const { return !isDarwin() && !IsPPC64; }
+
+ unsigned getAsmFlavor() const {
+ return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
+ }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
new file mode 100644
index 0000000..c69e591
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
@@ -0,0 +1,161 @@
+//===-- PPCTargetAsmInfo.cpp - PPC asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the PPC-specific TargetAsmInfo
+// properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetAsmInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM):
+ PPCTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+ PCSymbol = ".";
+ CommentString = ";";
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LessPrivateGlobalPrefix = "l";
+ StringConstantPrefix = "\1LC";
+ ConstantPoolSection = "\t.const\t";
+ JumpTableDataSection = ".const";
+ CStringSection = "\t.cstring";
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+ HasSingleParameterDotFile = false;
+ SwitchToSectionDirective = "\t.section ";
+ UsedDirective = "\t.no_dead_strip\t";
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ HiddenDirective = "\t.private_extern ";
+ SupportsExceptionHandling = true;
+ NeedsIndirectEncoding = true;
+ NeedsSet = true;
+ BSSSection = 0;
+
+ DwarfEHFrameSection =
+ ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+ DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
+ GlobalEHDirective = "\t.globl\t";
+ SupportsWeakOmittedEHFrame = false;
+
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+}
+
+/// PreferredEHDataFormat - This hook allows the target to select data
+/// format used for encoding pointers in exception handling data. Reason is
+/// 0 for data, 1 for code labels, 2 for function pointers. Global is true
+/// if the symbol can be relocated.
+unsigned
+PPCDarwinTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ if (Reason == DwarfEncoding::Functions && Global)
+ return (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4);
+ else if (Reason == DwarfEncoding::CodeLabels || !Global)
+ return DW_EH_PE_pcrel;
+ else
+ return DW_EH_PE_absptr;
+}
+
+const char *
+PPCDarwinTargetAsmInfo::getEHGlobalPrefix() const
+{
+ const PPCSubtarget* Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ if (Subtarget->getDarwinVers() > 9)
+ return PrivateGlobalPrefix;
+ else
+ return "";
+}
+
+PPCLinuxTargetAsmInfo::PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM) :
+ PPCTargetAsmInfo<ELFTargetAsmInfo>(TM) {
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ ConstantPoolSection = "\t.section .rodata.cst4\t";
+ JumpTableDataSection = ".section .rodata.cst4";
+ CStringSection = ".rodata.str";
+ StaticCtorsSection = ".section\t.ctors,\"aw\",@progbits";
+ StaticDtorsSection = ".section\t.dtors,\"aw\",@progbits";
+ UsedDirective = "\t# .no_dead_strip\t";
+ WeakRefDirective = "\t.weak\t";
+ BSSSection = "\t.section\t\".sbss\",\"aw\",@nobits";
+
+ // PPC/Linux normally uses a named section for BSS.
+ BSSSection_ = getNamedSection("\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS,
+ /* Override */ true);
+
+ // Debug Information
+ AbsoluteDebugSectionOffsets = true;
+ SupportsDebugInformation = true;
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",@progbits";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Exceptions handling
+ if (!TM.getSubtargetImpl()->isPPC64())
+ SupportsExceptionHandling = true;
+ AbsoluteEHSectionOffsets = false;
+ DwarfEHFrameSection = "\t.section\t.eh_frame,\"aw\",@progbits";
+ DwarfExceptionSection = "\t.section\t.gcc_except_table,\"a\",@progbits";
+}
+
+/// PreferredEHDataFormat - This hook allows the target to select data
+/// format used for encoding pointers in exception handling data. Reason is
+/// 0 for data, 1 for code labels, 2 for function pointers. Global is true
+/// if the symbol can be relocated.
+unsigned
+PPCLinuxTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ // We really need to write something here.
+ return TargetAsmInfo::PreferredEHDataFormat(Reason, Global);
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class PPCTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.h b/lib/Target/PowerPC/PPCTargetAsmInfo.h
new file mode 100644
index 0000000..edf40c9
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.h
@@ -0,0 +1,62 @@
+//=====-- PPCTargetAsmInfo.h - PPC asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PPC-specific TargetAsmInfo
+// classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "PPCTargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+ template <class BaseTAI>
+ struct PPCTargetAsmInfo : public BaseTAI {
+ explicit PPCTargetAsmInfo(const PPCTargetMachine &TM):
+ BaseTAI(TM) {
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ bool isPPC64 = Subtarget->isPPC64();
+
+ BaseTAI::ZeroDirective = "\t.space\t";
+ BaseTAI::SetDirective = "\t.set";
+ BaseTAI::Data64bitsDirective = isPPC64 ? "\t.quad\t" : 0;
+ BaseTAI::AlignmentIsInBytes = false;
+ BaseTAI::LCOMMDirective = "\t.lcomm\t";
+ BaseTAI::InlineAsmStart = "# InlineAsm Start";
+ BaseTAI::InlineAsmEnd = "# InlineAsm End";
+ BaseTAI::AssemblerDialect = Subtarget->getAsmFlavor();
+ }
+ };
+
+ typedef PPCTargetAsmInfo<TargetAsmInfo> PPCGenericTargetAsmInfo;
+
+ EXTERN_TEMPLATE_INSTANTIATION(class PPCTargetAsmInfo<TargetAsmInfo>);
+
+ struct PPCDarwinTargetAsmInfo : public PPCTargetAsmInfo<DarwinTargetAsmInfo> {
+ explicit PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ virtual const char *getEHGlobalPrefix() const;
+ };
+
+ struct PPCLinuxTargetAsmInfo : public PPCTargetAsmInfo<ELFTargetAsmInfo> {
+ explicit PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 0000000..ef3f0fc
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,250 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetAsmInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// PowerPCTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int PowerPCTargetMachineModule;
+int PowerPCTargetMachineModule = 0;
+
+// Register the targets
+static RegisterTarget<PPC32TargetMachine>
+X("ppc32", "PowerPC 32");
+static RegisterTarget<PPC64TargetMachine>
+Y("ppc64", "PowerPC 64");
+
+// No assembler printer by default
+PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0;
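+
+// When the asmprinter library is linked in, it is expected to register itself
+// through this hook from a static constructor, roughly like so (an
+// illustrative sketch; the real registration lives in the PPC asmprinter
+// sources, and createPPCAsmPrinterPass names whatever factory it provides):
+//
+//   namespace {
+//     struct RegisterPPCAsmPrinter {
+//       RegisterPPCAsmPrinter() {
+//         PPCTargetMachine::registerAsmPrinter(createPPCAsmPrinterPass);
+//       }
+//     } Registrar;
+//   }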
+
+const TargetAsmInfo *PPCTargetMachine::createTargetAsmInfo() const {
+ if (Subtarget.isDarwin())
+ return new PPCDarwinTargetAsmInfo(*this);
+ else
+ return new PPCLinuxTargetAsmInfo(*this);
+}
+
+unsigned PPC32TargetMachine::getJITMatchQuality() {
+#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__)
+ if (sizeof(void*) == 4)
+ return 10;
+#endif
+ return 0;
+}
+unsigned PPC64TargetMachine::getJITMatchQuality() {
+#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__)
+ if (sizeof(void*) == 8)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned PPC32TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "powerpc-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "powerpc-")
+ return 20;
+
+ // If the target triple is something non-powerpc, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned PPC64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "powerpc64-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 10 && std::string(TT.begin(), TT.begin()+10) == "powerpc64-")
+ return 20;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+
+PPCTargetMachine::PPCTargetMachine(const Module &M, const std::string &FS,
+ bool is64Bit)
+ : Subtarget(*this, M, FS, is64Bit),
+ DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
+ FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()), MachOWriterInfo(*this) {
+
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isDarwin())
+ setRelocationModel(Reloc::DynamicNoPIC);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+}
+
+/// Override this for PowerPC. Tail merging happily breaks up instruction issue
+/// groups, which typically degrades performance.
+bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
+
+PPC32TargetMachine::PPC32TargetMachine(const Module &M, const std::string &FS)
+ : PPCTargetMachine(M, FS, false) {
+}
+
+
+PPC64TargetMachine::PPC64TargetMachine(const Module &M, const std::string &FS)
+ : PPCTargetMachine(M, FS, true) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool PPCTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createPPCISelDag(*this));
+ return false;
+}
+
+bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createPPCBranchSelectionPass());
+ return false;
+}
+
+bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE) {
+ // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64()) {
+ // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
+ // instructions to materialize arbitrary global variable + function +
+ // constant pool addresses.
+ setRelocationModel(Reloc::PIC_);
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ DisableJumpTables = true;
+ } else {
+ setRelocationModel(Reloc::Static);
+ }
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCCodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE) {
+ // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64()) {
+ // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
+ // instructions to materialize arbitrary global variable + function +
+ // constant pool addresses.
+ setRelocationModel(Reloc::PIC_);
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ DisableJumpTables = true;
+ } else {
+ setRelocationModel(Reloc::Static);
+ }
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCCodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 0000000..086d2f4
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,120 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCFrameInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCJITInfo.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCMachOWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+ PPCSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ PPCInstrInfo InstrInfo;
+ PPCFrameInfo FrameInfo;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+ PPCMachOWriterInfo MachOWriterInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ // To avoid having the target depend on the asmprinter libraries, the
+ // asmprinter registers itself here at startup time by setting this function
+ // pointer to its ctor, if it is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ PPCTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
+public:
+ PPCTargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+
+ virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
+ virtual PPCTargetLowering *getTargetLowering() const {
+ return const_cast<PPCTargetLowering*>(&TLInfo);
+ }
+ virtual const PPCRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const PPCMachOWriterInfo *getMachOWriterInfo() const {
+ return &MachOWriterInfo;
+ }
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+ virtual bool getEnableTailMergeDefault() const;
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+public:
+ PPC32TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+public:
+ PPC64TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
new file mode 100644
index 0000000..688fb30
--- /dev/null
+++ b/lib/Target/PowerPC/README.txt
@@ -0,0 +1,799 @@
+//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
+
+TODO:
+* gpr0 allocation
+* implement do-loop -> bdnz transform
+* lmw/stmw pass a la arm load store optimizer for prolog/epilog
+
+===-------------------------------------------------------------------------===
+
+Support 'update' load/store instructions. These are cracked on the G5, but are
+still a codesize win.
+
+With preinc enabled, this:
+
+long *%test4(long *%X, long *%dest) {
+ %Y = getelementptr long* %X, int 4
+ %A = load long* %Y
+ store long %A, long* %dest
+ ret long* %Y
+}
+
+compiles to:
+
+_test4:
+ mr r2, r3
+ lwzu r5, 32(r2)
+ lwz r3, 36(r3)
+ stw r5, 0(r4)
+ stw r3, 4(r4)
+ mr r3, r2
+ blr
+
+with -sched=list-burr, I get:
+
+_test4:
+ lwz r2, 36(r3)
+ lwzu r5, 32(r3)
+ stw r2, 4(r4)
+ stw r5, 0(r4)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We compile the hottest inner loop of viterbi to:
+
+ li r6, 0
+ b LBB1_84 ;bb432.i
+LBB1_83: ;bb420.i
+ lbzx r8, r5, r7
+ addi r6, r7, 1
+ stbx r8, r4, r7
+LBB1_84: ;bb432.i
+ mr r7, r6
+ cmplwi cr0, r7, 143
+ bne cr0, LBB1_83 ;bb420.i
+
+The CBE manages to produce:
+
+ li r0, 143
+ mtctr r0
+loop:
+ lbzx r2, r2, r11
+ stbx r0, r2, r9
+ addi r2, r2, 1
+ bdz later
+ b loop
+
+This could be much better (bdnz instead of bdz) but it still beats us. If we
+produced this with bdnz, the loop would be a single dispatch group.
+
+===-------------------------------------------------------------------------===
+
+Compile:
+
+void foo(int *P) {
+ if (P) *P = 0;
+}
+
+into:
+
+_foo:
+ cmpwi cr0,r3,0
+ beqlr cr0
+ li r0,0
+ stw r0,0(r3)
+ blr
+
+This is effectively a simple form of predication.
+
+===-------------------------------------------------------------------------===
+
+Lump the constant pool for each function into ONE pic object, and reference
+pieces of it as offsets from the start. For functions like this (contrived
+to have lots of constants obviously):
+
+double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; }
+
+We generate:
+
+_X:
+ lis r2, ha16(.CPI_X_0)
+ lfd f0, lo16(.CPI_X_0)(r2)
+ lis r2, ha16(.CPI_X_1)
+ lfd f2, lo16(.CPI_X_1)(r2)
+ fmadd f0, f1, f0, f2
+ lis r2, ha16(.CPI_X_2)
+ lfd f1, lo16(.CPI_X_2)(r2)
+ lis r2, ha16(.CPI_X_3)
+ lfd f2, lo16(.CPI_X_3)(r2)
+ fmadd f1, f0, f1, f2
+ blr
+
+It would be better to materialize .CPI_X into a register, then use immediates
+off of the register to avoid the lis's. This is even more important in PIC
+mode.
+
+Note that this (and the static variable version) is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
+Here's another example (the sgn function):
+double testf(double a) {
+ return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);
+}
+
+it produces a BB like this:
+LBB1_1: ; cond_true
+ lis r2, ha16(LCPI1_0)
+ lfs f0, lo16(LCPI1_0)(r2)
+ lis r2, ha16(LCPI1_1)
+ lis r3, ha16(LCPI1_2)
+ lfs f2, lo16(LCPI1_2)(r3)
+ lfs f3, lo16(LCPI1_1)(r2)
+ fsub f0, f0, f1
+ fsel f1, f0, f2, f3
+ blr
+
+===-------------------------------------------------------------------------===
+
+PIC Code Gen IPO optimization:
+
+Squish small scalar globals together into a single global struct, allowing the
+address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size
+of the GOT on targets with one).
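+
+For instance (an illustrative C sketch; the real change would be an IPO pass
+over the module's globals):
+
+static int a, b, c;                     /* three addresses, three PIC loads */
+
+becomes
+
+static struct { int a, b, c; } globals; /* one base address, CSE'd once */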
+
+Note that this is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
+===-------------------------------------------------------------------------===
+
+Implement the Newton-Raphson method for improving estimate instructions to the
+correct accuracy, and implement divide as multiply-by-reciprocal when the same
+divisor has more than one use. Itanium will want this too.
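+
+For reference, one refinement step for a reciprocal estimate looks like this
+(a plain C sketch; fres_estimate below is a stand-in for the hardware estimate
+instruction, not a real API):
+
+float refine_recip(float x0, float b) {
+  return x0 * (2.0f - b * x0);  /* x1 = x0*(2 - b*x0); doubles correct bits */
+}
+
+so a/b becomes a * refine_recip(fres_estimate(b), b), iterated until the
+estimate is accurate enough for the type.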
+
+===-------------------------------------------------------------------------===
+
+Compile offsets from allocas:
+
+int *%test() {
+ %X = alloca { int, int }
+ %Y = getelementptr {int,int}* %X, int 0, uint 1
+ ret int* %Y
+}
+
+into a single add, not two:
+
+_test:
+ addi r2, r1, -8
+ addi r3, r2, 4
+ blr
+
+--> important for C++.
+
+===-------------------------------------------------------------------------===
+
+No loads or stores of the constants should be needed:
+
+struct foo { double X, Y; };
+void xxx(struct foo F);
+void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
+
+===-------------------------------------------------------------------------===
+
+Darwin Stub LICM optimization:
+
+Loops like this:
+
+ for (...) bar();
+
+Have to go through an indirect stub if bar is external or linkonce. It would
+be better to compile it as:
+
+ fp = &bar;
+ for (...) fp();
+
+which only computes the address of bar once (instead of each time through the
+stub). This is Darwin specific and would have to be done in the code generator.
+Probably not a win on x86.
+
+===-------------------------------------------------------------------------===
+
+Simple IPO for argument passing, change:
+ void foo(int X, double Y, int Z) -> void foo(int X, int Z, double Y)
+
+the Darwin ABI specifies that any integer arguments in the first 32 bytes worth
+of arguments get assigned to r3 through r10. That is, if you have a function
+foo(int, double, int) you get r3, f1, r6, since the 64 bit double ate up the
+argument bytes for r4 and r5. The trick then would be to shuffle the argument
+order for functions we can internalize so that the maximum number of
+integers/pointers get passed in regs before you see any of the fp arguments.
+
+Instead of implementing this, it would actually probably be easier to just
+implement a PPC fastcc, where we could do whatever we wanted to the CC,
+including having this work sanely.
+
+===-------------------------------------------------------------------------===
+
+Fix Darwin FP-In-Integer Registers ABI
+
+Darwin passes doubles in structures in integer registers, which is very very
+bad. Add something like a BIT_CONVERT to LLVM, then do an i-p transformation
+that percolates these things out of functions.
+
+Check out how horrible this is:
+http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html
+
+This is an extension of "interprocedural CC unmunging" that can't be done with
+just fastcc.
+
+===-------------------------------------------------------------------------===
+
+Compile this:
+
+int foo(int a) {
+ int b = (a < 8);
+ if (b) {
+ return b * 3; // ignore the fact that this is always 3.
+ } else {
+ return 2;
+ }
+}
+
+into something not this:
+
+_foo:
+1) cmpwi cr7, r3, 8
+ mfcr r2, 1
+ rlwinm r2, r2, 29, 31, 31
+1) cmpwi cr0, r3, 7
+ bgt cr0, LBB1_2 ; UnifiedReturnBlock
+LBB1_1: ; then
+ rlwinm r2, r2, 0, 31, 31
+ mulli r3, r2, 3
+ blr
+LBB1_2: ; UnifiedReturnBlock
+ li r3, 2
+ blr
+
+In particular, the two compares (marked 1) could be shared by reversing one.
+This could be done in the dag combiner, by swapping a BR_CC when a SETCC of the
+same operands (but backwards) exists. In this case, this wouldn't save us
+anything though, because the compares still wouldn't be shared.
+
+===-------------------------------------------------------------------------===
+
+We should custom expand setcc instead of pretending that we have it. That
+would allow us to expose the access of the crbit after the mfcr, allowing
+that access to be trivially folded into other ops. A simple example:
+
+int foo(int a, int b) { return (a < b) << 4; }
+
+compiles into:
+
+_foo:
+ cmpw cr7, r3, r4
+ mfcr r2, 1
+ rlwinm r2, r2, 29, 31, 31
+ slwi r3, r2, 4
+ blr
+
+===-------------------------------------------------------------------------===
+
+Fold add and sub with constant into non-extern, non-weak addresses so this:
+
+static int a;
+void bar(int b) { a = b; }
+void foo(unsigned char *c) {
+ *c = a;
+}
+
+So that
+
+_foo:
+ lis r2, ha16(_a)
+ la r2, lo16(_a)(r2)
+ lbz r2, 3(r2)
+ stb r2, 0(r3)
+ blr
+
+Becomes
+
+_foo:
+ lis r2, ha16(_a+3)
+ lbz r2, lo16(_a+3)(r2)
+ stb r2, 0(r3)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We generate really bad code for this:
+
+int f(signed char *a, _Bool b, _Bool c) {
+ signed char t = 0;
+ if (b) t = *a;
+ if (c) *a = t;
+}
+
+===-------------------------------------------------------------------------===
+
+This:
+int test(unsigned *P) { return *P >> 24; }
+
+Should compile to:
+
+_test:
+ lbz r3,0(r3)
+ blr
+
+not:
+
+_test:
+ lwz r2, 0(r3)
+ srwi r3, r2, 24
+ blr
+
+===-------------------------------------------------------------------------===
+
+On the G5, logical CR operations are more expensive in their three
+address form: ops that read/write the same register are half as expensive as
+those that read from two registers that are different from their destination.
+
+We should model this with two separate instructions. The isel should generate
+the "two address" form of the instructions. When the register allocator
+detects that it needs to insert a copy due to the two-addresness of the CR
+logical op, it will invoke PPCInstrInfo::convertToThreeAddress. At this point
+we can convert to the "three address" instruction, to save code space.
+
+This only matters when we start generating cr logical ops.
+
+===-------------------------------------------------------------------------===
+
+We should compile these two functions to the same thing:
+
+#include <stdlib.h>
+void f(int a, int b, int *P) {
+ *P = (a-b)>=0?(a-b):(b-a);
+}
+void g(int a, int b, int *P) {
+ *P = abs(a-b);
+}
+
+Further, they should compile to something better than:
+
+_g:
+ subf r2, r4, r3
+ subfic r3, r2, 0
+ cmpwi cr0, r2, -1
+ bgt cr0, LBB2_2 ; entry
+LBB2_1: ; entry
+ mr r2, r3
+LBB2_2: ; entry
+ stw r2, 0(r5)
+ blr
+
+GCC produces:
+
+_g:
+ subf r4,r4,r3
+ srawi r2,r4,31
+ xor r0,r2,r4
+ subf r0,r2,r0
+ stw r0,0(r5)
+ blr
+
+... which is much nicer.
+
+This theoretically may help improve twolf slightly (used in dimbox.c:142?).
+
+===-------------------------------------------------------------------------===
+
+int foo(int N, int ***W, int **TK, int X) {
+ int t, i;
+
+ for (t = 0; t < N; ++t)
+ for (i = 0; i < 4; ++i)
+ W[t / X][i][t % X] = TK[i][t];
+
+ return 5;
+}
+
+We generate relatively atrocious code for this loop compared to gcc.
+
+We could also strength reduce the rem and the div:
+http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
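+
+One incremental form that removes the div/rem from every iteration (a plain C
+sketch of what strength reduction would aim for):
+
+int q = 0, r = 0;                /* q == t / X and r == t % X by induction */
+for (t = 0; t < N; ++t) {
+  for (i = 0; i < 4; ++i)
+    W[q][i][r] = TK[i][t];
+  if (++r == X) { r = 0; ++q; }  /* carry the remainder into the quotient */
+}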
+
+===-------------------------------------------------------------------------===
+
+float foo(float X) { return (int)(X); }
+
+Currently produces:
+
+_foo:
+ fctiwz f0, f1
+ stfd f0, -8(r1)
+ lwz r2, -4(r1)
+ extsw r2, r2
+ std r2, -16(r1)
+ lfd f0, -16(r1)
+ fcfid f0, f0
+ frsp f1, f0
+ blr
+
+We could use a target dag combine to turn the lwz/extsw into an lwa when the
+lwz has a single use. Since LWA is cracked anyway, this would be a codesize
+win only.
+
+===-------------------------------------------------------------------------===
+
+We generate ugly code for this:
+
+void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
+ unsigned code = 0;
+ if(dx < -dw) code |= 1;
+ if(dx > dw) code |= 2;
+ if(dy < -dw) code |= 4;
+ if(dy > dw) code |= 8;
+ if(dz < -dw) code |= 16;
+ if(dz > dw) code |= 32;
+ *ret = code;
+}
+
+===-------------------------------------------------------------------------===
+
+Complete the "signed i32 to FP conversion using 64-bit registers"
+transformation (good for PI). See PPCISelLowering.cpp, this comment:
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+Also, if the registers are spilled to the stack, we have to ensure that all
+64 bits of them are saved/restored, otherwise we will miscompile the code. It
+sounds like we need to get the 64-bit register classes going.
+
+===-------------------------------------------------------------------------===
+
+%struct.B = type { i8, [3 x i8] }
+
+define void @bar(%struct.B* %b) {
+entry:
+ %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
+ %tmp = load i32* %tmp ; <uint> [#uses=1]
+ %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
+ %tmp4 = load i32* %tmp3 ; <uint> [#uses=1]
+ %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2]
+ %tmp9 = load i32* %tmp8 ; <uint> [#uses=1]
+ %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1]
+ %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1]
+ %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1]
+ %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1]
+ %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1]
+ %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1]
+ store i32 %tmp13, i32* %tmp8
+ ret void
+}
+
+We emit:
+
+_foo:
+ lwz r2, 0(r3)
+ slwi r4, r2, 1
+ or r4, r4, r2
+ rlwimi r2, r4, 0, 0, 0
+ stw r2, 0(r3)
+ blr
+
+We could collapse a bunch of those ORs and ANDs and generate the following
+equivalent code:
+
+_foo:
+ lwz r2, 0(r3)
+ rlwinm r4, r2, 1, 0, 0
+ or r2, r2, r4
+ stw r2, 0(r3)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We compile:
+
+unsigned test6(unsigned x) {
+ return ((x & 0x00FF0000) >> 16) | ((x & 0x000000FF) << 16);
+}
+
+into:
+
+_test6:
+ lis r2, 255
+ rlwinm r3, r3, 16, 0, 31
+ ori r2, r2, 255
+ and r3, r3, r2
+ blr
+
+GCC gets it down to:
+
+_test6:
+ rlwinm r0,r3,16,8,15
+ rlwinm r3,r3,16,24,31
+ or r3,r3,r0
+ blr
+
+
+===-------------------------------------------------------------------------===
+
+Consider a function like this:
+
+float foo(float X) { return X + 1234.4123f; }
+
+The FP constant ends up in the constant pool, so we need to get the LR register.
+ This ends up producing code like this:
+
+_foo:
+.LBB_foo_0: ; entry
+ mflr r11
+*** stw r11, 8(r1)
+ bl "L00000$pb"
+"L00000$pb":
+ mflr r2
+ addis r2, r2, ha16(.CPI_foo_0-"L00000$pb")
+ lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2)
+ fadds f1, f1, f0
+*** lwz r11, 8(r1)
+ mtlr r11
+ blr
+
+This is functional, but there is no reason to spill the LR register all the way
+to the stack (the two marked instrs): spilling it to a GPR is quite enough.
+
+Implementing this will require some codegen improvements. Nate writes:
+
+"So basically what we need to support the "no stack frame save and restore" is a
+generalization of the LR optimization to "callee-save regs".
+
+Currently, we have LR marked as a callee-save reg. The register allocator sees
+that it's callee save, and spills it directly to the stack.
+
+Ideally, something like this would happen:
+
+LR would be in a separate register class from the GPRs. The class of LR would be
+marked "unspillable". When the register allocator came across an unspillable
+reg, it would ask "what is the best class to copy this into that I *can* spill"
+If it gets a class back, which it will in this case (the gprs), it grabs a free
+register of that class. If it is then later necessary to spill that reg, so be
+it."
+
+===-------------------------------------------------------------------------===
+
+We compile this:
+int test(_Bool X) {
+ return X ? 524288 : 0;
+}
+
+to:
+_test:
+ cmplwi cr0, r3, 0
+ lis r2, 8
+ li r3, 0
+ beq cr0, LBB1_2 ;entry
+LBB1_1: ;entry
+ mr r3, r2
+LBB1_2: ;entry
+ blr
+
+instead of:
+_test:
+ addic r2,r3,-1
+ subfe r0,r2,r3
+ slwi r3,r0,19
+ blr
+
+This sort of thing occurs a lot due to globalopt.
+
+===-------------------------------------------------------------------------===
+
+We currently compile 32-bit bswap:
+
+declare i32 @llvm.bswap.i32(i32 %A)
+define i32 @test(i32 %A) {
+ %B = call i32 @llvm.bswap.i32(i32 %A)
+ ret i32 %B
+}
+
+to:
+
+_test:
+ rlwinm r2, r3, 24, 16, 23
+ slwi r4, r3, 24
+ rlwimi r2, r3, 8, 24, 31
+ rlwimi r4, r3, 8, 8, 15
+ rlwimi r4, r2, 0, 16, 31
+ mr r3, r4
+ blr
+
+it would be more efficient to produce:
+
+_foo: mr r0,r3
+ rlwinm r3,r3,8,0xffffffff
+ rlwimi r3,r0,24,0,7
+ rlwimi r3,r0,24,16,23
+ blr
+
+===-------------------------------------------------------------------------===
+
+test/CodeGen/PowerPC/2007-03-24-cntlzd.ll compiles to:
+
+__ZNK4llvm5APInt17countLeadingZerosEv:
+ ld r2, 0(r3)
+ cntlzd r2, r2
+ or r2, r2, r2 <<-- silly.
+ addi r3, r2, -64
+ blr
+
+The dead or is a 'truncate' from 64- to 32-bits.
+
+===-------------------------------------------------------------------------===
+
+We generate horrible ppc code for this:
+
+#define N 2000000
+double a[N],c[N];
+void simpleloop() {
+ int j;
+ for (j=0; j<N; j++)
+ c[j] = a[j];
+}
+
+LBB1_1: ;bb
+ lfdx f0, r3, r4
+ addi r5, r5, 1 ;; Extra IV for the exit value compare.
+ stfdx f0, r2, r4
+ addi r4, r4, 8
+
+ xoris r6, r5, 30 ;; This is due to a large immediate.
+ cmplwi cr0, r6, 33920
+ bne cr0, LBB1_1
+
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+__Z11no_overflowjj:
+ add r4,r3,r4
+ subfc r3,r3,r4
+ li r3,0
+ adde r3,r3,r3
+ blr
+
+(or better) not:
+
+__Z11no_overflowjj:
+ add r2, r4, r3
+ cmplw cr7, r2, r3
+ mfcr r2
+ rlwinm r2, r2, 29, 31, 31
+ xori r3, r2, 1
+ blr
+
+//===---------------------------------------------------------------------===//
+
+We compile some FP comparisons into an mfcr with two rlwinms and an or. For
+example:
+#include <math.h>
+int test(double x, double y) { return islessequal(x, y);}
+int test2(double x, double y) { return islessgreater(x, y);}
+int test3(double x, double y) { return !islessequal(x, y);}
+
+Compiles into (all three are similar, but the bits differ):
+
+_test:
+ fcmpu cr7, f1, f2
+ mfcr r2
+ rlwinm r3, r2, 29, 31, 31
+ rlwinm r2, r2, 31, 31, 31
+ or r3, r2, r3
+ blr
+
+GCC compiles this into:
+
+ _test:
+ fcmpu cr7,f1,f2
+ cror 30,28,30
+ mfcr r3
+ rlwinm r3,r3,31,1
+ blr
+
+which is more efficient and can use mfocrf. See PR642 for some more context.
+
+//===---------------------------------------------------------------------===//
+
+void foo(float *data, float d) {
+ long i;
+ for (i = 0; i < 8000; i++)
+ data[i] = d;
+}
+void foo2(float *data, float d) {
+ long i;
+ data--;
+ for (i = 0; i < 8000; i++) {
+ data[1] = d;
+ data++;
+ }
+}
+
+These compile to:
+
+_foo:
+ li r2, 0
+LBB1_1: ; bb
+ addi r4, r2, 4
+ stfsx f1, r3, r2
+ cmplwi cr0, r4, 32000
+ mr r2, r4
+ bne cr0, LBB1_1 ; bb
+ blr
+_foo2:
+ li r2, 0
+LBB2_1: ; bb
+ addi r4, r2, 4
+ stfsx f1, r3, r2
+ cmplwi cr0, r4, 32000
+ mr r2, r4
+ bne cr0, LBB2_1 ; bb
+ blr
+
+The 'mr' could be eliminated by folding the add into the cmp better.
+
+//===---------------------------------------------------------------------===//
+Codegen for the following (low-probability) case deteriorated considerably
+when the correctness fixes for unordered comparisons went in (PR 642, 58871).
+It should be possible to recover the code quality described in the comments.
+
+; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3
+; This should produce one 'or' or 'cror' instruction per function.
+
+; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3
+; PR2964
+
+define i32 @test(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1]
+ %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp345
+}
+
+define i32 @test2(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1]
+ %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp345
+}
+
+define i32 @test3(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1]
+ %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp34
+}
+//===----------------------------------------------------------------------===//
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
+
+; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and
+; should not be generated except with -enable-finite-only-fp-math or the like).
+; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to
+; recognize a more elaborate tree than a simple SETxx.
+
+define double @test_FNEG_sel(double %A, double %B, double %C) {
+ %D = sub double -0.000000e+00, %A ; <double> [#uses=1]
+ %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1]
+ %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]
+ ret double %E
+}
+
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
new file mode 100644
index 0000000..1e4c6fb
--- /dev/null
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -0,0 +1,211 @@
+//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===//
+
+Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector
+registers, to generate better spill code.
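+
+A minimal sketch of the load half (assuming vector reloads are emitted as LVX
+with a zero base register and a frame-index operand; the hook has to match
+whatever form the spiller actually produces):
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                           int &FrameIndex) const {
+  // ... existing scalar cases (LWZ/LFS/LFD/LD) ...
+  if (MI->getOpcode() == PPC::LVX &&
+      MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0 &&
+      MI->getOperand(2).isFI()) {
+    FrameIndex = MI->getOperand(2).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}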
+
+//===----------------------------------------------------------------------===//
+
+The first should be a single lvx from the constant pool, the second should be
+a xor/stvx:
+
+void foo(void) {
+ int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 };
+ bar (x);
+}
+
+#include <string.h>
+void foo(void) {
+ int x[8] __attribute__((aligned(128)));
+ memset (x, 0, sizeof (x));
+ bar (x);
+}
+
+//===----------------------------------------------------------------------===//
+
+Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763
+
+When -ffast-math is on, we can use 0.0.
+
+//===----------------------------------------------------------------------===//
+
+ Consider this:
+ v4f32 Vector;
+ v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };
+
+Since we know that "Vector" is 16-byte aligned and we know the element offset
+of ".X", we should change the load into a lve*x instruction, instead of doing
+a load/store/lve*x sequence.
+
+//===----------------------------------------------------------------------===//
+
+For functions that use altivec AND have calls, we are VRSAVE'ing all call
+clobbered regs.
+
+//===----------------------------------------------------------------------===//
+
+Implement passing vectors by value into calls and receiving them as arguments.
+
+//===----------------------------------------------------------------------===//
+
+GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load
+of C1/C2/C3, then a load and vperm of Variable.
+
+//===----------------------------------------------------------------------===//
+
+We need a way to teach tblgen that some operands of an intrinsic are required to
+be constants. The verifier should enforce this constraint.
+
+//===----------------------------------------------------------------------===//
+
+We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte
+aligned stack slot, followed by a load/vperm. We should probably just store it
+to a scalar stack slot, then use lvsl/vperm to load it. If the value is already
+in memory this is a big win.
+
+//===----------------------------------------------------------------------===//
+
+extract_vector_elt of an arbitrary constant vector can be done with the
+following instructions:
+
+vTemp = vec_splat(v0,2); // 2 is the element the src is in.
+vec_ste(&destloc,0,vTemp);
+
+We can do an arbitrary non-constant value by using lvsr/perm/ste.
+
+//===----------------------------------------------------------------------===//
+
+If we want to tie instruction selection into the scheduler, we can do some
+constant formation with different instructions. For example, we can generate
+"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with
+"vsplti 0" or "vxor", each of which use different execution units, thus could
+help scheduling.
+
+This is probably only reasonable for a post-pass scheduler.
+
+//===----------------------------------------------------------------------===//
+
+For this function:
+
+void test(vector float *A, vector float *B) {
+ vector float C = (vector float)vec_cmpeq(*A, *B);
+ if (!vec_any_eq(*A, *B))
+ *B = (vector float){0,0,0,0};
+ *A = C;
+}
+
+we get the following basic block:
+
+ ...
+ lvx v2, 0, r4
+ lvx v3, 0, r3
+ vcmpeqfp v4, v3, v2
+ vcmpeqfp. v2, v3, v2
+ bne cr6, LBB1_2 ; cond_next
+
+The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the
+vcmpeqfp. result is used by a branch. This can be improved.
+
+//===----------------------------------------------------------------------===//
+
+The code generated for this is truly awful:
+
+vector float test(float a, float b) {
+ return (vector float){ 0.0, a, 0.0, 0.0};
+}
+
+LCPI1_0: ; float
+ .space 4
+ .text
+ .globl _test
+ .align 4
+_test:
+ mfspr r2, 256
+ oris r3, r2, 4096
+ mtspr 256, r3
+ lis r3, ha16(LCPI1_0)
+ addi r4, r1, -32
+ stfs f1, -16(r1)
+ addi r5, r1, -16
+ lfs f0, lo16(LCPI1_0)(r3)
+ stfs f0, -32(r1)
+ lvx v2, 0, r4
+ lvx v3, 0, r5
+ vmrghw v3, v3, v2
+ vspltw v2, v2, 0
+ vmrghw v2, v2, v3
+ mtspr 256, r2
+ blr
+
+//===----------------------------------------------------------------------===//
+
+int foo(vector float *x, vector float *y) {
+ if (vec_all_eq(*x,*y)) return 3245;
+ else return 12;
+}
+
+A predicate compare being used in a select_cc should have the same peephole
+applied to it as a predicate compare used by a br_cc. There should be no
+mfcr here:
+
+_foo:
+ mfspr r2, 256
+ oris r5, r2, 12288
+ mtspr 256, r5
+ li r5, 12
+ li r6, 3245
+ lvx v2, 0, r4
+ lvx v3, 0, r3
+ vcmpeqfp. v2, v3, v2
+ mfcr r3, 2
+ rlwinm r3, r3, 25, 31, 31
+ cmpwi cr0, r3, 0
+ bne cr0, LBB1_2 ; entry
+LBB1_1: ; entry
+ mr r6, r5
+LBB1_2: ; entry
+ mr r3, r6
+ mtspr 256, r2
+ blr
+
+//===----------------------------------------------------------------------===//
+
+CodeGen/PowerPC/vec_constants.ll has an and operation that should be
+codegen'd to andc. The issue is that the 'all ones' build vector is
+SelectNodeTo'd to a VSPLTISB instruction node before the and/xor is selected,
+which prevents the vnot pattern from matching.
+
+
+//===----------------------------------------------------------------------===//
+
+An alternative to the store/store/load approach for illegal insert element
+lowering would be:
+
+1. store element to any ol' slot
+2. lvx the slot
+3. lvsl 0; splat index; vcmpeq to generate a select mask
+4. lvsl slot + x; vperm to rotate result into correct slot
+5. vsel result together.
+
+//===----------------------------------------------------------------------===//
+
+Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples:
+
+#include <altivec.h>
+ int f(vector float a, vector float b)
+ {
+ int aa = 0;
+ if (vec_all_ge(a, b))
+ aa |= 0x1;
+ if (vec_any_ge(a,b))
+ aa |= 0x2;
+ return aa;
+}
+
+vector float f(vector float a, vector float b) {
+ if (vec_any_eq(a, b))
+ return a;
+ else
+ return b;
+}
+
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
new file mode 100644
index 0000000..f68cf0e
--- /dev/null
+++ b/lib/Target/README.txt
@@ -0,0 +1,1679 @@
+Target Independent Opportunities:
+
+//===---------------------------------------------------------------------===//
+
+With the recent changes to make the implicit def/use set explicit in
+machineinstrs, we should change the target descriptions for 'call' instructions
+so that the .td files don't list all the call-clobbered registers as implicit
+defs. Instead, these should be added by the code generator (e.g. on the dag).
+
+This has a number of uses:
+
+1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
+ for their different impdef sets.
+2. Targets with multiple calling convs (e.g. x86) which have different clobber
+ sets don't need copies of call instructions.
+3. 'Interprocedural register allocation' can be done to reduce the clobber sets
+ of calls.
+
+//===---------------------------------------------------------------------===//
+
+Make the PPC branch selector target independent.
+
+//===---------------------------------------------------------------------===//
+
+Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and
+precision don't matter (-ffast-math). Misc/mandel will like this. :) This isn't
+safe in general, even on darwin: x*x can overflow to infinity even when
+hypot(x,y) is representable, and the libm implementation of hypot also special
+cases when x/y are exactly zero to get signed zeros etc right.
+
+//===---------------------------------------------------------------------===//
+
+Solve this DAG isel folding deficiency:
+
+int X, Y;
+
+void fn1(void)
+{
+ X = X | (Y << 3);
+}
+
+compiles to
+
+fn1:
+ movl Y, %eax
+ shll $3, %eax
+ orl X, %eax
+ movl %eax, X
+ ret
+
+The problem is the store's chain operand is not the load X but rather
+a TokenFactor of the load X and load Y, which prevents the folding.
+
+There are two ways to fix this:
+
+1. The dag combiner can start using alias analysis to realize that y/x
+ don't alias, making the store to X not dependent on the load from Y.
+2. The generated isel could be made smarter in the case it can't
+ disambiguate the pointers.
+
+Number 1 is the preferred solution.
+
+This has been "fixed" by a TableGen hack. But that is a short term workaround
+which will be removed once the proper fix is made.
+
+//===---------------------------------------------------------------------===//
+
+On targets with expensive 64-bit multiply, we could LSR this:
+
+for (i = ...; ++i) {
+  x = 1ULL << i;
+}
+
+into:
+
+  long long tmp = 1;
+  for (i = ...; ++i, tmp += tmp)
+    x = tmp;
+
+This would be a win on ppc32, but not x86 or ppc64.
+
+//===---------------------------------------------------------------------===//
+
+Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0)
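+
+A C-level sketch of the shrink (hypothetical names; the byte offset assumes a
+little-endian target, where the sign byte of an i32 lives at P+3):
+
+  int sign_test(int *P)        { return *P < 0; }                   /* loadi32 */
+  int sign_test_shrunk(int *P) { return ((signed char*)P)[3] < 0; } /* loadi8 */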
+
+//===---------------------------------------------------------------------===//
+
+Reassociate should turn: X*X*X*X -> t=(X*X) (t*t) to eliminate a multiply.
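+
+For instance (a sketch; names are hypothetical):
+
+  int pow4(int x)         { return x*x*x*x; }           /* 3 multiplies */
+  int pow4_reassoc(int x) { int t = x*x; return t*t; }  /* 2 multiplies */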
+
+//===---------------------------------------------------------------------===//
+
+An interesting testcase for add/shift/mul reassociation:
+
+int bar(int x, int y) {
+ return x*x*x+y+x*x*x*x*x*y*y*y*y;
+}
+int foo(int z, int n) {
+ return bar(z, n) + bar(2*z, 2*n);
+}
+
+Reassociate should handle the example in GCC PR16157.
+
+//===---------------------------------------------------------------------===//
+
+These two functions should generate the same code on big-endian systems:
+
+int g(int *j,int *l) { return memcmp(j,l,4); }
+int h(int *j, int *l) { return *j - *l; }
+
+This could be done in SelectionDAGISel.cpp, along with other special cases,
+for 1, 2, 4, and 8 bytes.
+
+//===---------------------------------------------------------------------===//
+
+It would be nice to revert this patch:
+http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
+
+And teach the dag combiner enough to simplify the code expanded before
+legalize. It seems plausible that this knowledge would let it simplify other
+stuff too.
+
+//===---------------------------------------------------------------------===//
+
+For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal
+to the type size. It works but can be overly conservative, as the alignment of
+specific vector types is target dependent.
+
+//===---------------------------------------------------------------------===//
+
+We should produce an unaligned load from code like this:
+
+v4sf example(float *P) {
+ return (v4sf){P[0], P[1], P[2], P[3] };
+}
+
+//===---------------------------------------------------------------------===//
+
+Add support for conditional increments, and other related patterns. Instead
+of:
+
+ movl 136(%esp), %eax
+ cmpl $0, %eax
+ je LBB16_2 #cond_next
+LBB16_1: #cond_true
+ incl _foo
+LBB16_2: #cond_next
+
+emit:
+ movl _foo, %eax
+ cmpl $1, %edi
+ sbbl $-1, %eax
+ movl %eax, _foo
+
+//===---------------------------------------------------------------------===//
+
+Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
+
+Expand these to calls of sin/cos and stores:
+ void sincos(double x, double *sin, double *cos);
+ void sincosf(float x, float *sin, float *cos);
+ void sincosl(long double x, long double *sin, long double *cos);
+
+Doing so could allow SROA of the destination pointers. See also:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687
+
+This is now easily doable with MRVs. We could even make an intrinsic for this
+if anyone cared enough about sincos.
+
+//===---------------------------------------------------------------------===//
+
+Turn this into a single byte store with no load (the other 3 bytes are
+unmodified):
+
+define void @test(i32* %P) {
+ %tmp = load i32* %P
+ %tmp14 = or i32 %tmp, 3305111552
+ %tmp15 = and i32 %tmp14, 3321888767
+ store i32 %tmp15, i32* %P
+ ret void
+}
+
+//===---------------------------------------------------------------------===//
+
+dag/inst combine "clz(x)>>5 -> x==0" for 32-bit x.
+
+Compile:
+
+int bar(int x)
+{
+ int t = __builtin_clz(x);
+ return -(t>>5);
+}
+
+to:
+
+_bar: addic r3,r3,-1
+ subfe r3,r3,r3
+ blr
+
+//===---------------------------------------------------------------------===//
+
+Legalize should lower cttz like this:
+ cttz(x) = popcnt((x-1) & ~x)
+
+on targets that have popcnt but not cttz (itanium, what else?). The analogous
+ctlz lowering needs the bits below the leading one smeared in first.
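+
+A minimal C sketch of the identity, assuming 32-bit unsigned and the GCC
+popcount builtin:
+
+  unsigned cttz32(unsigned x) {
+    /* (x-1) & ~x sets exactly the bits below the lowest set bit of x,
+       so its popcount is the number of trailing zeros (32 for x == 0). */
+    return __builtin_popcount((x - 1) & ~x);
+  }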
+
+//===---------------------------------------------------------------------===//
+
+quantum_sigma_x in 462.libquantum contains the following loop:
+
+ for(i=0; i<reg->size; i++)
+ {
+ /* Flip the target bit of each basis state */
+ reg->node[i].state ^= ((MAX_UNSIGNED) 1 << target);
+ }
+
+where MAX_UNSIGNED (the type of state) is a 64-bit int. On a 32-bit platform
+it would be just
+so cool to turn it into something like:
+
+ long long Res = ((MAX_UNSIGNED) 1 << target);
+ if (target < 32) {
+ for(i=0; i<reg->size; i++)
+ reg->node[i].state ^= Res & 0xFFFFFFFFULL;
+ } else {
+ for(i=0; i<reg->size; i++)
+ reg->node[i].state ^= Res & 0xFFFFFFFF00000000ULL;
+ }
+
+... which would only do one 32-bit XOR per loop iteration instead of two.
+
+It would also be nice to recognize that reg->size doesn't alias reg->node[i],
+but alas...
+
+//===---------------------------------------------------------------------===//
+
+This isn't recognized as bswap by instcombine (yes, it really is bswap):
+
+unsigned long reverse(unsigned v) {
+ unsigned t;
+ t = v ^ ((v << 16) | (v >> 16));
+ t &= ~0xff0000;
+ v = (v << 24) | (v >> 8);
+ return v ^ (t >> 8);
+}
+
+//===---------------------------------------------------------------------===//
+
+These idioms should be recognized as popcount (see PR1488):
+
+unsigned countbits_slow(unsigned v) {
+ unsigned c;
+ for (c = 0; v; v >>= 1)
+ c += v & 1;
+ return c;
+}
+unsigned countbits_fast(unsigned v){
+ unsigned c;
+ for (c = 0; v; c++)
+ v &= v - 1; // clear the least significant bit set
+ return c;
+}
+
+BITBOARD = unsigned long long
+int PopCnt(register BITBOARD a) {
+ register int c=0;
+ while(a) {
+ c++;
+ a &= a - 1;
+ }
+ return c;
+}
+unsigned int popcount(unsigned int input) {
+ unsigned int count = 0;
+ for (unsigned int i = 0; i < 4 * 8; i++)
+ count += (input >> i) & 1;
+ return count;
+}
+
+//===---------------------------------------------------------------------===//
+
+These should turn into single 16-bit (unaligned?) loads on little/big endian
+processors.
+
+unsigned short read_16_le(const unsigned char *adr) {
+ return adr[0] | (adr[1] << 8);
+}
+unsigned short read_16_be(const unsigned char *adr) {
+ return (adr[0] << 8) | adr[1];
+}
+
+//===---------------------------------------------------------------------===//
+
+-instcombine should handle this transform:
+ icmp pred (sdiv X, C1), C2
+when X, C1, and C2 are unsigned. Similarly for udiv and signed operands.
+
+Currently InstCombine avoids this transform but will do it when the signs of
+the operands and the sign of the divide match. See the FIXME in
+InstructionCombining.cpp in the visitSetCondInst method after the switch case
+for Instruction::UDiv (around line 4447) for more details.
+
+The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of
+this construct.
+
+//===---------------------------------------------------------------------===//
+
+viterbi speeds up *significantly* if the various "history" related copy loops
+are turned into memcpy calls at the source level. We need a "loops to memcpy"
+pass.
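+
+The shape of such a transform (a sketch; the pass would have to prove that
+dst and src don't overlap and that the loop covers n contiguous elements):
+
+  for (i = 0; i < n; i++)          /* element-by-element copy loop */
+    dst[i] = src[i];
+  /* => */
+  memcpy(dst, src, n * sizeof *dst);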
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+typedef unsigned U32;
+typedef unsigned long long U64;
+int test (U32 *inst, U64 *regs) {
+ U64 effective_addr2;
+ U32 temp = *inst;
+ int r1 = (temp >> 20) & 0xf;
+ int b2 = (temp >> 16) & 0xf;
+ effective_addr2 = temp & 0xfff;
+ if (b2) effective_addr2 += regs[b2];
+ b2 = (temp >> 12) & 0xf;
+ if (b2) effective_addr2 += regs[b2];
+ effective_addr2 &= regs[4];
+ if ((effective_addr2 & 3) == 0)
+ return 1;
+ return 0;
+}
+
+Note that only the low 2 bits of effective_addr2 are used. On 32-bit systems,
+we don't eliminate the computation of the top half of effective_addr2 because
+we don't have whole-function selection dags. On x86, this means we use one
+extra register for the function when effective_addr2 is declared as U64 than
+when it is declared U32.
+
+//===---------------------------------------------------------------------===//
+
+Promote for i32 bswap can use i64 bswap + shr. Useful on targets with 64-bit
+regs and bswap, like itanium.
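+
+A sketch of the equivalence using GCC builtins (zero-extend, swap all eight
+bytes, then shift the interesting four back down):
+
+  unsigned bswap32_via_64(unsigned x) {
+    return (unsigned)(__builtin_bswap64((unsigned long long)x) >> 32);
+  }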
+
+//===---------------------------------------------------------------------===//
+
+LSR should know what GPR types a target has. This code:
+
+volatile short X, Y; // globals
+
+void foo(int N) {
+ int i;
+ for (i = 0; i < N; i++) { X = i; Y = i*4; }
+}
+
+produces two identical IVs (after promotion) on PPC/ARM:
+
+LBB1_1: @bb.preheader
+ mov r3, #0
+ mov r2, r3
+ mov r1, r3
+LBB1_2: @bb
+ ldr r12, LCPI1_0
+ ldr r12, [r12]
+ strh r2, [r12]
+ ldr r12, LCPI1_1
+ ldr r12, [r12]
+ strh r3, [r12]
+ add r1, r1, #1 <- [0,+,1]
+ add r3, r3, #4
+ add r2, r2, #1 <- [0,+,1]
+ cmp r1, r0
+ bne LBB1_2 @bb
+
+
+//===---------------------------------------------------------------------===//
+
+Tail call elim should be more aggressive, checking to see if the call is
+followed by an uncond branch to an exit block.
+
+; This testcase is due to tail-duplication not wanting to copy the return
+; instruction into the terminating blocks because there was other code
+; optimized out of the function after the taildup happened.
+; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call
+
+define i32 @t4(i32 %a) {
+entry:
+ %tmp.1 = and i32 %a, 1 ; <i32> [#uses=1]
+ %tmp.2 = icmp ne i32 %tmp.1, 0 ; <i1> [#uses=1]
+ br i1 %tmp.2, label %then.0, label %else.0
+
+then.0: ; preds = %entry
+ %tmp.5 = add i32 %a, -1 ; <i32> [#uses=1]
+ %tmp.3 = call i32 @t4( i32 %tmp.5 ) ; <i32> [#uses=1]
+ br label %return
+
+else.0: ; preds = %entry
+ %tmp.7 = icmp ne i32 %a, 0 ; <i1> [#uses=1]
+ br i1 %tmp.7, label %then.1, label %return
+
+then.1: ; preds = %else.0
+ %tmp.11 = add i32 %a, -2 ; <i32> [#uses=1]
+ %tmp.9 = call i32 @t4( i32 %tmp.11 ) ; <i32> [#uses=1]
+ br label %return
+
+return: ; preds = %then.1, %else.0, %then.0
+ %result.0 = phi i32 [ 0, %else.0 ], [ %tmp.3, %then.0 ],
+ [ %tmp.9, %then.1 ]
+ ret i32 %result.0
+}
+
+//===---------------------------------------------------------------------===//
+
+Tail recursion elimination is not transforming this function, because it is
+returning n, which fails the isDynamicConstant check in the accumulator
+recursion checks.
+
+long long fib(const long long n) {
+ switch(n) {
+ case 0:
+ case 1:
+ return n;
+ default:
+ return fib(n-1) + fib(n-2);
+ }
+}
+
+//===---------------------------------------------------------------------===//
+
+Tail recursion elimination should handle:
+
+int pow2m1(int n) {
+ if (n == 0)
+ return 0;
+ return 2 * pow2m1 (n - 1) + 1;
+}
+
+Also, multiplies can be turned into SHL's, so they should be handled as if
+they were associative. "return foo() << 1" can be tail recursion eliminated.
+
+//===---------------------------------------------------------------------===//
+
+Argument promotion should promote arguments for recursive functions, like
+this:
+
+; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep x.val
+
+define internal i32 @foo(i32* %x) {
+entry:
+ %tmp = load i32* %x ; <i32> [#uses=0]
+ %tmp.foo = call i32 @foo( i32* %x ) ; <i32> [#uses=1]
+ ret i32 %tmp.foo
+}
+
+define i32 @bar(i32* %x) {
+entry:
+ %tmp3 = call i32 @foo( i32* %x ) ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+//===---------------------------------------------------------------------===//
+
+"basicaa" should know how to look through "or" instructions that act like add
+instructions. For example in this code, the x*4+1 is turned into x*4 | 1, and
+basicaa can't analyze the array subscript, leading to duplicated loads in the
+generated code:
+
+void test(int X, int Y, int a[]) {
+  int i;
+  for (i = 2; i < 1000; i += 4) {
+    a[i+0] = a[i-1+0]*a[i-2+0];
+    a[i+1] = a[i-1+1]*a[i-2+1];
+    a[i+2] = a[i-1+2]*a[i-2+2];
+    a[i+3] = a[i-1+3]*a[i-2+3];
+  }
+}
+
+BasicAA also doesn't do this for add. It needs to know that &A[i+1] != &A[i].
+
+//===---------------------------------------------------------------------===//
+
+We should investigate an instruction sinking pass. Consider this silly
+example in pic mode:
+
+#include <assert.h>
+void foo(int x) {
+ assert(x);
+ //...
+}
+
+we compile this to:
+_foo:
+ subl $28, %esp
+ call "L1$pb"
+"L1$pb":
+ popl %eax
+ cmpl $0, 32(%esp)
+ je LBB1_2 # cond_true
+LBB1_1: # return
+ # ...
+ addl $28, %esp
+ ret
+LBB1_2: # cond_true
+...
+
+The PIC base computation (call+popl) is only used on one path through the
+code, but is currently always computed in the entry block. It would be
+better to sink the picbase computation down into the block for the
+assertion, as it is the only one that uses it. This happens for a lot of
+code with early outs.
+
+Another example is loads of arguments, which are usually emitted into the
+entry block on targets like x86. If not used in all paths through a
+function, they should be sunk into the ones that do.
+
+In this case, whole-function-isel would also handle this.
+
+//===---------------------------------------------------------------------===//
+
+Investigate lowering of sparse switch statements into perfect hash tables:
+http://burtleburtle.net/bob/hash/perfect.html
+
+//===---------------------------------------------------------------------===//
+
+We should turn things like "load+fabs+store" and "load+fneg+store" into the
+corresponding integer operations. On a yonah, this loop:
+
+double a[256];
+void foo() {
+ int i, b;
+ for (b = 0; b < 10000000; b++)
+ for (i = 0; i < 256; i++)
+ a[i] = -a[i];
+}
+
+is twice as slow as this loop:
+
+long long a[256];
+void foo() {
+ int i, b;
+ for (b = 0; b < 10000000; b++)
+ for (i = 0; i < 256; i++)
+ a[i] ^= (1ULL << 63);
+}
+
+and I suspect other processors are similar. On X86 in particular this is a
+big win because doing this with integers allows the use of read/modify/write
+instructions.
+
+//===---------------------------------------------------------------------===//
+
+DAG Combiner should try to combine small loads into larger loads when
+profitable. For example, we compile this C++ example:
+
+struct THotKey { short Key; bool Control; bool Shift; bool Alt; };
+extern THotKey m_HotKey;
+THotKey GetHotKey () { return m_HotKey; }
+
+into (-O3 -fno-exceptions -static -fomit-frame-pointer):
+
+__Z9GetHotKeyv:
+ pushl %esi
+ movl 8(%esp), %eax
+ movb _m_HotKey+3, %cl
+ movb _m_HotKey+4, %dl
+ movb _m_HotKey+2, %ch
+ movw _m_HotKey, %si
+ movw %si, (%eax)
+ movb %ch, 2(%eax)
+ movb %cl, 3(%eax)
+ movb %dl, 4(%eax)
+ popl %esi
+ ret $4
+
+GCC produces:
+
+__Z9GetHotKeyv:
+ movl _m_HotKey, %edx
+ movl 4(%esp), %eax
+ movl %edx, (%eax)
+ movzwl _m_HotKey+4, %edx
+ movw %dx, 4(%eax)
+ ret $4
+
+The LLVM IR contains the needed alignment info, so we should be able to
+merge the loads and stores into 4-byte loads:
+
+ %struct.THotKey = type { i16, i8, i8, i8 }
+define void @_Z9GetHotKeyv(%struct.THotKey* sret %agg.result) nounwind {
+...
+ %tmp2 = load i16* getelementptr (@m_HotKey, i32 0, i32 0), align 8
+ %tmp5 = load i8* getelementptr (@m_HotKey, i32 0, i32 1), align 2
+ %tmp8 = load i8* getelementptr (@m_HotKey, i32 0, i32 2), align 1
+ %tmp11 = load i8* getelementptr (@m_HotKey, i32 0, i32 3), align 2
+
+Alternatively, we should use a small amount of base-offset alias analysis
+to make it so the scheduler doesn't need to hold all the loads in regs at
+once.
+
+//===---------------------------------------------------------------------===//
+
+We should add an FRINT node to the DAG to model targets that have legal
+implementations of ceil/floor/rint.
+
+//===---------------------------------------------------------------------===//
+
+This GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34043
+contains a testcase that compiles down to:
+
+ %struct.XMM128 = type { <4 x float> }
+..
+ %src = alloca %struct.XMM128
+..
+ %tmp6263 = bitcast %struct.XMM128* %src to <2 x i64>*
+ %tmp65 = getelementptr %struct.XMM128* %src, i32 0, i32 0
+ store <2 x i64> %tmp5899, <2 x i64>* %tmp6263, align 16
+ %tmp66 = load <4 x float>* %tmp65, align 16
+ %tmp71 = add <4 x float> %tmp66, %tmp66
+
+If the mid-level optimizer turned the bitcast of pointer + store of tmp5899
+into a bitcast of the vector value and a store to the pointer, then the
+store->load could be easily removed.
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+int test() {
+ long long input[8] = {1,1,1,1,1,1,1,1};
+ foo(input);
+}
+
+We currently compile this into a memcpy from a global array since the
+initializer is fairly large and not memset'able. This is good, but the memcpy
+gets lowered to load/stores in the code generator. This is also ok, except
+that the codegen lowering for memcpy doesn't handle the case when the source
+is a constant global. This gives us atrocious code like this:
+
+ call "L1$pb"
+"L1$pb":
+ popl %eax
+ movl _C.0.1444-"L1$pb"+32(%eax), %ecx
+ movl %ecx, 40(%esp)
+ movl _C.0.1444-"L1$pb"+20(%eax), %ecx
+ movl %ecx, 28(%esp)
+ movl _C.0.1444-"L1$pb"+36(%eax), %ecx
+ movl %ecx, 44(%esp)
+ movl _C.0.1444-"L1$pb"+44(%eax), %ecx
+ movl %ecx, 52(%esp)
+ movl _C.0.1444-"L1$pb"+40(%eax), %ecx
+ movl %ecx, 48(%esp)
+ movl _C.0.1444-"L1$pb"+12(%eax), %ecx
+ movl %ecx, 20(%esp)
+ movl _C.0.1444-"L1$pb"+4(%eax), %ecx
+...
+
+instead of:
+ movl $1, 16(%esp)
+ movl $0, 20(%esp)
+ movl $1, 24(%esp)
+ movl $0, 28(%esp)
+ movl $1, 32(%esp)
+ movl $0, 36(%esp)
+ ...
+
+//===---------------------------------------------------------------------===//
+
+http://llvm.org/PR717:
+
+The following code should compile into "ret int undef". Instead, LLVM
+produces "ret int 0":
+
+int f() {
+ int x = 4;
+ int y;
+ if (x == 3) y = 0;
+ return y;
+}
+
+//===---------------------------------------------------------------------===//
+
+The loop unroller should partially unroll loops (instead of peeling them)
+when code growth isn't too bad and when an unroll count allows simplification
+of some code within the loop. One trivial example is:
+
+#include <stdio.h>
+int main() {
+ int nRet = 17;
+ int nLoop;
+ for ( nLoop = 0; nLoop < 1000; nLoop++ ) {
+ if ( nLoop & 1 )
+ nRet += 2;
+ else
+ nRet -= 1;
+ }
+ return nRet;
+}
+
+Unrolling by 2 would eliminate the '&1' in both copies, leading to a net
+reduction in code size. The resultant code would then also be suitable for
+exit value computation.
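+
+Concretely, unrolling by 2 pairs one even and one odd iteration, so both '&1'
+tests fold to constants (a sketch; 1000 is even, so no remainder iteration is
+needed):
+
+  for ( nLoop = 0; nLoop < 1000; nLoop += 2 ) {
+    nRet -= 1;   /* nLoop is even: (nLoop & 1) == 0 */
+    nRet += 2;   /* nLoop + 1 is odd */
+  }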
+
+//===---------------------------------------------------------------------===//
+
+We miss a bunch of rotate opportunities on various targets, including ppc, x86,
+etc. On X86, we miss a bunch of 'rotate by variable' cases because the rotate
+matching code in dag combine doesn't look through truncates aggressively
+enough. Here are some testcases reduced from GCC PR17886:
+
+unsigned long long f(unsigned long long x, int y) {
+ return (x << y) | (x >> 64-y);
+}
+unsigned f2(unsigned x, int y){
+ return (x << y) | (x >> 32-y);
+}
+unsigned long long f3(unsigned long long x){
+ int y = 9;
+ return (x << y) | (x >> 64-y);
+}
+unsigned f4(unsigned x){
+ int y = 10;
+ return (x << y) | (x >> 32-y);
+}
+unsigned long long f5(unsigned long long x, unsigned long long y) {
+ return (x << 8) | ((y >> 48) & 0xffull);
+}
+unsigned long long f6(unsigned long long x, unsigned long long y, int z) {
+ switch(z) {
+ case 1:
+ return (x << 8) | ((y >> 48) & 0xffull);
+ case 2:
+ return (x << 16) | ((y >> 40) & 0xffffull);
+ case 3:
+ return (x << 24) | ((y >> 32) & 0xffffffull);
+ case 4:
+ return (x << 32) | ((y >> 24) & 0xffffffffull);
+ default:
+ return (x << 40) | ((y >> 16) & 0xffffffffffull);
+ }
+}
+
+On X86-64, we only handle f2/f3/f4 right. On x86-32, a few of these
+generate truly horrible code, instead of using shld and friends. On
+ARM, we end up with calls to L___lshrdi3/L___ashldi3 in f, which is
+badness. PPC64 misses f, f5 and f6. CellSPU aborts in isel.
+
+//===---------------------------------------------------------------------===//
+
+We do a number of simplifications in simplify libcalls to strength reduce
+standard library functions, but we don't currently merge them together. For
+example, it is useful to merge memcpy(a,b,strlen(b)) -> strcpy. This can only
+be done safely if "b" isn't modified between the strlen and the memcpy, of
+course.
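+
+The shape of the merge (a sketch; for strcpy to be an exact replacement the
+copied length must include the terminating nul, i.e. strlen(b)+1):
+
+  memcpy(a, b, strlen(b) + 1);   /* walks b twice */
+  /* => */
+  strcpy(a, b);                  /* same bytes, one traversal, no strlen */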
+
+//===---------------------------------------------------------------------===//
+
+Reassociate should turn things like:
+
+int factorial(int X) {
+ return X*X*X*X*X*X*X*X;
+}
+
+into llvm.powi calls, allowing the code generator to produce balanced
+multiplication trees.
+
+//===---------------------------------------------------------------------===//
+
+We generate a horrible libcall for llvm.powi. For example, we compile:
+
+#include <cmath>
+double f(double a) { return std::pow(a, 4); }
+
+into:
+
+__Z1fd:
+ subl $12, %esp
+ movsd 16(%esp), %xmm0
+ movsd %xmm0, (%esp)
+ movl $4, 8(%esp)
+ call L___powidf2$stub
+ addl $12, %esp
+ ret
+
+GCC produces:
+
+__Z1fd:
+ subl $12, %esp
+ movsd 16(%esp), %xmm0
+ mulsd %xmm0, %xmm0
+ mulsd %xmm0, %xmm0
+ movsd %xmm0, (%esp)
+ fldl (%esp)
+ addl $12, %esp
+ ret
+
+//===---------------------------------------------------------------------===//
+
+We compile this program: (from GCC PR11680)
+http://gcc.gnu.org/bugzilla/attachment.cgi?id=4487
+
+Into code that runs the same speed in fast/slow modes, but both modes run 2x
+slower than when compiled with GCC (either 4.0 or 4.2):
+
+$ llvm-g++ perf.cpp -O3 -fno-exceptions
+$ time ./a.out fast
+1.821u 0.003s 0:01.82 100.0% 0+0k 0+0io 0pf+0w
+
+$ g++ perf.cpp -O3 -fno-exceptions
+$ time ./a.out fast
+0.821u 0.001s 0:00.82 100.0% 0+0k 0+0io 0pf+0w
+
+It looks like we are making the same inlining decisions, so this may be raw
+codegen badness or something else (haven't investigated).
+
+//===---------------------------------------------------------------------===//
+
+We miss some instcombines for stuff like this:
+void bar (void);
+void foo (unsigned int a) {
+ /* This one is equivalent to a >= (3 << 2). */
+ if ((a >> 2) >= 3)
+ bar ();
+}
+
+A few other related ones are in GCC PR14753.
+
+//===---------------------------------------------------------------------===//
+
+Divisibility by constant can be simplified (according to GCC PR12849) from
+being a mulhi to being a mul lo (cheaper). Testcase:
+
+void bar(unsigned n) {
+ if (n % 3 == 0)
+ true();
+}
+
+I think this basically amounts to a dag combine to simplify comparisons against
+multiply hi's into a comparison against the mullo.
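+
+A sketch of the strength-reduced form for the testcase above; the constants
+are the inverse of 3 mod 2^32 and floor((2^32-1)/3), and the trick works for
+any odd divisor:
+
+  int divisible_by_3(unsigned n) {
+    /* 3 * 0xAAAAAAAB == 1 (mod 2^32), so n % 3 == 0 iff
+       n * 0xAAAAAAAB lands in [0, 0x55555555]. */
+    return n * 0xAAAAAAABu <= 0x55555555u;
+  }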
+
+//===---------------------------------------------------------------------===//
+
+Better mod/ref analysis for scanf would allow us to eliminate the vtable and a
+bunch of other stuff from this example (see PR1604):
+
+#include <cstdio>
+struct test {
+ int val;
+ virtual ~test() {}
+};
+
+int main() {
+ test t;
+ std::scanf("%d", &t.val);
+ std::printf("%d\n", t.val);
+}
+
+//===---------------------------------------------------------------------===//
+
+Instcombine will merge comparisons like (x >= 10) && (x < 20) by producing (x -
+10) u< 10, but only when the comparisons have matching sign.
+
+This could be converted with a similar technique. (PR1941)
+
+define i1 @test(i8 %x) {
+ %A = icmp uge i8 %x, 5
+ %B = icmp slt i8 %x, 20
+ %C = and i1 %A, %B
+ ret i1 %C
+}
+
+//===---------------------------------------------------------------------===//
+
+These functions perform the same computation, but produce different assembly.
+
+define i8 @select(i8 %x) readnone nounwind {
+ %A = icmp ult i8 %x, 250
+ %B = select i1 %A, i8 0, i8 1
+ ret i8 %B
+}
+
+define i8 @addshr(i8 %x) readnone nounwind {
+ %A = zext i8 %x to i9
+ %B = add i9 %A, 6 ;; 256 - 250 == 6
+ %C = lshr i9 %B, 8
+ %D = trunc i9 %C to i8
+ ret i8 %D
+}
+
+//===---------------------------------------------------------------------===//
+
+From gcc bug 24696:
+int
+f (unsigned long a, unsigned long b, unsigned long c)
+{
+ return ((a & (c - 1)) != 0) || ((b & (c - 1)) != 0);
+}
+int
+f (unsigned long a, unsigned long b, unsigned long c)
+{
+ return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0);
+}
+Both should combine to ((a|b) & (c-1)) != 0. Currently not optimized with
+"clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 20192:
+#define PMD_MASK (~((1UL << 23) - 1))
+void clear_pmd_range(unsigned long start, unsigned long end)
+{
+ if (!(start & ~PMD_MASK) && !(end & ~PMD_MASK))
+ f();
+}
+The expression should optimize to something like
+"!((start|end)&~PMD_MASK). Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 15241:
+unsigned int
+foo (unsigned int a, unsigned int b)
+{
+ if (a <= 7 && b <= 7)
+ baz ();
+}
+Should combine to "(a|b) <= 7". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 3756:
+int
+pn (int n)
+{
+ return (n >= 0 ? 1 : -1);
+}
+Should combine to (n >> 31) | 1. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts | llc".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 28685:
+int test(int a, int b)
+{
+ int lt = a < b;
+ int eq = a == b;
+
+ return (lt || eq);
+}
+Should combine to "a <= b". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts | llc".
+
+//===---------------------------------------------------------------------===//
+
+void a(int variable)
+{
+ if (variable == 4 || variable == 6)
+ bar();
+}
+This should optimize to "if ((variable | 2) == 6)". Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts | llc".
+
+//===---------------------------------------------------------------------===//
+
+unsigned int f(unsigned int i, unsigned int n) { ++i; if (i == n) ++i; return i; }
+unsigned int f2(unsigned int i, unsigned int n) { ++i; i += i == n; return i; }
+These should combine to the same thing. Currently, the first function
+produces better code on X86.
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 15784:
+#define abs(x) x>0?x:-x
+int f(int x, int y)
+{
+ return (abs(x)) >= 0;
+}
+This should optimize to x == INT_MIN. (With -fwrapv.) Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 14753:
+void
+rotate_cst (unsigned int a)
+{
+ a = (a << 10) | (a >> 22);
+ if (a == 123)
+ bar ();
+}
+void
+minus_cst (unsigned int a)
+{
+ unsigned int tem;
+
+ tem = 20 - a;
+ if (tem == 5)
+ bar ();
+}
+void
+mask_gt (unsigned int a)
+{
+ /* This is equivalent to a > 15. */
+ if ((a & ~7) > 8)
+ bar ();
+}
+void
+rshift_gt (unsigned int a)
+{
+ /* This is equivalent to a > 23. */
+ if ((a >> 2) > 5)
+ bar ();
+}
+All should simplify to a single comparison. All of these are
+currently not optimized with "clang -emit-llvm-bc | opt
+-std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 32605:
+int c(int* x) {return (char*)x+2 == (char*)x;}
+Should combine to 0. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts" (although llc can optimize it).
+
+//===---------------------------------------------------------------------===//
+
+int a(unsigned char* b) {return *b > 99;}
+There's an unnecessary zext in the generated code with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;}
+Should be combined to "((b >> 1) | b) & 1". Currently not optimized
+with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);}
+Should combine to "x | (y & 3)". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned a) {return ((a | 1) & 3) | (a & -4);}
+Should combine to "a | 1". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);}
+Should fold to "(~a & c) | (a & b)". Currently not optimized with
+"clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a,int b) {return (~(a|b))|a;}
+Should fold to "a|~b". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b) {return (a&&b) || (a&&!b);}
+Should fold to "a". Currently not optimized with "clang -emit-llvm-bc
+| opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b, int c) {return (a&&b) || (!a&&c);}
+Should fold to "a ? b : c", or at least something sane. Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);}
+Should fold to a && (b || c). Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return x | ((x & 8) ^ 8);}
+Should combine to x | 8. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return x ^ ((x & 8) ^ 8);}
+Should also combine to x | 8. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return (x & 8) == 0 ? -1 : -9;}
+Should combine to (x | -9) ^ 8. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return (x & 8) == 0 ? -9 : -1;}
+Should combine to x | -9. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return ((x | -9) ^ 8) & x;}
+Should combine to x & -9. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;}
+Should combine to "a * 0x88888888 >> 31". Currently not optimized
+with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(char* x) {if ((*x & 32) == 0) return b();}
+There's an unnecessary zext in the generated code with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned long long x) {return 40 * (x >> 1);}
+Should combine to "20 * (((unsigned)x) & -2)". Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+We would like to do the following transform in the instcombiner:
+
+ -X/C -> X/-C
+
+However, this isn't valid if (-X) overflows. We can implement this when we
+have the concept of a "C signed subtraction" operator that is undefined on
+overflow.
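+
+The overflow case, concretely (assuming 32-bit int and a wrapping negation,
+as with LLVM's plain sub):
+
+  int f1(int X) { return -X / 3; }   /* f1(INT_MIN) == -715827882 with wrap */
+  int f2(int X) { return X / -3; }   /* f2(INT_MIN) ==  715827882 */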
+
+//===---------------------------------------------------------------------===//
+
+This was noticed in the entry block for grokdeclarator in 403.gcc:
+
+ %tmp = icmp eq i32 %decl_context, 4
+ %decl_context_addr.0 = select i1 %tmp, i32 3, i32 %decl_context
+ %tmp1 = icmp eq i32 %decl_context_addr.0, 1
+ %decl_context_addr.1 = select i1 %tmp1, i32 0, i32 %decl_context_addr.0
+
+tmp1 should be simplified to something like:
+ (!tmp && decl_context == 1), which further reduces to (decl_context == 1)
+
+This allows recursive simplifications, tmp1 is used all over the place in
+the function, e.g. by:
+
+ %tmp23 = icmp eq i32 %decl_context_addr.1, 0 ; <i1> [#uses=1]
+ %tmp24 = xor i1 %tmp1, true ; <i1> [#uses=1]
+ %or.cond8 = and i1 %tmp23, %tmp24 ; <i1> [#uses=1]
+
+later.
+
+//===---------------------------------------------------------------------===//
+
+Store sinking: This code:
+
+void f (int n, int *cond, int *res) {
+ int i;
+ *res = 0;
+ for (i = 0; i < n; i++)
+ if (*cond)
+ *res ^= 234; /* (*) */
+}
+
+On this function GVN hoists the fully redundant value of *res, but nothing
+moves the store out. This gives us this code:
+
+bb: ; preds = %bb2, %entry
+ %.rle = phi i32 [ 0, %entry ], [ %.rle6, %bb2 ]
+ %i.05 = phi i32 [ 0, %entry ], [ %indvar.next, %bb2 ]
+ %1 = load i32* %cond, align 4
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %3 = xor i32 %.rle, 234
+ store i32 %3, i32* %res, align 4
+ br label %bb2
+
+bb2: ; preds = %bb, %bb1
+ %.rle6 = phi i32 [ %3, %bb1 ], [ %.rle, %bb ]
+ %indvar.next = add i32 %i.05, 1
+ %exitcond = icmp eq i32 %indvar.next, %n
+ br i1 %exitcond, label %return, label %bb
+
+DSE should sink partially dead stores to get the store out of the loop.
+
+Here's another partial dead case:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12395
+
+//===---------------------------------------------------------------------===//
+
+Scalar PRE hoists the mul in the common block up to the else:
+
+int test (int a, int b, int c, int g) {
+ int d, e;
+ if (a)
+ d = b * c;
+ else
+ d = b - c;
+ e = b * c + g;
+ return d + e;
+}
+
+It would be better to do the mul once to reduce codesize above the if.
+This is GCC PR38204.
+
+//===---------------------------------------------------------------------===//
+
+GCC PR37810 is an interesting case where we should sink load/store reload
+into the if block and outside the loop, so we don't reload/store it on the
+non-call path.
+
+for () {
+  *P += 1;
+  if ()
+    call();
+  else
+    ...
+}
+
+->
+
+tmp = *P;
+for () {
+  tmp += 1;
+  if () {
+    *P = tmp;
+    call();
+    tmp = *P;
+  } else ...
+}
+*P = tmp;
+
+We now hoist the reload after the call (Transforms/GVN/lpre-call-wrap.ll), but
+we don't sink the store. We need partially dead store sinking.
+
+//===---------------------------------------------------------------------===//
+
+[PHI TRANSLATE GEPs]
+
+GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack
+leading to excess stack traffic. This could be handled by GVN with some crazy
+symbolic phi translation. The code we get looks like (g is on the stack):
+
+bb2: ; preds = %bb1
+..
+ %9 = getelementptr %struct.f* %g, i32 0, i32 0
+ store i32 %8, i32* %9, align 4
+ br label %bb3
+
+bb3: ; preds = %bb1, %bb2, %bb
+ %c_addr.0 = phi %struct.f* [ %g, %bb2 ], [ %c, %bb ], [ %c, %bb1 ]
+ %b_addr.0 = phi %struct.f* [ %b, %bb2 ], [ %g, %bb ], [ %b, %bb1 ]
+ %10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0
+ %11 = load i32* %10, align 4
+
+%11 is fully redundant, and in BB2 it should have the value %8.
+
+GCC PR33344 is a similar case.
+
+//===---------------------------------------------------------------------===//
+
+There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
+GCC testsuite, and many more PRE testcases named ssa-pre-*.c.
+
+//===---------------------------------------------------------------------===//
+
+There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the
+GCC testsuite. For example, predcom-1.c is:
+
+ for (i = 2; i < 1000; i++)
+ fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff;
+
+which compiles into:
+
+bb1: ; preds = %bb1, %bb1.thread
+ %indvar = phi i32 [ 0, %bb1.thread ], [ %0, %bb1 ]
+ %i.0.reg2mem.0 = add i32 %indvar, 2
+ %0 = add i32 %indvar, 1 ; <i32> [#uses=3]
+ %1 = getelementptr [1000 x i32]* @fib, i32 0, i32 %0
+ %2 = load i32* %1, align 4 ; <i32> [#uses=1]
+ %3 = getelementptr [1000 x i32]* @fib, i32 0, i32 %indvar
+ %4 = load i32* %3, align 4 ; <i32> [#uses=1]
+ %5 = add i32 %4, %2 ; <i32> [#uses=1]
+ %6 = and i32 %5, 65535 ; <i32> [#uses=1]
+ %7 = getelementptr [1000 x i32]* @fib, i32 0, i32 %i.0.reg2mem.0
+ store i32 %6, i32* %7, align 4
+ %exitcond = icmp eq i32 %0, 998 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb1
+
+This is basically:
+ LOAD fib[i+1]
+ LOAD fib[i]
+ STORE fib[i+2]
+
+instead of handling this as a loop or other xform, all we'd need to do is teach
+load PRE to phi translate the %0 add (i+1) into the predecessor as (i'+1+1) =
+(i'+2) (where i' is the previous iteration of i). This would find the store
+which feeds it.
+
+predcom-2.c is apparently the same as predcom-1.c.
+predcom-3.c is very similar but needs loads feeding each other instead of
+store->load.
+predcom-4.c seems the same as the rest.
+
+
+//===---------------------------------------------------------------------===//
+
+Other simple load PRE cases:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting]
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge)
+ llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis
+
+//===---------------------------------------------------------------------===//
+
+Type based alias analysis:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705
+
+//===---------------------------------------------------------------------===//
+
+When GVN/PRE finds a store through a float* that must-aliases a load through
+an int*, it should turn the forwarded value into a bitcast. This is a nice
+generalization of the SROA hack that would apply to other cases, e.g.:
+
+int foo(int C, int *P, float X) {
+ if (C) {
+ bar();
+ *P = 42;
+ } else
+ *(float*)P = X;
+
+ return *P;
+}
+
+
+One example (that requires crazy phi translation) is:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]
+
+//===---------------------------------------------------------------------===//
+
+A/B get pinned to the stack because we turn an if/then into a select instead
+of PRE'ing the load/store. This may be fixable in instcombine:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892
+
+
+
+Interesting missed case because of control flow flattening (should be 2 loads):
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629
+With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as |
+ opt -mem2reg -gvn -instcombine | llvm-dis
+we miss it because we need 1) GEP PHI TRAN, 2) CRIT EDGE 3) MULTIPLE DIFFERENT
+VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS
+
+//===---------------------------------------------------------------------===//
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19633
+We could eliminate the branch condition here, loading from null is undefined:
+
+struct S { int w, x, y, z; };
+struct T { int r; struct S s; };
+void bar (struct S, int);
+void foo (int a, struct T b)
+{
+ struct S *c = 0;
+ if (a)
+ c = &b.s;
+ bar (*c, a);
+}
+
+//===---------------------------------------------------------------------===//
+
+simplifylibcalls should do several optimizations for strspn/strcspn:
+
+strcspn(x, "") -> strlen(x)
+strcspn("", x) -> 0
+strspn("", x) -> 0
+strspn(x, "") -> strlen(x)
+strspn(x, "a") -> strchr(x, 'a')-x
+
+strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn):
+
+size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2,
+ int __reject3) {
+ register size_t __result = 0;
+ while (__s[__result] != '\0' && __s[__result] != __reject1 &&
+ __s[__result] != __reject2 && __s[__result] != __reject3)
+ ++__result;
+ return __result;
+}
+
+This should turn into a switch on the character. See PR3253 for some notes on
+codegen.
+
+456.hmmer apparently uses strcspn and strspn a lot. 471.omnetpp uses strspn.
+
+//===---------------------------------------------------------------------===//
+
+"gas" uses this idiom:
+ else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string))
+..
+ else if (strchr ("<>", *intel_parser.op_string)
+
+Those should be turned into a switch.
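+
+That is, something along the lines of (a sketch):
+
+  switch (*intel_parser.op_string) {
+  case '+': case '-': case '/': case '*': case '%': case '|': case '&':
+  case '^': case ':': case '[': case ']': case '(': case ')': case '~':
+    ...
+  }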
+
+//===---------------------------------------------------------------------===//
+
+252.eon contains this interesting code:
+
+ %3072 = getelementptr [100 x i8]* %tempString, i32 0, i32 0
+ %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind
+ %strlen = call i32 @strlen(i8* %3072) ; uses = 1
+ %endptr = getelementptr [100 x i8]* %tempString, i32 0, i32 %strlen
+ call void @llvm.memcpy.i32(i8* %endptr,
+ i8* getelementptr ([5 x i8]* @"\01LC42", i32 0, i32 0), i32 5, i32 1)
+ %3074 = call i32 @strlen(i8* %endptr) nounwind readonly
+
+This is interesting for a couple reasons. First, in this:
+
+ %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind
+ %strlen = call i32 @strlen(i8* %3072)
+
+The strlen could be replaced with: %strlen = sub %3073, %3072, because the
+strcpy call returns a pointer to the end of the string. Based on that, the
+endptr GEP just becomes equal to %3073, which eliminates a strlen call and GEP.
+
+Second, the strlen following the memcpy can be replaced with:
+
+ %3074 = call i32 @strlen([5 x i8]* @"\01LC42") nounwind readonly
+
+Because the destination was just copied into the specified memory buffer. This,
+in turn, can be constant folded to "4".
+
+In other code, it contains:
+
+ %endptr6978 = bitcast i8* %endptr69 to i32*
+ store i32 7107374, i32* %endptr6978, align 1
+ %3167 = call i32 @strlen(i8* %endptr69) nounwind readonly
+
+Which could also be constant folded. Whatever is producing this should probably
+be fixed to leave this as a memcpy from a string.
+
+Further, eon also has an interesting partially redundant strlen call:
+
+bb8: ; preds = %_ZN18eonImageCalculatorC1Ev.exit
+ %682 = getelementptr i8** %argv, i32 6 ; <i8**> [#uses=2]
+ %683 = load i8** %682, align 4 ; <i8*> [#uses=4]
+ %684 = load i8* %683, align 1 ; <i8> [#uses=1]
+ %685 = icmp eq i8 %684, 0 ; <i1> [#uses=1]
+ br i1 %685, label %bb10, label %bb9
+
+bb9: ; preds = %bb8
+ %686 = call i32 @strlen(i8* %683) nounwind readonly
+ %687 = icmp ugt i32 %686, 254 ; <i1> [#uses=1]
+ br i1 %687, label %bb10, label %bb11
+
+bb10: ; preds = %bb9, %bb8
+ %688 = call i32 @strlen(i8* %683) nounwind readonly
+
+This could be eliminated by doing the strlen once in bb8, saving code size and
+improving perf on the bb8->9->10 path.
+
+//===---------------------------------------------------------------------===//
+
+I see an interesting fully redundant call to strlen left in 186.crafty:InputMove
+which looks like:
+ %movetext11 = getelementptr [128 x i8]* %movetext, i32 0, i32 0
+
+
+bb62: ; preds = %bb55, %bb53
+ %promote.0 = phi i32 [ %169, %bb55 ], [ 0, %bb53 ]
+ %171 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1
+ %172 = add i32 %171, -1 ; <i32> [#uses=1]
+ %173 = getelementptr [128 x i8]* %movetext, i32 0, i32 %172
+
+... no stores ...
+ br i1 %or.cond, label %bb65, label %bb72
+
+bb65: ; preds = %bb62
+ store i8 0, i8* %173, align 1
+ br label %bb72
+
+bb72: ; preds = %bb65, %bb62
+ %trank.1 = phi i32 [ %176, %bb65 ], [ -1, %bb62 ]
+ %177 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1
+
+Note that on the bb62->bb72 path, the %177 strlen call is partially
+redundant with the %171 call. At worst, we could shove the %177 strlen call
+up into the bb65 block moving it out of the bb62->bb72 path. However, note
+that bb65 stores to the string, zeroing out the last byte. This means that on
+that path the value of %177 is actually just %171-1. A sub is cheaper than a
+strlen!
+
+This pattern repeats several times, basically doing:
+
+ A = strlen(P);
+ P[A-1] = 0;
+ B = strlen(P);
+ where it is "obvious" that B = A-1.
+
+//===---------------------------------------------------------------------===//
+
+186.crafty contains this interesting pattern:
+
+%77 = call i8* @strstr(i8* getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0),
+ i8* %30)
+%phitmp648 = icmp eq i8* %77, getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0)
+br i1 %phitmp648, label %bb70, label %bb76
+
+bb70: ; preds = %OptionMatch.exit91, %bb69
+ %78 = call i32 @strlen(i8* %30) nounwind readonly align 1 ; <i32> [#uses=1]
+
+This is basically:
+ cststr = "abcdef";
+ if (strstr(cststr, P) == cststr) {
+ x = strlen(P);
+ ...
+
+The strstr call would be significantly cheaper written as:
+
+cststr = "abcdef";
+if (memcmp(P, cststr, strlen(P)) == 0)
+ x = strlen(P);
+
+This is memcmp+strlen instead of strstr. This also makes the strlen fully
+redundant.
+
+//===---------------------------------------------------------------------===//
+
+186.crafty also contains this code:
+
+%1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
+%1907 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1906
+%1908 = call i8* @strcpy(i8* %1907, i8* %1905) nounwind align 1
+%1909 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
+%1910 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1909
+
+The last strlen is computable as 1908-@pgn_event, which means 1910=1908.
+
+//===---------------------------------------------------------------------===//
+
+186.crafty has this interesting pattern with the "out.4543" variable:
+
+call void @llvm.memcpy.i32(
+ i8* getelementptr ([10 x i8]* @out.4543, i32 0, i32 0),
+ i8* getelementptr ([7 x i8]* @"\01LC28700", i32 0, i32 0), i32 7, i32 1)
+%101 = call @printf(i8* ... @out.4543, i32 0, i32 0)) nounwind
+
+It is basically doing:
+
+ memcpy(globalarray, "string");
+ printf(..., globalarray);
+
+Anyway, by knowing that printf just reads the memory and forward substituting
+the string directly into the printf, this eliminates reads from globalarray.
+Since this pattern occurs frequently in crafty (due to the "DisplayTime" and
+other similar functions) there are many stores to "out". Once all the printfs
+stop using "out", all that is left is the memcpy's into it. This should allow
+globalopt to remove the "stored only" global.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+define inreg i32 @foo(i8* inreg %p) nounwind {
+ %tmp0 = load i8* %p
+ %tmp1 = ashr i8 %tmp0, 5
+ %tmp2 = sext i8 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+could be dagcombine'd to a sign-extending load with a shift.
+For example, on x86 this currently gets this:
+
+ movb (%eax), %al
+ sarb $5, %al
+ movsbl %al, %eax
+
+while it could get this:
+
+ movsbl (%eax), %eax
+ sarl $5, %eax
+
+//===---------------------------------------------------------------------===//
+
+GCC PR31029:
+
+int test(int x) { return 1-x == x; } // --> return false
+int test2(int x) { return 2-x == x; } // --> return x == 1 ?
+
+Always foldable for odd constants: C-x == x requires 2x == C, and 2x is always
+even. What is the rule for even constants?
+
+//===---------------------------------------------------------------------===//
+
+PR 3381: a GEP to a field of size 0 inside a struct could be turned into a GEP
+to the next field in the struct (which is at the same address).
+
+For example: store of float into { {{}}, float } could be turned into a store to
+the float directly.
+
+//===---------------------------------------------------------------------===//
+
+#include <math.h>
+double foo(double a) { return sin(a); }
+
+This compiles into this on x86-64 Linux:
+foo:
+ subq $8, %rsp
+ call sin
+ addq $8, %rsp
+ ret
+vs:
+
+foo:
+ jmp sin
+
+//===---------------------------------------------------------------------===//
+
+The arg promotion pass should make use of nocapture to make its alias analysis
+stuff much more precise.
+
+//===---------------------------------------------------------------------===//
+
+The following functions should be optimized to use a select instead of a
+branch (from gcc PR40072):
+
+char char_int(int m) {if(m>7) return 0; return m;}
+int int_char(char m) {if(m>7) return 0; return m;}
+
+//===---------------------------------------------------------------------===//
+
+Instcombine should replace the load with a constant in:
+
+ static const char x[4] = {'a', 'b', 'c', 'd'};
+
+ unsigned int y(void) {
+ return *(unsigned int *)x;
+ }
+
+It currently only does this transformation when the size of the constant
+is the same as the size of the integer (so, try x[5]) and the last byte
+is a null (making it a C string). There's no need for these restrictions.
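+
+On a little-endian target the transformed function would simply be (a sketch):
+
+  unsigned int y(void) {
+    return 0x64636261;   /* 'd','c','b','a' from the high byte down */
+  }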
+
+//===---------------------------------------------------------------------===//
+
+InstCombine's "turn load from constant into constant" optimization should be
+more aggressive in the presence of bitcasts. For example, because of unions,
+this code:
+
+union vec2d {
+ double e[2];
+ double v __attribute__((vector_size(16)));
+};
+typedef union vec2d vec2d;
+
+static vec2d a={{1,2}}, b={{3,4}};
+
+vec2d foo () {
+ return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v };
+}
+
+Compiles into:
+
+@a = internal constant %0 { [2 x double]
+ [double 1.000000e+00, double 2.000000e+00] }, align 16
+@b = internal constant %0 { [2 x double]
+ [double 3.000000e+00, double 4.000000e+00] }, align 16
+...
+define void @foo(%struct.vec2d* noalias nocapture sret %agg.result) nounwind {
+entry:
+ %0 = load <2 x double>* getelementptr (%struct.vec2d*
+ bitcast (%0* @a to %struct.vec2d*), i32 0, i32 0), align 16
+ %1 = load <2 x double>* getelementptr (%struct.vec2d*
+ bitcast (%0* @b to %struct.vec2d*), i32 0, i32 0), align 16
+
+
+Instcombine should be able to optimize away the loads (and thus the globals).
+
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..394b4cd
--- /dev/null
+++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMSparcAsmPrinter
+ SparcAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMSparcCodeGen n)
+
+add_dependencies(LLVMSparcAsmPrinter ${n})
diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile
new file mode 100644
index 0000000..f12a6ac
--- /dev/null
+++ b/lib/Target/Sparc/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Sparc/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSparcAsmPrinter
+
+# Hack: we need to include 'main' Sparc target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
new file mode 100644
index 0000000..61707f5
--- /dev/null
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -0,0 +1,355 @@
+//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format SPARC assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+#include <cstring>
+#include <map>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN SparcAsmPrinter : public AsmPrinter {
+ /// We name each basic block in a Function with a unique number, so
+ /// that we can consistently refer to them later. This is cleared
+ /// at the beginning of each call to runOnMachineFunction().
+ ///
+ typedef std::map<const Value *, unsigned> ValueMapTy;
+ ValueMapTy NumberForBB;
+ public:
+ explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "Sparc Assembly Printer";
+ }
+
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int opNum);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+/// createSparcCodePrinterPass - Returns a pass that prints the SPARC
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o,
+ TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // BBNumber is used here so that a given Printer will never give two
+ // BBs the same name. (If you have a better way, please let me know!)
+ static unsigned BBNumber = 0;
+
+ O << "\n\n";
+
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+ EmitAlignment(4, F);
+ O << "\t.globl\t" << CurrentFnName << '\n';
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ", #function\n";
+ O << CurrentFnName << ":\n";
+
+ // Number each basic block so that we can consistently refer to them
+ // in PC-relative references.
+ // FIXME: Why not use the MBB numbers?
+ NumberForBB.clear();
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ NumberForBB[I->getBasicBlock()] = BBNumber++;
+ }
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printInstruction(II);
+ ++EmittedInsts;
+ }
+ }
+
+ // We didn't modify anything.
+ return false;
+}
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand (opNum);
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ bool CloseParen = false;
+ if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
+ O << "%hi(";
+ CloseParen = true;
+ } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
+ !MO.isReg() && !MO.isImm()) {
+ O << "%lo(";
+ CloseParen = true;
+ }
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+      O << "%" << LowercaseString(RI.get(MO.getReg()).AsmName);
+ else
+ O << "%reg" << MO.getReg();
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ {
+ const GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ }
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ default:
+    O << "<unknown operand type>"; abort(); break;
+ }
+ if (CloseParen) O << ")";
+}
+
+void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier) {
+ printOperand(MI, opNum);
+
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ O << ", ";
+ printOperand(MI, opNum+1);
+ return;
+ }
+
+ if (MI->getOperand(opNum+1).isReg() &&
+ MI->getOperand(opNum+1).getReg() == SP::G0)
+ return; // don't print "+%g0"
+ if (MI->getOperand(opNum+1).isImm() &&
+ MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ O << "+";
+ if (MI->getOperand(opNum+1).isGlobal() ||
+ MI->getOperand(opNum+1).isCPI()) {
+ O << "%lo(";
+ printOperand(MI, opNum+1);
+ O << ")";
+ } else {
+ printOperand(MI, opNum+1);
+ }
+}
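+// For illustration (assumed emitted syntax, derived from the cases above):
+// a reg+%g0 address prints as just the base register, reg+imm prints as
+// e.g. "%i1+8", and a global/constant-pool offset prints as "%o0+%lo(sym)".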
+
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+bool SparcAsmPrinter::doInitialization(Module &M) {
+ Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix());
+ return false; // success
+}
+
+bool SparcAsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ O << '\n';
+
+ return AsmPrinter::doFinalization(M);
+}
+
+void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+    return;   // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = TD->getPreferredAlignment(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && !GVar->hasSection()) {
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage())
+ O << "\t.local " << name << '\n';
+
+ O << TAI->getCOMMDirective() << name << ',' << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (1 << Align);
+
+ O << '\n';
+ return;
+ }
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage: // FIXME: Verify correct for weak.
+ case GlobalValue::WeakODRLinkage: // FIXME: Verify correct for weak.
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << TAI->getGlobalDirective() << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "Should not have any unmaterialized functions!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << name << ",#object\n";
+ O << "\t.size " << name << ',' << Size << '\n';
+ }
+
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo);
+
+ return false;
+}
+
+bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo);
+ O << ']';
+
+ return false;
+}
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
new file mode 100644
index 0000000..eefa7e8
--- /dev/null
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_TARGET_DEFINITIONS Sparc.td)
+
+tablegen(SparcGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(SparcGenRegisterNames.inc -gen-register-enums)
+tablegen(SparcGenRegisterInfo.inc -gen-register-desc)
+tablegen(SparcGenInstrNames.inc -gen-instr-enums)
+tablegen(SparcGenInstrInfo.inc -gen-instr-desc)
+tablegen(SparcGenAsmWriter.inc -gen-asm-writer)
+tablegen(SparcGenDAGISel.inc -gen-dag-isel)
+tablegen(SparcGenSubtarget.inc -gen-subtarget)
+tablegen(SparcGenCallingConv.inc -gen-callingconv)
+
+add_llvm_target(SparcCodeGen
+ DelaySlotFiller.cpp
+ FPMover.cpp
+ SparcInstrInfo.cpp
+ SparcISelDAGToDAG.cpp
+ SparcISelLowering.cpp
+ SparcRegisterInfo.cpp
+ SparcSubtarget.cpp
+ SparcTargetAsmInfo.cpp
+ SparcTargetMachine.cpp
+ )
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
new file mode 100644
index 0000000..15b26c2
--- /dev/null
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -0,0 +1,76 @@
+//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple local pass that fills delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delayslotfiller"
+#include "Sparc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "SPARC Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Sparc MachineFunctions
+///
+FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
+ return new Filler(tm);
+}
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+///
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, DebugLoc::getUnknownLoc(), TII->get(SP::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ return Changed;
+}
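+// Worked example (assumed SPARC V8 semantics, for illustration only): a CALL
+// has one delay slot, so after this pass
+//     call  foo
+// is immediately followed by
+//     nop             ! inserted by Filler, counted in FilledSlots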
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
new file mode 100644
index 0000000..f72a4c4
--- /dev/null
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -0,0 +1,139 @@
+//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "fpmover"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+STATISTIC(NumFpDs , "Number of instructions translated");
+STATISTIC(NoopFpDs, "Number of noop instructions removed");
+
+namespace {
+ struct FPMover : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+
+ static char ID;
+ explicit FPMover(TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Sparc Double-FP Move Fixer";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char FPMover::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcFPMoverPass - Returns a pass that turns FpMOVD
+/// instructions into FMOVS instructions
+///
+FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
+ return new FPMover(tm);
+}
+
+/// getDoubleRegPair - Given a DFP register, return the even and odd FP
+/// registers that correspond to it.
+static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
+ unsigned &OddReg) {
+ static const unsigned EvenHalvesOfPairs[] = {
+ SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
+ SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
+ };
+ static const unsigned OddHalvesOfPairs[] = {
+ SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
+ SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
+ };
+ static const unsigned DoubleRegsInOrder[] = {
+ SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
+ SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
+ };
+ for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ if (DoubleRegsInOrder[i] == DoubleReg) {
+ EvenReg = EvenHalvesOfPairs[i];
+ OddReg = OddHalvesOfPairs[i];
+ return;
+ }
+ assert(0 && "Can't find reg");
+}
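+// Example, reading the tables above: SP::D0 overlaps the pair (F0, F1), so
+// getDoubleRegPair(SP::D0, Even, Odd) yields Even == SP::F0, Odd == SP::F1.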
+
+/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
+///
+bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *MI = I++;
+ DebugLoc dl = MI->getDebugLoc();
+ if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD ||
+ MI->getOpcode() == SP::FpNEGD) {
+ Changed = true;
+ unsigned DestDReg = MI->getOperand(0).getReg();
+ unsigned SrcDReg = MI->getOperand(1).getReg();
+ if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) {
+ MBB.erase(MI); // Eliminate the noop copy.
+ ++NoopFpDs;
+ continue;
+ }
+
+ unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0;
+ getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg);
+ getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg);
+
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (MI->getOpcode() == SP::FpMOVD)
+ MI->setDesc(TII->get(SP::FMOVS));
+ else if (MI->getOpcode() == SP::FpNEGD)
+ MI->setDesc(TII->get(SP::FNEGS));
+ else if (MI->getOpcode() == SP::FpABSD)
+ MI->setDesc(TII->get(SP::FABSS));
+ else
+ assert(0 && "Unknown opcode!");
+
+ MI->getOperand(0).setReg(EvenDestReg);
+ MI->getOperand(1).setReg(EvenSrcReg);
+ DOUT << "FPMover: the modified instr is: " << *MI;
+ // Insert copy for the other half of the double.
+ if (DestDReg != SrcDReg) {
+ MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
+ .addReg(OddSrcReg);
+ DOUT << "FPMover: the inserted instr is: " << *MI;
+ }
+ ++NumFpDs;
+ }
+ }
+ return Changed;
+}
+
+bool FPMover::runOnMachineFunction(MachineFunction &F) {
+ // If the target has V9 instructions, the fp-mover pseudos will never be
+ // emitted. Avoid a scan of the instructions to improve compile time.
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ return false;
+
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+}
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
new file mode 100644
index 0000000..fdf6afa
--- /dev/null
+++ b/lib/Target/Sparc/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSparcCodeGen
+TARGET = Sparc
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \
+ SparcGenRegisterInfo.inc SparcGenInstrNames.inc \
+ SparcGenInstrInfo.inc SparcGenAsmWriter.inc \
+ SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt
new file mode 100644
index 0000000..cc24abf
--- /dev/null
+++ b/lib/Target/Sparc/README.txt
@@ -0,0 +1,58 @@
+
+To-do
+-----
+
+* Keep the address of the constant pool in a register instead of forming its
+ address all of the time.
+* We can fold small constant offsets into the %hi/%lo references to constant
+ pool addresses as well.
+* When in V9 mode, register allocate %icc[0-3].
+* Add support for isel'ing UMUL_LOHI instead of marking it as Expand.
+* Emit the 'Branch on Integer Register with Prediction' instructions. It's
+ not clear how to write a pattern for this though:
+
+float %t1(int %a, int* %p) {
+ %C = seteq int %a, 0
+ br bool %C, label %T, label %F
+T:
+ store int 123, int* %p
+ br label %F
+F:
+ ret float undef
+}
+
+codegens to this:
+
+t1:
+ save -96, %o6, %o6
+1) subcc %i0, 0, %l0
+1) bne .LBBt1_2 ! F
+ nop
+.LBBt1_1: ! T
+ or %g0, 123, %l0
+ st %l0, [%i1]
+.LBBt1_2: ! F
+ restore %g0, %g0, %g0
+ retl
+ nop
+
+1) should be replaced with a brz in V9 mode.
+
+* Same as above, but emit conditional move on register zero (p192) in V9
+ mode. Testcase:
+
+int %t1(int %a, int %b) {
+ %C = seteq int %a, 0
+ %D = select bool %C, int %a, int %b
+ ret int %D
+}
+
+* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling
+ with the Y register, if they are faster.
+
+* Codegen bswap(load)/store(bswap) -> load/store ASI
+
+* Implement frame pointer elimination, e.g. eliminate save/restore for
+ leaf fns.
+* Fill delay slots
+
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
new file mode 100644
index 0000000..bb03f30
--- /dev/null
+++ b/lib/Target/Sparc/Sparc.h
@@ -0,0 +1,119 @@
+//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Sparc back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_SPARC_H
+#define TARGET_SPARC_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+ class FunctionPass;
+ class SparcTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
+ FunctionPass *createSparcCodePrinterPass(raw_ostream &OS, TargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+ FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
+} // end namespace llvm
+
+// Defines symbolic names for Sparc registers. This defines a mapping from
+// register name to register number.
+//
+#include "SparcGenRegisterNames.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#include "SparcGenInstrNames.inc"
+
+
+namespace llvm {
+ // Enums corresponding to Sparc condition codes, both icc's and fcc's. These
+ // values must be kept in sync with the ones in the .td file.
+ namespace SPCC {
+ enum CondCodes {
+ //ICC_A = 8 , // Always
+ //ICC_N = 0 , // Never
+ ICC_NE = 9 , // Not Equal
+ ICC_E = 1 , // Equal
+ ICC_G = 10 , // Greater
+ ICC_LE = 2 , // Less or Equal
+ ICC_GE = 11 , // Greater or Equal
+ ICC_L = 3 , // Less
+ ICC_GU = 12 , // Greater Unsigned
+ ICC_LEU = 4 , // Less or Equal Unsigned
+ ICC_CC = 13 , // Carry Clear/Great or Equal Unsigned
+ ICC_CS = 5 , // Carry Set/Less Unsigned
+ ICC_POS = 14 , // Positive
+ ICC_NEG = 6 , // Negative
+ ICC_VC = 15 , // Overflow Clear
+ ICC_VS = 7 , // Overflow Set
+
+ //FCC_A = 8+16, // Always
+ //FCC_N = 0+16, // Never
+ FCC_U = 7+16, // Unordered
+ FCC_G = 6+16, // Greater
+ FCC_UG = 5+16, // Unordered or Greater
+ FCC_L = 4+16, // Less
+ FCC_UL = 3+16, // Unordered or Less
+ FCC_LG = 2+16, // Less or Greater
+ FCC_NE = 1+16, // Not Equal
+ FCC_E = 9+16, // Equal
+ FCC_UE = 10+16, // Unordered or Equal
+ FCC_GE = 11+16, // Greater or Equal
+ FCC_UGE = 12+16, // Unordered or Greater or Equal
+ FCC_LE = 13+16, // Less or Equal
+ FCC_ULE = 14+16, // Unordered or Less or Equal
+ FCC_O = 15+16 // Ordered
+ };
+ }
+
+ inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case SPCC::ICC_NE: return "ne";
+ case SPCC::ICC_E: return "e";
+ case SPCC::ICC_G: return "g";
+ case SPCC::ICC_LE: return "le";
+ case SPCC::ICC_GE: return "ge";
+ case SPCC::ICC_L: return "l";
+ case SPCC::ICC_GU: return "gu";
+ case SPCC::ICC_LEU: return "leu";
+ case SPCC::ICC_CC: return "cc";
+ case SPCC::ICC_CS: return "cs";
+ case SPCC::ICC_POS: return "pos";
+ case SPCC::ICC_NEG: return "neg";
+ case SPCC::ICC_VC: return "vc";
+ case SPCC::ICC_VS: return "vs";
+ case SPCC::FCC_U: return "u";
+ case SPCC::FCC_G: return "g";
+ case SPCC::FCC_UG: return "ug";
+ case SPCC::FCC_L: return "l";
+ case SPCC::FCC_UL: return "ul";
+ case SPCC::FCC_LG: return "lg";
+ case SPCC::FCC_NE: return "ne";
+ case SPCC::FCC_E: return "e";
+ case SPCC::FCC_UE: return "ue";
+ case SPCC::FCC_GE: return "ge";
+ case SPCC::FCC_UGE: return "uge";
+ case SPCC::FCC_LE: return "le";
+ case SPCC::FCC_ULE: return "ule";
+ case SPCC::FCC_O: return "o";
+ }
+ }
+} // end namespace llvm
+#endif
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
new file mode 100644
index 0000000..53ea8f4
--- /dev/null
+++ b/lib/Target/Sparc/Sparc.td
@@ -0,0 +1,76 @@
+//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget features.
+//
+
+def FeatureV9
+ : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated
+ : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS
+ : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
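+// Note (assumed llc behavior, not stated in this file): each SubtargetFeature
+// is selectable as an -mattr flag, e.g. -mattr=+v9 sets IsV9 in the
+// SparcSubtarget, and the processor definitions below imply feature sets
+// via -mcpu.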
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SparcRegisterInfo.td"
+include "SparcCallingConv.td"
+include "SparcInstrInfo.td"
+
+def SparcInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"v8", []>;
+def : Proc<"supersparc", []>;
+def : Proc<"sparclite", []>;
+def : Proc<"f934", []>;
+def : Proc<"hypersparc", []>;
+def : Proc<"sparclite86x", []>;
+def : Proc<"sparclet", []>;
+def : Proc<"tsc701", []>;
+def : Proc<"v9", [FeatureV9]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Sparc : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = SparcInstrInfo;
+}
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
new file mode 100644
index 0000000..33ecfdf
--- /dev/null
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -0,0 +1,32 @@
+//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Sparc architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Sparc 32-bit C return-value convention.
+def RetCC_Sparc32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+// Sparc 32-bit C Calling convention.
+def CC_Sparc32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
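+// Worked example of the rules above: a call returning i32 gets its result in
+// I0 (which the caller reads as %o0 across the register window); the first
+// six words of integer/FP arguments travel in I0..I5, and the remainder go
+// on the stack in 4-byte aligned slots.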
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
new file mode 100644
index 0000000..c9bd62d
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -0,0 +1,215 @@
+//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SPARC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// SparcDAGToDAGISel - SPARC specific code to select SPARC machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class SparcDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const SparcSubtarget &Subtarget;
+public:
+ explicit SparcDAGToDAGISel(SparcTargetMachine &TM)
+ : SelectionDAGISel(TM),
+ Subtarget(TM.getSubtarget<SparcSubtarget>()) {
+ }
+
+ SDNode *Select(SDValue Op);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue Op, SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "SPARC DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SparcGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void SparcDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (Predicate_simm13(CN)) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(1);
+ Offset = Addr.getOperand(0).getOperand(0);
+ return true;
+ }
+ if (Addr.getOperand(1).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1).getOperand(0);
+ return true;
+ }
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
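+// For instance, per the cases above: (add frameindex, simm13) splits into a
+// target frame index Base plus the immediate Offset; (add x, %lo(sym))
+// becomes Base = x with Offset = the %lo operand's symbol; anything else
+// falls through to reg+0 addressing.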
+
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr,
+ SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (isa<ConstantSDNode>(Addr.getOperand(1)) &&
+ Predicate_simm13(Addr.getOperand(1).getNode()))
+ return false; // Let the reg+imm pattern catch this!
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
+ Addr.getOperand(1).getOpcode() == SPISD::Lo)
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+ R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+ return true;
+}
+
+SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ // FIXME: should use a custom expander to expose the SRA to the dag.
+ SDValue DivLHS = N->getOperand(0);
+ SDValue DivRHS = N->getOperand(1);
+
+ // Set the Y register to the high-part.
+ SDValue TopPart;
+ if (N->getOpcode() == ISD::SDIV) {
+ TopPart = SDValue(CurDAG->getTargetNode(SP::SRAri, dl, MVT::i32, DivLHS,
+ CurDAG->getTargetConstant(31, MVT::i32)), 0);
+ } else {
+ TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
+ }
+ TopPart = SDValue(CurDAG->getTargetNode(SP::WRYrr, dl, MVT::Flag, TopPart,
+ CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+
+ // FIXME: Handle div by immediate.
+ unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+ return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
+ TopPart);
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ // FIXME: Handle mul by immediate.
+ SDValue MulLHS = N->getOperand(0);
+ SDValue MulRHS = N->getOperand(1);
+ unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
+ SDNode *Mul = CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::Flag,
+ MulLHS, MulRHS);
+ // The high part is in the Y register.
+ return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
+ }
+ }
+
+ return SelectCode(Op);
+}
+
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool
+SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectADDRrr(Op, Op, Op0, Op1))
+ SelectADDRri(Op, Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+/// createSparcISelDag - This pass converts a legalized DAG into a
+/// SPARC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSparcISelDag(SparcTargetMachine &TM) {
+ return new SparcDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
new file mode 100644
index 0000000..3ec7e06
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -0,0 +1,1049 @@
+//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "SparcGenCallingConv.inc"
+
+static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, DAG.getTarget(), RVLocs);
+
+  // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Sparc32);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // ISD::RET => ret chain, (regnum1,val1), ...
+ // So i*2+1 index only the regnums.
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+/// LowerArguments - V8 uses a very simple ABI, where all values are passed in
+/// either one or two GPRs, including FP values. TODO: we should pass FP values
+/// in FP registers for fastcc functions.
+void
+SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+
+ const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6;
+ unsigned ArgOffset = 68;
+
+ SDValue Root = DAG.getRoot();
+ std::vector<SDValue> OutChains;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
+ MVT ObjectVT = getValueType(I->getType());
+
+ switch (ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
+ SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ if (ObjectVT != MVT::i32) {
+ unsigned AssertOp = ISD::AssertSext;
+ Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg,
+ DAG.getValueType(ObjectVT));
+ Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg);
+ }
+ ArgValues.push_back(Arg);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Load;
+ if (ObjectVT == MVT::i32) {
+ Load = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ } else {
+ ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+
+ // Sparc is big endian, so add an offset based on the ObjectVT.
+ unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8);
+ FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+ DAG.getConstant(Offset, MVT::i32));
+ Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Root, FIPtr,
+ NULL, 0, ObjectVT);
+ Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
+ }
+ ArgValues.push_back(Load);
+ }
+
+ ArgOffset += 4;
+ break;
+ case MVT::f32:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ // FP value is passed in an integer register.
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
+ SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg);
+ ArgValues.push_back(Arg);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Load = DAG.getLoad(MVT::f32, dl, Root, FIPtr, NULL, 0);
+ ArgValues.push_back(Load);
+ }
+ ArgOffset += 4;
+ break;
+
+ case MVT::i64:
+ case MVT::f64:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ } else {
+ SDValue HiVal;
+ if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi);
+ HiVal = DAG.getCopyFromReg(Root, dl, VRegHi, MVT::i32);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ HiVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ }
+
+ SDValue LoVal;
+ if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo);
+ LoVal = DAG.getCopyFromReg(Root, dl, VRegLo, MVT::i32);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ LoVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ }
+
+ // Compose the two halves together into an i64 unit.
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+
+ // If we want a double, do a bit convert.
+ if (ObjectVT == MVT::f64)
+ WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue);
+
+ ArgValues.push_back(WholeValue);
+ }
+ ArgOffset += 8;
+ break;
+ }
+ }
+
+ // Store remaining ArgRegs to the stack if this is a varargs function.
+ if (F.isVarArg()) {
+ // Remember the vararg offset for the va_start implementation.
+ VarArgsFrameOffset = ArgOffset;
+
+ for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
+ SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
+
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0));
+ ArgOffset += 4;
+ }
+ }
+
+ if (!OutChains.empty())
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size()));
+}
+
+static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ unsigned CallingConv = TheCall->getCallingConv();
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+#if 0
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallingConv, isVarArg, DAG.getTarget(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(Op.getNode(), CC_Sparc32);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+ // FIXME: We can't use this until f64 is known to take two GPRs.
+#else
+ (void)CC_Sparc32;
+
+ // Count the size of the outgoing arguments.
+ unsigned ArgsSize = 0;
+ for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
+ switch (TheCall->getArg(i).getValueType().getSimpleVT()) {
+ default: assert(0 && "Unknown value type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::f32:
+ ArgsSize += 4;
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ ArgsSize += 8;
+ break;
+ }
+ }
+ if (ArgsSize > 4*6)
+ ArgsSize -= 4*6; // Space for first 6 arguments is prereserved.
+ else
+ ArgsSize = 0;
+#endif
+
+ // Keep stack frames 8-byte aligned.
+ ArgsSize = (ArgsSize+7) & ~7;
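+  // e.g. 7 words of outgoing args: ArgsSize = 28 - 24 = 4, rounded up to 8.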
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+#if 0
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+    // Arguments start after the first 5 operands of ISD::CALL
+ SDValue Arg = TheCall->getArg(i);
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ // FIXME: VERIFY THAT 68 IS RIGHT.
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68);
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ }
+
+#else
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ unsigned ArgOffset = 68;
+
+ for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
+ SDValue Val = TheCall->getArg(i);
+ MVT ObjectVT = Val.getValueType();
+ SDValue ValToStore(0, 0);
+ unsigned ObjSize;
+ switch (ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ ObjSize = 4;
+
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Val;
+ } else {
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
+ }
+ break;
+ case MVT::f32:
+ ObjSize = 4;
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Val;
+ } else {
+ // Convert this to a FP value in an int reg.
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Val);
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
+ }
+ break;
+ case MVT::f64: {
+ ObjSize = 8;
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Val; // Whole thing is passed in memory.
+ break;
+ }
+
+ // Break into top and bottom parts by storing to the stack and loading
+ // out the parts as integers. Top part goes in a reg.
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Val, StackPtr, NULL, 0);
+ // Sparc is big-endian, so the high part comes first.
+ SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0);
+ // Increment the pointer to the other half.
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(4));
+ // Load the low part.
+ SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0);
+
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
+
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Lo;
+ ArgOffset += 4;
+ ObjSize = 4;
+ } else {
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+ }
+ break;
+ }
+ case MVT::i64: {
+ ObjSize = 8;
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Val; // Whole thing is passed in memory.
+ break;
+ }
+
+ // Split the value into top and bottom part. Top part goes in a reg.
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
+ DAG.getConstant(1, MVT::i32));
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
+ DAG.getConstant(0, MVT::i32));
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
+
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Lo;
+ ArgOffset += 4;
+ ObjSize = 4;
+ } else {
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+ }
+ break;
+ }
+ }
+
+ if (ValToStore.getNode()) {
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore,
+ PtrOff, NULL, 0));
+ }
+ ArgOffset += ObjSize;
+ }
+#endif
+
+  // Emit all stores, making sure they occur before any copies into physregs.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emited instructions must be
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
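+  // (SPARC register windows: the calling convention names argument registers
+  // from the callee's point of view (%i0..%i5); on the caller's side of the
+  // window those are %o0..%o5, hence the I->O remapping above.)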
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ std::vector<MVT> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ SDValue Ops[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallingConv, isVarArg, DAG.getTarget(), RVLocs);
+
+ RVInfo.AnalyzeCallResult(TheCall, RetCC_Sparc32);
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ unsigned Reg = RVLocs[i].getLocReg();
+
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ ResultVals.push_back(Chain);
+
+ // Merge everything together with a MERGE_VALUES node.
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ TheCall->getVTList(), &ResultVals[0],
+ ResultVals.size());
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
+/// condition.
+static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ISD::SETEQ: return SPCC::ICC_E;
+ case ISD::SETNE: return SPCC::ICC_NE;
+ case ISD::SETLT: return SPCC::ICC_L;
+ case ISD::SETGT: return SPCC::ICC_G;
+ case ISD::SETLE: return SPCC::ICC_LE;
+ case ISD::SETGE: return SPCC::ICC_GE;
+ case ISD::SETULT: return SPCC::ICC_CS;
+ case ISD::SETULE: return SPCC::ICC_LEU;
+ case ISD::SETUGT: return SPCC::ICC_GU;
+ case ISD::SETUGE: return SPCC::ICC_CC;
+ }
+}
+
+/// FPCondCCodeToFCC - Convert a DAG floating point condition code to a SPARC
+/// FCC condition.
+static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return SPCC::FCC_E;
+ case ISD::SETNE:
+ case ISD::SETUNE: return SPCC::FCC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return SPCC::FCC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT: return SPCC::FCC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE: return SPCC::FCC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return SPCC::FCC_GE;
+ case ISD::SETULT: return SPCC::FCC_UL;
+ case ISD::SETULE: return SPCC::FCC_ULE;
+ case ISD::SETUGT: return SPCC::FCC_UG;
+ case ISD::SETUGE: return SPCC::FCC_UGE;
+ case ISD::SETUO: return SPCC::FCC_U;
+ case ISD::SETO: return SPCC::FCC_O;
+ case ISD::SETONE: return SPCC::FCC_LG;
+ case ISD::SETUEQ: return SPCC::FCC_UE;
+ }
+}
+
+
+SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+ addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+ addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Sparc doesn't have i1 sign extending load
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into LO/HI parts.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+
+ // Sparc doesn't have sext_inreg, replace them with shl/sra
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Sparc has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // Custom expand fp<->sint
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // Expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+
+ // Sparc has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+
+ // Sparc doesn't have BRCOND either, it has BR_CC.
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // SPARC has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // FIXME: Sparc provides these multiplies, but we don't have them yet.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // RET must be custom lowered, to meet ABI requirements
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ // VAARG needs to be lowered to not do unaligned accesses for doubles.
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+
+ // No debug info support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::DECLARE, MVT::Other, Expand);
+
+ setStackPointerRegisterToSaveRestore(SP::O6);
+
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+ computeRegisterProperties();
+}
+
+const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPISD::CMPICC: return "SPISD::CMPICC";
+ case SPISD::CMPFCC: return "SPISD::CMPFCC";
+ case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRFCC: return "SPISD::BRFCC";
+ case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
+ case SPISD::Hi: return "SPISD::Hi";
+ case SPISD::Lo: return "SPISD::Lo";
+ case SPISD::FTOI: return "SPISD::FTOI";
+ case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::CALL: return "SPISD::CALL";
+ case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ }
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
+/// Mask are known to be either zero or one for the given target node, setting
+/// KnownZero/KnownOne accordingly. Op is expected to be a target-specific
+/// node. Used by the DAG combiner.
+void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2, KnownOne2;
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case SPISD::SELECT_ICC:
+ case SPISD::SELECT_FCC:
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne,
+ Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ }
+}
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
+// set LHS/RHS to the operands of the original setcc and SPCC to its condition.
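+// For example, the lowered form (select_icc 1, 0, cc, (cmpicc X, Y)) != 0 is
+// equivalent to the original "setcc X, Y, cc", so the branch or select can
+// compare X and Y directly.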
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ CC == ISD::SETNE &&
+ ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
+ (LHS.getOpcode() == SPISD::SELECT_FCC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+ SDValue CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
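+// A 32-bit absolute address is built as an SPISD::Hi/SPISD::Lo pair, which
+// the patterns in SparcInstrInfo.td select to roughly:
+//   sethi %hi(sym), %reg
+//   or    %reg, %lo(sym), %reg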
+static SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) {
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+}
+
+static SDValue LowerCONSTANTPOOL(SDValue Op, SelectionDAG &DAG) {
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ Constant *C = N->getConstVal();
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+}
+
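+// The f32/f64 -> i32 conversions stay entirely in the FP register file: FTOI
+// (fstoi/fdtoi) leaves the i32 bit pattern in an FP register, and the
+// BIT_CONVERT then moves those bits into an integer register (V8 has no
+// direct FP<->integer register move).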
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Convert the fp value to integer in an FP register.
+ assert(Op.getValueType() == MVT::i32);
+ Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getOperand(0).getValueType() == MVT::i32);
+ SDValue Tmp = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+ // Convert the int value to FP in an FP register.
+ return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+}
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a br_cc of a "setcc", and if the setcc got lowered into
+ // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ // Get the condition flag.
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<MVT> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Flag);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ Opc = SPISD::BRICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a select_cc of a "setcc", and if the setcc got lowered into
+ // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<MVT> VTs;
+ VTs.push_back(LHS.getValueType()); // subcc returns a value
+ VTs.push_back(MVT::Flag);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ Opc = SPISD::SELECT_ICC;
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ SparcTargetLowering &TLI) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getRegister(SP::I6, MVT::i32),
+ DAG.getConstant(TLI.getVarArgsFrameOffset(),
+ MVT::i32));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0);
+}
+
+static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ MVT VT = Node->getValueType(0);
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
+ DAG.getConstant(VT.getSizeInBits()/8,
+ MVT::i32));
+ // Store the incremented VAList to the legalized pointer
+ InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+ VAListPtr, SV, 0);
+ // Load the actual argument out of the pointer VAList, unless this is an
+ // f64 load.
+ if (VT != MVT::f64)
+ return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0);
+
+ // Otherwise, load it as i64, then do a bitconvert.
+ SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0);
+
+ // Bit-Convert the value to f64.
+ SDValue Ops[2] = {
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V),
+ V.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0); // Legalize the chain.
+ SDValue Size = Op.getOperand(1); // Legalize the size.
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = SP::O6;
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+ Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
+
+ // The resultant pointer is actually 96 bytes above the new stack pointer,
+ // keeping the bottom of the frame free for the ABI-mandated register window
+ // spill area.
+ SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
+ DAG.getConstant(96, MVT::i32));
+ SDValue Ops[2] = { NewVal, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+
+SDValue SparcTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ // Frame & Return address. Currently unimplemented
+ case ISD::RETURNADDR: return SDValue();
+ case ISD::FRAMEADDR: return SDValue();
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for Sparc.");
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::ConstantPool: return LowerCONSTANTPOOL(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ }
+}
+
+MachineBasicBlock *
+SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned BROpcode;
+ unsigned CC;
+ DebugLoc dl = MI->getDebugLoc();
+ // Figure out the conditional branch opcode to use for this select_cc.
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown SELECT_CC!");
+ case SP::SELECT_CC_Int_ICC:
+ case SP::SELECT_CC_FP_ICC:
+ case SP::SELECT_CC_DFP_ICC:
+ BROpcode = SP::BCOND;
+ break;
+ case SP::SELECT_CC_Int_FCC:
+ case SP::SELECT_CC_FP_FCC:
+ case SP::SELECT_CC_DFP_FCC:
+ BROpcode = SP::FBCOND;
+ break;
+ }
+
+ CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
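+ //
+ //          thisMBB
+ //   (bCC) /      \ (fallthrough)
+ //        |     copy0MBB    ; computes FalseValue
+ //         \      /
+ //          sinkMBB         ; phi merges TrueValue and FalseValue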
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Sparc Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SparcTargetLowering::ConstraintType
+SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ }
+ }
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, SP::IntRegsRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> SparcTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ default: break;
+ case 'r':
+ return make_vector<unsigned>(SP::L0, SP::L1, SP::L2, SP::L3,
+ SP::L4, SP::L5, SP::L6, SP::L7,
+ SP::I0, SP::I1, SP::I2, SP::I3,
+ SP::I4, SP::I5,
+ SP::O0, SP::O1, SP::O2, SP::O3,
+ SP::O4, SP::O5, SP::O7, 0);
+ }
+
+ return std::vector<unsigned>();
+}
+
+bool
+SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Sparc target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
new file mode 100644
index 0000000..fe6811f
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -0,0 +1,79 @@
+//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_ISELLOWERING_H
+#define SPARC_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Sparc.h"
+
+namespace llvm {
+ namespace SPISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition.
+ BRFCC, // Branch to dest on fcc condition.
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG // Return with a flag operand.
+ };
+ }
+
+ class SparcTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ public:
+ SparcTargetLowering(TargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual void LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl);
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ };
+} // end namespace llvm
+
+#endif // SPARC_ISELLOWERING_H
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
new file mode 100644
index 0000000..6535259
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -0,0 +1,114 @@
+//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SP";
+
+ bits<2> op;
+ let Inst{31-30} = op; // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #2 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+// Format 2 instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<3> op2;
+ bits<22> imm22;
+ let op = 0; // op = 0
+ let Inst{24-22} = op2;
+ let Inst{21-0} = imm22;
+}
+
+// Specific F2 classes: SparcV8 manual, page 44
+//
+class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : F2<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+
+ let op2 = op2Val;
+
+ let Inst{29-25} = rd;
+}
+
+class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
+ bits<4> cond;
+ bit annul = 0; // currently unused
+
+ let cond = condVal;
+ let op2 = op2Val;
+
+ let Inst{29} = annul;
+ let Inst{28-25} = cond;
+}
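+
+// Overall F2 bit layout: op(31-30) = 0, then rd(29-25) for F2_1 or
+// annul(29) + cond(28-25) for F2_2, op2(24-22), imm22(21-0).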
+
+//===----------------------------------------------------------------------===//
+// Format #3 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+ bits<6> op3;
+ bits<5> rs1;
+ let op{1} = 1; // Op = 2 or 3
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+ let Inst{18-14} = rs1;
+}
+
+// Specific F3 classes: SparcV8 manual, page 44
+//
+class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<8> asi = 0; // asi not currently used
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12-5} = asi; // address space identifier
+ let Inst{4-0} = rs2;
+}
+
+class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<13> simm13;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12-0} = simm13;
+}
+
+// floating-point
+class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
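+
+// Overall F3 bit layout: op(31-30) = 2 or 3, rd(29-25), op3(24-19),
+// rs1(18-14), then i(13) = 0 with asi(12-5) and rs2(4-0) for F3_1,
+// i(13) = 1 with simm13(12-0) for F3_2, or opf(13-5) and rs2(4-0) for F3_3.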
+
+
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
new file mode 100644
index 0000000..d2f6b9b
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -0,0 +1,277 @@
+//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcInstrInfo.h"
+#include "SparcSubtarget.h"
+#include "Sparc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "SparcGenInstrInfo.inc"
+using namespace llvm;
+
+SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
+ : TargetInstrInfoImpl(SparcInsts, array_lengthof(SparcInsts)),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSR, unsigned &DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+
+ // We look for 3 kinds of patterns here:
+ // or with G0 or 0
+ // add with G0 or 0
+ // fmovs or FpMOVD (pseudo double move).
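+ // e.g. "or %g0, %src, %dst", "add %src, %g0, %dst", "or %src, 0, %dst" and
+ // "fmovs %src, %dst" are all register-to-register moves.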
+ if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) {
+ if (MI.getOperand(1).getReg() == SP::G0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ return true;
+ } else if (MI.getOperand(2).getReg() == SP::G0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) &&
+ isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD ||
+ MI.getOpcode() == SP::FMOVD) {
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::LDri ||
+ MI->getOpcode() == SP::LDFri ||
+ MI->getOpcode() == SP::LDDFri) {
+ if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::STri ||
+ MI->getOpcode() == SP::STFri ||
+ MI->getOpcode() == SP::STDFri) {
+ if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+ MI->getOperand(1).getImm() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ }
+ return 0;
+}
+
+unsigned
+SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond)const{
+ // FIXME this should probably take a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Can only insert uncond branches so far.
+ assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+ BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB);
+ return 1;
+}
+
+bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
+ else if (DestRC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg).addReg(SrcReg);
+ else if (DestRC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD),DestReg)
+ .addReg(SrcReg);
+ else
+ // Can't copy this register
+ return false;
+
+ return true;
+}
+
+void SparcInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else
+ assert(0 && "Can't store this register to stack slot");
+}
+
+void SparcInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (RC == SP::IntRegsRegisterClass)
+ Opc = SP::STri;
+ else if (RC == SP::FPRegsRegisterClass)
+ Opc = SP::STFri;
+ else if (RC == SP::DFPRegsRegisterClass)
+ Opc = SP::STDFri;
+ else
+ assert(0 && "Can't load this register");
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ NewMIs.push_back(MIB);
+ return;
+}
+
+void SparcInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else
+ assert(0 && "Can't load this register from stack slot");
+}
+
+void SparcInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == SP::IntRegsRegisterClass)
+ Opc = SP::LDri;
+ else if (RC == SP::FPRegsRegisterClass)
+ Opc = SP::LDFri;
+ else if (RC == SP::DFPRegsRegisterClass)
+ Opc = SP::LDDFri;
+ else
+ assert(0 && "Can't load this register");
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ if (Ops.size() != 1) return NULL;
+
+ unsigned OpNum = Ops[0];
+ bool isFloat = false;
+ MachineInstr *NewMI = NULL;
+ switch (MI->getOpcode()) {
+ case SP::ORrr:
+ if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == SP::G0 &&
+ MI->getOperand(0).isReg() && MI->getOperand(2).isReg()) {
+ if (OpNum == 0) // COPY -> STORE
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::STri))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(MI->getOperand(2).getReg());
+ else // COPY -> LOAD
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::LDri),
+ MI->getOperand(0).getReg())
+ .addFrameIndex(FI)
+ .addImm(0);
+ }
+ break;
+ case SP::FMOVS:
+ isFloat = true;
+ // FALLTHROUGH
+ case SP::FMOVD:
+ if (OpNum == 0) { // COPY -> STORE
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isFloat ? SP::STFri : SP::STDFri))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else { // COPY -> LOAD
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isFloat ? SP::LDFri : SP::LDDFri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI)
+ .addImm(0);
+ }
+ break;
+ }
+
+ return NewMI;
+}
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
new file mode 100644
index 0000000..ab661b9
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -0,0 +1,114 @@
+//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCINSTRUCTIONINFO_H
+#define SPARCINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SparcRegisterInfo.h"
+
+namespace llvm {
+
+/// SPII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SPII {
+ enum {
+ Pseudo = (1<<0),
+ Load = (1<<1),
+ Store = (1<<2),
+ DelaySlot = (1<<3)
+ };
+}
+
+class SparcInstrInfo : public TargetInstrInfoImpl {
+ const SparcRegisterInfo RI;
+ const SparcSubtarget& Subtarget;
+public:
+ explicit SparcInstrInfo(SparcSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stack slot stored to. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+};
+
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
new file mode 100644
index 0000000..2d6c920
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -0,0 +1,769 @@
+//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Sparc instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasV9 - This predicate is true when the target processor supports V9
+// instructions. Note that the machine may be running in 32-bit mode.
+def HasV9 : Predicate<"Subtarget.isV9()">;
+
+// HasNoV9 - This predicate is true when the target doesn't have V9
+// instructions. Use of this is just a hack for the isel not having proper
+// costs for V8 instructions that are more expensive than their V9 ones.
+def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+
+// HasVIS - This is true when the target processor has VIS extensions.
+def HasVIS : Predicate<"Subtarget.isVIS()">;
+
+// UseDeprecatedInsts - This predicate is true when the target processor is a
+// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
+// to use when appropriate. In either of these cases, the instruction selector
+// will pick deprecated instructions.
+def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm11 : PatLeaf<(imm), [{
+ // simm11 predicate - True if the imm fits in an 11-bit sign extended field.
+ return (((int)N->getZExtValue() << (32-11)) >> (32-11)) ==
+ (int)N->getZExtValue();
+}]>;
+
+def simm13 : PatLeaf<(imm), [{
+ // simm13 predicate - True if the imm fits in a 13-bit sign extended field.
+ return (((int)N->getZExtValue() << (32-13)) >> (32-13)) ==
+ (int)N->getZExtValue();
+}]>;
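+// For example, 4095 and -4096 survive the sign-extension round trip and are
+// accepted; 4096 is rejected.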
+
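+// Transformation function: get the low 10 bits of the immediate.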
+def LO10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
+ MVT::i32);
+}]>;
+
+def HI22 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, MVT::i32);
+}]>;
+
+def SETHIimm : PatLeaf<(imm), [{
+ return (((unsigned)N->getZExtValue() >> 10) << 10) ==
+ (unsigned)N->getZExtValue();
+}], HI22>;
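+// i.e. immediates whose low 10 bits are clear, which a single SETHI can
+// materialize.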
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+
+// Address operands
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, i32imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i32>;
+
+def SDTSPcmpfcc :
+SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPbrcc :
+SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPselectcc :
+SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
+def SDTSPFTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDTSPITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+
+def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
+def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
+
+def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
+def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"SPISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def retflag : SDNode<"SPISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// SPARC Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the SPCC::CondCodes enum
+// values.
+class ICC_VAL<int N> : PatLeaf<(i32 N)>;
+def ICC_NE : ICC_VAL< 9>; // Not Equal
+def ICC_E : ICC_VAL< 1>; // Equal
+def ICC_G : ICC_VAL<10>; // Greater
+def ICC_LE : ICC_VAL< 2>; // Less or Equal
+def ICC_GE : ICC_VAL<11>; // Greater or Equal
+def ICC_L : ICC_VAL< 3>; // Less
+def ICC_GU : ICC_VAL<12>; // Greater Unsigned
+def ICC_LEU : ICC_VAL< 4>; // Less or Equal Unsigned
+def ICC_CC : ICC_VAL<13>; // Carry Clear/Great or Equal Unsigned
+def ICC_CS : ICC_VAL< 5>; // Carry Set/Less Unsigned
+def ICC_POS : ICC_VAL<14>; // Positive
+def ICC_NEG : ICC_VAL< 6>; // Negative
+def ICC_VC : ICC_VAL<15>; // Overflow Clear
+def ICC_VS : ICC_VAL< 7>; // Overflow Set
+
+class FCC_VAL<int N> : PatLeaf<(i32 N)>;
+def FCC_U : FCC_VAL<23>; // Unordered
+def FCC_G : FCC_VAL<22>; // Greater
+def FCC_UG : FCC_VAL<21>; // Unordered or Greater
+def FCC_L : FCC_VAL<20>; // Less
+def FCC_UL : FCC_VAL<19>; // Unordered or Less
+def FCC_LG : FCC_VAL<18>; // Less or Greater
+def FCC_NE : FCC_VAL<17>; // Not Equal
+def FCC_E : FCC_VAL<25>; // Equal
+def FCC_UE : FCC_VAL<26>; // Unordered or Equal
+def FCC_GE : FCC_VAL<27>; // Greater or Equal
+def FCC_UGE : FCC_VAL<28>; // Unordered or Greater or Equal
+def FCC_LE : FCC_VAL<29>; // Less or Equal
+def FCC_ULE : FCC_VAL<30>; // Unordered or Less or Equal
+def FCC_O : FCC_VAL<31>; // Ordered
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+/// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+}
+
+/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
+/// pattern.
+multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern>;
+
+let Defs = [O6], Uses = [O6] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
+// fpmover pass.
+let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
+ def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpMOVD $src, $dst", []>;
+ def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpNEGD $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpABSD $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence. This has to handle all permutations of
+// selection between i32/f32/f64 on ICC and FCC.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def SELECT_CC_Int_ICC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_Int_FCC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_FCC PSEUDO!",
+ [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_ICC PSEUDO!",
+ [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_FCC PSEUDO!",
+ [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_ICC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_FCC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1 in {
+ // retl is the synthetic "jmpl %o7+8, %g0": the link result is discarded in
+ // %g0 (rd) and the return address register %o7 is the base (rs1).
+ let rd = G0.Num, rs1 = O7.Num, simm13 = 8 in
+ def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
+}
+
+// Section B.1 - Load Integer Instructions, p. 90
+def LDSBrr : F3_1<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+def LDSBri : F3_2<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+def LDSHrr : F3_1<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+def LDSHri : F3_2<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+def LDUBrr : F3_1<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+def LDUBri : F3_2<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+def LDUHrr : F3_1<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+def LDUHri : F3_2<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+def LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+def LDri : F3_2<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.2 - Load Floating-point Instructions, p. 92
+def LDFrr : F3_1<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDFri : F3_2<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+def LDDFrr : F3_1<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMrr:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDDFri : F3_2<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMri:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.4 - Store Integer Instructions, p. 95
+def STBrr : F3_1<3, 0b000101,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+def STBri : F3_2<3, 0b000101,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+def STHrr : F3_1<3, 0b000110,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+def STHri : F3_2<3, 0b000110,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+def STrr : F3_1<3, 0b000100,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRrr:$addr)]>;
+def STri : F3_2<3, 0b000100,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRri:$addr)]>;
+
+// Section B.5 - Store Floating-point Instructions, p. 97
+def STFrr : F3_1<3, 0b100100,
+ (outs), (ins MEMrr:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRrr:$addr)]>;
+def STFri : F3_2<3, 0b100100,
+ (outs), (ins MEMri:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRri:$addr)]>;
+def STDFrr : F3_1<3, 0b100111,
+ (outs), (ins MEMrr:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRrr:$addr)]>;
+def STDFri : F3_2<3, 0b100111,
+ (outs), (ins MEMri:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRri:$addr)]>;
+
+// Section B.9 - SETHI Instruction, p. 104
+def SETHIi: F2_1<0b100,
+ (outs IntRegs:$dst), (ins i32imm:$src),
+ "sethi $src, $dst",
+ [(set IntRegs:$dst, SETHIimm:$src)]>;
+
+// Section B.10 - NOP Instruction, p. 105
+// (It's a special case of SETHI)
+let rd = 0, imm22 = 0 in
+ def NOP : F2_1<0b100, (outs), (ins), "nop", []>;
+
+// Section B.11 - Logical Instructions, p. 106
+defm AND : F3_12<"and", 0b000001, and>;
+
+def ANDNrr : F3_1<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "andn $b, $c, $dst",
+ [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+def ANDNri : F3_2<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "andn $b, $c, $dst", []>;
+
+defm OR : F3_12<"or", 0b000010, or>;
+
+def ORNrr : F3_1<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "orn $b, $c, $dst",
+ [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+def ORNri : F3_2<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "orn $b, $c, $dst", []>;
+defm XOR : F3_12<"xor", 0b000011, xor>;
+
+def XNORrr : F3_1<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "xnor $b, $c, $dst",
+ [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+def XNORri : F3_2<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "xnor $b, $c, $dst", []>;
+
+// Section B.12 - Shift Instructions, p. 107
+defm SLL : F3_12<"sll", 0b100101, shl>;
+defm SRL : F3_12<"srl", 0b100110, srl>;
+defm SRA : F3_12<"sra", 0b100111, sra>;
+
+// Section B.13 - Add Instructions, p. 108
+defm ADD : F3_12<"add", 0b000000, add>;
+
+// "LEA" forms of add (patterns to make tblgen happy)
+def LEA_ADDri : F3_2<2, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set IntRegs:$dst, ADDRri:$addr)]>;
+
+defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+defm ADDX : F3_12<"addx", 0b001000, adde>;
+
+// Section B.15 - Subtract Instructions, p. 110
+defm SUB : F3_12 <"sub" , 0b000100, sub>;
+defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+
+def SUBXCCrr: F3_1<2, 0b011100,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "subxcc $b, $c, $dst", []>;
+
+// Section B.18 - Multiply Instructions, p. 113
+defm UMUL : F3_12np<"umul", 0b001010>;
+defm SMUL : F3_12 <"smul", 0b001011, mul>;
+
+
+// Section B.19 - Divide Instructions, p. 115
+defm UDIV : F3_12np<"udiv", 0b001110>;
+defm SDIV : F3_12np<"sdiv", 0b001111>;
+
+// Section B.20 - SAVE and RESTORE, p. 117
+defm SAVE : F3_12np<"save" , 0b111100>;
+defm RESTORE : F3_12np<"restore", 0b111101>;
+
+// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+
+// conditional branch class:
+class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+let isBarrier = 1 in
+ def BA : BranchSP<0b1000, (ins brtarget:$dst),
+ "ba $dst",
+ [(br bb:$dst)]>;
+
+// FIXME: the encoding for the JIT should look at the condition field.
+def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(SPbricc bb:$dst, imm:$cc)]>;
+
+
+// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
+
+// floating-point conditional branch class:
+class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+// FIXME: the encoding for the JIT should look at the condition field.
+def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "fb$cc $dst",
+ [(SPbrfcc bb:$dst, imm:$cc)]>;
+
+
+// Section B.24 - Call and Link Instruction, p. 125
+// This is the only Format 1 instruction
+let Uses = [O0, O1, O2, O3, O4, O5],
+ hasDelaySlot = 1, isCall = 1,
+ Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
+ D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in {
+ def CALL : InstSP<(outs), (ins calltarget:$dst),
+ "call $dst", []> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+
+ // indirect calls
+ def JMPLrr : F3_1<2, 0b111000,
+ (outs), (ins MEMrr:$ptr),
+ "call $ptr",
+ [(call ADDRrr:$ptr)]>;
+ def JMPLri : F3_2<2, 0b111000,
+ (outs), (ins MEMri:$ptr),
+ "call $ptr",
+ [(call ADDRri:$ptr)]>;
+}
+
+// Section B.28 - Read State Register Instructions
+def RDY : F3_1<2, 0b101000,
+ (outs IntRegs:$dst), (ins),
+ "rd %y, $dst", []>;
+
+// Section B.29 - Write State Register Instructions
+def WRYrr : F3_1<2, 0b110000,
+ (outs), (ins IntRegs:$b, IntRegs:$c),
+ "wr $b, $c, %y", []>;
+def WRYri : F3_2<2, 0b110000,
+ (outs), (ins IntRegs:$b, i32imm:$c),
+ "wr $b, $c, %y", []>;
+
+// Convert Integer to Floating-point Instructions, p. 141
+def FITOS : F3_3<2, 0b110100, 0b011000100,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fitos $src, $dst",
+ [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOD : F3_3<2, 0b110100, 0b011001000,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fitod $src, $dst",
+ [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+
+// Convert Floating-point to Integer Instructions, p. 142
+def FSTOI : F3_3<2, 0b110100, 0b011010001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fstoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
+def FDTOI : F3_3<2, 0b110100, 0b011010010,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+
+// Convert between Floating-point Formats Instructions, p. 143
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fstod $src, $dst",
+ [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+def FDTOS : F3_3<2, 0b110100, 0b011000110,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtos $src, $dst",
+ [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+
+// Floating-point Move Instructions, p. 144
+def FMOVS : F3_3<2, 0b110100, 0b000000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fmovs $src, $dst", []>;
+def FNEGS : F3_3<2, 0b110100, 0b000000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fnegs $src, $dst",
+ [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+def FABSS : F3_3<2, 0b110100, 0b000001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fabss $src, $dst",
+ [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+
+
+// Floating-point Square Root Instructions, p.145
+def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fsqrts $src, $dst",
+ [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fsqrtd $src, $dst",
+ [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+
+
+
+// Floating-point Add and Subtract Instructions, p. 146
+def FADDS : F3_3<2, 0b110100, 0b001000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fadds $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+def FADDD : F3_3<2, 0b110100, 0b001000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "faddd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSUBS : F3_3<2, 0b110100, 0b001000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsubs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+def FSUBD : F3_3<2, 0b110100, 0b001000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fsubd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Multiply and Divide Instructions, p. 147
+def FMULS : F3_3<2, 0b110100, 0b001001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fmuls $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+def FMULD : F3_3<2, 0b110100, 0b001001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSMULD : F3_3<2, 0b110100, 0b001101001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
+ (fextend FPRegs:$src2)))]>;
+def FDIVS : F3_3<2, 0b110100, 0b001001101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fdivs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+def FDIVD : F3_3<2, 0b110100, 0b001001110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fdivd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Compare Instructions, p. 148
+// Note: the 2nd template arg is different for these guys.
+// Note 2: the result of an FCMP is not available until the 2nd cycle
+// after the instr is retired, but there is no interlock. This behavior
+// is modelled with a forced noop after the instruction.
+def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ (outs), (ins FPRegs:$src1, FPRegs:$src2),
+ "fcmps $src1, $src2\n\tnop",
+ [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fcmpd $src1, $src2\n\tnop",
+ [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+
+
+//===----------------------------------------------------------------------===//
+// V9 Instructions
+//===----------------------------------------------------------------------===//
+
+// V9 Conditional Moves.
+let Predicates = [HasV9], isTwoAddress = 1 in {
+ // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
+ // FIXME: Add instruction encodings for the JIT some day.
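+ // For example (illustrative): for a select, $dst is tied to $T by the
+ // two-address constraint, so $dst starts out holding $T and the
+ // conditional move overwrites it with $F only when the condition holds.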
+ def MOVICCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVICCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+
+ def MOVFCCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVFCCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+
+ def FMOVS_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %icc, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %icc, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ def FMOVS_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %fcc0, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %fcc0, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+
+}
+
+// Floating-Point Move Instructions, p. 164 of the V9 manual.
+let Predicates = [HasV9] in {
+ def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fmovd $src, $dst", []>;
+ def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fnegd $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FABSD : F3_3<2, 0b110100, 0b000001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fabsd $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
+// the top 32 bits before using it. To do the clearing we zero-extend with an
+// SRLri X, 0: on V9 an srl by 0 clears the upper 32 bits, whereas an sll by 0
+// would leave them intact.
+def POPCrr : F3_1<2, 0b101110,
+ (outs IntRegs:$dst), (ins IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
+def : Pat<(ctpop IntRegs:$src),
+ (POPCrr (SRLri IntRegs:$src, 0))>;
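+// Illustrative lowering (register names hypothetical): for an i32 value in
+// %o0, this emits "srl %o0, 0, %o1" to zero-extend into the full 64-bit
+// register, followed by "popc %o1, %o0" to count the set bits.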
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm13:$val),
+ (ORri G0, imm:$val)>;
+// Arbitrary immediates.
+def : Pat<(i32 imm:$val),
+ (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
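+// For example (illustrative), loading the constant 0x12345678 uses the
+// second pattern: "sethi %hi(0x12345678), %reg" sets bits 31-10 and
+// "or %reg, %lo(0x12345678), %reg" fills in the low 10 bits.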
+
+// subc
+def : Pat<(subc IntRegs:$b, IntRegs:$c),
+ (SUBCCrr IntRegs:$b, IntRegs:$c)>;
+def : Pat<(subc IntRegs:$b, simm13:$val),
+ (SUBCCri IntRegs:$b, imm:$val)>;
+
+// Global addresses, constant pool entries
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
+ (ADDri IntRegs:$r, tglobaladdr:$in)>;
+def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
+ (ADDri IntRegs:$r, tconstpool:$in)>;
+
+// Calls:
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+def : Pat<(ret), (RETL)>;
+
+// Map integer extload's to zextloads.
+def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi8 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi16 ADDRrr:$src)), (LDUHrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
+
+// zextload bool -> zextload byte
+def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
new file mode 100644
index 0000000..59efb19
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -0,0 +1,196 @@
+//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPARC implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcRegisterInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
+ const TargetInstrInfo &tii)
+ : SparcGenRegisterInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+ Subtarget(st), TII(tii) {
+}
+
+const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SP::G2);
+ Reserved.set(SP::G3);
+ Reserved.set(SP::G4);
+ Reserved.set(SP::O6);
+ Reserved.set(SP::I6);
+ Reserved.set(SP::I7);
+ Reserved.set(SP::G0);
+ Reserved.set(SP::G5);
+ Reserved.set(SP::G6);
+ Reserved.set(SP::G7);
+ return Reserved;
+}
+
+
+const TargetRegisterClass* const*
+SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ return CalleeSavedRegClasses;
+}
+
+bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const {
+ return false;
+}
+
+void SparcRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Addressable stack objects are accessed using negative offsets from %fp
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i+1).getImm();
+
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(SP::I6, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ } else {
+ // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
+ // scavenge a register here instead of reserving G1 all of the time.
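+ // For example (illustrative), Offset = 5000 does not fit in the 13-bit
+ // immediate field, so we emit "sethi 4, %g1" (5000 >> 10 == 4), then
+ // "add %g1, %fp, %g1", and rewrite the user to reference [%g1 + 904]
+ // (5000 & 1023 == 904).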
+ unsigned OffHi = (unsigned)Offset >> 10U;
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(SP::I6);
+ // Insert G1 + %lo(offset) into the user instruction.
+ MI.getOperand(i).ChangeToRegister(SP::G1, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ }
+}
+
+void SparcRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
+
+void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
+
+ // Round up to next doubleword boundary -- a double-word boundary
+ // is required by the ABI.
+ NumBytes = (NumBytes + 7) & ~7;
+ NumBytes = -NumBytes;
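+ // For example (illustrative), a function with 40 bytes of locals needs
+ // 40 + 92 = 132 bytes, rounded up to 136 for doubleword alignment, so the
+ // code below emits "save %sp, -136, %sp".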
+
+ if (NumBytes >= -4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ } else {
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ unsigned OffHi = (unsigned)NumBytes >> 10U;
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 | %lo(NumBytes) to fill in the low 10 bits
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ }
+}
+
+void SparcRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+ assert(MBBI->getOpcode() == SP::RETL &&
+ "Can only put epilog before 'retl' instruction!");
+ BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+ .addReg(SP::G0);
+}
+
+unsigned SparcRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ assert(0 && "What is the frame register");
+ return SP::G1;
+}
+
+unsigned SparcRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "SparcGenRegisterInfo.inc"
+
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
new file mode 100644
index 0000000..fc863f3
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -0,0 +1,67 @@
+//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCREGISTERINFO_H
+#define SPARCREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "SparcGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class SparcSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct SparcRegisterInfo : public SparcGenRegisterInfo {
+ SparcSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
new file mode 100644
index 0000000..e3a50ca
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -0,0 +1,158 @@
+//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Sparc register file
+//===----------------------------------------------------------------------===//
+
+class SparcReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "SP";
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+ let Num = num;
+ let SubRegs = subregs;
+}
+
+// Integer registers
+def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>;
+def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>;
+def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>;
+def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>;
+def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>;
+def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>;
+def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>;
+def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>;
+def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
+def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
+def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
+def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
+def O6 : Ri<14, "O6">, DwarfRegNum<[14]>;
+def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
+def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
+def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
+def L2 : Ri<18, "L2">, DwarfRegNum<[18]>;
+def L3 : Ri<19, "L3">, DwarfRegNum<[19]>;
+def L4 : Ri<20, "L4">, DwarfRegNum<[20]>;
+def L5 : Ri<21, "L5">, DwarfRegNum<[21]>;
+def L6 : Ri<22, "L6">, DwarfRegNum<[22]>;
+def L7 : Ri<23, "L7">, DwarfRegNum<[23]>;
+def I0 : Ri<24, "I0">, DwarfRegNum<[24]>;
+def I1 : Ri<25, "I1">, DwarfRegNum<[25]>;
+def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
+def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
+def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
+def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
+def I6 : Ri<30, "I6">, DwarfRegNum<[30]>;
+def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>;
+def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>;
+def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>;
+def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>;
+def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>;
+def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>;
+def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>;
+def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>;
+def F10 : Rf<10, "F10">, DwarfRegNum<[42]>;
+def F11 : Rf<11, "F11">, DwarfRegNum<[43]>;
+def F12 : Rf<12, "F12">, DwarfRegNum<[44]>;
+def F13 : Rf<13, "F13">, DwarfRegNum<[45]>;
+def F14 : Rf<14, "F14">, DwarfRegNum<[46]>;
+def F15 : Rf<15, "F15">, DwarfRegNum<[47]>;
+def F16 : Rf<16, "F16">, DwarfRegNum<[48]>;
+def F17 : Rf<17, "F17">, DwarfRegNum<[49]>;
+def F18 : Rf<18, "F18">, DwarfRegNum<[50]>;
+def F19 : Rf<19, "F19">, DwarfRegNum<[51]>;
+def F20 : Rf<20, "F20">, DwarfRegNum<[52]>;
+def F21 : Rf<21, "F21">, DwarfRegNum<[53]>;
+def F22 : Rf<22, "F22">, DwarfRegNum<[54]>;
+def F23 : Rf<23, "F23">, DwarfRegNum<[55]>;
+def F24 : Rf<24, "F24">, DwarfRegNum<[56]>;
+def F25 : Rf<25, "F25">, DwarfRegNum<[57]>;
+def F26 : Rf<26, "F26">, DwarfRegNum<[58]>;
+def F27 : Rf<27, "F27">, DwarfRegNum<[59]>;
+def F28 : Rf<28, "F28">, DwarfRegNum<[60]>;
+def F29 : Rf<29, "F29">, DwarfRegNum<[61]>;
+def F30 : Rf<30, "F30">, DwarfRegNum<[62]>;
+def F31 : Rf<31, "F31">, DwarfRegNum<[63]>;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>;
+def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<[36]>;
+def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<[38]>;
+def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<[40]>;
+def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<[42]>;
+def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<[44]>;
+def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<[46]>;
+def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<[48]>;
+def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<[50]>;
+def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<[52]>;
+def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<[54]>;
+def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<[56]>;
+def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[58]>;
+def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[60]>;
+def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[62]>;
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"SP", [i32], 32, [L0, L1, L2, L3, L4, L5, L6, L7,
+ I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+
+ // FIXME: G1 reserved for now for large imm generation by frame code.
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4, // FIXME: OK for use only in
+ // applications, not libraries.
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ ]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ IntRegsClass::iterator
+ IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // FIXME: These special regs should be taken out of the regclass!
+ return end()-10 // Don't allocate special registers
+ -1; // FIXME: G1 reserved for large imm generation by frame code.
+ }
+ }];
+}
+
+def FPRegs : RegisterClass<"SP", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, F8,
+ F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22,
+ F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9, D10, D11, D12, D13, D14, D15]>;
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
new file mode 100644
index 0000000..aaddbff
--- /dev/null
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -0,0 +1,43 @@
+//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPARC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcSubtarget.h"
+#include "SparcGenSubtarget.inc"
+using namespace llvm;
+
+// FIXME: temporary.
+#include "llvm/Support/CommandLine.h"
+namespace {
+ cl::opt<bool> EnableV9("enable-sparc-v9-insts", cl::Hidden,
+ cl::desc("Enable V9 instructions in the V8 target"));
+}
+
+SparcSubtarget::SparcSubtarget(const Module &M, const std::string &FS) {
+ // Set the default features.
+ IsV9 = false;
+ V8DeprecatedInsts = false;
+ IsVIS = false;
+
+ // Determine default and user specified characteristics
+ std::string CPU = "generic";
+
+ // FIXME: autodetect host here!
+ CPU = "v9"; // What is a good way to detect V9?
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+
+ // Unless explicitly enabled, disable the V9 instructions.
+ if (!EnableV9)
+ IsV9 = false;
+}
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
new file mode 100644
index 0000000..e5a5ba4
--- /dev/null
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -0,0 +1,43 @@
+//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPARC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_SUBTARGET_H
+#define SPARC_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+ class Module;
+
+class SparcSubtarget : public TargetSubtarget {
+ bool IsV9;
+ bool V8DeprecatedInsts;
+ bool IsVIS;
+public:
+ SparcSubtarget(const Module &M, const std::string &FS);
+
+ bool isV9() const { return IsV9; }
+ bool isVIS() const { return IsVIS; }
+ bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The definition of this function is
+ /// auto-generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcTargetAsmInfo.cpp b/lib/Target/Sparc/SparcTargetAsmInfo.cpp
new file mode 100644
index 0000000..c13d45c
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetAsmInfo.cpp
@@ -0,0 +1,50 @@
+//===-- SparcTargetAsmInfo.cpp - Sparc asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the SparcELFTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetAsmInfo.h"
+
+using namespace llvm;
+
+SparcELFTargetAsmInfo::SparcELFTargetAsmInfo(const TargetMachine &TM):
+ ELFTargetAsmInfo(TM) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
+ COMMDirectiveTakesAlignment = true;
+ CStringSection = ".rodata.str";
+
+ // Sparc normally uses a named section for BSS.
+ BSSSection_ = getNamedSection("\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS,
+ /* Override */ true);
+}
+
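+// For example (illustrative), a plain writable data section comes back as
+// ",#alloc,#write" in the Sun assembler syntax used here, while mergeable
+// sections fall through to the generic ELF flag printing.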
+std::string SparcELFTargetAsmInfo::printSectionFlags(unsigned flags) const {
+ if (flags & SectionFlags::Mergeable)
+ return ELFTargetAsmInfo::printSectionFlags(flags);
+
+ std::string Flags;
+ if (!(flags & SectionFlags::Debug))
+ Flags += ",#alloc";
+ if (flags & SectionFlags::Code)
+ Flags += ",#execinstr";
+ if (flags & SectionFlags::Writeable)
+ Flags += ",#write";
+ if (flags & SectionFlags::TLS)
+ Flags += ",#tls";
+
+ return Flags;
+}
diff --git a/lib/Target/Sparc/SparcTargetAsmInfo.h b/lib/Target/Sparc/SparcTargetAsmInfo.h
new file mode 100644
index 0000000..1af5d80
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetAsmInfo.h
@@ -0,0 +1,33 @@
+//=====-- SparcTargetAsmInfo.h - Sparc asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETASMINFO_H
+#define SPARCTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class TargetMachine;
+
+ struct SparcELFTargetAsmInfo : public ELFTargetAsmInfo {
+ explicit SparcELFTargetAsmInfo(const TargetMachine &TM);
+
+ std::string printSectionFlags(unsigned flags) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
new file mode 100644
index 0000000..eda0309
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -0,0 +1,94 @@
+//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetAsmInfo.h"
+#include "SparcTargetMachine.h"
+#include "Sparc.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// SparcTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int SparcTargetMachineModule;
+int SparcTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
+
+const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
+ // FIXME: Handle Solaris subtarget someday :)
+ return new SparcELFTargetAsmInfo(*this);
+}
+
+/// SparcTargetMachine ctor - Create an ILP32 architecture model
+///
+SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("E-p:32:32-f128:128:128"),
+ Subtarget(M, FS), TLInfo(*this), InstrInfo(Subtarget),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+}
+
+unsigned SparcTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "sparc-")
+ return 20;
+
+ // If the target triple is something non-sparc, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer32)
+#ifdef __sparc__
+ return 20; // BE/32 ==> Prefer sparc on sparc
+#else
+ return 5; // BE/32 ==> Weakly prefer sparc elsewhere
+#endif
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+#if defined(__sparc__)
+ return 10;
+#else
+ return 0;
+#endif
+}
+
+bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createSparcISelDag(*this));
+ return false;
+}
+
+/// addPreEmitPass - This pass may be implemented by targets that want to run
+/// passes immediately before machine code is emitted. This should return
+/// true if -print-machineinstrs should print out the code after the passes.
+bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel){
+ PM.add(createSparcFPMoverPass(*this));
+ PM.add(createSparcDelaySlotFillerPass(*this));
+ return true;
+}
+
+bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ // Output assembly language.
+ PM.add(createSparcCodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
new file mode 100644
index 0000000..40b44f2
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -0,0 +1,63 @@
+//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETMACHINE_H
+#define SPARCTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "SparcInstrInfo.h"
+#include "SparcSubtarget.h"
+#include "SparcISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class SparcTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ SparcSubtarget Subtarget;
+ SparcTargetLowering TLInfo;
+ SparcInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ SparcTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const SparcRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual SparcTargetLowering* getTargetLowering() const {
+ return const_cast<SparcTargetLowering*>(&TLInfo);
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
new file mode 100644
index 0000000..f937025
--- /dev/null
+++ b/lib/Target/SubtargetFeature.cpp
@@ -0,0 +1,364 @@
+//===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SubtargetFeature interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Streams.h"
+#include <algorithm>
+#include <ostream>
+#include <cassert>
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Static Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// hasFlag - Determine if a feature has a flag; '+' or '-'
+///
+static inline bool hasFlag(const std::string &Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' or '-' flag
+ return Ch == '+' || Ch =='-';
+}
+
+/// StripFlag - Return string stripped of flag.
+///
+static inline std::string StripFlag(const std::string &Feature) {
+ return hasFlag(Feature) ? Feature.substr(1) : Feature;
+}
+
+/// isEnabled - Return true if enable flag; '+'.
+///
+static inline bool isEnabled(const std::string &Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' for enabled
+ return Ch == '+';
+}
+
+/// PrependFlag - Return a string with a prepended flag; '+' or '-'.
+///
+static inline std::string PrependFlag(const std::string &Feature,
+ bool IsEnabled) {
+ assert(!Feature.empty() && "Empty string");
+ if (hasFlag(Feature)) return Feature;
+ return std::string(IsEnabled ? "+" : "-") + Feature;
+}
+
+/// Split - Splits a string of comma-separated items into a vector of strings.
+///
+static void Split(std::vector<std::string> &V, const std::string &S) {
+ // Start at beginning of string.
+ size_t Pos = 0;
+ while (true) {
+ // Find the next comma
+ size_t Comma = S.find(',', Pos);
+ // If no comma is found then the rest of the string is used
+ if (Comma == std::string::npos) {
+ // Add string to vector
+ V.push_back(S.substr(Pos));
+ break;
+ }
+ // Otherwise add substring to vector
+ V.push_back(S.substr(Pos, Comma - Pos));
+ // Advance to next item
+ Pos = Comma + 1;
+ }
+}
+
+/// Join - Joins a vector of strings into a single string, with a comma
+/// separating each element.
+///
+static std::string Join(const std::vector<std::string> &V) {
+ // Start with empty string.
+ std::string Result;
+ // If the vector is not empty
+ if (!V.empty()) {
+ // Start with the CPU feature
+ Result = V[0];
+ // For each successive feature
+ for (size_t i = 1; i < V.size(); i++) {
+ // Add a comma
+ Result += ",";
+ // Add the feature
+ Result += V[i];
+ }
+ }
+ // Return the features string
+ return Result;
+}
+
+/// AddFeature - Add a feature to the list, prepending the enable/disable flag.
+void SubtargetFeatures::AddFeature(const std::string &String,
+ bool IsEnabled) {
+ // Don't add empty features
+ if (!String.empty()) {
+ // Convert to lowercase, prepend flag and add to vector
+ Features.push_back(PrependFlag(LowercaseString(String), IsEnabled));
+ }
+}
+
+/// Find KV in array using binary search.
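+/// The table must be sorted by Key (the callers assert this in debug builds);
+/// returns NULL when the key is not present.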
+template<typename T> const T *Find(const std::string &S, const T *A, size_t L) {
+ // Make the lower bound element we're looking for
+ T KV;
+ KV.Key = S.c_str();
+ // Determine the end of the array
+ const T *Hi = A + L;
+ // Binary search the array
+ const T *F = std::lower_bound(A, Hi, KV);
+ // If not found then return NULL
+ if (F == Hi || std::string(F->Key) != S) return NULL;
+ // Return the found array item
+ return F;
+}
+
+/// getLongestEntryLength - Return the length of the longest entry in the table.
+///
+static size_t getLongestEntryLength(const SubtargetFeatureKV *Table,
+ size_t Size) {
+ size_t MaxLen = 0;
+ for (size_t i = 0; i < Size; i++)
+ MaxLen = std::max(MaxLen, std::strlen(Table[i].Key));
+ return MaxLen;
+}
+
+/// Display help for feature choices.
+///
+static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) {
+ // Determine the length of the longest CPU and Feature entries.
+ unsigned MaxCPULen = getLongestEntryLength(CPUTable, CPUTableSize);
+ unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
+
+ // Print the CPU table.
+ cerr << "Available CPUs for this target:\n\n";
+ for (size_t i = 0; i != CPUTableSize; i++)
+ cerr << " " << CPUTable[i].Key
+ << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ')
+ << " - " << CPUTable[i].Desc << ".\n";
+ cerr << "\n";
+
+ // Print the Feature table.
+ cerr << "Available features for this target:\n\n";
+ for (size_t i = 0; i != FeatTableSize; i++)
+ cerr << " " << FeatTable[i].Key
+ << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ')
+ << " - " << FeatTable[i].Desc << ".\n";
+ cerr << "\n";
+
+ cerr << "Use +feature to enable a feature, or -feature to disable it.\n"
+ << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+ exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+// SubtargetFeatures Implementation
+//===----------------------------------------------------------------------===//
+
+SubtargetFeatures::SubtargetFeatures(const std::string &Initial) {
+ // Break up string into separate features
+ Split(Features, Initial);
+}
+
+
+std::string SubtargetFeatures::getString() const {
+ return Join(Features);
+}
+void SubtargetFeatures::setString(const std::string &Initial) {
+ // Throw out old features
+ Features.clear();
+ // Break up string into separate features
+ Split(Features, LowercaseString(Initial));
+}
+
+
+/// setCPU - Set the CPU string. Replaces previous setting. Setting to ""
+/// clears CPU.
+void SubtargetFeatures::setCPU(const std::string &String) {
+ Features[0] = LowercaseString(String);
+}
+
+
+/// setCPUIfNone - Set the CPU string only if no CPU string is already set.
+///
+void SubtargetFeatures::setCPUIfNone(const std::string &String) {
+ if (Features[0].empty()) setCPU(String);
+}
+
+/// getCPU - Returns current CPU.
+///
+const std::string & SubtargetFeatures::getCPU() const {
+ return Features[0];
+}
+
+
+/// SetImpliedBits - For each feature that is (transitively) implied by this
+/// feature, set it.
+///
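+/// For example (with hypothetical features): if feature "b" implies "a" and
+/// feature "c" implies "b", then setting "c" transitively sets the bits for
+/// "b" and "a" as well.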
+static
+void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FeatureEntry->Implies & FE.Value) {
+ Bits |= FE.Value;
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ClearImpliedBits - For each feature that (transitively) implies this
+/// feature, clear it.
+///
+static
+void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FE.Implies & FeatureEntry->Value) {
+ Bits &= ~FE.Value;
+ ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// getBits - Get feature bits.
+///
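+/// For example (illustrative feature string), "v9,+vis,-deprecated-v8" sets
+/// the base bits of the "v9" CPU entry, ORs in the bits for "vis" (and
+/// anything it implies), and clears the bits for "deprecated-v8" (and
+/// anything that implies it).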
+uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
+ size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ assert(CPUTable && "missing CPU table");
+ assert(FeatureTable && "missing features table");
+#ifndef NDEBUG
+ for (size_t i = 1; i < CPUTableSize; i++) {
+ assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
+ "CPU table is not sorted");
+ }
+ for (size_t i = 1; i < FeatureTableSize; i++) {
+ assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
+ "CPU features table is not sorted");
+ }
+#endif
+ uint32_t Bits = 0; // Resulting bits
+
+ // Check if help is needed
+ if (Features[0] == "help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find CPU entry
+ const SubtargetFeatureKV *CPUEntry =
+ Find(Features[0], CPUTable, CPUTableSize);
+ // If there is a match
+ if (CPUEntry) {
+ // Set base feature bits
+ Bits = CPUEntry->Value;
+
+ // Set the feature implied by this CPU feature, if any.
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+ if (CPUEntry->Value & FE.Value)
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ cerr << "'" << Features[0]
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)"
+ << "\n";
+ }
+ // Iterate through each feature
+ for (size_t i = 1; i < Features.size(); i++) {
+ const std::string &Feature = Features[i];
+
+ // Check for help
+ if (Feature == "+help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ // Enable/disable feature in bits
+ if (isEnabled(Feature)) {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ cerr << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)"
+ << "\n";
+ }
+ }
+
+ return Bits;
+}
+
+/// Get info pointer
+void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table,
+ size_t TableSize) {
+ assert(Table && "missing table");
+#ifndef NDEBUG
+ for (size_t i = 1; i < TableSize; i++) {
+ assert(strcmp(Table[i - 1].Key, Table[i].Key) < 0 && "Table is not sorted");
+ }
+#endif
+
+ // Find entry
+ const SubtargetInfoKV *Entry = Find(Features[0], Table, TableSize);
+
+ if (Entry) {
+ return Entry->Value;
+ } else {
+ cerr << "'" << Features[0]
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)"
+ << "\n";
+ return NULL;
+ }
+}
+
+/// print - Print feature string.
+///
+void SubtargetFeatures::print(std::ostream &OS) const {
+ for (size_t i = 0; i < Features.size(); i++) {
+ OS << Features[i] << " ";
+ }
+ OS << "\n";
+}
+
+/// dump - Dump feature info.
+///
+void SubtargetFeatures::dump() const {
+ print(*cerr.stream());
+}
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
new file mode 100644
index 0000000..ed544b7
--- /dev/null
+++ b/lib/Target/Target.cpp
@@ -0,0 +1,94 @@
+//===-- Target.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMTarget.a, which implements
+// target information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Target.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetData.h"
+#include <cstring>
+
+using namespace llvm;
+
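+// Illustrative usage from C client code (a sketch; only functions defined in
+// this file are used):
+//   LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:32:32");
+//   unsigned PtrBytes = LLVMPointerSize(TD); // 4 for this layout
+//   LLVMDisposeTargetData(TD);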
+LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
+ return wrap(new TargetData(StringRep));
+}
+
+void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetData(*unwrap(TD)));
+}
+
+char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
+ std::string StringRep = unwrap(TD)->getStringRepresentation();
+ return strdup(StringRep.c_str());
+}
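+
+// Note: the string returned above is allocated with strdup; the caller is
+// responsible for releasing it with free().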
+
+LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
+ return unwrap(TD)->isLittleEndian();
+}
+
+unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
+ return unwrap(TD)->getPointerSize();
+}
+
+LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
+ return wrap(unwrap(TD)->getIntPtrType());
+}
+
+unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
+}
+
+unsigned long long LLVMStoreSizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeStoreSize(unwrap(Ty));
+}
+
+unsigned long long LLVMABISizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeAllocSize(unwrap(Ty));
+}
+
+unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getABITypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getCallFrameTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getPrefTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
+ LLVMValueRef GlobalVar) {
+ return unwrap(TD)->getPreferredAlignment(unwrap<GlobalVariable>(GlobalVar));
+}
+
+unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned long long Offset) {
+ const StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset);
+}
+
+unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned Element) {
+ const StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
+}
+
+void LLVMInvalidateStructLayout(LLVMTargetDataRef TD, LLVMTypeRef StructTy) {
+ unwrap(TD)->InvalidateStructLayoutInfo(unwrap<StructType>(StructTy));
+}
+
+void LLVMDisposeTargetData(LLVMTargetDataRef TD) {
+ delete unwrap(TD);
+}
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
new file mode 100644
index 0000000..6a2de6f
--- /dev/null
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -0,0 +1,461 @@
+//===-- TargetAsmInfo.cpp - Asm Info ---------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include <cctype>
+#include <cstring>
+
+using namespace llvm;
+
+void TargetAsmInfo::fillDefaultValues() {
+ BSSSection = "\t.bss";
+ BSSSection_ = 0;
+ ReadOnlySection = 0;
+ SmallDataSection = 0;
+ SmallBSSSection = 0;
+ SmallRODataSection = 0;
+ TLSDataSection = 0;
+ TLSBSSSection = 0;
+ ZeroFillDirective = 0;
+ NonexecutableStackDirective = 0;
+ NeedsSet = false;
+ MaxInstLength = 4;
+ PCSymbol = "$";
+ SeparatorChar = ';';
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".";
+ LessPrivateGlobalPrefix = "";
+ JumpTableSpecialLabelPrefix = 0;
+ GlobalVarAddrPrefix = "";
+ GlobalVarAddrSuffix = "";
+ FunctionAddrPrefix = "";
+ FunctionAddrSuffix = "";
+ PersonalityPrefix = "";
+ PersonalitySuffix = "";
+ NeedsIndirectEncoding = false;
+ InlineAsmStart = "#APP";
+ InlineAsmEnd = "#NO_APP";
+ AssemblerDialect = 0;
+ StringConstantPrefix = ".str";
+ ZeroDirective = "\t.zero\t";
+ ZeroDirectiveSuffix = 0;
+ AsciiDirective = "\t.ascii\t";
+ AscizDirective = "\t.asciz\t";
+ Data8bitsDirective = "\t.byte\t";
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = "\t.quad\t";
+ AlignDirective = "\t.align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+ SwitchToSectionDirective = "\t.section\t";
+ TextSectionStartSuffix = "";
+ DataSectionStartSuffix = "";
+ SectionEndDirectiveSuffix = 0;
+ ConstantPoolSection = "\t.section .rodata";
+ JumpTableDataSection = "\t.section .rodata";
+ JumpTableDirective = 0;
+ CStringSection = 0;
+ CStringSection_ = 0;
+ // FIXME: Flags are ELFish - replace with normal section stuff.
+ StaticCtorsSection = "\t.section .ctors,\"aw\",@progbits";
+ StaticDtorsSection = "\t.section .dtors,\"aw\",@progbits";
+ GlobalDirective = "\t.globl\t";
+ SetDirective = 0;
+ LCOMMDirective = 0;
+ COMMDirective = "\t.comm\t";
+ COMMDirectiveTakesAlignment = true;
+ HasDotTypeDotSizeDirective = true;
+ HasSingleParameterDotFile = true;
+ UsedDirective = 0;
+ WeakRefDirective = 0;
+ WeakDefDirective = 0;
+ // FIXME: These are ELFish - move to ELFTAI.
+ HiddenDirective = "\t.hidden\t";
+ ProtectedDirective = "\t.protected\t";
+ AbsoluteDebugSectionOffsets = false;
+ AbsoluteEHSectionOffsets = false;
+ HasLEB128 = false;
+ HasDotLocAndDotFile = false;
+ SupportsDebugInformation = false;
+ SupportsExceptionHandling = false;
+ DwarfRequiresFrameSection = true;
+ DwarfUsesInlineInfoSection = false;
+ SupportsMacInfoSection = true;
+ NonLocalEHFrameLabel = false;
+ GlobalEHDirective = 0;
+ SupportsWeakOmittedEHFrame = true;
+ DwarfSectionOffsetDirective = 0;
+ DwarfAbbrevSection = ".debug_abbrev";
+ DwarfInfoSection = ".debug_info";
+ DwarfLineSection = ".debug_line";
+ DwarfFrameSection = ".debug_frame";
+ DwarfPubNamesSection = ".debug_pubnames";
+ DwarfPubTypesSection = ".debug_pubtypes";
+ DwarfDebugInlineSection = ".debug_inlined";
+ DwarfStrSection = ".debug_str";
+ DwarfLocSection = ".debug_loc";
+ DwarfARangesSection = ".debug_aranges";
+ DwarfRangesSection = ".debug_ranges";
+ DwarfMacInfoSection = ".debug_macinfo";
+ DwarfEHFrameSection = ".eh_frame";
+ DwarfExceptionSection = ".gcc_except_table";
+ AsmTransCBE = 0;
+ TextSection = getUnnamedSection("\t.text", SectionFlags::Code);
+ DataSection = getUnnamedSection("\t.data", SectionFlags::Writeable);
+}
+
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm)
+ : TM(tm) {
+ fillDefaultValues();
+}
+
+TargetAsmInfo::~TargetAsmInfo() {
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorChar or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorChar or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
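+/// For example (illustrative), with MaxInstLength == 4 and SeparatorChar ';',
+/// "mov 1, %o0 ; add %o0, %o1, %o0" counts as two instructions and returns 8.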
+unsigned TargetAsmInfo::getInlineAsmLength(const char *Str) const {
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || *Str == SeparatorChar)
+ atInsnStart = true;
+ if (atInsnStart && !isspace(*Str)) {
+ Length += MaxInstLength;
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, CommentString, strlen(CommentString))==0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
+
+unsigned TargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
+static bool isSuitableForBSS(const GlobalVariable *GV) {
+ if (!GV->hasInitializer())
+ return true;
+
+ // Leave constant zeros in readonly constant sections, so they can be shared
+ Constant *C = GV->getInitializer();
+ return (C->isNullValue() && !GV->isConstant() && !NoZerosInBSS);
+}
+
+static bool isConstantString(const Constant *C) {
+ // First check: do we have a constant array of i8 terminated with zero?
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ // Check if the initializer is a null-terminated string
+ if (CVA && CVA->isCString())
+ return true;
+
+ // Another possibility: [1 x i8] zeroinitializer
+ if (isa<ConstantAggregateZero>(C)) {
+ if (const ArrayType *Ty = dyn_cast<ArrayType>(C->getType())) {
+ return (Ty->getElementType() == Type::Int8Ty &&
+ Ty->getNumElements() == 1);
+ }
+ }
+
+ return false;
+}
+
+unsigned TargetAsmInfo::RelocBehaviour() const {
+ // By default, all relocations in PIC mode force the symbol to be placed
+ // in a r/w section.
+ return (TM.getRelocationModel() != Reloc::Static ?
+ Reloc::LocalOrGlobal : Reloc::None);
+}
+
+SectionKind::Kind
+TargetAsmInfo::SectionKindForGlobal(const GlobalValue *GV) const {
+ // Early exit - functions should be always in text sections.
+ if (isa<Function>(GV))
+ return SectionKind::Text;
+
+ const GlobalVariable* GVar = dyn_cast<GlobalVariable>(GV);
+ assert(GVar && "Invalid global value for section selection");
+ bool isThreadLocal = GVar->isThreadLocal();
+
+ if (isSuitableForBSS(GVar)) {
+ // Variable can be easily put to BSS section.
+ return (isThreadLocal ? SectionKind::ThreadBSS : SectionKind::BSS);
+ } else if (GVar->isConstant() && !isThreadLocal) {
+ // Now we know that the variable has an initializer and that the
+ // initializer is constant. We need to examine it to decide which section
+ // to output the variable into. Also note that there is no thread-local
+ // r/o section.
+ Constant *C = GVar->getInitializer();
+ if (C->ContainsRelocations(Reloc::LocalOrGlobal)) {
+ // Decide whether it is still possible to put the symbol into a r/o section.
+ unsigned Reloc = RelocBehaviour();
+
+ // We already did a query for 'all' relocs, hence the early exits.
+ if (Reloc == Reloc::LocalOrGlobal)
+ return SectionKind::Data;
+ else if (Reloc == Reloc::None)
+ return SectionKind::ROData;
+ else {
+ // Ok, target wants something funny. Honour it.
+ return (C->ContainsRelocations(Reloc) ?
+ SectionKind::Data : SectionKind::ROData);
+ }
+ } else {
+ // Check if the initializer is a null-terminated string
+ if (isConstantString(C))
+ return SectionKind::RODataMergeStr;
+ else
+ return SectionKind::RODataMergeConst;
+ }
+ }
+
+ // The variable either is not constant or is thread-local - output it to
+ // the data section.
+ return (isThreadLocal ? SectionKind::ThreadData : SectionKind::Data);
+}
+
+unsigned
+TargetAsmInfo::SectionFlagsForGlobal(const GlobalValue *GV,
+ const char* Name) const {
+ unsigned Flags = SectionFlags::None;
+
+ // Decode flags from global itself.
+ if (GV) {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+ switch (Kind) {
+ case SectionKind::Text:
+ Flags |= SectionFlags::Code;
+ break;
+ case SectionKind::ThreadData:
+ case SectionKind::ThreadBSS:
+ Flags |= SectionFlags::TLS;
+ // FALLS THROUGH
+ case SectionKind::Data:
+ case SectionKind::DataRel:
+ case SectionKind::DataRelLocal:
+ case SectionKind::DataRelRO:
+ case SectionKind::DataRelROLocal:
+ case SectionKind::BSS:
+ Flags |= SectionFlags::Writeable;
+ break;
+ case SectionKind::ROData:
+ case SectionKind::RODataMergeStr:
+ case SectionKind::RODataMergeConst:
+ // No additional flags here
+ break;
+ case SectionKind::SmallData:
+ case SectionKind::SmallBSS:
+ Flags |= SectionFlags::Writeable;
+ // FALLS THROUGH
+ case SectionKind::SmallROData:
+ Flags |= SectionFlags::Small;
+ break;
+ default:
+ assert(0 && "Unexpected section kind!");
+ }
+
+ if (GV->isWeakForLinker())
+ Flags |= SectionFlags::Linkonce;
+ }
+
+ // Add flags from sections, if any.
+ if (Name && *Name) {
+ Flags |= SectionFlags::Named;
+
+ // Some lame default implementation based on some magic section names.
+ if (strncmp(Name, ".gnu.linkonce.b.", 16) == 0 ||
+ strncmp(Name, ".llvm.linkonce.b.", 17) == 0 ||
+ strncmp(Name, ".gnu.linkonce.sb.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.sb.", 18) == 0)
+ Flags |= SectionFlags::BSS;
+ else if (strcmp(Name, ".tdata") == 0 ||
+ strncmp(Name, ".tdata.", 7) == 0 ||
+ strncmp(Name, ".gnu.linkonce.td.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.td.", 18) == 0)
+ Flags |= SectionFlags::TLS;
+ else if (strcmp(Name, ".tbss") == 0 ||
+ strncmp(Name, ".tbss.", 6) == 0 ||
+ strncmp(Name, ".gnu.linkonce.tb.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.tb.", 18) == 0)
+ Flags |= SectionFlags::BSS | SectionFlags::TLS;
+ }
+
+ return Flags;
+}
+
+const Section*
+TargetAsmInfo::SectionForGlobal(const GlobalValue *GV) const {
+ const Section* S;
+ // Select section name
+ if (GV->hasSection()) {
+ // Honour section already set, if any
+ unsigned Flags = SectionFlagsForGlobal(GV,
+ GV->getSection().c_str());
+ S = getNamedSection(GV->getSection().c_str(), Flags);
+ } else {
+ // Use default section depending on the 'type' of global
+ S = SelectSectionForGlobal(GV);
+ }
+
+ return S;
+}
+
+// Lame default implementation. Calculate the section name for global.
+const Section*
+TargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+
+ if (GV->isWeakForLinker()) {
+ std::string Name = UniqueSectionForGlobal(GV, Kind);
+ unsigned Flags = SectionFlagsForGlobal(GV, Name.c_str());
+ return getNamedSection(Name.c_str(), Flags);
+ } else {
+ if (Kind == SectionKind::Text)
+ return getTextSection();
+ else if (isBSS(Kind) && getBSSSection_())
+ return getBSSSection_();
+ else if (getReadOnlySection() && SectionKind::isReadOnly(Kind))
+ return getReadOnlySection();
+ }
+
+ return getDataSection();
+}
+
+// Lame default implementation. Calculate the section name for machine const.
+const Section*
+TargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+ // FIXME: Support data.rel stuff someday
+ return getDataSection();
+}
+
+std::string
+TargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV,
+ SectionKind::Kind Kind) const {
+ switch (Kind) {
+ case SectionKind::Text:
+ return ".gnu.linkonce.t." + GV->getName();
+ case SectionKind::Data:
+ return ".gnu.linkonce.d." + GV->getName();
+ case SectionKind::DataRel:
+ return ".gnu.linkonce.d.rel." + GV->getName();
+ case SectionKind::DataRelLocal:
+ return ".gnu.linkonce.d.rel.local." + GV->getName();
+ case SectionKind::DataRelRO:
+ return ".gnu.linkonce.d.rel.ro." + GV->getName();
+ case SectionKind::DataRelROLocal:
+ return ".gnu.linkonce.d.rel.ro.local." + GV->getName();
+ case SectionKind::SmallData:
+ return ".gnu.linkonce.s." + GV->getName();
+ case SectionKind::BSS:
+ return ".gnu.linkonce.b." + GV->getName();
+ case SectionKind::SmallBSS:
+ return ".gnu.linkonce.sb." + GV->getName();
+ case SectionKind::ROData:
+ case SectionKind::RODataMergeConst:
+ case SectionKind::RODataMergeStr:
+ return ".gnu.linkonce.r." + GV->getName();
+ case SectionKind::SmallROData:
+ return ".gnu.linkonce.s2." + GV->getName();
+ case SectionKind::ThreadData:
+ return ".gnu.linkonce.td." + GV->getName();
+ case SectionKind::ThreadBSS:
+ return ".gnu.linkonce.tb." + GV->getName();
+ default:
+ assert(0 && "Unknown section kind");
+ }
+ return "";
+}
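+
+// Usage sketch: a linkonce function "foo" gets ".gnu.linkonce.t.foo", while a
+// linkonce BSS global "bar" gets ".gnu.linkonce.b.bar"; SelectSectionForGlobal
+// above feeds these names straight back into getNamedSection().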
+
+const Section*
+TargetAsmInfo::getNamedSection(const char *Name, unsigned Flags,
+ bool Override) const {
+ Section& S = Sections[Name];
+
+ // This is a newly-created section; set it up properly.
+ if (S.Flags == SectionFlags::Invalid || Override) {
+ S.Flags = Flags | SectionFlags::Named;
+ S.Name = Name;
+ }
+
+ return &S;
+}
+
+const Section*
+TargetAsmInfo::getUnnamedSection(const char *Directive, unsigned Flags,
+ bool Override) const {
+ Section& S = Sections[Directive];
+
+ // This is a newly-created section; set it up properly.
+ if (S.Flags == SectionFlags::Invalid || Override) {
+ S.Flags = Flags & ~SectionFlags::Named;
+ S.Name = Directive;
+ }
+
+ return &S;
+}
+
+const std::string&
+TargetAsmInfo::getSectionFlags(unsigned Flags) const {
+ SectionFlags::FlagsStringsMapType::iterator I = FlagsStrings.find(Flags);
+
+ // We haven't printed these flags yet; print them and save them to the map.
+ // This reduces heap thrashing due to std::string construction/concatenation.
+ if (I == FlagsStrings.end())
+ I = FlagsStrings.insert(std::make_pair(Flags,
+ printSectionFlags(Flags))).first;
+
+ return I->second;
+}
+
+unsigned TargetAsmInfo::getULEB128Size(unsigned Value) {
+ unsigned Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t);
+ } while (Value);
+ return Size;
+}
+
+unsigned TargetAsmInfo::getSLEB128Size(int Value) {
+ unsigned Size = 0;
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ Size += sizeof(int8_t);
+ } while (IsMore);
+ return Size;
+}
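+
+// Both helpers count one byte per 7 bits of payload. A quick sanity sketch,
+// kept compiled-out since it is purely illustrative:
+#if 0
+ assert(TargetAsmInfo::getULEB128Size(127) == 1); // fits in 7 bits
+ assert(TargetAsmInfo::getULEB128Size(128) == 2); // needs a second byte
+ assert(TargetAsmInfo::getSLEB128Size(63) == 1); // sign bit still clear
+ assert(TargetAsmInfo::getSLEB128Size(64) == 2); // bit 6 set, extra byte
+ assert(TargetAsmInfo::getSLEB128Size(-1) == 1); // all-ones fits in one byte
+#endif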
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
new file mode 100644
index 0000000..67fefbb
--- /dev/null
+++ b/lib/Target/TargetData.cpp
@@ -0,0 +1,603 @@
+//===-- TargetData.cpp - Data size & alignment routines --------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target properties related to datatype size/offset/alignment
+// information.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct, and then passed around by const&. None of the member functions
+// require modification to the object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetData.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cstdlib>
+using namespace llvm;
+
+// Handle the Pass registration stuff necessary to use TargetData.
+
+// Register the TargetData pass.
+static RegisterPass<TargetData> X("targetdata", "Target Data Layout", false,
+ true);
+char TargetData::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Support for StructLayout
+//===----------------------------------------------------------------------===//
+
+StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+ StructAlignment = 0;
+ StructSize = 0;
+ NumElements = ST->getNumElements();
+
+ // Loop over each of the elements, placing them in memory.
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ const Type *Ty = ST->getElementType(i);
+ unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+
+ // Add padding if necessary to align the data element properly.
+ if ((StructSize & (TyAlign-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign);
+
+ // Keep track of maximum alignment constraint.
+ StructAlignment = std::max(TyAlign, StructAlignment);
+
+ MemberOffsets[i] = StructSize;
+ StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+ }
+
+ // Empty structures have alignment of 1 byte.
+ if (StructAlignment == 0) StructAlignment = 1;
+
+ // Add padding to the end of the struct so that it can be placed in an array
+ // and all array elements will be aligned correctly.
+ if ((StructSize & (StructAlignment-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment);
+}
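+
+// Worked example, assuming the default i8/i32 alignments from
+// TargetData::init(): for a non-packed { i8, i32 }, the i8 sits at offset 0,
+// padding aligns the i32 to offset 4, StructSize ends up 8 and
+// StructAlignment 4, so the struct tiles correctly in an array.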
+
+
+/// getElementContainingOffset - Given a valid offset into the structure,
+/// return the structure index that contains it.
+unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
+ const uint64_t *SI =
+ std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
+ assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
+ --SI;
+ assert(*SI <= Offset && "upper_bound didn't work");
+ assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
+ (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
+ "Upper bound didn't work!");
+
+ // Multiple fields can have the same offset if any of them are zero sized.
+ // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
+ // at the i32 element, because it is the last element at that offset. This is
+ // the right one to return, because anything after it will have a higher
+ // offset, implying that this element is non-empty.
+ return SI-&MemberOffsets[0];
+}
+
+//===----------------------------------------------------------------------===//
+// TargetAlignElem, TargetAlign support
+//===----------------------------------------------------------------------===//
+
+TargetAlignElem
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align,
+ unsigned char pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ TargetAlignElem retval;
+ retval.AlignType = align_type;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
+ return (AlignType == rhs.AlignType
+ && ABIAlign == rhs.ABIAlign
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+std::ostream &
+TargetAlignElem::dump(std::ostream &os) const {
+ return os << AlignType
+ << TypeBitWidth
+ << ":" << (int) (ABIAlign * 8)
+ << ":" << (int) (PrefAlign * 8);
+}
+
+const TargetAlignElem TargetData::InvalidAlignmentElem =
+ TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
+
+//===----------------------------------------------------------------------===//
+// TargetData Class Implementation
+//===----------------------------------------------------------------------===//
+
+/*!
+ A TargetDescription string consists of a sequence of hyphen-delimited
+ specifiers for target endianness, pointer size and alignments, and various
+ primitive type sizes and alignments. A typical string looks something like:
+ <br><br>
+ "E-p:32:32:32-i1:8:8-i8:8:8-i32:32:32-i64:32:64-f32:32:32-f64:32:64"
+ <br><br>
+ (note: this string is not fully specified and is only an example.)
+ \p
+ Alignments come in two flavors: ABI and preferred. ABI alignment (abi_align,
+ below) dictates how a type will be aligned within an aggregate and when used
+ as an argument. Preferred alignment (pref_align, below) determines a type's
+ alignment when emitted as a global.
+ \p
+ Specifier string details:
+ <br><br>
+ <i>[E|e]</i>: Endianness. "E" specifies a big-endian target data model, "e"
+ specifies a little-endian target data model.
+ <br><br>
+ <i>p:@verbatim<size>:<abi_align>:<pref_align>@endverbatim</i>: Pointer size,
+ ABI and preferred alignment.
+ <br><br>
+ <i>@verbatim<type><size>:<abi_align>:<pref_align>@endverbatim</i>: Numeric type
+ alignment. Type is
+ one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector (aka
+ packed) or aggregate. Size indicates the size, e.g., 32 or 64 bits.
+ \p
+ The default string, fully specified, is:
+ <br><br>
+ "E-p:64:64:64-a0:0:64-f32:32:32-f64:64:64"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64"
+ "-v64:64:64-v128:128:128"
+ <br><br>
+ Note that in the case of aggregates, 0 is the default ABI and preferred
+ alignment. This is a special case, where the aggregate's computed worst-case
+ alignment will be used.
+ */
+void TargetData::init(const std::string &TargetDescription) {
+ std::string temp = TargetDescription;
+
+ LittleEndian = false;
+ PointerMemSize = 8;
+ PointerABIAlign = 8;
+ PointerPrefAlign = PointerABIAlign;
+
+ // Default alignments
+ setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
+ setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
+ setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
+ setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
+ setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
+ setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32
+ setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct, union, class, ...
+
+ while (!temp.empty()) {
+ std::string token = getToken(temp, "-");
+ std::string arg0 = getToken(token, ":");
+ const char *p = arg0.c_str();
+ switch(*p) {
+ case 'E':
+ LittleEndian = false;
+ break;
+ case 'e':
+ LittleEndian = true;
+ break;
+ case 'p':
+ PointerMemSize = atoi(getToken(token,":").c_str()) / 8;
+ PointerABIAlign = atoi(getToken(token,":").c_str()) / 8;
+ PointerPrefAlign = atoi(getToken(token,":").c_str()) / 8;
+ if (PointerPrefAlign == 0)
+ PointerPrefAlign = PointerABIAlign;
+ break;
+ case 'i':
+ case 'v':
+ case 'f':
+ case 'a':
+ case 's': {
+ AlignTypeEnum align_type = STACK_ALIGN; // Dummy init, silence warning
+ switch(*p) {
+ case 'i': align_type = INTEGER_ALIGN; break;
+ case 'v': align_type = VECTOR_ALIGN; break;
+ case 'f': align_type = FLOAT_ALIGN; break;
+ case 'a': align_type = AGGREGATE_ALIGN; break;
+ case 's': align_type = STACK_ALIGN; break;
+ }
+ uint32_t size = (uint32_t) atoi(++p);
+ unsigned char abi_align = atoi(getToken(token, ":").c_str()) / 8;
+ unsigned char pref_align = atoi(getToken(token, ":").c_str()) / 8;
+ if (pref_align == 0)
+ pref_align = abi_align;
+ setAlignment(align_type, abi_align, pref_align, size);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
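+
+// Parsing sketch: given "e-p:32:32:32-i64:64:64", the loop above sets
+// LittleEndian to true, the three pointer fields to 4 bytes, and overrides
+// the i64 entry to an 8-byte ABI and preferred alignment. Unrecognized
+// specifiers silently fall through the default case.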
+
+TargetData::TargetData(const Module *M)
+ : ImmutablePass(&ID) {
+ init(M->getDataLayout());
+}
+
+void
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
+ unsigned char pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == align_type &&
+ Alignments[i].TypeBitWidth == bit_width) {
+ // Update the abi, preferred alignments.
+ Alignments[i].ABIAlign = abi_align;
+ Alignments[i].PrefAlign = pref_align;
+ return;
+ }
+ }
+
+ Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
+}
+
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// preferred if ABIInfo = false) the target wants for the specified datatype.
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
+ uint32_t BitWidth, bool ABIInfo,
+ const Type *Ty) const {
+ // Check to see if we have an exact match and remember the best match we see.
+ int BestMatchIdx = -1;
+ int LargestInt = -1;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == AlignType &&
+ Alignments[i].TypeBitWidth == BitWidth)
+ return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
+
+ // The best match so far depends on what we're looking for.
+ if (AlignType == VECTOR_ALIGN && Alignments[i].AlignType == VECTOR_ALIGN) {
+ // If this is a specification for a smaller vector type, we will fall back
+ // to it. This happens because <128 x double> can be implemented in terms
+ // of 64 <2 x double>.
+ if (Alignments[i].TypeBitWidth < BitWidth) {
+ // Verify that we pick the biggest of the fallbacks.
+ if (BestMatchIdx == -1 ||
+ Alignments[BestMatchIdx].TypeBitWidth < Alignments[i].TypeBitWidth)
+ BestMatchIdx = i;
+ }
+ } else if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
+ // The "best match" for integers is the smallest size that is larger than
+ // the BitWidth requested.
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
+ BestMatchIdx = i;
+ // However, if there isn't one that's larger, then we must use the
+ // largest one we have (see below)
+ if (LargestInt == -1 ||
+ Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
+ LargestInt = i;
+ }
+ }
+
+ // Okay, we didn't find an exact solution. Fall back here depending on what
+ // is being looked for.
+ if (BestMatchIdx == -1) {
+ // If we didn't find an integer alignment, fall back on the most conservative one.
+ if (AlignType == INTEGER_ALIGN) {
+ BestMatchIdx = LargestInt;
+ } else {
+ assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
+
+ // If we didn't find a vector size that is smaller or equal to this type,
+ // then we will end up scalarizing this to its element type. Just return
+ // the alignment of the element.
+ return getAlignment(cast<VectorType>(Ty)->getElementType(), ABIInfo);
+ }
+ }
+
+ // Since we got a "best match" index, just return it.
+ return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
+ : Alignments[BestMatchIdx].PrefAlign;
+}
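+
+// Fallback example: with only the default entries registered, a query for
+// INTEGER_ALIGN at 36 bits has no exact match, so the smallest larger entry
+// (i64) wins, giving ABI alignment 4 and preferred alignment 8. A 128-bit
+// query finds nothing larger and falls back to the largest entry, again i64.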
+
+namespace {
+
+/// LayoutInfo - The lazy cache of structure layout information maintained by
+/// TargetData. Note that the struct types must have been freed before
+/// llvm_shutdown is called (and thus this is deallocated) because all the
+/// targets with cached elements should have been destroyed.
+///
+typedef std::pair<const TargetData*,const StructType*> LayoutKey;
+
+struct DenseMapLayoutKeyInfo {
+ static inline LayoutKey getEmptyKey() { return LayoutKey(0, 0); }
+ static inline LayoutKey getTombstoneKey() {
+ return LayoutKey((TargetData*)(intptr_t)-1, 0);
+ }
+ static unsigned getHashValue(const LayoutKey &Val) {
+ return DenseMapInfo<void*>::getHashValue(Val.first) ^
+ DenseMapInfo<void*>::getHashValue(Val.second);
+ }
+ static bool isEqual(const LayoutKey &LHS, const LayoutKey &RHS) {
+ return LHS == RHS;
+ }
+
+ static bool isPod() { return true; }
+};
+
+typedef DenseMap<LayoutKey, StructLayout*, DenseMapLayoutKeyInfo> LayoutInfoTy;
+
+}
+
+static ManagedStatic<LayoutInfoTy> LayoutInfo;
+
+TargetData::~TargetData() {
+ if (!LayoutInfo.isConstructed())
+ return;
+
+ // Remove any layouts for this TD.
+ LayoutInfoTy &TheMap = *LayoutInfo;
+ for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) {
+ if (I->first.first == this) {
+ I->second->~StructLayout();
+ free(I->second);
+ TheMap.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+}
+
+const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
+ LayoutInfoTy &TheMap = *LayoutInfo;
+
+ StructLayout *&SL = TheMap[LayoutKey(this, Ty)];
+ if (SL) return SL;
+
+ // Otherwise, create the struct layout. Because it is variable length, we
+ // malloc it, then use placement new.
+ int NumElts = Ty->getNumElements();
+ StructLayout *L =
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1)*sizeof(uint64_t));
+
+ // Set SL before calling StructLayout's ctor. The ctor could cause other
+ // entries to be added to TheMap, invalidating our reference.
+ SL = L;
+
+ new (L) StructLayout(Ty, *this);
+ return L;
+}
+
+/// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout
+/// objects. If a TargetData object is alive when types are being refined and
+/// removed, this method must be called whenever a StructType is removed to
+/// avoid a dangling pointer in this cache.
+void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
+ if (!LayoutInfo.isConstructed()) return; // No cache.
+
+ LayoutInfoTy::iterator I = LayoutInfo->find(LayoutKey(this, Ty));
+ if (I == LayoutInfo->end()) return;
+
+ I->second->~StructLayout();
+ free(I->second);
+ LayoutInfo->erase(I);
+}
+
+
+std::string TargetData::getStringRepresentation() const {
+ std::string repr;
+ repr.append(LittleEndian ? "e" : "E");
+ repr.append("-p:").append(itostr((int64_t) (PointerMemSize * 8))).
+ append(":").append(itostr((int64_t) (PointerABIAlign * 8))).
+ append(":").append(itostr((int64_t) (PointerPrefAlign * 8)));
+ for (align_const_iterator I = Alignments.begin();
+ I != Alignments.end();
+ ++I) {
+ repr.append("-").append(1, (char) I->AlignType).
+ append(utostr((int64_t) I->TypeBitWidth)).
+ append(":").append(utostr((uint64_t) (I->ABIAlign * 8))).
+ append(":").append(utostr((uint64_t) (I->PrefAlign * 8)));
+ }
+ return repr;
+}
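+
+// Round-trip sketch: a default-initialized TargetData prints something like
+// "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-..." (the exact
+// order follows the Alignments vector), and feeding that string back into
+// init() reproduces the same layout.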
+
+
+uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return getPointerSizeInBits();
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
+ }
+ case Type::StructTyID:
+ // Get the layout annotation... which is lazily created on demand.
+ return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+ case Type::IntegerTyID:
+ return cast<IntegerType>(Ty)->getBitWidth();
+ case Type::VoidTyID:
+ return 8;
+ case Type::FloatTyID:
+ return 32;
+ case Type::DoubleTyID:
+ return 64;
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ return 128;
+ // In memory objects this is always aligned to a higher boundary, but
+ // only 80 bits contain information.
+ case Type::X86_FP80TyID:
+ return 80;
+ case Type::VectorTyID:
+ return cast<VectorType>(Ty)->getBitWidth();
+ default:
+ assert(0 && "TargetData::getTypeSizeInBits(): Unsupported type");
+ break;
+ }
+ return 0;
+}
+
+/*!
+ \param abi_or_pref Flag that determines which alignment is returned. true
+ returns the ABI alignment, false returns the preferred alignment.
+ \param Ty The underlying type for which alignment is determined.
+
+ Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
+ == false) for the requested type \a Ty.
+ */
+unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+ int AlignType = -1;
+
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ // Early escape for the non-numeric types.
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return (abi_or_pref
+ ? getPointerABIAlignment()
+ : getPointerPrefAlignment());
+ case Type::ArrayTyID:
+ return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
+
+ case Type::StructTyID: {
+ // Packed structure types always have an ABI alignment of one.
+ if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
+ return 1;
+
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ return std::max(Align, (unsigned)Layout->getAlignment());
+ }
+ case Type::IntegerTyID:
+ case Type::VoidTyID:
+ AlignType = INTEGER_ALIGN;
+ break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ // PPC_FP128TyID and FP128TyID have different data contents, but the
+ // same size and alignment, so they look the same here.
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ case Type::X86_FP80TyID:
+ AlignType = FLOAT_ALIGN;
+ break;
+ case Type::VectorTyID:
+ AlignType = VECTOR_ALIGN;
+ break;
+ default:
+ assert(0 && "Bad type for getAlignment!!!");
+ break;
+ }
+
+ return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
+ abi_or_pref, Ty);
+}
+
+unsigned char TargetData::getABITypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, true);
+}
+
+unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
+ if (Alignments[i].AlignType == STACK_ALIGN)
+ return Alignments[i].ABIAlign;
+
+ return getABITypeAlignment(Ty);
+}
+
+unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, false);
+}
+
+unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+ unsigned Align = (unsigned) getPrefTypeAlignment(Ty);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Log2_32(Align);
+}
+
+/// getIntPtrType - Return an unsigned integer type that is the same size as,
+/// or greater than, the target's pointer size.
+const IntegerType *TargetData::getIntPtrType() const {
+ return IntegerType::get(getPointerSizeInBits());
+}
+
+
+uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
+ unsigned NumIndices) const {
+ const Type *Ty = ptrTy;
+ assert(isa<PointerType>(Ty) && "Illegal argument for getIndexedOffset()");
+ uint64_t Result = 0;
+
+ generic_gep_type_iterator<Value* const*>
+ TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices);
+ for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) {
+ if (const StructType *STy = dyn_cast<StructType>(*TI)) {
+ assert(Indices[CurIDX]->getType() == Type::Int32Ty &&
+ "Illegal struct idx");
+ unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+ // Get structure layout information...
+ const StructLayout *Layout = getStructLayout(STy);
+
+ // Add in the offset, as calculated by the structure layout info...
+ Result += Layout->getElementOffset(FieldNo);
+
+ // Update Ty to refer to current element
+ Ty = STy->getElementType(FieldNo);
+ } else {
+ // Update Ty to refer to current element
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // Get the array index and the size of each array element.
+ int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue();
+ Result += arrayIdx * (int64_t)getTypeAllocSize(Ty);
+ }
+ }
+
+ return Result;
+}
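+
+// Worked example under the default layout: a getelementptr on a
+// { i32, double }* with indices [0, 1] steps over zero whole structs
+// (0 * 16 bytes) and then selects field 1, whose StructLayout offset is 8,
+// so the result is 8.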
+
+/// getPreferredAlignment - Return the preferred alignment of the specified
+/// global. This includes an explicitly requested alignment (if the global
+/// has one).
+unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
+ const Type *ElemType = GV->getType()->getElementType();
+ unsigned Alignment = getPrefTypeAlignment(ElemType);
+ if (GV->getAlignment() > Alignment)
+ Alignment = GV->getAlignment();
+
+ if (GV->hasInitializer()) {
+ if (Alignment < 16) {
+ // If the global is not external, see if it is large. If so, give it a
+ // larger alignment.
+ if (getTypeSizeInBits(ElemType) > 128)
+ Alignment = 16; // 16-byte alignment.
+ }
+ }
+ return Alignment;
+}
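+
+// Example: a [64 x i32] global with an initializer and no explicit alignment
+// has a preferred type alignment of 4, but since it is wider than 128 bits
+// the rule above bumps it to 16 bytes.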
+
+/// getPreferredAlignmentLog - Return the preferred alignment of the
+/// specified global, returned in log form. This includes an explicitly
+/// requested alignment (if the global has one).
+unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const {
+ return Log2_32(getPreferredAlignment(GV));
+}
diff --git a/lib/Target/TargetFrameInfo.cpp b/lib/Target/TargetFrameInfo.cpp
new file mode 100644
index 0000000..873d60a
--- /dev/null
+++ b/lib/Target/TargetFrameInfo.cpp
@@ -0,0 +1,19 @@
+//===-- TargetFrameInfo.cpp - Implement machine frame interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameInfo::~TargetFrameInfo() {
+}
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
new file mode 100644
index 0000000..ceaea0c
--- /dev/null
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -0,0 +1,50 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Constant.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
+ unsigned numOpcodes)
+ : Descriptors(Desc), NumOpcodes(numOpcodes) {
+}
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (TID.isBranch() && !TID.isBarrier())
+ return true;
+ if (!TID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+/// getInstrOperandRegClass - Return register class of the operand of an
+/// instruction of the specified TargetInstrDesc.
+const TargetRegisterClass*
+llvm::getInstrOperandRegClass(const TargetRegisterInfo *TRI,
+ const TargetInstrDesc &II, unsigned Op) {
+ if (Op >= II.getNumOperands())
+ return NULL;
+ if (II.OpInfo[Op].isLookupPtrRegClass())
+ return TRI->getPointerRegClass();
+ return TRI->getRegClass(II.OpInfo[Op].RegClass);
+}
diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp
new file mode 100644
index 0000000..d8da08e
--- /dev/null
+++ b/lib/Target/TargetIntrinsicInfo.cpp
@@ -0,0 +1,22 @@
+//===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetIntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+using namespace llvm;
+
+TargetIntrinsicInfo::TargetIntrinsicInfo(const char **desc, unsigned count)
+ : Intrinsics(desc), NumIntrinsics(count) {
+}
+
+TargetIntrinsicInfo::~TargetIntrinsicInfo() {
+}
diff --git a/lib/Target/TargetMachOWriterInfo.cpp b/lib/Target/TargetMachOWriterInfo.cpp
new file mode 100644
index 0000000..d608119
--- /dev/null
+++ b/lib/Target/TargetMachOWriterInfo.cpp
@@ -0,0 +1,25 @@
+//===-- TargetMachOWriterInfo.cpp - MachO Writer Info ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TargetMachOWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachOWriterInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+using namespace llvm;
+
+TargetMachOWriterInfo::~TargetMachOWriterInfo() {}
+
+MachineRelocation
+TargetMachOWriterInfo::GetJTRelocation(unsigned Offset,
+ MachineBasicBlock *MBB) const {
+ // FIXME: do something about PIC
+ return MachineRelocation::getBB(Offset, MachineRelocation::VANILLA, MBB);
+}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
new file mode 100644
index 0000000..1b042dd
--- /dev/null
+++ b/lib/Target/TargetMachine.cpp
@@ -0,0 +1,229 @@
+//===-- TargetMachine.cpp - General Target Information ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// Command-line options that tend to be useful on more than one back-end.
+//
+
+namespace llvm {
+ bool LessPreciseFPMADOption;
+ bool PrintMachineCode;
+ bool NoFramePointerElim;
+ bool NoExcessFPPrecision;
+ bool UnsafeFPMath;
+ bool FiniteOnlyFPMathOption;
+ bool HonorSignDependentRoundingFPMathOption;
+ bool UseSoftFloat;
+ bool NoImplicitFloat;
+ bool NoZerosInBSS;
+ bool ExceptionHandling;
+ bool UnwindTablesMandatory;
+ Reloc::Model RelocationModel;
+ CodeModel::Model CMModel;
+ bool PerformTailCallOpt;
+ unsigned StackAlignment;
+ bool RealignStack;
+ bool DisableJumpTables;
+ bool StrongPHIElim;
+ bool DisableRedZone;
+ bool AsmVerbosityDefault(false);
+}
+
+static cl::opt<bool, true>
+PrintCode("print-machineinstrs",
+ cl::desc("Print generated machine code"),
+ cl::location(PrintMachineCode), cl::init(false));
+static cl::opt<bool, true>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::location(NoFramePointerElim),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableExcessPrecision("disable-excess-fp-precision",
+ cl::desc("Disable optimizations that may increase FP precision"),
+ cl::location(NoExcessFPPrecision),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::location(LessPreciseFPMADOption),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::location(UnsafeFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableFiniteOnlyFPMath("enable-finite-only-fp-math",
+ cl::desc("Enable optimizations that assume no NaNs / +-Infs"),
+ cl::location(FiniteOnlyFPMathOption),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::location(HonorSignDependentRoundingFPMathOption),
+ cl::init(false));
+static cl::opt<bool, true>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::location(UseSoftFloat),
+ cl::init(false));
+static cl::opt<bool, true>
+GenerateNoImplicitFloats("no-implicit-float",
+ cl::desc("Don't generate implicit floating point instructions (x86-only)"),
+ cl::location(NoImplicitFloat),
+ cl::init(false));
+static cl::opt<bool, true>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::location(NoZerosInBSS),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableExceptionHandling("enable-eh",
+ cl::desc("Emit DWARF exception handling (default if target supports)"),
+ cl::location(ExceptionHandling),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableUnwindTables("unwind-tables",
+ cl::desc("Generate unwinding tables for all functions"),
+ cl::location(UnwindTablesMandatory),
+ cl::init(false));
+
+static cl::opt<llvm::Reloc::Model, true>
+DefRelocationModel("relocation-model",
+ cl::desc("Choose relocation model"),
+ cl::location(RelocationModel),
+ cl::init(Reloc::Default),
+ cl::values(
+ clEnumValN(Reloc::Default, "default",
+ "Target default relocation model"),
+ clEnumValN(Reloc::Static, "static",
+ "Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ "Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ "Relocatable external references, non-relocatable code"),
+ clEnumValEnd));
+static cl::opt<llvm::CodeModel::Model, true>
+DefCodeModel("code-model",
+ cl::desc("Choose code model"),
+ cl::location(CMModel),
+ cl::init(CodeModel::Default),
+ cl::values(
+ clEnumValN(CodeModel::Default, "default",
+ "Target default code model"),
+ clEnumValN(CodeModel::Small, "small",
+ "Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel",
+ "Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium",
+ "Medium code model"),
+ clEnumValN(CodeModel::Large, "large",
+ "Large code model"),
+ clEnumValEnd));
+static cl::opt<bool, true>
+EnablePerformTailCallOpt("tailcallopt",
+ cl::desc("Turn on tail call optimization."),
+ cl::location(PerformTailCallOpt),
+ cl::init(false));
+static cl::opt<unsigned, true>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::location(StackAlignment),
+ cl::init(0));
+static cl::opt<bool, true>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::location(RealignStack),
+ cl::init(true));
+static cl::opt<bool, true>
+DisableSwitchTables(cl::Hidden, "disable-jump-tables",
+ cl::desc("Do not generate jump tables."),
+ cl::location(DisableJumpTables),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
+ cl::desc("Use strong PHI elimination."),
+ cl::location(StrongPHIElim),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableRedZoneOption("disable-red-zone",
+ cl::desc("Do not emit code that uses the red zone."),
+ cl::location(DisableRedZone),
+ cl::init(false));
+
+//---------------------------------------------------------------------------
+// TargetMachine Class
+//
+
+TargetMachine::~TargetMachine() {
+ delete AsmInfo;
+}
+
+/// getRelocationModel - Returns the code generation relocation model. The
+/// choices are static, PIC, dynamic-no-pic, and target default.
+Reloc::Model TargetMachine::getRelocationModel() {
+ return RelocationModel;
+}
+
+/// setRelocationModel - Sets the code generation relocation model.
+void TargetMachine::setRelocationModel(Reloc::Model Model) {
+ RelocationModel = Model;
+}
+
+/// getCodeModel - Returns the code model. The choices are small, kernel,
+/// medium, large, and target default.
+CodeModel::Model TargetMachine::getCodeModel() {
+ return CMModel;
+}
+
+/// setCodeModel - Sets the code model.
+void TargetMachine::setCodeModel(CodeModel::Model Model) {
+ CMModel = Model;
+}
+
+bool TargetMachine::getAsmVerbosityDefault() {
+ return AsmVerbosityDefault;
+}
+
+void TargetMachine::setAsmVerbosityDefault(bool V) {
+ AsmVerbosityDefault = V;
+}
+
+namespace llvm {
+ /// LessPreciseFPMAD - This returns true when the -enable-fp-mad option
+ /// is specified on the command line. When this flag is off (default), the
+ /// code generator is not allowed to generate mad (multiply add) if the
+ /// result is "less precise" than doing those operations individually.
+ bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
+
+ /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
+ /// option is specified on the command line. If this returns false (default),
+ /// the code generator is not allowed to assume that FP arithmetic arguments
+ /// and results are never NaNs or +-Infs.
+ bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; }
+
+ /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+ /// that the rounding mode of the FPU can change from its default.
+ bool HonorSignDependentRoundingFPMath() {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+ }
+}
+
diff --git a/lib/Target/TargetMachineRegistry.cpp b/lib/Target/TargetMachineRegistry.cpp
new file mode 100644
index 0000000..c1a4777
--- /dev/null
+++ b/lib/Target/TargetMachineRegistry.cpp
@@ -0,0 +1,78 @@
+//===-- TargetMachineRegistry.cpp - Target Auto Registration Impl ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes the RegisterTarget class, which TargetMachine
+// implementations should use to register themselves with the system. This file
+// also exposes the TargetMachineRegistry class, which allows tools to inspect
+// all of the registered targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachineRegistry.h"
+#include <algorithm>
+using namespace llvm;
+
+/// getClosestStaticTargetForModule - Given an LLVM module, pick the best target
+/// that is compatible with the module. If no close target can be found, this
+/// returns null and sets the Error string to a reason.
+const TargetMachineRegistry::entry *
+TargetMachineRegistry::getClosestStaticTargetForModule(const Module &M,
+ std::string &Error) {
+ std::vector<std::pair<unsigned, const entry *> > UsableTargets;
+ for (Registry<TargetMachine>::iterator I = begin(), E = end(); I != E; ++I)
+ if (unsigned Qual = I->ModuleMatchQualityFn(M))
+ UsableTargets.push_back(std::make_pair(Qual, &*I));
+
+ if (UsableTargets.empty()) {
+ Error = "No available targets are compatible with this module";
+ return 0;
+ } else if (UsableTargets.size() == 1)
+ return UsableTargets.back().second;
+
+ // Otherwise, take the best target, but make sure we don't have two equally
+ // good best targets.
+ std::sort(UsableTargets.begin(), UsableTargets.end());
+ if (UsableTargets.back().first ==
+ UsableTargets[UsableTargets.size()-2].first) {
+ Error = "Cannot choose between targets \"" +
+ std::string(UsableTargets.back().second->Name) + "\" and \"" +
+ std::string(UsableTargets[UsableTargets.size()-2].second->Name) + "\"";
+ return 0;
+ }
+ return UsableTargets.back().second;
+}
+
+/// getClosestTargetForJIT - Pick the best target that is compatible with
+/// the current host. If no close target can be found, this returns null
+/// and sets the Error string to a reason.
+const TargetMachineRegistry::entry *
+TargetMachineRegistry::getClosestTargetForJIT(std::string &Error) {
+ std::vector<std::pair<unsigned, const entry *> > UsableTargets;
+ for (Registry<TargetMachine>::iterator I = begin(), E = end(); I != E; ++I)
+ if (unsigned Qual = I->JITMatchQualityFn())
+ UsableTargets.push_back(std::make_pair(Qual, &*I));
+
+ if (UsableTargets.empty()) {
+ Error = "No JIT is available for this host";
+ return 0;
+ } else if (UsableTargets.size() == 1)
+ return UsableTargets.back().second;
+
+ // Otherwise, take the best target. If there is a tie, just pick one.
+ unsigned MaxQual = UsableTargets.front().first;
+ const entry *MaxQualTarget = UsableTargets.front().second;
+
+ for (unsigned i = 1, e = UsableTargets.size(); i != e; ++i)
+ if (UsableTargets[i].first > MaxQual) {
+ MaxQual = UsableTargets[i].first;
+ MaxQualTarget = UsableTargets[i].second;
+ }
+
+ return MaxQualTarget;
+}
+
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
new file mode 100644
index 0000000..a84fdaa
--- /dev/null
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -0,0 +1,144 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/BitVector.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
+ regclass_iterator RCB, regclass_iterator RCE,
+ int CFSO, int CFDO,
+ const unsigned* subregs, const unsigned subregsize,
+ const unsigned* superregs, const unsigned superregsize,
+ const unsigned* aliases, const unsigned aliasessize)
+ : SubregHash(subregs), SubregHashSize(subregsize),
+ SuperregHash(superregs), SuperregHashSize(superregsize),
+ AliasesHash(aliases), AliasesHashSize(aliasessize),
+ Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) {
+ assert(NumRegs < FirstVirtualRegister &&
+ "Target has too many physical registers!");
+
+ CallFrameSetupOpcode = CFSO;
+ CallFrameDestroyOpcode = CFDO;
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
+/// register of the given type. If type is MVT::Other, then just return any
+/// register class the register belongs to.
+const TargetRegisterClass *
+TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, MVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the most inclusive (super) register class of the right type that
+ // contains this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSuperClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Set the bits that represent allocatable
+/// registers for the specified register class.
+static void getAllocatableSetForRC(MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ R.set(*I);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(NumRegs);
+ if (RC) {
+ getAllocatableSetForRC(MF, RC, Allocatable);
+ return Allocatable;
+ }
+
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ return Allocatable;
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is likely incorrect for the target.
+int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ TFI.getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+/// getInitialFrameState - Returns a list of machine moves that are assumed
+/// on entry to a function.
+void
+TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Default is to do nothing.
+}
+
+const TargetRegisterClass *
+llvm::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) {
+ // First take care of the trivial cases
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // If B is a subclass of A, it will be handled in the loop below
+ if (B->hasSubClass(A))
+ return A;
+
+ const TargetRegisterClass *Best = 0;
+ for (TargetRegisterClass::sc_iterator I = A->subclasses_begin();
+ const TargetRegisterClass *X = *I; ++I) {
+ if (X == B)
+ return B; // B is a subclass of A
+
+ // X must be a common subclass of A and B
+ if (!B->hasSubClass(X))
+ continue;
+
+ // A superclass is definitely better.
+ if (!Best || Best->hasSuperClass(X)) {
+ Best = X;
+ continue;
+ }
+
+ // A subclass is definitely worse
+ if (Best->hasSubClass(X))
+ continue;
+
+ // Best and X have no super/sub class relation - pick the larger class, or
+ // the smaller spill size.
+ int nb = std::distance(Best->begin(), Best->end());
+ int ni = std::distance(X->begin(), X->end());
+ if (ni>nb || (ni==nb && X->getSize() < Best->getSize()))
+ Best = X;
+ }
+ return Best;
+}
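+
+// Illustrative behaviour, using x86 classes as an assumed example:
+// getCommonSubClass(GR32, GR32) returns GR32; when one class contains the
+// other (say GR32 and GR32_ABCD), the contained class is returned; unrelated
+// classes with no shared subclass yield null.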
diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtarget.cpp
new file mode 100644
index 0000000..95c92ca
--- /dev/null
+++ b/lib/Target/TargetSubtarget.cpp
@@ -0,0 +1,22 @@
+//===-- TargetSubtarget.cpp - General Target Information -------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSubtarget.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// TargetSubtarget Class
+//
+TargetSubtarget::TargetSubtarget() {}
+
+TargetSubtarget::~TargetSubtarget() {}
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..dbd03d8
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,11 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMX86AsmPrinter
+ X86ATTAsmPrinter.cpp
+ X86AsmPrinter.cpp
+ X86IntelAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMX86CodeGen n)
+
+add_dependencies(LLVMX86AsmPrinter ${n})
diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile
new file mode 100644
index 0000000..ba89ac6
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86AsmPrinter
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
new file mode 100644
index 0000000..8afe2ea
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -0,0 +1,1075 @@
+//===-- X86ATTAsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to AT&T format assembly
+// language. This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86ATTAsmPrinter.h"
+#include "X86.h"
+#include "X86COFF.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "X86TargetAsmInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static std::string getPICLabelString(unsigned FnNum,
+ const TargetAsmInfo *TAI,
+ const X86Subtarget* Subtarget) {
+ std::string label;
+ if (Subtarget->isTargetDarwin())
+ label = "\"L" + utostr_32(FnNum) + "$pb\"";
+ else if (Subtarget->isTargetELF())
+ label = ".Lllvm$" + utostr_32(FnNum) + ".$piclabel";
+ else
+ assert(0 && "Don't know how to print PIC label!\n");
+
+ return label;
+}
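+
+// Example outputs: for function number 5 this yields "\"L5$pb\"" (quotes
+// included) on Darwin and ".Lllvm$5.$piclabel" on ELF; any other target
+// trips the assertion.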
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+ const TargetData *TD) {
+ X86MachineFunctionInfo Info;
+ uint64_t Size = 0;
+
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Info.setDecorationStyle(StdCall);
+ break;
+ case CallingConv::X86_FastCall:
+ Info.setDecorationStyle(FastCall);
+ break;
+ default:
+ return Info;
+ }
+
+ unsigned argNum = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI, ++argNum) {
+ const Type* Ty = AI->getType();
+
+ // 'Dereference' type in case of byval parameter attribute
+ if (F->paramHasAttr(argNum, Attribute::ByVal))
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ // Size should be aligned to DWORD boundary
+ Size += ((TD->getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+ // We're not supporting tooooo huge arguments :)
+ Info.setBytesToPopOnReturn((unsigned int)Size);
+ return Info;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+ for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+ Name != E; ++Name)
+ if (isprint(*Name))
+ OS << *Name;
+}
+
+/// decorateName - Query FunctionInfoMap and use this information for various
+/// name decoration.
+void X86ATTAsmPrinter::decorateName(std::string &Name,
+ const GlobalValue *GV) {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F) return;
+
+ // We don't want to decorate non-stdcall or non-fastcall functions right now
+ unsigned CC = F->getCallingConv();
+ if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+ return;
+
+ // Decorate names only when we're targeting Cygwin/Mingw32 targets
+ if (!Subtarget->isTargetCygMing())
+ return;
+
+ FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+
+ const X86MachineFunctionInfo *Info;
+ if (info_item == FunctionInfoMap.end()) {
+ // Calculate appropriate function info and populate map
+ FunctionInfoMap[F] = calculateFunctionInfo(F, TM.getTargetData());
+ Info = &FunctionInfoMap[F];
+ } else {
+ Info = &info_item->second;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ switch (Info->getDecorationStyle()) {
+ case None:
+ break;
+ case StdCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+ break;
+ case FastCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+
+ if (Name[0] == '_') {
+ Name[0] = '@';
+ } else {
+ Name = '@' + Name;
+ }
+ break;
+ default:
+ assert(0 && "Unsupported DecorationStyle");
+ }
+}
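+
+// Decoration sketch (CygMing only): a stdcall function "_f" popping 8
+// argument bytes becomes "_f@8"; the same function under fastcall becomes
+// "@f@8", since fastcall also rewrites the leading '_' to '@'.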
+
+void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+
+ decorateName(CurrentFnName, F);
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ unsigned FnAlign = 4;
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ FnAlign = 1;
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ EmitAlignment(FnAlign, F);
+ break;
+ case Function::DLLExportLinkage:
+ case Function::ExternalLinkage:
+ EmitAlignment(FnAlign, F);
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ EmitAlignment(FnAlign, F);
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ O << TAI->getWeakDefDirective() << CurrentFnName << '\n';
+ } else if (Subtarget->isTargetCygMing()) {
+ O << "\t.globl\t" << CurrentFnName << "\n"
+ "\t.linkonce discard\n";
+ } else {
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ }
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type\t" << CurrentFnName << ",@function\n";
+ else if (Subtarget->isTargetCygMing()) {
+ O << "\t.def\t " << CurrentFnName
+ << ";\t.scl\t" <<
+ (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT)
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+
+ O << CurrentFnName << ":\n";
+ // Add a workaround for linkonce linkage on Cygwin/MinGW
+ if (Subtarget->isTargetCygMing() &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage()))
+ O << "Lllvm$workaround$fake$stub$" << CurrentFnName << ":\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ this->MF = &MF;
+ unsigned CC = F->getCallingConv();
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Populate the function information map. Actually, we don't want to
+ // populate non-stdcall or non-fastcall functions' information right now.
+ if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+ FunctionInfoMap[F] = *MF.getInfo<X86MachineFunctionInfo>();
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ if (F->hasDLLExportLinkage())
+ DLLExportedFns.insert(Mang->makeNameProper(F->getName(), ""));
+
+ // Print the 'header' of the function
+ emitFunctionHeader(MF);
+
+ // Emit pre-function debug and/or EH information.
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ bool hasAnyRealCode = false;
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) {
+ // This is an entry block or a block that's only reachable via a
+ // fallthrough edge. In non-VerboseAsm mode, don't print the label.
+ } else {
+ printBasicBlockLabel(I, true, true, VerboseAsm);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ // Print the assembly for the instruction.
+ if (!II->isLabel())
+ hasAnyRealCode = true;
+ printMachineInstruction(II);
+ }
+ }
+
+ if (Subtarget->isTargetDarwin() && !hasAnyRealCode) {
+ // If the function is empty, then we need to emit *something*. Otherwise,
+ // the function's label might be associated with something that it wasn't
+ // meant to be associated with. We emit a noop in this situation.
+ // We are assuming inline asms are code.
+ O << "\tnop\n";
+ }
+
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+ // Emit post-function debug information.
+ if (TAI->doesSupportDebugInformation())
+ DW->EndFunction(&MF);
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ O.flush();
+
+ // We didn't modify anything.
+ return false;
+}
+
+static inline bool shouldPrintGOT(TargetMachine &TM, const X86Subtarget* ST) {
+ return ST->isPICStyleGOT() && TM.getRelocationModel() == Reloc::PIC_;
+}
+
+static inline bool shouldPrintPLT(TargetMachine &TM, const X86Subtarget* ST) {
+ return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_ &&
+ (ST->isPICStyleRIPRel() || ST->isPICStyleGOT());
+}
+
+static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) {
+ return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static;
+}
+
+void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier, bool NotRIPRel) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should not make it this far!");
+ O << '%';
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << TRI->getAsmName(Reg);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || (strcmp(Modifier, "debug") &&
+ strcmp(Modifier, "mem") &&
+ strcmp(Modifier, "call")))
+ O << '$';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm);
+ return;
+ case MachineOperand::MO_JumpTableIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << '$';
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget->isPICStyleStub())
+ O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << "$pb\"";
+ else if (Subtarget->isPICStyleGOT())
+ O << "@GOTOFF";
+ }
+
+ if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
+ O << "(%rip)";
+ return;
+ }
+ case MachineOperand::MO_ConstantPoolIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << '$';
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget->isPICStyleStub())
+ O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << "$pb\"";
+ else if (Subtarget->isPICStyleGOT())
+ O << "@GOTOFF";
+ }
+
+ printOffset(MO.getOffset());
+
+ if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
+ O << "(%rip)";
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool needCloseParen = false;
+
+ const GlobalValue *GV = MO.getGlobal();
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ bool isThreadLocal = GVar && GVar->isThreadLocal();
+
+ std::string Name = Mang->getValueName(GV);
+ decorateName(Name, GV);
+
+ if (!isMemOp && !isCallOp)
+ O << '$';
+ else if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ if (shouldPrintStub(TM, Subtarget)) {
+ // Declarations and weakly-linked (including link-once) globals need
+ // non-lazily-resolved stubs.
+ if (GV->isDeclaration() || GV->isWeakForLinker()) {
+ // Dynamically-resolved functions need a stub for the function.
+ if (isCallOp && isa<Function>(GV)) {
+ // Function stubs are no longer needed for Mac OS X 10.5 and up.
+ if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) {
+ O << Name;
+ } else {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ }
+ } else if (GV->hasHiddenVisibility()) {
+ if (!GV->isDeclaration() && !GV->hasCommonLinkage())
+ // The definition is available in the current translation unit.
+ O << Name;
+ else {
+ HiddenGVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ if (GV->hasDLLImportLinkage())
+ O << "__imp_";
+ O << Name;
+ }
+
+ if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
+ O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget);
+ } else {
+ if (GV->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name;
+
+ if (isCallOp) {
+ if (shouldPrintPLT(TM, Subtarget)) {
+ // Assemble call via PLT for externally visible symbols
+ if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+ !GV->hasLocalLinkage())
+ O << "@PLT";
+ }
+ if (Subtarget->isTargetCygMing() && GV->isDeclaration())
+ // Save function name for later type emission
+ FnStubs.insert(Name);
+ }
+ }
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ printOffset(MO.getOffset());
+
+ if (isThreadLocal) {
+ TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel());
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ O << "@TLSGD";
+ break;
+ case TLSModel::LocalDynamic:
+ // O << "@TLSLD"; // local dynamic not implemented
+ O << "@TLSGD";
+ break;
+ case TLSModel::InitialExec:
+ if (Subtarget->is64Bit()) {
+ assert (!NotRIPRel);
+ O << "@GOTTPOFF(%rip)";
+ } else {
+ O << "@INDNTPOFF";
+ }
+ break;
+ case TLSModel::LocalExec:
+ if (Subtarget->is64Bit())
+ O << "@TPOFF";
+ else
+ O << "@NTPOFF";
+ break;
+ default:
+ assert (0 && "Unknown TLS model");
+ }
+ } else if (isMemOp) {
+ if (shouldPrintGOT(TM, Subtarget)) {
+ if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
+ O << "@GOT";
+ else
+ O << "@GOTOFF";
+ } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) {
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
+ O << "@GOTPCREL";
+
+ if (needCloseParen) {
+ needCloseParen = false;
+ O << ')';
+ }
+ }
+
+ // Use rip when possible to reduce code size, except when an
+ // index or base register is also part of the address, e.g.
+ // foo(%rip)(%rcx,%rax,4) is not legal.
+ O << "(%rip)";
+ }
+ }
+
+ if (needCloseParen)
+ O << ')';
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool needCloseParen = false;
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ // Print the function stub suffix unless targeting Mac OS X 10.5 or later.
+ if (isCallOp && shouldPrintStub(TM, Subtarget) &&
+ !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ return;
+ }
+ if (!isMemOp && !isCallOp)
+ O << '$';
+ else if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ O << Name;
+
+ if (shouldPrintPLT(TM, Subtarget)) {
+ std::string GOTName(TAI->getGlobalPrefix());
+ GOTName+="_GLOBAL_OFFSET_TABLE_";
+ if (Name == GOTName)
+ // HACK! When printing the GOT offset, emit an extra offset to the PC
+ // to compensate for the size of the popl instruction. The resulting
+ // code should look like:
+ // call .piclabel
+ // piclabel:
+ // popl %some_register
+ // addl $_GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ O << " + [.-"
+ << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
+
+ if (isCallOp)
+ O << "@PLT";
+ }
+
+ if (needCloseParen)
+ O << ')';
+
+ if (!isCallOp && Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
+
+ return;
+ }
+ default:
+ O << "<unknown operand type>"; return;
+ }
+}
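+
+// A few illustrative renderings of a global "foo" from the cases above
+// (prefixes and label spellings depend on the TargetAsmInfo): a Darwin
+// stub-style PIC data reference prints as "_foo$non_lazy_ptr-L1$pb", a
+// GOT-style ELF one as "foo@GOT" or "foo@GOTOFF", and an x86-64
+// rip-relative memory operand as "foo(%rip)".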
+
+void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
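+
+// For example, a CMPSS carrying the immediate 1 gets the condition string
+// "lt", yielding "cmpltss"; this routine prints only the condition-code
+// fragment, the rest of the mnemonic comes from the tablegen'erated
+// printer.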
+
+void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier,
+ bool NotRIPRel) {
+ MachineOperand BaseReg = MI->getOperand(Op);
+ MachineOperand IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
+ if (DispSpec.isGlobal() ||
+ DispSpec.isCPI() ||
+ DispSpec.isJTI() ||
+ DispSpec.isSymbol()) {
+ printOperand(MI, Op+3, "mem", NotRIPRel);
+ } else {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << DispVal;
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ unsigned BaseRegOperand = 0, IndexRegOperand = 2;
+
+ // There are cases where we can end up with ESP/RSP in the indexreg slot.
+ // If this happens, swap the base/index registers to support assemblers
+ // that don't work when the index is *SP.
+ if (IndexReg.getReg() == X86::ESP || IndexReg.getReg() == X86::RSP) {
+ assert(ScaleVal == 1 && "Scale not supported for stack pointer!");
+ std::swap(BaseReg, IndexReg);
+ std::swap(BaseRegOperand, IndexRegOperand);
+ }
+
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op+BaseRegOperand, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+IndexRegOperand, Modifier);
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
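+
+// As a concrete example, a displacement of 8 with base %rax, index %rcx
+// and scale 4 comes out in AT&T syntax as "8(%rax,%rcx,4)", while a bare
+// displacement with no registers prints as just the integer.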
+
+void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier, bool NotRIPRel){
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ MachineOperand Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, Modifier, NotRIPRel);
+}
+
+void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ // We don't need .set machinery if we have GOT-style relocations
+ if (Subtarget->isPICStyleGOT())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false, false);
+ if (Subtarget->isPICStyleRIPRel())
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '\n';
+ else
+ O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << '\n';
+}
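+
+// This emits entries of the form (label spellings are illustrative):
+//   .set L1_0_set_2, LBB1_2-"L1$pb"
+// i.e. the difference between the block label and the PIC base (or the
+// jump table label on rip-relative targets), which the jump table entries
+// printed by printPICJumpTableEntry then reference.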
+
+void X86ATTAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
+ std::string label = getPICLabelString(getFunctionNumber(), TAI, Subtarget);
+ O << label << '\n' << label << ':';
+}
+
+
+void X86ATTAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const
+{
+ const char *JTEntryDirective = MJTI->getEntrySize() == 4 ?
+ TAI->getData32bitsDirective() : TAI->getData64bitsDirective();
+
+ O << JTEntryDirective << ' ';
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStub()) {
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << uid << "_set_" << MBB->getNumber();
+ } else if (Subtarget->isPICStyleGOT()) {
+ printBasicBlockLabel(MBB, false, false, false);
+ O << "@GOTOFF";
+ } else
+ assert(0 && "Don't know how to print MBB label for this PIC mode");
+ } else
+ printBasicBlockLabel(MBB, false, false, false);
+}
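+
+// In static relocation mode this boils down to a plain ".long LBB1_2"-style
+// entry; stub- and rip-relative PIC reference the "_set_" difference emitted
+// by printPICJumpTableSetLabel above, and GOT-style PIC prints the block
+// label with an "@GOTOFF" suffix (directive and label spellings vary with
+// the TargetAsmInfo).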
+
+bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO,
+ const char Mode) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ case 'q': // Print DImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ break;
+ }
+
+ O << '%'<< TRI->getAsmName(Reg);
+ return false;
+}
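+
+// These are the GCC-style inline asm register modifiers; e.g. with the
+// operand bound to %eax, "%w0" in the asm string prints "%ax" and "%b0"
+// prints "%al". A hypothetical use from C:
+//   unsigned val; asm("incw %w0" : "+r"(val));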
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+ return false;
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ if (MI->getOperand(OpNo).isReg())
+ return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]);
+ printOperand(MI, OpNo);
+ return false;
+
+ case 'P': // Don't print @PLT, but do print as memory.
+ printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ // These only apply to registers, ignore on mem.
+ break;
+ case 'P': // Don't print @PLT, but do print as memory.
+ printMemReference(MI, OpNo, "mem", /*NotRIPRel=*/true);
+ return false;
+ }
+ }
+ printMemReference(MI, OpNo);
+ return false;
+}
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in
+/// AT&T syntax to the current output stream.
+///
+void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+/// doInitialization
+bool X86ATTAsmPrinter::doInitialization(Module &M) {
+
+ bool Result = AsmPrinter::doInitialization(M);
+
+ if (TAI->doesSupportDebugInformation()) {
+ // Let PassManager know we need debug information and relay
+ // the MachineModuleInfo address on to DwarfWriter.
+ // AsmPrinter::doInitialization did this analysis.
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ DW->BeginModule(&M, MMI, O, this, TAI);
+ }
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ if (Subtarget->isTargetDarwin())
+ Mang->setUseQuotes(true);
+
+ return Result;
+}
+
+
+void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return; // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar)) {
+ if (Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() == Reloc::Static) {
+ if (GVar->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (GVar->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ return;
+ }
+
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type\t" << name << ",@object\n";
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && !GVar->hasSection() &&
+ !(Subtarget->isTargetDarwin() &&
+ TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) {
+ // FIXME: This seems to be pretty darwin-specific
+ if (GVar->hasExternalLinkage()) {
+ if (const char *Directive = TAI->getZeroFillDirective()) {
+ O << "\t.globl " << name << '\n';
+ O << Directive << "__DATA, __common, " << name << ", "
+ << Size << ", " << Align << '\n';
+ return;
+ }
+ }
+
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (TAI->getLCOMMDirective() != NULL) {
+ if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << ',' << Size;
+ if (Subtarget->isTargetDarwin())
+ O << ',' << Align;
+ } else if (Subtarget->isTargetDarwin() && !GVar->hasCommonLinkage()) {
+ O << "\t.globl " << name << '\n'
+ << TAI->getWeakDefDirective() << name << '\n';
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << ' ';
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+ return;
+ } else {
+ O << TAI->getCOMMDirective() << name << ',' << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ } else {
+ if (!Subtarget->isTargetCygMing()) {
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+ }
+ O << TAI->getCOMMDirective() << name << ',' << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ if (VerboseAsm) {
+ O << "\t\t" << TAI->getCommentString() << ' ';
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ return;
+ }
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl " << name << '\n'
+ << TAI->getWeakDefDirective() << name << '\n';
+ } else if (Subtarget->isTargetCygMing()) {
+ O << "\t.globl\t" << name << "\n"
+ "\t.linkonce same_size\n";
+ } else {
+ O << "\t.weak\t" << name << '\n';
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending-linkage variables should go into a section named
+ // after them or something similar. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm){
+ O << "\t\t\t\t" << TAI->getCommentString() << ' ';
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << name << ", " << Size << '\n';
+
+ EmitGlobalConstant(C);
+}
+
+/// printGVStub - Print stub for a global value.
+///
+void X86ATTAsmPrinter::printGVStub(const char *GV, const char *Prefix) {
+ printSuffixedName(GV, "$non_lazy_ptr", Prefix);
+ O << ":\n\t.indirect_symbol ";
+ if (Prefix) O << Prefix;
+ O << GV << "\n\t.long\t0\n";
+}
+
+/// printHiddenGVStub - Print stub for a hidden global value.
+///
+void X86ATTAsmPrinter::printHiddenGVStub(const char *GV, const char *Prefix) {
+ EmitAlignment(2);
+ printSuffixedName(GV, "$non_lazy_ptr", Prefix);
+ if (Prefix) O << Prefix;
+ O << ":\n" << TAI->getData32bitsDirective() << GV << '\n';
+}
+
+
+bool X86ATTAsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ printModuleLevelGV(I);
+
+ if (I->hasDLLExportLinkage())
+ DLLExportedGVs.insert(Mang->makeNameProper(I->getName(),""));
+
+ // If the global is an extern weak symbol, remember to emit the weak
+ // reference!
+ // FIXME: This is rather hacky, since we'll emit references to ALL weak
+ // symbols, even unused ones. But currently it's the only way to deal with
+ // extern weak initializers hidden deep inside constant expressions.
+ if (I->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(I);
+ }
+
+ for (Module::const_iterator I = M.begin(), E = M.end();
+ I != E; ++I) {
+ // If the global is an extern weak symbol, remember to emit the weak
+ // reference!
+ // FIXME: This is rather hacky, since we'll emit references to ALL weak
+ // symbols, even unused ones. But currently it's the only way to deal with
+ // extern weak initializers hidden deep inside constant expressions.
+ if (I->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(I);
+ }
+
+ // Output linker support code for dllexported globals
+ if (!DLLExportedGVs.empty())
+ SwitchToDataSection(".section .drectve");
+
+ for (StringSet<>::iterator i = DLLExportedGVs.begin(),
+ e = DLLExportedGVs.end();
+ i != e; ++i)
+ O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n";
+
+ if (!DLLExportedFns.empty()) {
+ SwitchToDataSection(".section .drectve");
+ }
+
+ for (StringSet<>::iterator i = DLLExportedFns.begin(),
+ e = DLLExportedFns.end();
+ i != e; ++i)
+ O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n";
+
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToDataSection("");
+
+ // Output stubs for dynamically-linked functions
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs,"
+ "self_modifying_code+pure_instructions,5", 0);
+ const char *p = i->getKeyData();
+ printSuffixedName(p, "$stub");
+ O << ":\n"
+ "\t.indirect_symbol " << p << "\n"
+ "\thlt ; hlt ; hlt ; hlt ; hlt\n";
+ }
+
+ O << '\n';
+
+ // Print global value stubs.
+ bool InStubSection = false;
+ if (TAI->doesSupportExceptionHandling() && MMI && !Subtarget->is64Bit()) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<Function *>& Personalities = MMI->getPersonalities();
+ for (std::vector<Function *>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I) {
+ if (!*I)
+ continue;
+ if (!InStubSection) {
+ SwitchToDataSection(
+ "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
+ InStubSection = true;
+ }
+ printGVStub((*I)->getNameStart(), "_");
+ }
+ }
+
+ // Output stubs for external and common global variables.
+ if (!InStubSection && !GVStubs.empty())
+ SwitchToDataSection(
+ "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
+ for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end();
+ i != e; ++i)
+ printGVStub(i->getKeyData());
+
+ if (!HiddenGVStubs.empty()) {
+ SwitchToSection(TAI->getDataSection());
+ for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end();
+ i != e; ++i)
+ printHiddenGVStub(i->getKeyData());
+ }
+
+ // Emit final debug information.
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ DW->EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+ } else if (Subtarget->isTargetCygMing()) {
+ // Emit type information for external functions
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ O << "\t.def\t " << i->getKeyData()
+ << ";\t.scl\t" << COFF::C_EXT
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+
+ // Emit final debug information.
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ DW->EndModule();
+ } else if (Subtarget->isTargetELF()) {
+ // Emit final debug information.
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ DW->EndModule();
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+// Include the auto-generated portion of the assembly writer.
+#include "X86GenAsmWriter.inc"
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
new file mode 100644
index 0000000..5b40e73
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -0,0 +1,164 @@
+//===-- X86ATTAsmPrinter.h - Convert X86 LLVM code to AT&T assembly -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ATTASMPRINTER_H
+#define X86ATTASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+
+class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+ const X86Subtarget *Subtarget;
+ public:
+ explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 AT&T-Style Assembly Printer";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ if (Subtarget->isTargetDarwin() ||
+ Subtarget->isTargetELF() ||
+ Subtarget->isTargetCygMing()) {
+ AU.addRequired<MachineModuleInfo>();
+ }
+ AU.addRequired<DwarfWriter>();
+ AsmPrinter::getAnalysisUsage(AU);
+ }
+
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier = 0, bool NotRIPRel = false);
+ void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo, "subreg64");
+ }
+
+ bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printSSECC(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL, bool NotRIPRel = false);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL, bool NotRIPRel = false);
+ void printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const;
+ void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB);
+ }
+ void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const;
+
+ void printPICLabel(const MachineInstr *MI, unsigned Op);
+ void printModuleLevelGV(const GlobalVariable* GVar);
+
+ void printGVStub(const char *GV, const char *Prefix = NULL);
+ void printHiddenGVStub(const char *GV, const char *Prefix = NULL);
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+
+ // Necessary for Darwin to print out the appropriate types of linker stubs.
+ StringSet<> FnStubs, GVStubs, HiddenGVStubs;
+
+ // Necessary for dllexport support
+ StringSet<> DLLExportedFns, DLLExportedGVs;
+
+ // We have to propagate some information about MachineFunction to the
+ // AsmPrinter. That is fine while we are printing the function itself, since
+ // we have access to the MachineFunction and can get the appropriate
+ // MachineFunctionInfo. Unfortunately, it is not possible when we are only
+ // printing a reference to a Function (e.g. when calling it): there is no
+ // way to get the corresponding MachineFunction, which may not even have
+ // been created yet. That's why we use this additional structure to collect
+ // all the necessary information.
+ //
+ // This structure is used e.g. for name decoration of stdcall and fastcall
+ // functions, since the decoration depends on the arguments' size.
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+ FMFInfoMap FunctionInfoMap;
+
+ void decorateName(std::string& Name, const GlobalValue* GV);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
new file mode 100644
index 0000000..c874849
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -0,0 +1,50 @@
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM IR to X86 assembly -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the shared superclass printer that converts from our
+// internal representation of machine-dependent LLVM code to Intel- and
+// AT&T-format assembly language.
+// This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ATTAsmPrinter.h"
+#include "X86IntelAsmPrinter.h"
+#include "X86Subtarget.h"
+using namespace llvm;
+
+/// createX86CodePrinterPass - Returns a pass that prints the X86 assembly code
+/// for a MachineFunction to the given output stream, using the given target
+/// machine description.
+///
+FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
+ X86TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
+
+ if (Subtarget->isFlavorIntel()) {
+ return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
+ } else {
+ return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
+ }
+}
+
+namespace {
+ static struct Register {
+ Register() {
+ X86TargetMachine::registerAsmPrinter(createX86CodePrinterPass);
+ }
+ } Registrator;
+}
+
+extern "C" int X86AsmPrinterForceLink;
+int X86AsmPrinterForceLink = 0;
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
new file mode 100644
index 0000000..6599349
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
@@ -0,0 +1,609 @@
+//===-- X86IntelAsmPrinter.cpp - Convert X86 LLVM code to Intel assembly --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Intel format assembly language.
+// This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelAsmPrinter.h"
+#include "X86InstrInfo.h"
+#include "X86TargetAsmInfo.h"
+#include "X86.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+ const TargetData *TD) {
+ X86MachineFunctionInfo Info;
+ uint64_t Size = 0;
+
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Info.setDecorationStyle(StdCall);
+ break;
+ case CallingConv::X86_FastCall:
+ Info.setDecorationStyle(FastCall);
+ break;
+ default:
+ return Info;
+ }
+
+ unsigned argNum = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI, ++argNum) {
+ const Type* Ty = AI->getType();
+
+ // 'Dereference' the type in the case of the byval parameter attribute.
+ if (F->paramHasAttr(argNum, Attribute::ByVal))
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ // Size should be aligned to a DWORD boundary.
+ Size += ((TD->getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+ // We don't support unreasonably huge argument lists :)
+ Info.setBytesToPopOnReturn((unsigned int)Size);
+ return Info;
+}
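+
+// Worked example (illustrative): a stdcall function taking two i32
+// arguments pops 8 bytes on return; an i8 or i16 argument also contributes
+// 4 bytes, since each argument is rounded up to a DWORD multiple. The
+// result feeds the name decoration below, e.g. "_foo@8".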
+
+
+/// decorateName - Query the FunctionInfoMap and use the result to decorate
+/// the given name as required by its calling convention.
+void X86IntelAsmPrinter::decorateName(std::string &Name,
+ const GlobalValue *GV) {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F) return;
+
+ // We don't want to decorate non-stdcall or non-fastcall functions right now
+ unsigned CC = F->getCallingConv();
+ if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+ return;
+
+ FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+
+ const X86MachineFunctionInfo *Info;
+ if (info_item == FunctionInfoMap.end()) {
+ // Calculate the appropriate function info and populate the map.
+ FunctionInfoMap[F] = calculateFunctionInfo(F, TM.getTargetData());
+ Info = &FunctionInfoMap[F];
+ } else {
+ Info = &info_item->second;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ switch (Info->getDecorationStyle()) {
+ case None:
+ break;
+ case StdCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+ break;
+ case FastCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+
+ if (Name[0] == '_')
+ Name[0] = '@';
+ else
+ Name = '@' + Name;
+
+ break;
+ default:
+ assert(0 && "Unsupported DecorationStyle");
+ }
+}
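+
+// Illustrative results of the decoration above: a stdcall "_foo" taking
+// 8 bytes of arguments becomes "_foo@8", while the fastcall variant also
+// swaps the leading '_' for '@', giving "@foo@8".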
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ unsigned CC = F->getCallingConv();
+
+ // Populate the function information map. We only want to record
+ // stdcall and fastcall functions' information right now.
+ if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+ FunctionInfoMap[F] = *MF.getInfo<X86MachineFunctionInfo>();
+
+ decorateName(CurrentFnName, F);
+
+ SwitchToTextSection("_text", F);
+
+ unsigned FnAlign = 4;
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ FnAlign = 1;
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unsupported linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage:
+ EmitAlignment(FnAlign);
+ break;
+ case Function::DLLExportLinkage:
+ DLLExportedFns.insert(CurrentFnName);
+ // FALL THROUGH
+ case Function::ExternalLinkage:
+ O << "\tpublic " << CurrentFnName << "\n";
+ EmitAlignment(FnAlign);
+ break;
+ }
+
+ O << CurrentFnName << "\tproc near\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block if there are any predecessors.
+ if (!I->pred_empty()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ O << CurrentFnName << "\tendp\n";
+
+ O.flush();
+
+ // We didn't modify anything.
+ return false;
+}
+
+void X86IntelAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
+ const char *Modifier) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT VT = (strcmp(Modifier,"subreg64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier,"subreg16") == 0) ? MVT::i16 :MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << TRI->getName(Reg);
+ } else
+ O << "reg" << MO.getReg();
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_JumpTableIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << "OFFSET ";
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << "_" << MO.getIndex();
+ return;
+ }
+ case MachineOperand::MO_ConstantPoolIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << "OFFSET ";
+ O << "[" << TAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ printOffset(MO.getOffset());
+ O << "]";
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+
+ decorateName(Name, GV);
+
+ if (!isMemOp && !isCallOp) O << "OFFSET ";
+ if (GV->hasDLLImportLinkage()) {
+ // FIXME: This should go away once the stdcall & fastcall CCs are
+ // fully supported.
+ O << "__imp_";
+ }
+ O << Name;
+ printOffset(MO.getOffset());
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ if (!isCallOp) O << "OFFSET ";
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+ default:
+ O << "<unknown operand type>"; return;
+ }
+}
+
+void X86IntelAsmPrinter::printLeaMemReference(const MachineInstr *MI,
+ unsigned Op,
+ const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ int ScaleVal = MI->getOperand(Op+1).getImm();
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ O << "[";
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOp(BaseReg, Modifier);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << "*";
+ printOp(IndexReg, Modifier);
+ NeedPlus = true;
+ }
+
+ if (DispSpec.isGlobal() || DispSpec.isCPI() ||
+ DispSpec.isJTI()) {
+ if (NeedPlus)
+ O << " + ";
+ printOp(DispSpec, "mem");
+ } else {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || (!BaseReg.getReg() && !IndexReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+ O << "]";
+}
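+
+// For comparison with the AT&T printer: base EAX, index ECX, scale 4 and
+// displacement 8 render in Intel syntax roughly as "[eax + 4*ecx + 8]"
+// (register-name casing comes from the register info tables).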
+
+void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ MachineOperand Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, Modifier);
+}
+
+void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false, false);
+ O << '-' << "\"L" << getFunctionNumber() << "$pb\"\n";
+}
+
+void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
+ O << "\"L" << getFunctionNumber() << "$pb\"\n";
+ O << "\"L" << getFunctionNumber() << "$pb\":";
+}
+
+bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
+ const char Mode) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ }
+
+ O << '%' << TRI->getName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86IntelAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]);
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool X86IntelAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemReference(MI, OpNo);
+ return false;
+}
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction
+/// MI in Intel syntax to the current output stream.
+///
+void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool X86IntelAsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ Mang->markCharUnacceptable('.');
+
+ O << "\t.686\n\t.model flat\n\n";
+
+ // Emit declarations for external functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration()) {
+ std::string Name = Mang->getValueName(I);
+ decorateName(Name, I);
+
+ O << "\textern " ;
+ if (I->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name << ":near\n";
+ }
+
+ // Emit declarations for external globals. Note that VC++ always declares
+ // external globals to have type byte, and if that's good enough for VC++...
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->isDeclaration()) {
+ std::string Name = Mang->getValueName(I);
+
+ O << "\textern " ;
+ if (I->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name << ":byte\n";
+ }
+ }
+
+ return Result;
+}
+
+bool X86IntelAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->isDeclaration()) continue; // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I))
+ continue;
+
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ unsigned Align = TD->getPreferredAlignmentLog(I);
+ bool bCustomSegment = false;
+
+ switch (I->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ SwitchToDataSection("");
+ O << name << "?\tsegment common 'COMMON'\n";
+ bCustomSegment = true;
+ // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
+ // are also available.
+ break;
+ case GlobalValue::AppendingLinkage:
+ SwitchToDataSection("");
+ O << name << "?\tsegment public 'DATA'\n";
+ bCustomSegment = true;
+ // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
+ // are also available.
+ break;
+ case GlobalValue::DLLExportLinkage:
+ DLLExportedGVs.insert(name);
+ // FALL THROUGH
+ case GlobalValue::ExternalLinkage:
+ O << "\tpublic " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ SwitchToSection(TAI->getDataSection());
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ if (!bCustomSegment)
+ EmitAlignment(Align, I);
+
+ O << name << ":";
+ if (VerboseAsm)
+ O << "\t\t\t\t" << TAI->getCommentString()
+ << " " << I->getName();
+ O << '\n';
+
+ EmitGlobalConstant(C);
+
+ if (bCustomSegment)
+ O << name << "?\tends\n";
+ }
+
+ // Output linker support code for dllexported globals
+ if (!DLLExportedGVs.empty() || !DLLExportedFns.empty()) {
+ SwitchToDataSection("");
+ O << "; WARNING: The following code is valid only with MASM v8.x"
+ << "and (possible) higher\n"
+ << "; This version of MASM is usually shipped with Microsoft "
+ << "Visual Studio 2005\n"
+ << "; or (possible) further versions. Unfortunately, there is no "
+ << "way to support\n"
+ << "; dllexported symbols in the earlier versions of MASM in fully "
+ << "automatic way\n\n";
+ O << "_drectve\t segment info alias('.drectve')\n";
+ }
+
+ for (StringSet<>::iterator i = DLLExportedGVs.begin(),
+ e = DLLExportedGVs.end();
+ i != e; ++i)
+ O << "\t db ' /EXPORT:" << i->getKeyData() << ",data'\n";
+
+ for (StringSet<>::iterator i = DLLExportedFns.begin(),
+ e = DLLExportedFns.end();
+ i != e; ++i)
+ O << "\t db ' /EXPORT:" << i->getKeyData() << "'\n";
+
+ if (!DLLExportedGVs.empty() || !DLLExportedFns.empty())
+ O << "_drectve\t ends\n";
+
+ // Bypass X86SharedAsmPrinter::doFinalization().
+ bool Result = AsmPrinter::doFinalization(M);
+ SwitchToDataSection("");
+ O << "\tend\n";
+ return Result;
+}
+
+void X86IntelAsmPrinter::EmitString(const ConstantArray *CVA) const {
+ unsigned NumElts = CVA->getNumOperands();
+ if (NumElts) {
+ // ML does not have escape sequences except '' for '. It also has a maximum
+ // string length of 255.
+ unsigned len = 0;
+ bool inString = false;
+ for (unsigned i = 0; i < NumElts; i++) {
+ int n = cast<ConstantInt>(CVA->getOperand(i))->getZExtValue() & 255;
+ if (len == 0)
+ O << "\tdb ";
+
+ if (n >= 32 && n <= 127) {
+ if (!inString) {
+ if (len > 0) {
+ O << ",'";
+ len += 2;
+ } else {
+ O << "'";
+ len++;
+ }
+ inString = true;
+ }
+ if (n == '\'') {
+ O << "'";
+ len++;
+ }
+ O << char(n);
+ } else {
+ if (inString) {
+ O << "'";
+ len++;
+ inString = false;
+ }
+ if (len > 0) {
+ O << ",";
+ len++;
+ }
+ O << n;
+ len += 1 + (n > 9) + (n > 99);
+ }
+
+ if (len > 60) {
+ if (inString) {
+ O << "'";
+ inString = false;
+ }
+ O << "\n";
+ len = 0;
+ }
+ }
+
+ if (len > 0) {
+ if (inString)
+ O << "'";
+ O << "\n";
+ }
+ }
+}
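+
+// Example of the rules above (illustrative): the string "It's OK\n" is
+// emitted as
+//   db 'It''s OK',10
+// with the embedded quote doubled and the non-printable newline printed
+// as a decimal byte.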
+
+// Include the auto-generated portion of the assembly writer.
+#include "X86GenAsmWriter1.inc"
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
new file mode 100644
index 0000000..9520d98
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -0,0 +1,152 @@
+//===-- X86IntelAsmPrinter.h - Convert X86 LLVM code to Intel assembly ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Intel assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INTELASMPRINTER_H
+#define X86INTELASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
+ explicit X86IntelAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "X86 Intel-Style Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // This method is used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier = 0) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physreg??");
+ O << TM.getRegisterInfo()->get(MO.getReg()).Name; // Capitalized names
+ } else {
+ printOp(MO, Modifier);
+ }
+ }
+
+ void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printLeaMemReference(MI, OpNo, "subreg64");
+ }
+
+ bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, const char *Modifier = 0);
+ void printSSECC(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const;
+ void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB);
+ }
+ void printPICLabel(const MachineInstr *MI, unsigned Op);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ // We have to propagate some information about MachineFunction to the
+ // AsmPrinter. That is fine while we are printing the function itself, since
+ // we have access to the MachineFunction and can get the appropriate
+ // MachineFunctionInfo. Unfortunately, it is not possible when we are only
+ // printing a reference to a Function (e.g. when calling it): there is no
+ // way to get the corresponding MachineFunction, which may not even have
+ // been created yet. That's why we use this additional structure to collect
+ // all the necessary information.
+ //
+ // This structure is used e.g. for name decoration of stdcall and fastcall
+ // functions, since the decoration depends on the arguments' size.
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+ FMFInfoMap FunctionInfoMap;
+
+ void decorateName(std::string& Name, const GlobalValue* GV);
+
+ virtual void EmitString(const ConstantArray *CVA) const;
+
+ // Necessary for dllexport support
+ StringSet<> DLLExportedFns, DLLExportedGVs;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
new file mode 100644
index 0000000..d982990
--- /dev/null
+++ b/lib/Target/X86/CMakeLists.txt
@@ -0,0 +1,29 @@
+set(LLVM_TARGET_DEFINITIONS X86.td)
+
+tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(X86GenRegisterNames.inc -gen-register-enums)
+tablegen(X86GenRegisterInfo.inc -gen-register-desc)
+tablegen(X86GenInstrNames.inc -gen-instr-enums)
+tablegen(X86GenInstrInfo.inc -gen-instr-desc)
+tablegen(X86GenAsmWriter.inc -gen-asm-writer)
+tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(X86GenDAGISel.inc -gen-dag-isel)
+tablegen(X86GenFastISel.inc -gen-fast-isel)
+tablegen(X86GenCallingConv.inc -gen-callingconv)
+tablegen(X86GenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(X86CodeGen
+ X86CodeEmitter.cpp
+ X86ELFWriterInfo.cpp
+ X86FloatingPoint.cpp
+ X86FloatingPointRegKill.cpp
+ X86ISelDAGToDAG.cpp
+ X86ISelLowering.cpp
+ X86InstrInfo.cpp
+ X86JITInfo.cpp
+ X86RegisterInfo.cpp
+ X86Subtarget.cpp
+ X86TargetAsmInfo.cpp
+ X86TargetMachine.cpp
+ X86FastISel.cpp
+ )
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
new file mode 100644
index 0000000..44f1c5d
--- /dev/null
+++ b/lib/Target/X86/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMX86CodeGen
+TARGET = X86
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
+ X86GenRegisterInfo.inc X86GenInstrNames.inc \
+ X86GenInstrInfo.inc X86GenAsmWriter.inc \
+ X86GenAsmWriter1.inc X86GenDAGISel.inc \
+ X86GenFastISel.inc \
+ X86GenCallingConv.inc X86GenSubtarget.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt
new file mode 100644
index 0000000..be28e8b
--- /dev/null
+++ b/lib/Target/X86/README-FPStack.txt
@@ -0,0 +1,85 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: FP stack related stuff
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+Some targets (e.g. Athlons) prefer ffreep to fstp ST(0):
+http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html
+
+//===---------------------------------------------------------------------===//
+
+This should use fiadd on chips where it is profitable:
+double foo(double P, int *I) { return P+*I; }
+
+We have fiadd patterns now, but the following two have the same cost and
+complexity. We need a way to specify that the latter is more profitable.
+
+def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP:$dst, (fadd RFP:$src1,
+ (extloadf64f32 addr:$src2)))]>;
+ // ST(0) = ST(0) + [mem32]
+
+def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP:$dst, (fadd RFP:$src1,
+ (X86fild addr:$src2, i32)))]>;
+ // ST(0) = ST(0) + [mem32int]
+
+//===---------------------------------------------------------------------===//
+
+The FP stackifier needs to be global. Also, it should handle simple
+permutations to reduce the number of shuffle instructions, e.g. turning:
+
+fld P -> fld Q
+fld Q fld P
+fxch
+
+or:
+
+fxch -> fucomi
+fucomi jl X
+jg X
+
+Ideas:
+http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
+
+
+//===---------------------------------------------------------------------===//
+
+Add a target specific hook to DAG combiner to handle SINT_TO_FP and
+FP_TO_SINT when the source operand is already in memory.
+
+//===---------------------------------------------------------------------===//
+
+Open code rint, floor, ceil, trunc:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
+
+Opencode the sincos[f] libcall.
+
+//===---------------------------------------------------------------------===//
+
+None of the FPStack instructions are handled in
+X86RegisterInfo::foldMemoryOperand, which prevents the spiller from
+folding spill code into the instructions.
+
+//===---------------------------------------------------------------------===//
+
+Currently the x86 codegen isn't very good at mixing SSE and FPStack
+code:
+
+unsigned int foo(double x) { return x; }
+
+foo:
+ subl $20, %esp
+ movsd 24(%esp), %xmm0
+ movsd %xmm0, 8(%esp)
+ fldl 8(%esp)
+ fisttpll (%esp)
+ movl (%esp), %eax
+ addl $20, %esp
+ ret
+
+This just requires being smarter when custom expanding fptoui.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-MMX.txt b/lib/Target/X86/README-MMX.txt
new file mode 100644
index 0000000..a6c8616
--- /dev/null
+++ b/lib/Target/X86/README-MMX.txt
@@ -0,0 +1,71 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: MMX-specific stuff.
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+#include <mmintrin.h>
+
+__v2si qux(int A) {
+ return (__v2si){ 0, A };
+}
+
+is compiled into:
+
+_qux:
+ subl $28, %esp
+ movl 32(%esp), %eax
+ movd %eax, %mm0
+ movq %mm0, (%esp)
+ movl (%esp), %eax
+ movl %eax, 20(%esp)
+ movq %mm0, 8(%esp)
+ movl 12(%esp), %eax
+ movl %eax, 16(%esp)
+ movq 16(%esp), %mm0
+ addl $28, %esp
+ ret
+
+Yuck!
+
+GCC gives us:
+
+_qux:
+ subl $12, %esp
+ movl 16(%esp), %eax
+ movl 20(%esp), %edx
+ movl $0, (%eax)
+ movl %edx, 4(%eax)
+ addl $12, %esp
+ ret $4
+
+//===---------------------------------------------------------------------===//
+
+We generate crappy code for this:
+
+__m64 t() {
+ return _mm_cvtsi32_si64(1);
+}
+
+_t:
+ subl $12, %esp
+ movl $1, %eax
+ movd %eax, %mm0
+ movq %mm0, (%esp)
+ movl (%esp), %eax
+ movl 4(%esp), %edx
+ addl $12, %esp
+ ret
+
+The extra stack traffic is covered in the previous entry. The other reason
+is that we are not smart about materializing constants in MMX registers.
+With -m64 we generate:
+
+ movl $1, %eax
+ movd %eax, %mm0
+ movd %mm0, %rax
+ ret
+
+We should be using a constantpool load instead:
+ movq LC0(%rip), %rax
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
new file mode 100644
index 0000000..71ad51c
--- /dev/null
+++ b/lib/Target/X86/README-SSE.txt
@@ -0,0 +1,918 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: SSE-specific stuff.
+//===---------------------------------------------------------------------===//
+
+- Consider eliminating the unaligned SSE load intrinsics, replacing them with
+ unaligned LLVM load instructions.
+
+//===---------------------------------------------------------------------===//
+
+Expand libm rounding functions inline: Significant speedups possible.
+http://gcc.gnu.org/ml/gcc-patches/2006-10/msg00909.html
+
+//===---------------------------------------------------------------------===//
+
+When compiled with unsafe math enabled, "main" should enable SSE DAZ mode and
+other fast SSE modes.
+
+//===---------------------------------------------------------------------===//
+
+Think about doing i64 math in SSE regs on x86-32.
+
+//===---------------------------------------------------------------------===//
+
+This testcase should have no SSE instructions in it, and only one load from
+a constant pool:
+
+double %test3(bool %B) {
+ %C = select bool %B, double 123.412, double 523.01123123
+ ret double %C
+}
+
+Currently, the select is being lowered, which prevents the dag combiner from
+turning 'select (load CPI1), (load CPI2)' -> 'load (select CPI1, CPI2)'
+
+The pattern isel got this one right.
+
+//===---------------------------------------------------------------------===//
+
+SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction
+like this:
+
+ X += y
+
+and the register allocator decides to spill X, it is cheaper to emit this as:
+
+Y += [xslot]
+store Y -> [xslot]
+
+than as:
+
+tmp = [xslot]
+tmp += y
+store tmp -> [xslot]
+
+...and this uses one fewer register (so this should be done at load folding
+time, not at spiller time). *Note*, however, that this can only be done
+if Y is dead. Here's a testcase:
+
+@.str_3 = external global [15 x i8]
+declare void @printf(i32, ...)
+define void @main() {
+build_tree.exit:
+ br label %no_exit.i7
+
+no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
+ %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ],
+ [ %tmp.34.i18, %no_exit.i7 ]
+ %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ],
+ [ %tmp.28.i16, %no_exit.i7 ]
+ %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
+ %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
+ br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
+
+Compute_Tree.exit23: ; preds = %no_exit.i7
+ tail call void (i32, ...)* @printf( i32 0 )
+ store double %tmp.34.i18, double* null
+ ret void
+}
+
+We currently emit:
+
+.BBmain_1:
+ xorpd %XMM1, %XMM1
+ addsd %XMM0, %XMM1
+*** movsd %XMM2, QWORD PTR [%ESP + 8]
+*** addsd %XMM2, %XMM1
+*** movsd QWORD PTR [%ESP + 8], %XMM2
+ jmp .BBmain_1 # no_exit.i7
+
+This is a bugpoint-reduced testcase, which is why it doesn't make much
+sense (e.g. it's an infinite loop). :)
+
+//===---------------------------------------------------------------------===//
+
+SSE should implement 'select_cc' using 'emulated conditional moves' that use
+pcmp/pand/pandn/por to do a selection instead of a conditional branch:
+
+double %X(double %Y, double %Z, double %A, double %B) {
+ %C = setlt double %A, %B
+ %z = add double %Z, 0.0 ;; select operand is not a load
+ %D = select bool %C, double %Y, double %z
+ ret double %D
+}
+
+We currently emit:
+
+_X:
+ subl $12, %esp
+ xorpd %xmm0, %xmm0
+ addsd 24(%esp), %xmm0
+ movsd 32(%esp), %xmm1
+ movsd 16(%esp), %xmm2
+ ucomisd 40(%esp), %xmm1
+ jb LBB_X_2
+LBB_X_1:
+ movsd %xmm0, %xmm2
+LBB_X_2:
+ movsd %xmm2, (%esp)
+ fldl (%esp)
+ addl $12, %esp
+ ret
+
+//===---------------------------------------------------------------------===//
+
+It's not clear whether we should use pxor or xorps / xorpd to clear XMM
+registers. The choice may depend on subtarget information. We should do some
+more experiments on different x86 machines.
+
+//===---------------------------------------------------------------------===//
+
+Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
+feasible.
+
+//===---------------------------------------------------------------------===//
+
+Codegen:
+ if (copysign(1.0, x) == copysign(1.0, y))
+into:
+ if (x^y & mask)
+when using SSE.
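+
+A C sketch of the bit trick (assuming IEEE-754 doubles; the helper name is
+made up):
+
+#include <stdint.h>
+#include <string.h>
+int same_sign(double x, double y) {
+  uint64_t xb, yb;
+  memcpy(&xb, &x, sizeof xb);  /* reinterpret the bit patterns */
+  memcpy(&yb, &y, sizeof yb);
+  /* signs agree iff the xor of the bit patterns has a clear sign bit */
+  return ((xb ^ yb) & 0x8000000000000000ULL) == 0;
+}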
+
+//===---------------------------------------------------------------------===//
+
+Use movhps to update upper 64-bits of a v4sf value. Also movlps on lower half
+of a v4sf value.
+
+//===---------------------------------------------------------------------===//
+
+Better codegen for vector_shuffles like this { x, 0, 0, 0 } or { x, 0, x, 0}.
+Perhaps use pxor / xorp* to clear a XMM register first?
+
+//===---------------------------------------------------------------------===//
+
+How to decide when to use the "floating point version" of logical ops? Here are
+some code fragments:
+
+ movaps LCPI5_5, %xmm2
+ divps %xmm1, %xmm2
+ mulps %xmm2, %xmm3
+ mulps 8656(%ecx), %xmm3
+ addps 8672(%ecx), %xmm3
+ andps LCPI5_6, %xmm2
+ andps LCPI5_1, %xmm3
+ por %xmm2, %xmm3
+ movdqa %xmm3, (%edi)
+
+ movaps LCPI5_5, %xmm1
+ divps %xmm0, %xmm1
+ mulps %xmm1, %xmm3
+ mulps 8656(%ecx), %xmm3
+ addps 8672(%ecx), %xmm3
+ andps LCPI5_6, %xmm1
+ andps LCPI5_1, %xmm3
+ orps %xmm1, %xmm3
+ movaps %xmm3, 112(%esp)
+ movaps %xmm3, (%ebx)
+
+Due to some minor source change, the latter case ended up using orps and
+movaps instead of por and movdqa. Does it matter?
+
+//===---------------------------------------------------------------------===//
+
+X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
+to choose between movaps, movapd, and movdqa based on types of source and
+destination?
+
+How about andps, andpd, and pand? Do we really care about the type of the
+packed elements? If not, why not always use the "ps" variants, which are
+likely to be shorter?
+
+//===---------------------------------------------------------------------===//
+
+External test Nurbs exposed some problems. Look for
+__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
+emits:
+
+ movaps (%edx), %xmm2 #59.21
+ movaps (%edx), %xmm5 #60.21
+ movaps (%edx), %xmm4 #61.21
+ movaps (%edx), %xmm3 #62.21
+ movl 40(%ecx), %ebp #69.49
+ shufps $0, %xmm2, %xmm5 #60.21
+ movl 100(%esp), %ebx #69.20
+ movl (%ebx), %edi #69.20
+ imull %ebp, %edi #69.49
+ addl (%eax), %edi #70.33
+ shufps $85, %xmm2, %xmm4 #61.21
+ shufps $170, %xmm2, %xmm3 #62.21
+ shufps $255, %xmm2, %xmm2 #63.21
+ lea (%ebp,%ebp,2), %ebx #69.49
+ negl %ebx #69.49
+ lea -3(%edi,%ebx), %ebx #70.33
+ shll $4, %ebx #68.37
+ addl 32(%ecx), %ebx #68.37
+ testb $15, %bl #91.13
+ jne L_B1.24 # Prob 5% #91.13
+
+This is the llvm code after instruction scheduling:
+
+cond_next140 (0xa910740, LLVM BB @0xa90beb0):
+ %reg1078 = MOV32ri -3
+ %reg1079 = ADD32rm %reg1078, %reg1068, 1, %NOREG, 0
+ %reg1037 = MOV32rm %reg1024, 1, %NOREG, 40
+ %reg1080 = IMUL32rr %reg1079, %reg1037
+ %reg1081 = MOV32rm %reg1058, 1, %NOREG, 0
+ %reg1038 = LEA32r %reg1081, 1, %reg1080, -3
+ %reg1036 = MOV32rm %reg1024, 1, %NOREG, 32
+ %reg1082 = SHL32ri %reg1038, 4
+ %reg1039 = ADD32rr %reg1036, %reg1082
+ %reg1083 = MOVAPSrm %reg1059, 1, %NOREG, 0
+ %reg1034 = SHUFPSrr %reg1083, %reg1083, 170
+ %reg1032 = SHUFPSrr %reg1083, %reg1083, 0
+ %reg1035 = SHUFPSrr %reg1083, %reg1083, 255
+ %reg1033 = SHUFPSrr %reg1083, %reg1083, 85
+ %reg1040 = MOV32rr %reg1039
+ %reg1084 = AND32ri8 %reg1039, 15
+ CMP32ri8 %reg1084, 0
+ JE mbb<cond_next204,0xa914d30>
+
+Still ok. After register allocation:
+
+cond_next140 (0xa910740, LLVM BB @0xa90beb0):
+ %EAX = MOV32ri -3
+ %EDX = MOV32rm <fi#3>, 1, %NOREG, 0
+ ADD32rm %EAX<def&use>, %EDX, 1, %NOREG, 0
+ %EDX = MOV32rm <fi#7>, 1, %NOREG, 0
+ %EDX = MOV32rm %EDX, 1, %NOREG, 40
+ IMUL32rr %EAX<def&use>, %EDX
+ %ESI = MOV32rm <fi#5>, 1, %NOREG, 0
+ %ESI = MOV32rm %ESI, 1, %NOREG, 0
+ MOV32mr <fi#4>, 1, %NOREG, 0, %ESI
+ %EAX = LEA32r %ESI, 1, %EAX, -3
+ %ESI = MOV32rm <fi#7>, 1, %NOREG, 0
+ %ESI = MOV32rm %ESI, 1, %NOREG, 32
+ %EDI = MOV32rr %EAX
+ SHL32ri %EDI<def&use>, 4
+ ADD32rr %EDI<def&use>, %ESI
+ %XMM0 = MOVAPSrm %ECX, 1, %NOREG, 0
+ %XMM1 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM1<def&use>, %XMM1, 170
+ %XMM2 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM2<def&use>, %XMM2, 0
+ %XMM3 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM3<def&use>, %XMM3, 255
+ SHUFPSrr %XMM0<def&use>, %XMM0, 85
+ %EBX = MOV32rr %EDI
+ AND32ri8 %EBX<def&use>, 15
+ CMP32ri8 %EBX, 0
+ JE mbb<cond_next204,0xa914d30>
+
+This looks really bad. The problem is that shufps is a destructive opcode:
+since the same value appears as operand two in more than one shufps op, it
+results in a number of copies. Note that icc suffers from the same problem.
+Either the instruction selector should select pshufd, or the register
+allocator should perform the two-address to three-address transformation.
+
+It also exposes some other problems. See MOV32ri -3 and the spills.
+
+//===---------------------------------------------------------------------===//
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500
+
+LLVM is producing bad code.
+
+LBB_main_4: # cond_true44
+ addps %xmm1, %xmm2
+ subps %xmm3, %xmm2
+ movaps (%ecx), %xmm4
+ movaps %xmm2, %xmm1
+ addps %xmm4, %xmm1
+ addl $16, %ecx
+ incl %edx
+ cmpl $262144, %edx
+ movaps %xmm3, %xmm2
+ movaps %xmm4, %xmm3
+ jne LBB_main_4 # cond_true44
+
+There are two problems. 1) There is no need for two loop induction
+variables; we can compare against 262144 * 16. 2) A known register coalescer
+issue: we should be able to eliminate one of the movaps:
+
+ addps %xmm2, %xmm1 <=== Commute!
+ subps %xmm3, %xmm1
+ movaps (%ecx), %xmm4
+ movaps %xmm1, %xmm1 <=== Eliminate!
+ addps %xmm4, %xmm1
+ addl $16, %ecx
+ incl %edx
+ cmpl $262144, %edx
+ movaps %xmm3, %xmm2
+ movaps %xmm4, %xmm3
+ jne LBB_main_4 # cond_true44
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+__m128 test(float a) {
+ return _mm_set_ps(0.0, 0.0, 0.0, a*a);
+}
+
+This compiles into:
+
+movss 4(%esp), %xmm1
+mulss %xmm1, %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Because mulss doesn't modify the top 3 elements, the top elements of
+xmm1 are already zero'd. We could compile this to:
+
+movss 4(%esp), %xmm0
+mulss %xmm0, %xmm0
+ret
+
+//===---------------------------------------------------------------------===//
+
+Here's a sick and twisted idea. Consider code like this:
+
+__m128 test(__m128 a) {
+ float b = *(float*)&a;
+ ...
+ return _mm_set_ps(0.0, 0.0, 0.0, b);
+}
+
+This might compile to this code:
+
+movaps c(%esp), %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Now consider if the ... code caused xmm1 to get spilled. This might produce
+this code:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+xorps %xmm0, %xmm0
+movaps c2(%esp), %xmm1
+movss %xmm1, %xmm0
+ret
+
+However, since the reload is only used by these instructions, we could
+"fold" it into the uses, producing something like this:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+movss c2(%esp), %xmm0
+ret
+
+... saving two instructions.
+
+The basic idea is that a reload from a spill slot can, if only one 4-byte
+chunk is used, bring in 3 zeros plus the one element instead of all 4
+elements.
+This can be used to simplify a variety of shuffle operations, where the
+elements are fixed zeros.
+
+//===---------------------------------------------------------------------===//
+
+__m128d test1( __m128d A, __m128d B) {
+ return _mm_shuffle_pd(A, B, 0x3);
+}
+
+compiles to
+
+shufpd $3, %xmm1, %xmm0
+
+Perhaps it's better to use unpckhpd instead?
+
+unpckhpd %xmm1, %xmm0
+
+Don't know if unpckhpd is faster. But it is shorter.
+
+//===---------------------------------------------------------------------===//
+
+This code generates ugly code, probably due to costs being off or something:
+
+define void @test(float* %P, <4 x float>* %P2 ) {
+ %xFloat0.688 = load float* %P
+ %tmp = load <4 x float>* %P2
+ %inFloat3.713 = insertelement <4 x float> %tmp, float 0.0, i32 3
+ store <4 x float> %inFloat3.713, <4 x float>* %P2
+ ret void
+}
+
+Generates:
+
+_test:
+ movl 8(%esp), %eax
+ movaps (%eax), %xmm0
+ pxor %xmm1, %xmm1
+ movaps %xmm0, %xmm2
+ shufps $50, %xmm1, %xmm2
+ shufps $132, %xmm2, %xmm0
+ movaps %xmm0, (%eax)
+ ret
+
+Would it be better to generate:
+
+_test:
+ movl 8(%esp), %ecx
+ movaps (%ecx), %xmm0
+ xor %eax, %eax
+ pinsrw $6, %eax, %xmm0
+ pinsrw $7, %eax, %xmm0
+ movaps %xmm0, (%ecx)
+ ret
+
+?
+
+//===---------------------------------------------------------------------===//
+
+Some useful information in the Apple Altivec / SSE Migration Guide:
+
+http://developer.apple.com/documentation/Performance/Conceptual/
+Accelerate_sse_migration/index.html
+
+e.g. SSE select using and, andnot, or. Various SSE compare translations.
+
+//===---------------------------------------------------------------------===//
+
+Add hooks to commute some CMPP operations.
+
+//===---------------------------------------------------------------------===//
+
+Apply the same transformation that merged four float loads into a single
+128-bit load to loads from the constant pool.
+
+//===---------------------------------------------------------------------===//
+
+Floating point max / min are commutable when -enable-unsafe-fp-path is
+specified. We should turn int_x86_sse_max_ss and X86ISD::FMIN etc. into other
+nodes which are selected to max / min instructions that are marked commutable.
+
+//===---------------------------------------------------------------------===//
+
+We should materialize vector constants like "all ones" and "signbit" with
+code like:
+
+ cmpeqps xmm1, xmm1 ; xmm1 = all-ones
+
+and:
+ cmpeqps xmm1, xmm1 ; xmm1 = all-ones
+ psrlq xmm1, 31 ; xmm1 = all 100000000000...
+
+instead of using a load from the constant pool. The latter is important for
+ABS/NEG/copysign etc.
+
+//===---------------------------------------------------------------------===//
+
+These functions:
+
+#include <xmmintrin.h>
+__m128i a;
+void x(unsigned short n) {
+ a = _mm_slli_epi32 (a, n);
+}
+void y(unsigned n) {
+ a = _mm_slli_epi32 (a, n);
+}
+
+compile to ( -O3 -static -fomit-frame-pointer):
+_x:
+ movzwl 4(%esp), %eax
+ movd %eax, %xmm0
+ movaps _a, %xmm1
+ pslld %xmm0, %xmm1
+ movaps %xmm1, _a
+ ret
+_y:
+ movd 4(%esp), %xmm0
+ movaps _a, %xmm1
+ pslld %xmm0, %xmm1
+ movaps %xmm1, _a
+ ret
+
+"y" looks good, but "x" does silly movzwl stuff around into a GPR. It seems
+like movd would be sufficient in both cases as the value is already zero
+extended in the 32-bit stack slot IIRC. For signed short, it should also be
+save, as a really-signed value would be undefined for pslld.
+
+
+//===---------------------------------------------------------------------===//
+
+#include <math.h>
+int t1(double d) { return signbit(d); }
+
+This currently compiles to:
+ subl $12, %esp
+ movsd 16(%esp), %xmm0
+ movsd %xmm0, (%esp)
+ movl 4(%esp), %eax
+ shrl $31, %eax
+ addl $12, %esp
+ ret
+
+We should use movmskp{s|d} instead.
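+
+As a C sketch with the SSE2 intrinsic (illustrative only):
+
+#include <emmintrin.h>
+int t1_movmsk(double d) {
+  /* movmskpd copies the sign bits of both lanes into the low two bits */
+  return _mm_movemask_pd(_mm_set_sd(d)) & 1;
+}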
+
+//===---------------------------------------------------------------------===//
+
+CodeGen/X86/vec_align.ll tests whether we can turn 4 scalar loads into a single
+(aligned) vector load. This functionality has a couple of problems.
+
+1. The code to infer alignment from loads of globals is in the X86 backend,
+ not the dag combiner. This is because dagcombine2 needs to be able to see
+ through the X86ISD::Wrapper node, which DAGCombine can't really do.
+2. The code for turning 4 x load into a single vector load is target
+ independent and should be moved to the dag combiner.
+3. The code for turning 4 x load into a vector load can only handle a direct
+ load from a global or a direct load from the stack. It should be generalized
+ to handle any load from P, P+4, P+8, P+12, where P can be anything.
+4. The alignment inference code cannot handle loads from globals in non-static
+ mode because it doesn't look through the extra dyld stub load. If you try
+ vec_align.ll without -relocation-model=static, you'll see what I mean.
+
+//===---------------------------------------------------------------------===//
+
+We should lower store(fneg(load p), q) into an integer load+xor+store, which
+eliminates a constant pool load. For example, consider:
+
+define i64 @ccosf(float %z.0, float %z.1) nounwind readonly {
+entry:
+ %tmp6 = sub float -0.000000e+00, %z.1 ; <float> [#uses=1]
+ %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly
+ ret i64 %tmp20
+}
+
+This currently compiles to:
+
+LCPI1_0: # <4 x float>
+ .long 2147483648 # float -0
+ .long 2147483648 # float -0
+ .long 2147483648 # float -0
+ .long 2147483648 # float -0
+_ccosf:
+ subl $12, %esp
+ movss 16(%esp), %xmm0
+ movss %xmm0, 4(%esp)
+ movss 20(%esp), %xmm0
+ xorps LCPI1_0, %xmm0
+ movss %xmm0, (%esp)
+ call L_ccoshf$stub
+ addl $12, %esp
+ ret
+
+Note the load into xmm0, then xor (to negate), then store. In PIC mode,
+this code computes the pic base and does two loads to do the constant pool
+load, so the improvement is much bigger.
+
+The tricky part about this xform is that the argument load/store isn't exposed
+until post-legalize, and at that point, the fneg has been custom expanded into
+an X86 fxor. This means that we need to handle this case in the x86 backend
+instead of in target independent code.
+
+//===---------------------------------------------------------------------===//
+
+Non-SSE4 insert into 16 x i8 is atrociously bad.
+
+//===---------------------------------------------------------------------===//
+
+<2 x i64> extract is substantially worse than <2 x f64>, even if the destination
+is memory.
+
+//===---------------------------------------------------------------------===//
+
+SSE4 extract-to-mem ops aren't being pattern matched because of the AssertZext
+sitting between the truncate and the extract.
+
+//===---------------------------------------------------------------------===//
+
+INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert
+any number of 0.0 simultaneously. Currently we only use it for simple
+insertions.
+
+See comments in LowerINSERT_VECTOR_ELT_SSE4.
+
+//===---------------------------------------------------------------------===//
+
+On a random note, SSE2 should declare insert/extract of 2 x f64 as legal, not
+Custom. All combinations of insert/extract reg-reg, reg-mem, and mem-reg are
+legal; it'll just take a few extra patterns written in the .td file.
+
+Note: this is not a code quality issue; the custom lowered code happens to be
+right, but we shouldn't have to custom lower anything. This is probably related
+to <2 x i64> ops being so bad.
+
+//===---------------------------------------------------------------------===//
+
+'select' on vectors and scalars could be a whole lot better. We currently
+lower them to conditional branches. On x86-64 for example, we compile this:
+
+double test(double a, double b, double c, double d) { return a<b ? c : d; }
+
+to:
+
+_test:
+ ucomisd %xmm0, %xmm1
+ ja LBB1_2 # entry
+LBB1_1: # entry
+ movapd %xmm3, %xmm2
+LBB1_2: # entry
+ movapd %xmm2, %xmm0
+ ret
+
+instead of:
+
+_test:
+ cmpltsd %xmm1, %xmm0
+ andpd %xmm0, %xmm2
+ andnpd %xmm3, %xmm0
+ orpd %xmm2, %xmm0
+ ret
+
+For unpredictable branches, the latter is much more efficient. This should
+just be a matter of having scalar sse map to SELECT_CC and custom expanding
+or iseling it.
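+
+The cmp/and/andnot/or sequence is the standard branch-free select. As a C
+sketch with SSE2 intrinsics (illustrative only; names are made up):
+
+#include <emmintrin.h>
+double sel(double a, double b, double c, double d) {
+  /* mask is all-ones if a < b, else all-zeros */
+  __m128d m = _mm_cmplt_sd(_mm_set_sd(a), _mm_set_sd(b));
+  /* (mask & c) | (~mask & d) */
+  __m128d r = _mm_or_pd(_mm_and_pd(m, _mm_set_sd(c)),
+                        _mm_andnot_pd(m, _mm_set_sd(d)));
+  return _mm_cvtsd_f64(r);
+}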
+
+//===---------------------------------------------------------------------===//
+
+LLVM currently generates stack realignment code when it is not actually
+needed. The problem is that we need to know about stack alignment too early,
+before RA runs.
+
+At that point we don't know whether there will be a vector spill or not.
+The stack realignment logic is overly conservative here, but otherwise we
+could produce unaligned loads/stores.
+
+Fixing this will require some huge RA changes.
+
+Testcase:
+#include <emmintrin.h>
+
+typedef short vSInt16 __attribute__ ((__vector_size__ (16)));
+
+static const vSInt16 a = {-22725, -12873, -22725, -12873, -22725, -12873,
+                          -22725, -12873};
+
+vSInt16 madd(vSInt16 b)
+{
+ return _mm_madd_epi16(a, b);
+}
+
+Generated code (x86-32, linux):
+madd:
+ pushl %ebp
+ movl %esp, %ebp
+ andl $-16, %esp
+ movaps .LCPI1_0, %xmm1
+ pmaddwd %xmm1, %xmm0
+ movl %ebp, %esp
+ popl %ebp
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+#include <emmintrin.h>
+__m128 foo2 (float x) {
+ return _mm_set_ps (0, 0, x, 0);
+}
+
+In x86-32 mode, we generate this spiffy code:
+
+_foo2:
+ movss 4(%esp), %xmm0
+ pshufd $81, %xmm0, %xmm0
+ ret
+
+in x86-64 mode, we generate this code, which could be better:
+
+_foo2:
+ xorps %xmm1, %xmm1
+ movss %xmm0, %xmm1
+ pshufd $81, %xmm1, %xmm0
+ ret
+
+In sse4 mode, we could use insertps to make both better.
+
+Here's another testcase that could use insertps [mem]:
+
+#include <xmmintrin.h>
+extern float x2, x3;
+__m128 foo1 (float x1, float x4) {
+ return _mm_set_ps (x2, x1, x3, x4);
+}
+
+gcc mainline compiles it to:
+
+foo1:
+ insertps $0x10, x2(%rip), %xmm0
+ insertps $0x10, x3(%rip), %xmm1
+ movaps %xmm1, %xmm2
+ movlhps %xmm0, %xmm2
+ movaps %xmm2, %xmm0
+ ret
+
+//===---------------------------------------------------------------------===//
+
+We compile vector multiply-by-constant into poor code:
+
+define <4 x i32> @f(<4 x i32> %i) nounwind {
+ %A = mul <4 x i32> %i, < i32 10, i32 10, i32 10, i32 10 >
+ ret <4 x i32> %A
+}
+
+On targets without SSE4.1, this compiles into:
+
+LCPI1_0: ## <4 x i32>
+ .long 10
+ .long 10
+ .long 10
+ .long 10
+ .text
+ .align 4,0x90
+ .globl _f
+_f:
+ pshufd $3, %xmm0, %xmm1
+ movd %xmm1, %eax
+ imull LCPI1_0+12, %eax
+ movd %eax, %xmm1
+ pshufd $1, %xmm0, %xmm2
+ movd %xmm2, %eax
+ imull LCPI1_0+4, %eax
+ movd %eax, %xmm2
+ punpckldq %xmm1, %xmm2
+ movd %xmm0, %eax
+ imull LCPI1_0, %eax
+ movd %eax, %xmm1
+ movhlps %xmm0, %xmm0
+ movd %xmm0, %eax
+ imull LCPI1_0+8, %eax
+ movd %eax, %xmm0
+ punpckldq %xmm0, %xmm1
+ movaps %xmm1, %xmm0
+ punpckldq %xmm2, %xmm0
+ ret
+
+It would be better to synthesize integer vector multiplication by constants
+using shifts and adds, pslld and paddd here. And even on targets with SSE4.1,
+simple cases such as multiplication by powers of two would be better as
+vector shifts than as multiplications.
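+
+As a C sketch of the shift/add synthesis with SSE2 intrinsics (function name
+is illustrative):
+
+#include <emmintrin.h>
+__m128i mul10(__m128i v) {
+  /* v * 10 == (v << 3) + (v << 1), i.e. two pslld plus one paddd */
+  return _mm_add_epi32(_mm_slli_epi32(v, 3), _mm_slli_epi32(v, 1));
+}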
+
+//===---------------------------------------------------------------------===//
+
+We compile this:
+
+__m128i
+foo2 (char x)
+{
+ return _mm_set_epi8 (1, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 1, 0, 0, 0, 0);
+}
+
+into:
+ movl $1, %eax
+ xorps %xmm0, %xmm0
+ pinsrw $2, %eax, %xmm0
+ movzbl 4(%esp), %eax
+ pinsrw $3, %eax, %xmm0
+ movl $256, %eax
+ pinsrw $7, %eax, %xmm0
+ ret
+
+
+gcc-4.2:
+ subl $12, %esp
+ movzbl 16(%esp), %eax
+ movdqa LC0, %xmm0
+ pinsrw $3, %eax, %xmm0
+ addl $12, %esp
+ ret
+ .const
+ .align 4
+LC0:
+ .word 0
+ .word 0
+ .word 1
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+ .word 256
+
+With SSE4, it should be
+ movdqa .LC0(%rip), %xmm0
+ pinsrb $6, %edi, %xmm0
+
+//===---------------------------------------------------------------------===//
+
+We should transform a shuffle of two vectors of constants into a single
+vector of constants. Also, insertelement of a constant into a vector of
+constants should result in a vector of constants, e.g. 2008-06-25-VecISelBug.ll.
+
+We compiled it to something horrible:
+
+ .align 4
+LCPI1_1: ## float
+ .long 1065353216 ## float 1
+ .const
+
+ .align 4
+LCPI1_0: ## <4 x float>
+ .space 4
+ .long 1065353216 ## float 1
+ .space 4
+ .long 1065353216 ## float 1
+ .text
+ .align 4,0x90
+ .globl _t
+_t:
+ xorps %xmm0, %xmm0
+ movhps LCPI1_0, %xmm0
+ movss LCPI1_1, %xmm1
+ movaps %xmm0, %xmm2
+ shufps $2, %xmm1, %xmm2
+ shufps $132, %xmm2, %xmm0
+ movaps %xmm0, 0
+
+//===---------------------------------------------------------------------===//
+rdar://5907648
+
+This function:
+
+float foo(unsigned char x) {
+ return x;
+}
+
+is turned into this IR:
+
+define float @foo(i8 zeroext %x) nounwind {
+ %tmp12 = uitofp i8 %x to float ; <float> [#uses=1]
+ ret float %tmp12
+}
+
+which compiles to (x86-32):
+
+_foo:
+ subl $4, %esp
+ movzbl 8(%esp), %eax
+ cvtsi2ss %eax, %xmm0
+ movss %xmm0, (%esp)
+ flds (%esp)
+ addl $4, %esp
+ ret
+
+We should be able to use:
+ cvtsi2ss 8(%esp), %xmm0
+since we know the stack slot is already zext'd.
+
+//===---------------------------------------------------------------------===//
+
+Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64))
+when code size is critical. movlps is slower than movsd on core2 but it's one
+byte shorter.
+
+//===---------------------------------------------------------------------===//
+
+We should use a dynamic programming based approach to tell when using FPStack
+operations is cheaper than SSE. SciMark montecarlo contains code like this
+for example:
+
+double MonteCarlo_num_flops(int Num_samples) {
+ return ((double) Num_samples)* 4.0;
+}
+
+In fpstack mode, this compiles into:
+
+LCPI1_0:
+ .long 1082130432 ## float 4.000000e+00
+_MonteCarlo_num_flops:
+ subl $4, %esp
+ movl 8(%esp), %eax
+ movl %eax, (%esp)
+ fildl (%esp)
+ fmuls LCPI1_0
+ addl $4, %esp
+ ret
+
+in SSE mode, it compiles into significantly slower code:
+
+_MonteCarlo_num_flops:
+ subl $12, %esp
+ cvtsi2sd 16(%esp), %xmm0
+ mulsd LCPI1_0, %xmm0
+ movsd %xmm0, (%esp)
+ fldl (%esp)
+ addl $12, %esp
+ ret
+
+There are also other cases in scimark where using fpstack is better; for
+example, it is cheaper to do fld1 than to load from a constant pool, so
+"load, add 1.0, store" is better done in the fp stack, etc.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-UNIMPLEMENTED.txt b/lib/Target/X86/README-UNIMPLEMENTED.txt
new file mode 100644
index 0000000..69dc8ee
--- /dev/null
+++ b/lib/Target/X86/README-UNIMPLEMENTED.txt
@@ -0,0 +1,14 @@
+//===---------------------------------------------------------------------===//
+// Testcases that crash the X86 backend because they aren't implemented
+//===---------------------------------------------------------------------===//
+
+These are cases we know the X86 backend doesn't handle. Patches are welcome
+and appreciated, because no one has signed up to implement these yet.
+Implementing these would allow elimination of the corresponding intrinsics,
+which would be great.
+
+1) vector shifts
+2) vector comparisons
+3) vector fp<->int conversions: PR2683, PR2684, PR2685, PR2686, PR2688
+4) bitcasts from vectors to scalars: PR2804
+
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
new file mode 100644
index 0000000..ad12137
--- /dev/null
+++ b/lib/Target/X86/README-X86-64.txt
@@ -0,0 +1,251 @@
+//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===//
+
+Implement different PIC models? Right now we only support Mac OS X with small
+PIC code model.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+extern void xx(void);
+void bar(void) {
+ xx();
+}
+
+gcc compiles to:
+
+.globl _bar
+_bar:
+ jmp _xx
+
+We need to do the tailcall optimization as well.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
+multiplication by a constant. How much of it applies to Intel's X86-64
+implementation? There are definite trade-offs to consider: latency vs. register
+pressure vs. code size.
+
+//===---------------------------------------------------------------------===//
+
+Are we better off using branches instead of cmove to implement FP to
+unsigned i64?
+
+_conv:
+ ucomiss LC0(%rip), %xmm0
+ cvttss2siq %xmm0, %rdx
+ jb L3
+ subss LC0(%rip), %xmm0
+ movabsq $-9223372036854775808, %rax
+ cvttss2siq %xmm0, %rdx
+ xorq %rax, %rdx
+L3:
+ movq %rdx, %rax
+ ret
+
+instead of
+
+_conv:
+ movss LCPI1_0(%rip), %xmm1
+ cvttss2siq %xmm0, %rcx
+ movaps %xmm0, %xmm2
+ subss %xmm1, %xmm2
+ cvttss2siq %xmm2, %rax
+ movabsq $-9223372036854775808, %rdx
+ xorq %rdx, %rax
+ ucomiss %xmm1, %xmm0
+ cmovb %rcx, %rax
+ ret
+
+Seems like the jb branch has a high likelihood of being taken. It would
+have saved a few instructions.
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen:
+
+int X[2];
+int b;
+void test(void) {
+ memset(X, b, 2*sizeof(X[0]));
+}
+
+llc:
+ movq _b@GOTPCREL(%rip), %rax
+ movzbq (%rax), %rax
+ movq %rax, %rcx
+ shlq $8, %rcx
+ orq %rax, %rcx
+ movq %rcx, %rax
+ shlq $16, %rax
+ orq %rcx, %rax
+ movq %rax, %rcx
+ shlq $32, %rcx
+ movq _X@GOTPCREL(%rip), %rdx
+ orq %rax, %rcx
+ movq %rcx, (%rdx)
+ ret
+
+gcc:
+ movq _b@GOTPCREL(%rip), %rax
+ movabsq $72340172838076673, %rdx
+ movzbq (%rax), %rax
+ imulq %rdx, %rax
+ movq _X@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+ ret
+
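+The magic constant 72340172838076673 is 0x0101010101010101; multiplying by
+it replicates a byte into all eight byte lanes. A C sketch of the idiom:
+
+#include <stdint.h>
+uint64_t splat8(uint8_t b) {
+  /* each set bit of the constant contributes one shifted copy of b */
+  return (uint64_t)b * 0x0101010101010101ULL;
+}
+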
+//===---------------------------------------------------------------------===//
+
+The vararg function prologue can be further optimized. Currently all XMM
+registers are stored into the register save area. Most of the stores can be
+eliminated since the upper bound on the number of XMM registers used is
+passed in %al. gcc produces something like the following:
+
+ movzbl %al, %edx
+ leaq 0(,%rdx,4), %rax
+ leaq 4+L2(%rip), %rdx
+ leaq 239(%rsp), %rax
+ jmp *%rdx
+ movaps %xmm7, -15(%rax)
+ movaps %xmm6, -31(%rax)
+ movaps %xmm5, -47(%rax)
+ movaps %xmm4, -63(%rax)
+ movaps %xmm3, -79(%rax)
+ movaps %xmm2, -95(%rax)
+ movaps %xmm1, -111(%rax)
+ movaps %xmm0, -127(%rax)
+L2:
+
+It jumps over the movaps that do not need to be stored. It is hard to see
+this being significant, as it adds 5 instructions (including an indirect
+branch) to avoid executing 0 to 8 stores in the function prologue.
+
+Perhaps we can optimize for the common case where no XMM registers are used
+for parameter passing, i.e. if %al == 0, jump over all stores. Or, in the
+case of a leaf function where we can determine that no XMM input parameter
+is needed, avoid emitting the stores at all.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 has a complex calling convention for aggregate passing by value:
+
+1. If the size of an object is larger than two eightbytes, or in C++, is a non-
+ POD structure or union type, or contains unaligned fields, it has class
+ MEMORY.
+2. Both eightbytes get initialized to class NO_CLASS.
+3. Each field of an object is classified recursively so that always two fields
+ are considered. The resulting class is calculated according to the classes
+ of the fields in the eightbyte:
+ (a) If both classes are equal, this is the resulting class.
+ (b) If one of the classes is NO_CLASS, the resulting class is the other
+ class.
+ (c) If one of the classes is MEMORY, the result is the MEMORY class.
+ (d) If one of the classes is INTEGER, the result is the INTEGER.
+ (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as
+ class.
+ (f) Otherwise class SSE is used.
+4. Then a post merger cleanup is done:
+ (a) If one of the classes is MEMORY, the whole argument is passed in memory.
+ (b) If SSEUP is not preceded by SSE, it is converted to SSE.
+
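+A C sketch of the per-eightbyte class merge described in rule 3 (enum and
+function names are illustrative, not the actual frontend code):
+
+enum Class { NO_CLASS, INTEGER, SSE, SSEUP, X87, X87UP, COMPLEX_X87, MEMORY };
+
+static enum Class merge(enum Class a, enum Class b) {
+  if (a == b) return a;                                /* rule (a) */
+  if (a == NO_CLASS) return b;                         /* rule (b) */
+  if (b == NO_CLASS) return a;
+  if (a == MEMORY || b == MEMORY) return MEMORY;       /* rule (c) */
+  if (a == INTEGER || b == INTEGER) return INTEGER;    /* rule (d) */
+  if (a == X87 || a == X87UP || a == COMPLEX_X87 ||
+      b == X87 || b == X87UP || b == COMPLEX_X87)
+    return MEMORY;                                     /* rule (e) */
+  return SSE;                                          /* rule (f) */
+}
+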
+Currently the llvm frontend does not handle this correctly.
+
+Problem 1:
+ typedef struct { int i; double d; } QuadWordS;
+It is currently passed in two i64 integer registers. However, a
+gcc-compiled callee expects the second element 'd' to be passed in XMM0.
+
+Problem 2:
+ typedef struct { int32_t i; float j; double d; } QuadWordS;
+The size of the first two fields == i64, so they will be combined and
+passed in an integer register (RDI). The third field is still passed in
+XMM0.
+
+Problem 3:
+ typedef struct { int64_t i; int8_t j; int64_t d; } S;
+ void test(S s)
+The size of this aggregate is greater than two i64s, so it should be passed
+in memory. Currently llvm breaks it down and passes it in three integer
+registers.
+
+Problem 4:
+Taking problem 3 one step further: a function expects an aggregate value
+in memory followed by more parameter(s) passed in register(s).
+ void test(S s, int b)
+
+LLVM IR does not allow passing aggregates as parameters, so the frontend
+must break the aggregate value (in problems 3 and 4) into a number of
+scalar values:
+ void %test(long %s.i, byte %s.j, long %s.d);
+
+However, if the backend were to lower this code literally, it would pass
+the 3 values in integer registers. To force them to be passed in memory,
+the frontend should change the function signature to:
+ void %test(long %undef1, long %undef2, long %undef3, long %undef4,
+ long %undef5, long %undef6,
+ long %s.i, byte %s.j, long %s.d);
+And the call site would look something like this:
+ call void %test( undef, undef, undef, undef, undef, undef,
+ %tmp.s.i, %tmp.s.j, %tmp.s.d );
+The first 6 undef parameters would exhaust the 6 integer registers used for
+parameter passing. The following three integer values would then be forced into
+memory.
+
+For problem 4, the parameter 'd' would be moved to the front of the
+parameter list so it will be passed in a register:
+ void %test(int %d,
+ long %undef1, long %undef2, long %undef3, long %undef4,
+ long %undef5, long %undef6,
+ long %s.i, byte %s.j, long %s.d);
+
+//===---------------------------------------------------------------------===//
+
+Right now the asm printer assumes GlobalAddress are accessed via RIP relative
+addressing. Therefore, it is not possible to generate this:
+ movabsq $__ZTV10polynomialIdE+16, %rax
+
+That is ok for now since we currently only support the small code model, so
+the above is selected as:
+ leaq __ZTV10polynomialIdE+16(%rip), %rax
+
+This is probably slightly slower but is much shorter than movabsq. However,
+if we were to support medium or larger code models, we would need to use
+the movabs instruction. We should probably introduce something like
+AbsoluteAddress to
+distinguish it from GlobalAddress so the asm printer and JIT code emitter can
+do the right thing.
+
+//===---------------------------------------------------------------------===//
+
+It's not possible to reference the AH, BH, CH, and DH registers in an
+instruction requiring a REX prefix. However, divb and mulb both produce
+results in AH. If isel emits a CopyFromReg, it gets turned into a movb
+whose destination may be allocated one of r8b - r15b, which is invalid.
+
+To get around this, isel emits a CopyFromReg from AX and then right-shifts
+it by 8 and truncates it. It's not pretty, but it works. We need some
+register allocation magic to make the hack go away (e.g. putting additional
+constraints on the result of the movb).
+
+//===---------------------------------------------------------------------===//
+
+The x86-64 ABI for hidden-argument struct returns requires that the
+incoming value of %rdi be copied into %rax by the callee upon return.
+
+The idea is that it saves callers from having to remember this value,
+which would often require a callee-saved register. Callees usually
+need to keep this value live for most of their body anyway, so it
+doesn't add a significant burden on them.
+
+We currently implement this in codegen; however, this is suboptimal because
+it makes it quite awkward to implement the optimization for callers.
+
+A better implementation would be to relax the LLVM IR rules for sret
+arguments to allow a function with an sret argument to have a non-void
+return type, and to have the front-end set up the sret argument value
+as the return value of the function. The front-end could more easily
+emit uses of the returned struct value to be in terms of the function's
+lowered return value, and it would free non-C frontends from a
+complication only required by a C-based ABI.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
new file mode 100644
index 0000000..710bd03
--- /dev/null
+++ b/lib/Target/X86/README.txt
@@ -0,0 +1,1899 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend.
+//===---------------------------------------------------------------------===//
+
+We should add support for the "movbe" instruction, which does a
+byte-swapping copy (3-addr bswap + memory support?). It is available on
+Atom processors.
+
+//===---------------------------------------------------------------------===//
+
+CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86
+backend knows how to three-addressify this shift, but it appears the register
+allocator isn't even asking it to do so in this case. We should investigate
+why this isn't happening; it could have a significant impact on other
+important cases for X86 as well.
+
+//===---------------------------------------------------------------------===//
+
+This should be one DIV/IDIV instruction, not a libcall:
+
+unsigned test(unsigned long long X, unsigned Y) {
+ return X/Y;
+}
+
+This can be done trivially with a custom legalizer. What about overflow
+though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
+
+//===---------------------------------------------------------------------===//
+
+Improvements to the multiply -> shift/add algorithm:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html
+
+//===---------------------------------------------------------------------===//
+
+Improve code like this (occurs fairly frequently, e.g. in LLVM):
+long long foo(int x) { return 1LL << x; }
+
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01109.html
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01128.html
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01136.html
+
+Another useful one would be ~0ULL >> X and ~0ULL << X.
+
+One better solution for 1LL << x is:
+ xorl %eax, %eax
+ xorl %edx, %edx
+ testb $32, %cl
+ sete %al
+ setne %dl
+ sall %cl, %eax
+ sall %cl, %edx
+
+But that requires good 8-bit subreg support.
+
+Also, this might be better. It's an extra shift, but it's one instruction
+shorter, and doesn't stress 8-bit subreg support.
+(From http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01148.html,
+but without the unnecessary and.)
+ movl %ecx, %eax
+ shrl $5, %eax
+ movl %eax, %edx
+ xorl $1, %edx
+ sall %cl, %eax
+ sall %cl, %edx
+
+64-bit shifts (in general) expand to really bad code. Instead of using
+cmovs, we should expand to a conditional branch like GCC produces.
+
+//===---------------------------------------------------------------------===//
+
+Compile this:
+_Bool f(_Bool a) { return a!=1; }
+
+into:
+ movzbl %dil, %eax
+ xorl $1, %eax
+ ret
+
+(Although note that this isn't a legal way to express the code that llvm-gcc
+currently generates for that function.)
+
+//===---------------------------------------------------------------------===//
+
+Some isel ideas:
+
+1. Dynamic programming based approach when compile time is not an
+   issue.
+2. Code duplication (addressing mode) during isel.
+3. Other ideas from "Register-Sensitive Selection, Duplication, and
+ Sequencing of Instructions".
+4. Scheduling for reduced register pressure. E.g. "Minimum Register
+ Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs"
+ and other related papers.
+ http://citeseer.ist.psu.edu/govindarajan01minimum.html
+
+//===---------------------------------------------------------------------===//
+
+Should we promote i16 to i32 to avoid partial register update stalls?
+
+//===---------------------------------------------------------------------===//
+
+Leave any_extend as pseudo instruction and hint to register
+allocator. Delay codegen until post register allocation.
+Note: any_extend is now turned into an INSERT_SUBREG. We still need to teach
+the coalescer how to deal with it though.
+
+//===---------------------------------------------------------------------===//
+
+It appears icc uses push for parameter passing. We need to investigate.
+
+//===---------------------------------------------------------------------===//
+
+Only use inc/neg/not instructions on processors where they are faster than
+add/sub/xor. They are slower on the P4 due to only updating some processor
+flags.
+
+//===---------------------------------------------------------------------===//
+
+The instruction selector sometimes misses folding a load into a compare. The
+pattern is written as (cmp reg, (load p)). Because the compare isn't
+commutative, it is not matched with the load on both sides. The dag combiner
+should be made smart enough to canonicalize the load into the RHS of a compare
+when it can invert the result of the compare for free.
+
+//===---------------------------------------------------------------------===//
+
+How about intrinsics? An example is:
+ *res = _mm_mulhi_epu16(*A, _mm_mul_epu32(*B, *C));
+
+compiles to
+ pmuludq (%eax), %xmm0
+ movl 8(%esp), %eax
+ movdqa (%eax), %xmm1
+ pmulhuw %xmm0, %xmm1
+
+The transformation probably requires a X86 specific pass or a DAG combiner
+target specific hook.
+
+//===---------------------------------------------------------------------===//
+
+In many cases, LLVM generates code like this:
+
+_test:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ setl %al
+ movzbl %al, %eax
+ ret
+
+On some processors (which ones?), it is more efficient to do this:
+
+_test:
+ movl 8(%esp), %ebx
+ xor %eax, %eax
+ cmpl %ebx, 4(%esp)
+ setl %al
+ ret
+
+Doing this correctly is tricky though, as the xor clobbers the flags.
+
+//===---------------------------------------------------------------------===//
+
+We should generate bts/btr/etc instructions on targets where they are cheap or
+when codesize is important. e.g., for:
+
+void setbit(int *target, int bit) {
+ *target |= (1 << bit);
+}
+void clearbit(int *target, int bit) {
+ *target &= ~(1 << bit);
+}
+
+//===---------------------------------------------------------------------===//
+
+Instead of the following for memset char*, 1, 10:
+
+ movl $16843009, 4(%edx)
+ movl $16843009, (%edx)
+ movw $257, 8(%edx)
+
+It might be better to generate
+
+ movl $16843009, %eax
+ movl %eax, 4(%edx)
+ movl %eax, (%edx)
+ movw %ax, 8(%edx)
+
+when we can spare a register. It reduces code size.
+
+//===---------------------------------------------------------------------===//
+
+Evaluate what the best way to codegen sdiv X, (2^C) is. For X/8, we currently
+get this:
+
+define i32 @test1(i32 %X) {
+ %Y = sdiv i32 %X, 8
+ ret i32 %Y
+}
+
+_test1:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ sarl $31, %ecx
+ shrl $29, %ecx
+ addl %ecx, %eax
+ sarl $3, %eax
+ ret
+
+GCC knows several different ways to codegen it, one of which is this:
+
+_test1:
+ movl 4(%esp), %eax
+ cmpl $-1, %eax
+ leal 7(%eax), %ecx
+ cmovle %ecx, %eax
+ sarl $3, %eax
+ ret
+
+which is probably slower, but it's interesting at least :)
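+
+In C terms, the adjustment the first sequence performs is (a sketch,
+assuming a 32-bit int with arithmetic right shift):
+
+int sdiv8(int x) {
+  int bias = (x >> 31) & 7;  /* 7 if x is negative, else 0 */
+  /* adding the bias makes the shift round toward zero, as sdiv requires */
+  return (x + bias) >> 3;
+}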
+
+//===---------------------------------------------------------------------===//
+
+We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and
+rep/movsl. We should leave these as libcalls for everything over a much
+lower threshold, since libc is hand-tuned for medium and large mem ops
+(avoiding RFO for large stores, TLB preheating, etc.).
+
+//===---------------------------------------------------------------------===//
+
+Optimize this into something reasonable:
+ x * copysign(1.0, y) * copysign(1.0, z)
+
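+A C sketch of a reasonable expansion (assuming IEEE-754 doubles; the
+function name is made up):
+
+#include <stdint.h>
+#include <string.h>
+double mulsigns(double x, double y, double z) {
+  /* multiplying by copysign(1.0, y) * copysign(1.0, z) only xors the
+     sign bits of y and z into x */
+  uint64_t xb, yb, zb;
+  memcpy(&xb, &x, 8);
+  memcpy(&yb, &y, 8);
+  memcpy(&zb, &z, 8);
+  xb ^= (yb ^ zb) & 0x8000000000000000ULL;
+  memcpy(&x, &xb, 8);
+  return x;
+}
+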
+//===---------------------------------------------------------------------===//
+
+Optimize copysign(x, *y) to use an integer load from y.
+
+//===---------------------------------------------------------------------===//
+
+The following tests perform worse with LSR:
+
+lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor.
+
+//===---------------------------------------------------------------------===//
+
+Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 /
+FR64 to VR128.
+
+//===---------------------------------------------------------------------===//
+
+Adding to the list of cmp / test poor codegen issues:
+
+int test(__m128 *A, __m128 *B) {
+ if (_mm_comige_ss(*A, *B))
+ return 3;
+ else
+ return 4;
+}
+
+_test:
+ movl 8(%esp), %eax
+ movaps (%eax), %xmm0
+ movl 4(%esp), %eax
+ movaps (%eax), %xmm1
+ comiss %xmm0, %xmm1
+ setae %al
+ movzbl %al, %ecx
+ movl $3, %eax
+ movl $4, %edx
+ cmpl $0, %ecx
+ cmove %edx, %eax
+ ret
+
+Note the setae, movzbl, cmpl, cmove can be replaced with a single cmovae.
+There are a number of issues. 1) We are introducing a setcc between the
+result of the intrinsic call and the select. 2) The intrinsic is expected to
+produce an i32 value, so an any-extend (which becomes a zero-extend) is
+added.
+
+We probably need some kind of target DAG combine hook to fix this.
+
+//===---------------------------------------------------------------------===//
+
+We generate significantly worse code for this than GCC:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21150
+http://gcc.gnu.org/bugzilla/attachment.cgi?id=8701
+
+There is also one case we do worse on PPC.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+int test(int a)
+{
+ return a * 3;
+}
+
+We currently emit
+ imull $3, 4(%esp), %eax
+
+Perhaps this is what we really should generate? Is imull three or four
+cycles? Note: ICC generates this:
+ movl 4(%esp), %eax
+ leal (%eax,%eax,2), %eax
+
+The current instruction priority is based on pattern complexity. The former
+is more "complex" because it folds a load, so the latter will not be
+emitted.
+
+Perhaps we should use AddedComplexity to give LEA32r a higher priority? We
+should always try to match LEA first since the LEA matching code does some
+estimate to determine whether the match is profitable.
+
+However, if we care more about code size, then imull is better. It's two bytes
+shorter than movl + leal.
+
+On a Pentium M, both variants have the same characteristics with regard
+to throughput; however, the multiplication has a latency of four cycles, as
+opposed to two cycles for the movl+lea variant.
+
+//===---------------------------------------------------------------------===//
+
+__builtin_ffs codegen is messy.
+
+int ffs_(unsigned X) { return __builtin_ffs(X); }
+
+llvm produces:
+ffs_:
+ movl 4(%esp), %ecx
+ bsfl %ecx, %eax
+ movl $32, %edx
+ cmove %edx, %eax
+ incl %eax
+ xorl %edx, %edx
+ testl %ecx, %ecx
+ cmove %edx, %eax
+ ret
+
+vs gcc:
+
+_ffs_:
+ movl $-1, %edx
+ bsfl 4(%esp), %eax
+ cmove %edx, %eax
+ addl $1, %eax
+ ret
+
+Another example of __builtin_ffs (use predsimplify to eliminate a select):
+
+int foo (unsigned long j) {
+ if (j)
+ return __builtin_ffs (j) - 1;
+ else
+ return 0;
+}
+
+//===---------------------------------------------------------------------===//
+
+It appears gcc places string data with linkonce linkage in
+.section __TEXT,__const_coal,coalesced instead of
+.section __DATA,__const_coal,coalesced.
+Take a look at darwin.h; there are other Darwin assembler directives that we
+do not make use of.
+
+//===---------------------------------------------------------------------===//
+
+define i32 @foo(i32* %a, i32 %t) {
+entry:
+ br label %cond_true
+
+cond_true: ; preds = %cond_true, %entry
+ %x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ] ; <i32> [#uses=3]
+ %t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; <i32> [#uses=1]
+ %tmp2 = getelementptr i32* %a, i32 %x.0.0 ; <i32*> [#uses=1]
+ %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp5 = add i32 %t_addr.0.0, %x.0.0 ; <i32> [#uses=1]
+ %tmp7 = add i32 %tmp5, %tmp3 ; <i32> [#uses=2]
+ %tmp9 = add i32 %x.0.0, 1 ; <i32> [#uses=2]
+ %tmp = icmp sgt i32 %tmp9, 39 ; <i1> [#uses=1]
+ br i1 %tmp, label %bb12, label %cond_true
+
+bb12: ; preds = %cond_true
+ ret i32 %tmp7
+}
+is pessimized by -loop-reduce and -indvars
+
+//===---------------------------------------------------------------------===//
+
+u32 to float conversion improvement:
+
+float uint32_2_float( unsigned u ) {
+ float fl = (int) (u & 0xffff);
+ float fh = (int) (u >> 16);
+ fh *= 0x1.0p16f;
+ return fh + fl;
+}
+
+00000000 subl $0x04,%esp
+00000003 movl 0x08(%esp,1),%eax
+00000007 movl %eax,%ecx
+00000009 shrl $0x10,%ecx
+0000000c cvtsi2ss %ecx,%xmm0
+00000010 andl $0x0000ffff,%eax
+00000015 cvtsi2ss %eax,%xmm1
+00000019 mulss 0x00000078,%xmm0
+00000021 addss %xmm1,%xmm0
+00000025 movss %xmm0,(%esp,1)
+0000002a flds (%esp,1)
+0000002d addl $0x04,%esp
+00000030 ret
+
+//===---------------------------------------------------------------------===//
+
+When using the fastcc ABI, align the stack slot of a double argument on an
+8-byte boundary to improve performance.
+
+//===---------------------------------------------------------------------===//
+
+Codegen:
+
+int f(int a, int b) {
+ if (a == 4 || a == 6)
+ b++;
+ return b;
+}
+
+
+as:
+
+or eax, 2
+cmp eax, 6
+jz label
+
+//===---------------------------------------------------------------------===//
+
+GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting
+simplifications for integer "x cmp y ? a : b". For example, instead of:
+
+int G;
+void f(int X, int Y) {
+ G = X < 0 ? 14 : 13;
+}
+
+compiling to:
+
+_f:
+ movl $14, %eax
+ movl $13, %ecx
+ movl 4(%esp), %edx
+ testl %edx, %edx
+ cmovl %eax, %ecx
+ movl %ecx, _G
+ ret
+
+it could be:
+_f:
+ movl 4(%esp), %eax
+ sarl $31, %eax
+ notl %eax
+ addl $14, %eax
+ movl %eax, _G
+ ret
+
+etc.
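+
+In C terms, the branch-free form above is (a sketch, assuming arithmetic
+right shift of negative ints):
+
+int sel_14_13(int x) {
+  /* x >> 31 is -1 when x < 0, else 0; ~(-1) == 0 and ~0 == -1,
+     so this yields 14 when x < 0 and 13 otherwise */
+  return 14 + ~(x >> 31);
+}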
+
+Another is:
+int usesbb(unsigned int a, unsigned int b) {
+ return (a < b ? -1 : 0);
+}
+to:
+_usesbb:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ sbbl %eax, %eax
+ ret
+
+instead of:
+_usesbb:
+ xorl %eax, %eax
+ movl 8(%esp), %ecx
+ cmpl %ecx, 4(%esp)
+ movl $4294967295, %ecx
+ cmovb %ecx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Currently we don't have elimination of redundant stack manipulations. Consider
+the code:
+
+int %main() {
+entry:
+ call fastcc void %test1( )
+ call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) )
+ ret int 0
+}
+
+declare fastcc void %test1()
+
+declare fastcc void %test2(sbyte*)
+
+
+This currently compiles to:
+
+ subl $16, %esp
+ call _test5
+ addl $12, %esp
+ subl $16, %esp
+ movl $_test5, (%esp)
+ call _test6
+ addl $12, %esp
+
+The add/sub pair is really unneeded here.
+
+//===---------------------------------------------------------------------===//
+
+Consider the expansion of:
+
+define i32 @test3(i32 %X) {
+ %tmp1 = urem i32 %X, 255
+ ret i32 %tmp1
+}
+
+Currently it compiles to:
+
+...
+ movl $2155905153, %ecx
+ movl 8(%esp), %esi
+ movl %esi, %eax
+ mull %ecx
+...
+
+This could be "reassociated" into:
+
+ movl $2155905153, %eax
+ movl 8(%esp), %ecx
+ mull %ecx
+
+to avoid the copy. In fact, the existing two-address stuff would do this
+except that mul isn't a commutative 2-addr instruction. I guess this has
+to be done at isel time based on the #uses of the mul?
+
+//===---------------------------------------------------------------------===//
+
+Make sure the instruction which starts a loop does not cross a cacheline
+boundary. This requires knowing the exact length of each machine instruction.
+That is somewhat complicated, but doable. Example 256.bzip2:
+
+In the new trace, the hot loop has an instruction which crosses a cacheline
+boundary. In addition to potential cache misses, this can't help decoding as I
+imagine there has to be some kind of complicated decoder reset and realignment
+to grab the bytes from the next cacheline.
+
+532 532 0x3cfc movb (1809(%esp, %esi), %bl <<<--- spans 2 64 byte lines
+942 942 0x3d03 movl %dh, (1809(%esp, %esi)
+937 937 0x3d0a incl %esi
+3 3 0x3d0b cmpb %bl, %dl
+27 27 0x3d0d jnz 0x000062db <main+11707>
+
+//===---------------------------------------------------------------------===//
+
+In c99 mode, the preprocessor doesn't like assembly comments like #TRUNCATE.
+
+//===---------------------------------------------------------------------===//
+
+This could be a single 16-bit load.
+
+int f(char *p) {
+ if ((p[0] == 1) & (p[1] == 2)) return 1;
+ return 0;
+}
+
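+A sketch of what the combined form computes (hypothetical variant; assumes a
+little-endian target, where bytes p[0]=1, p[1]=2 form the 16-bit value 0x0201):
+
+#include <string.h>
+
+int f2(const char *p) {
+  unsigned short v;
+  memcpy(&v, p, sizeof v);   /* the single 16-bit load */
+  return v == 0x0201;
+}
+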
+//===---------------------------------------------------------------------===//
+
+We should inline lrintf and probably other libc functions.
+
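+For example (a sketch; with SSE, lrintf on x86 is a single cvtss2si, which
+rounds using the current rounding mode just as lrintf requires):
+
+#include <math.h>
+
+long round_it(float f) {   /* hypothetical name */
+  return lrintf(f);        /* should inline to cvtss2si, not a libcall */
+}
+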
+//===---------------------------------------------------------------------===//
+
+Start using the flags more. For example, compile:
+
+int add_zf(int *x, int y, int a, int b) {
+ if ((*x += y) == 0)
+ return a;
+ else
+ return b;
+}
+
+to:
+ addl %esi, (%rdi)
+ movl %edx, %eax
+ cmovne %ecx, %eax
+ ret
+instead of:
+
+_add_zf:
+ addl (%rdi), %esi
+ movl %esi, (%rdi)
+ testl %esi, %esi
+ cmove %edx, %ecx
+ movl %ecx, %eax
+ ret
+
+and:
+
+int add_zf(int *x, int y, int a, int b) {
+ if ((*x + y) < 0)
+ return a;
+ else
+ return b;
+}
+
+to:
+
+add_zf:
+ addl (%rdi), %esi
+ movl %edx, %eax
+ cmovns %ecx, %eax
+ ret
+
+instead of:
+
+_add_zf:
+ addl (%rdi), %esi
+ testl %esi, %esi
+ cmovs %edx, %ecx
+ movl %ecx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+These two functions have identical effects:
+
+unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;}
+unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;}
+
+We currently compile them to:
+
+_f:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ incl %ecx
+ movl 8(%esp), %edx
+ cmpl %edx, %ecx
+ jne LBB1_2 #UnifiedReturnBlock
+LBB1_1: #cond_true
+ addl $2, %eax
+ ret
+LBB1_2: #UnifiedReturnBlock
+ movl %ecx, %eax
+ ret
+_f2:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ incl %ecx
+ cmpl 8(%esp), %ecx
+ sete %cl
+ movzbl %cl, %ecx
+ leal 1(%ecx,%eax), %eax
+ ret
+
+both of which are inferior to GCC's:
+
+_f:
+ movl 4(%esp), %edx
+ leal 1(%edx), %eax
+ addl $2, %edx
+ cmpl 8(%esp), %eax
+ cmove %edx, %eax
+ ret
+_f2:
+ movl 4(%esp), %eax
+ addl $1, %eax
+ xorl %edx, %edx
+ cmpl 8(%esp), %eax
+ sete %dl
+ addl %edx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void test(int X) {
+ if (X) abort();
+}
+
+is currently compiled to:
+
+_test:
+ subl $12, %esp
+ cmpl $0, 16(%esp)
+ jne LBB1_1
+ addl $12, %esp
+ ret
+LBB1_1:
+ call L_abort$stub
+
+It would be better to produce:
+
+_test:
+ subl $12, %esp
+ cmpl $0, 16(%esp)
+ jne L_abort$stub
+ addl $12, %esp
+ ret
+
+This can be applied to any no-return function call that takes no arguments etc.
+Alternatively, the stack save/restore logic could be shrink-wrapped, producing
+something like this:
+
+_test:
+ cmpl $0, 4(%esp)
+ jne LBB1_1
+ ret
+LBB1_1:
+ subl $12, %esp
+ call L_abort$stub
+
+Both are useful in different situations. Finally, it could be shrink-wrapped
+and tail called, like this:
+
+_test:
+ cmpl $0, 4(%esp)
+ jne LBB1_1
+ ret
+LBB1_1:
+ pop %eax # realign stack.
+ call L_abort$stub
+
+Though this probably isn't worth it.
+
+//===---------------------------------------------------------------------===//
+
+We need to teach the codegen to convert two-address INC instructions to LEA
+when the flags are dead (likewise dec). For example, on X86-64, compile:
+
+int foo(int A, int B) {
+ return A+1;
+}
+
+to:
+
+_foo:
+ leal 1(%edi), %eax
+ ret
+
+instead of:
+
+_foo:
+ incl %edi
+ movl %edi, %eax
+ ret
+
+Another example is:
+
+;; X's live range extends beyond the shift, so the register allocator
+;; cannot coalesce it with Y. Because of this, a copy needs to be
+;; emitted before the shift to save the register value before it is
+;; clobbered. However, this copy is not needed if the register
+;; allocator turns the shift into an LEA. This also occurs for ADD.
+
+; Check that the shift gets turned into an LEA.
+; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: not grep {mov E.X, E.X}
+
+@G = external global i32 ; <i32*> [#uses=3]
+
+define i32 @test1(i32 %X, i32 %Y) {
+ %Z = add i32 %X, %Y ; <i32> [#uses=1]
+ volatile store i32 %Y, i32* @G
+ volatile store i32 %Z, i32* @G
+ ret i32 %X
+}
+
+define i32 @test2(i32 %X) {
+ %Z = add i32 %X, 1 ; <i32> [#uses=1]
+ volatile store i32 %Z, i32* @G
+ ret i32 %X
+}
+
+//===---------------------------------------------------------------------===//
+
+Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with
+a neg instead of a sub instruction. Consider:
+
+int test(char X) { return 7-X; }
+
+we currently produce:
+_test:
+ movl $7, %eax
+ movsbl 4(%esp), %ecx
+ subl %ecx, %eax
+ ret
+
+We would use one fewer register if codegen'd as:
+
+ movsbl 4(%esp), %eax
+ neg %eax
+ add $7, %eax
+ ret
+
+Note that this isn't beneficial if the load can be folded into the sub. In
+this case, we want a sub:
+
+int test(int X) { return 7-X; }
+_test:
+ movl $7, %eax
+ subl 4(%esp), %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Leaf functions that require one 4-byte spill slot have a prolog like this:
+
+_foo:
+ pushl %esi
+ subl $4, %esp
+...
+and an epilog like this:
+ addl $4, %esp
+ popl %esi
+ ret
+
+It would be smaller, and potentially faster, to push eax on entry and to
+pop into a dummy register instead of using addl/subl of esp. Just don't pop
+into any return registers :)
+
+//===---------------------------------------------------------------------===//
+
+The X86 backend should fold (branch (or (setcc, setcc))) into multiple
+branches. We generate really poor code for:
+
+double testf(double a) {
+ return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);
+}
+
+For example, the entry BB is:
+
+_testf:
+ subl $20, %esp
+ pxor %xmm0, %xmm0
+ movsd 24(%esp), %xmm1
+ ucomisd %xmm0, %xmm1
+ setnp %al
+ sete %cl
+ testb %cl, %al
+ jne LBB1_5 # UnifiedReturnBlock
+LBB1_1: # cond_true
+
+
+it would be better to replace the last four instructions with:
+
+ jp LBB1_1
+ je LBB1_5
+LBB1_1:
+
+We also codegen the inner ?: into a diamond:
+
+ cvtss2sd LCPI1_0(%rip), %xmm2
+ cvtss2sd LCPI1_1(%rip), %xmm3
+ ucomisd %xmm1, %xmm0
+ ja LBB1_3 # cond_true
+LBB1_2: # cond_true
+ movapd %xmm3, %xmm2
+LBB1_3: # cond_true
+ movapd %xmm2, %xmm0
+ ret
+
+We should sink the load that defines xmm3 into the LBB1_2 block. This should
+be pretty easy, and will nuke all the copies.
+
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+
+ _Z11no_overflowjj:
+ addl %edi, %esi
+ setae %al
+ ret
+
+FIXME: That code looks wrong; bool return is normally defined as zext.
+
+on x86-64, not:
+
+__Z11no_overflowjj:
+ addl %edi, %esi
+ cmpl %edi, %esi
+ setae %al
+ movzbl %al, %eax
+ ret
+
+
+//===---------------------------------------------------------------------===//
+
+Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the
+condition register is dead. xor reg reg is shorter than mov reg, #0.
+
+//===---------------------------------------------------------------------===//
+
+We aren't matching RMW instructions aggressively
+enough. Here's a reduced testcase (more in PR1160):
+
+define void @test(i32* %huge_ptr, i32* %target_ptr) {
+ %A = load i32* %huge_ptr ; <i32> [#uses=1]
+ %B = load i32* %target_ptr ; <i32> [#uses=1]
+ %C = or i32 %A, %B ; <i32> [#uses=1]
+ store i32 %C, i32* %target_ptr
+ ret void
+}
+
+$ llvm-as < t.ll | llc -march=x86-64
+
+_test:
+ movl (%rdi), %eax
+ orl (%rsi), %eax
+ movl %eax, (%rsi)
+ ret
+
+That should be something like:
+
+_test:
+ movl (%rdi), %eax
+ orl %eax, (%rsi)
+ ret
+
+//===---------------------------------------------------------------------===//
+
+The following code:
+
+bb114.preheader: ; preds = %cond_next94
+ %tmp231232 = sext i16 %tmp62 to i32 ; <i32> [#uses=1]
+ %tmp233 = sub i32 32, %tmp231232 ; <i32> [#uses=1]
+ %tmp245246 = sext i16 %tmp65 to i32 ; <i32> [#uses=1]
+ %tmp252253 = sext i16 %tmp68 to i32 ; <i32> [#uses=1]
+ %tmp254 = sub i32 32, %tmp252253 ; <i32> [#uses=1]
+ %tmp553554 = bitcast i16* %tmp37 to i8* ; <i8*> [#uses=2]
+ %tmp583584 = sext i16 %tmp98 to i32 ; <i32> [#uses=1]
+ %tmp585 = sub i32 32, %tmp583584 ; <i32> [#uses=1]
+ %tmp614615 = sext i16 %tmp101 to i32 ; <i32> [#uses=1]
+ %tmp621622 = sext i16 %tmp104 to i32 ; <i32> [#uses=1]
+ %tmp623 = sub i32 32, %tmp621622 ; <i32> [#uses=1]
+ br label %bb114
+
+produces:
+
+LBB3_5: # bb114.preheader
+ movswl -68(%ebp), %eax
+ movl $32, %ecx
+ movl %ecx, -80(%ebp)
+ subl %eax, -80(%ebp)
+ movswl -52(%ebp), %eax
+ movl %ecx, -84(%ebp)
+ subl %eax, -84(%ebp)
+ movswl -70(%ebp), %eax
+ movl %ecx, -88(%ebp)
+ subl %eax, -88(%ebp)
+ movswl -50(%ebp), %eax
+ subl %eax, %ecx
+ movl %ecx, -76(%ebp)
+ movswl -42(%ebp), %eax
+ movl %eax, -92(%ebp)
+ movswl -66(%ebp), %eax
+ movl %eax, -96(%ebp)
+ movw $0, -98(%ebp)
+
+This appears to be bad because the RA is not folding the store to the stack
+slot into the movl. The above instructions could be:
+ movl $32, -80(%ebp)
+...
+ movl $32, -84(%ebp)
+...
+This seems like a cross between remat and spill folding.
+
+This has redundant subtractions of %eax from a stack slot. However, %ecx doesn't
+change, so we could simply subtract %eax from %ecx first and then use %ecx (or
+vice-versa).
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+ %tmp659 = icmp slt i16 %tmp654, 0 ; <i1> [#uses=1]
+ br i1 %tmp659, label %cond_true662, label %cond_next715
+
+produces this:
+
+ testw %cx, %cx
+ movswl %cx, %esi
+ jns LBB4_109 # cond_next715
+
+Shark tells us that using %cx in the testw instruction is sub-optimal. It
+suggests using the 32-bit register (which is what ICC uses).
+
+//===---------------------------------------------------------------------===//
+
+We compile this:
+
+void compare (long long foo) {
+ if (foo < 4294967297LL)
+ abort();
+}
+
+to:
+
+compare:
+ subl $4, %esp
+ cmpl $0, 8(%esp)
+ setne %al
+ movzbw %al, %ax
+ cmpl $1, 12(%esp)
+ setg %cl
+ movzbw %cl, %cx
+ cmove %ax, %cx
+ testb $1, %cl
+ jne .LBB1_2 # UnifiedReturnBlock
+.LBB1_1: # ifthen
+ call abort
+.LBB1_2: # UnifiedReturnBlock
+ addl $4, %esp
+ ret
+
+(also really horrible code on ppc). This is due to the expand code for 64-bit
+compares. GCC produces multiple branches, which is much nicer:
+
+compare:
+ subl $12, %esp
+ movl 20(%esp), %edx
+ movl 16(%esp), %eax
+ decl %edx
+ jle .L7
+.L5:
+ addl $12, %esp
+ ret
+ .p2align 4,,7
+.L7:
+ jl .L4
+ cmpl $0, %eax
+ .p2align 4,,8
+ ja .L5
+.L4:
+ .p2align 4,,9
+ call abort
+
+//===---------------------------------------------------------------------===//
+
+Tail call optimization improvements: Tail call optimization currently
+pushes all arguments on the top of the stack (their normal place for
+non-tail-call-optimized calls) that source from the caller's arguments
+or from a virtual register (which may itself source from the caller's
+arguments). This is done to prevent overwriting parameters (see the
+example below) that might be used later.
+
+example:
+
+int callee(int32, int64);
+int caller(int32 arg1, int32 arg2) {
+ int64 local = arg2 * 2;
+ return callee(arg2, (int64)local);
+}
+
+[arg1] [!arg2 no longer valid since we moved local onto it]
+[arg2] -> [(int64)
+[RETADDR] local ]
+
+Moving arg1 onto the stack slot of the callee would overwrite arg2 of the
+caller.
+
+Possible optimizations:
+
+
+ - Analyse the actual parameters of the callee to see which would
+ overwrite a caller parameter that the callee uses, and push only
+ those onto the top of the stack.
+
+ int callee (int32 arg1, int32 arg2);
+ int caller (int32 arg1, int32 arg2) {
+ return callee(arg1,arg2);
+ }
+
+ Here we don't need to write any variables to the top of the stack
+ since they don't overwrite each other.
+
+ int callee (int32 arg1, int32 arg2);
+ int caller (int32 arg1, int32 arg2) {
+ return callee(arg2,arg1);
+ }
+
+ Here we need to push the arguments because they overwrite each
+ other.
+
+//===---------------------------------------------------------------------===//
+
+main ()
+{
+ int i = 0;
+ unsigned long int z = 0;
+
+ do {
+ z -= 0x00004000;
+ i++;
+ if (i > 0x00040000)
+ abort ();
+ } while (z > 0);
+ exit (0);
+}
+
+gcc compiles this to:
+
+_main:
+ subl $28, %esp
+ xorl %eax, %eax
+ jmp L2
+L3:
+ cmpl $262144, %eax
+ je L10
+L2:
+ addl $1, %eax
+ cmpl $262145, %eax
+ jne L3
+ call L_abort$stub
+L10:
+ movl $0, (%esp)
+ call L_exit$stub
+
+llvm:
+
+_main:
+ subl $12, %esp
+ movl $1, %eax
+ movl $16384, %ecx
+LBB1_1: # bb
+ cmpl $262145, %eax
+ jge LBB1_4 # cond_true
+LBB1_2: # cond_next
+ incl %eax
+ addl $4294950912, %ecx
+ cmpl $16384, %ecx
+ jne LBB1_1 # bb
+LBB1_3: # bb11
+ xorl %eax, %eax
+ addl $12, %esp
+ ret
+LBB1_4: # cond_true
+ call L_abort$stub
+
+1. LSR should rewrite the first cmp with induction variable %ecx.
+2. DAG combiner should fold
+ leal 1(%eax), %edx
+ cmpl $262145, %edx
+ =>
+ cmpl $262144, %eax
+
+//===---------------------------------------------------------------------===//
+
+define i64 @test(double %X) {
+ %Y = fptosi double %X to i64
+ ret i64 %Y
+}
+
+compiles to:
+
+_test:
+ subl $20, %esp
+ movsd 24(%esp), %xmm0
+ movsd %xmm0, 8(%esp)
+ fldl 8(%esp)
+ fisttpll (%esp)
+ movl 4(%esp), %edx
+ movl (%esp), %eax
+ addl $20, %esp
+ #FP_REG_KILL
+ ret
+
+This should just fldl directly from the input stack slot.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+int foo (int x) { return (x & 65535) | 255; }
+
+Should compile into:
+
+_foo:
+ movzwl 4(%esp), %eax
+ orl $255, %eax
+ ret
+
+instead of:
+_foo:
+ movl $255, %eax
+ orl 4(%esp), %eax
+ andl $65535, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+We're codegen'ing multiply of long longs inefficiently:
+
+unsigned long long LLM(unsigned long long arg1, unsigned long long arg2) {
+ return arg1 * arg2;
+}
+
+We compile to (fomit-frame-pointer):
+
+_LLM:
+ pushl %esi
+ movl 8(%esp), %ecx
+ movl 16(%esp), %esi
+ movl %esi, %eax
+ mull %ecx
+ imull 12(%esp), %esi
+ addl %edx, %esi
+ imull 20(%esp), %ecx
+ movl %esi, %edx
+ addl %ecx, %edx
+ popl %esi
+ ret
+
+This looks like a scheduling deficiency and lack of remat of the load from
+the argument area. ICC apparently produces:
+
+ movl 8(%esp), %ecx
+ imull 12(%esp), %ecx
+ movl 16(%esp), %eax
+ imull 4(%esp), %eax
+ addl %eax, %ecx
+ movl 4(%esp), %eax
+ mull 12(%esp)
+ addl %ecx, %edx
+ ret
+
+Note that it remat'd loads from 4(esp) and 12(esp). See this GCC PR:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17236
+
+//===---------------------------------------------------------------------===//
+
+We can fold a store into "zeroing a reg". Instead of:
+
+xorl %eax, %eax
+movl %eax, 124(%esp)
+
+we should get:
+
+movl $0, 124(%esp)
+
+if the flags of the xor are dead.
+
+Likewise, we isel "x<<1" into "add reg,reg". If reg is spilled, this should
+be folded into: shl [mem], 1
+
+//===---------------------------------------------------------------------===//
+
+This testcase misses a read/modify/write opportunity (from PR1425):
+
+void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
+ int i;
+ for(i=0; i<width; i++)
+ b1[i] += (1*(b0[i] + b2[i])+0)>>0;
+}
+
+We compile it down to:
+
+LBB1_2: # bb
+ movl (%esi,%edi,4), %ebx
+ addl (%ecx,%edi,4), %ebx
+ addl (%edx,%edi,4), %ebx
+ movl %ebx, (%ecx,%edi,4)
+ incl %edi
+ cmpl %eax, %edi
+ jne LBB1_2 # bb
+
+the inner loop should add to the memory location (%ecx,%edi,4), saving
+a mov. Something like:
+
+ movl (%esi,%edi,4), %ebx
+ addl (%edx,%edi,4), %ebx
+ addl %ebx, (%ecx,%edi,4)
+
+Here is another interesting example:
+
+void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
+ int i;
+ for(i=0; i<width; i++)
+ b1[i] -= (1*(b0[i] + b2[i])+0)>>0;
+}
+
+We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]:
+
+LBB9_2: # bb
+ movl (%ecx,%edi,4), %ebx
+ subl (%esi,%edi,4), %ebx
+ subl (%edx,%edi,4), %ebx
+ movl %ebx, (%ecx,%edi,4)
+ incl %edi
+ cmpl %eax, %edi
+ jne LBB9_2 # bb
+
+Additionally, LSR should rewrite the exit condition of these loops to use
+a stride-4 IV, which would allow all the scales in the loop to go away.
+This would result in smaller code and more efficient microops.
+
+//===---------------------------------------------------------------------===//
+
+In SSE mode, we turn abs and neg into a load from the constant pool plus an
+and or an xor instruction, for example:
+
+ xorpd LCPI1_0, %xmm2
+
+However, if xmm2 gets spilled, we end up with really ugly code like this:
+
+ movsd (%esp), %xmm0
+ xorpd LCPI1_0, %xmm0
+ movsd %xmm0, (%esp)
+
+Since we 'know' that this is a 'neg', we can actually "fold" the spill into
+the neg/abs instruction, turning it into an *integer* operation, like this:
+
+ xorl 2147483648, [mem+4] ## 2147483648 = (1 << 31)
+
+you could also use xorb, but xorl is less likely to lead to a partial register
+stall. Here is a contrived testcase:
+
+double a, b, c;
+void test(double *P) {
+ double X = *P;
+ a = X;
+ bar();
+ X = -X;
+ b = X;
+ bar();
+ c = X;
+}
+
+//===---------------------------------------------------------------------===//
+
+Handling llvm.memory.barrier on pre-SSE2 CPUs (which lack mfence) should
+generate a locked no-op read-modify-write such as:
+
+lock ; addl $0, (%esp)
+
+(The lock prefix is only encodable on read-modify-write instructions, so
+lock ; mov is not actually valid.)
+
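+For reference, the kind of source that needs this lowering (a minimal sketch;
+__sync_synchronize is the GCC-style full-barrier builtin):
+
+void full_barrier(void) {
+  __sync_synchronize();   /* becomes llvm.memory.barrier; pre-SSE2 has no mfence */
+}
+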
+//===---------------------------------------------------------------------===//
+
+The generated code on x86 for checking for signed overflow on a multiply done
+the obvious way is much longer than it needs to be.
+
+int x(int a, int b) {
+ long long prod = (long long)a*b;
+ return prod > 0x7FFFFFFF || prod < (-0x7FFFFFFF-1);
+}
+
+See PR2053 for more details.
+
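+A source-level reformulation that exposes the same check (hypothetical
+variant; relies on truncation wrapping on conversion, which holds on x86):
+
+int x2(int a, int b) {
+  long long prod = (long long)a * b;
+  return (int)prod != prod;   /* true iff the product overflowed 32 bits */
+}
+
+Either form could be matched to a single imull plus a read of the overflow
+flag.
+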
+//===---------------------------------------------------------------------===//
+
+We should investigate using cdq/cltd (effect: edx = sar eax, 31)
+more aggressively; it should cost the same as a move+shift on any modern
+processor, but it's a lot shorter. Downside is that it puts more
+pressure on register allocation because it has fixed operands.
+
+Example:
+int abs(int x) {return x < 0 ? -x : x;}
+
+gcc compiles this to the following when using march/mtune=pentium2/3/4/m/etc.:
+abs:
+ movl 4(%esp), %eax
+ cltd
+ xorl %edx, %eax
+ subl %edx, %eax
+ ret
+
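+The identity gcc is using, as a C sketch (relies on the arithmetic right
+shift x86 performs for signed types):
+
+int abs_cltd(int x) {   /* hypothetical name */
+  int m = x >> 31;      /* cltd: m = (x < 0) ? -1 : 0 */
+  return (x ^ m) - m;   /* xorl + subl: flips and adjusts only when negative */
+}
+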
+//===---------------------------------------------------------------------===//
+
+Consider:
+int test(unsigned long a, unsigned long b) { return -(a < b); }
+
+We currently compile this to:
+
+define i32 @test(i32 %a, i32 %b) nounwind {
+ %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ %tmp5 = sub i32 0, %tmp34 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+and
+
+_test:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ setb %al
+ movzbl %al, %eax
+ negl %eax
+ ret
+
+Several deficiencies here. First, we should instcombine zext+neg into sext:
+
+define i32 @test2(i32 %a, i32 %b) nounwind {
+ %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ %tmp34 = sext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp34
+}
+
+However, before we can do that, we have to fix the bad codegen that we get for
+sext from bool:
+
+_test2:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ setb %al
+ movzbl %al, %eax
+ shll $31, %eax
+ sarl $31, %eax
+ ret
+
+This code should be at least as good as the code above. Once this is fixed, we
+can optimize this specific case even more to:
+
+ movl 8(%esp), %eax
+ xorl %ecx, %ecx
+ cmpl %eax, 4(%esp)
+ sbbl %ecx, %ecx
+
+//===---------------------------------------------------------------------===//
+
+Take the following code (from
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541):
+
+extern unsigned char first_one[65536];
+int FirstOnet(unsigned long long arg1)
+{
+ if (arg1 >> 48)
+ return (first_one[arg1 >> 48]);
+ return 0;
+}
+
+
+The following code is currently generated:
+FirstOnet:
+ movl 8(%esp), %eax
+ cmpl $65536, %eax
+ movl 4(%esp), %ecx
+ jb .LBB1_2 # UnifiedReturnBlock
+.LBB1_1: # ifthen
+ shrl $16, %eax
+ movzbl first_one(%eax), %eax
+ ret
+.LBB1_2: # UnifiedReturnBlock
+ xorl %eax, %eax
+ ret
+
+There are a few possible improvements here:
+1. We should be able to eliminate the dead load into %ecx
+2. We could change the "movl 8(%esp), %eax" into
+ "movzwl 10(%esp), %eax"; this lets us change the cmpl
+ into a testl, which is shorter, and eliminate the shift.
+
+We could also in theory eliminate the branch by using a conditional
+for the address of the load, but that seems unlikely to be worthwhile
+in general.
+
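+Improvement 2 at the source level would look something like this (hypothetical
+variant of the function above):
+
+int FirstOnet2(unsigned long long arg1)
+{
+  unsigned short hi = (unsigned short)(arg1 >> 48);  /* single movzwl, no shift */
+  return hi ? first_one[hi] : 0;                     /* testl instead of cmpl */
+}
+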
+//===---------------------------------------------------------------------===//
+
+We compile this function:
+
+define i32 @foo(i32 %a, i32 %b, i32 %c, i8 zeroext %d) nounwind {
+entry:
+ %tmp2 = icmp eq i8 %d, 0 ; <i1> [#uses=1]
+ br i1 %tmp2, label %bb7, label %bb
+
+bb: ; preds = %entry
+ %tmp6 = add i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %tmp6
+
+bb7: ; preds = %entry
+ %tmp10 = sub i32 %a, %c ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
+
+to:
+
+_foo:
+ cmpb $0, 16(%esp)
+ movl 12(%esp), %ecx
+ movl 8(%esp), %eax
+ movl 4(%esp), %edx
+ je LBB1_2 # bb7
+LBB1_1: # bb
+ addl %edx, %eax
+ ret
+LBB1_2: # bb7
+ movl %edx, %eax
+ subl %ecx, %eax
+ ret
+
+The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2
+if it commuted the addl in LBB1_1.
+
+//===---------------------------------------------------------------------===//
+
+See rdar://4653682.
+
+From flops:
+
+LBB1_15: # bb310
+ cvtss2sd LCPI1_0, %xmm1
+ addsd %xmm1, %xmm0
+ movsd 176(%esp), %xmm2
+ mulsd %xmm0, %xmm2
+ movapd %xmm2, %xmm3
+ mulsd %xmm3, %xmm3
+ movapd %xmm3, %xmm4
+ mulsd LCPI1_23, %xmm4
+ addsd LCPI1_24, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd LCPI1_25, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd LCPI1_26, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd LCPI1_27, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd LCPI1_28, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd %xmm1, %xmm4
+ mulsd %xmm2, %xmm4
+ movsd 152(%esp), %xmm1
+ addsd %xmm4, %xmm1
+ movsd %xmm1, 152(%esp)
+ incl %eax
+ cmpl %eax, %esi
+ jge LBB1_15 # bb310
+LBB1_16: # bb358.loopexit
+ movsd 152(%esp), %xmm0
+ addsd %xmm0, %xmm0
+ addsd LCPI1_22, %xmm0
+ movsd %xmm0, 152(%esp)
+
+Rather than spilling the result of the last addsd in the loop, we should
+insert a copy to split the interval (one for the duration of the loop, one
+extending to the fall through). The register pressure in the loop isn't high
+enough to warrant the spill.
+
+Also check why xmm7 is not used at all in the function.
+
+//===---------------------------------------------------------------------===//
+
+Legalize loses track of the fact that bools are always zero extended when in
+memory. This causes us to compile abort_gzip (from 164.gzip) from:
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@in_exit.4870.b = internal global i1 false ; <i1*> [#uses=2]
+define fastcc void @abort_gzip() noreturn nounwind {
+entry:
+ %tmp.b.i = load i1* @in_exit.4870.b ; <i1> [#uses=1]
+ br i1 %tmp.b.i, label %bb.i, label %bb4.i
+bb.i: ; preds = %entry
+ tail call void @exit( i32 1 ) noreturn nounwind
+ unreachable
+bb4.i: ; preds = %entry
+ store i1 true, i1* @in_exit.4870.b
+ tail call void @exit( i32 1 ) noreturn nounwind
+ unreachable
+}
+declare void @exit(i32) noreturn nounwind
+
+into:
+
+_abort_gzip:
+ subl $12, %esp
+ movb _in_exit.4870.b, %al
+ notb %al
+ testb $1, %al
+ jne LBB1_2 ## bb4.i
+LBB1_1: ## bb.i
+ ...
+
+//===---------------------------------------------------------------------===//
+
+We compile:
+
+int test(int x, int y) {
+ return x-y-1;
+}
+
+into (-m64):
+
+_test:
+ decl %edi
+ movl %edi, %eax
+ subl %esi, %eax
+ ret
+
+it would be better to codegen as x+~y (notl+addl), since ~y == -y-1 and thus
+x + ~y == x - y - 1.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+int foo(const char *str,...)
+{
+ __builtin_va_list a; int x;
+ __builtin_va_start(a,str); x = __builtin_va_arg(a,int); __builtin_va_end(a);
+ return x;
+}
+
+gets compiled into this on x86-64:
+ subq $200, %rsp
+ movaps %xmm7, 160(%rsp)
+ movaps %xmm6, 144(%rsp)
+ movaps %xmm5, 128(%rsp)
+ movaps %xmm4, 112(%rsp)
+ movaps %xmm3, 96(%rsp)
+ movaps %xmm2, 80(%rsp)
+ movaps %xmm1, 64(%rsp)
+ movaps %xmm0, 48(%rsp)
+ movq %r9, 40(%rsp)
+ movq %r8, 32(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 8(%rsp)
+ leaq (%rsp), %rax
+ movq %rax, 192(%rsp)
+ leaq 208(%rsp), %rax
+ movq %rax, 184(%rsp)
+ movl $48, 180(%rsp)
+ movl $8, 176(%rsp)
+ movl 176(%rsp), %eax
+ cmpl $47, %eax
+ jbe .LBB1_3 # bb
+.LBB1_1: # bb3
+ movq 184(%rsp), %rcx
+ leaq 8(%rcx), %rax
+ movq %rax, 184(%rsp)
+.LBB1_2: # bb4
+ movl (%rcx), %eax
+ addq $200, %rsp
+ ret
+.LBB1_3: # bb
+ movl %eax, %ecx
+ addl $8, %eax
+ addq 192(%rsp), %rcx
+ movl %eax, 176(%rsp)
+ jmp .LBB1_2 # bb4
+
+gcc 4.3 generates:
+ subq $96, %rsp
+.LCFI0:
+ leaq 104(%rsp), %rax
+ movq %rsi, -80(%rsp)
+ movl $8, -120(%rsp)
+ movq %rax, -112(%rsp)
+ leaq -88(%rsp), %rax
+ movq %rax, -104(%rsp)
+ movl $8, %eax
+ cmpl $48, %eax
+ jb .L6
+ movq -112(%rsp), %rdx
+ movl (%rdx), %eax
+ addq $96, %rsp
+ ret
+ .p2align 4,,10
+ .p2align 3
+.L6:
+ mov %eax, %edx
+ addq -104(%rsp), %rdx
+ addl $8, %eax
+ movl %eax, -120(%rsp)
+ movl (%rdx), %eax
+ addq $96, %rsp
+ ret
+
+and it gets compiled into this on x86:
+ pushl %ebp
+ movl %esp, %ebp
+ subl $4, %esp
+ leal 12(%ebp), %eax
+ movl %eax, -4(%ebp)
+ leal 16(%ebp), %eax
+ movl %eax, -4(%ebp)
+ movl 12(%ebp), %eax
+ addl $4, %esp
+ popl %ebp
+ ret
+
+gcc 4.3 generates:
+ pushl %ebp
+ movl %esp, %ebp
+ movl 12(%ebp), %eax
+ popl %ebp
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Teach tblgen not to check bitconvert source type in some cases. This allows us
+to consolidate the following patterns in X86InstrMMX.td:
+
+def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+
+There are other cases in various td files.
+
+//===---------------------------------------------------------------------===//
+
+Take something like the following on x86-32:
+unsigned a(unsigned long long x, unsigned y) {return x % y;}
+
+We currently generate a libcall, but we really shouldn't: the expansion is
+shorter and likely faster than the libcall. The expected code is something
+like the following:
+
+ movl 12(%ebp), %eax
+ movl 16(%ebp), %ecx
+ xorl %edx, %edx
+ divl %ecx
+ movl 8(%ebp), %eax
+ divl %ecx
+ movl %edx, %eax
+ ret
+
+A similar code sequence works for division.
+
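+A C sketch of the long division being performed (hypothetical helper; the
+final 64-by-32 modulo is written out only to show the invariant each divl
+relies on):
+
+unsigned rem64by32(unsigned long long x, unsigned y) {
+  unsigned hi = (unsigned)(x >> 32);
+  unsigned lo = (unsigned)x;
+  unsigned r1 = hi % y;   /* first divl: remainder of the high word, < y */
+  /* second divl: divides r1:lo by y; since r1 < y the quotient fits in
+     32 bits, so the hardware divide cannot fault */
+  return (unsigned)((((unsigned long long)r1 << 32) | lo) % y);
+}
+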
+//===---------------------------------------------------------------------===//
+
+These should compile to the same code, but the latter codegens to useless
+instructions on X86. This may be a trivial dag combine (GCC PR7061):
+
+struct s1 { unsigned char a, b; };
+unsigned long f1(struct s1 x) {
+ return x.a + x.b;
+}
+struct s2 { unsigned a: 8, b: 8; };
+unsigned long f2(struct s2 x) {
+ return x.a + x.b;
+}
+
+//===---------------------------------------------------------------------===//
+
+We currently compile this:
+
+define i32 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %sum = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %normal
+normal:
+ ret i32 %sum
+overflow:
+ call void @llvm.trap()
+ unreachable
+}
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare void @llvm.trap()
+
+to:
+
+_func1:
+ movl 4(%esp), %eax
+ addl 8(%esp), %eax
+ jo LBB1_2 ## overflow
+LBB1_1: ## normal
+ ret
+LBB1_2: ## overflow
+ ud2
+
+it would be nice to produce "into" someday.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void vec_mpys1(int y[], const int x[], int scaler) {
+int i;
+for (i = 0; i < 150; i++)
+ y[i] += (((long long)scaler * (long long)x[i]) >> 31);
+}
+
+Compiles to this loop with GCC 3.x:
+
+.L5:
+ movl %ebx, %eax
+ imull (%edi,%ecx,4)
+ shrdl $31, %edx, %eax
+ addl %eax, (%esi,%ecx,4)
+ incl %ecx
+ cmpl $149, %ecx
+ jle .L5
+
+llvm-gcc compiles it to the much uglier:
+
+LBB1_1: ## bb1
+ movl 24(%esp), %eax
+ movl (%eax,%edi,4), %ebx
+ movl %ebx, %ebp
+ imull %esi, %ebp
+ movl %ebx, %eax
+ mull %ecx
+ addl %ebp, %edx
+ sarl $31, %ebx
+ imull %ecx, %ebx
+ addl %edx, %ebx
+ shldl $1, %eax, %ebx
+ movl 20(%esp), %eax
+ addl %ebx, (%eax,%edi,4)
+ incl %edi
+ cmpl $150, %edi
+ jne LBB1_1 ## bb1
+
+//===---------------------------------------------------------------------===//
+
+Test instructions can be eliminated by using EFLAGS values from arithmetic
+instructions. This is currently not done for mul, and, or, xor, neg, shl,
+sra, srl, shld, shrd, atomic ops, and others. It is also not done for
+read-modify-write instructions, or when the OF or CF flags are needed.
+
+The shift operators have the complication that when the shift count is
+zero, EFLAGS is not set, so they can only subsume a test instruction if
+the shift count is known to be non-zero. Also, using the EFLAGS value
+from a shift is apparently very slow on some x86 implementations.
+
+In read-modify-write instructions, the root node in the isel match is
+the store, and isel has no way for the use of the EFLAGS result of the
+arithmetic to be remapped to the new node.
+
+Add and subtract instructions set OF on signed overflow and CF on unsigned
+overflow, while test instructions always clear OF and CF. In order to
+replace a test with an add or subtract in a situation where OF or CF is
+needed, codegen must be able to prove that the operation cannot see
+signed or unsigned overflow, respectively.
+
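+A sketch of the pattern (hypothetical function): the andl computing t already
+sets ZF, so the testl currently emitted before the branch is redundant:
+
+int has_no_common_bits(int x, int y) {
+  int t = x & y;    /* andl sets ZF according to the result */
+  if (t == 0)       /* currently also emits testl %reg, %reg */
+    return 1;
+  return t;
+}
+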
+//===---------------------------------------------------------------------===//
+
+memcpy/memmove do not lower to SSE copies when possible. A silly example is:
+define <16 x float> @foo(<16 x float> %A) nounwind {
+ %tmp = alloca <16 x float>, align 16
+ %tmp2 = alloca <16 x float>, align 16
+ store <16 x float> %A, <16 x float>* %tmp
+ %s = bitcast <16 x float>* %tmp to i8*
+ %s2 = bitcast <16 x float>* %tmp2 to i8*
+ call void @llvm.memcpy.i64(i8* %s, i8* %s2, i64 64, i32 16)
+ %R = load <16 x float>* %tmp2
+ ret <16 x float> %R
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+which compiles to:
+
+_foo:
+ subl $140, %esp
+ movaps %xmm3, 112(%esp)
+ movaps %xmm2, 96(%esp)
+ movaps %xmm1, 80(%esp)
+ movaps %xmm0, 64(%esp)
+ movl 60(%esp), %eax
+ movl %eax, 124(%esp)
+ movl 56(%esp), %eax
+ movl %eax, 120(%esp)
+ movl 52(%esp), %eax
+ <many many more 32-bit copies>
+ movaps (%esp), %xmm0
+ movaps 16(%esp), %xmm1
+ movaps 32(%esp), %xmm2
+ movaps 48(%esp), %xmm3
+ addl $140, %esp
+ ret
+
+On Nehalem, it may even be cheaper to just use movups when unaligned than to
+fall back to lower-granularity chunks.
+
+//===---------------------------------------------------------------------===//
+
+Implement processor-specific optimizations for parity with GCC on these
+processors. GCC does two optimizations:
+
+1. ix86_pad_returns inserts a noop before ret instructions if immediately
+ preceded by a conditional branch or is the target of a jump.
+2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of
+ code contains more than 3 branches.
+
+The first one is done for all AMDs, Core2, and "Generic".
+The second one is done for Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
+Core 2, and "Generic".
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
new file mode 100644
index 0000000..fd13b02
--- /dev/null
+++ b/lib/Target/X86/X86.h
@@ -0,0 +1,84 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_X86_H
+#define TARGET_X86_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class X86TargetMachine;
+class FunctionPass;
+class MachineCodeEmitter;
+class JITCodeEmitter;
+class raw_ostream;
+
+/// createX86ISelDag - This pass converts a legalized DAG into an
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *createX86ISelDag(X86TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+/// createX86FloatingPointStackifierPass - This function returns a pass which
+/// converts floating point register references and pseudo instructions into
+/// floating point stack references and physical instructions.
+///
+FunctionPass *createX86FloatingPointStackifierPass();
+
+/// createX87FPRegKillInserterPass - This function returns a pass which
+/// inserts FP_REG_KILL instructions where needed.
+///
+FunctionPass *createX87FPRegKillInserterPass();
+
+/// createX86CodePrinterPass - Returns a pass that prints the X86
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description.
+///
+FunctionPass *createX86CodePrinterPass(raw_ostream &o,
+ X86TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified MCE object.
+
+FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
+ MachineCodeEmitter &MCE);
+FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+/// createEmitX86CodeToMemory - Returns a pass that converts a register
+/// allocated function into raw machine code in a dynamically
+/// allocated chunk of memory.
+///
+FunctionPass *createEmitX86CodeToMemory();
+
+/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass
+/// which calculates the maximal stack alignment required for a function.
+///
+FunctionPass *createX86MaxStackAlignmentCalculatorPass();
+
+} // End llvm namespace
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#include "X86GenRegisterNames.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#include "X86GenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
new file mode 100644
index 0000000..8df138d
--- /dev/null
+++ b/lib/Target/X86/X86.td
@@ -0,0 +1,184 @@
+//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel i386 architecture, referred to
+// here as the "X86" architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+ "Enable MMX instructions">;
+def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
+ "Enable SSE instructions",
+ [FeatureMMX]>;
+def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
+ "Enable SSE2 instructions",
+ [FeatureSSE1]>;
+def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
+ "Enable SSE3 instructions",
+ [FeatureSSE2]>;
+def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
+ "Enable SSSE3 instructions",
+ [FeatureSSE3]>;
+def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+ "Enable SSE 4.1 instructions",
+ [FeatureSSSE3]>;
+def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+ "Enable SSE 4.2 instructions",
+ [FeatureSSE41]>;
+def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
+ "Enable 3DNow! instructions">;
+def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
+ "Enable 3DNow! Athlon instructions",
+ [Feature3DNow]>;
+// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
+// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
+// without disabling 64-bit mode.
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+ "Support 64-bit instructions">;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+ "Bit testing of memory is slow">;
+def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
+ "Support SSE 4a instructions">;
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"i386", []>;
+def : Proc<"i486", []>;
+def : Proc<"i586", []>;
+def : Proc<"pentium", []>;
+def : Proc<"pentium-mmx", [FeatureMMX]>;
+def : Proc<"i686", []>;
+def : Proc<"pentiumpro", []>;
+def : Proc<"pentium2", [FeatureMMX]>;
+def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+
+def : Proc<"k6", [FeatureMMX]>;
+def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
+def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+
+def : Proc<"winchip-c6", [FeatureMMX]>;
+def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
+def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSSE1]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "X86RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "X86InstrInfo.td"
+
+def X86InstrInfo : InstrInfo {
+
+ // Define how we want to layout our TargetSpecific information field... This
+ // should be kept up-to-date with the fields in the X86InstrInfo.h file.
+ let TSFlagsFields = ["FormBits",
+ "hasOpSizePrefix",
+ "hasAdSizePrefix",
+ "Prefix",
+ "hasREX_WPrefix",
+ "ImmTypeBits",
+ "FPFormBits",
+ "hasLockPrefix",
+ "SegOvrBits",
+ "Opcode"];
+ let TSFlagsShifts = [0,
+ 6,
+ 7,
+ 8,
+ 12,
+ 13,
+ 16,
+ 19,
+ 20,
+ 24];
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "X86CallingConv.td"
+
+
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
+// The X86 target supports two different syntaxes for emitting machine code.
+// This is controlled by the -x86-asm-syntax={att|intel} flag.
+def ATTAsmWriter : AsmWriter {
+ string AsmWriterClassName = "ATTAsmPrinter";
+ int Variant = 0;
+}
+def IntelAsmWriter : AsmWriter {
+ string AsmWriterClassName = "IntelAsmPrinter";
+ int Variant = 1;
+}
+
+
+def X86 : Target {
+ // Information about the instructions...
+ let InstructionSet = X86InstrInfo;
+
+ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
+}
diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h
new file mode 100644
index 0000000..0a8e4e6
--- /dev/null
+++ b/lib/Target/X86/X86COFF.h
@@ -0,0 +1,95 @@
+//===--- X86COFF.h - Some definitions from COFF documentation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file just defines some symbols found in COFF documentation. They are
+// used to emit function type information for COFF targets (Cygwin/Mingw32).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_H
+#define X86COFF_H
+
+namespace COFF
+{
+/// Storage class tells where and what the symbol represents
+enum StorageClass {
+ C_EFCN = -1, ///< Physical end of function
+ C_NULL = 0, ///< No symbol
+ C_AUTO = 1, ///< Automatic variable
+ C_EXT = 2, ///< External symbol
+ C_STAT = 3, ///< Static
+ C_REG = 4, ///< Register variable
+ C_EXTDEF = 5, ///< External definition
+ C_LABEL = 6, ///< Label
+ C_ULABEL = 7, ///< Undefined label
+ C_MOS = 8, ///< Member of structure
+ C_ARG = 9, ///< Function argument
+ C_STRTAG = 10, ///< Structure tag
+ C_MOU = 11, ///< Member of union
+ C_UNTAG = 12, ///< Union tag
+ C_TPDEF = 13, ///< Type definition
+ C_USTATIC = 14, ///< Undefined static
+ C_ENTAG = 15, ///< Enumeration tag
+ C_MOE = 16, ///< Member of enumeration
+ C_REGPARM = 17, ///< Register parameter
+ C_FIELD = 18, ///< Bit field
+
+ C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block
+ C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function
+ C_EOS = 102, ///< End of structure
+ C_FILE = 103, ///< File name
+ C_LINE = 104, ///< Line number, reformatted as symbol
+ C_ALIAS = 105, ///< Duplicate tag
+ C_HIDDEN = 106 ///< External symbol in dmert public lib
+};
+
+/// The type of the symbol. This is made up of a base type and a derived type.
+/// For example, pointer to int is "pointer to T" and "int"
+enum SymbolType {
+ T_NULL = 0, ///< No type info
+ T_ARG = 1, ///< Void function argument (only used by compiler)
+ T_VOID = 1, ///< The same as above. Just named differently in some specs.
+ T_CHAR = 2, ///< Character
+ T_SHORT = 3, ///< Short integer
+ T_INT = 4, ///< Integer
+ T_LONG = 5, ///< Long integer
+ T_FLOAT = 6, ///< Floating point
+ T_DOUBLE = 7, ///< Double word
+ T_STRUCT = 8, ///< Structure
+ T_UNION = 9, ///< Union
+ T_ENUM = 10, ///< Enumeration
+ T_MOE = 11, ///< Member of enumeration
+ T_UCHAR = 12, ///< Unsigned character
+ T_USHORT = 13, ///< Unsigned short
+ T_UINT = 14, ///< Unsigned integer
+ T_ULONG = 15 ///< Unsigned long
+};
+
+/// Derived type of symbol
+enum SymbolDerivedType {
+ DT_NON = 0, ///< No derived type
+ DT_PTR = 1, ///< Pointer to T
+ DT_FCN = 2, ///< Function returning T
+ DT_ARY = 3 ///< Array of T
+};
+
+/// Masks for extracting parts of type
+enum SymbolTypeMasks {
+ N_BTMASK = 017, ///< Mask for base type
+ N_TMASK = 060 ///< Mask for derived type
+};
+
+/// Offsets of parts of type
+enum Shifts {
+ N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHFT)
+};
+
+}
+
+#endif // X86COFF_H
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
new file mode 100644
index 0000000..7f99203
--- /dev/null
+++ b/lib/Target/X86/X86CallingConv.td
@@ -0,0 +1,360 @@
+//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the X86-32 and X86-64
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Return-value conventions common to all X86 CC's.
+def RetCC_X86Common : CallingConv<[
+ // Scalar values are returned in AX first, then DX. For i8, the ABI
+ // requires the values to be in AL and AH, however this code uses AL and DL
+ // instead. This is because using AH for the second register conflicts with
+ // the way LLVM does multiple return values -- a return of {i16,i8} would end
+ // up in AX and AH, which overlap. Front-ends wishing to conform to the ABI
+ // for functions that return two i8 values are currently expected to pack the
+ // values into an i16 (which uses AX, and thus AL:AH).
+ CCIfType<[i8] , CCAssignToReg<[AL, DL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
+ CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
+ // can only be used by ABI non-compliant code. If the target doesn't have XMM
+ // registers, it won't have vector types.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // MMX vector types are always returned in MM0. If the target doesn't have
+ // MM0, it doesn't support these vector types.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[MM0]>>,
+
+ // Long double types are always returned in ST0 (even with SSE).
+ CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
+]>;
+
+// X86-32 C return-value convention.
+def RetCC_X86_32_C : CallingConv<[
+ // The X86-32 calling convention returns FP values in ST0, unless marked
+ // with "inreg" (used here to distinguish one kind of reg from another,
+ // weirdly; this is really the sse-regparm calling convention) in which
+ // case they use XMM0, otherwise it is the same as the common X86 calling
+ // conv.
+ CCIfInReg<CCIfSubtarget<"hasSSE2()",
+ CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+ CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-32 FastCC return-value convention.
+def RetCC_X86_32_Fast : CallingConv<[
+ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
+ // SSE2, otherwise it is the same as the C calling convention.
+ // This can happen when a float, 2 x float, or 3 x float vector is split by
+ // target lowering, and is returned in 1-3 sse regs.
+ CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-64 C return-value convention.
+def RetCC_X86_64_C : CallingConv<[
+ // The X86-64 calling convention always returns FP values in XMM0.
+ CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
+
+ // MMX vector types are always returned in XMM0 except for v1i64 which is
+ // returned in RAX. This disagrees with ABI documentation but is bug
+ // compatible with gcc.
+ CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-Win64 C return-value convention.
+def RetCC_X86_Win64_C : CallingConv<[
+ // The X86-Win64 calling convention always returns __m64 values in RAX.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[RAX]>>,
+
+ // And FP in XMM0 only.
+ CCIfType<[f32], CCAssignToReg<[XMM0]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0]>>,
+
+ // Otherwise, everything is the same as 'normal' X86-64 C CC.
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+
+// This is the root return-value convention for the X86-32 backend.
+def RetCC_X86_32 : CallingConv<[
+ // If FastCC, use RetCC_X86_32_Fast.
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // Otherwise, use RetCC_X86_32_C.
+ CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+// This is the root return-value convention for the X86-64 backend.
+def RetCC_X86_64 : CallingConv<[
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+// This is the return-value convention used for the entire X86 backend.
+def RetCC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
+ CCDelegateTo<RetCC_X86_32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+def CC_X86_64_C : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 6 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
+ // registers on Darwin.
+ CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCIfSubtarget<"hasSSE2()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>>,
+
+ // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
+ CCIfType<[v1i64],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Long doubles get stack slots whose size and alignment depend on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToStack<8, 8>>
+]>;
+
+// Calling convention used on Win64
+def CC_X86_Win64_C : CallingConv<[
+ // FIXME: Handle byval stuff.
+ // FIXME: Handle varargs.
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The first 4 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+ [RCX , RDX , R8 , R9 ]>>,
+
+ // The first 4 MMX vector arguments are passed in GPRs.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
+ CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 16-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>,
+
+ // Long doubles get stack slots whose size and alignment depend on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 16-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
+]>;
+
+// Tail call convention (fast): One register is reserved for target address,
+// namely R9
+def CC_X86_64_TailCall : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 6 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
+ // registers on Darwin.
+ CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
+ CCIfType<[v1i64],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// X86 C Calling Convention
+//===----------------------------------------------------------------------===//
+
+/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
+/// values are spilled on the stack, and the first 4 vector values go in XMM
+/// regs.
+def CC_X86_32_Common : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // The first 3 float or double arguments, if marked 'inreg' and if the call
+ // is not a vararg call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasSSE2()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
+
+  // The first 3 __m64 (except for v1i64) vector arguments are passed in MMX
+ // registers if the call is not a vararg call.
+ CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCAssignToReg<[MM0, MM1, MM2]>>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Doubles get 8-byte slots that are 4-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+ // Long doubles get slots whose size depends on the subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 4>>,
+
+ // The first 4 SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+
+ // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
+ // passed in the parameter area.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
+
+def CC_X86_32_C : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in ECX.
+ CCIfNest<CCAssignToReg<[ECX]>>,
+
+ // The first 3 integer arguments, if marked 'inreg' and if the call is not
+ // a vararg call, are passed in integer registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_FastCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
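+
+// Illustrative example (hypothetical call): under CC_X86_32_FastCall,
+// f(i32 a, i16 b, i32 c) promotes b to i32, assigns a -> ECX and b -> EDX,
+// and passes c in a 4-byte stack slot via the delegation to
+// CC_X86_32_Common.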
+
+def CC_X86_32_FastCC : CallingConv<[
+ // Handles byval parameters. Note that we can't rely on the delegation
+ // to CC_X86_32_Common for this because that happens after code that
+ // puts arguments in registers.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // The first 3 float or double arguments, if the call is not a vararg
+ // call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasSSE2()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+
+ // Doubles get 8-byte slots that are 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
new file mode 100644
index 0000000..e988a5c
--- /dev/null
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -0,0 +1,811 @@
+//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the X86 machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-emitter"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "X86Relocations.h"
+#include "X86.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+template<class CodeEmitter>
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
+ const X86InstrInfo *II;
+ const TargetData *TD;
+ X86TargetMachine &TM;
+ CodeEmitter &MCE;
+ intptr_t PICBaseOffset;
+ bool Is64BitMode;
+ bool IsPIC;
+ public:
+ static char ID;
+ explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(&ID), II(0), TD(0), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(false),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ Emitter(X86TargetMachine &tm, CodeEmitter &mce,
+ const X86InstrInfo &ii, const TargetData &td, bool is64)
+ : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI,
+ const TargetInstrDesc *Desc);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
+ void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ intptr_t Disp = 0, intptr_t PCAdj = 0,
+ bool NeedStub = false, bool Indirect = false);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
+ intptr_t PCAdj = 0);
+ void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj = 0);
+
+ void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+ intptr_t PCAdj = 0);
+
+ void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
+ void emitRegModRMByte(unsigned RegOpcodeField);
+ void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
+ void emitConstant(uint64_t Val, unsigned Size);
+
+ void emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj = 0);
+
+ unsigned getX86RegNum(unsigned RegNo) const;
+
+ bool gvNeedsNonLazyPtr(const GlobalValue *GV);
+ };
+
+template<class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified templated MachineCodeEmitter object.
+
+namespace llvm {
+
+FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+} // end namespace llvm
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>());
+
+ II = TM.getInstrInfo();
+ TD = TM.getTargetData();
+ Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+
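+  // MCE.finishFunction() returns true when emission must be retried (e.g.
+  // the memory buffer overflowed), so the whole function is re-emitted
+  // from scratch in that case; hence the do/while loop.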
+ do {
+ DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n";
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const TargetInstrDesc &Desc = I->getDesc();
+ emitInstruction(*I, &Desc);
+ // MOVPC32r is basically a call plus a pop instruction.
+ if (Desc.getOpcode() == X86::MOVPC32r)
+ emitInstruction(*I, &II->get(X86::POP32r));
+ NumEmitted++; // Keep track of the # of mi's emitted
+ }
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// emitPCRelativeBlockAddress - This method keeps track of the information
+/// necessary to resolve the address of this block later and emits a dummy
+/// value.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
+  // Remember where this reference is and which block it refers to, so we
+  // can resolve it later.
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, MBB));
+ MCE.emitWordLE(0);
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream assuming
+/// this is part of a "take the address of a global" instruction.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */,
+ bool NeedStub /* = false */,
+ bool Indirect /* = false */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ GV, RelocCST, NeedStub)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ GV, RelocCST, NeedStub);
+ MCE.addRelocation(MR);
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
+ unsigned Reloc) {
+ intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, RelocCST));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const {
+ return II->getRegisterInfo().getX86RegNum(RegNo);
+}
+
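+// ModRMByte - Pack the ModR/M byte fields. Worked example (illustrative):
+// ModRMByte(3, 2, 5) = 5 | (2 << 3) | (3 << 6) = 0xD5, i.e. register-direct
+// mode (Mod=3) with /2 in the reg/opcode field and register number 5 in R/M.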
+inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
+ unsigned RegOpcodeFld){
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
+ unsigned Index,
+ unsigned Base) {
+ // SIB byte is in the same format as the ModRMByte...
+ MCE.emitByte(ModRMByte(SS, Index, Base));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
+ // Output the constant in little endian byte order...
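+  // (e.g. emitConstant(0x12345678, 4) emits 78 56 34 12.)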
+ for (unsigned i = 0; i != Size; ++i) {
+ MCE.emitByte(Val & 255);
+ Val >>= 8;
+ }
+}
+
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
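+/// (e.g. isDisp8(-128) and isDisp8(127) are true; isDisp8(128) is false).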
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::gvNeedsNonLazyPtr(const GlobalValue *GV) {
+ // For Darwin, simulate the linktime GOT by using the same non-lazy-pointer
+ // mechanism as 32-bit mode.
+ return (!Is64BitMode || TM.getSubtarget<X86Subtarget>().isTargetDarwin()) &&
+ TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
+ int DispVal, intptr_t PCAdj) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (!RelocOp) {
+ emitConstant(DispVal, 4);
+ return;
+ }
+
+ // Otherwise, this is something that requires a relocation. Emit it as such
+ // now.
+ if (RelocOp->isGlobal()) {
+ // In 64-bit static small code model, we could potentially emit absolute.
+ // But it's probably not beneficial.
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
+ // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ bool NeedStub = isa<Function>(RelocOp->getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(RelocOp->getGlobal());
+ emitGlobalAddress(RelocOp->getGlobal(), rt, RelocOp->getOffset(),
+ PCAdj, NeedStub, Indirect);
+ } else if (RelocOp->isCPI()) {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
+ emitConstPoolAddress(RelocOp->getIndex(), rt,
+ RelocOp->getOffset(), PCAdj);
+ } else if (RelocOp->isJTI()) {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
+ emitJumpTableAddress(RelocOp->getIndex(), rt, PCAdj);
+ } else {
+ assert(0 && "Unknown value to relocate!");
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobal()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isCPI()) {
+ if (Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
+ DispVal += Op3.getOffset();
+ }
+ } else if (Op3.isJTI()) {
+ if (Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
+ }
+ } else {
+ DispVal = Op3.getImm();
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &Scale = MI.getOperand(Op+1);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Is a SIB byte needed?
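+  // For example, [EAX+disp8] needs no SIB byte, while [EAX+ECX*2], a base
+  // of ESP, or any use of an index register does.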
+ if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
+ IndexReg.getReg() == 0 &&
+ (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
+ if (BaseReg == 0) { // Just a displacement?
+ // Emit special case [disp32] encoding
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+ // Emit simple indirect register encoding... [EAX] f.e.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
+ } else if (!DispForReloc && isDisp8(DispVal)) {
+ // Emit the disp8 encoding... [REG+disp8]
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
+ emitConstant(DispVal, 1);
+ } else {
+ // Emit the most general non-SIB encoding: [REG+disp32]
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ }
+ }
+
+ } else { // We need a SIB byte, so start by outputting the ModR/M byte first
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=5, to JUST get the index, scale, and displacement.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispForReloc) {
+ // Emit the normal disp32 encoding.
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ } else if (isDisp8(DispVal)) {
+ // Emit the disp8 encoding...
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding...
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ }
+
+ // Calculate what the SS field value should be...
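+    // SS encodes log2(scale): scales 1, 2, 4, 8 map to SS 0, 1, 2, 3; the
+    // ~0 entries mark scales that cannot be encoded.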
+ static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+      // Handle the SIB byte for the case where there is no base. The
+      // displacement is emitted afterwards (ForceDisp32 was set above).
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, 5);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, BaseRegNo);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8) {
+ emitConstant(DispVal, 1);
+ } else if (DispVal != 0 || ForceDisp32) {
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ }
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(
+ const MachineInstr &MI,
+ const TargetInstrDesc *Desc) {
+ DOUT << MI;
+
+ unsigned Opcode = Desc->Opcode;
+
+ // Emit the lock opcode prefix as needed.
+ if (Desc->TSFlags & X86II::LOCK) MCE.emitByte(0xF0);
+
+ // Emit segment override opcode prefix as needed.
+ switch (Desc->TSFlags & X86II::SegOvrMask) {
+ case X86II::FS:
+ MCE.emitByte(0x64);
+ break;
+ case X86II::GS:
+ MCE.emitByte(0x65);
+ break;
+ default: assert(0 && "Invalid segment!");
+ case 0: break; // No segment override!
+ }
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
+
+ // Emit the operand size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::OpSize) MCE.emitByte(0x66);
+
+ // Emit the address size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::AdSize) MCE.emitByte(0x67);
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::XS: // F3 0F
+ MCE.emitByte(0xF3);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ MCE.emitByte(0xD8+
+ (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+ >> X86II::Op0Shift));
+ break; // Two-byte opcode prefix
+ default: assert(0 && "Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ if (Is64BitMode) {
+ // REX prefix
+ unsigned REX = X86InstrInfo::determineREX(MI);
+ if (REX)
+ MCE.emitByte(0x40 | REX);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ MCE.emitByte(0x0F);
+
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::T8: // 0F 38
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TA: // 0F 3A
+ MCE.emitByte(0x3A);
+ break;
+ }
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
+ ++CurOp;
+ else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc);
+ switch (Desc->TSFlags & X86II::FormMask) {
+ default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+ // Remember the current PC offset, this is the PIC relocation
+ // base address.
+ switch (Opcode) {
+ default:
+ assert(0 && "psuedo instructions should be removed before code emission");
+ break;
+ case TargetInstrInfo::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ assert(0 && "JIT does not support inline asm!\n");
+ abort();
+ }
+ break;
+ }
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getImm());
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::DECLARE:
+ case X86::DWARF_LOC:
+ case X86::FP_REG_KILL:
+ break;
+ case X86::MOVPC32r: {
+ // This emits the "call" portion of this pseudo instruction.
+ MCE.emitByte(BaseOpcode);
+ emitConstant(0, X86InstrInfo::sizeOfImm(Desc));
+ // Remember PIC base.
+ PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
+ X86JITInfo *JTI = TM.getJITInfo();
+ JTI->setPICBase(MCE.getCurrentPCValue());
+ break;
+ }
+ }
+ CurOp = NumOps;
+ break;
+ case X86II::RawFrm:
+ MCE.emitByte(BaseOpcode);
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+
+ DOUT << "RawFrm CurOp " << CurOp << "\n";
+ DOUT << "isMBB " << MO.isMBB() << "\n";
+ DOUT << "isGlobal " << MO.isGlobal() << "\n";
+ DOUT << "isSymbol " << MO.isSymbol() << "\n";
+ DOUT << "isImm " << MO.isImm() << "\n";
+
+ if (MO.isMBB()) {
+ emitPCRelativeBlockAddress(MO.getMBB());
+ } else if (MO.isGlobal()) {
+ // Assume undefined functions may be outside the Small codespace.
+ bool NeedStub =
+ (Is64BitMode &&
+ (TM.getCodeModel() == CodeModel::Large ||
+ TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
+ Opcode == X86::TAILJMPd;
+ emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
+ MO.getOffset(), 0, NeedStub);
+ } else if (MO.isSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ } else if (MO.isImm()) {
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+ // Fix up immediate operand for pc relative calls.
+ intptr_t Imm = (intptr_t)MO.getImm();
+ Imm = Imm - MCE.getCurrentPCValue() - 4;
+ emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc));
+ } else
+ emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc));
+ } else {
+ assert(0 && "Unknown RawFrm operand!");
+ }
+ }
+ break;
+
+ case X86II::AddRegFrm:
+ MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm())
+ emitConstant(MO1.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ // This should not occur on Darwin for relocatable objects.
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal());
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ }
+ }
+ break;
+
+ case X86II::MRMDestReg: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc));
+ break;
+ }
+ case X86II::MRMDestMem: {
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp,
+ getX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)
+ .getReg()));
+ CurOp += X86AddrNumOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc));
+ break;
+ }
+
+ case X86II::MRMSrcReg:
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86InstrInfo::sizeOfImm(Desc));
+ break;
+
+ case X86II::MRMSrcMem: {
+ // FIXME: Maybe lea should have its own form?
+ int AddrOperands;
+ if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ AddrOperands = X86AddrNumOperands - 1; // No segment register
+ else
+ AddrOperands = X86AddrNumOperands;
+
+ intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
+ X86InstrInfo::sizeOfImm(Desc) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
+ PCAdj);
+ CurOp += AddrOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86InstrInfo::sizeOfImm(Desc));
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r: {
+ MCE.emitByte(BaseOpcode);
+
+ // Special handling of lfence, mfence, monitor, and mwait.
+ if (Desc->getOpcode() == X86::LFENCE ||
+ Desc->getOpcode() == X86::MFENCE ||
+ Desc->getOpcode() == X86::MONITOR ||
+ Desc->getOpcode() == X86::MWAIT) {
+ emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+
+ switch (Desc->getOpcode()) {
+ default: break;
+ case X86::MONITOR:
+ MCE.emitByte(0xC8);
+ break;
+ case X86::MWAIT:
+ MCE.emitByte(0xC9);
+ break;
+ }
+ } else {
+ emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+ (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+ }
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm())
+ emitConstant(MO1.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal());
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ }
+ }
+ break;
+ }
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ?
+ (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ?
+ X86InstrInfo::sizeOfImm(Desc) : 4) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
+ PCAdj);
+ CurOp += X86AddrNumOperands;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO.isImm())
+ emitConstant(MO.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ if (MO.isGlobal()) {
+ bool NeedStub = isa<Function>(MO.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO.getGlobal());
+ emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), rt);
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), rt);
+ }
+ }
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ MCE.emitByte(BaseOpcode);
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ ++CurOp;
+ break;
+ }
+
+ if (!Desc->isVariadic() && CurOp != NumOps) {
+ cerr << "Cannot encode: ";
+ MI.dump();
+ cerr << '\n';
+ abort();
+ }
+}
+
diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
new file mode 100644
index 0000000..8002f98
--- /dev/null
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -0,0 +1,67 @@
+;;===-- X86CompilationCallback_Win64.asm - Implement Win64 JIT callback ---===
+;;
+;; The LLVM Compiler Infrastructure
+;;
+;; This file is distributed under the University of Illinois Open Source
+;; License. See LICENSE.TXT for details.
+;;
+;;===----------------------------------------------------------------------===
+;;
+;; This file implements the JIT interfaces for the X86 target.
+;;
+;;===----------------------------------------------------------------------===
+
+extrn X86CompilationCallback2: PROC
+
+.code
+X86CompilationCallback proc
+ push rbp
+
+ ; Save RSP
+ mov rbp, rsp
+
+ ; Save all int arg registers
+ push rcx
+ push rdx
+ push r8
+ push r9
+
+ ; Align stack on 16-byte boundary.
+ and rsp, -16
+
+ ; Save all XMM arg registers
+ sub rsp, 64
+ movaps [rsp], xmm0
+ movaps [rsp+16], xmm1
+ movaps [rsp+32], xmm2
+ movaps [rsp+48], xmm3
+
+ ; JIT callee
+
+ ; Pass prev frame and return address
+ mov rcx, rbp
+ mov rdx, qword ptr [rbp+8]
+ call X86CompilationCallback2
+
+ ; Restore all XMM arg registers
+ movaps xmm3, [rsp+48]
+ movaps xmm2, [rsp+32]
+ movaps xmm1, [rsp+16]
+ movaps xmm0, [rsp]
+
+ ; Restore RSP
+ mov rsp, rbp
+
+ ; Restore all int arg registers
+ sub rsp, 32
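+    ; (RBP was captured above the four 8-byte pushes, so step RSP back down
+    ;  to the saved registers before popping them.)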
+ pop r9
+ pop r8
+ pop rdx
+ pop rcx
+
+ ; Restore RBP
+ pop rbp
+ ret
+X86CompilationCallback endp
+
+End
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
new file mode 100644
index 0000000..4c3cc82
--- /dev/null
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -0,0 +1,18 @@
+//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ELFWriterInfo.h"
+using namespace llvm;
+
+X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {}
+X86ELFWriterInfo::~X86ELFWriterInfo() {}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
new file mode 100644
index 0000000..06e051a
--- /dev/null
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -0,0 +1,29 @@
+//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ELF_WRITER_INFO_H
+#define X86_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class X86ELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ X86ELFWriterInfo();
+ virtual ~X86ELFWriterInfo();
+ };
+
+} // end llvm namespace
+
+#endif // X86_ELF_WRITER_INFO_H
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
new file mode 100644
index 0000000..b3667be
--- /dev/null
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -0,0 +1,1549 @@
+//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific support for the FastISel class. Much
+// of the target-specific code is generated by tablegen in the file
+// X86GenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+
+class X86FastISel : public FastISel {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// StackPtr - Register used as the stack pointer.
+ ///
+ unsigned StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf64;
+ bool X86ScalarSSEf32;
+
+public:
+ explicit X86FastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ )
+ : FastISel(mf, mmi, dw, vm, bm, am
+#ifndef NDEBUG
+ , cil
+#endif
+ ) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+ }
+
+ virtual bool TargetSelectInstruction(Instruction *I);
+
+#include "X86GenFastISel.inc"
+
+private:
+ bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT);
+
+ bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);
+
+ bool X86FastEmitStore(MVT VT, Value *Val,
+ const X86AddressMode &AM);
+ bool X86FastEmitStore(MVT VT, unsigned Val,
+ const X86AddressMode &AM);
+
+ bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
+ unsigned &ResultReg);
+
+ bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);
+
+ bool X86SelectLoad(Instruction *I);
+
+ bool X86SelectStore(Instruction *I);
+
+ bool X86SelectCmp(Instruction *I);
+
+ bool X86SelectZExt(Instruction *I);
+
+ bool X86SelectBranch(Instruction *I);
+
+ bool X86SelectShift(Instruction *I);
+
+ bool X86SelectSelect(Instruction *I);
+
+ bool X86SelectTrunc(Instruction *I);
+
+ bool X86SelectFPExt(Instruction *I);
+ bool X86SelectFPTrunc(Instruction *I);
+
+ bool X86SelectExtractValue(Instruction *I);
+
+ bool X86VisitIntrinsicCall(IntrinsicInst &I);
+ bool X86SelectCall(Instruction *I);
+
+ CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);
+
+ const X86InstrInfo *getInstrInfo() const {
+ return getTargetMachine()->getInstrInfo();
+ }
+ const X86TargetMachine *getTargetMachine() const {
+ return static_cast<const X86TargetMachine *>(&TM);
+ }
+
+ unsigned TargetMaterializeConstant(Constant *C);
+
+ unsigned TargetMaterializeAlloca(AllocaInst *C);
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(MVT VT) const {
+    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
+           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
+ }
+
+ bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+};
+
+} // end anonymous namespace.
+
+bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+ VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // For now, require SSE/SSE2 for performing floating-point operations,
+ // since x87 requires additional work.
+ if (VT == MVT::f64 && !X86ScalarSSEf64)
+ return false;
+ if (VT == MVT::f32 && !X86ScalarSSEf32)
+ return false;
+ // Similarly, no f80 support yet.
+ if (VT == MVT::f80)
+ return false;
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
+}
+
+#include "X86GenCallingConv.inc"
+
+/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
+/// convention.
+CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTailCall) {
+ if (Subtarget->is64Bit()) {
+ if (Subtarget->isTargetWin64())
+ return CC_X86_Win64_C;
+    else if (CC == CallingConv::Fast && isTailCall)
+ return CC_X86_64_TailCall;
+ else
+ return CC_X86_64_C;
+ }
+
+ if (CC == CallingConv::X86_FastCall)
+ return CC_X86_32_FastCall;
+ else if (CC == CallingConv::Fast)
+ return CC_X86_32_FastCC;
+ else
+ return CC_X86_32_C;
+}
+
+/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
+/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
+/// Return true and the result register by reference if it is possible.
+bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
+ unsigned &ResultReg) {
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ return true;
+}
+
+/// X86FastEmitStore - Emit a machine instruction to store a value Val of
+/// type VT. The address is either pre-computed, consisting of a base ptr Ptr
+/// and a displacement offset, or a GlobalAddress, i.e. V. Return true if it
+/// is possible.
+bool
+X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
+ const X86AddressMode &AM) {
+ // Get opcode and regclass of the output for the given store instruction.
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ case MVT::f80: // No f80 support yet.
+ default: return false;
+ case MVT::i8: Opc = X86::MOV8mr; break;
+ case MVT::i16: Opc = X86::MOV16mr; break;
+ case MVT::i32: Opc = X86::MOV32mr; break;
+ case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::f32:
+ Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
+ break;
+ case MVT::f64:
+ Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
+ break;
+ }
+
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
+ return true;
+}
+
+bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
+ const X86AddressMode &AM) {
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Val))
+ Val = Constant::getNullValue(TD.getIntPtrType());
+
+ // If this is a store of a simple constant, fold the constant into the store.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ default: break;
+ case MVT::i8: Opc = X86::MOV8mi; break;
+ case MVT::i16: Opc = X86::MOV16mi; break;
+ case MVT::i32: Opc = X86::MOV32mi; break;
+ case MVT::i64:
+ // Must be a 32-bit sign extended value.
+ if ((int)CI->getSExtValue() == CI->getSExtValue())
+ Opc = X86::MOV64mi32;
+ break;
+ }
+
+ if (Opc) {
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
+ .addImm(CI->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned ValReg = getRegForValue(Val);
+ if (ValReg == 0)
+ return false;
+
+ return X86FastEmitStore(VT, ValReg, AM);
+}
+
+/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
+/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
+/// ISD::SIGN_EXTEND).
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
+ unsigned Src, MVT SrcVT,
+ unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+/// X86SelectAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
+ User *U;
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+ break;
+
+ case Instruction::Alloca: {
+ if (isCall) break;
+ // Do static allocas.
+ const AllocaInst *A = cast<AllocaInst>(V);
+ DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
+ if (SI != StaticAllocaMap.end()) {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = SI->second;
+ return true;
+ }
+ break;
+ }
+
+ case Instruction::Add: {
+ if (isCall) break;
+ // Adds of constants are common and easy enough.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
+ // They have to fit in the 32-bit signed displacement field though.
+ if (isInt32(Disp)) {
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+ }
+ }
+ break;
+ }
+
+ case Instruction::GetElementPtr: {
+ if (isCall) break;
+ // Pattern-match simple GEPs.
+ uint64_t Disp = (int32_t)AM.Disp;
+ unsigned IndexReg = AM.IndexReg;
+ unsigned Scale = AM.Scale;
+ gep_type_iterator GTI = gep_type_begin(U);
+ // Iterate through the indices, folding what we can. Constants can be
+ // folded, and one dynamic index can be handled, if the scale is supported.
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ Disp += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ } else if (IndexReg == 0 &&
+ (!AM.GV ||
+ !getTargetMachine()->symbolicAddressesAreRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op);
+ if (IndexReg == 0)
+ return false;
+ } else
+ // Unsupported.
+ goto unsupported_gep;
+ }
+ }
+ // Check for displacement overflow.
+ if (!isInt32(Disp))
+ break;
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ AM.IndexReg = IndexReg;
+ AM.Scale = Scale;
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
+ }
+ }
+
+ // Handle constant address.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Default &&
+ TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (getTargetMachine()->symbolicAddressesAreRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle TLS yet.
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Set up the basic address.
+ AM.GV = GV;
+ if (!isCall &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->is64Bit())
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
+
+ // Emit an extra load if the ABI requires it.
+ if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
+ // Check to see if we've already materialized this
+ // value in a register in this block.
+ if (unsigned Reg = LocalValueMap[V]) {
+ AM.Base.Reg = Reg;
+ AM.GV = 0;
+ return true;
+ }
+ // Issue load from stub if necessary.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (TLI.getPointerTy() == MVT::i32) {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ } else {
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ }
+
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = AM.GV;
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM);
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = ResultReg;
+ AM.GV = 0;
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = AM.Base.Reg;
+ }
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !getTargetMachine()->symbolicAddressesAreRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+/// X86SelectStore - Select and emit code to implement store instructions.
+bool X86FastISel::X86SelectStore(Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(1), AM, false))
+ return false;
+
+ return X86FastEmitStore(VT, I->getOperand(0), AM);
+}
+
+/// X86SelectLoad - Select and emit code to implement load instructions.
+///
+bool X86FastISel::X86SelectLoad(Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(0), AM, false))
+ return false;
+
+ unsigned ResultReg = 0;
+ if (X86FastEmitLoad(VT, AM, ResultReg)) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ return false;
+}
+
+static unsigned X86ChooseCmpOpcode(MVT VT) {
+ switch (VT.getSimpleVT()) {
+ default: return 0;
+ case MVT::i8: return X86::CMP8rr;
+ case MVT::i16: return X86::CMP16rr;
+ case MVT::i32: return X86::CMP32rr;
+ case MVT::i64: return X86::CMP64rr;
+ case MVT::f32: return X86::UCOMISSrr;
+ case MVT::f64: return X86::UCOMISDrr;
+ }
+}
+
+/// X86ChooseCmpImmediateOpcode - If the constant RHSC, used as the RHS of a
+/// comparison, can be folded into the compare as an immediate, return an
+/// opcode that works for it (e.g. CMP32ri); otherwise return 0.
+static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
+ switch (VT.getSimpleVT()) {
+ // Otherwise, we can't fold the immediate into this comparison.
+ default: return 0;
+ case MVT::i8: return X86::CMP8ri;
+ case MVT::i16: return X86::CMP16ri;
+ case MVT::i32: return X86::CMP32ri;
+ case MVT::i64:
+ // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
+ // field.
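+    // For example, -1 and 2147483647 fold into CMP64ri32; 2147483648 does
+    // not, so the caller falls back to a register-register compare.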
+ if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ return X86::CMP64ri32;
+ return 0;
+ }
+}
+
+bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (Op0Reg == 0) return false;
+
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Op1))
+ Op1 = Constant::getNullValue(TD.getIntPtrType());
+
+ // We have two options: compare with register or immediate. If the RHS of
+ // the compare is an immediate that we can fold into this compare, use
+ // CMPri, otherwise use CMPrr.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
+ BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT);
+ if (CompareOpc == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(Op1);
+ if (Op1Reg == 0) return false;
+ BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);
+
+ return true;
+}
+
+bool X86FastISel::X86SelectCmp(Instruction *I) {
+ CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ unsigned ResultReg = createResultReg(&X86::GR8RegClass);
+ unsigned SetCCOpc;
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ switch (CI->getPredicate()) {
+ case CmpInst::FCMP_OEQ: {
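+    // OEQ means "ordered and equal". UCOMISS/UCOMISD set ZF=1 for equal
+    // operands but also for unordered ones (which additionally set PF=1),
+    // so the result is computed as SETE AND SETNP.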
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned EReg = createResultReg(&X86::GR8RegClass);
+ unsigned NPReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
+ BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
+ BuildMI(MBB, DL,
+ TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_UNE: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned NEReg = createResultReg(&X86::GR8RegClass);
+ unsigned PReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
+ default:
+ return false;
+ }
+
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of Op0/Op1.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectZExt(Instruction *I) {
+ // Handle zero-extension from i1 to i8, which is common.
+ if (I->getType() == Type::Int8Ty &&
+ I->getOperand(0)->getType() == Type::Int1Ty) {
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0) return false;
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
+ if (ResultReg == 0) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool X86FastISel::X86SelectBranch(Instruction *I) {
+ // Unconditional branches are selected by tablegen-generated code.
+ // Handle a conditional branch.
+ BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
+
+ // Fold the common case of a conditional branch with a comparison.
+ if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse()) {
+ MVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
+
+ switch (Predicate) {
+ case CmpInst::FCMP_OEQ:
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::FCMP_UNE;
+ // FALL THROUGH
+ case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break;
+ case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
+ default:
+ return false;
+ }
+
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);
+
+ if (Predicate == CmpInst::FCMP_UNE) {
+ // X86 requires a second branch to handle UNE (and OEQ,
+ // which is mapped to UNE above).
+ BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB);
+ }
+
+ FastEmitBranch(FalseMBB);
+ MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ } else if (ExtractValueInst *EI =
+ dyn_cast<ExtractValueInst>(BI->getCondition())) {
+ // Check to see if the branch instruction is from an "arithmetic with
+ // overflow" intrinsic. The main way these intrinsics are used is:
+ //
+ // %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ // %sum = extractvalue { i32, i1 } %t, 0
+ // %obit = extractvalue { i32, i1 } %t, 1
+ // br i1 %obit, label %overflow, label %normal
+ //
+    // The %sum and %obit are converted into an ADD and a SETO/SETB before
+ // reaching the branch. Therefore, we search backwards through the MBB
+ // looking for the SETO/SETB instruction. If an instruction modifies the
+ // EFLAGS register before we reach the SETO/SETB instruction, then we can't
+ // convert the branch into a JO/JB instruction.
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
+ if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
+ CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
+ const MachineInstr *SetMI = 0;
+ unsigned Reg = lookUpRegForValue(EI);
+
+ for (MachineBasicBlock::const_reverse_iterator
+ RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
+ const MachineInstr &MI = *RI;
+
+ if (MI.modifiesRegister(Reg)) {
+ unsigned Src, Dst, SrcSR, DstSR;
+
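+            // If the def is just a register copy, follow its source back
+            // toward the SETO/SETB that produced the overflow bit.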
+ if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
+ Reg = Src;
+ continue;
+ }
+
+ SetMI = &MI;
+ break;
+ }
+
+ const TargetInstrDesc &TID = MI.getDesc();
+ if (TID.hasUnmodeledSideEffects() ||
+ TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
+ break;
+ }
+
+ if (SetMI) {
+ unsigned OpCode = SetMI->getOpcode();
+
+ if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
+ BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB);
+ MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
+ }
+ }
+ }
+
+ // Otherwise do a clumsy setcc and re-test it.
+ unsigned OpReg = getRegForValue(BI->getCondition());
+ if (OpReg == 0) return false;
+
+ BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
+ BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB);
+ MBB->addSuccessor(TrueMBB);
+ return true;
+}
+
+bool X86FastISel::X86SelectShift(Instruction *I) {
+ unsigned CReg = 0, OpReg = 0, OpImm = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (I->getType() == Type::Int8Ty) {
+ CReg = X86::CL;
+ RC = &X86::GR8RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
+ default: return false;
+ }
+ } else if (I->getType() == Type::Int16Ty) {
+ CReg = X86::CX;
+ RC = &X86::GR16RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
+ default: return false;
+ }
+ } else if (I->getType() == Type::Int32Ty) {
+ CReg = X86::ECX;
+ RC = &X86::GR32RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
+ default: return false;
+ }
+ } else if (I->getType() == Type::Int64Ty) {
+ CReg = X86::RCX;
+ RC = &X86::GR64RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
+ default: return false;
+ }
+ } else {
+ return false;
+ }
+
+ MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+
+  // Fold an immediate shift count, e.g. shl(x, 3).
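+  // The *ri shift forms take an 8-bit immediate, hence the mask to 0xff.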
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(MBB, DL, TII.get(OpImm),
+ ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
+
+ // The shift instruction uses X86::CL. If we defined a super-register
+ // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
+ // we're doing here.
+ if (CReg != X86::CL)
+ BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL)
+ .addReg(CReg).addImm(X86::SUBREG_8BIT);
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectSelect(Instruction *I) {
+ MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (VT.getSimpleVT() == MVT::i16) {
+ Opc = X86::CMOVE16rr;
+ RC = &X86::GR16RegClass;
+ } else if (VT.getSimpleVT() == MVT::i32) {
+ Opc = X86::CMOVE32rr;
+ RC = &X86::GR32RegClass;
+ } else if (VT.getSimpleVT() == MVT::i64) {
+ Opc = X86::CMOVE64rr;
+ RC = &X86::GR64RegClass;
+ } else {
+ return false;
+ }
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectFPExt(Instruction *I) {
+ // fpext from float to double.
+ if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) {
+ Value *V = I->getOperand(0);
+ if (V->getType() == Type::FloatTy) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
+ BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
+ if (Subtarget->hasSSE2()) {
+ if (I->getType() == Type::FloatTy) {
+ Value *V = I->getOperand(0);
+ if (V->getType() == Type::DoubleTy) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
+ BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectTrunc(Instruction *I) {
+ if (Subtarget->is64Bit())
+    // All other cases should be handled by the tablegen-generated code.
+ return false;
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+
+ // This code only handles truncation to byte right now.
+ if (DstVT != MVT::i8 && DstVT != MVT::i1)
+    // All other cases should be handled by the tablegen-generated code.
+ return false;
+ if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
+    // All other cases should be handled by the tablegen-generated code.
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
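+  // In 32-bit mode only EAX/EBX/ECX/EDX have addressable 8-bit subregisters,
+  // so the input must be constrained to the ABCD register classes before the
+  // low byte can be extracted.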
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
+ unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
+ ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);
+
+ // Then issue an extract_subreg.
+ unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
+ CopyReg, X86::SUBREG_8BIT);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectExtractValue(Instruction *I) {
+ ExtractValueInst *EI = cast<ExtractValueInst>(I);
+ Value *Agg = EI->getAggregateOperand();
+
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
+ switch (CI->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ // Cheat a little. We know that the registers for "add" and "seto" are
+ // allocated sequentially. However, we only keep track of the register
+ // for "add" in the value map. Use extractvalue's index to get the
+ // correct register for "seto".
+ UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
+ // Replace "add with overflow" intrinsics with an "add" instruction followed
+    // by a seto/setb instruction. Later on, when the "extractvalue"
+ // instructions are encountered, we use the fact that two registers were
+ // created sequentially to get the correct registers for the "sum" and the
+ // "overflow bit".
+ const Function *Callee = I.getCalledFunction();
+ const Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ Value *Op1 = I.getOperand(1);
+ Value *Op2 = I.getOperand(2);
+ unsigned Reg1 = getRegForValue(Op1);
+ unsigned Reg2 = getRegForValue(Op2);
+
+ if (Reg1 == 0 || Reg2 == 0)
+ // FIXME: Handle values *not* in registers.
+ return false;
+
+ unsigned OpC = 0;
+ if (VT == MVT::i32)
+ OpC = X86::ADD32rr;
+ else if (VT == MVT::i64)
+ OpC = X86::ADD64rr;
+ else
+ return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
+ unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
+
+ // If the add with overflow is an intra-block value then we just want to
+ // create temporaries for it like normal. If it is a cross-block value then
+ // UpdateValueMap will return the cross-block register used. Since we
+ // *really* want the value to be live in the register pair known by
+ // UpdateValueMap, we have to use DestReg1+1 as the destination register in
+ // the cross block case. In the non-cross-block case, we should just make
+ // another register for the value.
+ if (DestReg1 != ResultReg)
+ ResultReg = DestReg1+1;
+ else
+ ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
+
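+    // Unsigned overflow sets the carry flag (SETB); signed overflow sets the
+    // overflow flag (SETO).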
+ unsigned Opc = X86::SETBr;
+ if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
+ Opc = X86::SETOr;
+ BuildMI(MBB, DL, TII.get(Opc), ResultReg);
+ return true;
+ }
+ }
+}
+
+bool X86FastISel::X86SelectCall(Instruction *I) {
+ CallInst *CI = cast<CallInst>(I);
+ Value *Callee = I->getOperand(0);
+
+ // Can't handle inline asm yet.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Handle intrinsic calls.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
+ return X86VisitIntrinsicCall(*II);
+
+ // Handle only C and fastcc calling conventions for now.
+ CallSite CS(CI);
+ unsigned CC = CS.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't
+ // handle this for now.
+ if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ if (FTy->isVarArg())
+ return false;
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = CS.getType();
+ MVT RetVT;
+ if (RetTy == Type::VoidTy)
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT, true))
+ return false;
+
+ // Materialize callee address in a register. FIXME: GV address can be
+ // handled with a CALLpcrel32 instead.
+ X86AddressMode CalleeAM;
+ if (!X86SelectAddress(Callee, CalleeAM, true))
+ return false;
+ unsigned CalleeOp = 0;
+ GlobalValue *GV = 0;
+ if (CalleeAM.Base.Reg != 0) {
+ assert(CalleeAM.GV == 0);
+ CalleeOp = CalleeAM.Base.Reg;
+  } else if (CalleeAM.GV != 0) {
+    GV = CalleeAM.GV;
+ } else
+ return false;
+
+ // Allow calls which produce i1 results.
+ bool AndToI1 = false;
+ if (RetVT == MVT::i1) {
+ RetVT = MVT::i8;
+ AndToI1 = true;
+ }
+
+ // Deal with call operands first.
+ SmallVector<Value*, 8> ArgVals;
+ SmallVector<unsigned, 8> Args;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgVals.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ unsigned Arg = getRegForValue(*i);
+ if (Arg == 0)
+ return false;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ const Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Arg);
+ ArgVals.push_back(*i);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, TM, ArgLocs);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);
+
+  // Process arguments: walk the register/memloc assignments, inserting
+ // copies / loads.
+ SmallVector<unsigned, 4> RegArgs;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = Args[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
+ Arg, RC, RC);
+ assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
+ Emitted = true;
+ RegArgs.push_back(VA.getLocReg());
+ } else {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ X86AddressMode AM;
+ AM.Base.Reg = StackPtr;
+ AM.Disp = LocMemOffset;
+ Value *ArgVal = ArgVals[VA.getValNo()];
+
+ // If this is a really simple value, emit this with the Value* version of
+ // X86FastEmitStore. If it isn't simple, we don't want to do this, as it
+ // can cause us to reevaluate the argument.
+ if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
+ X86FastEmitStore(ArgVT, ArgVal, AM);
+ else
+ X86FastEmitStore(ArgVT, Arg, AM);
+ }
+ }
+
+  // ELF / PIC requires the GOT pointer to be in the EBX register before
+  // function calls made via the PLT.
+ if (!Subtarget->is64Bit() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT()) {
+ TargetRegisterClass *RC = X86::GR32RegisterClass;
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
+ assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
+ Emitted = true;
+ }
+
+ // Issue the call.
+ unsigned CallOpc = CalleeOp
+ ? (Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r)
+ : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32);
+ MachineInstrBuilder MIB = CalleeOp
+ ? BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp)
+ : BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV);
+
+  // Add an implicit use of the GOT pointer in EBX.
+ if (!Subtarget->is64Bit() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ MIB.addReg(X86::EBX);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
+
+ // Now handle call return value (if any).
+ if (RetVT.getSimpleVT() != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, TM, RVLocs);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
+ MVT CopyVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+ TargetRegisterClass *SrcRC = DstRC;
+
+ // If this is a call to a function that returns an fp value on the x87 fp
+ // stack, but where we prefer to use the value in xmm registers, copy it
+ // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if ((RVLocs[0].getLocReg() == X86::ST0 ||
+ RVLocs[0].getLocReg() == X86::ST1) &&
+ isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
+ CopyVT = MVT::f80;
+ SrcRC = X86::RSTRegisterClass;
+ DstRC = X86::RFP80RegisterClass;
+ }
+
+ unsigned ResultReg = createResultReg(DstRC);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ RVLocs[0].getLocReg(), DstRC, SrcRC);
+ assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
+ Emitted = true;
+ if (CopyVT != RVLocs[0].getValVT()) {
+      // Round the F80 to the right size, which also moves it to the
+      // appropriate xmm register. This is accomplished by storing the F80
+      // value in memory and then loading it back. Ewww...
+ MVT ResVT = RVLocs[0].getValVT();
+ unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+ unsigned MemSize = ResVT.getSizeInBits()/8;
+ int FI = MFI.CreateStackObject(MemSize, MemSize);
+ addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
+ DstRC = ResVT == MVT::f32
+ ? X86::FR32RegisterClass : X86::FR64RegisterClass;
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ ResultReg = createResultReg(DstRC);
+ addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
+ }
+
+ if (AndToI1) {
+      // Mask out all but the lowest bit for a call that produces an i1.
+ unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ BuildMI(MBB, DL,
+ TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
+ ResultReg = AndResult;
+ }
+
+ UpdateValueMap(I, ResultReg);
+ }
+
+ return true;
+}
+
+
+bool
+X86FastISel::TargetSelectInstruction(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ return X86SelectLoad(I);
+ case Instruction::Store:
+ return X86SelectStore(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return X86SelectCmp(I);
+ case Instruction::ZExt:
+ return X86SelectZExt(I);
+ case Instruction::Br:
+ return X86SelectBranch(I);
+ case Instruction::Call:
+ return X86SelectCall(I);
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ return X86SelectShift(I);
+ case Instruction::Select:
+ return X86SelectSelect(I);
+ case Instruction::Trunc:
+ return X86SelectTrunc(I);
+ case Instruction::FPExt:
+ return X86SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return X86SelectFPTrunc(I);
+ case Instruction::ExtractValue:
+ return X86SelectExtractValue(I);
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return X86SelectZExt(I);
+ if (DstVT.bitsLT(SrcVT))
+ return X86SelectTrunc(I);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
+ MVT VT;
+ if (!isTypeLegal(C->getType(), VT))
+    return 0;
+
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.getSimpleVT()) {
+  default: return 0;
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ // Materialize addresses with LEA instructions.
+ if (isa<GlobalValue>(C)) {
+ X86AddressMode AM;
+ if (X86SelectAddress(C, AM, false)) {
+ if (TLI.getPointerTy() == MVT::i32)
+ Opc = X86::LEA32r;
+ else
+ Opc = X86::LEA64r;
+ unsigned ResultReg = createResultReg(RC);
+ addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+ }
+ return 0;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->is64Bit())
+ PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+
+ // Create the load from the constant pool.
+ unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+ unsigned ResultReg = createResultReg(RC);
+ addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), MCPOffset,
+ PICBase);
+
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
+ // Fail on dynamic allocas. At this point, getRegForValue has already
+ // checked its CSE maps, so if we're here trying to handle a dynamic
+ // alloca, we're not going to succeed. X86SelectAddress has a
+ // check for dynamic allocas, because it's called directly from
+ // various places, but TargetMaterializeAlloca also needs a check
+ // in order to avoid recursion between getRegForValue,
+  // X86SelectAddress, and TargetMaterializeAlloca.
+ if (!StaticAllocaMap.count(C))
+ return 0;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(C, AM, false))
+ return 0;
+ unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+}
+
+namespace llvm {
+ llvm::FastISel *X86::createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ ) {
+ return new X86FastISel(mf, mmi, dw, vm, bm, am
+#ifndef NDEBUG
+ , cil
+#endif
+ );
+ }
+}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
new file mode 100644
index 0000000..0f2fbcc
--- /dev/null
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -0,0 +1,1187 @@
+//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which converts floating point instructions from
+// virtual registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// This pass is hampered by the lack of decent CFG manipulation routines for
+// machine code. In particular, this wants to be able to split critical edges
+// as necessary, traverse the machine basic block CFG in depth-first order, and
+// allow there to be multiple machine basic blocks for each LLVM basicblock
+// (needed for critical edge splitting).
+//
+// In particular, this pass currently barfs on critical edges. Because of this,
+// it requires the instruction selector to insert FP_REG_KILL instructions on
+// the exits of any basic block that has critical edges going from it, or which
+// branches to a critical basic block.
+//
+// FIXME: this is not implemented yet. The stackifier pass only works on local
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFXCH, "Number of fxch instructions inserted");
+STATISTIC(NumFP , "Number of floating point instructions");
+
+namespace {
+ struct VISIBILITY_HIDDEN FPS : public MachineFunctionPass {
+ static char ID;
+ FPS() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+ MachineBasicBlock *MBB; // Current basic block
+ unsigned Stack[8]; // FP<n> Registers in each stack slot...
+ unsigned RegMap[8]; // Track which stack slot contains each register
+ unsigned StackTop; // The current top of the FP stack.
+
+ void dumpStack() const {
+ cerr << "Stack contents:";
+ for (unsigned i = 0; i != StackTop; ++i) {
+ cerr << " FP" << Stack[i];
+ assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
+ }
+ cerr << "\n";
+ }
+ private:
+ /// isStackEmpty - Return true if the FP stack is empty.
+ bool isStackEmpty() const {
+ return StackTop == 0;
+ }
+
+ // getSlot - Return the stack slot number a particular register number is
+ // in.
+ unsigned getSlot(unsigned RegNo) const {
+ assert(RegNo < 8 && "Regno out of range!");
+ return RegMap[RegNo];
+ }
+
+ // getStackEntry - Return the X86::FP<n> register in register ST(i).
+ unsigned getStackEntry(unsigned STi) const {
+ assert(STi < StackTop && "Access past stack top!");
+ return Stack[StackTop-1-STi];
+ }
+
+ // getSTReg - Return the X86::ST(i) register which contains the specified
+ // FP<RegNo> register.
+ unsigned getSTReg(unsigned RegNo) const {
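+      // Slot StackTop-1 holds the top of stack, i.e. ST(0).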
+ return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ }
+
+ // pushReg - Push the specified FP<n> register onto the stack.
+ void pushReg(unsigned Reg) {
+ assert(Reg < 8 && "Register number out of range!");
+ assert(StackTop < 8 && "Stack overflow!");
+ Stack[StackTop] = Reg;
+ RegMap[Reg] = StackTop++;
+ }
+
+ bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
+ void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ if (isAtTop(RegNo)) return;
+
+ unsigned STReg = getSTReg(RegNo);
+ unsigned RegOnTop = getStackEntry(0);
+
+ // Swap the slots the regs are in.
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents.
+ assert(RegMap[RegOnTop] < StackTop);
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+
+      // Emit an fxch to update the runtime processor's version of the state.
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
+ NumFXCH++;
+ }
+
+ void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
+ DebugLoc dl = I->getDebugLoc();
+ unsigned STReg = getSTReg(RegNo);
+ pushReg(AsReg); // New register on top of stack
+
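+      // fld ST(i) pushes a copy of the source register onto the stack top.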
+ BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
+ }
+
+ // popStackAfter - Pop the current value off of the top of the FP stack
+ // after the specified instruction.
+ void popStackAfter(MachineBasicBlock::iterator &I);
+
+ // freeStackSlotAfter - Free the specified register from the register stack,
+ // so that it is no longer in a register. If the register is currently at
+ // the top of the stack, we just pop the current instruction, otherwise we
+ // store the current top-of-stack into the specified slot, then pop the top
+ // of stack.
+ void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void handleZeroArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFPRW(MachineBasicBlock::iterator &I);
+ void handleTwoArgFP(MachineBasicBlock::iterator &I);
+ void handleCompareFP(MachineBasicBlock::iterator &I);
+ void handleCondMovFP(MachineBasicBlock::iterator &I);
+ void handleSpecialFP(MachineBasicBlock::iterator &I);
+ };
+ char FPS::ID = 0;
+}
+
+FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
+
+/// getFPReg - Return the X86::FPx register number for the specified operand.
+/// For example, this returns 3 for X86::FP3.
+static unsigned getFPReg(const MachineOperand &MO) {
+ assert(MO.isReg() && "Expected an FP register!");
+ unsigned Reg = MO.getReg();
+ assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+ return Reg - X86::FP0;
+}
+
+
+/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
+/// register references into FP stack references.
+///
+bool FPS::runOnMachineFunction(MachineFunction &MF) {
+ // We only need to run this pass if there are any FP registers used in this
+ // function. If it is all integer, there is nothing for us to do!
+ bool FPIsUsed = false;
+
+ assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ for (unsigned i = 0; i <= 6; ++i)
+ if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ FPIsUsed = true;
+ break;
+ }
+
+ // Early exit.
+ if (!FPIsUsed) return false;
+
+ TII = MF.getTarget().getInstrInfo();
+ StackTop = 0;
+
+ // Process the function in depth first order so that we process at least one
+ // of the predecessors for every reachable block in the function.
+ SmallPtrSet<MachineBasicBlock*, 8> Processed;
+ MachineBasicBlock *Entry = MF.begin();
+
+ bool Changed = false;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+ I != E; ++I)
+ Changed |= processBasicBlock(MF, **I);
+
+ return Changed;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// transforming FP instructions into their stack form.
+///
+bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ unsigned Flags = MI->getDesc().TSFlags;
+
+ unsigned FPInstClass = Flags & X86II::FPTypeMask;
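+    // Inline asm may use or clobber FP stack registers, so treat it
+    // conservatively as a SpecialFP instruction.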
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM)
+ FPInstClass = X86II::SpecialFP;
+
+ if (FPInstClass == X86II::NotFP)
+ continue; // Efficiently ignore non-fp insts!
+
+ MachineInstr *PrevMI = 0;
+ if (I != BB.begin())
+ PrevMI = prior(I);
+
+ ++NumFP; // Keep track of # of pseudo instrs
+ DOUT << "\nFPInst:\t" << *MI;
+
+ // Get dead variables list now because the MI pointer may be deleted as part
+ // of processing!
+ SmallVector<unsigned, 8> DeadRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadRegs.push_back(MO.getReg());
+ }
+
+ switch (FPInstClass) {
+ case X86II::ZeroArgFP: handleZeroArgFP(I); break;
+ case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
+ case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+ case X86II::TwoArgFP: handleTwoArgFP(I); break;
+ case X86II::CompareFP: handleCompareFP(I); break;
+ case X86II::CondMovFP: handleCondMovFP(I); break;
+ case X86II::SpecialFP: handleSpecialFP(I); break;
+ default: assert(0 && "Unknown FP Type!");
+ }
+
+ // Check to see if any of the values defined by this instruction are dead
+ // after definition. If so, pop them.
+ for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
+ unsigned Reg = DeadRegs[i];
+ if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n";
+ freeStackSlotAfter(I, Reg-X86::FP0);
+ }
+ }
+
+    // Print out all of the instructions this pseudo expanded to, if -debug.
+ DEBUG(
+ MachineBasicBlock::iterator PrevI(PrevMI);
+ if (I == PrevI) {
+ cerr << "Just deleted pseudo instruction\n";
+ } else {
+ MachineBasicBlock::iterator Start = I;
+ // Rewind to first instruction newly inserted.
+ while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ cerr << "Inserted instructions:\n\t";
+ Start->print(*cerr.stream(), &MF.getTarget());
+ while (++Start != next(I)) {}
+ }
+ dumpStack();
+ );
+
+ Changed = true;
+ }
+
+ assert(isStackEmpty() && "Stack not empty at end of basic block?");
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct TableEntry {
+ unsigned from;
+ unsigned to;
+ bool operator<(const TableEntry &TE) const { return from < TE.from; }
+ friend bool operator<(const TableEntry &TE, unsigned V) {
+ return TE.from < V;
+ }
+ friend bool operator<(unsigned V, const TableEntry &TE) {
+ return V < TE.from;
+ }
+ };
+}
+
+#ifndef NDEBUG
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ if (!(Table[i] < Table[i+1])) return false;
+ return true;
+}
+#endif
+
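+// Lookup - Binary search the sorted table for the entry matching Opcode.
+// Returns the mapped value, or -1 if Opcode has no entry.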
+static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+ if (I != Table+N && I->from == Opcode)
+ return I->to;
+ return -1;
+}
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE) \
+ { static bool TABLE##Checked = false; \
+ if (!TABLE##Checked) { \
+ assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
+ "All lookup tables must be sorted for efficient access!"); \
+ TABLE##Checked = true; \
+ } \
+ }
+#endif
+
+//===----------------------------------------------------------------------===//
+// Register File -> Register Stack Mapping Methods
+//===----------------------------------------------------------------------===//
+
+// OpcodeTable - Sorted map of register instructions to their stack version.
+// The first element is a register file pseudo instruction, the second is the
+// concrete X86 instruction which uses the register stack.
+//
+static const TableEntry OpcodeTable[] = {
+ { X86::ABS_Fp32 , X86::ABS_F },
+ { X86::ABS_Fp64 , X86::ABS_F },
+ { X86::ABS_Fp80 , X86::ABS_F },
+ { X86::ADD_Fp32m , X86::ADD_F32m },
+ { X86::ADD_Fp64m , X86::ADD_F64m },
+ { X86::ADD_Fp64m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m64 , X86::ADD_F64m },
+ { X86::ADD_FpI16m32 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m64 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m80 , X86::ADD_FI16m },
+ { X86::ADD_FpI32m32 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m64 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m80 , X86::ADD_FI32m },
+ { X86::CHS_Fp32 , X86::CHS_F },
+ { X86::CHS_Fp64 , X86::CHS_F },
+ { X86::CHS_Fp80 , X86::CHS_F },
+ { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
+ { X86::CMOVB_Fp32 , X86::CMOVB_F },
+ { X86::CMOVB_Fp64 , X86::CMOVB_F },
+ { X86::CMOVB_Fp80 , X86::CMOVB_F },
+ { X86::CMOVE_Fp32 , X86::CMOVE_F },
+ { X86::CMOVE_Fp64 , X86::CMOVE_F },
+ { X86::CMOVE_Fp80 , X86::CMOVE_F },
+ { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
+ { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
+ { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
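+  // Test the condition; CMOVE then replaces the true value (Op1) with the
+  // false value (Op2) when the condition is zero (ZF set).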
+ { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
+ { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
+ { X86::CMOVP_Fp32 , X86::CMOVP_F },
+ { X86::CMOVP_Fp64 , X86::CMOVP_F },
+ { X86::CMOVP_Fp80 , X86::CMOVP_F },
+ { X86::COS_Fp32 , X86::COS_F },
+ { X86::COS_Fp64 , X86::COS_F },
+ { X86::COS_Fp80 , X86::COS_F },
+ { X86::DIVR_Fp32m , X86::DIVR_F32m },
+ { X86::DIVR_Fp64m , X86::DIVR_F64m },
+ { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
+ { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
+ { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
+ { X86::DIV_Fp32m , X86::DIV_F32m },
+ { X86::DIV_Fp64m , X86::DIV_F64m },
+ { X86::DIV_Fp64m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m64 , X86::DIV_F64m },
+ { X86::DIV_FpI16m32 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m64 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m80 , X86::DIV_FI16m },
+ { X86::DIV_FpI32m32 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m64 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m80 , X86::DIV_FI32m },
+ { X86::ILD_Fp16m32 , X86::ILD_F16m },
+ { X86::ILD_Fp16m64 , X86::ILD_F16m },
+ { X86::ILD_Fp16m80 , X86::ILD_F16m },
+ { X86::ILD_Fp32m32 , X86::ILD_F32m },
+ { X86::ILD_Fp32m64 , X86::ILD_F32m },
+ { X86::ILD_Fp32m80 , X86::ILD_F32m },
+ { X86::ILD_Fp64m32 , X86::ILD_F64m },
+ { X86::ILD_Fp64m64 , X86::ILD_F64m },
+ { X86::ILD_Fp64m80 , X86::ILD_F64m },
+ { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
+ { X86::IST_Fp16m32 , X86::IST_F16m },
+ { X86::IST_Fp16m64 , X86::IST_F16m },
+ { X86::IST_Fp16m80 , X86::IST_F16m },
+ { X86::IST_Fp32m32 , X86::IST_F32m },
+ { X86::IST_Fp32m64 , X86::IST_F32m },
+ { X86::IST_Fp32m80 , X86::IST_F32m },
+ { X86::IST_Fp64m32 , X86::IST_FP64m },
+ { X86::IST_Fp64m64 , X86::IST_FP64m },
+ { X86::IST_Fp64m80 , X86::IST_FP64m },
+ { X86::LD_Fp032 , X86::LD_F0 },
+ { X86::LD_Fp064 , X86::LD_F0 },
+ { X86::LD_Fp080 , X86::LD_F0 },
+ { X86::LD_Fp132 , X86::LD_F1 },
+ { X86::LD_Fp164 , X86::LD_F1 },
+ { X86::LD_Fp180 , X86::LD_F1 },
+ { X86::LD_Fp32m , X86::LD_F32m },
+ { X86::LD_Fp32m64 , X86::LD_F32m },
+ { X86::LD_Fp32m80 , X86::LD_F32m },
+ { X86::LD_Fp64m , X86::LD_F64m },
+ { X86::LD_Fp64m80 , X86::LD_F64m },
+ { X86::LD_Fp80m , X86::LD_F80m },
+ { X86::MUL_Fp32m , X86::MUL_F32m },
+ { X86::MUL_Fp64m , X86::MUL_F64m },
+ { X86::MUL_Fp64m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m64 , X86::MUL_F64m },
+ { X86::MUL_FpI16m32 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m64 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m80 , X86::MUL_FI16m },
+ { X86::MUL_FpI32m32 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m64 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m80 , X86::MUL_FI32m },
+ { X86::SIN_Fp32 , X86::SIN_F },
+ { X86::SIN_Fp64 , X86::SIN_F },
+ { X86::SIN_Fp80 , X86::SIN_F },
+ { X86::SQRT_Fp32 , X86::SQRT_F },
+ { X86::SQRT_Fp64 , X86::SQRT_F },
+ { X86::SQRT_Fp80 , X86::SQRT_F },
+ { X86::ST_Fp32m , X86::ST_F32m },
+ { X86::ST_Fp64m , X86::ST_F64m },
+ { X86::ST_Fp64m32 , X86::ST_F32m },
+ { X86::ST_Fp80m32 , X86::ST_F32m },
+ { X86::ST_Fp80m64 , X86::ST_F64m },
+ { X86::ST_FpP80m , X86::ST_FP80m },
+ { X86::SUBR_Fp32m , X86::SUBR_F32m },
+ { X86::SUBR_Fp64m , X86::SUBR_F64m },
+ { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
+ { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
+ { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
+ { X86::SUB_Fp32m , X86::SUB_F32m },
+ { X86::SUB_Fp64m , X86::SUB_F64m },
+ { X86::SUB_Fp64m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m64 , X86::SUB_F64m },
+ { X86::SUB_FpI16m32 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m64 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m80 , X86::SUB_FI16m },
+ { X86::SUB_FpI32m32 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m64 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m80 , X86::SUB_FI32m },
+ { X86::TST_Fp32 , X86::TST_F },
+ { X86::TST_Fp64 , X86::TST_F },
+ { X86::TST_Fp80 , X86::TST_F },
+ { X86::UCOM_FpIr32 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr64 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr80 , X86::UCOM_FIr },
+ { X86::UCOM_Fpr32 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr64 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr80 , X86::UCOM_Fr },
+};
+
+static unsigned getConcreteOpcode(unsigned Opcode) {
+ ASSERT_SORTED(OpcodeTable);
+ int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
+ assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
+ return Opc;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Methods
+//===----------------------------------------------------------------------===//
+
+// PopTable - Sorted map of instructions to their popping version. The first
+// element is an instruction, the second is the version which pops.
+//
+static const TableEntry PopTable[] = {
+ { X86::ADD_FrST0 , X86::ADD_FPrST0 },
+
+ { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
+ { X86::DIV_FrST0 , X86::DIV_FPrST0 },
+
+ { X86::IST_F16m , X86::IST_FP16m },
+ { X86::IST_F32m , X86::IST_FP32m },
+
+ { X86::MUL_FrST0 , X86::MUL_FPrST0 },
+
+ { X86::ST_F32m , X86::ST_FP32m },
+ { X86::ST_F64m , X86::ST_FP64m },
+ { X86::ST_Frr , X86::ST_FPrr },
+
+ { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
+ { X86::SUB_FrST0 , X86::SUB_FPrST0 },
+
+ { X86::UCOM_FIr , X86::UCOM_FIPr },
+
+ { X86::UCOM_FPr , X86::UCOM_FPPr },
+ { X86::UCOM_Fr , X86::UCOM_FPr },
+};
+
+/// popStackAfter - Pop the current value off of the top of the FP stack after
+/// the specified instruction. This attempts to be sneaky and combine the pop
+/// into the instruction itself if possible. The iterator is left pointing to
+/// the last instruction, be it a new pop instruction inserted, or the old
+/// instruction if it was modified in place.
+///
+void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
+ MachineInstr* MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ ASSERT_SORTED(PopTable);
+ assert(StackTop > 0 && "Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ // Check to see if there is a popping version of this instruction...
+ int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
+ if (Opcode != -1) {
+ I->setDesc(TII->get(Opcode));
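+    // fucompp implicitly compares ST(0) with ST(1) and takes no explicit
+    // operand, so drop the one left over from the non-popping form.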
+ if (Opcode == X86::UCOM_FPPr)
+ I->RemoveOperand(0);
+ } else { // Insert an explicit pop
+ I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
+ }
+}
+
+/// freeStackSlotAfter - Free the specified register from the register stack, so
+/// that it is no longer in a register. If the register is currently at the top
+/// of the stack, we just pop the current instruction, otherwise we store the
+/// current top-of-stack into the specified slot, then pop the top of stack.
+void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
+ if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
+ popStackAfter(I);
+ return;
+ }
+
+ // Otherwise, store the top of stack into the dead slot, killing the operand
+ // without having to add in an explicit xchg then pop.
+ //
+ unsigned STReg = getSTReg(FPRegNo);
+ unsigned OldSlot = getSlot(FPRegNo);
+ unsigned TopReg = Stack[StackTop-1];
+ Stack[OldSlot] = TopReg;
+ RegMap[TopReg] = OldSlot;
+ RegMap[FPRegNo] = ~0;
+ Stack[--StackTop] = ~0;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction transformation implementation
+//===----------------------------------------------------------------------===//
+
+/// handleZeroArgFP - ST(0) = fld0, ST(0) = flds <mem>
+///
+void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned DestReg = getFPReg(MI->getOperand(0));
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0); // Remove the explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // Result gets pushed on the stack.
+ pushReg(DestReg);
+}
+
+/// handleOneArgFP - fst <mem>, ST(0)
+///
+void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert((NumOps == X86AddrNumOperands + 1 || NumOps == 1) &&
+ "Can only handle fst* & ftst instructions!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+  // FISTP64m is strange because there isn't a non-popping version.
+  // If we have one _and_ we don't want to pop the operand, duplicate the value
+  // on the stack instead of moving it. This ensures that popping the value is
+ // always ok.
+ // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
+ //
+ if (!KillsSrc &&
+ (MI->getOpcode() == X86::IST_Fp64m32 ||
+ MI->getOpcode() == X86::ISTT_Fp16m32 ||
+ MI->getOpcode() == X86::ISTT_Fp32m32 ||
+ MI->getOpcode() == X86::ISTT_Fp64m32 ||
+ MI->getOpcode() == X86::IST_Fp64m64 ||
+ MI->getOpcode() == X86::ISTT_Fp16m64 ||
+ MI->getOpcode() == X86::ISTT_Fp32m64 ||
+ MI->getOpcode() == X86::ISTT_Fp64m64 ||
+ MI->getOpcode() == X86::IST_Fp64m80 ||
+ MI->getOpcode() == X86::ISTT_Fp16m80 ||
+ MI->getOpcode() == X86::ISTT_Fp32m80 ||
+ MI->getOpcode() == X86::ISTT_Fp64m80 ||
+ MI->getOpcode() == X86::ST_FpP80m)) {
+ duplicateToTop(Reg, 7 /*temp register*/, I);
+ } else {
+ moveToTop(Reg, I); // Move to the top of the stack...
+ }
+
+ // Convert from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
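+  // The concrete forms checked below always pop ST(0), so update our stack
+  // model to match; otherwise pop only if this was the last use of the value.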
+ if (MI->getOpcode() == X86::IST_FP64m ||
+ MI->getOpcode() == X86::ISTT_FP16m ||
+ MI->getOpcode() == X86::ISTT_FP32m ||
+ MI->getOpcode() == X86::ISTT_FP64m ||
+ MI->getOpcode() == X86::ST_FP80m) {
+ assert(StackTop > 0 && "Stack empty??");
+ --StackTop;
+ } else if (KillsSrc) { // Last use of operand?
+ popStackAfter(I);
+ }
+}
+
+
+/// handleOneArgFPRW - Handle instructions that read from the top of stack and
+/// replace the value with a newly computed value. These instructions may have
+/// non-fp operands after their FP operands.
+///
+/// Examples:
+/// R1 = fchs R2
+/// R1 = fadd R2, [mem]
+///
+void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+#ifndef NDEBUG
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
+#endif
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ if (KillsSrc) {
+ // If this is the last use of the source register, just make sure it's on
+ // the top of the stack.
+ moveToTop(Reg, I);
+ assert(StackTop > 0 && "Stack cannot be empty!");
+ --StackTop;
+ pushReg(getFPReg(MI->getOperand(0)));
+ } else {
+ // If this is not the last use of the source register, _copy_ it to the top
+ // of the stack.
+ duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
+ }
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(1); // Drop the source operand.
+ MI->RemoveOperand(0); // Drop the destination operand.
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define tables of various ways to map pseudo instructions
+//
+
+// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
+static const TableEntry ForwardST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r },
+ { X86::ADD_Fp64 , X86::ADD_FST0r },
+ { X86::ADD_Fp80 , X86::ADD_FST0r },
+ { X86::DIV_Fp32 , X86::DIV_FST0r },
+ { X86::DIV_Fp64 , X86::DIV_FST0r },
+ { X86::DIV_Fp80 , X86::DIV_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r },
+ { X86::MUL_Fp64 , X86::MUL_FST0r },
+ { X86::MUL_Fp80 , X86::MUL_FST0r },
+ { X86::SUB_Fp32 , X86::SUB_FST0r },
+ { X86::SUB_Fp64 , X86::SUB_FST0r },
+ { X86::SUB_Fp80 , X86::SUB_FST0r },
+};
+
+// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
+static const TableEntry ReverseST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FST0r },
+ { X86::DIV_Fp64 , X86::DIVR_FST0r },
+ { X86::DIV_Fp80 , X86::DIVR_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FST0r },
+ { X86::SUB_Fp64 , X86::SUBR_FST0r },
+ { X86::SUB_Fp80 , X86::SUBR_FST0r },
+};
+
+// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
+static const TableEntry ForwardSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp64 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp80 , X86::DIVR_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp64 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp80 , X86::SUBR_FrST0 },
+};
+
+// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
+static const TableEntry ReverseSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 },
+ { X86::ADD_Fp64 , X86::ADD_FrST0 },
+ { X86::ADD_Fp80 , X86::ADD_FrST0 },
+ { X86::DIV_Fp32 , X86::DIV_FrST0 },
+ { X86::DIV_Fp64 , X86::DIV_FrST0 },
+ { X86::DIV_Fp80 , X86::DIV_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 },
+ { X86::MUL_Fp64 , X86::MUL_FrST0 },
+ { X86::MUL_Fp80 , X86::MUL_FrST0 },
+ { X86::SUB_Fp32 , X86::SUB_FrST0 },
+ { X86::SUB_Fp64 , X86::SUB_FrST0 },
+ { X86::SUB_Fp80 , X86::SUB_FrST0 },
+};
+
+
+/// handleTwoArgFP - Handle instructions like FADD and friends, which are
+/// virtual instructions that need to be simplified and possibly transformed.
+///
+/// Result: ST(0) = fsub ST(0), ST(i)
+/// ST(i) = fsub ST(0), ST(i)
+/// ST(0) = fsubr ST(0), ST(i)
+/// ST(i) = fsubr ST(0), ST(i)
+///
+void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
+ unsigned Dest = getFPReg(MI->getOperand(0));
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned TOS = getStackEntry(0);
+
+ // One of our operands must be on the top of the stack. If neither is yet, we
+ // need to move one.
+ if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
+ // We can choose to move either operand to the top of the stack. If one of
+ // the operands is killed by this instruction, we want that one so that we
+ // can update right on top of the old version.
+ if (KillsOp0) {
+ moveToTop(Op0, I); // Move dead operand to TOS.
+ TOS = Op0;
+ } else if (KillsOp1) {
+ moveToTop(Op1, I);
+ TOS = Op1;
+ } else {
+ // All of the operands are live after this instruction executes, so we
+ // cannot update on top of any operand. Because of this, we must
+ // duplicate one of the stack elements to the top. It doesn't matter
+ // which one we pick.
+ //
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+ } else if (!KillsOp0 && !KillsOp1) {
+ // If we DO have one of our operands at the top of the stack, but we don't
+ // have a dead operand, we must duplicate one of the operands to a new slot
+ // on the stack.
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+
+ // Now we know that one of our operands is on the top of the stack, and at
+ // least one of our operands is killed by this instruction.
+ assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
+ "Stack conditions not set up right!");
+
+ // We decide which form to use based on what is on the top of the stack, and
+ // which operand is killed by this instruction.
+ const TableEntry *InstTable;
+ bool isForward = TOS == Op0;
+ bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
+ if (updateST0) {
+ if (isForward)
+ InstTable = ForwardST0Table;
+ else
+ InstTable = ReverseST0Table;
+ } else {
+ if (isForward)
+ InstTable = ForwardSTiTable;
+ else
+ InstTable = ReverseSTiTable;
+ }
+
+ // All four tables have the same number of entries, so the size of any one
+ // of them works for the lookup.
+ int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
+ MI->getOpcode());
+ assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
+
+ // NotTOS - The register which is not on the top of stack...
+ unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
+
+ // Replace the old instruction with a new instruction
+ MBB->remove(I++);
+ I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+
+ // If both operands are killed, pop one off of the stack in addition to
+ // overwriting the other one.
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) {
+ assert(!updateST0 && "Should have updated other operand!");
+ popStackAfter(I); // Pop the top of stack
+ }
+
+ // Update stack information so that we know the destination register is now on
+ // the stack.
+ unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
+ assert(UpdatedSlot < StackTop && Dest < 7);
+ Stack[UpdatedSlot] = Dest;
+ RegMap[Dest] = UpdatedSlot;
+ MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
+}
+
+/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
+/// register arguments and no explicit destinations.
+///
+void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // Make sure the first operand is on the top of stack; the other one can be
+ // anywhere.
+ moveToTop(Op0, I);
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->RemoveOperand(1);
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If any of the operands are killed by this instruction, free them.
+ if (KillsOp0) freeStackSlotAfter(I, Op0);
+ if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
+}
+
+/// handleCondMovFP - Handle two address conditional move instructions. These
+/// instructions move a st(i) register to st(0) iff a condition is true. These
+/// instructions require that the first operand is at the top of the stack, but
+/// otherwise don't modify the stack at all.
+void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ unsigned Op1 = getFPReg(MI->getOperand(2));
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // The first operand *must* be on the top of the stack.
+ moveToTop(Op0, I);
+
+ // Change the second operand to the stack register that the operand is in.
+ // Change from the pseudo instruction to the concrete instruction: after
+ // operands 0 and 1 are removed, the old source operand 2 becomes operand 0.
+ MI->RemoveOperand(0);
+ MI->RemoveOperand(1);
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If we kill the second operand, make sure to pop it from the stack.
+ if (Op0 != Op1 && KillsOp1) {
+ // Get this value off of the register stack.
+ freeStackSlotAfter(I, Op1);
+ }
+}
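+
+// For example (illustrative opcode names): "FP0 = CMOVBE_Fp64 FP0, FP1" is
+// rewritten, after FP0 is moved to the top of the stack, into the concrete
+// "fcmovbe %ST(i), %ST(0)" where ST(i) holds FP1; if this was the last use
+// of FP1, its stack slot is then freed.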
+
+
+/// handleSpecialFP - Handle special instructions which behave unlike other
+/// floating point instructions. This is primarily intended for use by pseudo
+/// instructions.
+///
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown SpecialFP instruction!");
+ case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
+ case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
+ case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
+ assert(StackTop == 0 && "Stack should be empty after a call!");
+ pushReg(getFPReg(MI->getOperand(0)));
+ break;
+ case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type!
+ case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type!
+ case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type!
+ // FpGET_ST1 should occur right after an FpGET_ST0 for a call or inline asm.
+ // The pattern we expect is:
+ // CALL
+ // FP1 = FpGET_ST0
+ // FP4 = FpGET_ST1
+ //
+ // At this point, we've pushed FP1 on the top of stack, so it should be
+ // present if it isn't dead. If it was dead, we already emitted a pop to
+ // remove it from the stack and StackTop = 0.
+
+ // Push FP4 as top of stack next.
+ pushReg(getFPReg(MI->getOperand(0)));
+
+ // If StackTop was 0 before we pushed our operand, then ST(0) must have been
+ // dead. In this case, the ST(1) value is the only thing that is live, and
+ // it is already at TOS (after the pop that was emitted), so just continue.
+ if (StackTop == 1)
+ break;
+
+ // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
+ // elements so that our accounting is correct.
+ unsigned RegOnTop = getStackEntry(0);
+ unsigned RegNo = getStackEntry(1);
+
+ // Swap the slots the regs are in.
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents.
+ assert(RegMap[RegOnTop] < StackTop);
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+ break;
+ }
+ case X86::FpSET_ST0_32:
+ case X86::FpSET_ST0_64:
+ case X86::FpSET_ST0_80:
+ assert((StackTop == 1 || StackTop == 2)
+ && "Stack should have one or two elements on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ case X86::FpSET_ST1_32:
+ case X86::FpSET_ST1_64:
+ case X86::FpSET_ST1_80:
+ // StackTop can be 1 if an FpSET_ST0_* was before this. Exchange them.
+ if (StackTop == 1) {
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
+ NumFXCH++;
+ StackTop = 0;
+ break;
+ }
+ assert(StackTop == 2 && "Stack should have two elements on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ case X86::MOV_Fp3232:
+ case X86::MOV_Fp3264:
+ case X86::MOV_Fp6432:
+ case X86::MOV_Fp6464:
+ case X86::MOV_Fp3280:
+ case X86::MOV_Fp6480:
+ case X86::MOV_Fp8032:
+ case X86::MOV_Fp8064:
+ case X86::MOV_Fp8080: {
+ const MachineOperand &MO1 = MI->getOperand(1);
+ unsigned SrcReg = getFPReg(MO1);
+
+ const MachineOperand &MO0 = MI->getOperand(0);
+ // These can be created due to inline asm. The two-address pass can
+ // introduce copies from RFP registers to virtual registers.
+ if (MO0.getReg() == X86::ST0 && SrcReg == 0) {
+ assert(MO1.isKill());
+ // Treat %ST0<def> = MOV_Fp8080 %FP0<kill>
+ // like FpSET_ST0_80 %FP0<kill>, %ST0<imp-def>
+ assert((StackTop == 1 || StackTop == 2)
+ && "Stack should have one or two elements on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) {
+ assert(MO1.isKill());
+ // Treat %ST1<def> = MOV_Fp8080 %FP1<kill>
+ // like FpSET_ST1_80 %FP0<kill>, %ST1<imp-def>
+ // StackTop can be 1 if an FpSET_ST0_* was before this. Exchange them.
+ if (StackTop == 1) {
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
+ NumFXCH++;
+ StackTop = 0;
+ break;
+ }
+ assert(StackTop == 2 && "Stack should have two elements on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ }
+
+ unsigned DestReg = getFPReg(MO0);
+ if (MI->killsRegister(X86::FP0+SrcReg)) {
+ // If the input operand is killed, we can just change the owner of the
+ // incoming stack slot into the result.
+ unsigned Slot = getSlot(SrcReg);
+ assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
+ Stack[Slot] = DestReg;
+ RegMap[DestReg] = Slot;
+
+ } else {
+ // For FMOV we just duplicate the specified value to a new stack slot.
+ // This could be made better, but would require substantial changes.
+ duplicateToTop(SrcReg, DestReg, I);
+ }
+ }
+ break;
+ case TargetInstrInfo::INLINEASM: {
+ // The inline asm MachineInstr currently only *uses* FP registers for the
+ // 'f' constraint. These should be turned into the current ST(x) register
+ // in the machine instr. Also, any kills should be explicitly popped after
+ // the inline asm.
+ unsigned Kills[7];
+ unsigned NumKills = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ assert(Op.isUse() && "Only handle inline asm uses right now");
+
+ unsigned FPReg = getFPReg(Op);
+ Op.setReg(getSTReg(FPReg));
+
+ // If we kill this operand, make sure to pop it from the stack after the
+ // asm. We just remember it for now, and pop them all off at the end in
+ // a batch.
+ if (Op.isKill())
+ Kills[NumKills++] = FPReg;
+ }
+
+ // If this asm kills any FP registers (is the last use of them) we must
+ // explicitly emit pop instructions for them. Do this now after the asm has
+ // executed so that the ST(x) numbers are not off (which would happen if we
+ // did this inline with operand rewriting).
+ //
+ // Note: this might be a non-optimal pop sequence. We might be able to do
+ // better by trying to pop in stack order or something.
+ MachineBasicBlock::iterator InsertPt = MI;
+ while (NumKills)
+ freeStackSlotAfter(InsertPt, Kills[--NumKills]);
+
+ // Don't delete the inline asm!
+ return;
+ }
+
+ case X86::RET:
+ case X86::RETI:
+ // If RET has an FP register use operand, pass the first one in ST(0) and
+ // the second one in ST(1).
+ if (isStackEmpty()) return; // Quick check to see if any are possible.
+
+ // Find the register operands.
+ unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ // FP Register uses must be kills unless there are two uses of the same
+ // register, in which case only one will be a kill.
+ assert(Op.isUse() &&
+ (Op.isKill() || // Marked kill.
+ getFPReg(Op) == FirstFPRegOp || // Second instance.
+ MI->killsRegister(Op.getReg())) && // Later use is marked kill.
+ "Ret only defs operands, and values aren't live beyond it");
+
+ if (FirstFPRegOp == ~0U)
+ FirstFPRegOp = getFPReg(Op);
+ else {
+ assert(SecondFPRegOp == ~0U && "More than two fp operands!");
+ SecondFPRegOp = getFPReg(Op);
+ }
+
+ // Remove the operand so that later passes don't see it.
+ MI->RemoveOperand(i);
+ --i, --e;
+ }
+
+ // There are only four possibilities here:
+ // 1) we are returning a single FP value. In this case, it has to be in
+ // ST(0) already, so just declare success by removing the value from the
+ // FP Stack.
+ if (SecondFPRegOp == ~0U) {
+ // Assert that the top of stack contains the right FP register.
+ assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
+ "Top of stack not the right register for RET!");
+
+ // Ok, everything is good, mark the value as not being on the stack
+ // anymore so that our assertion about the stack being empty at end of
+ // block doesn't fire.
+ StackTop = 0;
+ return;
+ }
+
+ // Otherwise, we are returning two values:
+ // 2) If returning the same value for both, we only have one thing in the FP
+ // stack. Consider: RET FP1, FP1
+ if (StackTop == 1) {
+ assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
+ "Stack misconfiguration for RET!");
+
+ // Duplicate the TOS so that we return it twice. Just pick some other FPx
+ // register to hold it.
+ unsigned NewReg = (FirstFPRegOp+1)%7;
+ duplicateToTop(FirstFPRegOp, NewReg, MI);
+ FirstFPRegOp = NewReg;
+ }
+
+ // Okay, we know we have two different FPx operands now:
+ assert(StackTop == 2 && "Must have two values live!");
+
+ // 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
+ // in ST(1), emit an fxch to swap them.
+ if (getStackEntry(0) == SecondFPRegOp) {
+ assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
+ moveToTop(FirstFPRegOp, MI);
+ }
+
+ // 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
+ // ST(1). Just remove both from our understanding of the stack and return.
+ assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
+ assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
+ StackTop = 0;
+ return;
+ }
+
+ I = MBB->erase(I); // Remove the pseudo instruction
+ --I;
+}
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
new file mode 100644
index 0000000..009846e
--- /dev/null
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -0,0 +1,139 @@
+//===-- X86FloatingPointRegKill.cpp - FP_REG_KILL inserter ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts FP_REG_KILL instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added");
+
+namespace {
+ struct VISIBILITY_HIDDEN FPRegKiller : public MachineFunctionPass {
+ static char ID;
+ FPRegKiller() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; }
+ };
+ char FPRegKiller::ID = 0;
+}
+
+FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); }
+
+bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
+ // If we are emitting FP stack code, scan each basic block to determine if it
+ // defines any FP values. If so, put an FP_REG_KILL instruction before the
+ // terminator of the block.
+
+ // Note that FP stack instructions are used in all modes for long double,
+ // so we always need to do this check.
+ // Also note that it's possible for an FP stack register to be live across
+ // an instruction that produces multiple basic blocks (SSE CMOV) so we
+ // must check all the generated basic blocks.
+
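+ // Background (informal): the FP stackifier in X86FloatingPoint.cpp works
+ // one basic block at a time and expects the x87 stack to be empty at block
+ // boundaries, so any block that defines x87 values needs an FP_REG_KILL
+ // before its terminator to mark those values as dead within the block.
+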
+ // Scan all of the machine instructions in these MBBs, checking for defs of
+ // FP virtual registers. (RFP32 and RFP64 will not exist in SSE mode, but
+ // RFP80 might.)
+
+ // Fast-path: If nothing is using the x87 registers, we don't need to do
+ // any scanning.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
+ MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
+ MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
+ return false;
+
+ bool Changed = false;
+ const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
+ MachineFunction::iterator MBBI = MF.begin();
+ MachineFunction::iterator EndMBB = MF.end();
+ for (; MBBI != EndMBB; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+
+ // If this block returns, ignore it. We don't want to insert an FP_REG_KILL
+ // before the return.
+ if (!MBB->empty()) {
+ MachineBasicBlock::iterator EndI = MBB->end();
+ --EndI;
+ if (EndI->getDesc().isReturn())
+ continue;
+ }
+
+ bool ContainsFPCode = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ !ContainsFPCode && I != E; ++I) {
+ if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) {
+ const TargetRegisterClass *clas;
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ if (I->getOperand(op).isReg() && I->getOperand(op).isDef() &&
+ TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
+ ((clas = MRI.getRegClass(I->getOperand(op).getReg())) ==
+ X86::RFP32RegisterClass ||
+ clas == X86::RFP64RegisterClass ||
+ clas == X86::RFP80RegisterClass)) {
+ ContainsFPCode = true;
+ break;
+ }
+ }
+ }
+ }
+ // Check PHI nodes in successor blocks. These PHI's will be lowered to have
+ // a copy of the input value in this block. In SSE mode, we only care about
+ // 80-bit values.
+ if (!ContainsFPCode) {
+ // Final check: examine the LLVM BBs that are successors of the LLVM BB
+ // corresponding to this MBB, looking for FP PHI nodes.
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const PHINode *PN;
+ for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
+ !ContainsFPCode && SI != E; ++SI) {
+ for (BasicBlock::const_iterator II = SI->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ if (PN->getType()==Type::X86_FP80Ty ||
+ (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) ||
+ (!Subtarget.hasSSE2() && PN->getType()==Type::DoubleTy)) {
+ ContainsFPCode = true;
+ break;
+ }
+ }
+ }
+ }
+ // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
+ if (ContainsFPCode) {
+ BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc::getUnknownLoc(),
+ MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
+ ++NumFPKill;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
new file mode 100644
index 0000000..bd1fea7
--- /dev/null
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -0,0 +1,1716 @@
+//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for X86,
+// converting from a legalized dag to an X86 dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden);
+
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+//===----------------------------------------------------------------------===//
+// Pattern Matcher Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
+ /// SDValue's instead of register numbers for the leaves of the matched
+ /// tree.
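+ ///
+ /// An x86 memory operand has the general form
+ /// segment:[base + index*scale + disp],
+ /// where the displacement may also carry a symbolic component (a global
+ /// value, constant pool entry, external symbol, or jump table index).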
+ struct X86ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ bool isRIPRel; // RIP as base?
+ unsigned Scale;
+ SDValue IndexReg;
+ int32_t Disp;
+ SDValue Segment;
+ GlobalValue *GV;
+ Constant *CP;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ X86ISelAddressMode()
+ : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
+ Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ }
+
+ void dump() {
+ cerr << "X86ISelAddressMode " << this << "\n";
+ cerr << "Base.Reg ";
+ if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
+ else cerr << "nul";
+ cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
+ cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n";
+ cerr << "IndexReg ";
+ if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
+ else cerr << "nul";
+ cerr << " Disp " << Disp << "\n";
+ cerr << "GV "; if (GV) GV->dump();
+ else cerr << "nul";
+ cerr << " CP "; if (CP) CP->dump();
+ else cerr << "nul";
+ cerr << "\n";
+ cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
+ cerr << " JT" << JT << " Align" << Align << "\n";
+ }
+ };
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// ISel - X86 specific code to select X86 machine instructions for
+ /// SelectionDAG operations.
+ ///
+ class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
+ /// TM - Keep a reference to X86TargetMachine.
+ ///
+ X86TargetMachine &TM;
+
+ /// X86Lowering - This object fully describes how to lower LLVM code to an
+ /// X86-specific SelectionDAG.
+ X86TargetLowering &X86Lowering;
+
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// CurBB - Current BB being isel'd.
+ ///
+ MachineBasicBlock *CurBB;
+
+ /// OptForSize - If true, selector should try to optimize for code size
+ /// instead of performance.
+ bool OptForSize;
+
+ public:
+ explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel),
+ TM(tm), X86Lowering(*TM.getTargetLowering()),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ OptForSize(false) {}
+
+ virtual const char *getPassName() const {
+ return "X86 DAG->DAG Instruction Selection";
+ }
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
+
+ virtual
+ bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
+
+// Include the pieces autogenerated from the target description.
+#include "X86GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDValue N);
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+
+ bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
+ bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth = 0);
+ bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
+ bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp);
+ bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
+ SDValue N, SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment,
+ SDValue &InChain, SDValue &OutChain);
+ bool TryFoldLoad(SDValue P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ void PreprocessForRMW();
+ void PreprocessForFPConvert();
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+ Scale = getI8Imm(AM.Scale);
+ Index = AM.IndexReg;
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
+ AM.Align, AM.Disp);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+
+ if (AM.Segment.getNode())
+ Segment = AM.Segment;
+ else
+ Segment = CurDAG->getRegister(0, MVT::i32);
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI16Imm - Return a target constant with the specified value, of type
+ /// i16.
+ inline SDValue getI16Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i16);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getGlobalBaseReg - Return an SDNode that returns the value of
+ /// the global base register. Output instructions required to
+ /// initialize the global base register, if necessary.
+ ///
+ SDNode *getGlobalBaseReg();
+
+#ifndef NDEBUG
+ unsigned Indent;
+#endif
+ };
+}
+
+
+bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ if (U == Root)
+ switch (U->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ SDValue Op1 = U->getOperand(1);
+
+ // If the other operand is an 8-bit immediate we should fold the immediate
+ // instead. This reduces code size.
+ // e.g.
+ // movl 4(%esp), %eax
+ // addl $4, %eax
+ // vs.
+ // movl $4, %eax
+ // addl 4(%esp), %eax
+ // The former is 2 bytes shorter. In the case where the increment is 1, the
+ // saving can be 4 bytes (by using incl %eax).
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
+ if (Imm->getAPIntValue().isSignedIntN(8))
+ return false;
+
+ // If the other operand is a TLS address, we should fold it instead.
+ // This produces
+ // movl %gs:0, %eax
+ // leal i@NTPOFF(%eax), %eax
+ // instead of
+ // movl $i@NTPOFF, %eax
+ // addl %gs:0, %eax
+ // If the block also has an access to a second TLS address, this will save
+ // a load.
+ // FIXME: This is probably also true for non TLS addresses.
+ if (Op1.getOpcode() == X86ISD::Wrapper) {
+ SDValue Val = Op1.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+ }
+ }
+ }
+
+ // Proceed to 'generic' cycle finder code
+ return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
+}
+
+/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
+/// and move load below the TokenFactor. Replace store's chain operand with
+/// load's chain result.
+static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Store, SDValue TF) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
+ if (Load.getNode() == TF.getOperand(i).getNode())
+ Ops.push_back(Load.getOperand(0));
+ else
+ Ops.push_back(TF.getOperand(i));
+ CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
+ CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
+ Store.getOperand(2), Store.getOperand(3));
+}
+
+/// isRMWLoad - Return true if N is a load that's part of an RMW sub-DAG.
+///
+static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
+ SDValue &Load) {
+ if (N.getOpcode() == ISD::BIT_CONVERT)
+ N = N.getOperand(0);
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+ if (!LD || LD->isVolatile())
+ return false;
+ if (LD->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
+ return false;
+
+ if (N.hasOneUse() &&
+ N.getOperand(1) == Address &&
+ N.getNode()->isOperandOf(Chain.getNode())) {
+ Load = N;
+ return true;
+ }
+ return false;
+}
+
+/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
+/// operand and move load below the call's chain operand.
+static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Call, SDValue CallSeqStart) {
+ SmallVector<SDValue, 8> Ops;
+ SDValue Chain = CallSeqStart.getOperand(0);
+ if (Chain.getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else {
+ assert(Chain.getOpcode() == ISD::TokenFactor &&
+ "Unexpected CallSeqStart chain operand");
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
+ if (Chain.getOperand(i).getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else
+ Ops.push_back(Chain.getOperand(i));
+ SDValue NewChain =
+ CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ Ops.clear();
+ Ops.push_back(NewChain);
+ }
+ for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
+ Ops.push_back(CallSeqStart.getOperand(i));
+ CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
+ Load.getOperand(1), Load.getOperand(2));
+ Ops.clear();
+ Ops.push_back(SDValue(Load.getNode(), 1));
+ for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+ Ops.push_back(Call.getOperand(i));
+ CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
+}
+
+/// isCalleeLoad - Return true if call address is a load and it can be
+/// moved below CALLSEQ_START and the chains leading up to the call.
+/// Return the CALLSEQ_START by reference as a second output.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
+ if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
+ return false;
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
+ if (!LD ||
+ LD->isVolatile() ||
+ LD->getAddressingMode() != ISD::UNINDEXED ||
+ LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ // Now let's find the callseq_start.
+ while (Chain.getOpcode() != ISD::CALLSEQ_START) {
+ if (!Chain.hasOneUse())
+ return false;
+ Chain = Chain.getOperand(0);
+ }
+
+ if (Chain.getOperand(0).getNode() == Callee.getNode())
+ return true;
+ if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+ Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
+ return true;
+ return false;
+}
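+
+// The payoff (illustrative): once the callee load sits directly below the
+// call, it can be folded into the call itself, e.g. selecting
+// call *fp(%ebx)
+// instead of a separate "movl fp(%ebx), %eax" followed by "call *%eax".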
+
+
+/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
+/// This is only run if not in -O0 mode.
+/// This allows the instruction selector to pick more read-modify-write
+/// instructions. This is a common case:
+///
+/// [Load chain]
+/// ^
+/// |
+/// [Load]
+/// ^ ^
+/// | |
+/// / \-
+/// / |
+/// [TokenFactor] [Op]
+/// ^ ^
+/// | |
+/// \ /
+/// \ /
+/// [Store]
+///
+/// The fact that the store's chain operand != load's chain will prevent the
+/// (store (op (load))) instruction from being selected. We can transform it to:
+///
+/// [Load chain]
+/// ^
+/// |
+/// [TokenFactor]
+/// ^
+/// |
+/// [Load]
+/// ^ ^
+/// | |
+/// | \-
+/// | |
+/// | [Op]
+/// | ^
+/// | |
+/// \ /
+/// \ /
+/// [Store]
+void X86DAGToDAGISel::PreprocessForRMW() {
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+ if (I->getOpcode() == X86ISD::CALL) {
+ /// Also try moving call address load from outside callseq_start to just
+ /// before the call to allow it to be folded.
+ ///
+ /// [Load chain]
+ /// ^
+ /// |
+ /// [Load]
+ /// ^ ^
+ /// | |
+ /// / \--
+ /// / |
+ ///[CALLSEQ_START] |
+ /// ^ |
+ /// | |
+ /// [LOAD/C2Reg] |
+ /// | |
+ /// \ /
+ /// \ /
+ /// [CALL]
+ SDValue Chain = I->getOperand(0);
+ SDValue Load = I->getOperand(1);
+ if (!isCalleeLoad(Load, Chain))
+ continue;
+ MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
+ ++NumLoadMoved;
+ continue;
+ }
+
+ if (!ISD::isNON_TRUNCStore(I))
+ continue;
+ SDValue Chain = I->getOperand(0);
+
+ if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
+ continue;
+
+ SDValue N1 = I->getOperand(1);
+ SDValue N2 = I->getOperand(2);
+ if ((N1.getValueType().isFloatingPoint() &&
+ !N1.getValueType().isVector()) ||
+ !N1.hasOneUse())
+ continue;
+
+ bool RModW = false;
+ SDValue Load;
+ unsigned Opcode = N1.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::VECTOR_SHUFFLE: {
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+ RModW = isRMWLoad(N10, Chain, N2, Load);
+ if (!RModW)
+ RModW = isRMWLoad(N11, Chain, N2, Load);
+ break;
+ }
+ case ISD::SUB:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ case X86ISD::SHLD:
+ case X86ISD::SHRD: {
+ SDValue N10 = N1.getOperand(0);
+ RModW = isRMWLoad(N10, Chain, N2, Load);
+ break;
+ }
+ }
+
+ if (RModW) {
+ MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
+ ++NumLoadMoved;
+ }
+ }
+}
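+
+// The payoff (illustrative): with the store and load on the same chain, a
+// dag like (store (add (load [mem]), 1), [mem]) can be selected as a single
+// read-modify-write instruction such as "addl $1, mem" instead of separate
+// load, add, and store instructions.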
+
+
+/// PreprocessForFPConvert - Walk over the dag, lowering fpround and fpextend
+/// nodes that target the FP stack into a store and load through a stack slot.
+/// This is a gross hack. We would like to simply mark these as being illegal,
+/// but when we do that, legalize produces these when it expands calls, then
+/// expands these in the same legalize pass. We would like dag combine to be
+/// able to hack on these between the call expansion and the node legalization.
+/// As such, this pass basically does "really late" legalization of these
+/// inline with the X86 isel pass.
+void X86DAGToDAGISel::PreprocessForFPConvert() {
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Advance the iterator now to avoid invalidation issues.
+ if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ MVT SrcVT = N->getOperand(0).getValueType();
+ MVT DstVT = N->getValueType(0);
+ bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
+ continue;
+
+ if (!SrcIsSSE && !DstIsSSE) {
+ // If this is an FPStack extension, it is a noop.
+ if (N->getOpcode() == ISD::FP_EXTEND)
+ continue;
+ // If this is a value-preserving FPStack truncation, it is a noop.
+ if (N->getConstantOperandVal(1))
+ continue;
+ }
+
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
+ MVT MemVT;
+ if (N->getOpcode() == ISD::FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT; we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ // FIXME: optimize the case where the src/dest is a load or store?
+ SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
+ N->getOperand(0),
+ MemTmp, NULL, 0, MemVT);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ NULL, 0, MemVT);
+
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havoc on the dag because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+
+ // Now that we did that, the node is dead. Increment the iterator to the
+ // next node to process, then delete N.
+ ++I;
+ CurDAG->DeleteNode(N);
+ }
+}
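+
+// For example (illustrative): an f64-to-f32 FP_ROUND whose input lives on
+// the FP stack but whose result is wanted in an SSE register becomes a
+// truncating f32 store to a stack temporary followed by an f32 load, i.e.
+// roughly an fstps/movss pair through memory.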
+
+/// InstructionSelect - This callback is invoked by SelectionDAGISel
+/// when it has created a SelectionDAG for us to codegen.
+void X86DAGToDAGISel::InstructionSelect() {
+ CurBB = BB; // BB can change as result of isel.
+ const Function *F = CurDAG->getMachineFunction().getFunction();
+ OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
+
+ DEBUG(BB->dump());
+ if (OptLevel != CodeGenOpt::None)
+ PreprocessForRMW();
+
+ // FIXME: This should only happen when not compiled with -O0.
+ PreprocessForFPConvert();
+
+ // Codegen the basic block.
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection begins:\n";
+ Indent = 0;
+#endif
+ SelectRoot(*CurDAG);
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection ends:\n";
+#endif
+
+ CurDAG->RemoveDeadNodes();
+}
+
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+ MachineFrameInfo *MFI) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (Subtarget->isTargetCygMing())
+ BuildMI(BB, DebugLoc::getUnknownLoc(),
+ TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
+}
+
+void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
+ // If this is main, emit special code for main.
+ MachineBasicBlock *BB = MF.begin();
+ if (Fn.hasExternalLinkage() && Fn.getName() == "main")
+ EmitSpecialCodeForMain(BB, MF.getFrameInfo());
+}
+
+
+bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
+ X86ISelAddressMode &AM) {
+ assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
+ SDValue Segment = N.getOperand(0);
+
+ if (AM.Segment.getNode() == 0) {
+ AM.Segment = Segment;
+ return false;
+ }
+
+ return true;
+}
+
+bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
+ // This optimization is valid because the GNU TLS model defines that
+ // gs:0 (or fs:0 on X86-64) contains its own address.
+ // For more information see http://people.redhat.com/drepper/tls.pdf
+
+ SDValue Address = N.getOperand(1);
+ if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
+ !MatchSegmentBaseAddress(Address, AM))
+ return false;
+
+ return true;
+}
+
+bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+ bool is64Bit = Subtarget->is64Bit();
+ DOUT << "Wrapper: 64bit " << is64Bit;
+ DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
+
+ // Under X86-64 non-small code model, GV (and friends) are 64-bits.
+ if (is64Bit && (TM.getCodeModel() != CodeModel::Small))
+ return true;
+
+ // Base and index reg must be 0 in order to use rip as base.
+ bool canUsePICRel = !AM.Base.Reg.getNode() && !AM.IndexReg.getNode();
+ if (is64Bit && !canUsePICRel && TM.symbolicAddressesAreRIPRel())
+ return true;
+
+ if (AM.hasSymbolicDisplacement())
+ return true;
+ // If the value is available in a register but both the base and index
+ // components have already been picked, we can't fit it in the addressing
+ // mode. Duplicate the GlobalAddress or ConstantPool as the displacement.
+
+ SDValue N0 = N.getOperand(0);
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ uint64_t Offset = G->getOffset();
+ if (!is64Bit || isInt32(AM.Disp + Offset)) {
+ GlobalValue *GV = G->getGlobal();
+ bool isRIPRel = TM.symbolicAddressesAreRIPRel();
+ if (N0.getOpcode() == llvm::ISD::TargetGlobalTLSAddress) {
+ TLSModel::Model model =
+ getTLSModel(GV, TM.getRelocationModel());
+ if (is64Bit && model == TLSModel::InitialExec)
+ isRIPRel = true;
+ }
+ AM.GV = GV;
+ AM.Disp += Offset;
+ AM.isRIPRel = isRIPRel;
+ return false;
+ }
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ uint64_t Offset = CP->getOffset();
+ if (!is64Bit || isInt32(AM.Disp + Offset)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += Offset;
+ AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+ return false;
+ }
+ } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+ return false;
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
+ bool is64Bit = Subtarget->is64Bit();
+ DebugLoc dl = N.getDebugLoc();
+ DOUT << "MatchAddress: "; DEBUG(AM.dump());
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRel) {
+ if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (!is64Bit || isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ }
+ return true;
+ }
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (!is64Bit || isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ break;
+ }
+
+ case X86ISD::SegmentBaseAddress:
+ if (!MatchSegmentBaseAddress(N, AM))
+ return false;
+ break;
+
+ case X86ISD::Wrapper:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::LOAD:
+ if (!MatchLoad(N, AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == X86ISelAddressMode::RegBase
+ && AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = X86ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SHL:
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel)
+ break;
+
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
+ unsigned Val = CN->getZExtValue();
+ if (Val == 1 || Val == 2 || Val == 3) {
+ AM.Scale = 1 << Val;
+ SDValue ShVal = N.getNode()->getOperand(0);
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
+ isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
+ AM.IndexReg = ShVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
+ uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
+ if (!is64Bit || isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ AM.IndexReg = ShVal;
+ } else {
+ AM.IndexReg = ShVal;
+ }
+ return false;
+ }
+ break;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ // A mul_lohi where we need the low part can be folded as a plain multiply.
+ if (N.getResNo() != 0) break;
+ // FALL THROUGH
+ case ISD::MUL:
+ case X86ISD::MUL_IMM:
+ // X*[3,5,9] -> X+X*[2,4,8]
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0 &&
+ !AM.isRIPRel) {
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
+ if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
+ CN->getZExtValue() == 9) {
+ AM.Scale = unsigned(CN->getZExtValue())-1;
+
+ SDValue MulVal = N.getNode()->getOperand(0);
+ SDValue Reg;
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
+ isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
+ Reg = MulVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
+ uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
+ CN->getZExtValue();
+ if (!is64Bit || isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ Reg = N.getNode()->getOperand(0);
+ } else {
+ Reg = N.getNode()->getOperand(0);
+ }
+
+ AM.IndexReg = AM.Base.Reg = Reg;
+ return false;
+ }
+ }
+ break;
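+ // For example (illustrative): "x*9" matches with Base = x, Index = x and
+ // Scale = 8, so it can be emitted as "leal (%eax,%eax,8), %ecx".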
+
+ case ISD::SUB: {
+ // Given A-B, if A can be completely folded into the address (leaving the
+ // index field unused), use -B as the index. This is a win if A has
+ // multiple parts that can be folded into the address. Also, this saves a
+ // mov if the base register has other uses, since it avoids a two-address
+ // sub instruction; however, it costs an additional mov if the index
+ // register has other uses.
+
+ // Test if the LHS of the sub can be folded.
+ X86ISelAddressMode Backup = AM;
+ if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRIPRel) {
+ AM = Backup;
+ break;
+ }
+ int Cost = 0;
+ SDValue RHS = N.getNode()->getOperand(1);
+ // If the RHS involves a register with multiple uses, this
+ // transformation incurs an extra mov, due to the neg instruction
+ // clobbering its operand.
+ if (!RHS.getNode()->hasOneUse() ||
+ RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
+ RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
+ RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
+ (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
+ ++Cost;
+ // If the base is a register with multiple uses, this
+ // transformation may save a mov.
+ if ((AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.getNode() &&
+ !AM.Base.Reg.getNode()->hasOneUse()) ||
+ AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ --Cost;
+ // If the folded LHS was interesting, this transformation saves
+ // address arithmetic.
+ if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
+ ((AM.Disp != 0) && (Backup.Disp == 0)) +
+ (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
+ --Cost;
+ // If it doesn't look like it may be an overall win, don't do it.
+ if (Cost >= 0) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+ AM.Scale = 1;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ X86ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ !AM.Base.Reg.getNode() &&
+ !AM.IndexReg.getNode() &&
+ !AM.isRIPRel) {
+ AM.Base.Reg = N.getNode()->getOperand(0);
+ AM.IndexReg = N.getNode()->getOperand(1);
+ AM.Scale = 1;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ X86ISelAddressMode Backup = AM;
+ uint64_t Offset = CN->getSExtValue();
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM, Depth+1) &&
+ // The address must not already have picked a GV for the displacement.
+ AM.GV == NULL &&
+ // On x86-64, the resultant disp must fit in 32-bits.
+ (!is64Bit || isInt32(AM.Disp + Offset)) &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += Offset;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+
+ case ISD::AND: {
+ // Perform some heroic transforms on an and of a constant-count shift
+ // with a constant to enable use of the scaled offset field.
+
+ SDValue Shift = N.getOperand(0);
+ if (Shift.getNumOperands() != 2) break;
+
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ // Not when RIP is used as the base.
+ if (AM.isRIPRel) break;
+
+ SDValue X = Shift.getOperand(0);
+ ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+ if (!C1 || !C2) break;
+
+ // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
+ // allows us to convert the shift and and into an h-register extract and
+ // a scaled index.
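+ // For example (illustrative): with C1 = 3, "(x >> 5) & (0xff << 3)" is
+ // rewritten as "((x >> 8) & 0xff) << 3", so the AND becomes an %ah-style
+ // h-register extract and the shift folds into a scale of 8.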
+ if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
+ unsigned ScaleLog = 8 - C1->getZExtValue();
+ if (ScaleLog > 0 && ScaleLog < 4 &&
+ C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
+ SDValue Eight = CurDAG->getConstant(8, MVT::i8);
+ SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
+ SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ X, Eight);
+ SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
+ Srl, Mask);
+ SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ And, ShlCount);
+
+ // Insert the new nodes into the topological ordering.
+ if (Eight.getNode()->getNodeId() == -1 ||
+ Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Eight.getNode());
+ Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Mask.getNode()->getNodeId() == -1 ||
+ Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Mask.getNode());
+ Mask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Srl.getNode()->getNodeId() == -1 ||
+ Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
+ Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (And.getNode()->getNodeId() == -1 ||
+ And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), And.getNode());
+ And.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (ShlCount.getNode()->getNodeId() == -1 ||
+ ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
+ ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Shl.getNode()->getNodeId() == -1 ||
+ Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Shl.getNode());
+ Shl.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ CurDAG->ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+ }
+ }
+
+ // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+ // allows us to fold the shift into this addressing mode.
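+ // For example (illustrative): "(x << 2) & 60" becomes "(x & 15) << 2",
+ // leaving a cheaper AND while the shift-by-2 folds into a scale of 4.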
+ if (Shift.getOpcode() != ISD::SHL) break;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides, the
+ // isel mechanism requires their node ids to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ break;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftCst = C1->getZExtValue();
+ if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+ break;
+
+ // Get the new AND mask; this folds to a constant.
+ SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ SDValue(C2, 0), SDValue(C1, 0));
+ SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
+ NewANDMask);
+ SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ NewAND, SDValue(C1, 0));
+
+ // Insert the new nodes into the topological ordering.
+ if (C1->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), C1);
+ C1->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewANDMask.getNode()->getNodeId() == -1 ||
+ NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
+ NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewAND.getNode()->getNodeId() == -1 ||
+ NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
+ NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (NewSHIFT.getNode()->getNodeId() == -1 ||
+ NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
+ NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+
+ CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
+
+ AM.Scale = 1 << ShiftCst;
+ AM.IndexReg = NewAND;
+ return false;
+ }
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, check to see if the scale index register is set.
+ if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) {
+ AM.IndexReg = N;
+ AM.Scale = 1;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = X86ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+/// SelectAddr - Returns true if it is able to pattern match an addressing
+/// mode. It returns by reference the operands which make up the maximal
+/// addressing mode it can match.
+bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ X86ISelAddressMode AM;
+ bool Done = false;
+ if (AvoidDupAddrCompute && !N.hasOneUse()) {
+ unsigned Opcode = N.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
+ Opcode != X86ISD::Wrapper) {
+ // If we are able to fold N into the addressing mode, then we'll allow it
+ // even if N has multiple uses. In general, an addressing computation is
+ // used as an address by all of its uses. But watch out for CopyToReg uses:
+ // they mean the address computation is live out, and it will be computed
+ // by a LEA, so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = N.getNode()->use_begin(),
+ UE = N.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(N, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!Done && MatchAddress(N, AM))
+ return false;
+
+ MVT VT = N.getValueType();
+ if (AM.BaseType == X86ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
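+/// For example, (v4f32 (scalar_to_vector (load addr))) is matched here, as is
+/// the explicitly zero-extending form built from X86ISD::VZEXT_MOVL, so the
+/// load can be folded into the instruction that uses the vector.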
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
+ SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment,
+ SDValue &InChain,
+ SDValue &OutChain) {
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ InChain = N.getOperand(0).getValue(1);
+ if (ISD::isNON_EXTLoad(InChain.getNode()) &&
+ InChain.getValue(0).hasOneUse() &&
+ N.hasOneUse() &&
+ IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(InChain);
+ if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ OutChain = LD->getChain();
+ return true;
+ }
+ }
+
+ // Also handle the case where we explicitly require zeros in the top
+ // elements. This is a vector shuffle from the zero vector.
+ if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
+ // Check to see if the top elements are all zeros (or bitcast of zeros).
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getNode()->hasOneUse() &&
+ ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
+ N.getOperand(0).getOperand(0).hasOneUse()) {
+ // Okay, this is a zero extending load. Fold it.
+ LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
+ if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ OutChain = LD->getChain();
+ InChain = SDValue(LD, 1);
+ return true;
+ }
+ return false;
+}
+
+
+/// SelectLEAAddr - It calls SelectAddr and determines if the maximal
+/// addressing mode it matches can be cost-effectively emitted as an LEA
+/// instruction.
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp) {
+ X86ISelAddressMode AM;
+
+ // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+ // segments.
+ SDValue Copy = AM.Segment;
+ SDValue T = CurDAG->getRegister(0, MVT::i32);
+ AM.Segment = T;
+ if (MatchAddress(N, AM))
+ return false;
+ assert (T == AM.Segment);
+ AM.Segment = Copy;
+
+ MVT VT = N.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == X86ISelAddressMode::RegBase)
+ if (AM.Base.Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity++;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or a
+ // simple shift.
+ if (AM.Scale > 1)
+ Complexity++;
+
+ // FIXME: We are artificially lowering the criteria for turning ADD %reg, $GA
+ // into a LEA. This is determined with some experimentation but is by no
+ // means optimal (especially for code size considerations). LEA is nice
+ // because of its three-address nature. Tweak the cost function again when
+ // we can run convertToThreeAddress() at register allocation time.
+ if (AM.hasSymbolicDisplacement()) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
+
+ if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity++;
+
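+ // For example, lea (%ebx,%ecx,4) scores base (1) + index (1) + scale > 1
+ // (1) = 3 and is accepted, while lea (,%ecx,2) scores only index (1) +
+ // scale > 1 (1) = 2 and is rejected in favor of an add or a shift.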
+ if (Complexity > 2) {
+ SDValue Segment;
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+ }
+ return false;
+}
+
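+/// TryFoldLoad - Return true if N is a non-extending load with a single use
+/// that is legal and profitable to fold into P; on success the address
+/// operands of the load are filled in by SelectAddr.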
+bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ if (ISD::isNON_EXTLoad(N.getNode()) &&
+ N.hasOneUse() &&
+ IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
+ return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
+ return false;
+}
+
+/// getGlobalBaseReg - Return an SDNode that returns the value of
+/// the global base register. Output instructions required to
+/// initialize the global base register, if necessary.
+///
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ MachineFunction *MF = CurBB->getParent();
+ unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+static SDNode *FindCallStartFromCall(SDNode *Node) {
+ if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ return FindCallStartFromCall(Node->getOperand(0).getNode());
+}
+
+SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = Node->getOperand(2);
+ SDValue In2H = Node->getOperand(3);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return NULL;
+ SDValue LSI = Node->getOperand(4); // MemOperand
+ const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
+ return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+}
+
+SDNode *X86DAGToDAGISel::Select(SDValue N) {
+ SDNode *Node = N.getNode();
+ MVT NVT = Node->getValueType(0);
+ unsigned Opc, MOpc;
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent += 2;
+#endif
+
+ if (Node->isMachineOpcode()) {
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "== ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case X86ISD::ATOMOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMOR6432);
+ case X86ISD::ATOMXOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMXOR6432);
+ case X86ISD::ATOMADD64_DAG:
+ return SelectAtomic64(Node, X86::ATOMADD6432);
+ case X86ISD::ATOMSUB64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSUB6432);
+ case X86ISD::ATOMNAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMNAND6432);
+ case X86ISD::ATOMAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMAND6432);
+ case X86ISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSWAP6432);
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ if (!isSigned)
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ }
+ else
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
+ }
+
+ unsigned LoReg, HiReg;
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
+ case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
+ case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ }
+
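+ // For example, a 32-bit unsigned multiply selects MUL32r, which computes
+ // EDX:EAX = EAX * op; the low half is then copied out of EAX and the high
+ // half out of EDX below.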
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiplication is commutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (foldedLoad)
+ std::swap(N0, N1);
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ }
+
+ // Copy the low half of the result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.getNode()->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDValue Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
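+ // (An instruction carrying a REX prefix cannot encode AH, BH, CH, or DH,
+ // so the value is read through AX and shifted instead.)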
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
+ Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
+ MVT::i8, Result, SRIdx), 0);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ }
+ ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.getNode()->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+
+#ifndef NDEBUG
+ Indent -= 2;
+#endif
+
+ return NULL;
+ }
+
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
+ if (!isSigned)
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+ }
+ else
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+
+ unsigned LoReg, HiReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; HiReg = X86::AH;
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+ SDValue InFlag;
+ if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ // Special case for div8: just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
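+ // For example, for an unsigned i8 divide, DIV8r divides AX by the operand,
+ // leaving the quotient in AL and the remainder in AH, so zero-extending the
+ // dividend into AX gives the correct result.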
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ Move =
+ SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
+ MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
+ } else {
+ Move =
+ SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
+ InFlag = Chain.getValue(1);
+ } else {
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ LoReg, N0, SDValue()).getValue(1);
+ if (isSigned && !signBitIsZero) {
+ // Sign extend the low part into the high part.
+ InFlag =
+ SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
+ 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
+ ClrNode, InFlag).getValue(1);
+ }
+ }
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ }
+
+ // Copy the division (low) result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.getNode()->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDValue Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ // Then truncate it down to i8.
+ SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
+ Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
+ MVT::i8, Result, SRIdx), 0);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ }
+ ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.getNode()->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+
+#ifndef NDEBUG
+ Indent -= 2;
+#endif
+
+ return NULL;
+ }
+
+ case ISD::DECLARE: {
+ // Handle DECLARE nodes here because the second operand may have been
+ // wrapped in X86ISD::Wrapper.
+ SDValue Chain = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
+
+ // FIXME: We need to handle this for VLAs.
+ if (!FINode) {
+ ReplaceUses(N.getValue(0), Chain);
+ return NULL;
+ }
+
+ if (N2.getOpcode() == ISD::ADD &&
+ N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
+ N2 = N2.getOperand(1);
+
+ // If N2 is not Wrapper(descriptor) then the llvm.declare is mangled
+ // somehow; just ignore it.
+ if (N2.getOpcode() != X86ISD::Wrapper) {
+ ReplaceUses(N.getValue(0), Chain);
+ return NULL;
+ }
+ GlobalAddressSDNode *GVNode =
+ dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
+ if (GVNode == 0) {
+ ReplaceUses(N.getValue(0), Chain);
+ return NULL;
+ }
+ SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
+ TLI.getPointerTy());
+ SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
+ TLI.getPointerTy());
+ SDValue Ops[] = { Tmp1, Tmp2, Chain };
+ return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
+ MVT::Other, Ops,
+ array_lengthof(Ops));
+ }
+ }
+
+ SDNode *ResNode = SelectCode(N);
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == N.getNode())
+ DEBUG(N.getNode()->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+
+ return ResNode;
+}
+
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1, Op2, Op3, Op4;
+ switch (ConstraintCode) {
+ case 'o': // offsettable ??
+ case 'v': // not offsettable ??
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ OutOps.push_back(Op2);
+ OutOps.push_back(Op3);
+ OutOps.push_back(Op4);
+ return false;
+}
+
+/// createX86ISelDag - This pass converts a legalized DAG into an
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel) {
+ return new X86DAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
new file mode 100644
index 0000000..882ee3a
--- /dev/null
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -0,0 +1,8794 @@
+//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+
+// Forward declarations.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+ SDValue V2);
+
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
+ : TargetLowering(TM) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+
+ RegInfo = TM.getRegisterInfo();
+ TD = getTargetData();
+
+ // Set up the TargetLowering object.
+
+ // X86 is weird; it always uses i8 for shift amounts and setcc results.
+ setShiftAmountType(MVT::i8);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setSchedulingPreference(SchedulingForRegPressure);
+ setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
+ setStackPointerRegisterToSaveRestore(X86StackPtr);
+
+ if (Subtarget->isTargetDarwin()) {
+ // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(false);
+ setUseUnderscoreLongJmp(false);
+ } else if (Subtarget->isTargetMingw()) {
+ // The MS runtime is weird: it exports _setjmp, but plain longjmp!
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(false);
+ } else {
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+ }
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, X86::GR8RegisterClass);
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ // SETOEQ and SETUNE require checking two conditions.
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+
+ // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+ // operation.
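+ // (For example, a ui16 operand is zero-extended to i32 and converted with
+ // the signed i32 conversion, which is exact because the extended value is
+ // always non-negative.)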
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
+ } else if (!UseSoftFloat) {
+ if (X86ScalarSSEf64) {
+ // We have an impenetrably clever algorithm for ui64->double only.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ }
+ // We have an algorithm for SSE2, and we turn this into a 64-bit
+ // FILD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ }
+
+ // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
+
+ if (!UseSoftFloat && !NoImplicitFloat) {
+ // SSE has no i16 to fp conversion, only i32
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ }
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
+ }
+
+ // In 32-bit mode these are custom lowered. In 64-bit mode f32 and f64
+ // are Legal; f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+
+ // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
+
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
+
+ // Handle FP_TO_UINT by promoting the destination to a larger signed
+ // conversion.
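+ // (For example, an f32-to-i16 FP_TO_UINT is performed as an f32-to-i32
+ // FP_TO_SINT whose result is truncated; the wider signed range covers
+ // every unsigned i16 value.)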
+ setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ } else if (!UseSoftFloat) {
+ if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
+ // Expand FP_TO_UINT into a select.
+ // FIXME: We would like to use a Custom expander here eventually to do
+ // the optimal thing for SSE vs. the default expansion in the legalizer.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
+ else
+ // With SSE3 we can use fisttpll to convert to a signed i64; without
+ // SSE, we're stuck with a fistpll.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ }
+
+ // TODO: when we have SSE, these could be more efficient by using movd/movq.
+ if (!X86ScalarSSEf64) {
+ setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
+ setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ }
+
+ // Scalar integer divide and remainder are lowered to use operations that
+ // produce two results, to match the available instructions. This exposes
+ // the two-result form to trivial CSE, which is able to combine x/y and x%y
+ // into a single instruction.
+ //
+ // Scalar integer multiply-high is also lowered to use two-result
+ // operations, to match the available instructions. However, plain multiply
+ // (low) operations are left as Legal, as there are single-result
+ // instructions for this in x86. Using the two-result multiply instructions
+ // when both high and low results are needed must be arranged by dagcombine.
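+ // For example, x/y and x%y over the same operands each lower to an
+ // ISD::SDIVREM node; CSE merges the two nodes, and a single divide
+ // instruction then produces both results.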
+ setOperationAction(ISD::MULHS , MVT::i8 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i8 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i8 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i8 , Expand);
+ setOperationAction(ISD::SREM , MVT::i8 , Expand);
+ setOperationAction(ISD::UREM , MVT::i8 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i16 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i16 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i16 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i16 , Expand);
+ setOperationAction(ISD::SREM , MVT::i16 , Expand);
+ setOperationAction(ISD::UREM , MVT::i16 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i32 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i32 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i32 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i32 , Expand);
+ setOperationAction(ISD::SREM , MVT::i32 , Expand);
+ setOperationAction(ISD::UREM , MVT::i32 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i64 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i64 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i64 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i64 , Expand);
+ setOperationAction(ISD::SREM , MVT::i64 , Expand);
+ setOperationAction(ISD::UREM , MVT::i64 , Expand);
+
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND , MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+ setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ }
+
+ setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+
+ // These should be promoted to a larger select which is supported.
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
+ setOperationAction(ISD::SELECT , MVT::i8 , Promote);
+ // X86 wants to expand cmov itself.
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f64 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f80 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f80 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SELECT , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ }
+ // The X86 ret instruction may pop the stack.
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+
+ // Darwin ABI issue.
+ setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ }
+ // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
+ setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasSSE1())
+ setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+
+ if (!Subtarget->hasSSE2())
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+
+ // Expand certain atomics
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+
+ if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ }
+
+ // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ // FIXME - use subtarget debug flags
+ if (!Subtarget->isTargetDarwin() &&
+ !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing()) {
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ if (Subtarget->is64Bit()) {
+ setExceptionPointerRegister(X86::RAX);
+ setExceptionSelectorRegister(X86::RDX);
+ } else {
+ setExceptionPointerRegister(X86::EAX);
+ setExceptionSelectorRegister(X86::EDX);
+ }
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
+
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ if (Subtarget->isTargetCygMing())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ if (!UseSoftFloat && X86ScalarSSEf64) {
+ // f32 and f64 use SSE.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::FR64RegisterClass);
+
+ // Use ANDPD to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f64, Custom);
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f64, Custom);
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ // Use ANDPD and ORPD to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ // Use SSE for f32, x87 for f64.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+
+ // Use ANDPS to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+ // Use ANDPS and ORPS to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Special cases we handle for FP constants.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ } else if (!UseSoftFloat) {
+ // f32 and f64 in x87.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+ addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // Long double always uses x87.
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
+ {
+ bool ignored;
+ APFloat TmpFlt(+0.0);
+ TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
+ addLegalFPImmediate(TmpFlt); // FLD0
+ TmpFlt.changeSign();
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+ APFloat TmpFlt2(+1.0);
+ TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
+ addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.changeSign();
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ }
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f80 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ }
+ }
+
+ // Always use a library call for pow.
+ setOperationAction(ISD::FPOW , MVT::f32 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f64 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+
+ // First set operation action for all vector types to either promote
+ // (for widening) or expand (for scalarization). Then we will selectively
+ // turn on ones that can be effectively codegen'd.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // FIXME: In order to prevent SSE instructions from being expanded into MMX
+ // ones with -msoft-float, disable use of MMX as well.
+ if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
+
+ setOperationAction(ISD::ADD, MVT::v8i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::SUB, MVT::v8i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i16, Legal);
+
+ setOperationAction(ISD::AND, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::OR, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::XOR, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
+
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
+ }
+
+ if (!UseSoftFloat && Subtarget->hasSSE1()) {
+ addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
+ }
+
+ if (!UseSoftFloat && Subtarget->hasSSE2()) {
+ addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
+
+ // FIXME: Unfortunately -soft-float and -no-implicit-float mean that XMM
+ // registers cannot be used even for integer operations.
+ addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::ADD, MVT::v16i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
+ continue;
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ }
+
+ // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+ for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
+ }
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom lower v2i64 and v2f64 selects.
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+
+ }
+
+ if (Subtarget->hasSSE41()) {
+ // FIXME: Do we need to handle scalar-to-vector here?
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+
+ // i8 and i16 vectors are custom, because the source register and source
+ // memory operand types are not the same width. f32 vectors are
+ // custom since the immediate controlling the insert encodes additional
+ // information.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ }
+
+ if (Subtarget->hasSSE42()) {
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction(ISD::SADDO, MVT::i64, Custom);
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO, MVT::i64, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO, MVT::i64, Custom);
+ setOperationAction(ISD::SMULO, MVT::i32, Custom);
+ setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ setOperationAction(ISD::UMULO, MVT::i32, Custom);
+ setOperationAction(ISD::UMULO, MVT::i64, Custom);
+
+ if (!Subtarget->is64Bit()) {
+ // These libcalls are not available in 32-bit mode.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::STORE);
+ if (Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::MUL);
+
+ computeRegisterProperties();
+
+ // FIXME: These should be based on subtarget info. Plus, the values should
+ // be smaller when we are optimizing for size.
+ maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
+ allowUnalignedMemoryAccesses = true; // x86 supports it!
+ setPrefLoopAlignment(16);
+ benefitFromCodePlacementOpt = true;
+}
+
+
+MVT X86TargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i8;
+}
+
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
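+/// For example, a struct containing a v4f32 field has a 128-bit vector
+/// member, so its byval alignment is raised to 16 bytes.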
+static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+ if (MaxAlign == 16)
+ return;
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getBitWidth() == 128)
+ MaxAlign = 16;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16)
+ break;
+ }
+ }
+ return;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. For X86, aggregates
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
+unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ if (Subtarget->is64Bit()) {
+ // Max of 8 and alignment of type.
+ unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ if (TyAlign > 8)
+ return TyAlign;
+ return 8;
+ }
+
+ unsigned Align = 4;
+ if (Subtarget->hasSSE1())
+ getMaxByValAlign(Ty, Align);
+ return Align;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
+/// determining it.
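+/// For example, a 16-byte-or-larger memcpy from constant data on an SSE2
+/// target with 16-byte stack alignment returns v4i32, so the copy can be
+/// emitted with 128-bit loads and stores.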
+MVT
+X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr) const {
+ // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
+ // linux. This is because the stack realignment code can't handle certain
+ // cases like PR2962. This should be removed when PR2962 is fixed.
+ if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) {
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
+ return MVT::v4i32;
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
+ return MVT::v4f32;
+ }
+ if (Subtarget->is64Bit() && Size >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
+
+/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
+/// jumptable.
+SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (usesGlobalOffsetTable())
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ if (!Subtarget->isPICStyleRIPRel())
+ // This doesn't have a DebugLoc associated with it, but it is not really
+ // the same as a Register.
+ return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
+ getPointerTy());
+ return Table;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "X86GenCallingConv.inc"
+
+/// LowerRET - Lower an ISD::RET node.
+SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+ SDValue Chain = Op.getOperand(0);
+
+ // Handle tail call return.
+ Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
+ if (Chain.getOpcode() == X86ISD::TAILCALL) {
+ SDValue TailCall = Chain;
+ SDValue TargetAddress = TailCall.getOperand(1);
+ SDValue StackAdjustment = TailCall.getOperand(2);
+ assert(((TargetAddress.getOpcode() == ISD::Register &&
+ (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
+ cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
+ TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+ TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
+ "Expecting an global address, external symbol, or register");
+ assert(StackAdjustment.getOpcode() == ISD::Constant &&
+ "Expecting a const value");
+
+ SmallVector<SDValue,8> Operands;
+ Operands.push_back(Chain.getOperand(0));
+ Operands.push_back(TargetAddress);
+ Operands.push_back(StackAdjustment);
+ // Copy registers used by the call. Last operand is a flag so it is not
+ // copied.
+ for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
+ Operands.push_back(Chain.getOperand(i));
+ }
+ return DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, &Operands[0],
+ Operands.size());
+ }
+
+ // Regular return.
+ SDValue Flag;
+
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue ValToCopy = Op.getOperand(i*2+1);
+
+ // Returns in ST0/ST1 are handled specially: these are pushed as operands to
+ // the RET instruction and handled by the FP Stackifier.
+ if (VA.getLocReg() == X86::ST0 ||
+ VA.getLocReg() == X86::ST1) {
+ // If this is a copy from an xmm register to ST(0), use an FPExtend to
+ // change the value to the FP stack register class.
+ if (isScalarFPTypeInSSEReg(VA.getValVT()))
+ ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
+ RetOps.push_back(ValToCopy);
+ // Don't emit a copytoreg.
+ continue;
+ }
+
+ // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+ // which is returned in RAX / RDX.
+ if (Subtarget->is64Bit()) {
+ MVT ValVT = ValToCopy.getValueType();
+ if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
+ ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
+ }
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax.
+ if (Subtarget->is64Bit() &&
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(X86ISD::RET_FLAG, dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+}
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. This returns an SDNode with the same number of values as
+/// the ISD::CALL.
+SDNode *X86TargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+
+ DebugLoc dl = TheCall->getDebugLoc();
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool isVarArg = TheCall->isVarArg();
+ bool Is64Bit = Subtarget->is64Bit();
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
+
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ MVT CopyVT = VA.getValVT();
+
+    // If this is x86-64 (or the value is returned inreg) and SSE is
+    // disabled, we can't return FP values.
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
+ cerr << "SSE register return with SSE disabled\n";
+ exit(1);
+ }
+
+ // If this is a call to a function that returns an fp value on the floating
+ // point stack, but where we prefer to use the value in xmm registers, copy
+ // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if ((VA.getLocReg() == X86::ST0 ||
+ VA.getLocReg() == X86::ST1) &&
+ isScalarFPTypeInSSEReg(VA.getValVT())) {
+ CopyVT = MVT::f80;
+ }
+
+ SDValue Val;
+ if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
+ // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ MVT::v2i64, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+ Val, DAG.getConstant(0, MVT::i64));
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ MVT::i64, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ }
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ }
+ InFlag = Chain.getValue(2);
+
+ if (CopyVT != VA.getValVT()) {
+ // Round the F80 the right size, which also moves to the appropriate xmm
+ // register.
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ }
+
+ ResultVals.push_back(Val);
+ }
+
+ // Merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+
+//===----------------------------------------------------------------------===//
+// C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// The StdCall calling convention is the standard for many Windows API
+// routines. It differs from the C calling convention only slightly: the
+// callee cleans up the stack instead of the caller, and symbols are
+// decorated in a particular way. It doesn't support any vector arguments.
+// For info on fast calling convention see Fast Calling Convention (tail call)
+// implementation LowerX86_32FastCCCallTo.
+
+/// CallIsStructReturn - Determines whether a CALL node uses struct return
+/// semantics.
+static bool CallIsStructReturn(CallSDNode *TheCall) {
+ unsigned NumOps = TheCall->getNumArgs();
+ if (!NumOps)
+ return false;
+
+ return TheCall->getArgFlags(0).isSRet();
+}
+
+/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
+/// return semantics.
+static bool ArgsAreStructReturn(SDValue Op) {
+ unsigned NumArgs = Op.getNode()->getNumValues() - 1;
+ if (!NumArgs)
+ return false;
+
+ return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
+}
+
+/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
+/// the callee to pop its own arguments. Callee pop is necessary to support tail
+/// calls.
+bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !Subtarget->is64Bit();
+ case CallingConv::X86_FastCall:
+ return !Subtarget->is64Bit();
+ case CallingConv::Fast:
+ return PerformTailCallOpt;
+ }
+}
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
+/// CallingConvention value.
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
+ if (Subtarget->is64Bit()) {
+ if (Subtarget->isTargetWin64())
+ return CC_X86_Win64_C;
+ else if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_64_TailCall;
+ else
+ return CC_X86_64_C;
+ }
+
+ if (CC == CallingConv::X86_FastCall)
+ return CC_X86_32_FastCall;
+ else if (CC == CallingConv::Fast)
+ return CC_X86_32_FastCC;
+ else
+ return CC_X86_32_C;
+}
+
+/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
+/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
+NameDecorationStyle
+X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (CC == CallingConv::X86_FastCall)
+ return FastCall;
+ else if (CC == CallingConv::X86_StdCall)
+ return StdCall;
+ return None;
+}
+
+
+/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
+/// in a register before calling.
+bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
+ return !IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT();
+}
+
+/// CallRequiresFnAddressInReg - Check whether the call requires the function
+/// address to be loaded in a register.
+bool
+X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
+ return !Is64Bit && IsTailCall &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*AlwaysInline=*/true, NULL, 0, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned CC,
+ SDValue Root, unsigned i) {
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
+ bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+  // FIXME: For now, all byval parameter objects are marked mutable. This can
+  // be changed with more analysis.
+  // In the case of tail call optimization, mark all arguments mutable, since
+  // they could be overwritten by the lowering of arguments in a tail call.
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ if (Flags.isByVal())
+ return FIN;
+ return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0);
+}
+
+SDValue
+X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ DebugLoc dl = Op.getDebugLoc();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ // Decorate the function name.
+ FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
+
+ SmallVector<SDValue, 8> ArgValues;
+ unsigned LastVal = ~0U;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = NULL;
+ if (RegVT == MVT::i32)
+ RC = X86::GR32RegisterClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = X86::GR64RegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = X86::FR32RegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = X86::FR64RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ RC = X86::VR128RegisterClass;
+ else if (RegVT.isVector()) {
+ assert(RegVT.getSizeInBits() == 64);
+ if (!Is64Bit)
+ RC = X86::VR64RegisterClass; // MMX values are passed in MMXs.
+ else {
+ // Darwin calling convention passes MMX values in either GPRs or
+ // XMMs in x86-64. Other targets pass them in memory.
+ if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
+ RC = X86::VR128RegisterClass; // MMX values are passed in XMMs.
+ RegVT = MVT::v2i64;
+ } else {
+ RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs.
+ RegVT = MVT::i64;
+ }
+ }
+ } else {
+ assert(0 && "Unknown argument type!");
+ }
+
+ unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ // Handle MMX values passed in GPRs.
+ if (Is64Bit && RegVT != VA.getLocVT()) {
+ if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
+ else if (RC == X86::VR128RegisterClass) {
+ ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+ ArgValue, DAG.getConstant(0, MVT::i64));
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
+ }
+ }
+
+ ArgValues.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc());
+ ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
+ }
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+ }
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+ // align stack specially for tail calls
+ if (PerformTailCallOpt && CC == CallingConv::Fast)
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ if (Is64Bit || CC != CallingConv::X86_FastCall) {
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ }
+ if (Is64Bit) {
+ unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
+
+ // FIXME: We should really autogenerate these arrays
+ static const unsigned GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegsWin64[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
+ };
+ static const unsigned GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ const unsigned *GPR64ArgRegs, *XMMArgRegs;
+
+ if (IsWin64) {
+ TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+ GPR64ArgRegs = GPR64ArgRegsWin64;
+ XMMArgRegs = XMMArgRegsWin64;
+ } else {
+ TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
+ GPR64ArgRegs = GPR64ArgRegs64Bit;
+ XMMArgRegs = XMMArgRegs64Bit;
+ }
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
+ TotalNumIntRegs);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
+ TotalNumXMMRegs);
+
+ assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ "SSE register cannot be used when SSE is disabled!");
+      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) &&
+             "SSE register cannot be used with soft float / no implicit float!");
+ if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1())
+ // Kernel mode asks for SSE to be disabled, so don't push them
+ // on the stack.
+ TotalNumXMMRegs = 0;
+
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+      // may be loaded by dereferencing the result of va_next.
+ VarArgsGPOffset = NumIntRegs * 8;
+ VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
+ RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
+ TotalNumXMMRegs * 16, 16);
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsGPOffset));
+ for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(8));
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsFPOffset));
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+ unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(16));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+ }
+
+ ArgValues.push_back(Root);
+
+ // Some CCs need callee pop.
+ if (IsCalleePop(isVarArg, CC)) {
+ BytesToPopOnReturn = StackSize; // Callee pops everything.
+ BytesCallerReserves = 0;
+ } else {
+ BytesToPopOnReturn = 0; // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
+ BytesToPopOnReturn = 4;
+ BytesCallerReserves = StackSize;
+ }
+
+ if (!Is64Bit) {
+ RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
+ if (CC == CallingConv::X86_FastCall)
+ VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
+ }
+
+ FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+SDValue
+X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+ const SDValue &StackPtr,
+ const CCValAssign &VA,
+ SDValue Chain,
+ SDValue Arg, ISD::ArgFlagsTy Flags) {
+ DebugLoc dl = TheCall->getDebugLoc();
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal()) {
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+ }
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ PseudoSourceValue::getStack(), LocMemOffset);
+}
+
+/// EmitTailCallLoadRetAddr - Emit a load of the return address if tail call
+/// optimization is performed and it is required.
+SDValue
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+ SDValue &OutRetAddr,
+ SDValue Chain,
+ bool IsTailCall,
+ bool Is64Bit,
+ int FPDiff,
+ DebugLoc dl) {
+ if (!IsTailCall || FPDiff==0) return Chain;
+
+ // Adjust the Return address stack slot.
+ MVT VT = getPointerTy();
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+
+ // Load the "old" Return address.
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0);
+ return SDValue(OutRetAddr.getNode(), 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDValue
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+ SDValue Chain, SDValue RetAddrFrIdx,
+ bool Is64Bit, int FPDiff, DebugLoc dl) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
+ PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
+ return Chain;
+}
+
+SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ unsigned CC = TheCall->getCallingConv();
+ bool isVarArg = TheCall->isVarArg();
+ bool IsTailCall = TheCall->isTailCall() &&
+ CC == CallingConv::Fast && PerformTailCallOpt;
+ SDValue Callee = TheCall->getCallee();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsStructRet = CallIsStructReturn(TheCall);
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ if (PerformTailCallOpt && CC == CallingConv::Fast)
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ int FPDiff = 0;
+ if (IsTailCall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+    // Set the delta of movement of the return-address stack slot, but only
+    // if this delta is greater than the previous one.
+ if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+ MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue RetAddrFrIdx;
+  // Load the return address for tail calls.
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+ FPDiff, dl);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+  // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = TheCall->getArg(i);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ if (Is64Bit) {
+ MVT RegVT = VA.getLocVT();
+ if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+ switch (VA.getLocReg()) {
+ default:
+ break;
+ case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
+ case X86::R8: {
+ // Special case: passing MMX values in GPR registers.
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ break;
+ }
+ case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
+ case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ break;
+ }
+ }
+ }
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ if (!IsTailCall || (IsTailCall && isByVal)) {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
+ Chain, Arg, Flags));
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDValue InFlag;
+ // Tail call byval lowering might overwrite argument registers so in case of
+ // tail call optimization the copies to registers are lowered later.
+ if (!IsTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+  // If we are tail calling and generating PIC/GOT style code, load the
+  // address of the callee into ecx. The value in ecx is used as the target of
+  // the tail jump. This is done to circumvent the ebx/callee-saved problem
+  // for tail calls on PIC/GOT architectures. Normally we would just put the
+  // address of GOT into ebx and then call target@PLT. But for tail calls ebx
+  // would be restored (since ebx is callee saved) before jumping to the
+  // target@PLT.
+ if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
+ // Note: The actual moving to ecx is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee,DAG);
+ }
+
+ if (Is64Bit && isVarArg) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+    // the declaration) %al is used as a hidden argument to specify the number
+    // of SSE registers used. The contents of %al do not need to match exactly
+    // the number of registers, but must be an upper bound on the number of
+    // SSE registers used, and is in the range 0 - 8 inclusive.
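+    // For example (illustrative): a varargs call passing one double in XMM0
+    // reaches here with NumXMMRegs == 1, so %al is set to 1.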
+
+ // FIXME: Verify this on Win64
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
+ DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (IsTailCall) {
+ SmallVector<SDValue, 8> MemOpChains2;
+ SDValue FIN;
+ int FI = 0;
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc()) {
+ assert(VA.isMemLoc());
+ SDValue Arg = TheCall->getArg(i);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+ Flags, DAG, dl));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(
+ DAG.getStore(Chain, dl, Arg, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Copy arguments to their registers.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+    InFlag = SDValue();
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ FPDiff, dl);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // We should use extra load for direct calls to dllimported functions in
+ // non-JIT mode.
+ if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
+ getTargetMachine(), true))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
+ G->getOffset());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ } else if (IsTailCall) {
+ unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
+
+ Chain = DAG.getCopyToReg(Chain, dl,
+ DAG.getRegister(Opc, getPointerTy()),
+                             Callee, InFlag);
+ Callee = DAG.getRegister(Opc, getPointerTy());
+ // Add register as live out.
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+
+ if (IsTailCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Returns a chain & a flag for retval copy to use.
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (IsTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+ // Add an implicit use of AL for x86 vararg functions.
+ if (Is64Bit && isVarArg)
+ Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ if (IsTailCall) {
+ assert(InFlag.getNode() &&
+ "Flag must be set. Depend on flag being set in LowerRET");
+ Chain = DAG.getNode(X86ISD::TAILCALL, dl,
+ TheCall->getVTList(), &Ops[0], Ops.size());
+
+ return SDValue(Chain.getNode(), Op.getResNo());
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (IsCalleePop(isVarArg, CC))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
+    // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = 4;
+ else
+ NumBytesForCalleeToPush = 0; // Callee pops nothing.
+
+ // Returns a flag for retval copy to use.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+ Op.getResNo());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like StdCall, the callee cleans up the arguments, except that ECX is
+// reserved for storing the address of the tail-called function. Only 2
+// registers are free for argument passing (inreg). Tail call optimization is
+// performed provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// On the X86_64 architecture with GOT-style position-independent code, only
+// local (within-module) calls are supported at the moment.
+// To keep the stack aligned according to the platform ABI, the function
+// GetAlignedArgumentStackSize ensures that the argument delta is always a
+// multiple of the stack alignment. (Dynamic linkers need this - Darwin's
+// dyld, for example.)
+// If a tail-called function (the callee) has more arguments than the caller,
+// the caller needs to make sure that there is room to move the RETADDR to.
+// This is achieved by reserving an area the size of the argument delta right
+// after the original RETADDR, but before the saved frame pointer or the
+// spilled registers,
+// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
+// stack layout:
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
+
+/// GetAlignedArgumentStackSize - Align the stack size, e.g. to 16n + 12 for
+/// a 16-byte alignment requirement.
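+/// For example (illustrative, with StackAlignment = 16 and SlotSize = 4):
+/// StackSize = 8 becomes 12 (16*0 + 12) and StackSize = 14 becomes 28
+/// (16*1 + 12), so pushing the 4-byte RETADDR restores 16-byte alignment.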
+unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ uint64_t AlignMask = StackAlignment - 1;
+ int64_t Offset = StackSize;
+ uint64_t SlotSize = TD->getPointerSize();
+  if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
+    // The remainder already fits below the slot boundary (e.g. <= 12 for a
+    // 16-byte alignment with a 4-byte slot), so just add the difference.
+ Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+ } else {
+    // Mask out the lower bits, then add the stack alignment once plus the
+    // remainder slot (the 12 bytes in the 16/4 case).
+ Offset = ((~AlignMask) & Offset) + StackAlignment +
+ (StackAlignment-SlotSize);
+ }
+ return Offset;
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next
+/// instruction following the call is a return. A function is eligible if
+/// caller/callee calling conventions match, currently only fastcc supports
+/// tail calls, and the function CALL is immediately followed by a RET.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+ SDValue Ret,
+ SelectionDAG& DAG) const {
+ if (!PerformTailCallOpt)
+ return false;
+
+ if (CheckTailCallReturnConstraints(TheCall, Ret)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CallerCC = MF.getFunction()->getCallingConv();
+    unsigned CalleeCC = TheCall->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ SDValue Callee = TheCall->getCallee();
+      // On 32-bit x86, PIC/GOT tail calls are supported.
+      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
+          !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit())
+ return true;
+
+ // Can only do local tail calls (in same module, hidden or protected) on
+ // x86_64 PIC/GOT at the moment.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+ }
+
+ return false;
+}
+
+FastISel *
+X86TargetLowering::createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmo,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *,
+ MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ ) {
+ return X86::createFastISel(mf, mmo, dw, vm, bm, am
+#ifndef NDEBUG
+ , cil
+#endif
+ );
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+
+SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+
+/// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the
+/// X86-specific condition code, returning the condition code and the LHS/RHS
+/// of the comparison to make.
+static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+ SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
+ if (!isFP) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ // X > -1 -> X == 0, jump !sign.
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_NS;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ // X < 0 -> X == 0, jump on sign.
+ return X86::COND_S;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+ // X < 1 -> X <= 0
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_LE;
+ }
+ }
+
+ switch (SetCCOpcode) {
+ default: assert(0 && "Invalid integer condition!");
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETGT: return X86::COND_G;
+ case ISD::SETGE: return X86::COND_GE;
+ case ISD::SETLT: return X86::COND_L;
+ case ISD::SETLE: return X86::COND_LE;
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETULT: return X86::COND_B;
+ case ISD::SETUGT: return X86::COND_A;
+ case ISD::SETULE: return X86::COND_BE;
+ case ISD::SETUGE: return X86::COND_AE;
+ }
+ }
+
+ // First determine if it is required or is profitable to flip the operands.
+
+ // If LHS is a foldable load, but RHS is not, flip the condition.
+ if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
+ !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+ SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
+ std::swap(LHS, RHS);
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
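+  // For example (illustrative): X > Y leaves ZF=0 and CF=0, so SETOGT maps
+  // below to COND_A, the unsigned 'above' condition (CF=0 and ZF=0).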
+ switch (SetCCOpcode) {
+ default: assert(0 && "Condcode should be pre-legalized away");
+ case ISD::SETUEQ:
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETOLT: // flipped
+ case ISD::SETOGT:
+ case ISD::SETGT: return X86::COND_A;
+ case ISD::SETOLE: // flipped
+ case ISD::SETOGE:
+ case ISD::SETGE: return X86::COND_AE;
+ case ISD::SETUGT: // flipped
+ case ISD::SETULT:
+ case ISD::SETLT: return X86::COND_B;
+ case ISD::SETUGE: // flipped
+ case ISD::SETULE:
+ case ISD::SETLE: return X86::COND_BE;
+ case ISD::SETONE:
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETUO: return X86::COND_P;
+ case ISD::SETO: return X86::COND_NP;
+ }
+}
+
+/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
+/// code? The current x86 ISA includes the following FP cmov instructions:
+/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
+static bool hasFPCMov(unsigned X86CC) {
+ switch (X86CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
+/// isUndefOrInRange - Return true if Val is undef or if its value falls
+/// within the specified half-open range [Low, Hi).
+static bool isUndefOrInRange(int Val, int Low, int Hi) {
+ return (Val < 0) || (Val >= Low && Val < Hi);
+}
+
+/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
+/// specified value.
+static bool isUndefOrEqual(int Val, int CmpVal) {
+  return Val < 0 || Val == CmpVal;
+}
+
+/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
+/// the second operand.
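+/// For example (illustrative): the v4 mask <2,3,0,1> qualifies since every
+/// index references the first operand, while <0,1,4,5> does not.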
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+ return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return (Mask[0] < 2 && Mask[1] < 2);
+ return false;
+}
+
+bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFDMask(M, N->getValueType(0));
+}
+
+/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFHW.
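+/// For example (illustrative): <0,1,2,3,7,6,5,4> qualifies (lower quadword
+/// in order, upper elements shuffled within 4..7); <0,1,2,3,0,1,2,3> does not.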
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Lower quadword copied in order or undef.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Upper quadword shuffled.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+ return false;
+
+ return true;
+}
+
+bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFHWMask(M, N->getValueType(0));
+}
+
+/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFLW.
+static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Upper quadword copied in order.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Lower quadword shuffled.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 4)
+ return false;
+
+ return true;
+}
+
+bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFLWMask(M, N->getValueType(0));
+}
+
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to SHUFP*.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+
+ return true;
+}
+
+bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isSHUFPMask(M, N->getValueType(0));
+}
+
+/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
+/// the reverse of what x86 shuffles want. x86 shuffles require the lower
+/// half elements to come from vector 1 (which would equal the destination)
+/// and the upper half to come from vector 2.
+static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ return true;
+}
+
+static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedSHUFPMask(M, N->getValueType(0));
+}
+
+/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+  // Expect elt0 == 6, elt1 == 7, elt2 == 2, elt3 == 3
+ return isUndefOrEqual(N->getMaskElt(0), 6) &&
+ isUndefOrEqual(N->getMaskElt(1), 7) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
+ return false;
+
+ for (unsigned i = NumElems/2; i < NumElems; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ return true;
+}
+
+/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
+/// and MOVLHPS.
+bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
+ return false;
+
+ return true;
+}
+
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKL.
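+/// For example (illustrative): for v4i32 the canonical UNPCKL mask is
+/// <0, 4, 1, 5>, interleaving the low halves of the two operands.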
+static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKH.
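+/// For example (illustrative): for v4i32 the canonical UNPCKH mask is
+/// <2, 6, 3, 7>, interleaving the high halves of the two operands.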
+static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, MVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j + NumElts/2))
+ return false;
+ if (V2IsSplat) {
+      if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+/// <0, 0, 1, 1>
+static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+/// <2, 2, 3, 3>
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
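+/// For example (illustrative): for v4 the mask <4, 1, 2, 3> qualifies --
+/// element 0 comes from the second operand and the rest pass through in order.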
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4)
+ return false;
+
+ if (!isUndefOrEqual(Mask[0], NumElts))
+ return false;
+
+ for (int i = 1; i < NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+
+ return true;
+}
+
+bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isMOVLMask(M, N->getValueType(0));
+}
+
+/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
+/// x86 movss wants: the lowest element comes from vector 2 and the other
+/// elements come from vector 1 in order.
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+ bool V2IsSplat = false, bool V2IsUndef = false) {
+ int NumOps = VT.getVectorNumElements();
+ if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
+ return false;
+
+ if (!isUndefOrEqual(Mask[0], 0))
+ return false;
+
+ for (int i = 1; i < NumOps; ++i)
+ if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
+ (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
+ (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
+ return false;
+
+ return true;
+}
+
+static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
+ bool V2IsUndef = false) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
+}
+
+/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect 1, 1, 3, 3
+ for (unsigned i = 0; i < 2; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 1)
+ return false;
+ }
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 3)
+ return false;
+ if (Elt == 3)
+ HasHi = true;
+ }
+ // Don't use movshdup if it can be done with a shufps.
+ // FIXME: verify that matching u, u, 3, 3 is what we want.
+ return HasHi;
+}
+
+/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect 0, 0, 2, 2
+ for (unsigned i = 0; i < 2; ++i)
+ if (N->getMaskElt(i) > 0)
+ return false;
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 2)
+ return false;
+ if (Elt == 2)
+ HasHi = true;
+ }
+ // Don't use movsldup if it can be done with a shufps.
+ return HasHi;
+}
+
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
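+/// For example (illustrative): for v2f64 the mask <0, 0> qualifies,
+/// duplicating the low element.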
+bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
+ int e = N->getValueType(0).getVectorNumElements() / 2;
+
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(e+i), i))
+ return false;
+ return true;
+}
+
+/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+/// instructions.
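+/// The mask elements are packed high-to-low, two bits each for 4-element
+/// vectors (one bit each for 2-element ones); e.g. the v4 identity mask
+/// <0, 1, 2, 3> encodes as (3<<6)|(2<<4)|(1<<2)|0 = 0xE4.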
+unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ int NumOperands = SVOp->getValueType(0).getVectorNumElements();
+
+ unsigned Shift = (NumOperands == 4) ? 2 : 1;
+ unsigned Mask = 0;
+ for (int i = 0; i < NumOperands; ++i) {
+ int Val = SVOp->getMaskElt(NumOperands-i-1);
+ if (Val < 0) Val = 0;
+ if (Val >= NumOperands) Val -= NumOperands;
+ Mask |= Val;
+ if (i != NumOperands - 1)
+ Mask <<= Shift;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+/// instructions.
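+/// Only elements 4-7 participate; each is encoded in two bits as its mask
+/// value minus 4, with element 7 in the top bits. The identity mask
+/// <0,1,2,3,4,5,6,7> therefore encodes as 0xE4.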
+unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 elements, but we only care about the last 4.
+ for (unsigned i = 7; i >= 4; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= (Val - 4);
+ if (i != 4)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+/// instructions.
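+/// Only elements 0-3 participate; each mask value is encoded in two bits,
+/// with element 3 in the top bits. The identity mask <0,1,2,3,4,5,6,7>
+/// therefore encodes as 0xE4.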
+unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 elements, but we only care about the first 4.
+ for (int i = 3; i >= 0; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= Val;
+ if (i != 0)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
+/// their permute mask.
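+/// For example, for a v4 type the mask <0, 5, 2, 7> becomes <4, 1, 6, 3>
+/// once V1 and V2 have swapped positions.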
+static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ MVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = SVOp->getMaskElt(i);
+ if (idx < 0)
+ MaskVec.push_back(idx);
+ else if (idx < (int)NumElems)
+ MaskVec.push_back(idx + NumElems);
+ else
+ MaskVec.push_back(idx - NumElems);
+ }
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
+ SVOp->getOperand(0), &MaskVec[0]);
+}
+
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) {
+ unsigned NumElems = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
+ }
+}
+
+/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
+/// match movhlps. The lower half elements should come from the upper half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order).
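+/// Modulo undef elements, the only mask this matches for v4 is <2, 3, 6, 7>.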
+static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
+ if (Op->getValueType(0).getVectorNumElements() != 4)
+ return false;
+ for (unsigned i = 0, e = 2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
+ return false;
+ for (unsigned i = 2; i != 4; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
+ return false;
+ return true;
+}
+
+/// isScalarLoadToVector - Returns true if the node is a scalar load that
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
+ if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+ N = N->getOperand(0).getNode();
+ if (!ISD::isNON_EXTLoad(N))
+ return false;
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
+}
+
+/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
+/// match movlp{s|d}. The lower half elements should come from lower half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order). And since V1 will become the source of the
+/// MOVLP, it must be either a vector load or a scalar load to vector.
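+/// Modulo undef elements, the mask this matches for v4 is <0, 1, 6, 7>.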
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
+ ShuffleVectorSDNode *Op) {
+ if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
+ return false;
+ // If V2 is a vector load, don't do this transformation. We will instead try
+ // to use a load-folding shufps op.
+ if (ISD::isNON_EXTLoad(V2))
+ return false;
+
+ unsigned NumElems = Op->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i))
+ return false;
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
+ return false;
+ return true;
+}
+
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ SDValue SplatValue = N->getOperand(0);
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i) != SplatValue)
+ return false;
+ return true;
+}
+
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+static inline bool isZeroNode(SDValue Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
+/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to a zero vector.
+/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
+static bool isZeroShuffle(ShuffleVectorSDNode *N) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx >= (int)NumElems) {
+ unsigned Opc = V2.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+ return false;
+ } else if (Idx >= 0) {
+ unsigned Opc = V1.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDValue Vec;
+ if (VT.getSizeInBits() == 64) { // MMX
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
+ } else if (HasSSE2) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+///
+static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDValue Vec;
+ if (VT.getSizeInBits() == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+}
+
+
+/// NormalizeMask - V2 is a splat; modify the mask (if needed) so all elements
+/// that point to V2 point to its first element.
+static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ MVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ bool Changed = false;
+ SmallVector<int, 8> MaskVec;
+ SVOp->getMask(MaskVec);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (MaskVec[i] > (int)NumElems) {
+ MaskVec[i] = NumElems;
+ Changed = true;
+ }
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
+ SVOp->getOperand(1), &MaskVec[0]);
+ return SDValue(SVOp, 0);
+}
+
+/// getMOVL - Returns a vector_shuffle node for a movs{s|d} / movd operation
+/// of the specified width.
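+/// For example, for a v4 type this builds the mask <4, 1, 2, 3>.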
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
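+/// For example, for a v4 type this builds the mask <0, 4, 1, 5>, interleaving
+/// the low halves of V1 and V2.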
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
+ Mask.push_back(i);
+ Mask.push_back(i + NumElems);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
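+/// For example, for a v4 type this builds the mask <2, 6, 3, 7>, interleaving
+/// the high halves of V1 and V2.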
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Half = NumElems/2;
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != Half; ++i) {
+ Mask.push_back(i + Half);
+ Mask.push_back(i + NumElems + Half);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
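+/// For example, splatting element 5 of a v8i16: one unpackh leaves two
+/// copies of that element in 32-bit lane 1, and the final v4f32 shuffle
+/// with mask <1, 1, 1, 1> replicates that lane across the whole vector.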
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
+ bool HasSSE2) {
+ if (SV->getValueType(0).getVectorNumElements() <= 4)
+ return SDValue(SV, 0);
+
+ MVT PVT = MVT::v4f32;
+ MVT VT = SV->getValueType(0);
+ DebugLoc dl = SV->getDebugLoc();
+ SDValue V1 = SV->getOperand(0);
+ int NumElems = VT.getVectorNumElements();
+ int EltNo = SV->getSplatIndex();
+
+ // unpack elements to the correct location
+ while (NumElems > 4) {
+ if (EltNo < NumElems/2) {
+ V1 = getUnpackl(DAG, dl, VT, V1, V1);
+ } else {
+ V1 = getUnpackh(DAG, dl, VT, V1, V1);
+ EltNo -= NumElems/2;
+ }
+ NumElems >>= 1;
+ }
+
+ // Perform the splat.
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
+ V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+}
+
+/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
+/// vector and a zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx, yielding a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
+static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
+ bool isZero, bool HasSSE2,
+ SelectionDAG &DAG) {
+ MVT VT = V2.getValueType();
+ SDValue V1 = isZero
+ ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(i == Idx ? NumElems : i);
+ return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
+}
+
+/// getNumOfConsecutiveZeros - Return the number of consecutive elements of a
+/// shuffle result, counted from the low end (or from the high end if Low is
+/// false), that are zero.
+static
+unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
+ bool Low, SelectionDAG &DAG) {
+ unsigned NumZeros = 0;
+ for (int i = 0; i < NumElems; ++i) {
+ unsigned Index = Low ? i : NumElems-i-1;
+ int Idx = SVOp->getMaskElt(Index);
+ if (Idx < 0) {
+ ++NumZeros;
+ continue;
+ }
+ SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
+ if (Elt.getNode() && isZeroNode(Elt))
+ ++NumZeros;
+ else
+ break;
+ }
+ return NumZeros;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+/// FIXME: split into pslldqi, psrldqi, palignr variants.
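+/// For example, with V2 all zeros, the v4i32 mask <4, 0, 1, 2> is a logical
+/// left shift by one element: one zero shifted in at the low end, followed
+/// by V1's elements 0-2 in order.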
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ int NumElems = SVOp->getValueType(0).getVectorNumElements();
+
+ isLeft = true;
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
+ if (!NumZeros) {
+ isLeft = false;
+ NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG);
+ if (!NumZeros)
+ return false;
+ }
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+ for (int i = NumZeros; i < NumElems; ++i) {
+ int Val = isLeft ? (i - NumZeros) : i;
+ int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
+ if (Idx < 0)
+ continue;
+ if (Idx < NumElems)
+ SeenV1 = true;
+ else {
+ Idx -= NumElems;
+ SeenV2 = true;
+ }
+ if (Idx != Val)
+ return false;
+ }
+ if (SeenV1 && SeenV2)
+ return false;
+
+ ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1);
+ ShAmt = NumZeros;
+ return true;
+}
+
+
+/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
+///
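+/// Adjacent byte pairs are zero-extended and merged into 16-bit words
+/// (word i/2 becomes (byte i << 8) | byte i-1), inserted into a zero or
+/// undef v8i16, and the result is bitcast back to v16i8 at the end.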
+static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ if (NumNonZero > 8)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
+ if (ThisIsNonZero && First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+
+ if ((i & 1) != 0) {
+ SDValue ThisElt(0, 0), LastElt(0, 0);
+ bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ if (LastIsNonZero) {
+ LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i16, Op.getOperand(i-1));
+ }
+ if (ThisIsNonZero) {
+ ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
+ ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
+ ThisElt, DAG.getConstant(8, MVT::i8));
+ if (LastIsNonZero)
+ ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
+ } else
+ ThisElt = LastElt;
+
+ if (ThisElt.getNode())
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i/2));
+ }
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V);
+}
+
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
+///
+static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ if (NumNonZero > 4)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 8; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v8i16, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+}
+
+/// getVShift - Return a vector logical shift node.
+///
+static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI, DebugLoc dl) {
+ bool isMMX = VT.getSizeInBits() == 64;
+ MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(Opc, dl, ShVT, SrcOp,
+ DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
+}
+
+SDValue
+X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // All zeros are handled with pxor; all ones are handled with pcmpeqd.
+ if (ISD::isBuildVectorAllZeros(Op.getNode())
+ || ISD::isBuildVectorAllOnes(Op.getNode())) {
+ // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+ // eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ return Op;
+
+ if (ISD::isBuildVectorAllOnes(Op.getNode()))
+ return getOnesVector(Op.getValueType(), DAG, dl);
+ return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
+ }
+
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ unsigned NumElems = Op.getNumOperands();
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ unsigned NonZeros = 0;
+ bool IsAllConstants = true;
+ SmallSet<SDValue, 8> Values;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ IsAllConstants = false;
+ if (isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
+ }
+ }
+
+ if (NumNonZero == 0) {
+ // All-undef vector. Return an UNDEF. All zero vectors were handled above.
+ return DAG.getUNDEF(VT);
+ }
+
+ // Special case for a single non-zero, non-undef element.
+ if (NumNonZero == 1 && NumElems <= 4) {
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+
+ // If this is an insertion of an i64 value on x86-32, and if the top bits of
+ // the value are obviously zero, truncate the value to i32 and do the
+ // insertion that way. Only do this if the value is non-constant or if the
+ // value is a constant being inserted into element 0. It is cheaper to do
+ // a constant pool load than it is to do a movd + shuffle.
+ if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
+ (!IsAllConstants || Idx == 0)) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ // Handle MMX and SSE both.
+ MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
+ unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+
+ // Truncate the value (which may itself be a constant) to i32, and
+ // convert it to a vector with movd (S2V+shuffle to zero extend).
+ Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
+
+ // Now we have our 32-bit value zero extended in the low element of
+ // a vector. If Idx != 0, swizzle it into place.
+ if (Idx != 0) {
+ SmallVector<int, 4> Mask;
+ Mask.push_back(Idx);
+ for (unsigned i = 1; i != VecElts; ++i)
+ Mask.push_back(i);
+ Item = DAG.getVectorShuffle(VecVT, dl, Item,
+ DAG.getUNDEF(Item.getValueType()),
+ &Mask[0]);
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
+ }
+ }
+
+ // If we have a constant or non-constant insertion into the low element of
+ // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
+ // the rest of the elements. This will be matched as movd/movq/movss/movsd
+ // depending on what the source datatype is. Because we can only get here
+ // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
+ if (Idx == 0 &&
+ // Don't do this for i64 values on x86-32.
+ (EVT != MVT::i64 || Subtarget->is64Bit())) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ }
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = VT.getSizeInBits();
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this, dl);
+ }
+
+ if (IsAllConstants) // Otherwise, it's better to do a constpool load.
+ return SDValue();
+
+ // Otherwise, if this is a vector with i32 or f32 elements, and the element
+ // is a non-constant being inserted into an element other than the low one,
+ // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
+ // movd/movss) to move this into the low element, then shuffle it into
+ // place.
+ if (EVTBits == 32) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+
+ // Turn it into a shuffle of zero and zero-extended scalar to vector.
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i++)
+ MaskVec.push_back(i == Idx ? 0 : 1);
+ return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
+ }
+ }
+
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1)
+ return SDValue();
+
+ // A vector full of immediates; various special cases are already
+ // handled, so this is best done with a single constant-pool load.
+ if (IsAllConstants)
+ return SDValue();
+
+ // Let legalizer expand 2-wide build_vectors.
+ if (EVTBits == 64) {
+ if (NumNonZero == 1) {
+ // One half is zero or undef.
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
+ Op.getOperand(Idx));
+ return getShuffleVectorZeroOrUndef(V2, Idx, true,
+ Subtarget->hasSSE2(), DAG);
+ }
+ return SDValue();
+ }
+
+ // If element VT is < 32 bits, convert it to inserts into a zero vector.
+ if (EVTBits == 8 && NumElems == 16) {
+ SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ if (EVTBits == 16 && NumElems == 8) {
+ SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ // If element VT is == 32 bits, turn it into a number of shuffles.
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ if (NumElems == 4 && NumZero > 0) {
+ for (unsigned i = 0; i < 4; ++i) {
+ bool isZero = !(NonZeros & (1 << i));
+ if (isZero)
+ V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ else
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ }
+
+ for (unsigned i = 0; i < 2; ++i) {
+ switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
+ default: break;
+ case 0:
+ V[i] = V[i*2]; // Must be a zero vector.
+ break;
+ case 1:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
+ break;
+ case 2:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ case 3:
+ V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ }
+ }
+
+ SmallVector<int, 8> MaskVec;
+ bool Reverse = (NonZeros & 0x3) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i : i);
+ Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
+ return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
+ }
+
+ if (Values.size() > 2) {
+ // If we have SSE 4.1, Expand into a number of inserts unless the number of
+ // values to be inserted is equal to the number of elements, in which case
+ // use the unpack code below in the hopes of matching the consecutive elts
+ // load merge pattern for shuffles.
+ // FIXME: We could probably just check that here directly.
+ if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
+ getSubtarget()->hasSSE41()) {
+ V[0] = DAG.getUNDEF(VT);
+ for (unsigned i = 0; i < NumElems; ++i)
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
+ Op.getOperand(i), DAG.getIntPtrConstant(i));
+ return V[0];
+ }
+ // Expand into a number of unpckl*.
+ // e.g. for v4f32
+ // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+ // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+ // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ NumElems >>= 1;
+ while (NumElems != 0) {
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
+ NumElems >>= 1;
+ }
+ return V[0];
+ }
+
+ return SDValue();
+}
+
+// v8i16 shuffles - Prefer shuffles in the following order:
+// 1. [all] pshuflw, pshufhw, optional move
+// 2. [ssse3] 1 x pshufb
+// 3. [ssse3] 2 x pshufb + 1 x por
+// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
+static
+SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG, X86TargetLowering &TLI) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 8> MaskVals;
+
+ // Determine if more than 1 of the words in each of the low and high quadwords
+ // of the result come from the same quadword of one of the two inputs. Undef
+ // mask values count as coming from any quadword, for better codegen.
+ SmallVector<unsigned, 4> LoQuad(4);
+ SmallVector<unsigned, 4> HiQuad(4);
+ BitVector InputQuads(4);
+ for (unsigned i = 0; i < 8; ++i) {
+ SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
+ int EltIdx = SVOp->getMaskElt(i);
+ MaskVals.push_back(EltIdx);
+ if (EltIdx < 0) {
+ ++Quad[0];
+ ++Quad[1];
+ ++Quad[2];
+ ++Quad[3];
+ continue;
+ }
+ ++Quad[EltIdx / 4];
+ InputQuads.set(EltIdx / 4);
+ }
+
+ int BestLoQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LoQuad[i] > MaxQuad) {
+ BestLoQuad = i;
+ MaxQuad = LoQuad[i];
+ }
+ }
+
+ int BestHiQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HiQuad[i] > MaxQuad) {
+ BestHiQuad = i;
+ MaxQuad = HiQuad[i];
+ }
+ }
+
+ // For SSSE3, if all 8 words of the result come from only 1 quadword of each
+ // of the two input vectors, shuffle them into one input vector so only a
+ // single pshufb instruction is necessary. If there are more than 2 input
+ // quads, disable the next transformation since it does not help SSSE3.
+ bool V1Used = InputQuads[0] || InputQuads[1];
+ bool V2Used = InputQuads[2] || InputQuads[3];
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ if (InputQuads.count() == 2 && V1Used && V2Used) {
+ BestLoQuad = InputQuads.find_first();
+ BestHiQuad = InputQuads.find_next(BestLoQuad);
+ }
+ if (InputQuads.count() > 2) {
+ BestLoQuad = -1;
+ BestHiQuad = -1;
+ }
+ }
+
+ // If BestLoQuad or BestHiQuad are set, shuffle the quads together and update
+ // the shuffle mask. If a quad is scored as -1, it contains words from all 4
+ // input quadwords.
+ SDValue NewV;
+ if (BestLoQuad >= 0 || BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
+ MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
+ NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
+
+ // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
+ // source words for the shuffle, to aid later transformations.
+ bool AllWordsInNewV = true;
+ bool InOrder[2] = { true, true };
+ for (unsigned i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx != (int)i)
+ InOrder[i/4] = false;
+ if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
+ continue;
+ AllWordsInNewV = false;
+ break;
+ }
+
+ bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
+ if (AllWordsInNewV) {
+ for (int i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0)
+ continue;
+ idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
+ if ((idx != i) && idx < 4)
+ pshufhw = false;
+ if ((idx != i) && idx > 3)
+ pshuflw = false;
+ }
+ V1 = NewV;
+ V2Used = false;
+ BestLoQuad = 0;
+ BestHiQuad = 1;
+ }
+
+ // If we've eliminated the use of V2, and the new mask is a pshuflw or
+ // pshufhw, that's as cheap as it gets. Return the new shuffle.
+ if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
+ return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ }
+ }
+
+ // If we have SSSE3 and all words of the result come from 1 input vector,
+ // case 2 is generated; otherwise case 3 is generated. If no SSSE3
+ // is present, fall back to case 4.
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If we have elements from both input vectors, set the high bit of the
+ // shuffle mask element to zero out elements that come from V2 in the V1
+ // mask, and elements that come from V1 in the V2 mask, so that the two
+ // results can be OR'd together.
+ bool TwoInputs = V1Used && V2Used;
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (TwoInputs && (EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
+ }
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
+ }
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ }
+
+ // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
+ // and update MaskVals with the new element order.
+ BitVector InOrder(8);
+ if (BestLoQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (int i = 0; i != 4; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestLoQuad) {
+ MaskV.push_back(idx & 3);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ for (unsigned i = 4; i != 8; ++i)
+ MaskV.push_back(i);
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+ }
+
+ // If BestHiQuad >= 0, generate a pshufhw to put the high elements in order,
+ // and update MaskVals with the new element order.
+ if (BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (unsigned i = 0; i != 4; ++i)
+ MaskV.push_back(i);
+ for (unsigned i = 4; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestHiQuad) {
+ MaskV.push_back((idx & 3) + 4);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+ }
+
+ // In case BestHiQuad & BestLoQuad were both -1, meaning each quadword of the
+ // result has a word from each of the four input quadwords, calculate the
+ // InOrder bitvector now before falling through to the insert/extract cleanup.
+ if (BestLoQuad == -1 && BestHiQuad == -1) {
+ NewV = V1;
+ for (int i = 0; i != 8; ++i)
+ if (MaskVals[i] < 0 || MaskVals[i] == i)
+ InOrder.set(i);
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ SDValue ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
+ DAG.getIntPtrConstant(EltIdx))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
+ DAG.getIntPtrConstant(EltIdx - 8));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
+ DAG.getIntPtrConstant(i));
+ }
+ return NewV;
+}
+
+// v16i8 shuffles - Prefer shuffles in the following order:
+// 1. [ssse3] 1 x pshufb
+// 2. [ssse3] 2 x pshufb + 1 x por
+// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
+static
+SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG, X86TargetLowering &TLI) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 16> MaskVals;
+ SVOp->getMask(MaskVals);
+
+ // If we have SSSE3, case 1 is generated when all result bytes come from
+ // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
+ // present, fall back to case 3.
+ // FIXME: kill V2Only once shuffles are canonicalized by getNode.
+ bool V1Only = true;
+ bool V2Only = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ if (EltIdx < 16)
+ V2Only = false;
+ else
+ V1Only = false;
+ }
+
+ // If SSSE3 is available, use one pshufb instruction per input vector that
+ // contributes elements to the result.
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If all result elements are from one input vector, then only translate
+ // undef mask values to 0x80 (zero out result) in the pshufb mask.
+ //
+ // Otherwise, we have elements from both input vectors, and must zero out
+ // elements that come from V2 in the first mask, and V1 in the second mask
+ // so that we can OR them together.
+ bool TwoInputs = !(V1Only || V2Only);
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ // If all the elements are from V2, assign it to V1 and return after
+ // building the first pshufb.
+ if (V2Only)
+ V1 = V2;
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return V1;
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ }
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ }
+
+ // No SSSE3 - Calculate the in-place words, then fix all out-of-place words
+ // with 0-16 extracts & inserts. Worst case is 16 bytes out of order from
+ // the 16 different words that comprise the two doublequadword input vectors.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
+ SDValue NewV = V2Only ? V2 : V1;
+ for (int i = 0; i != 8; ++i) {
+ int Elt0 = MaskVals[i*2];
+ int Elt1 = MaskVals[i*2+1];
+
+ // This word of the result is all undef, skip it.
+ if (Elt0 < 0 && Elt1 < 0)
+ continue;
+
+ // This word of the result is already in the correct place, skip it.
+ if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
+ continue;
+ if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
+ continue;
+
+ SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
+ SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
+ SDValue InsElt;
+
+ // If Elt0 and Elt1 are defined, are consecutive, and can be loaded together
+ // using a single extract, extract the word once and insert it.
+ if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ continue;
+ }
+
+ // If Elt1 is defined, extract it from the appropriate source. If the
+ // source byte is not also odd, shift the extracted word left 8 bits
+ // otherwise clear the bottom 8 bits if we need to do an or.
+ if (Elt1 >= 0) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ if ((Elt1 & 1) == 0)
+ InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
+ DAG.getConstant(8, TLI.getShiftAmountTy()));
+ else if (Elt0 >= 0)
+ InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
+ DAG.getConstant(0xFF00, MVT::i16));
+ }
+ // If Elt0 is defined, extract it from the appropriate source. If the
+ // source byte is not also even, shift the extracted word right 8 bits. If
+ // Elt1 was also defined, OR the extracted values together before
+ // inserting them in the result.
+ if (Elt0 >= 0) {
+ SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
+ if ((Elt0 & 1) != 0)
+ InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
+ DAG.getConstant(8, TLI.getShiftAmountTy()));
+ else if (Elt1 >= 0)
+ InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
+ DAG.getConstant(0x00FF, MVT::i16));
+ InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
+ : InsElt0;
+ }
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements point to elements in
+/// the right sequence. e.g.
+/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+static
+SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG,
+ TargetLowering &TLI, DebugLoc dl) {
+ MVT VT = SVOp->getValueType(0);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ MVT MaskEltVT = MaskVT.getVectorElementType();
+ MVT NewVT = MaskVT;
+ switch (VT.getSimpleVT()) {
+ default: assert(false && "Unexpected!");
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ }
+
+ if (NewWidth == 2) {
+ if (VT.isInteger())
+ NewVT = MVT::v2i64;
+ else
+ NewVT = MVT::v2f64;
+ }
+ int Scale = NumElems / NewWidth;
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ int StartIdx = -1;
+ for (int j = 0; j < Scale; ++j) {
+ int EltIdx = SVOp->getMaskElt(i+j);
+ if (EltIdx < 0)
+ continue;
+ if (StartIdx == -1)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDValue();
+ }
+ if (StartIdx == -1)
+ MaskVec.push_back(-1);
+ else
+ MaskVec.push_back(StartIdx / Scale);
+ }
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
+ return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
+}
+
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
+static SDValue getVZextMovL(MVT VT, MVT OpVT,
+ SDValue SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, DebugLoc dl) {
+ if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ LoadSDNode *LD = NULL;
+ if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
+ LD = dyn_cast<LoadSDNode>(SrcOp);
+ if (!LD) {
+ // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+ // instead.
+ MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
+ SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+ SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
+ // PR2108
+ OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ OpVT,
+ SrcOp.getOperand(0)
+ .getOperand(0))));
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ OpVT, SrcOp)));
+}
+
+/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
+/// shuffles.
+static SDValue
+LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT VT = SVOp->getValueType(0);
+
+ SmallVector<std::pair<int, int>, 8> Locs;
+ Locs.resize(4);
+ SmallVector<int, 8> Mask1(4U, -1);
+ SmallVector<int, 8> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned NumHi = 0;
+ unsigned NumLo = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else {
+ assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
+ if (Idx < 4) {
+ Locs[i] = std::make_pair(0, NumLo);
+ Mask1[NumLo] = Idx;
+ NumLo++;
+ } else {
+ Locs[i] = std::make_pair(1, NumHi);
+ if (2+NumHi < 4)
+ Mask1[2+NumHi] = Idx;
+ NumHi++;
+ }
+ }
+ }
+
+ if (NumLo <= 2 && NumHi <= 2) {
+ // If no more than two elements come from either vector, this can be
+ // implemented with two shuffles. The first shuffle gathers the elements;
+ // the second, which takes the first shuffle's result as both of its vector
+ // operands, puts the elements into the right order.
+ V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ SmallVector<int, 8> Mask2(4U, -1);
+
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1)
+ continue;
+ else {
+ unsigned Idx = (i < 2) ? 0 : 4;
+ Idx += Locs[i].first * 2 + Locs[i].second;
+ Mask2[i] = Idx;
+ }
+ }
+
+ return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
+ } else if (NumLo == 3 || NumHi == 3) {
+ // Otherwise, we must have three elements from one vector, call it X, and
+ // one element from the other, call it Y. First, use a shufps to build an
+ // intermediate vector with the one element from Y and the element from X
+ // that will be in the same half in the final destination (the indexes don't
+ // matter). Then, use a shufps to build the final vector, taking the half
+ // containing the element from Y from the intermediate, and the other half
+ // from X.
+ if (NumHi == 3) {
+ // Normalize it so the 3 elements come from V1.
+ CommuteVectorShuffleMask(PermMask, VT);
+ std::swap(V1, V2);
+ }
+
+ // Find the element from V2.
+ unsigned HiIndex;
+ for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
+ int Val = PermMask[HiIndex];
+ if (Val < 0)
+ continue;
+ if (Val >= 4)
+ break;
+ }
+
+ Mask1[0] = PermMask[HiIndex];
+ Mask1[1] = -1;
+ Mask1[2] = PermMask[HiIndex^1];
+ Mask1[3] = -1;
+ V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ if (HiIndex >= 2) {
+ Mask1[0] = PermMask[0];
+ Mask1[1] = PermMask[1];
+ Mask1[2] = HiIndex & 1 ? 6 : 4;
+ Mask1[3] = HiIndex & 1 ? 4 : 6;
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ } else {
+ Mask1[0] = HiIndex & 1 ? 2 : 0;
+ Mask1[1] = HiIndex & 1 ? 0 : 2;
+ Mask1[2] = PermMask[2];
+ Mask1[3] = PermMask[3];
+ if (Mask1[2] >= 0)
+ Mask1[2] += 4;
+ if (Mask1[3] >= 0)
+ Mask1[3] += 4;
+ return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
+ }
+ }
+
+ // Break it into (shuffle shuffle_hi, shuffle_lo).
+ Locs.clear();
+ SmallVector<int,8> LoMask(4U, -1);
+ SmallVector<int,8> HiMask(4U, -1);
+
+ SmallVector<int,8> *MaskPtr = &LoMask;
+ unsigned MaskIdx = 0;
+ unsigned LoIdx = 0;
+ unsigned HiIdx = 2;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (i == 2) {
+ MaskPtr = &HiMask;
+ MaskIdx = 1;
+ LoIdx = 0;
+ HiIdx = 2;
+ }
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else if (Idx < 4) {
+ Locs[i] = std::make_pair(MaskIdx, LoIdx);
+ (*MaskPtr)[LoIdx] = Idx;
+ LoIdx++;
+ } else {
+ Locs[i] = std::make_pair(MaskIdx, HiIdx);
+ (*MaskPtr)[HiIdx] = Idx;
+ HiIdx++;
+ }
+ }
+
+ SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
+ SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
+ SmallVector<int, 8> MaskOps;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1) {
+ MaskOps.push_back(-1);
+ } else {
+ unsigned Idx = Locs[i].first * 4 + Locs[i].second;
+ MaskOps.push_back(Idx);
+ }
+ }
+ return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool isMMX = VT.getSizeInBits() == 64;
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+
+ if (isZeroShuffle(SVOp))
+ return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+
+ // Promote splats to v4f32.
+ if (SVOp->isSplat()) {
+ if (isMMX || NumElems < 4)
+ return Op;
+ return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+ }
+
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ if (NewOp.getNode())
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ LowerVECTOR_SHUFFLE(NewOp, DAG));
+ } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ if (NewOp.getNode()) {
+ if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
+ DAG, Subtarget, dl);
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+ DAG, Subtarget, dl);
+ }
+ }
+
+ if (X86::isPSHUFDMask(SVOp))
+ return Op;
+
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDValue ShVal;
+ bool isShift = getSubtarget()->hasSSE2() &&
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ MVT EVT = VT.getVectorElementType();
+ ShAmt *= EVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ if (X86::isMOVLMask(SVOp)) {
+ if (V1IsUndef)
+ return V2;
+ if (ISD::isBuildVectorAllZeros(V1.getNode()))
+ return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
+ if (!isMMX)
+ return Op;
+ }
+
+ // FIXME: fold these into legal mask.
+ if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
+ X86::isMOVSLDUPMask(SVOp) ||
+ X86::isMOVHLPSMask(SVOp) ||
+ X86::isMOVHPMask(SVOp) ||
+ X86::isMOVLPMask(SVOp)))
+ return Op;
+
+ if (ShouldXformToMOVHLPS(SVOp) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ if (isShift) {
+ // No better options. Use a vshl / vsrl.
+ MVT EVT = VT.getVectorElementType();
+ ShAmt *= EVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful, not
+ // 1,1,1,1 -> v8i16 though.
+ V1IsSplat = isSplatVector(V1.getNode());
+ V2IsSplat = isSplatVector(V2.getNode());
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
+ if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ Op = CommuteVectorShuffle(SVOp, DAG);
+ SVOp = cast<ShuffleVectorSDNode>(Op);
+ V1 = SVOp->getOperand(0);
+ V2 = SVOp->getOperand(1);
+ std::swap(V1IsSplat, V2IsSplat);
+ std::swap(V1IsUndef, V2IsUndef);
+ Commuted = true;
+ }
+
+ if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ // Shuffling the low element of V1 into an undef V2; just return V1.
+ if (V2IsUndef)
+ return V1;
+ // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
+ // the instruction selector will not match, so get a canonical MOVL with
+ // swapped operands to undo the commute.
+ return getMOVL(DAG, dl, VT, V2, V1);
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
+ X86::isUNPCKH_v_undef_Mask(SVOp) ||
+ X86::isUNPCKLMask(SVOp) ||
+ X86::isUNPCKHMask(SVOp))
+ return Op;
+
+ if (V2IsSplat) {
+ // Normalize the mask so all entries that point to V2 point to its first
+ // element, then try to match unpck{h|l} again. If a match is found, return
+ // a new vector_shuffle with the corrected mask.
+ SDValue NewMask = NormalizeMask(SVOp, DAG);
+ ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
+ if (NSVOp != SVOp) {
+ if (X86::isUNPCKLMask(NSVOp, true)) {
+ return NewMask;
+ } else if (X86::isUNPCKHMask(NSVOp, true)) {
+ return NewMask;
+ }
+ }
+ }
+
+ if (Commuted) {
+ // Commute it back and try unpck* again.
+ // FIXME: this seems wrong.
+ SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
+ ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+ if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
+ X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
+ X86::isUNPCKLMask(NewSVOp) ||
+ X86::isUNPCKHMask(NewSVOp))
+ return NewOp;
+ }
+
+ // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
+
+ // Normalize the node to match x86 shuffle ops if needed
+ if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // Check whether the shuffle mask is already legal; if so, return the op as is.
+ SmallVector<int, 16> PermMask;
+ SVOp->getMask(PermMask);
+ if (isShuffleMaskLegal(PermMask, VT))
+ return Op;
+
+ // Handle v8i16 specifically since SSE can do word extraction and insertion.
+ if (VT == MVT::v8i16) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ if (VT == MVT::v16i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ // Handle all 4 wide cases with a number of shuffles except for MMX.
+ if (NumElems == 4 && !isMMX)
+ return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ if (VT.getSizeInBits() == 8) {
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 16) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ // If Idx is 0, it's cheaper to do a move instead of a pextrw.
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1)));
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT == MVT::f32) {
+ // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+ // the result back to FR32 register. It's only worth matching if the
+ // result has a single use which is a store or a bitcast to i32. And in
+ // the case of a store, it's not worth it if the index is a constant 0,
+ // because a MOVSSmr can be used instead, which is smaller and faster.
+ if (!Op.hasOneUse())
+ return SDValue();
+ SDNode *User = *Op.getNode()->use_begin();
+ if ((User->getOpcode() != ISD::STORE ||
+ (isa<ConstantSDNode>(Op.getOperand(1)) &&
+ cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+ (User->getOpcode() != ISD::BIT_CONVERT ||
+ User->getValueType(0) != MVT::i32))
+ return SDValue();
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
+ } else if (VT == MVT::i32) {
+ // ExtractPS works with constant index.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return Op;
+ }
+ return SDValue();
+}
+
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+
+ if (Subtarget->hasSSE41()) {
+ SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Res.getNode())
+ return Res;
+ }
+
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ // TODO: handle v16i8.
+ if (VT.getSizeInBits() == 16) {
+ SDValue Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::v4i32, Vec),
+ Op.getOperand(1)));
+ // Transform it so it matches pextrw, which produces a 32-bit result.
+ MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EVT,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EVT, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 32) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // SHUFPS the element to the lowest double word, then movss.
+ int Mask[4] = { Idx, -1, -1, -1 };
+ MVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ } else if (VT.getSizeInBits() == 64) {
+ // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
+ // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
+ // to match extract_elt for f64.
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // UNPCKHPD the element to the lowest double word, then movsd.
+ // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
+ // to a f64mem, the whole operation is folded into a single MOVHPDmr.
+ int Mask[2] = { 1, -1 };
+ MVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ }
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
+ isa<ConstantSDNode>(N2)) {
+ unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
+ : X86ISD::PINSRW;
+ // Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
+ // argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ } else if (EVT == MVT::i32) {
+ // InsertPS works with constant index.
+ if (isa<ConstantSDNode>(N2))
+ return Op;
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
+
+ if (Subtarget->hasSSE41())
+ return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
+ if (EVT == MVT::i8)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ if (EVT.getSizeInBits() == 16) {
+ // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
+ // as its second argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType() == MVT::v2f32)
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(0))));
+
+ SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
+ MVT VT = MVT::v2i32;
+ switch (Op.getValueType().getSimpleVT()) {
+ default: break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ VT = MVT::v4i32;
+ break;
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing modes. These wrapped nodes will be selected
+// into MOV32ri.
+SDValue
+X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really any debug info here, should come from the parent
+ DebugLoc dl = CP->getDebugLoc();
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment());
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset,
+ SelectionDAG &DAG) const {
+ bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ bool ExtraLoadRequired =
+ Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+ // Create the TargetGlobalAddress node, folding in the constant
+ // offset if it is legal.
+ SDValue Result;
+ if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
+ Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Offset = 0;
+ } else
+ Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (IsPic && !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+ // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
+ // load the value at address GV, not the value of GV itself. This means that
+ // the GlobalAddress must be in the base or index register of the address, not
+ // the GV offset field. The platform check is inside the GVRequiresExtraLoad()
+ // call. The same applies for external symbols during PIC codegen.
+ if (ExtraLoadRequired)
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ PseudoSourceValue::getGOT(), 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
+}
+
+static SDValue
+GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
+ SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg) {
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ GA->getValueType(0),
+ GA->getOffset());
+ if (InFlag) {
+ SDValue Ops[] = { Chain, TGA, *InFlag };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
+ } else {
+ SDValue Ops[] = { Chain, TGA };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
+ }
+ SDValue Flag = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
+static SDValue
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT) {
+ SDValue InFlag;
+ DebugLoc dl = GA->getDebugLoc(); // FIXME: the function entry point might be a better location
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+
+ return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX);
+}
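+
+// For reference (not emitted here directly), the 32-bit general-dynamic
+// sequence that these nodes select to is roughly:
+//   leal x@TLSGD(,%ebx,1), %eax
+//   call ___tls_get_addr@PLT     ; returns the variable's address in %eax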
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+static SDValue
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
+// "local exec" model.
+static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT, TLSModel::Model model,
+ bool is64Bit) {
+ DebugLoc dl = GA->getDebugLoc();
+ // Get the Thread Pointer
+ SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
+ DebugLoc::getUnknownLoc(), PtrVT,
+ DAG.getRegister(is64Bit? X86::FS : X86::GS,
+ MVT::i32));
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
+ NULL, 0);
+
+ // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
+ // exec)
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ GA->getValueType(0),
+ GA->getOffset());
+ SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
+
+ if (model == TLSModel::InitialExec)
+ Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
+ PseudoSourceValue::getGOT(), 0);
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+ // TODO: implement the "local dynamic" model
+ // TODO: implement the "initial exec"model for pic executables
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GA->getGlobal();
+ TLSModel::Model model =
+ getTLSModel (GV, getTargetMachine().getRelocationModel());
+ if (Subtarget->is64Bit()) {
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, true);
+ }
+ } else {
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, false);
+ }
+ }
+ assert(0 && "Unreachable");
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = JT->getDebugLoc();
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ MVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue Tmp1 = isSRA ?
+ DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i8)) :
+ DAG.getConstant(0, VT);
+
+ SDValue Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ }
+
+ SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits, MVT::i8));
+ SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDValue Hi, Lo;
+ SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
+ SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
+
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ } else {
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ }
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
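+
+// As an illustrative sketch, i64 SHL_PARTS on 32-bit x86 selects to roughly:
+//   shld %cl, %lo, %hi     ; hi = (hi << cl) | (lo >> (32-cl))
+//   shl  %cl, %lo          ; lo <<= cl
+//   test $32, %cl          ; amounts >= 32 instead move lo into hi and
+//   (two cmov's)           ; zero lo, which the CMOV pair above expresses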
+
+SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ MVT SrcVT = Op.getOperand(0).getValueType();
+ assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
+ "Unknown SINT_TO_FP to lower!");
+
+ // These are really Legal; return the operand so the caller accepts it as
+ // Legal.
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ return Op;
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ Subtarget->is64Bit()) {
+ return Op;
+ }
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Size = SrcVT.getSizeInBits()/8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot,
+ PseudoSourceValue::getFixedStack(SSFI), 0);
+ return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+}
+
+SDValue X86TargetLowering::BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain,
+ SDValue StackSlot,
+ SelectionDAG &DAG) {
+ // Build the FILD
+ DebugLoc dl = Op.getDebugLoc();
+ SDVTList Tys;
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+ if (useSSE)
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+ else
+ Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(StackSlot);
+ Ops.push_back(DAG.getValueType(SrcVT));
+ SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
+ Tys, &Ops[0], Ops.size());
+
+ if (useSSE) {
+ Chain = Result.getValue(1);
+ SDValue InFlag = Result.getValue(2);
+
+ // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // shouldn't be necessary except that RFP cannot be live across
+ // multiple blocks. When the stackifier is fixed, they can be uncoupled.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ Tys = DAG.getVTList(MVT::Other);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Result);
+ Ops.push_back(StackSlot);
+ Ops.push_back(DAG.getValueType(Op.getValueType()));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::FST, dl, Tys, &Ops[0], Ops.size());
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
+ PseudoSourceValue::getFixedStack(SSFI), 0);
+ }
+
+ return Result;
+}
+
+// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
+ // This algorithm is not obvious. Here it is in C code, more or less:
+ /*
+ double uint64_to_double( uint32_t hi, uint32_t lo ) {
+ static const __m128i exp = { 0x4330000045300000ULL, 0 };
+ static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
+
+ // Copy ints to xmm registers.
+ __m128i xh = _mm_cvtsi32_si128( hi );
+ __m128i xl = _mm_cvtsi32_si128( lo );
+
+ // Combine into low half of a single xmm register.
+ __m128i x = _mm_unpacklo_epi32( xh, xl );
+ __m128d d;
+ double sd;
+
+ // Merge in appropriate exponents to give the integer bits the right
+ // magnitude.
+ x = _mm_unpacklo_epi32( x, exp );
+
+ // Subtract away the biases to deal with the IEEE-754 double precision
+ // implicit 1.
+ d = _mm_sub_pd( (__m128d) x, bias );
+
+ // All conversions up to here are exact. The correctly rounded result is
+ // calculated using the current rounding mode using the following
+ // horizontal add.
+ d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
+ _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
+ // store doesn't really need to be here (except
+ // maybe to zero the other double)
+ return sd;
+ }
+ */
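+ // Note: 0x45300000 and 0x43300000 below are the high words of the doubles
+ // 0x1.0p84 and 0x1.0p52 (biased exponents 1023+84 = 0x453, 1023+52 = 0x433).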
+
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Build some magic constants.
+ std::vector<Constant*> CV0;
+ CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0)));
+ Constant *C0 = ConstantVector::get(CV0);
+ SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+
+ std::vector<Constant*> CV1;
+ CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
+ CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
+ Constant *C1 = ConstantVector::get(CV1);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(1)));
+ SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+ SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
+ SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
+ SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+
+ // Add the halves; easiest way is to swap them into another reg first.
+ int ShufMask[2] = { 1, -1 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
+ DAG.getUNDEF(MVT::v2f64), ShufMask);
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
+ DAG.getIntPtrConstant(0));
+}
+
+// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // FP constant to bias correct the final result.
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
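+
+ // The trick: the 32-bit payload is OR'd into the low mantissa bits of the
+ // double 2^52, which then represents 2^52 + x exactly, so subtracting the
+ // bias recovers (double)x with no rounding error.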
+
+ // Load the 32-bit value into an XMM register.
+ SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+
+ Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load),
+ DAG.getIntPtrConstant(0));
+
+ // Or the load with the bias.
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Load)),
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Bias)));
+ Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or),
+ DAG.getIntPtrConstant(0));
+
+ // Subtract the bias.
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
+
+ // Handle final rounding.
+ MVT DestVT = Op.getValueType();
+
+ if (DestVT.bitsLT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+
+ // Otherwise the result is already f64; no rounding is needed.
+ return Sub;
+}
+
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Since UINT_TO_FP is legal (it's marked custom), the DAG combiner won't
+ // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
+ // the optimization here.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
+
+ MVT SrcVT = N0.getValueType();
+ if (SrcVT == MVT::i64) {
+ // We only handle SSE2 f64 target here; caller can expand the rest.
+ if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
+ return SDValue();
+
+ return LowerUINT_TO_FP_i64(Op, DAG);
+ } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) {
+ return LowerUINT_TO_FP_i32(Op, DAG);
+ }
+
+ assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");
+
+ // Make a 64-bit buffer, and use it to build an FILD.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, NULL, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, NULL, 0);
+ return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+}
+
+std::pair<SDValue,SDValue> X86TargetLowering::
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT DstTy = Op.getValueType();
+
+ if (!IsSigned) {
+ assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
+ DstTy = MVT::i64;
+ }
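+ // (An unsigned i32 result is computed as a signed 64-bit FIST to memory;
+ // on little-endian x86 the caller then loads just the low 32 bits.)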
+
+ assert(DstTy.getSimpleVT() <= MVT::i64 &&
+ DstTy.getSimpleVT() >= MVT::i16 &&
+ "Unknown FP_TO_SINT to lower!");
+
+ // These are really Legal.
+ if (DstTy == MVT::i32 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+ if (Subtarget->is64Bit() &&
+ DstTy == MVT::i64 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+
+ // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+ // stack slot.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = DstTy.getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ unsigned Opc;
+ switch (DstTy.getSimpleVT()) {
+ default: assert(0 && "Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
+
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Value = Op.getOperand(0);
+ if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
+ assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
+ Chain = DAG.getStore(Chain, dl, Value, StackSlot,
+ PseudoSourceValue::getFixedStack(SSFI), 0);
+ SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+ };
+ Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
+ Chain = Value.getValue(1);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);
+
+ return std::make_pair(FIST, StackSlot);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (FIST.getNode() == 0) return Op;
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ assert(FIST.getNode() && "Unexpected failure");
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ MVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
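+ // Note: the mask is built as a whole vector constant even for scalar
+ // operands; the scalar forms simply load element 0 of the 16-byte entry.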
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
+}
+
+SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ MVT EltVT = VT;
+ unsigned EltNum = 1;
+ if (VT.isVector()) {
+ EltVT = VT.getVectorElementType();
+ EltNum = VT.getVectorNumElements();
+ }
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ if (VT.isVector()) {
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(ISD::XOR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask)));
+ } else {
+ return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+ }
+}
+
+SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ MVT SrcVT = Op1.getValueType();
+
+ // If second operand is smaller, extend it first.
+ if (SrcVT.bitsLT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
+ SrcVT = VT;
+ }
+ // And if it is bigger, shrink it first.
+ if (SrcVT.bitsGT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
+ SrcVT = VT;
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
+
+ // First get the sign bit of second operand.
+ std::vector<Constant*> CV;
+ if (SrcVT == MVT::f64) {
+ CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
+
+ // Shift sign bit right or left if the two operands have different types.
+ if (SrcVT.bitsGT(VT)) {
+ // Op0 is MVT::f32, Op1 is MVT::f64.
+ SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
+ SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
+ DAG.getConstant(32, MVT::i32));
+ SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Clear first operand sign bit.
+ CV.clear();
+ if (VT == MVT::f64) {
+ CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ }
+ C = ConstantVector::get(CV);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
+}
+
+/// Emit nodes that will be selected as "test Op0,Op0", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CF and OF aren't always set the way we want. Determine which
+ // of these we need.
+ bool NeedCF = false;
+ bool NeedOF = false;
+ switch (X86CC) {
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ NeedCF = true;
+ break;
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ case X86::COND_O: case X86::COND_NO:
+ NeedOF = true;
+ break;
+ default: break;
+ }
+
+ // See if we can use the EFLAGS value from the operand instead of
+ // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+ // we prove that the arithmetic won't overflow, we can't use OF or CF.
+ if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to
+ // be selected as part of a load-modify-store instruction. When the root
+ // node in a match is a store, isel doesn't know how to remap non-chain
+ // non-flag uses of other nodes in the match, such as the ADD in this
+ // case. This leads to the ADD being left around and reselected, with
+ // the result being two adds in the output.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
+ }
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
+ }
+ }
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::SUB:
+ // Due to the ISEL shortcoming noted above, be conservative if this sub is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+ // Otherwise use a regular EFLAGS-setting sub.
+ Opcode = X86ISD::SUB;
+ NumOperands = 2;
+ break;
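+ // The X86 arithmetic nodes below already produce EFLAGS as a second
+ // result (value #1), so that value can be reused directly.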
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
+ }
+ if (Opcode != 0) {
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
+ }
+ }
+
+ // Otherwise just emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+}
+
+/// Emit nodes that will be selected as "cmp Op0,Op1", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
+ if (C->getAPIntValue() == 0)
+ return EmitTest(Op0, X86CC, DAG);
+
+ DebugLoc dl = Op0.getDebugLoc();
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
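+ // e.g. (seteq (and X, (shl 1, N)), 0) becomes (setcc COND_AE (bt X, N)),
+ // and the SETNE forms become (setcc COND_B (bt X, N)).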
+ if (Op0.getOpcode() == ISD::AND &&
+ Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue LHS, RHS;
+ if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op010C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+ if (Op010C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(0);
+ RHS = Op0.getOperand(1).getOperand(1);
+ }
+ } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op000C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+ if (Op000C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(1);
+ RHS = Op0.getOperand(0).getOperand(1);
+ }
+ } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+ SDValue AndLHS = Op0.getOperand(0);
+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bit test on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ if (LHS.getValueType() == MVT::i8)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+ }
+
+ bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+
+ SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+}
+
+SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Cond;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ MVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (isFP) {
+ unsigned SSECC = 8;
+ MVT VT0 = Op0.getValueType();
+ assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
+ unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+ bool Swap = false;
+
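+ // SSECC is the cmpps/cmppd predicate immediate: 0=eq, 1=lt, 2=le, 3=unord,
+ // 4=neq, 5=nlt, 6=nle, 7=ord; 8 flags predicates with no single immediate.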
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOEQ:
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETOGT:
+ case ISD::SETGT: Swap = true; // Fallthrough
+ case ISD::SETLT:
+ case ISD::SETOLT: SSECC = 1; break;
+ case ISD::SETOGE:
+ case ISD::SETGE: Swap = true; // Fallthrough
+ case ISD::SETLE:
+ case ISD::SETOLE: SSECC = 2; break;
+ case ISD::SETUO: SSECC = 3; break;
+ case ISD::SETUNE:
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: SSECC = 5; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: SSECC = 6; break;
+ case ISD::SETO: SSECC = 7; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // In the two special cases we can't handle, emit two comparisons.
+ if (SSECC == 8) {
+ if (SetCCOpcode == ISD::SETUEQ) {
+ SDValue UNORD, EQ;
+ UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
+ }
+ else if (SetCCOpcode == ISD::SETONE) {
+ SDValue ORD, NEQ;
+ ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ }
+ assert(0 && "Illegal FP comparison");
+ }
+ // Handle all other FP comparisons here.
+ return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+ }
+
+ // We are handling one of the integer comparisons here. Since SSE only has
+ // GT and EQ comparisons for integer, swapping operands and multiple
+ // operations may be required for some comparisons.
+ unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+ bool Swap = false, Invert = false, FlipSigns = false;
+
+ switch (VT.getSimpleVT()) {
+ default: break;
+ case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+ case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+ case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
+ case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = EQOpc; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = GTOpc; break;
+ case ISD::SETGE: Swap = true;
+ case ISD::SETLE: Opc = GTOpc; Invert = true; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+ case ISD::SETUGE: Swap = true;
+ case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // bits of the inputs before performing those operations.
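+ // e.g. per i32 lane, X >u Y iff (X ^ 0x80000000) >s (Y ^ 0x80000000).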
+ if (FlipSigns) {
+ MVT EltVT = VT.getVectorElementType();
+ SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+ EltVT);
+ std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+ SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+ SignBits.size());
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+ }
+
+ SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+
+ // If the logical-not of the result is required, perform that now.
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+// isX86LogicalCmp - Return true if opcode is an X86 logical comparison.
+static bool isX86LogicalCmp(SDValue Op) {
+ unsigned Opc = Op.getNode()->getOpcode();
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
+ return true;
+ if (Op.getResNo() == 1 &&
+ (Opc == X86ISD::ADD ||
+ Opc == X86ISD::SUB ||
+ Opc == X86ISD::SMUL ||
+ Opc == X86ISD::UMUL ||
+ Opc == X86ISD::INC ||
+ Opc == X86ISD::DEC))
+ return true;
+
+ return false;
+}
+
+SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
+ bool addTest = true;
+ SDValue Cond = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC)
+ Cond = LowerSETCC(Cond, DAG);
+
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ MVT VT = Op.getValueType();
+
+ bool IllegalFPCMov = false;
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
+
+ if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+ Opc == X86ISD::BT) { // FIXME
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SmallVector<SDValue, 4> Ops;
+ // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
+ // condition is true.
+ Ops.push_back(Op.getOperand(2));
+ Ops.push_back(Op.getOperand(1));
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ return DAG.getNode(X86ISD::CMOV, dl, VTs, &Ops[0], Ops.size());
+}
+
+// isAndOrOfSetCCs - Return true if node is an ISD::AND or
+// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
+// from the AND / OR.
+static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
+ Opc = Op.getOpcode();
+ if (Opc != ISD::OR && Opc != ISD::AND)
+ return false;
+ return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse() &&
+ Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(1).hasOneUse());
+}
+
+// isXor1OfSetCC - Return true if node is an ISD::XOR of an X86ISD::SETCC and
+// 1, and the SETCC node has a single use.
+static bool isXor1OfSetCC(SDValue Op) {
+ if (Op.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (N1C && N1C->getAPIntValue() == 1) {
+ return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse();
+ }
+ return false;
+}
+
+SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
+ bool addTest = true;
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC)
+ Cond = LowerSETCC(Cond, DAG);
+#if 0
+ // FIXME: LowerXALUO doesn't handle these!!
+ else if (Cond.getOpcode() == X86ISD::ADD ||
+ Cond.getOpcode() == X86ISD::SUB ||
+ Cond.getOpcode() == X86ISD::SMUL ||
+ Cond.getOpcode() == X86ISD::UMUL)
+ Cond = LowerXALUO(Cond, DAG);
+#endif
+
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
+ if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
+ Cond = Cmp;
+ addTest = false;
+ } else {
+ switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
+ default: break;
+ case X86::COND_O:
+ case X86::COND_B:
+ // These can only come from an arithmetic instruction with overflow,
+ // e.g. SADDO, UADDO.
+ Cond = Cond.getNode()->getOperand(1);
+ addTest = false;
+ break;
+ }
+ }
+ } else {
+ unsigned CondOpc;
+ if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
+ SDValue Cmp = Cond.getOperand(0).getOperand(1);
+ if (CondOpc == ISD::OR) {
+ // Also, recognize the pattern generated by an FCMP_UNE. We can emit
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp)) {
+ CC = Cond.getOperand(0).getOperand(0);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = Cond.getOperand(1).getOperand(0);
+ Cond = Cmp;
+ addTest = false;
+ }
+ } else { // ISD::AND
+ // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp) &&
+ Op.getNode()->hasOneUse()) {
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ SDValue User = SDValue(*Op.getNode()->use_begin(), 0);
+ // Look for an unconditional branch following this conditional branch.
+ // We need it because the successors must be reversed in order
+ // to implement FCMP_OEQ.
+ if (User.getOpcode() == ISD::BR) {
+ SDValue FalseBB = User.getOperand(1);
+ SDValue NewBR =
+ DAG.UpdateNodeOperands(User, User.getOperand(0), Dest);
+ assert(NewBR == User);
+ Dest = FalseBB;
+
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ }
+ } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
+ // Recognize the xorb (setcc), 1 pattern. The xor inverts the condition.
+ // It should be transformed by the DAG combiner except when the condition
+ // is set by an arithmetic-with-overflow node.
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cond.getOperand(0).getOperand(1);
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+ return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cond);
+}
+
+
+// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
+// Calls to _alloca are needed to probe the stack when allocating more than 4K
+// bytes in one go. Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// the correct sequence.
+SDValue
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) {
+ assert(Subtarget->isTargetCygMing() &&
+ "This should be used only on Cygwin/Mingw targets");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ // FIXME: Ensure alignment here
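+ // Note: the Cygwin/MinGW _alloca helper takes the request size in EAX and
+ // adjusts the stack pointer itself; the new SP is copied back out below.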
+
+ SDValue Flag;
+
+ MVT IntPtr = getPointerTy();
+ MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain,
+ DAG.getTargetExternalSymbol("_alloca", IntPtr),
+ DAG.getRegister(X86::EAX, IntPtr),
+ DAG.getRegister(X86StackPtr, SPTy),
+ Flag };
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops, 5);
+ Flag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true),
+ Flag);
+
+ Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+
+ SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+}
+
+SDValue
+X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ const Value *DstSV,
+ uint64_t DstSVOff) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ getSubtarget()->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ MVT IntPtr = getPointerTy();
+ const Type *IntPtrTy = TD->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDValue,SDValue> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
+ CallingConv::C, false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ MVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger stores
+ // (stosw/stosd/stosq).
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
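+
+ // e.g. a constant byte 0xAB with DWORD alignment yields EAX = 0xABABABAB
+ // and a rep;stosd of SizeVal/4 iterations (tail bytes are handled below).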
+
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(AVT));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, &Ops[0], Ops.size());
+
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ MVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(MVT::i8));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, &Ops[0], Ops.size());
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT AddrVT = Dst.getValueType();
+ MVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, DstSV, DstSVOff + Offset);
+ }
+
+ // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ // If not DWORD aligned, call the library.
+ if ((Align & 3) != 0)
+ return SDValue();
+
+ // DWORD aligned
+ MVT AVT = MVT::i32;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
+ AVT = MVT::i64;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
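+ // e.g. a 25-byte copy on x86-64 with 8-byte alignment becomes a rep;movsq
+ // of 3 quadwords plus an inline 1-byte tail copy emitted below.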
+
+ SDValue InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(AVT));
+ Ops.push_back(InFlag);
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, &Ops[0], Ops.size());
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT DstVT = Dst.getValueType();
+ MVT SrcVT = Src.getValueType();
+ MVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, AlwaysInline,
+ DstSV, DstSVOff + Offset,
+ SrcSV, SrcSVOff + Offset));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
+
+SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (!Subtarget->is64Bit()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ }
+
+ // __va_list_tag:
+ // gp_offset (0 - 6 * 8)
+ // fp_offset (48 - 48 + 8 * 16)
+ // overflow_arg_area (points to parameters passed in memory).
+ // reg_save_area
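+ // Field offsets are 0, 4, 8 and 16 within the 24-byte va_list, matching
+ // the pointer increments of 4, 4 and 8 used below.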
+ SmallVector<SDValue, 8> MemOps;
+ SDValue FIN = Op.getOperand(1);
+ // Store gp_offset
+ SDValue Store = DAG.getStore(Op.getOperand(0), dl,
+ DAG.getConstant(VarArgsGPOffset, MVT::i32),
+ FIN, SV, 0);
+ MemOps.push_back(Store);
+
+ // Store fp_offset
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ Store = DAG.getStore(Op.getOperand(0), dl,
+ DAG.getConstant(VarArgsFPOffset, MVT::i32),
+ FIN, SV, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to overflow_arg_area
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to reg_save_area.
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(8));
+ SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0);
+ MemOps.push_back(Store);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+}
+
+SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ SDValue SrcSV = Op.getOperand(2);
+
+ assert(0 && "VAArgInst is not yet implemented for x86-64!");
+ abort();
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+ SDValue Chain = Op.getOperand(0);
+ SDValue DstPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
+ DAG.getIntPtrConstant(24), 8, false,
+ DstSV, 0, SrcSV, 0);
+}
+
+SDValue
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ // Comparison intrinsics.
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd: {
+ unsigned Opc = 0;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse2_comilt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse2_comile_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse2_comigt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse2_comige_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse2_comineq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETNE;
+ break;
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETNE;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = Op.getOperand(2);
+ if (isa<ConstantSDNode>(ShAmt))
+ return SDValue();
+
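+ // A constant shift amount matches the immediate-form patterns as-is;
+ // otherwise rewrite to the register form of the shift, which takes the
+ // amount in the low element of a vector register.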
+ unsigned NewIntNo = 0;
+ MVT ShAmtVT = MVT::v4i32;
+ switch (IntNo) {
+ case Intrinsic::x86_sse2_pslli_w:
+ NewIntNo = Intrinsic::x86_sse2_psll_w;
+ break;
+ case Intrinsic::x86_sse2_pslli_d:
+ NewIntNo = Intrinsic::x86_sse2_psll_d;
+ break;
+ case Intrinsic::x86_sse2_pslli_q:
+ NewIntNo = Intrinsic::x86_sse2_psll_q;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ break;
+ case Intrinsic::x86_sse2_psrli_d:
+ NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ break;
+ case Intrinsic::x86_sse2_psrli_q:
+ NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ NewIntNo = Intrinsic::x86_sse2_psra_w;
+ break;
+ case Intrinsic::x86_sse2_psrai_d:
+ NewIntNo = Intrinsic::x86_sse2_psra_d;
+ break;
+ default: {
+ ShAmtVT = MVT::v2i32;
+ switch (IntNo) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: abort(); // Can't reach here.
+ }
+ break;
+ }
+ }
+ MVT VT = Op.getValueType();
+ ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShAmtVT, ShAmt));
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(NewIntNo, MVT::i32),
+ Op.getOperand(1), ShAmt);
+ }
+ }
+}
+
+SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
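+ // FrameAddr is the frame pointer Depth levels up; the return address of
+ // that frame sits one pointer-size above the saved frame pointer.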
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(),
+ Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ NULL, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
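+ // Each frame stores its caller's frame pointer at offset 0 from EBP/RBP,
+ // so each load walks one level up the chain of saved frame pointers.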
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ return FrameAddr;
+}
+
+SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
+ SelectionDAG &DAG) {
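+ // The first incoming argument lives just above the return address and
+ // the saved frame pointer, hence an offset of two pointers.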
+ return DAG.getIntPtrConstant(2*TD->getPointerSize());
+}
+
+SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
+ unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
+
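+ // Overwrite the return-address slot (one pointer above the saved frame
+ // pointer, adjusted by Offset) with the handler address.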
+ SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(-TD->getPointerSize()));
+ StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
+ MF.getRegInfo().addLiveOut(StoreAddrReg);
+
+ return DAG.getNode(X86ISD::EH_RETURN, dl,
+ MVT::Other,
+ Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+}
+
+SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) {
+ SDValue Root = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ const X86InstrInfo *TII =
+ ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+
+ if (Subtarget->is64Bit()) {
+ SDValue OutChains[6];
+
+ // Large code-model.
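+ // The 23-byte trampoline emitted below is laid out as:
+ //  0: movabsq $<nested fn>, %r11   (REX.WB + opcode, 8-byte immediate)
+ // 10: movabsq $<nest value>, %r10  (REX.WB + opcode, 8-byte immediate)
+ // 20: jmpq *%r11                   (REX.WB + opcode + ModRM)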
+
+ const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r);
+ const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
+
+ const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
+ const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDValue Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, TrmpAddr, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, TrmpAddr, 10);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, TrmpAddr, 20);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
+ TrmpAddr, 22);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
+ return DAG.getMergeValues(Ops, 2, dl);
+ } else {
+ const Function *Func =
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ unsigned CC = Func->getCallingConv();
+ unsigned NestReg;
+
+ switch (CC) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::X86_StdCall: {
+ // Pass 'nest' parameter in ECX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::ECX;
+
+ // Check that ECX wasn't needed by an 'inreg' parameter.
+ const FunctionType *FTy = Func->getFunctionType();
+ const AttrListPtr &Attrs = Func->getAttributes();
+
+ if (!Attrs.isEmpty() && !Func->isVarArg()) {
+ unsigned InRegCount = 0;
+ unsigned Idx = 1;
+
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I, ++Idx)
+ if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+ // FIXME: should only count parameters that are lowered to integers.
+ InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+
+ if (InRegCount > 2) {
+ cerr << "Nest register in use - reduce number of inreg parameters!\n";
+ abort();
+ }
+ }
+ break;
+ }
+ case CallingConv::X86_FastCall:
+ case CallingConv::Fast:
+ // Pass 'nest' parameter in EAX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::EAX;
+ break;
+ }
+
+ SDValue OutChains[4];
+ SDValue Addr, Disp;
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(10, MVT::i32));
+ Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
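+ // The jmp at offset 5 is rel32, so its displacement is computed relative
+ // to the end of the 10-byte trampoline, i.e. Trmp + 10.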
+
+ const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
+ const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
+ OutChains[0] = DAG.getStore(Root, dl,
+ DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
+ Trmp, TrmpAddr, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(1, MVT::i32));
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1);
+
+ const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(5, MVT::i32));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
+ TrmpAddr, 5, false, 1);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(6, MVT::i32));
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+}
+
+SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
+ /*
+ The rounding mode is in bits 11:10 of the FP control word (FPCW), and
+ has the following settings:
+ settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Save FP Control Word to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
+ DAG.getEntryNode(), StackSlot);
+
+ // Load FP Control Word from stack slot
+ SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0);
+
+ // Transform as necessary
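+ // CWD1 moves bit 11 of the control word down to bit 0 and CWD2 moves
+ // bit 10 up to bit 1, swapping the rounding-control bits as the formula
+ // above requires.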
+ SDValue CWD1 =
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ DAG.getNode(ISD::AND, dl, MVT::i16,
+ CWD, DAG.getConstant(0x800, MVT::i16)),
+ DAG.getConstant(11, MVT::i8));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ DAG.getNode(ISD::AND, dl, MVT::i16,
+ CWD, DAG.getConstant(0x400, MVT::i16)),
+ DAG.getConstant(9, MVT::i8));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
+ DAG.getConstant(1, MVT::i16)),
+ DAG.getConstant(3, MVT::i16));
+
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ // Zero extend to i32 since there is not an i8 bsr.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // If src is zero (i.e. bsr sets ZF), returns NumBits.
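+ // 2*NumBits-1 is chosen so that the final XOR with NumBits-1 below maps
+ // the zero-input case to NumBits (e.g. for i32: 63 ^ 31 == 32).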
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
+ Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+ Ops.push_back(Op.getValue(1));
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, &Ops[0], 4);
+
+ // Finally xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+
+ // If src is zero (i.e. bsf sets ZF), returns NumBits.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(NumBits, OpVT));
+ Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+ Ops.push_back(Op.getValue(1));
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, &Ops[0], 4);
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
+ // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
+ // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
+ // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
+ // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ //
+ // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
+ // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // return AloBlo + AloBhi + AhiBlo;
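+ //
+ // pmuludq multiplies the low 32 bits of each 64-bit lane, so the full
+ // 64-bit product is lo*lo plus the two 32-bit cross terms shifted up by
+ // 32; the hi*hi term would land above bit 63 and is dropped.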
+
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ A, DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ B, DAG.getConstant(32, MVT::i32));
+ SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, B);
+ SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, Bhi);
+ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ Ahi, B);
+ AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AloBhi, DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
+ Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+ return Res;
+}
+
+
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+ // Lower the "add/sub/mul with overflow" instruction into a regular
+ // instruction plus a "setcc" instruction that checks the overflow flag.
+ // The "brcond" lowering looks for this combo and may remove the "setcc"
+ // instruction if the "setcc" has only one use.
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned BaseOp = 0;
+ unsigned Cond = 0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Unknown ovf instruction!");
+ case ISD::SADDO:
+ // An add of one will be selected as an INC. Note that INC doesn't
+ // set CF, so we can't do this for UADDO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->getAPIntValue() == 1) {
+ BaseOp = X86ISD::INC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ // A subtract of one will be selected as a DEC. Note that DEC doesn't
+ // set CF, so we can't do this for USUBO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->getAPIntValue() == 1) {
+ BaseOp = X86ISD::DEC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO:
+ BaseOp = X86ISD::UMUL;
+ Cond = X86::COND_B;
+ break;
+ }
+
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+}
+
+SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
+ MVT T = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Reg = 0;
+ unsigned size = 0;
+ switch(T.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: Reg = X86::AL; size = 1; break;
+ case MVT::i16: Reg = X86::AX; size = 2; break;
+ case MVT::i32: Reg = X86::EAX; size = 4; break;
+ case MVT::i64:
+ assert(Subtarget->is64Bit() && "Node not type legal!");
+ Reg = X86::RAX; size = 8;
+ break;
+ }
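+ // cmpxchg implicitly compares against and writes back the accumulator
+ // (AL/AX/EAX/RAX), so the expected value is pinned to that register.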
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
+ Op.getOperand(2), SDValue());
+ SDValue Ops[] = { cpIn.getValue(0),
+ Op.getOperand(1),
+ Op.getOperand(3),
+ DAG.getTargetConstant(size, MVT::i8),
+ cpIn.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
+ SDValue cpOut =
+ DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
+ return cpOut;
+}
+
+SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
+ SelectionDAG &DAG) {
+ assert(Subtarget->is64Bit() && "Result not type legalized?");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue TheChain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
+ SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
+ rax.getValue(2));
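+ // RDTSC leaves the counter in EDX:EAX; on x86-64 both halves arrive
+ // zero-extended in RAX and RDX, so shift RDX up and OR the halves.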
+ SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
+ DAG.getConstant(32, MVT::i8));
+ SDValue Ops[] = {
+ DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
+ rdx.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ MVT T = Node->getValueType(0);
+ SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
+ DAG.getConstant(0, T), Node->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), negOp,
+ cast<AtomicSDNode>(Node)->getSrcValue(),
+ cast<AtomicSDNode>(Node)->getAlignment());
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerShift(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
+ case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO: return LowerXALUO(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ }
+}
+
+void X86TargetLowering::
+ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG, unsigned NewOp) {
+ MVT T = Node->getValueType(0);
+ DebugLoc dl = Node->getDebugLoc();
+ assert (T == MVT::i64 && "Only know how to expand i64 atomics");
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0));
+ SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1));
+ // This is a generalized SDNode, not an AtomicSDNode, so it doesn't
+ // have a MemOperand. Pass the info through as a normal operand.
+ SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
+ SDValue Ops[] = { Chain, In1, In2L, In2H, LSI };
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result = DAG.getNode(NewOp, dl, Tys, Ops, 5);
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
+/// ReplaceNodeResults - Replace a node with an illegal result type
+/// with a new node built out of custom code.
+void X86TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::FP_TO_SINT: {
+ std::pair<SDValue,SDValue> Vals =
+ FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.getNode() != 0) {
+ MVT VT = N->getValueType(0);
+ // Return a load from the stack slot.
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0));
+ }
+ return;
+ }
+ case ISD::READCYCLECOUNTER: {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue TheChain = N->getOperand(0);
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
+ rd.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
+ eax.getValue(2));
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { eax, edx };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+ Results.push_back(edx.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT T = N->getValueType(0);
+ assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
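+ // LCMPXCHG8B expects the comparand in EDX:EAX and the new value in
+ // ECX:EBX, so split both i64 operands into 32-bit halves and pin them
+ // to those registers.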
+ SDValue cpInL, cpInH;
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+ DAG.getConstant(0, MVT::i32));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+ DAG.getConstant(1, MVT::i32));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH,
+ cpInL.getValue(1));
+ SDValue swapInL, swapInH;
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(0, MVT::i32));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(1, MVT::i32));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL,
+ cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH,
+ swapInL.getValue(1));
+ SDValue Ops[] = { swapInH.getValue(0),
+ N->getOperand(1),
+ swapInH.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
+ MVT::i32, Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
+ MVT::i32, cpOutL.getValue(2));
+ SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(cpOutH.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ return;
+ }
+}
+
+const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
+ case X86ISD::SHLD: return "X86ISD::SHLD";
+ case X86ISD::SHRD: return "X86ISD::SHRD";
+ case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FOR: return "X86ISD::FOR";
+ case X86ISD::FXOR: return "X86ISD::FXOR";
+ case X86ISD::FSRL: return "X86ISD::FSRL";
+ case X86ISD::FILD: return "X86ISD::FILD";
+ case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
+ case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
+ case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
+ case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+ case X86ISD::FLD: return "X86ISD::FLD";
+ case X86ISD::FST: return "X86ISD::FST";
+ case X86ISD::CALL: return "X86ISD::CALL";
+ case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
+ case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::BT: return "X86ISD::BT";
+ case X86ISD::CMP: return "X86ISD::CMP";
+ case X86ISD::COMI: return "X86ISD::COMI";
+ case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::CMOV: return "X86ISD::CMOV";
+ case X86ISD::BRCOND: return "X86ISD::BRCOND";
+ case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+ case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
+ case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
+ case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
+ case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
+ case X86ISD::PINSRB: return "X86ISD::PINSRB";
+ case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
+ case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
+ case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
+ case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
+ case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
+ case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
+ case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
+ case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
+ case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
+ case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
+ case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
+ case X86ISD::CMPPD: return "X86ISD::CMPPD";
+ case X86ISD::CMPPS: return "X86ISD::CMPPS";
+ case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
+ case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
+ case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
+ case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
+ case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
+ case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
+ case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
+ case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::ADD: return "X86ISD::ADD";
+ case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::SMUL: return "X86ISD::SMUL";
+ case X86ISD::UMUL: return "X86ISD::UMUL";
+ case X86ISD::INC: return "X86ISD::INC";
+ case X86ISD::DEC: return "X86ISD::DEC";
+ case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ }
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // X86 supports extremely general addressing modes.
+
+ // X86 allows a sign-extended 32-bit immediate field as a displacement.
+ if (AM.BaseOffs < -(1LL << 31) || AM.BaseOffs > (1LL << 31) - 1)
+ return false;
+
+ if (AM.BaseGV) {
+ // We can only fold this if we don't need an extra load.
+ if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
+ return false;
+ // If BaseGV requires a register, we cannot also have a BaseReg.
+ if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) &&
+ AM.HasBaseReg)
+ return false;
+
+ // X86-64 only supports addr of globals in small code model.
+ if (Subtarget->is64Bit()) {
+ if (getTargetMachine().getCodeModel() != CodeModel::Small)
+ return false;
+ // Globals are addressed RIP-relatively here, which leaves no room
+ // for an extra base offset or a scaled index register.
+ if (AM.BaseOffs || AM.Scale > 1)
+ return false;
+ }
+ }
+
+ switch (AM.Scale) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ // These scales always work.
+ break;
+ case 3:
+ case 5:
+ case 9:
+ // These scales are formed with basereg+scalereg. Only accept if there is
+ // no basereg yet.
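+ // (e.g. a scale of 3 is encoded as [Reg + 2*Reg], which occupies the
+ // base-register slot).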
+ if (AM.HasBaseReg)
+ return false;
+ break;
+ default: // Other stuff never works.
+ return false;
+ }
+
+ return true;
+}
+
+
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isInteger() || !Ty2->isInteger())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
+bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
+bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const {
+ // i16 instructions are longer (0x66 prefix) and potentially slower.
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ MVT VT) const {
+ // Only do shuffles on 128-bit vector types for now.
+ if (VT.getSizeInBits() == 64)
+ return false;
+
+ // FIXME: pshufb, blends, palignr, shifts.
+ return (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isMOVLMask(M, VT) ||
+ isSHUFPMask(M, VT) ||
+ isPSHUFDMask(M, VT) ||
+ isPSHUFHWMask(M, VT) ||
+ isPSHUFLWMask(M, VT) ||
+ isUNPCKLMask(M, VT) ||
+ isUNPCKHMask(M, VT) ||
+ isUNPCKL_v_undef_Mask(M, VT) ||
+ isUNPCKH_v_undef_Mask(M, VT));
+}
+
+bool
+X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ MVT VT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+ // FIXME: This collection of masks seems suspect.
+ if (NumElts == 2)
+ return true;
+ if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ return (isMOVLMask(Mask, VT) ||
+ isCommutedMOVLMask(Mask, VT, true) ||
+ isSHUFPMask(Mask, VT) ||
+ isCommutedSHUFPMask(Mask, VT));
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned LoadOpc,
+ unsigned CXchgOpc,
+ unsigned copyOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [bitinstr.addr]
+ // op t2 = t1, [bitinstr.val]
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // bz newMBB
+ // fallthrough -->nextMBB
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ DebugLoc dl = bInstr->getDebugLoc();
+ MachineOperand& destOper = bInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86AddrNumOperands];
+ int numArgs = bInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &bInstr->getOperand(i+1);
+
+ // An x86 address has 5 operands: base, scale, index, displacement, and
+ // segment
+ int lastAddrIndx = X86AddrNumOperands - 1; // [0,4]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ unsigned tt = F->getRegInfo().createVirtualRegister(RC);
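+ // When invSrc is set (used by the ATOMNAND pseudos), complement the
+ // loaded value first so the bitwise op below sees ~t1.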
+ if (invSrc) {
+ MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
+ }
+ else
+ tt = t1;
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
+ MIB.addReg(tt);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t2);
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
+ MIB.addReg(EAXreg);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+
+ F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function: 64 bit atomics on 32 bit host.
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB (instructions are in pairs, except cmpxchg8b)
+ // ld t1,t2 = [bitinstr.addr]
+ // newMBB:
+ // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
+ // op t5, t6 <- out1, out2, [bitinstr.val]
+ // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
+ // mov ECX, EBX <- t5, t6
+ // mov EAX, EDX <- t1, t2
+ // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
+ // mov t3, t4 <- EAX, EDX
+ // bz newMBB
+ // result in out1, out2
+ // fallthrough -->nextMBB
+
+ const TargetRegisterClass *RC = X86::GR32RegisterClass;
+ const unsigned LoadOpc = X86::MOV32rm;
+ const unsigned copyOpc = X86::MOV32rr;
+ const unsigned NotOpc = X86::NOT32r;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = bInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
+ assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 &&
+ "unexpected number of operands");
+ MachineOperand& dest1Oper = bInstr->getOperand(0);
+ MachineOperand& dest2Oper = bInstr->getOperand(1);
+ MachineOperand* argOpers[2 + X86AddrNumOperands];
+ for (int i=0; i < 2 + X86AddrNumOperands; ++i)
+ argOpers[i] = &bInstr->getOperand(i+2);
+
+ // An x86 address has 5 operands: base, scale, index, displacement, and
+ // segment
+ int lastAddrIndx = X86AddrNumOperands - 1; // [0,4]
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
+ // The high half lives 4 bytes above the low half: reuse the address
+ // operands but add 4 to the displacement.
+ for (int i=0; i <= lastAddrIndx-2; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MachineOperand newOp3 = *(argOpers[3]);
+ if (newOp3.isImm())
+ newOp3.setImm(newOp3.getImm()+4);
+ else
+ newOp3.setOffset(newOp3.getOffset()+4);
+ (*MIB).addOperand(newOp3);
+ (*MIB).addOperand(*argOpers[lastAddrIndx]);
+
+ // t3/4 are defined later, at the bottom of the loop
+ unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
+ .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
+
+ unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);
+ if (invSrc) {
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2);
+ } else {
+ tt1 = t1;
+ tt2 = t2;
+ }
+
+ int valArgIndx = lastAddrIndx + 1;
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
+ if (regOpcL != X86::MOV32rr)
+ MIB.addReg(tt1);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+ assert(argOpers[valArgIndx + 1]->isReg() ==
+ argOpers[valArgIndx]->isReg());
+ assert(argOpers[valArgIndx + 1]->isImm() ==
+ argOpers[valArgIndx]->isImm());
+ if (argOpers[valArgIndx + 1]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
+ if (regOpcH != X86::MOV32rr)
+ MIB.addReg(tt2);
+ (*MIB).addOperand(*argOpers[valArgIndx + 1]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
+ MIB.addReg(t1);
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX);
+ MIB.addReg(t5);
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX);
+ MIB.addReg(t6);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
+ MIB.addReg(X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4);
+ MIB.addReg(X86::EDX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+
+ F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
+ MachineBasicBlock *MBB,
+ unsigned cmovOpc) const {
+ // For the atomic min/max operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [min/max.addr]
+ // mov t2 = [min/max.val]
+ // cmp t1, t2
+ // cmov[cond] t2 = t1
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // bz newMBB
+ // fallthrough -->nextMBB
+ //
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = mInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ MachineOperand& destOper = mInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86AddrNumOperands];
+ int numArgs = mInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &mInstr->getOperand(i+1);
+
+ // An x86 address has 5 operands: base, scale, index, displacement, and
+ // segment
+ int lastAddrIndx = X86AddrNumOperands - 1; // [0,4]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ // We only support register and immediate values
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+ else
+ // Use the move-immediate form for an immediate operand.
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32ri), t2);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
+ MIB.addReg(t1);
+ MIB.addReg(t2);
+
+ // Generate movc
+ unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
+ MIB.addReg(t2);
+ MIB.addReg(t1);
+
+ // Cmp and exchange if none has modified the memory location
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t3);
+ assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).addMemOperand(*F, *mInstr->memoperands_begin());
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+
+ F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+
+MachineBasicBlock *
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case X86::CMOV_V1I64:
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case X86::FP32_TO_INT16_IN_MEM:
+ case X86::FP32_TO_INT32_IN_MEM:
+ case X86::FP32_TO_INT64_IN_MEM:
+ case X86::FP64_TO_INT16_IN_MEM:
+ case X86::FP64_TO_INT32_IN_MEM:
+ case X86::FP64_TO_INT64_IN_MEM:
+ case X86::FP80_TO_INT16_IN_MEM:
+ case X86::FP80_TO_INT32_IN_MEM:
+ case X86::FP80_TO_INT64_IN_MEM: {
+ // Change the floating point control register to use "round towards zero"
+ // mode when truncating to an integer value.
+ MachineFunction *F = BB->getParent();
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+
+ // Load the old value of the control word so it can be restored later...
+ unsigned OldCW =
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16rm), OldCW),
+ CWFrameIdx);
+
+ // Force the rounding-control bits to round toward zero...
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mi)), CWFrameIdx)
+ .addImm(0xC7F);
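+ // 0xC7F keeps the exception-mask bits set and forces the rounding-control
+ // field (bits 11:10) to 11, i.e. round toward zero.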
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ // Restore the memory image of the control word to its original value
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mr)), CWFrameIdx)
+ .addReg(OldCW);
+
+ // Get the X86 opcode to use.
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "illegal opcode!");
+ case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+ case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+ case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+ case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+ case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+ case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
+ case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
+ case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
+ case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
+ }
+
+ X86AddressMode AM;
+ MachineOperand &Op = MI->getOperand(0);
+ if (Op.isReg()) {
+ AM.BaseType = X86AddressMode::RegBase;
+ AM.Base.Reg = Op.getReg();
+ } else {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = Op.getIndex();
+ }
+ Op = MI->getOperand(1);
+ if (Op.isImm())
+ AM.Scale = Op.getImm();
+ Op = MI->getOperand(2);
+ if (Op.isImm())
+ AM.IndexReg = Op.getImm();
+ Op = MI->getOperand(3);
+ if (Op.isGlobal()) {
+ AM.GV = Op.getGlobal();
+ } else {
+ AM.Disp = Op.getImm();
+ }
+ addFullAddress(BuildMI(BB, dl, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86AddrNumOperands).getReg());
+
+ // Reload the original control word now.
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
+ case X86::ATOMAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32, X86::MOV32rr,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
+ X86::OR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32, X86::MOV32rr,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMXOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
+ X86::XOR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32, X86::MOV32rr,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMNAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32, X86::MOV32rr,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass, true);
+ case X86::ATOMMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
+ case X86::ATOMMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
+ case X86::ATOMUMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
+ case X86::ATOMUMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+
+ case X86::ATOMAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16, X86::MOV16rr,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
+ X86::OR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16, X86::MOV16rr,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMXOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
+ X86::XOR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16, X86::MOV16rr,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMNAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16, X86::MOV16rr,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass, true);
+ case X86::ATOMMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
+ case X86::ATOMMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
+ case X86::ATOMUMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
+ case X86::ATOMUMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
+
+ case X86::ATOMAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8, X86::MOV8rr,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
+ X86::OR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8, X86::MOV8rr,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMXOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
+ X86::XOR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8, X86::MOV8rr,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMNAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8, X86::MOV8rr,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass, true);
+ // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+  // This group is for a 64-bit host.
+ case X86::ATOMAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64, X86::MOV64rr,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
+ X86::OR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64, X86::MOV64rr,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMXOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
+ X86::XOR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64, X86::MOV64rr,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMNAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64, X86::MOV64rr,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass, true);
+ case X86::ATOMMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
+ case X86::ATOMMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
+ case X86::ATOMUMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
+ case X86::ATOMUMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+
+ // This group does 64-bit operations on a 32-bit host.
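+  // Each 64-bit op is split into 32-bit ops on the low and high halves;
+  // e.g. ATOMADD6432 uses ADD32rr for the low half and ADC32rr to carry
+  // into the high half.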
+ case X86::ATOMAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ false);
+ case X86::ATOMOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::OR32rr, X86::OR32rr,
+ X86::OR32ri, X86::OR32ri,
+ false);
+ case X86::ATOMXOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::XOR32rr, X86::XOR32rr,
+ X86::XOR32ri, X86::XOR32ri,
+ false);
+ case X86::ATOMNAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ true);
+ case X86::ATOMADD6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::ADD32rr, X86::ADC32rr,
+ X86::ADD32ri, X86::ADC32ri,
+ false);
+ case X86::ATOMSUB6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::SUB32rr, X86::SBB32rr,
+ X86::SUB32ri, X86::SBB32ri,
+ false);
+ case X86::ATOMSWAP6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::MOV32rr, X86::MOV32rr,
+ X86::MOV32ri, X86::MOV32ri,
+ false);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ switch (Opc) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::SMUL:
+ case X86ISD::UMUL:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ // These nodes' second result is a boolean.
+ if (Op.getResNo() == 0)
+ break;
+ // Fallthrough
+ case X86ISD::SETCC:
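+    // SETCC materializes only 0 or 1, so every bit above bit 0 is known zero.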
+ KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
+ break;
+ }
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool X86TargetLowering::isGAPlusOffset(SDNode *N,
+ GlobalValue* &GA, int64_t &Offset) const{
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
+ GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
+ return true;
+ }
+ }
+ return TargetLowering::isGAPlusOffset(N, GA, Offset);
+}
+
+static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
+ const TargetLowering &TLI) {
+ GlobalValue *GV;
+ int64_t Offset = 0;
+ if (TLI.isGAPlusOffset(Base, GV, Offset))
+ return (GV->getAlignment() >= N && (Offset % N) == 0);
+ // DAG combine handles the stack object case.
+ return false;
+}
+
+static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
+ MVT EVT, SDNode *&Base,
+ SelectionDAG &DAG, MachineFrameInfo *MFI,
+ const TargetLowering &TLI) {
+ Base = NULL;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (N->getMaskElt(i) < 0) {
+ if (!Base)
+ return false;
+ continue;
+ }
+
+ SDValue Elt = DAG.getShuffleScalarElt(N, i);
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return false;
+ if (!Base) {
+ Base = Elt.getNode();
+ if (Base->getOpcode() == ISD::UNDEF)
+ return false;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
+ EVT.getSizeInBits()/8, i, MFI))
+ return false;
+ }
+ return true;
+}
+
+/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
+/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
+/// if the load addresses are consecutive, non-overlapping, and in the right
+/// order. In the case of v2i64, it will see if it can rewrite the
+/// shuffle to be an appropriate build vector so it can take advantage of
+/// PerformBuildVectorCombine.
+static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = N->getValueType(0);
+ MVT EVT = VT.getVectorElementType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // For x86-32 machines, if we see an insert and then a shuffle in a v2i64
+ // where the upper half is 0, it is advantageous to rewrite it as a build
+ // vector of (0, val) so it can use movq.
+ if (VT == MVT::v2i64) {
+ SDValue In[2];
+ In[0] = N->getOperand(0);
+ In[1] = N->getOperand(1);
+ int Idx0 = SVN->getMaskElt(0);
+ int Idx1 = SVN->getMaskElt(1);
+ // FIXME: can we take advantage of undef index?
+ if (Idx0 >= 0 && Idx1 >= 0 &&
+ In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) {
+ ConstantSDNode* InsertVecIdx =
+ dyn_cast<ConstantSDNode>(In[Idx0/2].getOperand(2));
+ if (InsertVecIdx &&
+ InsertVecIdx->getZExtValue() == (unsigned)(Idx0 % 2) &&
+ isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) {
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ In[Idx0/2].getOperand(1),
+ In[Idx1/2].getOperand(Idx1 % 2));
+ }
+ }
+ }
+
+ // Try to combine a vector_shuffle into a 128-bit load.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ SDNode *Base = NULL;
+ if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, Base, DAG, MFI, TLI))
+ return SDValue();
+
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+ if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
+ return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile());
+ return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+}
+
+/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
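+/// For example, (build_vector (load x), 0) of type v2i64 becomes an
+/// X86ISD::VZEXT_LOAD that the selector can match to movq.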
+static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget,
+ const TargetLowering &TLI) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Ignore single operand BUILD_VECTOR.
+ if (NumOps == 1)
+ return SDValue();
+
+ MVT VT = N->getValueType(0);
+ MVT EVT = VT.getVectorElementType();
+ if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+ // We are looking for load i64 and zero extend. We want to transform
+    // it before the legalizer has a chance to expand it. Also look for i64
+ // BUILD_PAIR bit casted to f64.
+ return SDValue();
+ // This must be an insertion into a zero vector.
+ SDValue HighElt = N->getOperand(1);
+ if (!isZeroNode(HighElt))
+ return SDValue();
+
+ // Value must be a load.
+ SDNode *Base = N->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base)) {
+ if (Base->getOpcode() != ISD::BIT_CONVERT)
+ return SDValue();
+ Base = Base->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base))
+ return SDValue();
+ }
+
+ // Transform it into VZEXT_LOAD addr.
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+ // Load must not be an extload.
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+  // The load type should be a legal type so we don't have to legalize it.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return ResNode;
+}
+
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Cond = N->getOperand(0);
+ // Get the LHS/RHS of the select.
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ // If we have SSE[12] support, try to form min/max nodes.
+ if (Subtarget->hasSSE2() &&
+ (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
+ Cond.getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ unsigned Opcode = 0;
+ if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOLE: // (X <= Y) ? X : Y -> min
+ case ISD::SETULE:
+ case ISD::SETLE:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
+ case ISD::SETLT:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOGT: // (X > Y) ? X : Y -> max
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
+ case ISD::SETGE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOGT: // (X > Y) ? Y : X -> min
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
+ case ISD::SETGE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOLE: // (X <= Y) ? Y : X -> max
+ case ISD::SETULE:
+ case ISD::SETLE:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
+ case ISD::SETLT:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ }
+
+ if (Opcode)
+ return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
+ // Don't do this for crazy integer types.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
+        // If this is efficiently invertible, canonicalize the TrueC/FalseC
+        // values so that TrueC (the true value) is larger than FalseC.
+ bool NeedsCondInvert = false;
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+ // Efficiently invertible.
+ (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
+ (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
+ isa<ConstantSDNode>(Cond.getOperand(1))))) {
+ NeedsCondInvert = true;
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
+ if (FalseC->getAPIntValue() == 0 &&
+ TrueC->getAPIntValue().isPowerOf2()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ }
+
+        // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ return Cond;
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
+static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the flag operand isn't dead, don't touch this CMOV.
+ if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+ return SDValue();
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations. Note that the operands are ordered the opposite of SELECT
+ // operands.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
+ // larger than FalseC (the false value).
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
+ // This is efficient for any integer data type (including i8/i16) and
+ // shift amount.
+ if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+      // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.  This is efficient
+ // for any integer data type, including i8/i16.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+
+/// PerformMulCombine - Optimize a single multiply with constant into two
+/// in order to implement it with two cheaper instructions, e.g.
+/// LEA + SHL, LEA + LEA.
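+/// For example, x*45 can be emitted as (x*9)*5 (LEA + LEA), and x*40 as
+/// (x*5)<<3 (LEA + SHL).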
+static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DAG.getMachineFunction().
+ getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ MVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+ uint64_t MulAmt = C->getZExtValue();
+ if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
+ return SDValue();
+
+ uint64_t MulAmt1 = 0;
+ uint64_t MulAmt2 = 0;
+ if ((MulAmt % 9) == 0) {
+ MulAmt1 = 9;
+ MulAmt2 = MulAmt / 9;
+ } else if ((MulAmt % 5) == 0) {
+ MulAmt1 = 5;
+ MulAmt2 = MulAmt / 5;
+ } else if ((MulAmt % 3) == 0) {
+ MulAmt1 = 3;
+ MulAmt2 = MulAmt / 3;
+ }
+ if (MulAmt2 &&
+ (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ DebugLoc DL = N->getDebugLoc();
+
+ if (isPowerOf2_64(MulAmt2) &&
+ !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+      // If the second multiplier is pow2, issue it first. We want the multiply by
+ // 3, 5, or 9 to be folded into the addressing mode unless the lone use
+ // is an add.
+ std::swap(MulAmt1, MulAmt2);
+
+ SDValue NewMul;
+ if (isPowerOf2_64(MulAmt1))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(MulAmt1, VT));
+
+ if (isPowerOf2_64(MulAmt2))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+ DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
+ DAG.getConstant(MulAmt2, VT));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, NewMul, false);
+ }
+ return SDValue();
+}
+
+
+/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
+/// when possible.
+static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // On X86 with SSE2 support, we can transform this to a vector shift if
+ // all elements are shifted by the same amount. We can't do this in legalize
+  // because a constant vector is typically transformed to a constant pool
+ // so we have no knowledge of the shift amount.
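+  // For example, (shl v4i32 %x, <5, 5, 5, 5>) can be selected as a single
+  // pslld by 5.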
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ MVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+ return SDValue();
+
+ SDValue ShAmtOp = N->getOperand(1);
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc DL = N->getDebugLoc();
+ SDValue BaseShAmt;
+ if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != BaseShAmt) {
+ return SDValue();
+ }
+ }
+ } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+ DAG.getIntPtrConstant(0));
+ } else
+ return SDValue();
+
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt);
+
+ // The shift amount is identical so we can do a vector shift.
+ SDValue ValOp = N->getOperand(0);
+ switch (N->getOpcode()) {
+ default:
+ assert(0 && "Unknown shift opcode!");
+ break;
+ case ISD::SHL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRA:
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ }
+ return SDValue();
+}
+
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
+ // the FP state in cases where an emms may be missing.
+ // A preferable solution to the general problem is to figure out the right
+ // places to insert EMMS. This qualifies as a quick hack.
+
+ // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
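+  // A 64-bit copy then becomes a single movq on x86-64, an SSE2 f64
+  // load/store pair on x86-32, or two 32-bit load/store pairs as a fallback.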
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ MVT VT = St->getValue().getValueType();
+ if (VT.getSizeInBits() != 64)
+ return SDValue();
+
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+ if ((VT.isVector() ||
+ (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
+ isa<LoadSDNode>(St->getValue()) &&
+ !cast<LoadSDNode>(St->getValue())->isVolatile() &&
+ St->getChain().hasOneUse() && !St->isVolatile()) {
+ SDNode* LdVal = St->getValue().getNode();
+ LoadSDNode *Ld = 0;
+ int TokenFactorIndex = -1;
+ SmallVector<SDValue, 8> Ops;
+ SDNode* ChainVal = St->getChain().getNode();
+ // Must be a store of a load. We currently handle two cases: the load
+ // is a direct child, and it's under an intervening TokenFactor. It is
+ // possible to dig deeper under nested TokenFactors.
+ if (ChainVal == LdVal)
+ Ld = cast<LoadSDNode>(St->getChain());
+ else if (St->getValue().hasOneUse() &&
+ ChainVal->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ if (ChainVal->getOperand(i).getNode() == LdVal) {
+ TokenFactorIndex = i;
+ Ld = cast<LoadSDNode>(St->getValue());
+ } else
+ Ops.push_back(ChainVal->getOperand(i));
+ }
+ }
+
+ if (!Ld || !ISD::isNormalLoad(Ld))
+ return SDValue();
+
+ // If this is not the MMX case, i.e. we are just turning i64 load/store
+ // into f64 load/store, avoid the transformation if there are multiple
+ // uses of the loaded value.
+ if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
+ return SDValue();
+
+ DebugLoc LdDL = Ld->getDebugLoc();
+ DebugLoc StDL = N->getDebugLoc();
+ // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+ // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
+ // pair instead.
+ if (Subtarget->is64Bit() || F64IsLegal) {
+ MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getSrcValue(),
+ Ld->getSrcValueOffset(), Ld->isVolatile(),
+ Ld->getAlignment());
+ SDValue NewChain = NewLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(NewChain);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+ return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
+ St->getSrcValue(), St->getSrcValueOffset(),
+ St->isVolatile(), St->getAlignment());
+ }
+
+ // Otherwise, lower to two pairs of 32-bit loads / stores.
+ SDValue LoAddr = Ld->getBasePtr();
+ SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->getAlignment());
+ SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+ Ld->isVolatile(),
+ MinAlign(Ld->getAlignment(), 4));
+
+ SDValue NewChain = LoLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(LoLd);
+ Ops.push_back(HiLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+
+ LoAddr = St->getBasePtr();
+ HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
+ St->getSrcValue(), St->getSrcValueOffset(),
+ St->isVolatile(), St->getAlignment());
+ SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
+ St->getSrcValue(),
+ St->getSrcValueOffset() + 4,
+ St->isVolatile(),
+ MinAlign(St->getAlignment(), 4));
+ return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
+ }
+ return SDValue();
+}
+
+/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
+/// X86ISD::FXOR nodes.
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+ // F[X]OR(0.0, x) -> x
+ // F[X]OR(x, 0.0) -> x
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ return SDValue();
+}
+
+/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // FAND(0.0, x) -> 0.0
+ // FAND(x, 0.0) -> 0.0
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ return SDValue();
+}
+
+static SDValue PerformBTCombine(SDNode *N,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // BT ignores high bits in the bit index operand.
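+  // For an i32 index only the low Log2_32(32) = 5 bits are demanded, so a
+  // constant index such as 37 can be shrunk to 37 & 31 = 5.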
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.hasOneUse()) {
+ unsigned BitWidth = Op1.getValueSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return SDValue();
+}
+
+SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+ case ISD::BUILD_VECTOR:
+ return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case X86ISD::FXOR:
+ case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG);
+ case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+X86TargetLowering::ConstraintType
+X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'A':
+ return C_Register;
+ case 'f':
+ case 'r':
+ case 'R':
+ case 'l':
+ case 'q':
+ case 'Q':
+ case 'x':
+ case 'y':
+ case 'Y':
+ return C_RegisterClass;
+ case 'e':
+ case 'Z':
+ return C_Other;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *X86TargetLowering::
+LowerXConstraint(MVT ConstraintVT) const {
+ // FP X constraints get lowered to SSE1/2 registers if available, otherwise
+ // 'f' like normal targets.
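+  // For example, an f64 operand under 'X' is constrained to 'Y' (SSE2
+  // registers) when SSE2 is available.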
+ if (ConstraintVT.isFloatingPoint()) {
+ if (Subtarget->hasSSE2())
+ return "Y";
+ if (Subtarget->hasSSE1())
+ return "x";
+ }
+
+ return TargetLowering::LowerXConstraint(ConstraintVT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char Constraint,
+ bool hasMemory,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ switch (Constraint) {
+ default: break;
+ case 'I':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 31) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'J':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 63) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'N':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 255) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'e': {
+ // 32-bit signed value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ const ConstantInt *CI = C->getConstantIntValue();
+ if (CI->isValueValidForType(Type::Int32Ty, C->getSExtValue())) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
+ break;
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ }
+ return;
+ }
+ case 'Z': {
+ // 32-bit unsigned value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ const ConstantInt *CI = C->getConstantIntValue();
+ if (CI->isValueValidForType(Type::Int32Ty, C->getZExtValue())) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ return;
+ }
+ case 'i': {
+ // Literal immediates are always ok.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
+ break;
+ }
+
+ // If we are in non-pic codegen mode, we allow the address of a global (with
+ // an optional displacement) to be used with 'i'.
+ GlobalAddressSDNode *GA = 0;
+ int64_t Offset = 0;
+
+ // Match either (GA), (GA+C), (GA+C1+C2), etc.
+ while (1) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
+ Offset += GA->getOffset();
+ break;
+ } else if (Op.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ } else if (Op.getOpcode() == ISD::SUB) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += -C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ }
+
+ // Otherwise, this isn't something we can handle, reject it.
+ return;
+ }
+
+ if (hasMemory)
+ Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG);
+ else
+ Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ Offset);
+ Result = Op;
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
+ Ops, DAG);
+}
+
+std::vector<unsigned> X86TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ // FIXME: not handling fp-stack yet!
+ switch (Constraint[0]) { // GCC X86 Constraint Letters
+ default: break; // Unknown constraint letter
+ case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
+ case 'Q': // Q_REGS
+ if (VT == MVT::i32)
+ return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
+ else if (VT == MVT::i16)
+ return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
+ else if (VT == MVT::i8)
+ return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
+ else if (VT == MVT::i64)
+ return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
+ break;
+ }
+ }
+
+ return std::vector<unsigned>();
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ // First, see if this is a constraint that directly corresponds to an LLVM
+ // register class.
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ case 'R': // LEGACY_REGS
+ case 'l': // INDEX_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ case 'f': // FP Stack registers.
+ // If SSE is enabled for this VT, use f80 to ensure the isel moves the
+ // value to the correct fpstack register class.
+ if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP32RegisterClass);
+ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP64RegisterClass);
+ return std::make_pair(0U, X86::RFP80RegisterClass);
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget->hasMMX()) break;
+ return std::make_pair(0U, X86::VR64RegisterClass);
+ case 'Y': // SSE_REGS if SSE2 allowed
+ if (!Subtarget->hasSSE2()) break;
+ // FALL THROUGH.
+ case 'x': // SSE_REGS if SSE1 allowed
+ if (!Subtarget->hasSSE1()) break;
+
+ switch (VT.getSimpleVT()) {
+ default: break;
+ // Scalar SSE types.
+ case MVT::f32:
+ case MVT::i32:
+ return std::make_pair(0U, X86::FR32RegisterClass);
+ case MVT::f64:
+ case MVT::i64:
+ return std::make_pair(0U, X86::FR64RegisterClass);
+ // Vector types.
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return std::make_pair(0U, X86::VR128RegisterClass);
+ }
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ std::pair<unsigned, const TargetRegisterClass*> Res;
+ Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // Not found as a standard register?
+ if (Res.second == 0) {
+ // GCC calls "st(0)" just plain "st".
+ if (StringsEqualNoCase("{st}", Constraint)) {
+ Res.first = X86::ST0;
+ Res.second = X86::RFP80RegisterClass;
+ }
+ // 'A' means EAX + EDX.
+ if (Constraint == "A") {
+ Res.first = X86::EAX;
+ Res.second = X86::GRADRegisterClass;
+ }
+ return Res;
+ }
+
+ // Otherwise, check to see if this is a register class of the wrong value
+ // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
+ // turn into {ax},{dx}.
+ if (Res.second->hasType(VT))
+ return Res; // Correct type already, nothing to do.
+
+ // All of the single-register GCC register classes map their values onto
+ // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
+ // really want an 8-bit or 32-bit register, map to the appropriate register
+ // class and return the appropriate register.
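+  // For example, "{ax}" used with an i32 operand is remapped to EAX below.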
+ if (Res.second == X86::GR16RegisterClass) {
+ if (VT == MVT::i8) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::AL; break;
+ case X86::DX: DestReg = X86::DL; break;
+ case X86::CX: DestReg = X86::CL; break;
+ case X86::BX: DestReg = X86::BL; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR8RegisterClass;
+ }
+ } else if (VT == MVT::i32) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::EAX; break;
+ case X86::DX: DestReg = X86::EDX; break;
+ case X86::CX: DestReg = X86::ECX; break;
+ case X86::BX: DestReg = X86::EBX; break;
+ case X86::SI: DestReg = X86::ESI; break;
+ case X86::DI: DestReg = X86::EDI; break;
+ case X86::BP: DestReg = X86::EBP; break;
+ case X86::SP: DestReg = X86::ESP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR32RegisterClass;
+ }
+ } else if (VT == MVT::i64) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::RAX; break;
+ case X86::DX: DestReg = X86::RDX; break;
+ case X86::CX: DestReg = X86::RCX; break;
+ case X86::BX: DestReg = X86::RBX; break;
+ case X86::SI: DestReg = X86::RSI; break;
+ case X86::DI: DestReg = X86::RDI; break;
+ case X86::BP: DestReg = X86::RBP; break;
+ case X86::SP: DestReg = X86::RSP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR64RegisterClass;
+ }
+ }
+ } else if (Res.second == X86::FR32RegisterClass ||
+ Res.second == X86::FR64RegisterClass ||
+ Res.second == X86::VR128RegisterClass) {
+ // Handle references to XMM physical registers that got mapped into the
+ // wrong class. This can happen with constraints like {xmm0} where the
+ // target independent register mapper will just pick the first match it can
+ // find, ignoring the required type.
+ if (VT == MVT::f32)
+ Res.second = X86::FR32RegisterClass;
+ else if (VT == MVT::f64)
+ Res.second = X86::FR64RegisterClass;
+ else if (X86::VR128RegisterClass->hasType(VT))
+ Res.second = X86::VR128RegisterClass;
+ }
+
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Widen vector type
+//===----------------------------------------------------------------------===//
+
+/// getWidenVectorType: given a vector type, returns the type to widen
+/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other.
+/// When and where to widen is target dependent based on the cost of
+/// scalarizing vs using the wider vector type.
+
+MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
+ assert(VT.isVector());
+ if (isTypeLegal(VT))
+ return VT;
+
+  // TODO: In computeRegisterProperties, we can compute the list of legal vector
+ // type based on element type. This would speed up our search (though
+ // it may not be worth it since the size of the list is relatively
+ // small).
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+
+  // On X86, it makes sense to widen any vector with more than one element.
+ if (NElts <= 1)
+ return MVT::Other;
+
+ for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
+ nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+
+ if (isTypeLegal(SVT) &&
+ SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts)
+ return SVT;
+ }
+ return MVT::Other;
+}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
new file mode 100644
index 0000000..550f8bd
--- /dev/null
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -0,0 +1,705 @@
+//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ISELLOWERING_H
+#define X86ISELLOWERING_H
+
+#include "X86Subtarget.h"
+#include "X86RegisterInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+namespace llvm {
+ namespace X86ISD {
+ // X86 Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// BSF - Bit scan forward.
+ /// BSR - Bit scan reverse.
+ BSF,
+ BSR,
+
+ /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// X86::SHLDxx and X86::SHRDxx instructions.
+ SHLD,
+ SHRD,
+
+ /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// to X86::ANDPS or X86::ANDPD.
+ FAND,
+
+ /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// to X86::ORPS or X86::ORPD.
+ FOR,
+
+ /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
+    /// FSRL - Bitwise logical right shift of floating point values. This
+ /// corresponds to X86::PSRLDQ.
+ FSRL,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+    /// also produces a flag.
+ FILD,
+ FILD_FLAG,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// CALL/TAILCALL - These operations represent an abstract X86 call
+ /// instruction, which includes a bunch of information. In particular the
+ /// operands of these node are:
+ ///
+ /// #0 - The incoming token chain
+ /// #1 - The callee
+ /// #2 - The number of arg bytes the caller pushes on the stack.
+ /// #3 - The number of arg bytes the callee pops off the stack.
+ /// #4 - The value to pass in AL/AX/EAX (optional)
+ /// #5 - The value to pass in DL/DX/EDX (optional)
+ ///
+ /// The result values of these nodes are:
+ ///
+ /// #0 - The outgoing token chain
+ /// #1 - The first register result value (optional)
+ /// #2 - The second register result value (optional)
+ ///
+ /// The CALL vs TAILCALL distinction boils down to whether the callee is
+ /// known not to modify the caller's stack frame, as is standard with
+ /// LLVM.
+ CALL,
+ TAILCALL,
+
+ /// RDTSC_DAG - This operation implements the lowering for
+ /// readcyclecounter
+ RDTSC_DAG,
+
+ /// X86 compare and logical compare instructions.
+ CMP, COMI, UCOMI,
+
+ /// X86 bit-test instructions.
+ BT,
+
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// X86 conditional moves. Operand 0 and operand 1 are the two values
+ /// to select from. Operand 2 is the condition code, and operand 3 is the
+ /// flag operand produced by a CMP or TEST instruction. It also writes a
+ /// flag result.
+ CMOV,
+
+ /// X86 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// or TEST instruction.
+ BRCOND,
+
+ /// Return with a flag operand. Operand 0 is the chain operand, operand
+ /// 1 is the number of bytes of stack to pop.
+ RET_FLAG,
+
+ /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// Wrapper - A wrapper node for TargetConstantPool,
+ /// TargetExternalSymbol, and TargetGlobalAddress.
+ Wrapper,
+
+ /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// relative displacements.
+ WrapperRIP,
+
+ /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRB.
+ PEXTRB,
+
+ /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRW.
+ PEXTRW,
+
+ /// INSERTPS - Insert any element of a 4 x float vector into any element
+    /// of a destination 4 x float vector.
+ INSERTPS,
+
+ /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRB.
+ PINSRB,
+
+ /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRW.
+ PINSRW,
+
+ /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ PSHUFB,
+
+ /// FMAX, FMIN - Floating point max and min.
+ ///
+ FMAX, FMIN,
+
+ /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
+ /// approximation. Note that these typically require refinement
+ /// in order to obtain suitable precision.
+ FRSQRT, FRCP,
+
+ // TLSADDR - Thread Local Storage.
+ TLSADDR,
+
+ // SegmentBaseAddress - The address segment:0
+ SegmentBaseAddress,
+
+ // EH_RETURN - Exception Handling helpers.
+ EH_RETURN,
+
+ /// TC_RETURN - Tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+ LCMPXCHG_DAG,
+ LCMPXCHG8_DAG,
+
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // Atomic 64-bit binary operations.
+ ATOMADD64_DAG,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+
+    // FNSTCW16m - Store FP control word into i16 memory.
+ FNSTCW16m,
+
+ // VZEXT_MOVL - Vector move low and zero extend.
+ VZEXT_MOVL,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+ // VSHL, VSRL - Vector logical left / right shift.
+ VSHL, VSRL,
+
+    // CMPPD, CMPPS - Vector double/float comparison.
+ CMPPD, CMPPS,
+
+ // PCMP* - Vector integer comparisons.
+ PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
+ PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
+
+ // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, SMUL, UMUL,
+ INC, DEC,
+
+ // MUL_IMM - X86 specific multiply by immediate.
+ MUL_IMM
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace X86 {
+ /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+ bool isPSHUFDMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+ bool isPSHUFHWMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+ bool isPSHUFLWMask(ShuffleVectorSDNode *N);
+
+ /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to SHUFP*.
+ bool isSHUFPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+ bool isMOVHLPSMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+ /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+ /// <2, 3, 2, 3>
+ bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
+ bool isMOVLPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
+ /// as well as MOVLHPS.
+ bool isMOVHPMask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKL.
+ bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+ bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+ /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+ /// <0, 0, 1, 1>
+ bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+ /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+ /// <2, 2, 3, 3>
+ bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSS,
+ /// MOVSD, and MOVD, i.e. setting the lowest element.
+ bool isMOVLMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+ bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+ bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+ bool isMOVDDUPMask(ShuffleVectorSDNode *N);
+
+ /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+ /// instructions.
+ unsigned getShuffleSHUFImmediate(SDNode *N);
+
+ /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+ /// instructions.
+ unsigned getShufflePSHUFHWImmediate(SDNode *N);
+
+    /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+ /// instructions.
+ unsigned getShufflePSHUFLWImmediate(SDNode *N);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ class X86TargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int RegSaveFrameIndex; // X86-64 vararg func register save area.
+ unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset.
+ unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
+ int BytesToPopOnReturn; // Number of arg bytes ret should pop.
+ int BytesCallerReserves; // Number of arg bytes caller makes.
+
+ public:
+ explicit X86TargetLowering(X86TargetMachine &TM);
+
+    /// getPICJumpTableRelocBase - Returns relocation base for the given PIC
+ /// jumptable.
+ SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const;
+
+ // Return the number of bytes that a function should pop when it returns (in
+ // addition to the space used by the return address).
+ //
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+
+ // Return the number of bytes that the caller reserves for arguments passed
+ // to this function.
+ unsigned getBytesCallerReserves() const { return BytesCallerReserves; }
+
+ /// getStackPtrReg - Return the stack pointer register we are using: either
+ /// ESP or RSP.
+ unsigned getStackPtrReg() const { return X86StackPtr; }
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. For X86, aggregates
+ /// that contain SSE vectors are placed at 16-byte boundaries while the rest
+ /// are at 4-byte boundaries.
+ virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
+ /// determining it.
+ virtual
+ MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of a node whose result type is
+ /// illegal with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual bool
+ isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
+
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ virtual const char *LowerXConstraint(MVT ConstraintVT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraints of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// (e.g. {edx}), return the register number and the register class for the
+ /// register. This should only be used for C_Register constraints. On
+ /// error, this returns a register number of 0.
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. on x86 it's free to truncate an i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(MVT VT1, MVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a
+ /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in
+ /// unknown ways, such as incoming arguments, or copies from unknown
+ /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+ /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+ /// all instructions that define 32-bit values implicitly zero-extend the
+ /// result out to 64 bits.
+ virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(MVT VT1, MVT VT2) const;
+
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const;
+
+ /// isShuffleMaskLegal - Targets can use this to indicate that they only
+ /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
+ /// values are assumed to be legal.
+ virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ MVT VT) const;
+
+ /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can use
+ /// this to indicate whether there is a suitable VECTOR_SHUFFLE that can be
+ /// used to replace a VAND with a constant pool entry.
+ virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ MVT VT) const;
+
+ /// ShouldShrinkFPConstant - If true, then instruction selection should
+ /// seek to shrink the FP constant of the specified type to a smaller type
+ /// in order to save space and / or reduce runtime.
+ virtual bool ShouldShrinkFPConstant(MVT VT) const {
+ // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
+ // expensive than a straight movsd. On the other hand, it's important to
+ // shrink long double fp constant since fldt is very slow.
+ return !X86ScalarSSEf64 || VT == MVT::f80;
+ }
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets that want to do tail call
+ /// optimization should implement this function.
+ virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+ SDValue Ret,
+ SelectionDAG &DAG) const;
+
+ virtual const X86Subtarget* getSubtarget() {
+ return Subtarget;
+ }
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(MVT VT) const {
+ return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 when SSE1 is available
+ }
+
+ /// getWidenVectorType - Given a vector type, return the type to widen
+ /// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+ /// If there is no vector type that we want to widen to, it returns
+ /// MVT::Other. When and where to widen is target dependent, based on the
+ /// cost of scalarizing vs. using the wider vector type.
+ virtual MVT getWidenVectorType(MVT VT) const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *
+ createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi, DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &,
+ DenseMap<const AllocaInst *, int> &
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &
+#endif
+ );
+
+ private:
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+ const X86RegisterInfo *RegInfo;
+ const TargetData *TD;
+
+ /// X86StackPtr - X86 physical register used as stack ptr.
+ unsigned X86StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf32;
+ bool X86ScalarSSEf64;
+
+ SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+
+ SDValue LowerMemArgument(SDValue Op, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned CC, SDValue Root, unsigned i);
+
+ SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+ const SDValue &StackPtr,
+ const CCValAssign &VA, SDValue Chain,
+ SDValue Arg, ISD::ArgFlagsTy Flags);
+
+ // Call lowering helpers.
+ bool IsCalleePop(bool isVarArg, unsigned CallingConv);
+ bool CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall);
+ bool CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall);
+ SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
+ SDValue Chain, bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl);
+
+ CCAssignFn *CCAssignFnForNode(unsigned CallingConv) const;
+ NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDValue Op);
+ unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG);
+
+ std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool isSigned);
+
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
+ SDValue BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SelectionDAG &DAG);
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
+
+ SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG);
+
+ void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, unsigned NewOp);
+
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff);
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff);
+
+ /// Utility function to emit atomic bitwise operations (and, or, xor).
+ /// It takes the bitwise instruction to expand, the associated machine basic
+ /// block, and the associated X86 opcodes for reg/reg and reg/imm.
+ MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned loadOpc,
+ unsigned cxchgOpc,
+ unsigned copyOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc = false) const;
+
+ MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc = false) const;
+
+ /// Utility function to emit atomic min and max. It takes the min/max
+ /// instruction to expand, the associated basic block, and the associated
+ /// cmov opcode for moving the min or max value.
+ MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned cmovOpc) const;
+
+ /// Emit nodes that will be selected as "test Op0,Op0", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
+
+ /// Emit nodes that will be selected as "cmp Op0,Op1", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG);
+ };
+
+ namespace X86 {
+ FastISel *createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi, DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &,
+ DenseMap<const AllocaInst *, int> &
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &
+#endif
+ );
+ }
+}
+
+#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
new file mode 100644
index 0000000..dc15e4a
--- /dev/null
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -0,0 +1,1937 @@
+//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86-64 instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// 64 bits, but only 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// 64 bits, but only 8 bits are significant.
+def i64i8imm : Operand<i64>;
+
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printlea64mem";
+ let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm);
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printlea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Complex Pattern Definitions.
+//
+def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
+ [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper],
+ []>;
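+
+// (The four operands produced by SelectLEAAddr line up with lea64mem above:
+// base register, scale, index register, displacement.)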
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments.
+//
+
+def i64immSExt8 : PatLeaf<(i64 imm), [{
+ // i64immSExt8 predicate - True if the 64-bit immediate fits in an 8-bit
+ // sign-extended field.
+ return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}]>;
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
+}]>;
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // zero-extended (unsigned) field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
+}]>;
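+
+// Illustrative sketch (not referenced by any pattern in this file): the
+// same cast-and-compare idiom works for any narrower field. For a 16-bit
+// sign-extended field, -32768 (0xFFFFFFFFFFFF8000) round-trips through
+// int16_t and passes, while 32768 (0x8000) does not.
+def i64immSExt16 : PatLeaf<(i64 imm), [{
+  // Hypothetical example predicate - True if the 64-bit immediate fits in
+  // a 16-bit sign-extended field.
+  return (int64_t)N->getZExtValue() == (int16_t)N->getZExtValue();
+}]>;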
+
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
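+
+// These fragments let the extending-load instructions below (e.g.
+// MOVSX64rm8, MOVZX64rm16) fold the load and the extension into a single
+// pattern on an i64 result.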
+
+//===----------------------------------------------------------------------===//
+// Instruction list...
+//
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In64BitMode]>;
+}
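+
+// A sketch of the eventual expansion (illustrative, not emitted verbatim):
+// a call needing 32 bytes of outgoing argument space becomes roughly
+//   #ADJCALLSTACKDOWN 32    ->  subq $32, %rsp
+//   ... the call itself ...
+//   #ADJCALLSTACKUP 32, 0   ->  addq $32, %rsp
+// which is why RSP and EFLAGS are modeled as defs above.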
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : I<0xE8, RawFrm,
+ (outs), (ins i64i32imm:$dst, variable_ops),
+ "call\t${dst:call}", []>,
+ Requires<[In64BitMode]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call GR64:$dst)]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
+ }
+
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset,
+ variable_ops),
+ "#TC_RETURN $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset,
+ variable_ops),
+ "#TC_RETURN $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst),
+ "jmp{q}\t{*}$dst # TAILCALL",
+ []>;
+
+// Branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+ [(brind GR64:$dst)]>;
+ def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+ [(brind (loadi64 addr:$dst))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>;
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+let mayStore = 1 in
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
+def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
+def PUSHFQ : I<0x9C, RawFrm, (outs), (ins), "pushf", []>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea64_32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
+ "lea{q}\t{$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+let isTwoAddress = 1 in
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86bsf GR64:$src)), (implicit EFLAGS)]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86bsf (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86bsr GR64:$src)), (implicit EFLAGS)]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86bsr (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+// Repeat string ops
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in
+def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>, REP;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
+def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>, REP;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions...
+//
+
+let neverHasSideEffects = 1 in
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
+}
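+
+// (Informal encoding note: MOV64ri is the 10-byte movabsq with a full
+// 64-bit immediate, while MOV64ri32 covers immediates that sign-extend
+// from 32 bits in a considerably shorter encoding.)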
+
+let canFoldAsLoad = 1 in
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
+
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
+
+// Sign/Zero extenders
+
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
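+
+// (Encoding note, informal: the movz{b,w}q forms would need a REX.W
+// prefix that movz{b,w}l avoids, so the 32-bit forms are a byte shorter
+// and implicit zero-extension makes the results identical.)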
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension, however this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+// Any instruction that defines a 32-bit result zeroes the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
+// be copying from a truncate, but any other 32-bit operation will zero-extend
+// up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
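+
+// Worked example (informal): for IR such as
+//   %a = add i32 %x, %y
+//   %z = zext i32 %a to i64
+// the add is an ordinary 32-bit def, so def32 matches and the zext becomes
+// a free SUBREG_TO_REG; no movl is emitted. Had %a come from a truncate,
+// def32 would reject it and MOVZX64rr32 would emit an explicit movl.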
+
+let neverHasSideEffects = 1 in {
+ let Defs = [RAX], Uses = [EAX] in
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>; // RAX = signext(EAX)
+
+ let Defs = [RAX,RDX], Uses = [RAX] in
+ def CQO : RI<0x99, RawFrm, (outs), (ins),
+ "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions...
+//
+
+let Defs = [EFLAGS] in {
+let isTwoAddress = 1 in {
+let isConvertibleToThreeAddress = 1 in {
+let isCommutable = 1 in
+// Register-Register Addition
+def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Addition
+def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isConvertibleToThreeAddress
+
+// Register-Memory Addition
+def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+// Memory-Register Addition
+def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR64:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let Uses = [EFLAGS] in {
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
+
+def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
+
+def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
+def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
+} // isTwoAddress
+
+def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+} // Uses = [EFLAGS]
+
+let isTwoAddress = 1 in {
+// Register-Register Subtraction
+def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+
+// Register-Memory Subtraction
+def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Subtraction
+def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst),
+ (ins GR64:$src1, i64i8imm:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
+ (ins GR64:$src1, i64i32imm:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+// Memory-Register Subtraction
+def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR64:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// Memory-Integer Subtraction
+def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
+let Uses = [EFLAGS] in {
+let isTwoAddress = 1 in {
+def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
+
+def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
+
+def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
+def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
+} // isTwoAddress
+
+def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+} // Uses = [EFLAGS]
+} // Defs = [EFLAGS]
+
+// Unsigned multiplication
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in {
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
+let mayLoad = 1 in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+
+// Signed multiplication
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*GR64
+let mayLoad = 1 in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+
+let Defs = [EFLAGS] in {
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+// Register-Register Signed Integer Multiplication
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>, TB;
+
+// Register-Memory Signed Integer Multiplication
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, TB;
+} // isTwoAddress
+
+// Surprisingly enough, these are not two address instructions!
+
+// Register-Integer Signed Integer Multiplication
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+
+// Memory-Integer Signed Integer Multiplication
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul (load addr:$src1),
+ i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul (load addr:$src1),
+ i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Unsigned division / remainder
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in {
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), // RDX:RAX/r64 = RAX,RDX
+ "div{q}\t$src", []>;
+// Signed division / remainder
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), // RDX:RAX/r64 = RAX,RDX
+ "idiv{q}\t$src", []>;
+let mayLoad = 1 in {
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX
+ "div{q}\t$src", []>;
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX
+ "idiv{q}\t$src", []>;
+}
+}
+
+// Unary instructions
+let Defs = [EFLAGS], CodeSize = 2 in {
+let isTwoAddress = 1 in
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
+ [(set GR64:$dst, (ineg GR64:$src)),
+ (implicit EFLAGS)]>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
+ [(set GR64:$dst, (add GR64:$src, 1)),
+ (implicit EFLAGS)]>;
+def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
+ [(set GR64:$dst, (add GR64:$src, -1)),
+ (implicit EFLAGS)]>;
+def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst",
+ [(set GR16:$dst, (add GR16:$src, 1)),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst",
+ [(set GR32:$dst, (add GR32:$src, 1)),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst",
+ [(set GR16:$dst, (add GR16:$src, -1)),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst",
+ [(set GR32:$dst, (add GR32:$src, -1)),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+let isTwoAddress = 0, CodeSize = 2 in {
+ def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+ def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+ def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+}
+} // Defs = [EFLAGS], CodeSize
+
+
+let Defs = [EFLAGS] in {
+// Shift instructions
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src, CL))]>;
+let isConvertibleToThreeAddress = 1 in // Can transform into LEA.
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shl{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
+// cheaper.
+} // isTwoAddress
+
+let Uses = [CL] in
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src, CL))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src, CL))]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Rotate instructions
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src, CL))]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "rol{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src, CL))]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "ror{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t$dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "ror{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t$dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Double shift instructions (generalizations of rotate)
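+// Semantics sketch: "shld %cl, $src2, $dst" computes, for a nonzero count,
+//   $dst = ($dst << %cl) | ($src2 >> (64 - %cl))
+// i.e. bits shifted out of $dst are refilled from $src2; a rotate is the
+// special case where $dst and $src2 are the same register.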
+let isTwoAddress = 1 in {
+let Uses = [CL] in {
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, TB;
+}
+
+let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+} // isCommutable
+} // isTwoAddress
+
+let Uses = [CL] in {
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+}
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+} // Defs = [EFLAGS]
+
+//===----------------------------------------------------------------------===//
+// Logical Instructions...
+//
+
+let isTwoAddress = 1 , AddedComplexity = 15 in
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
+ [(set GR64:$dst, (not GR64:$src))]>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+
+let Defs = [EFLAGS] in {
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def AND64rr : RI<0x21, MRMDestReg,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "and{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+def AND64rm : RI<0x23, MRMSrcMem,
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "and{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def AND64ri8 : RIi8<0x83, MRM4r,
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "and{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def AND64ri32 : RIi32<0x81, MRM4r,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "and{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+def AND64mr : RI<0x21, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src),
+ "and{q}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR64:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def AND64mi8 : RIi8<0x83, MRM4m,
+ (outs), (ins i64mem:$dst, i64i8imm :$src),
+ "and{q}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def AND64mi32 : RIi32<0x81, MRM4m,
+ (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "and{q}\t{$src, $dst|$dst, $src}",
+ [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "or{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "or{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "or{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "or{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "or{q}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR64:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def OR64mi8 : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src),
+ "or{q}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "or{q}\t{$src, $dst|$dst, $src}",
+ [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "xor{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "xor{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "xor{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def XOR64ri32 : RIi32<0x81, MRM6r,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "xor{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xor{q}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR64:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src),
+ "xor{q}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "xor{q}\t{$src, $dst|$dst, $src}",
+ [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+// Integer comparison
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in
+def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "test{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, GR64:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
+ "test{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0),
+ (implicit EFLAGS)]>;
+def TEST64ri32 : RIi32<0xF7, MRM0r, (outs),
+ (ins GR64:$src1, i64i32imm:$src2),
+ "test{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
+ (ins i64mem:$src1, i64i32imm:$src2),
+ "test{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0),
+ (implicit EFLAGS)]>;
+
+def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)]>;
+def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), GR64:$src2),
+ (implicit EFLAGS)]>;
+def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)]>;
+def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)]>;
+def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)]>;
+def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2),
+ (implicit EFLAGS)]>;
+def CMP64mi32 : RIi32<0x81, MRM7m, (outs),
+ (ins i64mem:$src1, i64i32imm:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Bit tests.
+// TODO: BTC, BTR, and BTS
+let Defs = [EFLAGS] in {
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Disable these instructions for now.
+//def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+// "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]>, TB;
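+// (Concretely: with a register index, the bit offset is not reduced modulo
+// the operand size, so "bt %rax, (%rdi)" may touch a quadword other than
+// the one at (%rdi); the immediate form below takes its offset modulo 64
+// and never does.)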
+
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi64 addr:$src1), i64immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+// Conditional moves
+let Uses = [EFLAGS], isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_B, EFLAGS))]>, TB;
+def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_AE, EFLAGS))]>, TB;
+def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_E, EFLAGS))]>, TB;
+def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NE, EFLAGS))]>, TB;
+def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_BE, EFLAGS))]>, TB;
+def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_A, EFLAGS))]>, TB;
+def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_L, EFLAGS))]>, TB;
+def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_GE, EFLAGS))]>, TB;
+def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_LE, EFLAGS))]>, TB;
+def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_G, EFLAGS))]>, TB;
+def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_S, EFLAGS))]>, TB;
+def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NS, EFLAGS))]>, TB;
+def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_P, EFLAGS))]>, TB;
+def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NP, EFLAGS))]>, TB;
+def CMOVO64rr : RI<0x40, MRMSrcReg, // if overflow, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_O, EFLAGS))]>, TB;
+def CMOVNO64rr : RI<0x41, MRMSrcReg, // if !overflow, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NO, EFLAGS))]>, TB;
+} // isCommutable = 1
+
+def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_B, EFLAGS))]>, TB;
+def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_AE, EFLAGS))]>, TB;
+def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_E, EFLAGS))]>, TB;
+def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NE, EFLAGS))]>, TB;
+def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_BE, EFLAGS))]>, TB;
+def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_A, EFLAGS))]>, TB;
+def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_L, EFLAGS))]>, TB;
+def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_GE, EFLAGS))]>, TB;
+def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_LE, EFLAGS))]>, TB;
+def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_G, EFLAGS))]>, TB;
+def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_S, EFLAGS))]>, TB;
+def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NS, EFLAGS))]>, TB;
+def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_P, EFLAGS))]>, TB;
+def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NP, EFLAGS))]>, TB;
+def CMOVO64rm : RI<0x40, MRMSrcMem, // if overflow, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_O, EFLAGS))]>, TB;
+def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NO, EFLAGS))]>, TB;
+} // isTwoAddress
+
+//===----------------------------------------------------------------------===//
+// Conversion Instructions...
+//
+
+// f64 -> signed i64
+def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse2_cvtsd2si64 VR128:$src))]>;
+def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src),
+ "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (int_x86_sse2_cvtsd2si64
+ (load addr:$src)))]>;
+def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint FR64:$src))]>;
+def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse2_cvttsd2si64 VR128:$src))]>;
+def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src),
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse2_cvttsd2si64
+ (load addr:$src)))]>;
+
+// Signed i64 -> f64
+def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp GR64:$src))]>;
+def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+
+let isTwoAddress = 1 in {
+def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
+ "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsi642sd VR128:$src1,
+ GR64:$src2))]>;
+def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
+ "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsi642sd VR128:$src1,
+ (loadi64 addr:$src2)))]>;
+} // isTwoAddress
+
+// Signed i64 -> f32
+def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp GR64:$src))]>;
+def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+
+let isTwoAddress = 1 in {
+ def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
+ "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse_cvtsi642ss VR128:$src1,
+ GR64:$src2))]>;
+ def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
+ "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse_cvtsi642ss VR128:$src1,
+ (loadi64 addr:$src2)))]>;
+}
+
+// f32 -> signed i64
+def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse_cvtss2si64 VR128:$src))]>;
+def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (int_x86_sse_cvtss2si64
+ (load addr:$src)))]>;
+def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint FR32:$src))]>;
+def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
+def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse_cvttss2si64 VR128:$src))]>;
+def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse_cvttss2si64 (load addr:$src)))]>;
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
+// equivalent due to implicit zero-extending, and it sometimes has a smaller
+// encoding.
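+// For example, "xorl %eax, %eax" encodes in 2 bytes (31 C0), while
+// "xorq %rax, %rax" needs a REX.W prefix (48 31 C0); since a 32-bit write
+// implicitly zeroes bits 63:32, the shorter form gives the same result.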
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], AddedComplexity = 1,
+ isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+ "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where top 32-bits are zero.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, i64immZExt32:$src)]>;
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//===----------------------------------------------------------------------===//
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym),
+ ".byte\t0x66; "
+ "leaq\t${sym:mem}(%rip), %rdi; "
+ ".word\t0x6666; "
+ "rex64; "
+ "call\t__tls_get_addr@PLT",
+ [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+let AddedComplexity = 5 in
+def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movq\t%gs:$src, $dst",
+ [(set GR64:$dst, (gsload addr:$src))]>, SegGS;
+
+let AddedComplexity = 5 in
+def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movq\t%fs:$src, $dst",
+ [(set GR64:$dst, (fsload addr:$src))]>, SegFS;
+
+//===----------------------------------------------------------------------===//
+// Atomic Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [RAX, EFLAGS], Uses = [RAX] in {
+def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "lock\n\t"
+ "cmpxchgq\t$swap,$ptr",
+ [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+let Constraints = "$val = $dst" in {
+let Defs = [EFLAGS] in
+def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
+ "lock\n\t"
+ "xadd\t$val, $ptr",
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ TB, LOCK;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
+ "xchg\t$val, $ptr",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
+}
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomDAGSchedInserter = 1 in {
+def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
+def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
+def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMXOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
+def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMNAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
+def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "#ATOMMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
+def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
+
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+
+// Calls
+// Direct PC-relative function call for the small code model: a 32-bit
+// displacement sign-extended to 64 bits.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86tailcall (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall GR64:$dst),
+ (CALL64r GR64:$dst)>;
+
+
+// tailcall stuff
+def : Pat<(X86tailcall GR32:$dst),
+ (TAILCALL)>;
+def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
+ (TAILCALL)>;
+def : Pat<(X86tailcall (i64 texternalsym:$dst)),
+ (TAILCALL)>;
+
+def : Pat<(X86tcret GR64:$dst, imm:$off),
+ (TCRETURNri64 GR64:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
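+// For example, "testq %rax, %rax" is 3 bytes (48 85 C0) versus 4 bytes for
+// "cmpq $0, %rax" (48 83 F8 00), and both set ZF/SF/PF identically for a
+// compare against zero.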
+def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads, with the operands swapped and the
+// condition inverted.
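+// For example, (X86cmov (loadi64 m), r, X86_COND_B, EFLAGS) selects
+// CMOVAE64rm r, m: cmov can fold a load only in its second operand, and
+// inverting the condition keeps the selected value the same.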
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS),
+ (CMOVAE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS),
+ (CMOVB64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS),
+ (CMOVNE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS),
+ (CMOVE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS),
+ (CMOVA64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS),
+ (CMOVBE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS),
+ (CMOVGE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS),
+ (CMOVL64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS),
+ (CMOVG64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS),
+ (CMOVLE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS),
+ (CMOVNP64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS),
+ (CMOVP64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS),
+ (CMOVNS64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS),
+ (CMOVS64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
+ (CMOVNO64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
+ (CMOVO64rm GR64:$src2, addr:$src1)>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload
+// When extloading from 16-bit and smaller memory locations into 64-bit registers,
+// use zero-extending loads so that the entire 64-bit register is defined, avoiding
+// partial-register updates.
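+// For example, the i8 case selects MOVZX64rm8 ("movzbq (%rdi), %rax"),
+// which writes all 64 bits of the destination, so no later use has to
+// merge with stale upper bits.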
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register are
+// defined after an extload.
+def : Pat<(extloadi64i32 addr:$src),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src),
+ x86_subreg_32bit)>;
+def : Pat<(extloadi16i1 addr:$src),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
+ x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(extloadi16i8 addr:$src),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
+ x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+// anyext
+def : Pat<(i64 (anyext GR8:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>;
+def : Pat<(i64 (anyext GR16:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>;
+def : Pat<(i16 (anyext GR8:$src)),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext GR8:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
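+// For example, "addq $128, %rdi" needs the imm32 form (REX.W 81 /0 id,
+// 7 bytes), while "subq $-128, %rdi" fits the imm8 form (REX.W 83 /5 ib,
+// 4 bytes) and produces the same value (only EFLAGS differ, and these
+// patterns match flag-dead adds).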
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+ Requires<[In64BitMode]>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
+ Requires<[In64BitMode]>;
+
+// trunc patterns
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks.
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
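+// Background: an instruction carrying any REX prefix cannot encode
+// AH/BH/CH/DH (those encodings mean SPL/BPL/SIL/DIL instead), hence the
+// _NOREX instruction variants used below.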
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl_su GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_16bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD),
+ x86_subreg_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL:$amt, 63)),
+ (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+ (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL:$amt, 63)),
+ (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+ (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),
+ (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+ (SAR64mCL addr:$dst)>;
+
+// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
+def : Pat<(or (srl GR64:$src1, CL:$amt),
+ (shl GR64:$src2, (sub 64, CL:$amt))),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
+ (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
+ (SHRD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),
+ (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
+ (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ addr:$dst),
+ (SHRD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+ (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
+ GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+
+// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+def : Pat<(or (shl GR64:$src1, CL:$amt),
+ (srl GR64:$src2, (sub 64, CL:$amt))),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
+ (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
+ (SHLD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),
+ (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
+ (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ addr:$dst),
+ (SHLD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+ (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
+ GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+
+// X86 specific add which produces a flag.
+def : Pat<(addc GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(addc GR64:$src1, (load addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
+          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+def : Pat<(subc GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(subc GR64:$src1, (load addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(subc GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// Register-Register Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Addition with EFLAGS result
+def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Memory Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+
+// Register-Integer Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Memory-Register Subtraction with EFLAGS result
+def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB64mr addr:$dst, GR64:$src2)>;
+
+// Memory-Integer Subtraction with EFLAGS result
+def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Signed Integer Multiplication with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Memory Signed Integer Multiplication with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (IMUL64rm GR64:$src1, addr:$src2)>;
+
+// Register-Integer Signed Integer Multiplication with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Memory-Integer Signed Integer Multiplication with EFLAGS result
+def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// INC and DEC with EFLAGS result. Note that these do not set CF.
+def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
+ (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64_16m addr:$dst)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
+ (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64_16m addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
+ (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64_32m addr:$dst)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
+ (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64_32m addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)),
+ (INC64r GR64:$src)>;
+def : Pat<(parallel (store (i64 (X86inc_flag (loadi64 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64m addr:$dst)>;
+def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)),
+ (DEC64r GR64:$src)>;
+def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64m addr:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE Instructions
+//===----------------------------------------------------------------------===//
+
+// Move instructions...
+
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
+def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE4.1 Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+
+let isTwoAddress = 1 in {
+ multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize, REX_W;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize, REX_W;
+ }
+}
+
+defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
new file mode 100644
index 0000000..39504cd
--- /dev/null
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -0,0 +1,168 @@
+//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle X86'isms in a clean way.
+//
+// The BuildMem function may be used with the BuildMI function to add entire
+// memory references in a single, typed, function call. X86 memory references
+// can be very complex expressions (described in the README), so wrapping them
+// up behind an easier-to-use interface makes sense. Descriptions of the
+// functions are included below.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Scale, Index, Displacement.
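+// For example, the address [EBX + 4*ECX + 12] is expressed as Base=EBX,
+// Scale=4, Index=ECX, Displacement=12.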
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRBUILDER_H
+#define X86INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// X86AddressMode - This struct holds a generalized full x86 address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with BP or SP, with Disp adjusted accordingly. The displacement may
+/// also include the offset of a global value.
+struct X86AddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned Scale;
+ unsigned IndexReg;
+ unsigned Disp;
+ GlobalValue *GV;
+
+ X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) {
+ Base.Reg = 0;
+ }
+};
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
+///
+inline const MachineInstrBuilder &addDirectMem(const MachineInstrBuilder &MIB,
+ unsigned Reg) {
+ // Because memory references are always represented with four
+ // values, this adds: Reg, [1, NoReg, 0] to the instruction.
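+  // A usage sketch (assuming MBB/MI/DL/TII are in scope and X86::MOV32rm is
+  // the desired opcode):
+  //   addDirectMem(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), DestReg),
+  //                X86::EAX);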
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0);
+}
+
+inline const MachineInstrBuilder &addLeaOffset(const MachineInstrBuilder &MIB,
+ int Offset) {
+ return MIB.addImm(1).addReg(0).addImm(Offset);
+}
+
+inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB,
+ int Offset) {
+ return addLeaOffset(MIB, Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no scale or index, but with a
+/// displacement. An example is: DWORD PTR [EAX + 4].
+///
+inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill,
+ int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+inline const MachineInstrBuilder &addLeaRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill,
+ int Offset) {
+ return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1,
+ unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
+ .addReg(Reg2, getKillRegState(isKill2)).addImm(0);
+}
+
+inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+  assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
+ if (AM.BaseType == X86AddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ else
+    assert(0);
+ MIB.addImm(AM.Scale).addReg(AM.IndexReg);
+ if (AM.GV)
+ return MIB.addGlobalAddress(AM.GV, AM.Disp);
+ else
+ return MIB.addImm(AM.Disp);
+}
+
+inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ return addLeaAddress(MIB, AM).addReg(0);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function.  The
+/// reference keeps the abstract FrameIndex as its base register until frame
+/// lowering resolves it to a concrete register and offset; a constant offset
+/// may be specified as well.
+///
+inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Flags = 0;
+ if (TID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (TID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FI),
+ Flags,
+ MFI.getObjectOffset(FI) + Offset,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference uses the abstract ConstantPoolIndex which is retained until
+/// either machine code emission or assembly output. In PIC mode on x86-32,
+/// the GlobalBaseReg parameter can be used to make this a
+/// GlobalBaseReg-relative reference.
+///
+inline const MachineInstrBuilder &
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ unsigned GlobalBaseReg = 0) {
+ //FIXME: factor this
+ return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0)
+ .addConstantPoolIndex(CPI).addReg(0);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
new file mode 100644
index 0000000..bc7def4
--- /dev/null
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -0,0 +1,597 @@
+//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 x87 FPU instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPStack specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
+ SDTCisVT<1, f80>]>;
+def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+
+def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
+ [SDNPHasChain, SDNPInFlag, SDNPMayStore]>;
+def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
+ [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore]>;
+def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore]>;
+def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore]>;
+def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack pattern fragments
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+
+def fpimmneg1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+// Some 'special' instructions
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
+ (outs), (ins i16mem:$dst, RFP32:$src),
+ "##FP32_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
+ (outs), (ins i32mem:$dst, RFP32:$src),
+ "##FP32_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
+ (outs), (ins i64mem:$dst, RFP32:$src),
+ "##FP32_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
+ def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
+ (outs), (ins i16mem:$dst, RFP64:$src),
+ "##FP64_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
+ (outs), (ins i32mem:$dst, RFP64:$src),
+ "##FP64_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
+ (outs), (ins i64mem:$dst, RFP64:$src),
+ "##FP64_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
+ def FP80_TO_INT16_IN_MEM : I<0, Pseudo,
+ (outs), (ins i16mem:$dst, RFP80:$src),
+ "##FP80_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT32_IN_MEM : I<0, Pseudo,
+ (outs), (ins i32mem:$dst, RFP80:$src),
+ "##FP80_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT64_IN_MEM : I<0, Pseudo,
+ (outs), (ins i64mem:$dst, RFP80:$src),
+ "##FP80_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
+}
+
+let isTerminator = 1 in
+ let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
+ def FP_REG_KILL : I<0, Pseudo, (outs), (ins), "##FP_REG_KILL", []>;
+
+// All FP Stack operations are represented with four instructions here. The
+// first three instructions, generated by the instruction selector, use "RFP32"
+// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
+// 64-bit or 80-bit floating point values. These sizes apply to the values,
+// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
+// copied to each other without losing information. These instructions are all
+// pseudo instructions and use the "_Fp" suffix.
+// In some cases there are additional variants with a mixture of different
+// register sizes.
+// The fourth instruction is defined with FPI, which is the actual instruction
+// emitted by the assembler. These use "RST" registers, although frequently
+// the actual register(s) used are implicit. These are always 80 bits.
+// The FP stackifier pass converts one to the other after register allocation
+// occurs.
+//
+// Note that the FpI instruction should have instruction selection info (e.g.
+// a pattern) and the FPI instruction should have emission info (e.g. opcode
+// encoding and asm printing info).
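+// For example, the "add" multiclasses below produce ADD_Fp32/ADD_Fp64/
+// ADD_Fp80 pseudos that carry the selection patterns, plus FPI defs such as
+// ADD_F32m that carry the opcode and asm string; the stackifier rewrites the
+// former into the latter.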
+
+// Pseudo Instructions for FP stack return values.
+def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
+def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
+def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
+
+// FpGET_ST1* should only be issued *after* an FpGET_ST0* has been issued when
+// there are two values live out on the stack from a call or inlineasm. This
+// magic is handled by the stackifier. It is not valid to emit FpGET_ST1* and
+// then FpGET_ST0*. In addition, it is invalid for any FP-using operations to
+// occur between them.
+def FpGET_ST1_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+def FpGET_ST1_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+def FpGET_ST1_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+
+let Defs = [ST0] in {
+def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR
+def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR
+def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR
+}
+
+let Defs = [ST1] in {
+def FpSET_ST1_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(1) = FPR
+def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR
+def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
+}
+
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
+
+// Register copies.  These are just copies; the shortening ones do not truncate.
+let neverHasSideEffects = 1 in {
+ def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
+ def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
+ def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
+ def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
+ def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
+ def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
+ def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
+ def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
+ def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
+}
+
+// Factoring for arithmetic.
+multiclass FPBinary_rr<SDNode OpNode> {
+// Register op register -> register
+// These are separated out because they have no reversed form.
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
+ [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
+}
+// The FopST0 series are not included here because of the irregularities
+// in where the 'r' goes in assembly output.
+// These instructions cannot address 80-bit memory.
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+// ST(0) = ST(0) + [mem]
+def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+def _Fp80m64: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+ !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { let mayLoad = 1; }
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+ !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { let mayLoad = 1; }
+// ST(0) = ST(0) + [memint]
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+ !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { let mayLoad = 1; }
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { let mayLoad = 1; }
+}
+
+defm ADD : FPBinary_rr<fadd>;
+defm SUB : FPBinary_rr<fsub>;
+defm MUL : FPBinary_rr<fmul>;
+defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary<fadd, MRM0m, "add">;
+defm SUB : FPBinary<fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<fsub, MRM5m, "subr">;
+defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm DIV : FPBinary<fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+
+class FPST0rInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DE;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
+// we have to put some 'r's in and take them out of weird places.
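+// In the asm strings, "{a|b}" picks the AT&T spelling (first alternative) or
+// the Intel spelling (second); e.g. SUBR_FrST0 below prints as "fsub" in
+// AT&T output but "fsubr" in Intel output.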
+def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">;
+def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
+def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
+def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">;
+def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
+def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
+
+// Unary operations.
+multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src))]>;
+def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
+}
+
+defm CHS : FPUnary<fneg, 0xE0, "fchs">;
+defm ABS : FPUnary<fabs, 0xE1, "fabs">;
+defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
+defm SIN : FPUnary<fsin, 0xFE, "fsin">;
+defm COS : FPUnary<fcos, 0xFF, "fcos">;
+
+let neverHasSideEffects = 1 in {
+def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
+def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
+def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
+}
+def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+
+// Floating point cmovs.
+multiclass FPCMov<PatLeaf cc> {
+ def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
+ CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+ cc, EFLAGS))]>;
+ def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
+ CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ cc, EFLAGS))]>;
+ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
+ CondMovFP,
+ [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
+ cc, EFLAGS))]>;
+}
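+// The cmov pseudos are two-address: $src1 is tied to $dst, mirroring the
+// hardware fcmov, which always writes ST(0).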
+let Uses = [EFLAGS], isTwoAddress = 1 in {
+defm CMOVB : FPCMov<X86_COND_B>;
+defm CMOVBE : FPCMov<X86_COND_BE>;
+defm CMOVE : FPCMov<X86_COND_E>;
+defm CMOVP : FPCMov<X86_COND_P>;
+defm CMOVNB : FPCMov<X86_COND_AE>;
+defm CMOVNBE: FPCMov<X86_COND_A>;
+defm CMOVNE : FPCMov<X86_COND_NE>;
+defm CMOVNP : FPCMov<X86_COND_NP>;
+}
+
+// These are not factored because there's no clean way to pass DA/DB.
+def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+                 "fcmovu\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
+
+// Floating point loads & stores.
+let canFoldAsLoad = 1 in {
+def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (loadf32 addr:$src))]>;
+let isReMaterializable = 1, mayHaveSideEffects = 1 in
+ def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (loadf64 addr:$src))]>;
+def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (loadf80 addr:$src))]>;
+}
+def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
+def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
+def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
+def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+
+def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+ [(store RFP32:$src, addr:$op)]>;
+def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+ [(truncstoref32 RFP64:$src, addr:$op)]>;
+def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
+ [(store RFP64:$src, addr:$op)]>;
+def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref32 RFP80:$src, addr:$op)]>;
+def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref64 RFP80:$src, addr:$op)]>;
+// FST does not support 80-bit memory target; FSTP must be used.
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
+ [(store RFP80:$src, addr:$op)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
+def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+
+let mayLoad = 1 in {
+def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
+def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
+def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
+def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
+def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
+def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
+}
+let mayStore = 1 in {
+def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
+def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
+def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
+def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
+def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
+def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
+def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
+def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
+def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
+def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
+}
+
+// FISTTP requires SSE3 even though it's an FPStack op.
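+// Unlike FIST, it truncates toward zero regardless of the rounding mode in
+// the FP control word, matching C's float-to-integer conversion semantics.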
+def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+
+let mayStore = 1 in {
+def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
+}
+
+// FP Stack manipulation instructions.
+def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9;
+def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD;
+def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD;
+def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9;
+
+// Floating point constant loads.
+let isReMaterializable = 1 in {
+def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm0)]>;
+def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm1)]>;
+def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm0)]>;
+def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm1)]>;
+def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm0)]>;
+def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm1)]>;
+}
+
+def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz">, D9;
+def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9;
+
+
+// Floating point compares.
+let Defs = [EFLAGS] in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(X86cmp RFP32:$lhs, RFP32:$rhs),
+ (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(X86cmp RFP64:$lhs, RFP64:$rhs),
+ (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(X86cmp RFP80:$lhs, RFP80:$rhs),
+ (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+}
+
+let Defs = [EFLAGS], Uses = [ST0] in {
+def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucom\t$reg">, DD;
+def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucomp\t$reg">, DD;
+def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
+ (outs), (ins),
+ "fucompp">, DA;
+
+def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+}
+
+// Floating point flag ops.
+let Defs = [AX] in
+def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
+ (outs), (ins), "fnstsw", []>, DF;
+
+def FNSTCW16m : I<0xD9, MRM7m,                 // [mem16] = X87 control word
+ (outs), (ins i16mem:$dst), "fnstcw\t$dst",
+ [(X86fp_cwd_get16 addr:$dst)]>;
+
+let mayLoad = 1 in
+def FLDCW16m : I<0xD9, MRM5m,                   // X87 control word = [mem16]
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 / f80 values.
+def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+
+// Required for CALLs that return f32 / f64 / f80 values.
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op, RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op, RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op, RFP80:$src)>;
+
+// Floating point constants -0.0 and -1.0.
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
+def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
+def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
+
+// Used to convert i64 to f64, since there isn't an SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+
+// FP extensions map onto simple pseudo-value conversions if they are to/from
+// the FP stack.
+def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>,
+ Requires<[FPStackf64]>;
+
+// FP truncations map onto simple pseudo-value conversions if they are to/from
+// the FP stack. We have validated that only value-preserving truncations make
+// it through isel.
+def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>,
+ Requires<[FPStackf64]>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
new file mode 100644
index 0000000..eeed5bd
--- /dev/null
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -0,0 +1,285 @@
+//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>; def RawFrm : Format<1>;
+def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
+def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
+def MRMSrcMem : Format<6>;
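+// MRM0r-MRM7r and MRM0m-MRM7m mark instructions whose ModR/M 'reg' field is
+// an opcode extension (the /0../7 digit), with a register or memory r/m
+// operand respectively.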
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
+def MRMInitReg : Format<32>;
+
+
+// ImmType - This specifies the immediate type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class ImmType<bits<3> val> {
+ bits<3> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm16 : ImmType<2>;
+def Imm32 : ImmType<3>;
+def Imm64 : ImmType<4>;
+
+// FPFormat - This specifies what form this FP instruction has. This is used by
+// the Floating-Point stackifier pass.
+class FPFormat<bits<3> val> {
+ bits<3> Value = val;
+}
+def NotFP : FPFormat<0>;
+def ZeroArgFP : FPFormat<1>;
+def OneArgFP : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP : FPFormat<4>;
+def CompareFP : FPFormat<5>;
+def CondMovFP : FPFormat<6>;
+def SpecialFP : FPFormat<7>;
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
+class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W { bit hasREX_WPrefix = 1; }
+class LOCK { bit hasLockPrefix = 1; }
+class SegFS { bits<2> SegOvrBits = 1; }
+class SegGS { bits<2> SegOvrBits = 2; }
+class TB { bits<4> Prefix = 1; }
+class REP { bits<4> Prefix = 2; }
+class D8 { bits<4> Prefix = 3; }
+class D9 { bits<4> Prefix = 4; }
+class DA { bits<4> Prefix = 5; }
+class DB { bits<4> Prefix = 6; }
+class DC { bits<4> Prefix = 7; }
+class DD { bits<4> Prefix = 8; }
+class DE { bits<4> Prefix = 9; }
+class DF { bits<4> Prefix = 10; }
+class XD { bits<4> Prefix = 11; }
+class XS { bits<4> Prefix = 12; }
+class T8 { bits<4> Prefix = 13; }
+class TA { bits<4> Prefix = 14; }
+
+class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
+ string AsmStr>
+ : Instruction {
+ let Namespace = "X86";
+
+ bits<8> Opcode = opcod;
+ Format Form = f;
+ bits<6> FormBits = Form.Value;
+ ImmType ImmT = i;
+ bits<3> ImmTypeBits = ImmT.Value;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ string AsmString = AsmStr;
+
+ //
+ // Attributes specific to X86 instructions...
+ //
+ bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
+
+ bits<4> Prefix = 0; // Which prefix byte does this inst have?
+  bit hasREX_WPrefix = 0;  // Does this inst require the REX.W prefix?
+ FPFormat FPForm; // What flavor of FP instruction is this?
+ bits<3> FPFormBits = 0;
+ bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
+ bits<2> SegOvrBits = 0; // Segment override prefix.
+}
+
+class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+ : X86Inst<o, f, NoImm, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm8 , outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// FPStack Instruction Templates:
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
+ : I<o, F, outs, ins, asm, []> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not predicated.
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
+ let FPForm = fp; let FPFormBits = FPForm.Value;
+ let Pattern = pattern;
+}
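+// The FPFormat recorded here is what the FP stackifier pass later uses to
+// rewrite these pseudos onto the real x87 register stack.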
+
+// SSE1 Instruction Templates:
+//
+// SSI - SSE1 instructions with XS prefix.
+// PSI - SSE1 instructions with TB prefix.
+// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+
+// SSE2 Instruction Templates:
+//
+// SDI - SSE2 instructions with XD prefix.
+// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// PDI - SSE2 instructions with TB and OpSize prefixes.
+// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+
+// SSE3 Instruction Templates:
+//
+// S3I - SSE3 instructions with TB and OpSize prefixes.
+// S3SI - SSE3 instructions with XS prefix.
+// S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+
+
+// SSSE3 Instruction Templates:
+//
+// SS38I - SSSE3 instructions with T8 prefix.
+// SS3AI - SSSE3 instructions with TA prefix.
+//
+// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit
+// versions use the MMX registers. We put those instructions here because they
+// fit into the SSSE3 instruction category better than the MMX category.
+
+class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+
+// SSE4.1 Instruction Templates:
+//
+// SS48I - SSE 4.1 instructions with T8 prefix.
+// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
+//
+class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+
+// SSE4.2 Instruction Templates:
+//
+// SS428I - SSE 4.2 instructions with T8 prefix.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+
+// X86-64 Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SSI<o, F, outs, ins, asm, pattern>, REX_W;
+class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SDI<o, F, outs, ins, asm, pattern>, REX_W;
+class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : PDI<o, F, outs, ins, asm, pattern>, REX_W;
+
+// MMX Instruction templates
+//
+
+// MMXI - MMX instructions with TB prefix.
+// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
+// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXRI  - MMX instructions with TB and REX.W prefixes.
+// MMXID - MMX instructions with XD prefix.
+// MMXIS - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>;
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
+
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
new file mode 100644
index 0000000..2cd3733
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -0,0 +1,3227 @@
+//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "X86.h"
+#include "X86GenInstrInfo.inc"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetAsmInfo.h"
+
+using namespace llvm;
+
+namespace {
+ cl::opt<bool>
+ NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+ cl::opt<bool>
+ PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+ cl::opt<bool>
+ ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
+}
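+// Being cl::opts, these become ordinary command-line flags in tools that link
+// this backend; e.g. "llc -disable-spill-fusing" turns off spill fusing when
+// debugging the fold tables below.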
+
+X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
+ : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
+ TM(tm), RI(tm, *this) {
+ SmallVector<unsigned,16> AmbEntries;
+ static const unsigned OpTbl2Addr[][2] = {
+ { X86::ADC32ri, X86::ADC32mi },
+ { X86::ADC32ri8, X86::ADC32mi8 },
+ { X86::ADC32rr, X86::ADC32mr },
+ { X86::ADC64ri32, X86::ADC64mi32 },
+ { X86::ADC64ri8, X86::ADC64mi8 },
+ { X86::ADC64rr, X86::ADC64mr },
+ { X86::ADD16ri, X86::ADD16mi },
+ { X86::ADD16ri8, X86::ADD16mi8 },
+ { X86::ADD16rr, X86::ADD16mr },
+ { X86::ADD32ri, X86::ADD32mi },
+ { X86::ADD32ri8, X86::ADD32mi8 },
+ { X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD64ri32, X86::ADD64mi32 },
+ { X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64rr, X86::ADD64mr },
+ { X86::ADD8ri, X86::ADD8mi },
+ { X86::ADD8rr, X86::ADD8mr },
+ { X86::AND16ri, X86::AND16mi },
+ { X86::AND16ri8, X86::AND16mi8 },
+ { X86::AND16rr, X86::AND16mr },
+ { X86::AND32ri, X86::AND32mi },
+ { X86::AND32ri8, X86::AND32mi8 },
+ { X86::AND32rr, X86::AND32mr },
+ { X86::AND64ri32, X86::AND64mi32 },
+ { X86::AND64ri8, X86::AND64mi8 },
+ { X86::AND64rr, X86::AND64mr },
+ { X86::AND8ri, X86::AND8mi },
+ { X86::AND8rr, X86::AND8mr },
+ { X86::DEC16r, X86::DEC16m },
+ { X86::DEC32r, X86::DEC32m },
+ { X86::DEC64_16r, X86::DEC64_16m },
+ { X86::DEC64_32r, X86::DEC64_32m },
+ { X86::DEC64r, X86::DEC64m },
+ { X86::DEC8r, X86::DEC8m },
+ { X86::INC16r, X86::INC16m },
+ { X86::INC32r, X86::INC32m },
+ { X86::INC64_16r, X86::INC64_16m },
+ { X86::INC64_32r, X86::INC64_32m },
+ { X86::INC64r, X86::INC64m },
+ { X86::INC8r, X86::INC8m },
+ { X86::NEG16r, X86::NEG16m },
+ { X86::NEG32r, X86::NEG32m },
+ { X86::NEG64r, X86::NEG64m },
+ { X86::NEG8r, X86::NEG8m },
+ { X86::NOT16r, X86::NOT16m },
+ { X86::NOT32r, X86::NOT32m },
+ { X86::NOT64r, X86::NOT64m },
+ { X86::NOT8r, X86::NOT8m },
+ { X86::OR16ri, X86::OR16mi },
+ { X86::OR16ri8, X86::OR16mi8 },
+ { X86::OR16rr, X86::OR16mr },
+ { X86::OR32ri, X86::OR32mi },
+ { X86::OR32ri8, X86::OR32mi8 },
+ { X86::OR32rr, X86::OR32mr },
+ { X86::OR64ri32, X86::OR64mi32 },
+ { X86::OR64ri8, X86::OR64mi8 },
+ { X86::OR64rr, X86::OR64mr },
+ { X86::OR8ri, X86::OR8mi },
+ { X86::OR8rr, X86::OR8mr },
+ { X86::ROL16r1, X86::ROL16m1 },
+ { X86::ROL16rCL, X86::ROL16mCL },
+ { X86::ROL16ri, X86::ROL16mi },
+ { X86::ROL32r1, X86::ROL32m1 },
+ { X86::ROL32rCL, X86::ROL32mCL },
+ { X86::ROL32ri, X86::ROL32mi },
+ { X86::ROL64r1, X86::ROL64m1 },
+ { X86::ROL64rCL, X86::ROL64mCL },
+ { X86::ROL64ri, X86::ROL64mi },
+ { X86::ROL8r1, X86::ROL8m1 },
+ { X86::ROL8rCL, X86::ROL8mCL },
+ { X86::ROL8ri, X86::ROL8mi },
+ { X86::ROR16r1, X86::ROR16m1 },
+ { X86::ROR16rCL, X86::ROR16mCL },
+ { X86::ROR16ri, X86::ROR16mi },
+ { X86::ROR32r1, X86::ROR32m1 },
+ { X86::ROR32rCL, X86::ROR32mCL },
+ { X86::ROR32ri, X86::ROR32mi },
+ { X86::ROR64r1, X86::ROR64m1 },
+ { X86::ROR64rCL, X86::ROR64mCL },
+ { X86::ROR64ri, X86::ROR64mi },
+ { X86::ROR8r1, X86::ROR8m1 },
+ { X86::ROR8rCL, X86::ROR8mCL },
+ { X86::ROR8ri, X86::ROR8mi },
+ { X86::SAR16r1, X86::SAR16m1 },
+ { X86::SAR16rCL, X86::SAR16mCL },
+ { X86::SAR16ri, X86::SAR16mi },
+ { X86::SAR32r1, X86::SAR32m1 },
+ { X86::SAR32rCL, X86::SAR32mCL },
+ { X86::SAR32ri, X86::SAR32mi },
+ { X86::SAR64r1, X86::SAR64m1 },
+ { X86::SAR64rCL, X86::SAR64mCL },
+ { X86::SAR64ri, X86::SAR64mi },
+ { X86::SAR8r1, X86::SAR8m1 },
+ { X86::SAR8rCL, X86::SAR8mCL },
+ { X86::SAR8ri, X86::SAR8mi },
+ { X86::SBB32ri, X86::SBB32mi },
+ { X86::SBB32ri8, X86::SBB32mi8 },
+ { X86::SBB32rr, X86::SBB32mr },
+ { X86::SBB64ri32, X86::SBB64mi32 },
+ { X86::SBB64ri8, X86::SBB64mi8 },
+ { X86::SBB64rr, X86::SBB64mr },
+ { X86::SHL16rCL, X86::SHL16mCL },
+ { X86::SHL16ri, X86::SHL16mi },
+ { X86::SHL32rCL, X86::SHL32mCL },
+ { X86::SHL32ri, X86::SHL32mi },
+ { X86::SHL64rCL, X86::SHL64mCL },
+ { X86::SHL64ri, X86::SHL64mi },
+ { X86::SHL8rCL, X86::SHL8mCL },
+ { X86::SHL8ri, X86::SHL8mi },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL },
+ { X86::SHLD16rri8, X86::SHLD16mri8 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL },
+ { X86::SHLD32rri8, X86::SHLD32mri8 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL },
+ { X86::SHLD64rri8, X86::SHLD64mri8 },
+ { X86::SHR16r1, X86::SHR16m1 },
+ { X86::SHR16rCL, X86::SHR16mCL },
+ { X86::SHR16ri, X86::SHR16mi },
+ { X86::SHR32r1, X86::SHR32m1 },
+ { X86::SHR32rCL, X86::SHR32mCL },
+ { X86::SHR32ri, X86::SHR32mi },
+ { X86::SHR64r1, X86::SHR64m1 },
+ { X86::SHR64rCL, X86::SHR64mCL },
+ { X86::SHR64ri, X86::SHR64mi },
+ { X86::SHR8r1, X86::SHR8m1 },
+ { X86::SHR8rCL, X86::SHR8mCL },
+ { X86::SHR8ri, X86::SHR8mi },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL },
+ { X86::SHRD16rri8, X86::SHRD16mri8 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL },
+ { X86::SHRD32rri8, X86::SHRD32mri8 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL },
+ { X86::SHRD64rri8, X86::SHRD64mri8 },
+ { X86::SUB16ri, X86::SUB16mi },
+ { X86::SUB16ri8, X86::SUB16mi8 },
+ { X86::SUB16rr, X86::SUB16mr },
+ { X86::SUB32ri, X86::SUB32mi },
+ { X86::SUB32ri8, X86::SUB32mi8 },
+ { X86::SUB32rr, X86::SUB32mr },
+ { X86::SUB64ri32, X86::SUB64mi32 },
+ { X86::SUB64ri8, X86::SUB64mi8 },
+ { X86::SUB64rr, X86::SUB64mr },
+ { X86::SUB8ri, X86::SUB8mi },
+ { X86::SUB8rr, X86::SUB8mr },
+ { X86::XOR16ri, X86::XOR16mi },
+ { X86::XOR16ri8, X86::XOR16mi8 },
+ { X86::XOR16rr, X86::XOR16mr },
+ { X86::XOR32ri, X86::XOR32mi },
+ { X86::XOR32ri8, X86::XOR32mi8 },
+ { X86::XOR32rr, X86::XOR32mr },
+ { X86::XOR64ri32, X86::XOR64mi32 },
+ { X86::XOR64ri8, X86::XOR64mi8 },
+ { X86::XOR64rr, X86::XOR64mr },
+ { X86::XOR8ri, X86::XOR8mi },
+ { X86::XOR8rr, X86::XOR8mr }
+ };
+
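+  // Each table is registered in both directions: RegOp2MemOpTable* maps a
+  // register-form opcode to its memory form for folding, and MemOp2RegOpTable
+  // maps back for unfolding. AuxInfo packs the folded operand index in its
+  // low four bits, a folded-load flag in bit 4 and a folded-store flag in
+  // bit 5.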
+ for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
+ unsigned RegOp = OpTbl2Addr[i][0];
+ unsigned MemOp = OpTbl2Addr[i][1];
+ if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
+ MemOp)).second)
+ assert(false && "Duplicated entries?");
+    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0, folded load and store
+ if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+ std::make_pair(RegOp,
+ AuxInfo))).second)
+ AmbEntries.push_back(MemOp);
+ }
+
+ // If the third value is 1, then it's folding either a load or a store.
+ static const unsigned OpTbl0[][3] = {
+ { X86::BT16ri8, X86::BT16mi8, 1 },
+ { X86::BT32ri8, X86::BT32mi8, 1 },
+ { X86::BT64ri8, X86::BT64mi8, 1 },
+ { X86::CALL32r, X86::CALL32m, 1 },
+ { X86::CALL64r, X86::CALL64m, 1 },
+ { X86::CMP16ri, X86::CMP16mi, 1 },
+ { X86::CMP16ri8, X86::CMP16mi8, 1 },
+ { X86::CMP16rr, X86::CMP16mr, 1 },
+ { X86::CMP32ri, X86::CMP32mi, 1 },
+ { X86::CMP32ri8, X86::CMP32mi8, 1 },
+ { X86::CMP32rr, X86::CMP32mr, 1 },
+ { X86::CMP64ri32, X86::CMP64mi32, 1 },
+ { X86::CMP64ri8, X86::CMP64mi8, 1 },
+ { X86::CMP64rr, X86::CMP64mr, 1 },
+ { X86::CMP8ri, X86::CMP8mi, 1 },
+ { X86::CMP8rr, X86::CMP8mr, 1 },
+ { X86::DIV16r, X86::DIV16m, 1 },
+ { X86::DIV32r, X86::DIV32m, 1 },
+ { X86::DIV64r, X86::DIV64m, 1 },
+ { X86::DIV8r, X86::DIV8m, 1 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr, 0 },
+ { X86::IDIV16r, X86::IDIV16m, 1 },
+ { X86::IDIV32r, X86::IDIV32m, 1 },
+ { X86::IDIV64r, X86::IDIV64m, 1 },
+ { X86::IDIV8r, X86::IDIV8m, 1 },
+ { X86::IMUL16r, X86::IMUL16m, 1 },
+ { X86::IMUL32r, X86::IMUL32m, 1 },
+ { X86::IMUL64r, X86::IMUL64m, 1 },
+ { X86::IMUL8r, X86::IMUL8m, 1 },
+ { X86::JMP32r, X86::JMP32m, 1 },
+ { X86::JMP64r, X86::JMP64m, 1 },
+ { X86::MOV16ri, X86::MOV16mi, 0 },
+ { X86::MOV16rr, X86::MOV16mr, 0 },
+ { X86::MOV32ri, X86::MOV32mi, 0 },
+ { X86::MOV32rr, X86::MOV32mr, 0 },
+ { X86::MOV64ri32, X86::MOV64mi32, 0 },
+ { X86::MOV64rr, X86::MOV64mr, 0 },
+ { X86::MOV8ri, X86::MOV8mi, 0 },
+ { X86::MOV8rr, X86::MOV8mr, 0 },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDmr, 0 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, 0 },
+ { X86::MOVDQArr, X86::MOVDQAmr, 0 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 },
+ { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 },
+ { X86::MOVSDrr, X86::MOVSDmr, 0 },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 },
+ { X86::MOVSSrr, X86::MOVSSmr, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDmr, 0 },
+ { X86::MOVUPSrr, X86::MOVUPSmr, 0 },
+ { X86::MUL16r, X86::MUL16m, 1 },
+ { X86::MUL32r, X86::MUL32m, 1 },
+ { X86::MUL64r, X86::MUL64m, 1 },
+ { X86::MUL8r, X86::MUL8m, 1 },
+ { X86::SETAEr, X86::SETAEm, 0 },
+ { X86::SETAr, X86::SETAm, 0 },
+ { X86::SETBEr, X86::SETBEm, 0 },
+ { X86::SETBr, X86::SETBm, 0 },
+ { X86::SETEr, X86::SETEm, 0 },
+ { X86::SETGEr, X86::SETGEm, 0 },
+ { X86::SETGr, X86::SETGm, 0 },
+ { X86::SETLEr, X86::SETLEm, 0 },
+ { X86::SETLr, X86::SETLm, 0 },
+ { X86::SETNEr, X86::SETNEm, 0 },
+ { X86::SETNOr, X86::SETNOm, 0 },
+ { X86::SETNPr, X86::SETNPm, 0 },
+ { X86::SETNSr, X86::SETNSm, 0 },
+ { X86::SETOr, X86::SETOm, 0 },
+ { X86::SETPr, X86::SETPm, 0 },
+ { X86::SETSr, X86::SETSm, 0 },
+ { X86::TAILJMPr, X86::TAILJMPm, 1 },
+ { X86::TEST16ri, X86::TEST16mi, 1 },
+ { X86::TEST32ri, X86::TEST32mi, 1 },
+ { X86::TEST64ri32, X86::TEST64mi32, 1 },
+ { X86::TEST8ri, X86::TEST8mi, 1 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
+ unsigned RegOp = OpTbl0[i][0];
+ unsigned MemOp = OpTbl0[i][1];
+ if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
+ MemOp)).second)
+ assert(false && "Duplicated entries?");
+ unsigned FoldedLoad = OpTbl0[i][2];
+ // Index 0, folded load or store.
+ unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
+ if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
+ if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+ std::make_pair(RegOp, AuxInfo))).second)
+ AmbEntries.push_back(MemOp);
+ }
+
+ static const unsigned OpTbl1[][2] = {
+ { X86::CMP16rr, X86::CMP16rm },
+ { X86::CMP32rr, X86::CMP32rm },
+ { X86::CMP64rr, X86::CMP64rm },
+ { X86::CMP8rr, X86::CMP8rm },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
+ { X86::FsMOVAPDrr, X86::MOVSDrm },
+ { X86::FsMOVAPSrr, X86::MOVSSrm },
+ { X86::IMUL16rri, X86::IMUL16rmi },
+ { X86::IMUL16rri8, X86::IMUL16rmi8 },
+ { X86::IMUL32rri, X86::IMUL32rmi },
+ { X86::IMUL32rri8, X86::IMUL32rmi8 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
+ { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
+ { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
+ { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
+ { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
+ { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
+ { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
+ { X86::MOV16rr, X86::MOV16rm },
+ { X86::MOV32rr, X86::MOV32rm },
+ { X86::MOV64rr, X86::MOV64rm },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm },
+ { X86::MOV8rr, X86::MOV8rm },
+ { X86::MOVAPDrr, X86::MOVAPDrm },
+ { X86::MOVAPSrr, X86::MOVAPSrm },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm },
+ { X86::MOVDQArr, X86::MOVDQArm },
+ { X86::MOVSD2PDrr, X86::MOVSD2PDrm },
+ { X86::MOVSDrr, X86::MOVSDrm },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm },
+ { X86::MOVSS2PSrr, X86::MOVSS2PSrm },
+ { X86::MOVSSrr, X86::MOVSSrm },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8 },
+ { X86::MOVUPDrr, X86::MOVUPDrm },
+ { X86::MOVUPSrr, X86::MOVUPSrm },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8 },
+ { X86::PSHUFDri, X86::PSHUFDmi },
+ { X86::PSHUFHWri, X86::PSHUFHWmi },
+ { X86::PSHUFLWri, X86::PSHUFLWmi },
+ { X86::RCPPSr, X86::RCPPSm },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int },
+ { X86::RSQRTPSr, X86::RSQRTPSm },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int },
+ { X86::RSQRTSSr, X86::RSQRTSSm },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int },
+ { X86::SQRTPDr, X86::SQRTPDm },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int },
+ { X86::SQRTPSr, X86::SQRTPSm },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int },
+ { X86::SQRTSDr, X86::SQRTSDm },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int },
+ { X86::SQRTSSr, X86::SQRTSSm },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int },
+ { X86::TEST16rr, X86::TEST16rm },
+ { X86::TEST32rr, X86::TEST32rm },
+ { X86::TEST64rr, X86::TEST64rm },
+ { X86::TEST8rr, X86::TEST8rm },
+ // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+ { X86::UCOMISDrr, X86::UCOMISDrm },
+ { X86::UCOMISSrr, X86::UCOMISSrm }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
+ unsigned RegOp = OpTbl1[i][0];
+ unsigned MemOp = OpTbl1[i][1];
+ if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
+ MemOp)).second)
+ assert(false && "Duplicated entries?");
+ unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load
+ if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
+ if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+ std::make_pair(RegOp, AuxInfo))).second)
+ AmbEntries.push_back(MemOp);
+ }
+
+ static const unsigned OpTbl2[][2] = {
+ { X86::ADC32rr, X86::ADC32rm },
+ { X86::ADC64rr, X86::ADC64rm },
+ { X86::ADD16rr, X86::ADD16rm },
+ { X86::ADD32rr, X86::ADD32rm },
+ { X86::ADD64rr, X86::ADD64rm },
+ { X86::ADD8rr, X86::ADD8rm },
+ { X86::ADDPDrr, X86::ADDPDrm },
+ { X86::ADDPSrr, X86::ADDPSrm },
+ { X86::ADDSDrr, X86::ADDSDrm },
+ { X86::ADDSSrr, X86::ADDSSrm },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
+ { X86::AND16rr, X86::AND16rm },
+ { X86::AND32rr, X86::AND32rm },
+ { X86::AND64rr, X86::AND64rm },
+ { X86::AND8rr, X86::AND8rm },
+ { X86::ANDNPDrr, X86::ANDNPDrm },
+ { X86::ANDNPSrr, X86::ANDNPSrm },
+ { X86::ANDPDrr, X86::ANDPDrm },
+ { X86::ANDPSrr, X86::ANDPSrm },
+ { X86::CMOVA16rr, X86::CMOVA16rm },
+ { X86::CMOVA32rr, X86::CMOVA32rm },
+ { X86::CMOVA64rr, X86::CMOVA64rm },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm },
+ { X86::CMOVB16rr, X86::CMOVB16rm },
+ { X86::CMOVB32rr, X86::CMOVB32rm },
+ { X86::CMOVB64rr, X86::CMOVB64rm },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm },
+ { X86::CMOVE16rr, X86::CMOVE16rm },
+ { X86::CMOVE32rr, X86::CMOVE32rm },
+ { X86::CMOVE64rr, X86::CMOVE64rm },
+ { X86::CMOVG16rr, X86::CMOVG16rm },
+ { X86::CMOVG32rr, X86::CMOVG32rm },
+ { X86::CMOVG64rr, X86::CMOVG64rm },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm },
+ { X86::CMOVL16rr, X86::CMOVL16rm },
+ { X86::CMOVL32rr, X86::CMOVL32rm },
+ { X86::CMOVL64rr, X86::CMOVL64rm },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm },
+ { X86::CMOVO16rr, X86::CMOVO16rm },
+ { X86::CMOVO32rr, X86::CMOVO32rm },
+ { X86::CMOVO64rr, X86::CMOVO64rm },
+ { X86::CMOVP16rr, X86::CMOVP16rm },
+ { X86::CMOVP32rr, X86::CMOVP32rm },
+ { X86::CMOVP64rr, X86::CMOVP64rm },
+ { X86::CMOVS16rr, X86::CMOVS16rm },
+ { X86::CMOVS32rr, X86::CMOVS32rm },
+ { X86::CMOVS64rr, X86::CMOVS64rm },
+ { X86::CMPPDrri, X86::CMPPDrmi },
+ { X86::CMPPSrri, X86::CMPPSrmi },
+ { X86::CMPSDrr, X86::CMPSDrm },
+ { X86::CMPSSrr, X86::CMPSSrm },
+ { X86::DIVPDrr, X86::DIVPDrm },
+ { X86::DIVPSrr, X86::DIVPSrm },
+ { X86::DIVSDrr, X86::DIVSDrm },
+ { X86::DIVSSrr, X86::DIVSSrm },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm },
+ { X86::FsANDPDrr, X86::FsANDPDrm },
+ { X86::FsANDPSrr, X86::FsANDPSrm },
+ { X86::FsORPDrr, X86::FsORPDrm },
+ { X86::FsORPSrr, X86::FsORPSrm },
+ { X86::FsXORPDrr, X86::FsXORPDrm },
+ { X86::FsXORPSrr, X86::FsXORPSrm },
+ { X86::HADDPDrr, X86::HADDPDrm },
+ { X86::HADDPSrr, X86::HADDPSrm },
+ { X86::HSUBPDrr, X86::HSUBPDrm },
+ { X86::HSUBPSrr, X86::HSUBPSrm },
+ { X86::IMUL16rr, X86::IMUL16rm },
+ { X86::IMUL32rr, X86::IMUL32rm },
+ { X86::IMUL64rr, X86::IMUL64rm },
+ { X86::MAXPDrr, X86::MAXPDrm },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int },
+ { X86::MAXPSrr, X86::MAXPSrm },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int },
+ { X86::MAXSDrr, X86::MAXSDrm },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int },
+ { X86::MAXSSrr, X86::MAXSSrm },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int },
+ { X86::MINPDrr, X86::MINPDrm },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int },
+ { X86::MINPSrr, X86::MINPSrm },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int },
+ { X86::MINSDrr, X86::MINSDrm },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int },
+ { X86::MINSSrr, X86::MINSSrm },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int },
+ { X86::MULPDrr, X86::MULPDrm },
+ { X86::MULPSrr, X86::MULPSrm },
+ { X86::MULSDrr, X86::MULSDrm },
+ { X86::MULSSrr, X86::MULSSrm },
+ { X86::OR16rr, X86::OR16rm },
+ { X86::OR32rr, X86::OR32rm },
+ { X86::OR64rr, X86::OR64rm },
+ { X86::OR8rr, X86::OR8rm },
+ { X86::ORPDrr, X86::ORPDrm },
+ { X86::ORPSrr, X86::ORPSrm },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm },
+ { X86::PADDBrr, X86::PADDBrm },
+ { X86::PADDDrr, X86::PADDDrm },
+ { X86::PADDQrr, X86::PADDQrm },
+ { X86::PADDSBrr, X86::PADDSBrm },
+ { X86::PADDSWrr, X86::PADDSWrm },
+ { X86::PADDWrr, X86::PADDWrm },
+ { X86::PANDNrr, X86::PANDNrm },
+ { X86::PANDrr, X86::PANDrm },
+ { X86::PAVGBrr, X86::PAVGBrm },
+ { X86::PAVGWrr, X86::PAVGWrm },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm },
+ { X86::PINSRWrri, X86::PINSRWrmi },
+ { X86::PMADDWDrr, X86::PMADDWDrm },
+ { X86::PMAXSWrr, X86::PMAXSWrm },
+ { X86::PMAXUBrr, X86::PMAXUBrm },
+ { X86::PMINSWrr, X86::PMINSWrm },
+ { X86::PMINUBrr, X86::PMINUBrm },
+ { X86::PMULDQrr, X86::PMULDQrm },
+ { X86::PMULHUWrr, X86::PMULHUWrm },
+ { X86::PMULHWrr, X86::PMULHWrm },
+ { X86::PMULLDrr, X86::PMULLDrm },
+ { X86::PMULLDrr_int, X86::PMULLDrm_int },
+ { X86::PMULLWrr, X86::PMULLWrm },
+ { X86::PMULUDQrr, X86::PMULUDQrm },
+ { X86::PORrr, X86::PORrm },
+ { X86::PSADBWrr, X86::PSADBWrm },
+ { X86::PSLLDrr, X86::PSLLDrm },
+ { X86::PSLLQrr, X86::PSLLQrm },
+ { X86::PSLLWrr, X86::PSLLWrm },
+ { X86::PSRADrr, X86::PSRADrm },
+ { X86::PSRAWrr, X86::PSRAWrm },
+ { X86::PSRLDrr, X86::PSRLDrm },
+ { X86::PSRLQrr, X86::PSRLQrm },
+ { X86::PSRLWrr, X86::PSRLWrm },
+ { X86::PSUBBrr, X86::PSUBBrm },
+ { X86::PSUBDrr, X86::PSUBDrm },
+ { X86::PSUBSBrr, X86::PSUBSBrm },
+ { X86::PSUBSWrr, X86::PSUBSWrm },
+ { X86::PSUBWrr, X86::PSUBWrm },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
+ { X86::PXORrr, X86::PXORrm },
+ { X86::SBB32rr, X86::SBB32rm },
+ { X86::SBB64rr, X86::SBB64rm },
+ { X86::SHUFPDrri, X86::SHUFPDrmi },
+ { X86::SHUFPSrri, X86::SHUFPSrmi },
+ { X86::SUB16rr, X86::SUB16rm },
+ { X86::SUB32rr, X86::SUB32rm },
+ { X86::SUB64rr, X86::SUB64rm },
+ { X86::SUB8rr, X86::SUB8rm },
+ { X86::SUBPDrr, X86::SUBPDrm },
+ { X86::SUBPSrr, X86::SUBPSrm },
+ { X86::SUBSDrr, X86::SUBSDrm },
+ { X86::SUBSSrr, X86::SUBSSrm },
+ // FIXME: TEST*rr -> swapped operand of TEST*mr.
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
+ { X86::XOR16rr, X86::XOR16rm },
+ { X86::XOR32rr, X86::XOR32rm },
+ { X86::XOR64rr, X86::XOR64rm },
+ { X86::XOR8rr, X86::XOR8rm },
+ { X86::XORPDrr, X86::XORPDrm },
+ { X86::XORPSrr, X86::XORPSrm }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
+ unsigned RegOp = OpTbl2[i][0];
+ unsigned MemOp = OpTbl2[i][1];
+ if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
+ MemOp)).second)
+ assert(false && "Duplicated entries?");
+ unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load
+ if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+ std::make_pair(RegOp, AuxInfo))).second)
+ AmbEntries.push_back(MemOp);
+ }
+
+  // Sanity check: the unfolding map should contain no ambiguous entries.
+ assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
+}
+
+bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case X86::MOV8rr:
+ case X86::MOV8rr_NOREX:
+ case X86::MOV16rr:
+ case X86::MOV32rr:
+ case X86::MOV64rr:
+ case X86::MOVSSrr:
+ case X86::MOVSDrr:
+
+ // FP Stack register class copies
+ case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
+ case X86::MOV_Fp3264: case X86::MOV_Fp3280:
+ case X86::MOV_Fp6432: case X86::MOV_Fp8032:
+
+ case X86::FsMOVAPSrr:
+ case X86::FsMOVAPDrr:
+ case X86::MOVAPSrr:
+ case X86::MOVAPDrr:
+ case X86::MOVDQArr:
+ case X86::MOVSS2PSrr:
+ case X86::MOVSD2PDrr:
+ case X86::MOVPS2SSrr:
+ case X86::MOVPD2SDrr:
+ case X86::MMX_MOVQ64rr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid register-register move instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcSubIdx = MI.getOperand(1).getSubReg();
+ DstSubIdx = MI.getOperand(0).getSubReg();
+ return true;
+ }
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
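+    // Only a load whose address is a bare frame index (scale 1, no index
+    // register, zero displacement) is a genuine stack-slot reload.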
+ if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(4).isImm() &&
+ MI->getOperand(2).getImm() == 1 &&
+ MI->getOperand(3).getReg() == 0 &&
+ MI->getOperand(4).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8mr:
+ case X86::MOV16mr:
+ case X86::MOV32mr:
+ case X86::MOV64mr:
+ case X86::ST_FpP64m:
+ case X86::MOVSSmr:
+ case X86::MOVSDmr:
+ case X86::MOVAPSmr:
+ case X86::MOVAPDmr:
+ case X86::MOVDQAmr:
+ case X86::MMX_MOVD64mr:
+ case X86::MMX_MOVQ64mr:
+ case X86::MMX_MOVNTQmr:
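+    // Likewise, only a store to a bare frame index (scale 1, no index
+    // register, zero displacement) is a genuine stack-slot spill.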
+ if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+ MI->getOperand(2).isReg() && MI->getOperand(3).isImm() &&
+ MI->getOperand(1).getImm() == 1 &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(X86AddrNumOperands).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// regIsPICBase - Return true if register is a PIC base (i.e., defined by
+/// X86::MOVPC32r).
+static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+}
+
+/// isGVStub - Return true if the GV requires an extra load to get the
+/// real address.
+static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) {
+ return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+bool
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ (MI->getOperand(4).isCPI() ||
+ (MI->getOperand(4).isGlobal() &&
+ isGVStub(MI->getOperand(4).getGlobal(), TM)))) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ // The base register must be a PIC base (defined only by MOVPC32r).
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+ }
+
+ // All other instructions marked M_REMATERIALIZABLE are always trivially
+ // rematerializable.
+ return true;
+}
+
+/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
+/// that would clobber the EFLAGS condition register. Note the result may be
+/// conservative. If it cannot definitely determine the safety after visiting
+/// two instructions it assumes it's not safe.
+static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ // It's always safe to clobber EFLAGS at the end of a block.
+ if (I == MBB.end())
+ return true;
+
+ // For compile time consideration, if we are not able to determine the
+ // safety after visiting 2 instructions, we will assume it's not safe.
+ for (unsigned i = 0; i < 2; ++i) {
+ bool SeenDef = false;
+ for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == X86::EFLAGS) {
+ if (MO.isUse())
+ return false;
+ SeenDef = true;
+ }
+ }
+
+ if (SeenDef)
+ // This instruction defines EFLAGS, no need to look any further.
+ return true;
+ ++I;
+
+ // If we make it to the end of the block, it's safe to clobber EFLAGS.
+ if (I == MBB.end())
+ return true;
+ }
+
+ // Conservative answer.
+ return false;
+}
+
+void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ unsigned SubIdx = Orig->getOperand(0).isReg()
+ ? Orig->getOperand(0).getSubReg() : 0;
+ bool ChangeSubIdx = SubIdx != 0;
+ if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ DestReg = RI.getSubReg(DestReg, SubIdx);
+ SubIdx = 0;
+ }
+
+ // MOV32r0 etc. are implemented with xor which clobbers condition code.
+ // Re-materialize them as movri instructions to avoid side effects.
+ bool Emitted = false;
+ switch (Orig->getOpcode()) {
+ default: break;
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
+ if (!isSafeToClobberEFLAGS(MBB, I)) {
+ unsigned Opc = 0;
+ switch (Orig->getOpcode()) {
+ default: break;
+ case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
+ case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri32; break;
+ }
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
+ Emitted = true;
+ }
+ break;
+ }
+ }
+
+ if (!Emitted) {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+ }
+
+ if (ChangeSubIdx) {
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
+ }
+}
+
+/// isInvariantLoad - Return true if the specified instruction (which is marked
+/// mayLoad) is loading from a location whose value is invariant across the
+/// function. For example, loading a value from the constant pool or from
+/// the argument area of a function if it does not change. This should
+/// only return true if *all* loads the instruction does are invariant (if it
+/// does multiple loads).
+bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const {
+ // This code cares about loads from three cases: constant pool entries,
+ // invariant argument slots, and global stubs. In order to handle these cases
+ // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV
+ // operand and base our analysis on it. This is safe because the address of
+ // none of these three cases is ever used as anything other than a load base
+ // and X86 doesn't have any instructions that load from multiple places.
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Loads from constant pools are trivially invariant.
+ if (MO.isCPI())
+ return true;
+
+ if (MO.isGlobal())
+ return isGVStub(MO.getGlobal(), TM);
+
+ // If this is a load from an invariant stack slot, the load is a constant.
+ if (MO.isFI()) {
+ const MachineFrameInfo &MFI =
+ *MI->getParent()->getParent()->getFrameInfo();
+ int Idx = MO.getIndex();
+ return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx);
+ }
+ }
+
+ // All other instances of these instructions are presumed to have other
+ // issues.
+ return false;
+}
+
+/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
+/// is not marked dead.
+static bool hasLiveCondCodeDef(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ MO.getReg() == X86::EFLAGS && !MO.isDead()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// convertToThreeAddress - This method must be implemented by targets that
+/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+/// may be able to convert a two-address instruction into a true
+/// three-address instruction on demand. This allows the X86 target (for
+/// example) to convert ADD and SHL instructions into LEA instructions if they
+/// would require register copies due to two-addressness.
+///
+/// This method returns a null pointer if the transformation cannot be
+/// performed, otherwise it returns the new instruction.
+///
+MachineInstr *
+X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ // All instructions input are two-addr instructions. Get the known operands.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ MachineInstr *NewMI = NULL;
+ // FIXME: 16-bit LEAs are really slow on Athlons, but not bad on P4s. When
+ // we have better subtarget support, enable the 16-bit LEA generation here.
+ bool DisableLEA16 = true;
+
+ unsigned MIOpc = MI->getOpcode();
+ switch (MIOpc) {
+ case X86::SHUFPSrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
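+ // When both sources of the shuffle are the same register, shufps can be
+ // rewritten as pshufd, which has independent source and destination
+ // operands (and requires SSE2).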
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
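+ // LEA can only encode scales of 1, 2, 4 and 8, so only shift amounts
+ // 1-3 (scale = 1 << ShAmt) can be converted.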
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0);
+ break;
+ }
+ case X86::SHL32ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
+ X86::LEA64_32r : X86::LEA32r;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill)).addImm(0);
+ break;
+ }
+ case X86::SHL16ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+ // the flags produced by a shift yet, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ if (DisableLEA16) {
+ // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+ .addReg(leaInReg)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(X86::SUBREG_16BIT);
+
+ NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill)
+ .addImm(0);
+
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill)
+ .addImm(X86::SUBREG_16BIT);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+ return ExtMI;
+ } else {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0);
+ }
+ break;
+ }
+ default: {
+ // The following opcodes also set the condition code register(s). Only
+ // convert them to an equivalent LEA if the condition code register defs
+ // are dead!
+ if (hasLiveCondCodeDef(MI))
+ return 0;
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ switch (MIOpc) {
+ default: return 0;
+ case X86::INC64r:
+ case X86::INC32r:
+ case X86::INC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ case X86::DEC64r:
+ case X86::DEC32r:
+ case X86::DEC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ }
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ case X86::ADD64rr:
+ case X86::ADD32rr: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD16rr: {
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ if (MI->getOperand(2).isImm())
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ if (MI->getOperand(2).isImm()) {
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ }
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ if (MI->getOperand(2).isImm())
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ case X86::SHL16ri:
+ if (DisableLEA16) return 0;
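+ // FALL THROUGH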
+ case X86::SHL32ri:
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
+ "Unknown shl instruction!");
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
+ X86AddressMode AM;
+ AM.Scale = 1 << ShAmt;
+ AM.IndexReg = Src;
+ unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
+ : (MIOpc == X86::SHL32ri
+ ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
+ NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)), AM);
+ if (isKill)
+ NewMI->getOperand(3).setIsKill(true);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ if (!NewMI) return 0;
+
+ if (LV) { // Update live variables
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, NewMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, NewMI);
+ }
+
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ return NewMI;
+}
+
+/// commuteInstruction - We have a few instructions that must be hacked on to
+/// commute them.
+///
+MachineInstr *
+X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
+ case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
+ case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
+ case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
+ case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
+ case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
+ unsigned Opc;
+ unsigned Size;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unreachable!");
+ case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
+ case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
+ case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
+ case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
+ case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
+ case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
+ }
+ unsigned Amt = MI->getOperand(3).getImm();
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ MI->getOperand(3).setImm(Size-Amt);
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+ case X86::CMOVB16rr:
+ case X86::CMOVB32rr:
+ case X86::CMOVB64rr:
+ case X86::CMOVAE16rr:
+ case X86::CMOVAE32rr:
+ case X86::CMOVAE64rr:
+ case X86::CMOVE16rr:
+ case X86::CMOVE32rr:
+ case X86::CMOVE64rr:
+ case X86::CMOVNE16rr:
+ case X86::CMOVNE32rr:
+ case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr:
+ case X86::CMOVBE32rr:
+ case X86::CMOVBE64rr:
+ case X86::CMOVA16rr:
+ case X86::CMOVA32rr:
+ case X86::CMOVA64rr:
+ case X86::CMOVL16rr:
+ case X86::CMOVL32rr:
+ case X86::CMOVL64rr:
+ case X86::CMOVGE16rr:
+ case X86::CMOVGE32rr:
+ case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr:
+ case X86::CMOVLE32rr:
+ case X86::CMOVLE64rr:
+ case X86::CMOVG16rr:
+ case X86::CMOVG32rr:
+ case X86::CMOVG64rr:
+ case X86::CMOVS16rr:
+ case X86::CMOVS32rr:
+ case X86::CMOVS64rr:
+ case X86::CMOVNS16rr:
+ case X86::CMOVNS32rr:
+ case X86::CMOVNS64rr:
+ case X86::CMOVP16rr:
+ case X86::CMOVP32rr:
+ case X86::CMOVP64rr:
+ case X86::CMOVNP16rr:
+ case X86::CMOVNP32rr:
+ case X86::CMOVNP64rr:
+ case X86::CMOVO16rr:
+ case X86::CMOVO32rr:
+ case X86::CMOVO64rr:
+ case X86::CMOVNO16rr:
+ case X86::CMOVNO32rr:
+ case X86::CMOVNO64rr: {
+ unsigned Opc = 0;
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
+ case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
+ case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
+ case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
+ case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
+ case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
+ case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
+ case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
+ case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
+ case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
+ case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
+ case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
+ case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
+ case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
+ case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
+ case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
+ case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
+ case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
+ case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
+ case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
+ case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
+ case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
+ case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
+ case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
+ case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
+ case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
+ case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
+ case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
+ case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
+ case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
+ case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
+ case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
+ case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
+ case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
+ case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
+ case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
+ case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
+ case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
+ case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
+ case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
+ case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
+ case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
+ case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
+ case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
+ case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
+ case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
+ case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
+ case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
+ }
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ // Fallthrough intended.
+ }
+ default:
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+}
+
+static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
+ switch (BrOpc) {
+ default: return X86::COND_INVALID;
+ case X86::JE: return X86::COND_E;
+ case X86::JNE: return X86::COND_NE;
+ case X86::JL: return X86::COND_L;
+ case X86::JLE: return X86::COND_LE;
+ case X86::JG: return X86::COND_G;
+ case X86::JGE: return X86::COND_GE;
+ case X86::JB: return X86::COND_B;
+ case X86::JBE: return X86::COND_BE;
+ case X86::JA: return X86::COND_A;
+ case X86::JAE: return X86::COND_AE;
+ case X86::JS: return X86::COND_S;
+ case X86::JNS: return X86::COND_NS;
+ case X86::JP: return X86::COND_P;
+ case X86::JNP: return X86::COND_NP;
+ case X86::JO: return X86::COND_O;
+ case X86::JNO: return X86::COND_NO;
+ }
+}
+
+unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case X86::COND_E: return X86::JE;
+ case X86::COND_NE: return X86::JNE;
+ case X86::COND_L: return X86::JL;
+ case X86::COND_LE: return X86::JLE;
+ case X86::COND_G: return X86::JG;
+ case X86::COND_GE: return X86::JGE;
+ case X86::COND_B: return X86::JB;
+ case X86::COND_BE: return X86::JBE;
+ case X86::COND_A: return X86::JA;
+ case X86::COND_AE: return X86::JAE;
+ case X86::COND_S: return X86::JS;
+ case X86::COND_NS: return X86::JNS;
+ case X86::COND_P: return X86::JP;
+ case X86::COND_NP: return X86::JNP;
+ case X86::COND_O: return X86::JO;
+ case X86::COND_NO: return X86::JNO;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// e.g. turning COND_E to COND_NE.
+X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case X86::COND_E: return X86::COND_NE;
+ case X86::COND_NE: return X86::COND_E;
+ case X86::COND_L: return X86::COND_GE;
+ case X86::COND_LE: return X86::COND_G;
+ case X86::COND_G: return X86::COND_LE;
+ case X86::COND_GE: return X86::COND_L;
+ case X86::COND_B: return X86::COND_AE;
+ case X86::COND_BE: return X86::COND_A;
+ case X86::COND_A: return X86::COND_BE;
+ case X86::COND_AE: return X86::COND_B;
+ case X86::COND_S: return X86::COND_NS;
+ case X86::COND_NS: return X86::COND_S;
+ case X86::COND_P: return X86::COND_NP;
+ case X86::COND_NP: return X86::COND_P;
+ case X86::COND_O: return X86::COND_NO;
+ case X86::COND_NO: return X86::COND_O;
+ }
+}
+
+bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (TID.isBranch() && !TID.isBarrier())
+ return true;
+ if (!TID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
+static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
+ const X86InstrInfo &TII) {
+ if (MI->getOpcode() == X86::FP_REG_KILL)
+ return false;
+ return TII.isUnpredicatedTerminator(MI);
+}
+
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isBrAnalysisUnpredicatedTerminator(I, *this))
+ break;
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+ // Handle unconditional branches.
+ if (I->getOpcode() == X86::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (next(I) != MBB.end())
+ next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+ // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+ // Handle conditional branches.
+ X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == X86::COND_INVALID)
+ return true; // Can't handle indirect branch.
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+ // Handle subsequent conditional branches. Only handle the case
+ // where all conditional branches branch to the same destination
+ // and their condition opcodes fit one of the special
+ // multi-branch idioms.
+ assert(Cond.size() == 1);
+ assert(TBB);
+ // Only handle the case where all conditional branches branch to
+ // the same destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+ // If they differ, see if they fit one of the known patterns.
+ // Theoretically we could handle more patterns here, but
+ // we shouldn't expect to see them if instruction selection
+ // has done a reasonable job.
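+ // These combined conditions come from floating-point equality tests,
+ // which must also check the parity flag (set on an unordered result).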
+ if ((OldBranchCode == X86::COND_NP &&
+ BranchCode == X86::COND_E) ||
+ (OldBranchCode == X86::COND_E &&
+ BranchCode == X86::COND_NP))
+ BranchCode = X86::COND_NP_OR_E;
+ else if ((OldBranchCode == X86::COND_P &&
+ BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_NE &&
+ BranchCode == X86::COND_P))
+ BranchCode = X86::COND_NE_OR_P;
+ else
+ return true;
+ // Update the MachineOperand.
+ Cond[0].setImm(BranchCode);
+ }
+
+ return false;
+}
+
+unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->getOpcode() != X86::JMP &&
+ GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc operand
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "X86 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+ switch (CC) {
+ case X86::COND_NP_OR_E:
+ // Synthesize NP_OR_E with two branches.
+ BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB);
+ ++Count;
+ break;
+ case X86::COND_NE_OR_P:
+ // Synthesize NE_OR_P with two branches.
+ BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
+ ++Count;
+ break;
+ default: {
+ unsigned Opc = GetCondBranchFromCond(CC);
+ BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
+ ++Count;
+ }
+ }
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// isHReg - Test if the given register is a physical h register.
+static bool isHReg(unsigned Reg) {
+ return X86::GR8_ABCD_HRegClass.contains(Reg);
+}
+
+bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ // Determine if DestRC and SrcRC have a common superclass.
+ const TargetRegisterClass *CommonRC = DestRC;
+ if (DestRC == SrcRC)
+ /* Source and destination have the same register class. */;
+ else if (CommonRC->hasSuperClass(SrcRC))
+ CommonRC = SrcRC;
+ else if (!DestRC->hasSubClass(SrcRC))
+ CommonRC = 0;
+
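+ // When one class contains the other, CommonRC is the larger (super) class,
+ // so a move opcode chosen for it is valid for both registers.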
+ if (CommonRC) {
+ unsigned Opc;
+ if (CommonRC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rr;
+ } else if (CommonRC == &X86::GR32RegClass) {
+ Opc = X86::MOV32rr;
+ } else if (CommonRC == &X86::GR16RegClass) {
+ Opc = X86::MOV16rr;
+ } else if (CommonRC == &X86::GR8RegClass) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
+ else
+ Opc = X86::MOV8rr;
+ } else if (CommonRC == &X86::GR64_ABCDRegClass) {
+ Opc = X86::MOV64rr;
+ } else if (CommonRC == &X86::GR32_ABCDRegClass) {
+ Opc = X86::MOV32rr;
+ } else if (CommonRC == &X86::GR16_ABCDRegClass) {
+ Opc = X86::MOV16rr;
+ } else if (CommonRC == &X86::GR8_ABCD_LRegClass) {
+ Opc = X86::MOV8rr;
+ } else if (CommonRC == &X86::GR8_ABCD_HRegClass) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
+ else
+ Opc = X86::MOV8rr;
+ } else if (CommonRC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64rr;
+ } else if (CommonRC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32rr;
+ } else if (CommonRC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16rr;
+ } else if (CommonRC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8rr;
+ } else if (CommonRC == &X86::RFP32RegClass) {
+ Opc = X86::MOV_Fp3232;
+ } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
+ Opc = X86::MOV_Fp6464;
+ } else if (CommonRC == &X86::RFP80RegClass) {
+ Opc = X86::MOV_Fp8080;
+ } else if (CommonRC == &X86::FR32RegClass) {
+ Opc = X86::FsMOVAPSrr;
+ } else if (CommonRC == &X86::FR64RegClass) {
+ Opc = X86::FsMOVAPDrr;
+ } else if (CommonRC == &X86::VR128RegClass) {
+ Opc = X86::MOVAPSrr;
+ } else if (CommonRC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64rr;
+ } else {
+ return false;
+ }
+ BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ if (SrcRC == &X86::CCRRegClass) {
+ if (SrcReg != X86::EFLAGS)
+ return false;
+ if (DestRC == &X86::GR64RegClass) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
+ BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
+ return true;
+ } else if (DestRC == &X86::GR32RegClass) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+ BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
+ return true;
+ }
+ } else if (DestRC == &X86::CCRRegClass) {
+ if (DestReg != X86::EFLAGS)
+ return false;
+ if (SrcRC == &X86::GR64RegClass) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
+ BuildMI(MBB, MI, DL, get(X86::POPFQ));
+ return true;
+ } else if (SrcRC == &X86::GR32RegClass) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
+ BuildMI(MBB, MI, DL, get(X86::POPFD));
+ return true;
+ }
+ }
+
+ // Moving from ST(0) turns into FpGET_ST0_32 etc.
+ if (SrcRC == &X86::RSTRegClass) {
+ // Copying from ST(0)/ST(1).
+ if (SrcReg != X86::ST0 && SrcReg != X86::ST1)
+ // Can only copy from ST(0)/ST(1) right now
+ return false;
+ bool isST0 = SrcReg == X86::ST0;
+ unsigned Opc;
+ if (DestRC == &X86::RFP32RegClass)
+ Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
+ else if (DestRC == &X86::RFP64RegClass)
+ Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
+ else {
+ if (DestRC != &X86::RFP80RegClass)
+ return false;
+ Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
+ }
+ BuildMI(MBB, MI, DL, get(Opc), DestReg);
+ return true;
+ }
+
+ // Moving to ST(0) turns into FpSET_ST0_32 etc.
+ if (DestRC == &X86::RSTRegClass) {
+ // Copying to ST(0) / ST(1).
+ if (DestReg != X86::ST0 && DestReg != X86::ST1)
+ // Can only copy to TOS right now
+ return false;
+ bool isST0 = DestReg == X86::ST0;
+ unsigned Opc;
+ if (SrcRC == &X86::RFP32RegClass)
+ Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
+ else if (SrcRC == &X86::RFP64RegClass)
+ Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
+ else {
+ if (SrcRC != &X86::RFP80RegClass)
+ return false;
+ Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
+ }
+ BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
+ return true;
+ }
+
+ // Not yet supported!
+ return false;
+}
+
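+/// getStoreRegOpcode - Return the opcode of the plain store instruction for
+/// the given register class, using an aligned SSE store for VR128 only when
+/// the stack is known to be 16-byte aligned.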
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ unsigned Opc = 0;
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32RegClass) {
+ Opc = X86::MOV32mr;
+ } else if (RC == &X86::GR16RegClass) {
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8RegClass) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(SrcReg) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8mr_NOREX;
+ else
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR64_ABCDRegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32_ABCDRegClass) {
+ Opc = X86::MOV32mr;
+ } else if (RC == &X86::GR16_ABCDRegClass) {
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8_ABCD_LRegClass) {
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR8_ABCD_HRegClass) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8mr_NOREX;
+ else
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32mr;
+ } else if (RC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::RFP80RegClass) {
+ Opc = X86::ST_FpP80m; // pops
+ } else if (RC == &X86::RFP64RegClass) {
+ Opc = X86::ST_Fp64m;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::ST_Fp32m;
+ } else if (RC == &X86::FR32RegClass) {
+ Opc = X86::MOVSSmr;
+ } else if (RC == &X86::FR64RegClass) {
+ Opc = X86::MOVSDmr;
+ } else if (RC == &X86::VR128RegClass) {
+ // If stack is realigned we can use aligned stores.
+ Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr;
+ } else if (RC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64mr;
+ } else {
+ assert(0 && "Unknown regclass");
+ abort();
+ }
+
+ return Opc;
+}
+
+void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ const MachineFunction &MF = *MBB.getParent();
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ NewMIs.push_back(MIB);
+}
+
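+/// getLoadRegOpcode - Return the opcode of the plain load instruction for
+/// the given register class, using an aligned SSE load for VR128 only when
+/// the stack is known to be 16-byte aligned.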
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ unsigned Opc = 0;
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32RegClass) {
+ Opc = X86::MOV32rm;
+ } else if (RC == &X86::GR16RegClass) {
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8RegClass) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(DestReg) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rm_NOREX;
+ else
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR64_ABCDRegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32_ABCDRegClass) {
+ Opc = X86::MOV32rm;
+ } else if (RC == &X86::GR16_ABCDRegClass) {
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8_ABCD_LRegClass) {
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR8_ABCD_HRegClass) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rm_NOREX;
+ else
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32rm;
+ } else if (RC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::RFP80RegClass) {
+ Opc = X86::LD_Fp80m;
+ } else if (RC == &X86::RFP64RegClass) {
+ Opc = X86::LD_Fp64m;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::LD_Fp32m;
+ } else if (RC == &X86::FR32RegClass) {
+ Opc = X86::MOVSSrm;
+ } else if (RC == &X86::FR64RegClass) {
+ Opc = X86::MOVSDrm;
+ } else if (RC == &X86::VR128RegClass) {
+ // If stack is realigned we can use aligned loads.
+ Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
+ } else if (RC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64rm;
+ } else {
+ assert(0 && "Unknown regclass");
+ abort();
+ }
+
+ return Opc;
+}
+
+void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const{
+ const MachineFunction &MF = *MBB.getParent();
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+}
+
+bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ unsigned SlotSize = is64Bit ? 8 : 4;
+
+ MachineFunction &MF = *MBB.getParent();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize);
+
+ unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, get(Opc))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ BuildMI(MBB, MI, DL, get(Opc), Reg);
+ }
+ return true;
+}
+
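+/// FuseTwoAddrInst - Create a new instruction from Opcode with the memory
+/// reference MOs substituted for the tied def/use register pair of the
+/// two-address instruction MI.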
+static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI,
+ const TargetInstrInfo &TII) {
+ // Create the base instruction with the memory operand as the first part.
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+
+ // Loop over the rest of the ri operands, converting them over.
+ unsigned NumOps = MI->getDesc().getNumOperands()-2;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i+2);
+ MIB.addOperand(MO);
+ }
+ for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ MIB.addOperand(MO);
+ }
+ return MIB;
+}
+
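+/// FuseInst - Create a new instruction from Opcode with the memory reference
+/// MOs substituted for register operand OpNo of MI.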
+static MachineInstr *FuseInst(MachineFunction &MF,
+ unsigned Opcode, unsigned OpNo,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (i == OpNo) {
+ assert(MO.isReg() && "Expected to fold into reg operand!");
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ } else {
+ MIB.addOperand(MO);
+ }
+ }
+ return MIB;
+}
+
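+/// MakeM0Inst - Build the memory form of a register-zeroing pseudo
+/// (e.g. MOV32r0): a store of immediate zero to the given address.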
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
+
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ return MIB.addImm(0);
+}
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI, unsigned i,
+ const SmallVectorImpl<MachineOperand> &MOs) const{
+ const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ bool isTwoAddrFold = false;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ MachineInstr *NewMI = NULL;
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different from folding it in other places. It requires
+ // replacing the *two* registers with the memory location.
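+ // For example, folding a stack slot into ADD32rr (whose operands 0 and 1
+ // are tied) yields ADD32mr, with the memory reference standing in for both
+ // registers.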
+ if (isTwoAddr && NumOps >= 2 && i < 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ isTwoAddrFold = true;
+ } else if (i == 0) { // If operand 0
+ if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
+ NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV8r0)
+ NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
+ if (NewMI)
+ return NewMI;
+
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (i == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (i == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ // If a folding table was selected, look for the opcode in it.
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned*, unsigned>::iterator I =
+ OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+ if (I != OpcodeTablePtr->end()) {
+ if (isTwoAddrFold)
+ NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this);
+ else
+ NewMI = FuseInst(MF, I->second, i, MOs, MI, *this);
+ return NewMI;
+ }
+ }
+
+ // No fusion
+ if (PrintFailedFusing)
+ cerr << "We failed to fuse operand " << i << " in " << *MI;
+ return NULL;
+}
+
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ // FIXME: Move alignment requirement into tables?
+ if (Alignment < 16) {
+ switch (MI->getOpcode()) {
+ default: break;
+ // Not always safe to fold movsd into these instructions since their load
+ // folding variants expect the address to be 16-byte aligned.
+ case X86::FsANDNPDrr:
+ case X86::FsANDNPSrr:
+ case X86::FsANDPDrr:
+ case X86::FsANDPSrr:
+ case X86::FsORPDrr:
+ case X86::FsORPSrr:
+ case X86::FsXORPDrr:
+ case X86::FsXORPSrr:
+ return NULL;
+ }
+ }
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ }
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ SmallVector<MachineOperand,4> MOs;
+ MOs.push_back(MachineOperand::CreateFI(FrameIndex));
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ // Determine the alignment of the load.
+ unsigned Alignment = 0;
+ if (LoadMI->hasOneMemOperand())
+ Alignment = LoadMI->memoperands_begin()->getAlignment();
+
+ // FIXME: Move alignment requirement into tables?
+ if (Alignment < 16) {
+ switch (MI->getOpcode()) {
+ default: break;
+ // Not always safe to fold movsd into these instructions since their load
+ // folding variants expect the address to be 16-byte aligned.
+ case X86::FsANDNPDrr:
+ case X86::FsANDNPSrr:
+ case X86::FsANDPDrr:
+ case X86::FsANDPSrr:
+ case X86::FsORPDrr:
+ case X86::FsORPSrr:
+ case X86::FsXORPDrr:
+ case X86::FsXORPSrr:
+ return NULL;
+ }
+ }
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ }
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ SmallVector<MachineOperand,X86AddrNumOperands> MOs;
+ if (LoadMI->getOpcode() == X86::V_SET0 ||
+ LoadMI->getOpcode() == X86::V_SETALLONES) {
+ // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Create a constant-pool entry and operands to load from it.
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !TM.getSubtarget<X86Subtarget>().is64Bit())
+ // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return NULL;
+
+ // Create a v4i32 constant-pool entry.
+ MachineConstantPool &MCP = *MF.getConstantPool();
+ const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
+ Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
+ ConstantVector::getNullValue(Ty) :
+ ConstantVector::getAllOnesValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, 16);
+
+ // Create operands to load from the constant pool entry.
+ MOs.push_back(MachineOperand::CreateReg(PICBase, false));
+ MOs.push_back(MachineOperand::CreateImm(1));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ } else {
+ // Folding a normal load. Just copy the load's address operands.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
+ MOs.push_back(LoadMI->getOperand(i));
+ }
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+}
+
+
+bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // Check switch flag
+ if (NoFusing) return false;
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ return true;
+ }
+ }
+
+ if (Ops.size() != 1)
+ return false;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different from folding it in other places. It requires
+ // replacing the *two* registers with the memory location.
+ const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ } else if (OpNum == 0) { // If operand 0
+ switch (Opc) {
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0:
+ case X86::MOV8r0:
+ return true;
+ default: break;
+ }
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (OpNum == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (OpNum == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned*, unsigned>::iterator I =
+ OpcodeTablePtr->find((unsigned*)Opc);
+ if (I != OpcodeTablePtr->end())
+ return true;
+ }
+ return false;
+}
+
+bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ DebugLoc dl = MI->getDebugLoc();
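+ // The second half of the table entry packs the fold info: bits 0-3 hold
+ // the operand index of the folded memory reference, bit 4 is set if a
+ // load was folded, and bit 5 if a store was folded.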
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & 0xf;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
+ return false;
+ UnfoldLoad &= FoldedLoad;
+ if (UnfoldStore && !FoldedStore)
+ return false;
+ UnfoldStore &= FoldedStore;
+
+ const TargetInstrDesc &TID = get(Opc);
+ const TargetOperandInfo &TOI = TID.OpInfo[Index];
+ const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
+ ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+ SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
+ SmallVector<MachineOperand,2> BeforeOps;
+ SmallVector<MachineOperand,2> AfterOps;
+ SmallVector<MachineOperand,4> ImpOps;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (i >= Index && i < Index + X86AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (Op.isReg() && Op.isImplicit())
+ ImpOps.push_back(Op);
+ else if (i < Index)
+ BeforeOps.push_back(Op);
+ else if (i > Index)
+ AfterOps.push_back(Op);
+ }
+
+ // Emit the load instruction.
+ if (UnfoldLoad) {
+ loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
+ if (UnfoldStore) {
+ // Address operands cannot be marked isKill.
+ for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
+ MachineOperand &MO = NewMIs[0]->getOperand(i);
+ if (MO.isReg())
+ MO.setIsKill(false);
+ }
+ }
+ }
+
+ // Emit the data processing instruction.
+ MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(DataMI);
+
+ if (FoldedStore)
+ MIB.addReg(Reg, RegState::Define);
+ for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
+ MIB.addOperand(BeforeOps[i]);
+ if (FoldedLoad)
+ MIB.addReg(Reg);
+ for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
+ MIB.addOperand(AfterOps[i]);
+ for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
+ MachineOperand &MO = ImpOps[i];
+ MIB.addReg(MO.getReg(),
+ getDefRegState(MO.isDef()) |
+ RegState::Implicit |
+ getKillRegState(MO.isKill()) |
+ getDeadRegState(MO.isDead()));
+ }
+ // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
+ unsigned NewOpc = 0;
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP32ri:
+ case X86::CMP16ri:
+ case X86::CMP8ri: {
+ MachineOperand &MO0 = DataMI->getOperand(0);
+ MachineOperand &MO1 = DataMI->getOperand(1);
+ if (MO1.getImm() == 0) {
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
+ case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
+ }
+ DataMI->setDesc(get(NewOpc));
+ MO1.ChangeToRegister(MO0.getReg(), false);
+ }
+ }
+ }
+ NewMIs.push_back(DataMI);
+
+ // Emit the store instruction.
+ if (UnfoldStore) {
+ const TargetOperandInfo &DstTOI = TID.OpInfo[0];
+ const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass()
+ ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs);
+ }
+
+ return true;
+}
+
+bool
+X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ if (!N->isMachineOpcode())
+ return false;
+
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & 0xf;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ const TargetInstrDesc &TID = get(Opc);
+ const TargetOperandInfo &TOI = TID.OpInfo[Index];
+ const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
+ ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+ unsigned NumDefs = TID.NumDefs;
+ std::vector<SDValue> AddrOps;
+ std::vector<SDValue> BeforeOps;
+ std::vector<SDValue> AfterOps;
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumOps = N->getNumOperands();
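+ // SDNode operands do not include the results, so the MI operand index from
+ // the table is shifted down by the number of defs; the last operand is the
+ // chain.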
+ for (unsigned i = 0; i != NumOps-1; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (i < Index-NumDefs)
+ BeforeOps.push_back(Op);
+ else if (i > Index-NumDefs)
+ AfterOps.push_back(Op);
+ }
+ SDValue Chain = N->getOperand(NumOps-1);
+ AddrOps.push_back(Chain);
+
+ // Emit the load instruction.
+ SDNode *Load = 0;
+ const MachineFunction &MF = DAG.getMachineFunction();
+ if (FoldedLoad) {
+ MVT VT = *RC->vt_begin();
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Load);
+ }
+
+ // Emit the data processing instruction.
+ std::vector<MVT> VTs;
+ const TargetRegisterClass *DstRC = 0;
+ if (TID.getNumDefs() > 0) {
+ const TargetOperandInfo &DstTOI = TID.OpInfo[0];
+ DstRC = DstTOI.isLookupPtrRegClass()
+ ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
+ VTs.push_back(*DstRC->vt_begin());
+ }
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getValueType(i);
+ if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
+ VTs.push_back(VT);
+ }
+ if (Load)
+ BeforeOps.push_back(SDValue(Load, 0));
+ std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+  SDNode *NewNode = DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0],
+                                      BeforeOps.size());
+ NewNodes.push_back(NewNode);
+
+ // Emit the store instruction.
+ if (FoldedStore) {
+ AddrOps.pop_back();
+ AddrOps.push_back(SDValue(NewNode, 0));
+ AddrOps.push_back(Chain);
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC,
+ isAligned, TM),
+ dl, MVT::Other,
+ &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Store);
+ }
+
+ return true;
+}
+
+unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore) const {
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ MemOp2RegOpTable.find((unsigned*)Opc);
+ if (I == MemOp2RegOpTable.end())
+ return 0;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
+ return 0;
+ if (UnfoldStore && !FoldedStore)
+ return 0;
+ return I->second.first;
+}
+
+bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case X86::TCRETURNri:
+ case X86::TCRETURNdi:
+ case X86::RET: // Return.
+ case X86::RETI:
+ case X86::TAILJMPd:
+ case X86::TAILJMPr:
+ case X86::TAILJMPm:
+ case X86::JMP: // Uncond branch.
+ case X86::JMP32r: // Indirect branch.
+ case X86::JMP64r: // Indirect branch (64-bit).
+ case X86::JMP32m: // Indirect branch through mem.
+ case X86::JMP64m: // Indirect branch through mem (64-bit).
+ return true;
+ default: return false;
+ }
+}
+
+bool X86InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid X86 branch condition!");
+ X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
+ if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
+ return true;
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
+bool X86InstrInfo::
+isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // FIXME: Return false for x87 stack register classes for now. We can't
+ // allow any loads of these registers before FpGet_ST0_80.
+ return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
+ RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
+}
+
+unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
+ switch (Desc->TSFlags & X86II::ImmMask) {
+ case X86II::Imm8: return 1;
+ case X86II::Imm16: return 2;
+ case X86II::Imm32: return 4;
+ case X86II::Imm64: return 8;
+ default: assert(0 && "Immediate size not set!");
+ return 0;
+ }
+}
+
+/// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended register?
+/// e.g. r8, xmm8, etc.
+bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) {
+ if (!MO.isReg()) return false;
+ switch (MO.getReg()) {
+ default: break;
+ case X86::R8: case X86::R9: case X86::R10: case X86::R11:
+ case X86::R12: case X86::R13: case X86::R14: case X86::R15:
+ case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
+ case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
+ case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
+ case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
+ case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
+ case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ return true;
+ }
+ return false;
+}
+
+
+/// determineREX - Determine whether the MachineInstr has to be encoded with
+/// an X86-64 REX prefix, which specifies 1) 64-bit instructions, 2)
+/// non-default operand size, and 3) use of X86-64 extended registers.
+unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
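+  // REX bit layout: bit 0 is REX.B, bit 1 REX.X, bit 2 REX.R, bit 3 REX.W.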
+ const TargetInstrDesc &Desc = MI.getDesc();
+
+  // Pseudo instructions do not need a REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+/// sizePCRelativeBlockAddress - This method returns the size of a
+/// PC-relative block address instruction.
+///
+static unsigned sizePCRelativeBlockAddress() {
+ return 4;
+}
+
+/// sizeGlobalAddress - Return the size of the emitted global address.
+///
+static unsigned sizeGlobalAddress(bool dword) {
+ return dword ? 8 : 4;
+}
+
+/// sizeConstPoolAddress - Return the size of the emitted constant pool
+/// address.
+///
+static unsigned sizeConstPoolAddress(bool dword) {
+ return dword ? 8 : 4;
+}
+
+/// sizeExternalSymbolAddress - Return the size of the emitted external
+/// symbol.
+///
+static unsigned sizeExternalSymbolAddress(bool dword) {
+ return dword ? 8 : 4;
+}
+
+/// sizeJumpTableAddress - Return the size of the emitted jump table
+/// address.
+///
+static unsigned sizeJumpTableAddress(bool dword) {
+ return dword ? 8 : 4;
+}
+
+static unsigned sizeConstant(unsigned Size) {
+ return Size;
+}
+
+static unsigned sizeRegModRMByte() {
+ return 1;
+}
+
+static unsigned sizeSIBByte() {
+ return 1;
+}
+
+static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
+ unsigned FinalSize = 0;
+  // A simple integer displacement that doesn't require a relocation is
+  // counted pessimistically as a full disp32.
+ if (!RelocOp) {
+ FinalSize += sizeConstant(4);
+ return FinalSize;
+ }
+
+ // Otherwise, this is something that requires a relocation.
+ if (RelocOp->isGlobal()) {
+ FinalSize += sizeGlobalAddress(false);
+ } else if (RelocOp->isCPI()) {
+ FinalSize += sizeConstPoolAddress(false);
+ } else if (RelocOp->isJTI()) {
+ FinalSize += sizeJumpTableAddress(false);
+ } else {
+ assert(0 && "Unknown value to relocate!");
+ }
+ return FinalSize;
+}
+
+static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
+ bool IsPIC, bool Is64BitMode) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+ unsigned FinalSize = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobal()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isCPI()) {
+ if (Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal = 1;
+ }
+ } else if (Op3.isJTI()) {
+ if (Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal = 1;
+ }
+ } else {
+ DispVal = 1;
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Is a SIB byte needed?
+ if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
+ IndexReg.getReg() == 0 &&
+ (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {
+ if (BaseReg == 0) { // Just a displacement?
+ // Emit special case [disp32] encoding
+ ++FinalSize;
+ FinalSize += getDisplacementFieldSize(DispForReloc);
+ } else {
+ unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+        // Emit simple indirect register encoding, e.g. [EAX].
+ ++FinalSize;
+ // Be pessimistic and assume it's a disp32, not a disp8
+ } else {
+ // Emit the most general non-SIB encoding: [REG+disp32]
+ ++FinalSize;
+ FinalSize += getDisplacementFieldSize(DispForReloc);
+ }
+ }
+
+ } else { // We need a SIB byte, so start by outputting the ModR/M byte first
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ if (BaseReg == 0 || DispForReloc) {
+ // Emit the normal disp32 encoding.
+ ++FinalSize;
+ ForceDisp32 = true;
+ } else {
+ ++FinalSize;
+ }
+
+ FinalSize += sizeSIBByte();
+
+ // Do we need to output a displacement?
+ if (DispVal != 0 || ForceDisp32) {
+ FinalSize += getDisplacementFieldSize(DispForReloc);
+ }
+ }
+ return FinalSize;
+}
+
+
+static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
+ const TargetInstrDesc *Desc,
+ bool IsPIC, bool Is64BitMode) {
+
+ unsigned Opcode = Desc->Opcode;
+ unsigned FinalSize = 0;
+
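+  // This mirrors the machine code emitter's encoding logic, but only counts
+  // bytes instead of emitting them.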
+ // Emit the lock opcode prefix as needed.
+ if (Desc->TSFlags & X86II::LOCK) ++FinalSize;
+
+ // Emit segment override opcode prefix as needed.
+ switch (Desc->TSFlags & X86II::SegOvrMask) {
+ case X86II::FS:
+ case X86II::GS:
+ ++FinalSize;
+ break;
+ default: assert(0 && "Invalid segment!");
+ case 0: break; // No segment override!
+ }
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;
+
+ // Emit the operand size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::OpSize) ++FinalSize;
+
+ // Emit the address size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::AdSize) ++FinalSize;
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::XS: // F3 0F
+ ++FinalSize;
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ ++FinalSize;
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ ++FinalSize;
+ break; // Two-byte opcode prefix
+ default: assert(0 && "Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ if (Is64BitMode) {
+ // REX prefix
+ unsigned REX = X86InstrInfo::determineREX(MI);
+ if (REX)
+ ++FinalSize;
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ ++FinalSize;
+
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::T8: // 0F 38
+ ++FinalSize;
+ break;
+ case X86II::TA: // 0F 3A
+ ++FinalSize;
+ break;
+ }
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
+ CurOp++;
+ else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ switch (Desc->TSFlags & X86II::FormMask) {
+ default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+ // Remember the current PC offset, this is the PIC relocation
+ // base address.
+ switch (Opcode) {
+ default:
+ break;
+ case TargetInstrInfo::INLINEASM: {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const char *AsmStr = MI.getOperand(0).getSymbolName();
+ const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo();
+ FinalSize += AI->getInlineAsmLength(AsmStr);
+ break;
+ }
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::DECLARE:
+ case X86::DWARF_LOC:
+ case X86::FP_REG_KILL:
+ break;
+ case X86::MOVPC32r: {
+ // This emits the "call" portion of this pseudo instruction.
+ ++FinalSize;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ break;
+ }
+ }
+ CurOp = NumOps;
+ break;
+ case X86II::RawFrm:
+ ++FinalSize;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ if (MO.isMBB()) {
+ FinalSize += sizePCRelativeBlockAddress();
+ } else if (MO.isGlobal()) {
+ FinalSize += sizeGlobalAddress(false);
+ } else if (MO.isSymbol()) {
+ FinalSize += sizeExternalSymbolAddress(false);
+ } else if (MO.isImm()) {
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ } else {
+ assert(0 && "Unknown RawFrm operand!");
+ }
+ }
+ break;
+
+ case X86II::AddRegFrm:
+ ++FinalSize;
+ ++CurOp;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm())
+ FinalSize += sizeConstant(Size);
+ else {
+ bool dword = false;
+ if (Opcode == X86::MOV64ri)
+ dword = true;
+ if (MO1.isGlobal()) {
+ FinalSize += sizeGlobalAddress(dword);
+ } else if (MO1.isSymbol())
+ FinalSize += sizeExternalSymbolAddress(dword);
+ else if (MO1.isCPI())
+ FinalSize += sizeConstPoolAddress(dword);
+ else if (MO1.isJTI())
+ FinalSize += sizeJumpTableAddress(dword);
+ }
+ }
+ break;
+
+ case X86II::MRMDestReg: {
+ ++FinalSize;
+ FinalSize += sizeRegModRMByte();
+ CurOp += 2;
+ if (CurOp != NumOps) {
+ ++CurOp;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ }
+ break;
+ }
+ case X86II::MRMDestMem: {
+ ++FinalSize;
+ FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
+ CurOp += X86AddrNumOperands + 1;
+ if (CurOp != NumOps) {
+ ++CurOp;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ }
+ break;
+ }
+
+ case X86II::MRMSrcReg:
+ ++FinalSize;
+ FinalSize += sizeRegModRMByte();
+ CurOp += 2;
+ if (CurOp != NumOps) {
+ ++CurOp;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ }
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands;
+ if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ AddrOperands = X86AddrNumOperands - 1; // No segment register
+ else
+ AddrOperands = X86AddrNumOperands;
+
+ ++FinalSize;
+ FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
+ CurOp += AddrOperands + 1;
+ if (CurOp != NumOps) {
+ ++CurOp;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ }
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ ++FinalSize;
+ if (Desc->getOpcode() == X86::LFENCE ||
+ Desc->getOpcode() == X86::MFENCE) {
+      // Special handling of lfence and mfence.
+ FinalSize += sizeRegModRMByte();
+ } else if (Desc->getOpcode() == X86::MONITOR ||
+ Desc->getOpcode() == X86::MWAIT) {
+ // Special handling of monitor and mwait.
+ FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode.
+ } else {
+ ++CurOp;
+ FinalSize += sizeRegModRMByte();
+ }
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm())
+ FinalSize += sizeConstant(Size);
+ else {
+ bool dword = false;
+ if (Opcode == X86::MOV64ri32)
+ dword = true;
+ if (MO1.isGlobal()) {
+ FinalSize += sizeGlobalAddress(dword);
+ } else if (MO1.isSymbol())
+ FinalSize += sizeExternalSymbolAddress(dword);
+ else if (MO1.isCPI())
+ FinalSize += sizeConstPoolAddress(dword);
+ else if (MO1.isJTI())
+ FinalSize += sizeJumpTableAddress(dword);
+ }
+ }
+ break;
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+
+ ++FinalSize;
+ FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
+ CurOp += X86AddrNumOperands;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO.isImm())
+ FinalSize += sizeConstant(Size);
+ else {
+ bool dword = false;
+ if (Opcode == X86::MOV64mi32)
+ dword = true;
+ if (MO.isGlobal()) {
+ FinalSize += sizeGlobalAddress(dword);
+ } else if (MO.isSymbol())
+ FinalSize += sizeExternalSymbolAddress(dword);
+ else if (MO.isCPI())
+ FinalSize += sizeConstPoolAddress(dword);
+ else if (MO.isJTI())
+ FinalSize += sizeJumpTableAddress(dword);
+ }
+ }
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ ++FinalSize;
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ FinalSize += sizeRegModRMByte();
+ ++CurOp;
+ break;
+ }
+
+ if (!Desc->isVariadic() && CurOp != NumOps) {
+ cerr << "Cannot determine size: ";
+ MI.dump();
+ cerr << '\n';
+ abort();
+ }
+
+
+ return FinalSize;
+}
+
+
+unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
+ bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
+ unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
+ if (Desc.getOpcode() == X86::MOVPC32r) {
+ Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
+ }
+ return Size;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the global
+/// base register value. Output instructions required to initialize the
+/// register in the function entry block, if necessary.
+///
+unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+  // The operand of MovePCtoStack is completely ignored by the asm printer.
+  // It's only used in JIT code emission as a displacement to the pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC)
+ .addImm(0);
+
+  // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+  // not to pc, but to the _GLOBAL_OFFSET_TABLE_ external.
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ GlobalBaseReg =
+ RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+ } else {
+ GlobalBaseReg = PC;
+ }
+
+ X86FI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
new file mode 100644
index 0000000..e09769e
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -0,0 +1,461 @@
+//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRUCTIONINFO_H
+#define X86INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class X86RegisterInfo;
+ class X86TargetMachine;
+
+namespace X86 {
+  // X86 specific condition codes. These correspond to X86_*_COND in
+  // X86InstrInfo.td. They must be kept in sync.
+ enum CondCode {
+ COND_A = 0,
+ COND_AE = 1,
+ COND_B = 2,
+ COND_BE = 3,
+ COND_E = 4,
+ COND_G = 5,
+ COND_GE = 6,
+ COND_L = 7,
+ COND_LE = 8,
+ COND_NE = 9,
+ COND_NO = 10,
+ COND_NP = 11,
+ COND_NS = 12,
+ COND_O = 13,
+ COND_P = 14,
+ COND_S = 15,
+
+ // Artificial condition codes. These are used by AnalyzeBranch
+ // to indicate a block terminated with two conditional branches to
+ // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs.
+ COND_NE_OR_P,
+ COND_NP_OR_E,
+
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(X86::CondCode CC);
+
+}
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction types. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+  // or one that has not been implemented yet. It is illegal to code-generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+  /// information. In the Intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+    // AdSize - Set if this instruction requires an address size prefix (0x67),
+    // which most often indicates that the instruction uses 16 bit addresses
+    // instead of 32 bit addresses (or 32 bit addresses in 64 bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+    // used to obtain the setting of this field. If no bits in this field are
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0xF << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+    // etc. We only care about the REX.W and REX.R bits, and only the former
+    // is statically determined.
+ //
+ REXShift = 12,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = 13,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm16 = 2 << ImmShift,
+ Imm32 = 3 << ImmShift,
+ Imm64 = 4 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = 16,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the implicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Lock prefix
+ LOCKShift = 19,
+ LOCK = 1 << LOCKShift,
+
+    // Segment override prefixes. Currently we just need the ability to
+    // address data in the gs and fs segments.
+ SegOvrShift = 20,
+ SegOvrMask = 3 << SegOvrShift,
+ FS = 1 << SegOvrShift,
+ GS = 2 << SegOvrShift,
+
+ // Bits 22 -> 23 are unused
+ OpcodeShift = 24,
+ OpcodeMask = 0xFF << OpcodeShift
+ };
+}
+
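+// The five operands of an X86 memory reference: base register, scale,
+// index register, displacement, and segment register.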
+const int X86AddrNumOperands = 5;
+
+inline static bool isScale(const MachineOperand &MO) {
+ return MO.isImm() &&
+ (MO.getImm() == 1 || MO.getImm() == 2 ||
+ MO.getImm() == 4 || MO.getImm() == 8);
+}
+
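+// isLeaMem matches the four-operand lea address form (base, scale, index,
+// displacement); isMem additionally requires the trailing segment register
+// operand.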
+inline static bool isLeaMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+4 <= MI->getNumOperands() &&
+ MI->getOperand(Op ).isReg() && isScale(MI->getOperand(Op+1)) &&
+ MI->getOperand(Op+2).isReg() &&
+ (MI->getOperand(Op+3).isImm() ||
+ MI->getOperand(Op+3).isGlobal() ||
+ MI->getOperand(Op+3).isCPI() ||
+ MI->getOperand(Op+3).isJTI());
+}
+
+inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+5 <= MI->getNumOperands() &&
+ MI->getOperand(Op+4).isReg() &&
+ isLeaMem(MI, Op);
+}
+
+class X86InstrInfo : public TargetInstrInfoImpl {
+ X86TargetMachine &TM;
+ const X86RegisterInfo RI;
+
+ /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+ /// RegOp2MemOpTable2 - Load / store folding opcode maps.
+ ///
+ DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
+ DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
+ DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
+ DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
+
+ /// MemOp2RegOpTable - Load / store unfolding opcode map.
+ ///
+ DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+
+public:
+ explicit X86InstrInfo(X86TargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
+
+  /// Return true if the instruction is a register-to-register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ bool isInvariantLoad(const MachineInstr *MI) const;
+
+ /// convertToThreeAddress - This method must be implemented by targets that
+ /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+ /// may be able to convert a two-address instruction into a true
+ /// three-address instruction on demand. This allows the X86 target (for
+ /// example) to convert ADD and SHL instructions into LEA instructions if they
+ /// would require register copies due to two-addressness.
+ ///
+ /// This method returns a null pointer if the transformation cannot be
+ /// performed, otherwise it returns the new instruction.
+ ///
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// commuteInstruction - We have a few instructions that must be hacked on to
+ /// commute them.
+ ///
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ // Branch analysis.
+ virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ /// foldMemoryOperand - If this target supports it, fold a load or store of
+ /// the specified stack slot into the specified machine instruction for the
+ /// specified operand(s). If this is possible, the target should perform the
+ /// folding and return true, otherwise it should return false. If it folds
+ /// the instruction, it is likely that the MachineInstruction the iterator
+ /// references has been changed.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ /// foldMemoryOperand - Same as the previous version except it allows folding
+ /// of any load and store from / to any address, not just from a specific
+ /// stack slot.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const;
+
+  /// canFoldMemoryOperand - Returns true if the specified load / store
+  /// folding is possible.
+ virtual bool canFoldMemoryOperand(const MachineInstr*,
+ const SmallVectorImpl<unsigned> &) const;
+
+  /// unfoldMemoryOperand - Separate a single instruction which folded a load
+  /// or a store or a load and a store into two or more instructions. If this
+  /// is possible, returns true as well as the new instructions by reference.
+ virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const;
+
+  /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
+  /// instruction after the load / store is unfolded from an instruction of the
+ /// specified opcode. It returns zero if the specified unfolding is not
+ /// possible.
+ virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore) const;
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine
+ /// instruction that defines the specified register class.
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified machine instruction.
+ //
+ unsigned char getBaseOpcodeFor(const TargetInstrDesc *TID) const {
+ return TID->TSFlags >> X86II::OpcodeShift;
+ }
+ unsigned char getBaseOpcodeFor(unsigned Opcode) const {
+ return getBaseOpcodeFor(&get(Opcode));
+ }
+
+ static bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+ }
+
+ static unsigned sizeOfImm(const TargetInstrDesc *Desc);
+ static bool isX86_64ExtendedReg(const MachineOperand &MO);
+ static unsigned determineREX(const MachineInstr &MI);
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+  /// getGlobalBaseReg - Return a virtual register initialized with the global
+  /// base register value. Output instructions required to initialize the
+  /// register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+private:
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
new file mode 100644
index 0000000..50ae417
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -0,0 +1,3961 @@
+//===- X86InstrInfo.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 specific DAG Nodes.
+//
+
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+
+def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDTX86Cmov : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
+
+// Unary and binary operator instructions that set EFLAGS as a side-effect.
+def SDTUnaryArithWithFlags : SDTypeProfile<1, 1,
+ [SDTCisInt<0>]>;
+def SDTBinaryArithWithFlags : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>]>;
+def SDTX86BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86SetCC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+ SDTCisVT<2, i8>]>;
+def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
+def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
+
+def SDT_X86CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_X86CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
+def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
+
+def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+
+def X86bsf : SDNode<"X86ISD::BSF", SDTIntUnaryOp>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTIntUnaryOp>;
+def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
+def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+
+def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+
+def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+
+def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
+def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
+ [SDNPHasChain]>;
+def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+
+def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ SDNPMayLoad]>;
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ SDNPMayLoad]>;
+def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def X86callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def X86callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+
+def X86tailcall: SDNode<"X86ISD::TAILCALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ SDNPMayLoad]>;
+
+def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc,
+ [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>;
+
+def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+
+def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress",
+ SDT_X86SegmentBaseAddress, []>;
+
+def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
+ [SDNPHasChain]>;
+
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>;
+def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>;
+def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
+def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
+
+def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+
+//===----------------------------------------------------------------------===//
+// X86 Operand Definitions.
+//
+
+// *mem - Operand definitions for the funky X86 addressing mode operands.
+//
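+// Each memory operand is (base register, scale, index register,
+// displacement, segment register).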
+class X86MemOperand<string printMethod> : Operand<iPTR> {
+ let PrintMethod = printMethod;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+}
+
+def i8mem : X86MemOperand<"printi8mem">;
+def i16mem : X86MemOperand<"printi16mem">;
+def i32mem : X86MemOperand<"printi32mem">;
+def i64mem : X86MemOperand<"printi64mem">;
+def i128mem : X86MemOperand<"printi128mem">;
+def f32mem : X86MemOperand<"printf32mem">;
+def f64mem : X86MemOperand<"printf64mem">;
+def f80mem : X86MemOperand<"printf80mem">;
+def f128mem : X86MemOperand<"printf128mem">;
+
+// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
+// plain GR64, so that it doesn't potentially require a REX prefix.
+def i8mem_NOREX : Operand<i64> {
+ let PrintMethod = "printi8mem";
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm);
+}
+
+def lea32mem : Operand<i32> {
+ let PrintMethod = "printlea32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+}
+
+def SSECC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+}
+
+def piclabel: Operand<i32> {
+ let PrintMethod = "printPICLabel";
+}
+
+// A couple of more descriptive operand definitions.
+// 16-bits but only 8 bits are significant.
+def i16i8imm : Operand<i16>;
+// 32-bits but only 8 bits are significant.
+def i32i8imm : Operand<i32>;
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+//===----------------------------------------------------------------------===//
+// X86 Complex Pattern Definitions.
+//
+
+// Define X86 specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
+def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
+ [add, sub, mul, shl, or, frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Predicate Definitions.
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
+def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
+def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+include "X86InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments...
+//
+
+// X86 specific condition codes. These correspond to CondCode in
+// X86InstrInfo.h. They must be kept in sync.
+def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE
+def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC
+def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
+def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA
+def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
+def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE
+def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL
+def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE
+def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG
+def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ
+def X86_COND_NO : PatLeaf<(i8 10)>;
+def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
+def X86_COND_NS : PatLeaf<(i8 12)>;
+def X86_COND_O : PatLeaf<(i8 13)>;
+def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
+def X86_COND_S : PatLeaf<(i8 15)>;
+
+def i16immSExt8 : PatLeaf<(i16 imm), [{
+  // i16immSExt8 predicate - True if the 16-bit immediate fits in an 8-bit
+  // sign extended field.
+ return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}]>;
+
+def i32immSExt8 : PatLeaf<(i32 imm), [{
+  // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
+  // sign extended field.
+ return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}]>;
+
+// Helper fragments for loads.
+// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
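+// Address spaces above 255 denote an x86 segment override (gs is 256, fs is
+// 257; see gsload and fsload below), so these fragments reject such loads.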
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (const Value *Src = LD->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (const Value *Src = LD->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (const Value *Src = LD->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return false;
+}]>;
+
+def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (const Value *Src = LD->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ if (LD->isVolatile())
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4;
+ return false;
+}]>;
+
+def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 256;
+ return false;
+}]>;
+
+def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 257;
+ return false;
+}]>;
+
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+
+def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+
+def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
+def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
+def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+
+def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
+def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
+def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// An 'srl' node with a single use.
+def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// A 'trunc' node with a single use.
+def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+ return N->hasOneUse();
+}]>;
+
+// 'shld' and 'shrd' instruction patterns. Note that even though these have
+// the srl and shl in their patterns, the C++ code must still check for them,
+// because predicates are tested before child nodes are explored.
+
+def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
+ (or (srl node:$src1, node:$amt1),
+ (shl node:$src2, node:$amt2)), [{
+ assert(N->getOpcode() == ISD::OR);
+ return N->getOperand(0).getOpcode() == ISD::SRL &&
+ N->getOperand(1).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+ N->getOperand(0).getConstantOperandVal(1) ==
+ N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
+}]>;
+
+def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
+ (or (shl node:$src1, node:$amt1),
+ (srl node:$src2, node:$amt2)), [{
+ assert(N->getOpcode() == ISD::OR);
+ return N->getOperand(0).getOpcode() == ISD::SHL &&
+ N->getOperand(1).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+ N->getOperand(0).getConstantOperandVal(1) ==
+ N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list...
+//
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
+}
+
+// Nop
+let neverHasSideEffects = 1 in
+ def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+
+// PIC base
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label),
+ "call\t$label\n\t"
+ "pop{l}\t$reg", []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP, FPFormBits = SpecialFP.Value in {
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret",
+ [(X86retflag 0)]>;
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "ret\t$amt",
+ [(X86retflag imm:$amt)]>;
+}
+
+// All branches are RawFrm, Void, Branch, and Terminators
+let isBranch = 1, isTerminator = 1 in
+ class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> :
+ I<opcode, RawFrm, (outs), ins, asm, pattern>;
+
+let isBranch = 1, isBarrier = 1 in
+ def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>;
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+ [(brind GR32:$dst)]>;
+ def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+ [(brind (loadi32 addr:$dst))]>;
+}
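+// JMP32m is what a switch jump table typically lowers to, e.g.
+//   jmpl *.LJTI0_0(,%eax,4)
+// which loads the branch target from a table indexed by the case value.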
+
+// Conditional branches
+let Uses = [EFLAGS] in {
+def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst",
+ [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB;
+def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst",
+ [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB;
+def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst",
+ [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB;
+def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst",
+ [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB;
+def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst",
+ [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB;
+def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst",
+ [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB;
+
+def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst",
+ [(X86brcond bb:$dst, X86_COND_B, EFLAGS)]>, TB;
+def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst",
+ [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB;
+def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst",
+ [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB;
+def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst",
+ [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB;
+
+def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst",
+ [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB;
+def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst",
+ [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB;
+def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst",
+ [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB;
+def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst",
+ [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB;
+def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst",
+ [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB;
+def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst",
+ [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB;
+} // Uses = [EFLAGS]
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops),
+ "call\t${dst:call}", []>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call GR32:$dst)]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
+ }
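+// Sketch of the effect of the clobber list: after a call, only
+// EBX/ESI/EDI/EBP (and ESP) are assumed to hold their values, so the
+// register allocator must spill or avoid the listed registers across calls.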
+
+// Tail call instructions.
+
+def TAILCALL : I<0, Pseudo, (outs), (ins),
+ "#TAILCALL",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURN $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURN $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+  def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
+                 []>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL",
+ []>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst),
+ "jmp\t{*}$dst # TAILCALL", []>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
+def LEAVE : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>;
+
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+
+let mayStore = 1 in
+def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+}
+
+let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in
+def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf", []>;
+let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf", []>;
+
+let isTwoAddress = 1 in // GR32 = bswap GR32
+ def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB;
+def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (X86bsf (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB;
+def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86bsf (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+
+def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB;
+def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (X86bsr (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB;
+def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86bsr (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+let neverHasSideEffects = 1 in
+def LEA16r : I<0x8D, MRMSrcMem,
+ (outs GR16:$dst), (ins i32mem:$src),
+ "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
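+// LEA does the full address computation without touching EFLAGS, e.g.
+//   leal 4(%eax,%ebx,2), %ecx   ; ECX = EAX + 2*EBX + 4
+// which is why it is safe to rematerialize and why shift/add sequences are
+// often converted into it.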
+
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI] in {
+def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)]>, REP;
+}
+
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI] in
+def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>, REP;
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI] in
+def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>, REP, OpSize;
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in
+def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>, REP;
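+// Illustrative use: memset(p, 0, 4*n) can lower to
+//   mov ecx, n ; mov edi, p ; xor eax, eax ; rep stosd
+// which stores ECX copies of EAX at [EDI], advancing EDI and counting ECX
+// down to zero (hence ECX and EDI appearing in both Defs and Uses).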
+
+let Defs = [RAX, RDX] in
+def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
+ TB;
+
+let isBarrier = 1, hasCtrlDep = 1 in {
+def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+}
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions...
+//
+let Defs = [AL], Uses = [DX] in
+def IN8rr : I<0xEC, RawFrm, (outs), (ins),
+ "in{b}\t{%dx, %al|%AL, %DX}", []>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+ "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+ "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+
+let Defs = [AL] in
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port),
+ "in{b}\t{$port, %al|%AL, $port}", []>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
+ "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
+ "in{l}\t{$port, %eax|%EAX, $port}", []>;
+
+let Uses = [DX, AL] in
+def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
+ "out{b}\t{%al, %dx|%DX, %AL}", []>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+
+let Uses = [AL] in
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port),
+ "out{b}\t{%al, $port|$port, %AL}", []>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
+ "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
+ "out{l}\t{%eax, $port|$port, %EAX}", []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions...
+//
+let neverHasSideEffects = 1 in {
+def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, imm:$src)]>, OpSize;
+def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, imm:$src)]>;
+}
+def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store (i16 imm:$src), addr:$dst)]>, OpSize;
+def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store (i32 imm:$src), addr:$dst)]>;
+
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, (loadi8 addr:$src))]>;
+def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize;
+def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (loadi32 addr:$src))]>;
+}
+
+def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store GR16:$src, addr:$dst)]>, OpSize;
+def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store GR32:$src, addr:$dst)]>;
+
+// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
+// that they can be used for copying and storing h registers, which can't be
+// encoded when a REX prefix is present.
+let neverHasSideEffects = 1 in
+def MOV8rr_NOREX : I<0x88, MRMDestReg,
+ (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayStore = 1 in
+def MOV8mr_NOREX : I<0x88, MRMDestMem,
+ (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayLoad = 1,
+ canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
+ (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions...
+//
+
+// Extra precision multiplication
+let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
+              // FIXME: Used for 8-bit mul; the upper 8 bits of the result
+              // are ignored. This probably ought to be moved to a
+              // def : Pat<> if the syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)]>; // AL,AH = AL*GR8
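+// Example of the widening semantics: with AL = 200 and an 8-bit operand of
+// 2, "mul" produces AX = 400, i.e. AL = 0x90 and AH = 0x01; the pattern
+// above only models the AL half, which is what the FIXME refers to.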
+
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*GR16
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*GR32
+
+let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+ "mul{b}\t$src",
+              // FIXME: Used for 8-bit mul; the upper 8 bits of the result
+              // are ignored. This probably ought to be moved to a
+              // def : Pat<> if the syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*[mem16]
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*[mem32]
+}
+
+let neverHasSideEffects = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
+ // AL,AH = AL*GR8
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
+ OpSize; // AX,DX = AX*GR16
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
+ // EAX,EDX = EAX*GR32
+let mayLoad = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+ "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+ "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+let Defs = [EAX,EDX], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+ "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+}
+} // neverHasSideEffects
+
+// Unsigned division/remainder.
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l}\t$src", []>;
+let mayLoad = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX
+ "div{l}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l}\t$src", []>;
+let mayLoad = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX
+ "idiv{l}\t$src", []>;
+}
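+// Semantics sketch: idiv divides the double-width EDX:EAX (or DX:AX, or AX)
+// by the operand; e.g. with EDX:EAX = -7 and a divisor of 2, the result is
+// EAX = -3 (quotient, truncated toward zero) and EDX = -1 (remainder).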
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+let isTwoAddress = 1 in {
+
+// Conditional moves
+let Uses = [EFLAGS] in {
+let isCommutable = 1 in {
+def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_B, EFLAGS))]>,
+ TB, OpSize;
+def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_B, EFLAGS))]>,
+ TB;
+def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_AE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_AE, EFLAGS))]>,
+ TB;
+def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_E, EFLAGS))]>,
+ TB, OpSize;
+def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_E, EFLAGS))]>,
+ TB;
+def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NE, EFLAGS))]>,
+ TB;
+def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_BE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_BE, EFLAGS))]>,
+ TB;
+def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_A, EFLAGS))]>,
+ TB, OpSize;
+def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_A, EFLAGS))]>,
+ TB;
+def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_L, EFLAGS))]>,
+ TB, OpSize;
+def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_L, EFLAGS))]>,
+ TB;
+def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_GE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_GE, EFLAGS))]>,
+ TB;
+def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_LE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_LE, EFLAGS))]>,
+ TB;
+def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_G, EFLAGS))]>,
+ TB, OpSize;
+def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_G, EFLAGS))]>,
+ TB;
+def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_S, EFLAGS))]>,
+ TB, OpSize;
+def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_S, EFLAGS))]>,
+ TB;
+def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NS, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NS, EFLAGS))]>,
+ TB;
+def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_P, EFLAGS))]>,
+ TB, OpSize;
+def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_P, EFLAGS))]>,
+ TB;
+def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NP, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NP, EFLAGS))]>,
+ TB;
+def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_O, EFLAGS))]>,
+ TB, OpSize;
+def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_O, EFLAGS))]>,
+ TB;
+def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NO, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NO, EFLAGS))]>,
+ TB;
+} // isCommutable = 1
+
+def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_B, EFLAGS))]>,
+ TB, OpSize;
+def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_B, EFLAGS))]>,
+ TB;
+def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_AE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_AE, EFLAGS))]>,
+ TB;
+def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_E, EFLAGS))]>,
+ TB, OpSize;
+def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_E, EFLAGS))]>,
+ TB;
+def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NE, EFLAGS))]>,
+ TB;
+def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_BE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_BE, EFLAGS))]>,
+ TB;
+def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_A, EFLAGS))]>,
+ TB, OpSize;
+def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_A, EFLAGS))]>,
+ TB;
+def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_L, EFLAGS))]>,
+ TB, OpSize;
+def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_L, EFLAGS))]>,
+ TB;
+def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_GE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_GE, EFLAGS))]>,
+ TB;
+def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_LE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_LE, EFLAGS))]>,
+ TB;
+def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_G, EFLAGS))]>,
+ TB, OpSize;
+def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_G, EFLAGS))]>,
+ TB;
+def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_S, EFLAGS))]>,
+ TB, OpSize;
+def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_S, EFLAGS))]>,
+ TB;
+def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NS, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NS, EFLAGS))]>,
+ TB;
+def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_P, EFLAGS))]>,
+ TB, OpSize;
+def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_P, EFLAGS))]>,
+ TB;
+def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NP, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NP, EFLAGS))]>,
+ TB;
+def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_O, EFLAGS))]>,
+ TB, OpSize;
+def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_O, EFLAGS))]>,
+ TB;
+def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NO, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NO, EFLAGS))]>,
+ TB;
+} // Uses = [EFLAGS]
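+// Usage sketch: cmov replaces a select with straight-line code, e.g.
+//   cmpl %ebx, %eax
+//   cmovg %ebx, %eax   ; EAX = min(EAX, EBX), signed
+// which is exactly what the (X86cmov a, b, cond, EFLAGS) patterns select.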
+
+
+// Unary instructions.
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src)),
+ (implicit EFLAGS)]>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src)),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src)),
+ (implicit EFLAGS)]>;
+let isTwoAddress = 0 in {
+ def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+ def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+}
+} // Defs = [EFLAGS]
+
+// Match (xor x, -1) to not. The AddedComplexity favors these patterns over a
+// move-immediate plus xor, saving code size.
+let AddedComplexity = 15 in {
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src))]>;
+}
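+// 'not' here is the target-independent PatFrag for (xor x, -1), so with the
+// AddedComplexity these patterns win over materializing a -1 immediate and
+// selecting the xor form, e.g. "notl %eax" instead of "xorl $-1, %eax".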
+let isTwoAddress = 0 in {
+ def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+ def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+}
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
+ [(set GR8:$dst, (add GR8:$src, 1)),
+ (implicit EFLAGS)]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst",
+ [(set GR16:$dst, (add GR16:$src, 1)),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst",
+ [(set GR32:$dst, (add GR32:$src, 1)),
+ (implicit EFLAGS)]>, Requires<[In32BitMode]>;
+}
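+// The Requires<[In32BitMode]> is load-bearing: the one-byte 0x40-0x4F
+// inc/dec encodings are reinterpreted as REX prefixes in 64-bit mode, where
+// the two-byte 0xFF /0 and 0xFF /1 forms must be used instead.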
+let isTwoAddress = 0, CodeSize = 2 in {
+ def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+}
+
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
+ [(set GR8:$dst, (add GR8:$src, -1)),
+ (implicit EFLAGS)]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst",
+ [(set GR16:$dst, (add GR16:$src, -1)),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst",
+ [(set GR32:$dst, (add GR32:$src, -1)),
+ (implicit EFLAGS)]>, Requires<[In32BitMode]>;
+}
+
+let isTwoAddress = 0, CodeSize = 2 in {
+ def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+}
+} // Defs = [EFLAGS]
+
+// Logical operators...
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y
+def AND8rr : I<0x20, MRMDestReg,
+ (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "and{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+def AND16rr : I<0x21, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def AND32rr : I<0x21, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+}
+
+def AND8rm : I<0x22, MRMSrcMem,
+ (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
+ "and{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, (loadi8 addr:$src2))),
+ (implicit EFLAGS)]>;
+def AND16rm : I<0x23, MRMSrcMem,
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, (loadi16 addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def AND32rm : I<0x23, MRMSrcMem,
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (loadi32 addr:$src2))),
+ (implicit EFLAGS)]>;
+
+def AND8ri : Ii8<0x80, MRM4r,
+ (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
+ "and{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def AND16ri : Ii16<0x81, MRM4r,
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def AND32ri : Ii32<0x81, MRM4r,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def AND16ri8 : Ii8<0x83, MRM4r,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>,
+ OpSize;
+def AND32ri8 : Ii8<0x83, MRM4r,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 0 in {
+ def AND8mr : I<0x20, MRMDestMem,
+ (outs), (ins i8mem :$dst, GR8 :$src),
+ "and{b}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def AND16mr : I<0x21, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src),
+ "and{w}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def AND32mr : I<0x21, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src),
+ "and{l}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def AND8mi : Ii8<0x80, MRM4m,
+ (outs), (ins i8mem :$dst, i8imm :$src),
+ "and{b}\t{$src, $dst|$dst, $src}",
+ [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def AND16mi : Ii16<0x81, MRM4m,
+ (outs), (ins i16mem:$dst, i16imm:$src),
+ "and{w}\t{$src, $dst|$dst, $src}",
+ [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def AND32mi : Ii32<0x81, MRM4m,
+ (outs), (ins i32mem:$dst, i32imm:$src),
+ "and{l}\t{$src, $dst|$dst, $src}",
+ [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def AND16mi8 : Ii8<0x83, MRM4m,
+ (outs), (ins i16mem:$dst, i16i8imm :$src),
+ "and{w}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def AND32mi8 : Ii8<0x83, MRM4m,
+ (outs), (ins i32mem:$dst, i32i8imm :$src),
+ "and{l}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+}
+
+
+let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
+def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "or{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "or{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "or{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+}
+def OR8rm : I<0x0A, MRMSrcMem , (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
+ "or{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def OR16rm : I<0x0B, MRMSrcMem , (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "or{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "or{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "or{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "or{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "or{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+
+def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "or{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "or{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+let isTwoAddress = 0 in {
+ def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "or{b}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "or{w}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "or{l}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def OR8mi : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "or{b}\t{$src, $dst|$dst, $src}",
+ [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def OR16mi : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "or{w}\t{$src, $dst|$dst, $src}",
+ [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def OR32mi : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "or{l}\t{$src, $dst|$dst, $src}",
+ [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def OR16mi8 : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src),
+ "or{w}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
+ "or{l}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+} // isTwoAddress = 0
+
+
+let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
+ def XOR8rr : I<0x30, MRMDestReg,
+ (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+ def XOR16rr : I<0x31, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+ def XOR32rr : I<0x31, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+} // isCommutable = 1
+
+def XOR8rm : I<0x32, MRMSrcMem ,
+ (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def XOR16rm : I<0x33, MRMSrcMem ,
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>,
+ OpSize;
+def XOR32rm : I<0x33, MRMSrcMem ,
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+def XOR8ri : Ii8<0x80, MRM6r,
+ (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def XOR16ri : Ii16<0x81, MRM6r,
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def XOR32ri : Ii32<0x81, MRM6r,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def XOR16ri8 : Ii8<0x83, MRM6r,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>,
+ OpSize;
+def XOR32ri8 : Ii8<0x83, MRM6r,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 0 in {
+ def XOR8mr : I<0x30, MRMDestMem,
+ (outs), (ins i8mem :$dst, GR8 :$src),
+ "xor{b}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def XOR16mr : I<0x31, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src),
+ "xor{w}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def XOR32mr : I<0x31, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src),
+ "xor{l}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def XOR8mi : Ii8<0x80, MRM6m,
+ (outs), (ins i8mem :$dst, i8imm :$src),
+ "xor{b}\t{$src, $dst|$dst, $src}",
+ [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def XOR16mi : Ii16<0x81, MRM6m,
+ (outs), (ins i16mem:$dst, i16imm:$src),
+ "xor{w}\t{$src, $dst|$dst, $src}",
+ [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def XOR32mi : Ii32<0x81, MRM6m,
+ (outs), (ins i32mem:$dst, i32imm:$src),
+ "xor{l}\t{$src, $dst|$dst, $src}",
+ [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def XOR16mi8 : Ii8<0x83, MRM6m,
+ (outs), (ins i16mem:$dst, i16i8imm :$src),
+ "xor{w}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def XOR32mi8 : Ii8<0x83, MRM6m,
+ (outs), (ins i32mem:$dst, i32i8imm :$src),
+ "xor{l}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+} // isTwoAddress = 0
+} // Defs = [EFLAGS]
+
+// Shift instructions
+let Defs = [EFLAGS] in {
+let Uses = [CL] in {
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
+ "shl{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (shl GR8:$src, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
+ "shl{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
+ "shl{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (shl GR32:$src, CL))]>;
+} // Uses = [CL]
+
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "shl{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shl{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shl{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
+// cheaper.
+} // isConvertibleToThreeAddress = 1
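+// Example of the LEA conversion: under the two-address constraint,
+//   shll $3, %eax        ; would clobber a still-live EAX
+// can instead become
+//   leal (,%eax,8), %ecx
+// computing the shifted value into a fresh register.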
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shl{b}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shl{w}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shl{l}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t$dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t$dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t$dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+let Uses = [CL] in {
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
+ "shr{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (srl GR8:$src, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
+ "shr{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
+ "shr{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (srl GR32:$src, CL))]>;
+}
+
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "shr{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shr{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shr{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+ "shr{b}\t$dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t$dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t$dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ OpSize;
+ def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shr{b}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shr{w}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shr{l}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t$dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t$dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+ def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t$dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+let Uses = [CL] in {
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
+ "sar{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (sra GR8:$src, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
+ "sar{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
+ "sar{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (sra GR32:$src, CL))]>;
+}
+
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "sar{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "sar{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "sar{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t$dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t$dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t$dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "sar{b}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "sar{w}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "sar{l}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t$dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t$dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t$dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+// Rotate instructions
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
+ "rol{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (rotl GR8:$src, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
+ "rol{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
+ "rol{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (rotl GR32:$src, CL))]>;
+}
+
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "rol{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "rol{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "rol{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t$dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t$dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t$dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "rol{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "rol{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "rol{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Rotate by 1
+ def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t$dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t$dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t$dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+let Uses = [CL] in {
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
+ "ror{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (rotr GR8:$src, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
+ "ror{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
+ "ror{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (rotr GR32:$src, CL))]>;
+}
+
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "ror{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "ror{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "ror{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t$dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t$dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t$dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "ror{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "ror{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "ror{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Rotate by 1
+ def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t$dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t$dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t$dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+
+
+// Double shift instructions (generalizations of rotate)
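+// For a count c in 1-31 (illustrative, AT&T syntax):
+//   shld $c, %ebx, %eax  computes  EAX = (EAX << c) | (EBX >> (32-c))
+//   shrd $c, %ebx, %eax  computes  EAX = (EAX >> c) | (EBX << (32-c))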
+let Uses = [CL] in {
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+}
+
+let isCommutable = 1 in { // These instructions commute to each other.
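+// For counts 1-31, shld(A, B, c) == shrd(B, A, 32-c): both evaluate to
+// (A << c) | (B >> (32-c)), so commuting swaps the operands and replaces
+// the count c with 32-c.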
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+}
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+ def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+ }
+ def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+ def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+ let Uses = [CL] in {
+ def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+ def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+ }
+ def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+ def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+}
+} // Defs = [EFLAGS]
+
+
+// Arithmetic.
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
+// Register-Register Addition
+def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
+ (ins GR8 :$src1, GR8 :$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+// Register-Register Addition
+def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+} // end isConvertibleToThreeAddress
+} // end isCommutable
+
+// Register-Memory Addition
+def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
+ (ins GR8 :$src1, i8mem :$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Addition
+def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+// Register-Integer Addition
+def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+}
+
+let isTwoAddress = 0 in {
+ // Memory-Register Addition
+ def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+}
+
+let Uses = [EFLAGS] in {
+let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
+def ADC8rr : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>;
+def ADC16rr : I<0x11, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize;
+def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
+}
+def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
+ (ins GR8:$src1, i8mem:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>;
+def ADC16rm : I<0x13, MRMSrcMem , (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>,
+ OpSize;
+def ADC32rm : I<0x13, MRMSrcMem , (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
+def ADC8ri : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>;
+def ADC16ri : Ii16<0x81, MRM2r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize;
+def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def ADC32ri : Ii32<0x81, MRM2r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
+def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
+
+let isTwoAddress = 0 in {
+ def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
+ def ADC16mr : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>,
+ OpSize;
+ def ADC32mr : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
+ def ADC8mi : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ def ADC16mi : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
+ OpSize;
+ def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
+ OpSize;
+ def ADC32mi : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+}
+} // Uses = [EFLAGS]
+
+// Register-Register Subtraction
+def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+
+// Register-Memory Subtraction
+def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
+ (ins GR8 :$src1, i8mem :$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Subtraction
+def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
+ (ins GR8:$src1, i8imm:$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 0 in {
+ // Memory-Register Subtraction
+ def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+
+ // Memory-Integer Subtraction
+ def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
+ (implicit EFLAGS)]>;
+ def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+}
+
+let Uses = [EFLAGS] in {
+def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
+ (ins GR8:$src1, GR8:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>;
+def SBB16rr : I<0x19, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize;
+def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
+
+let isTwoAddress = 0 in {
+ def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
+ def SBB16mr : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>,
+ OpSize;
+ def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
+  def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
+ OpSize;
+ def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
+ OpSize;
+ def SBB32mi : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+}
+def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>;
+def SBB16rm : I<0x1B, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>,
+ OpSize;
+def SBB32rm : I<0x1B, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
+def SBB8ri : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>;
+def SBB16ri : Ii16<0x81, MRM3r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize;
+def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def SBB32ri : Ii32<0x81, MRM3r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
+def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
+} // Uses = [EFLAGS]
+} // Defs = [EFLAGS]
+
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+} // end Two Address instructions
+
+// Surprisingly enough, these are not two address instructions!
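+// e.g. "imul $10, %ecx, %eax" is a true three-operand form: it computes
+// EAX = ECX * 10 without reading EAX at all.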
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)),
+ (implicit EFLAGS)]>;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul (load addr:$src1),
+ i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul (load addr:$src1),
+ i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+//===----------------------------------------------------------------------===//
+// Test instructions are just like AND, except they don't generate a result.
+//
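+// e.g. "test $8, %al" computes AL & 8, sets the flags from that value,
+// and discards the value itself.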
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in { // TEST X, Y --> TEST Y, X
+def TEST8rr : I<0x84, MRMDestReg, (outs), (ins GR8:$src1, GR8:$src2),
+ "test{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR8:$src1, GR8:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST16rr : I<0x85, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "test{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR16:$src1, GR16:$src2), 0),
+ (implicit EFLAGS)]>,
+ OpSize;
+def TEST32rr : I<0x85, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "test{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR32:$src1, GR32:$src2), 0),
+ (implicit EFLAGS)]>;
+}
+
+def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2),
+ "test{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0),
+ (implicit EFLAGS)]>;
+def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
+ "test{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0),
+ (implicit EFLAGS)]>, OpSize;
+def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
+ "test{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0),
+ (implicit EFLAGS)]>;
+
+def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8
+ (outs), (ins GR8:$src1, i8imm:$src2),
+ "test{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR8:$src1, imm:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16
+ (outs), (ins GR16:$src1, i16imm:$src2),
+ "test{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR16:$src1, imm:$src2), 0),
+ (implicit EFLAGS)]>, OpSize;
+def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32
+ (outs), (ins GR32:$src1, i32imm:$src2),
+ "test{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR32:$src1, imm:$src2), 0),
+ (implicit EFLAGS)]>;
+
+def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8
+ (outs), (ins i8mem:$src1, i8imm:$src2),
+ "test{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16
+ (outs), (ins i16mem:$src1, i16imm:$src2),
+ "test{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0),
+ (implicit EFLAGS)]>, OpSize;
+def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32
+ (outs), (ins i32mem:$src1, i32imm:$src2),
+ "test{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+
+// Condition code ops, incl. set if equal/not equal/...
+let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
+let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
+def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
+
+let Uses = [EFLAGS] in {
+def SETEr : I<0x94, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "sete\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
+ TB; // GR8 = ==
+def SETEm : I<0x94, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "sete\t$dst",
+ [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = ==
+
+def SETNEr : I<0x95, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setne\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>,
+ TB; // GR8 = !=
+def SETNEm : I<0x95, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setne\t$dst",
+ [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = !=
+
+def SETLr : I<0x9C, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setl\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>,
+ TB; // GR8 = < signed
+def SETLm : I<0x9C, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setl\t$dst",
+ [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = < signed
+
+def SETGEr : I<0x9D, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setge\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>,
+ TB; // GR8 = >= signed
+def SETGEm : I<0x9D, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setge\t$dst",
+ [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = >= signed
+
+def SETLEr : I<0x9E, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setle\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>,
+ TB; // GR8 = <= signed
+def SETLEm : I<0x9E, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setle\t$dst",
+ [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = <= signed
+
+def SETGr : I<0x9F, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setg\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>,
+ TB; // GR8 = > signed
+def SETGm : I<0x9F, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setg\t$dst",
+ [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = > signed
+
+def SETBr : I<0x92, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setb\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>,
+               TB; // GR8 = < unsigned
+def SETBm : I<0x92, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setb\t$dst",
+ [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
+               TB; // [mem8] = < unsigned
+
+def SETAEr : I<0x93, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setae\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>,
+               TB; // GR8 = >= unsigned
+def SETAEm : I<0x93, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setae\t$dst",
+ [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
+               TB; // [mem8] = >= unsigned
+
+def SETBEr : I<0x96, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setbe\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>,
+               TB; // GR8 = <= unsigned
+def SETBEm : I<0x96, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setbe\t$dst",
+ [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
+               TB; // [mem8] = <= unsigned
+
+def SETAr : I<0x97, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "seta\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>,
+               TB; // GR8 = > unsigned
+def SETAm : I<0x97, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "seta\t$dst",
+ [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>,
+               TB; // [mem8] = > unsigned
+
+def SETSr : I<0x98, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "sets\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>,
+ TB; // GR8 = <sign bit>
+def SETSm : I<0x98, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "sets\t$dst",
+ [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = <sign bit>
+def SETNSr : I<0x99, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setns\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>,
+ TB; // GR8 = !<sign bit>
+def SETNSm : I<0x99, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setns\t$dst",
+ [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = !<sign bit>
+
+def SETPr : I<0x9A, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setp\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>,
+ TB; // GR8 = parity
+def SETPm : I<0x9A, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setp\t$dst",
+ [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = parity
+def SETNPr : I<0x9B, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setnp\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>,
+ TB; // GR8 = not parity
+def SETNPm : I<0x9B, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setnp\t$dst",
+ [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = not parity
+
+def SETOr : I<0x90, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "seto\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
+ TB; // GR8 = overflow
+def SETOm : I<0x90, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "seto\t$dst",
+ [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = overflow
+def SETNOr : I<0x91, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setno\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
+ TB; // GR8 = not overflow
+def SETNOm : I<0x91, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setno\t$dst",
+ [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = not overflow
+} // Uses = [EFLAGS]
+
+
+// Integer comparisons
+let Defs = [EFLAGS] in {
+def CMP8rr : I<0x38, MRMDestReg,
+ (outs), (ins GR8 :$src1, GR8 :$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, GR8:$src2), (implicit EFLAGS)]>;
+def CMP16rr : I<0x39, MRMDestReg,
+ (outs), (ins GR16:$src1, GR16:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, GR16:$src2), (implicit EFLAGS)]>, OpSize;
+def CMP32rr : I<0x39, MRMDestReg,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, GR32:$src2), (implicit EFLAGS)]>;
+def CMP8mr : I<0x38, MRMDestMem,
+ (outs), (ins i8mem :$src1, GR8 :$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi8 addr:$src1), GR8:$src2),
+ (implicit EFLAGS)]>;
+def CMP16mr : I<0x39, MRMDestMem,
+ (outs), (ins i16mem:$src1, GR16:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), GR16:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32mr : I<0x39, MRMDestMem,
+ (outs), (ins i32mem:$src1, GR32:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), GR32:$src2),
+ (implicit EFLAGS)]>;
+def CMP8rm : I<0x3A, MRMSrcMem,
+ (outs), (ins GR8 :$src1, i8mem :$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)]>;
+def CMP16rm : I<0x3B, MRMSrcMem,
+ (outs), (ins GR16:$src1, i16mem:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32rm : I<0x3B, MRMSrcMem,
+ (outs), (ins GR32:$src1, i32mem:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)]>;
+def CMP8ri : Ii8<0x80, MRM7r,
+ (outs), (ins GR8:$src1, i8imm:$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, imm:$src2), (implicit EFLAGS)]>;
+def CMP16ri : Ii16<0x81, MRM7r,
+ (outs), (ins GR16:$src1, i16imm:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, imm:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32ri : Ii32<0x81, MRM7r,
+ (outs), (ins GR32:$src1, i32imm:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, imm:$src2), (implicit EFLAGS)]>;
+def CMP8mi : Ii8 <0x80, MRM7m,
+ (outs), (ins i8mem :$src1, i8imm :$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi8 addr:$src1), imm:$src2),
+ (implicit EFLAGS)]>;
+def CMP16mi : Ii16<0x81, MRM7m,
+ (outs), (ins i16mem:$src1, i16imm:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), imm:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32mi : Ii32<0x81, MRM7m,
+ (outs), (ins i32mem:$src1, i32imm:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), imm:$src2),
+ (implicit EFLAGS)]>;
+def CMP16ri8 : Ii8<0x83, MRM7r,
+ (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP16mi8 : Ii8<0x83, MRM7m,
+ (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32mi8 : Ii8<0x83, MRM7m,
+ (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2),
+ (implicit EFLAGS)]>;
+def CMP32ri8 : Ii8<0x83, MRM7r,
+ (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Bit tests.
+// TODO: BTC, BTR, and BTS
+let Defs = [EFLAGS] in {
+def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Disable these instructions for now.
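+// e.g. "btl %eax, %ebx" tests bit (EAX mod 32) of EBX, while
+// "btl %eax, (%ecx)" treats memory as a long bit string and may touch the
+// dword at (%ecx + 4*(EAX >> 5)).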
+//def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+// "bt{w}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi16 addr:$src1), GR16:$src2),
+// (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
+//def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+// "bt{l}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi32 addr:$src1), GR32:$src2),
+// (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
+
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi16 addr:$src1), i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+// Sign/Zero extenders
+// Use movsbl instead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
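+// e.g. movsbw (66 0F BE /r) is one byte longer than movsbl (0F BE /r),
+// and writing the full 32-bit register sidesteps a partial write to just
+// the low 16 bits.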
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+ "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (sext GR8:$src))]>, TB;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+// Use movzbl instead of movzbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (zext GR8:$src))]>, TB;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ []>, TB;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ []>, TB;
+
+let neverHasSideEffects = 1 in {
+ let Defs = [AX], Uses = [AL] in
+ def CBW : I<0x98, RawFrm, (outs), (ins),
+ "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ let Defs = [EAX], Uses = [AX] in
+ def CWDE : I<0x98, RawFrm, (outs), (ins),
+ "{cwtl|cwde}", []>; // EAX = signext(AX)
+
+ let Defs = [AX,DX], Uses = [AX] in
+ def CWD : I<0x99, RawFrm, (outs), (ins),
+ "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ let Defs = [EAX,EDX], Uses = [EAX] in
+ def CDQ : I<0x99, RawFrm, (outs), (ins),
+ "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
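+// e.g. "xorl %eax, %eax" (31 C0) is 2 bytes, versus 5 bytes for
+// "movl $0, %eax" (B8 imm32), and the result has no dependence on the
+// old EAX value.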
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
+ "xor{b}\t$dst, $dst",
+ [(set GR8:$dst, 0)]>;
+// Use xorl instead of xorw since we don't care about the high 16 bits,
+// it's smaller, and it avoids a partial-register update.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+ [(set GR16:$dst, 0)]>;
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
+ "xor{l}\t$dst, $dst",
+ [(set GR32:$dst, 0)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// All calls clobber the non-callee saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP, EBX] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym),
+ "leal\t${sym:mem}(,%ebx,1), %eax; "
+ "call\t___tls_get_addr@PLT",
+ [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+let AddedComplexity = 5 in
+def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movl\t%gs:$src, $dst",
+ [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
+
+let AddedComplexity = 5 in
+def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movl\t%fs:$src, $dst",
+ [(set GR32:$dst, (fsload addr:$src))]>, SegFS;
+
+//===----------------------------------------------------------------------===//
+// DWARF Pseudo Instructions
+//
+
+def DWARF_LOC : I<0, Pseudo, (outs),
+ (ins i32imm:$line, i32imm:$col, i32imm:$file),
+ ".loc\t${file:debug} ${line:debug} ${col:debug}",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
+ (i32 imm:$file))]>;
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic support
+//
+
+// Atomic swap. These are just normal xchg instructions; when a memory
+// operand is referenced, the processor asserts LOCK# implicitly, so the
+// exchange is guaranteed to be atomic.
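+// e.g. "xchgl %eax, (%edx)" is atomic without any explicit lock prefix;
+// contrast the cmpxchg and xadd forms below, which need the LOCK modifier.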
+let Constraints = "$val = $dst" in {
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "xchg{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "xchg{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ OpSize;
+def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
+ "xchg{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+}
+
+// Atomic compare and swap.
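+// "lock cmpxchg %ebx, (%ecx)" compares EAX with the dword at (%ecx): if
+// they are equal it stores EBX there and sets ZF, otherwise it loads the
+// dword into EAX and clears ZF.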
+let Defs = [EAX, EFLAGS], Uses = [EAX] in {
+def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+ "lock\n\t"
+ "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+}
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr),
+ "lock\n\t"
+ "cmpxchg8b\t$ptr",
+ [(X86cas8 addr:$ptr)]>, TB, LOCK;
+}
+
+let Defs = [AX, EFLAGS], Uses = [AX] in {
+def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
+ "lock\n\t"
+ "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+}
+let Defs = [AL, EFLAGS], Uses = [AL] in {
+def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
+ "lock\n\t"
+ "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+}
+
+// Atomic exchange and add
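+// "lock xadd %ebx, (%ecx)" atomically loads the old dword into EBX and
+// stores the sum of the old value and the original EBX back to (%ecx).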
+let Constraints = "$val = $dst", Defs = [EFLAGS] in {
+def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "lock\n\t"
+ "xadd{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ TB, LOCK;
+def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "lock\n\t"
+ "xadd{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ TB, OpSize, LOCK;
+def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
+ "lock\n\t"
+ "xadd{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ TB, LOCK;
+}
+
+// Atomic exchange, and, or, xor
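+// These pseudos have no encoding of their own; the custom DAG sched
+// inserter expands each one to (roughly) a load / operate / lock-cmpxchg
+// retry loop.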
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomDAGSchedInserter = 1 in {
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMXOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMNAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "#ATOMMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMXOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMNAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+}
+
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+ Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+ Uses = [EAX, EBX, ECX, EDX],
+ mayLoad = 1, mayStore = 1,
+ usesCustomDAGSchedInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSWAP6432 PSEUDO!", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+
+// Calls
+// Tail-call patterns
+def : Pat<(X86tailcall GR32:$dst),
+ (TAILCALL)>;
+
+def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
+ (TAILCALL)>;
+def : Pat<(X86tailcall (i32 texternalsym:$dst)),
+ (TAILCALL)>;
+
+def : Pat<(X86tcret GR32:$dst, imm:$off),
+ (TCRETURNri GR32:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+          (TCRETURNdi tglobaladdr:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>;
+
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// X86 specific add which produces a flag.
+def : Pat<(addc GR32:$src1, GR32:$src2),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(addc GR32:$src1, (load addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+def : Pat<(addc GR32:$src1, imm:$src2),
+ (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+def : Pat<(subc GR32:$src1, GR32:$src2),
+ (SUB32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(subc GR32:$src1, (load addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+def : Pat<(subc GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
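+// Illustrative size comparison (standard x86 encodings, shown for
+// reference only):
+//   test ecx, ecx   -> 85 C9        (2 bytes)
+//   cmp  ecx, 0     -> 83 F9 00     (3 bytes)
+// Both set ZF/SF identically for a compare-against-zero, so the shorter
+// TEST form is always preferred here.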
+
+// Conditional moves with folded loads, with the operands swapped and the
+// conditions inverted.
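+// The CMOVcc load forms can only move *from* memory when the condition
+// holds, so when the load is the not-taken operand these patterns commute
+// the operands and invert the condition. Illustrative identity:
+//   (X86cmov (load m), r, COND_B)  ==  (X86cmov r, (load m), COND_AE)
+// which selects to "cmovae r, m".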
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
+ (CMOVAE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
+ (CMOVAE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
+ (CMOVB16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
+ (CMOVB32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
+ (CMOVNE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
+ (CMOVNE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
+ (CMOVE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
+ (CMOVE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
+ (CMOVA16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
+ (CMOVA32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
+ (CMOVBE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
+ (CMOVBE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
+ (CMOVGE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
+ (CMOVGE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
+ (CMOVL16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
+ (CMOVL32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
+ (CMOVG16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
+ (CMOVG32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
+ (CMOVLE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
+ (CMOVLE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
+ (CMOVNP16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
+ (CMOVNP32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
+ (CMOVP16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
+ (CMOVP32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
+ (CMOVNS16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
+ (CMOVNS32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
+ (CMOVS16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
+ (CMOVS32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
+ (CMOVNO16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
+ (CMOVNO32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
+ (CMOVO16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
+ (CMOVO32rm GR32:$src2, addr:$src1)>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
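+// This works because i1 values are stored in memory as a byte known to
+// hold 0 or 1, so zero-extending a bool load is just a plain byte load
+// (widened with movzx where needed).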
+
+// extload bool -> extload byte
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+// anyext
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+
+// (and (i32 load), 255) -> (zextload i8)
+def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
+ (MOVZX32rm8 addr:$src)>;
+def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))),
+ (MOVZX32rm16 addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
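+// Illustrative encodings for a non-EAX register:
+//   add ecx, 128    -> 81 C1 80 00 00 00   (6 bytes, imm32)
+//   sub ecx, -128   -> 83 E9 80            (3 bytes, imm8)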
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD),
+ x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD),
+ x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
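+// The COPY_TO_REGCLASS to GR*_ABCD above is needed because in 32-bit mode
+// only EAX/EBX/ECX/EDX have an addressable 8-bit subregister, so the
+// source must be constrained before EXTRACT_SUBREG of x86_subreg_8bit.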
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit)>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks
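+// Only the A/B/C/D registers have a high-8 subregister (AH/BH/CH/DH),
+// hence the COPY_TO_REGCLASS below. Illustrative case: with $src in AX,
+// (i8 (trunc (srl_su AX, 8))) is just a read of AH, so no shift is emitted.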
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl_su GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In32BitMode]>;
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+
+// (shl x (and y, 31)) ==> (shl x, y)
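+// This is safe because the hardware shift instructions already use the
+// count in CL modulo 32, making an explicit (and CL, 31) redundant.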
+def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
+// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
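+// Worked example (illustrative): with x = 0x12345678, y = 0x9ABCDEF0 and
+// c = 8, (x >> 8) | (y << 24) = 0x00123456 | 0xF0000000 = 0xF0123456,
+// which is exactly what "shrd x, y, 8" computes in a single instruction.
+// The SHLD patterns further below are the mirror image.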
+def : Pat<(or (srl GR32:$src1, CL:$amt),
+ (shl GR32:$src2, (sub 32, CL:$amt))),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
+ (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
+ (SHRD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))),
+ (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
+ (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ addr:$dst),
+ (SHRD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+ (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
+ GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
+
+// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
+def : Pat<(or (shl GR32:$src1, CL:$amt),
+ (srl GR32:$src2, (sub 32, CL:$amt))),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
+ (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
+ (SHLD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))),
+ (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
+ (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ addr:$dst),
+ (SHLD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+ (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
+ GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
+
+// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
+def : Pat<(or (srl GR16:$src1, CL:$amt),
+ (shl GR16:$src2, (sub 16, CL:$amt))),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
+ (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
+ (SHRD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))),
+ (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
+ (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ addr:$dst),
+ (SHRD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+ (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
+ GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+
+// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
+def : Pat<(or (shl GR16:$src1, CL:$amt),
+ (srl GR16:$src2, (sub 16, CL:$amt))),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
+ (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
+ (SHLD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))),
+ (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
+ (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ addr:$dst),
+ (SHLD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+ (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
+ GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// Register-Register Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (ADD8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86add_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86add_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86add_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86add_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register Addition with EFLAGS result
+def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer Addition with EFLAGS result
+def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (SUB8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86sub_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86sub_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86sub_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86sub_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register Subtraction with EFLAGS result
+def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer Subtraction with EFLAGS result
+def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register Signed Integer Multiply with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86smul_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Signed Integer Multiply with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86smul_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Signed Integer Multiply with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Integer Signed Integer Multiply with EFLAGS result
+def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Optimize multiply by 2 with EFLAGS result.
+let AddedComplexity = 2 in {
+def : Pat<(parallel (X86smul_flag GR16:$src1, 2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src1)>;
+
+def : Pat<(parallel (X86smul_flag GR32:$src1, 2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src1)>;
+}
+
+// INC and DEC with EFLAGS result. Note that these do not set CF.
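+// Since INC/DEC leave CF unchanged, these selections are only valid when
+// the flag consumer ignores CF; e.g. code feeding an ADC/SBB carry chain
+// must use ADD/SUB with an immediate of 1 instead.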
+def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)),
+ (INC8r GR8:$src)>;
+def : Pat<(parallel (store (i8 (X86inc_flag (loadi8 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC8m addr:$dst)>;
+def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)),
+ (DEC8r GR8:$src)>;
+def : Pat<(parallel (store (i8 (X86dec_flag (loadi8 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC8m addr:$dst)>;
+
+def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
+ (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC16m addr:$dst)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
+ (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC16m addr:$dst)>, Requires<[In32BitMode]>;
+
+def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
+ (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC32m addr:$dst)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
+ (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC32m addr:$dst)>, Requires<[In32BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Stack Support
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFPStack.td"
+
+//===----------------------------------------------------------------------===//
+// X86-64 Support
+//===----------------------------------------------------------------------===//
+
+include "X86Instr64bit.td"
+
+//===----------------------------------------------------------------------===//
+// XMM Floating point support (requires SSE / SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrSSE.td"
+
+//===----------------------------------------------------------------------===//
+// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrMMX.td"
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
new file mode 100644
index 0000000..8f287e1
--- /dev/null
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -0,0 +1,694 @@
+//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MMX instruction set, defining the instructions
+// and the properties of the instructions that are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
+
+def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>;
+def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
+def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
+def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
+
+//===----------------------------------------------------------------------===//
+// MMX Masks
+//===----------------------------------------------------------------------===//
+
+// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
+// PSHUFW imm.
+def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
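+// Example (illustrative): the reversing mask <3,2,1,0> packs into the
+// 8-bit immediate as four 2-bit source indices, low element first:
+//   0b00011011 = 0x1B, i.e. "pshufw mm0, mm1, 0x1B".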
+
+// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
+def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
+def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], MMX_SHUFFLE_get_shuf_imm>;
+
+//===----------------------------------------------------------------------===//
+// MMX Multiclasses
+//===----------------------------------------------------------------------===//
+
+let isTwoAddress = 1 in {
+ // MMXI_binop_rm - Simple MMX binary operator.
+ multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
+ (bitconvert
+ (load_mmx addr:$src2)))))]>;
+ }
+
+ multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ }
+
+ // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
+ //
+ // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
+ // to collapse (bitconvert VT to VT) into its operand.
+ //
+ multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
+ }
+
+ multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
+ (ins VR64:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
+ }
+}
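+// A defm of one of these multiclasses instantiates both forms at once;
+// e.g. "defm MMX_PADDB : MMXI_binop_rm<0xFC, ...>" below yields both
+// MMX_PADDBrr and MMX_PADDBrm.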
+
+//===----------------------------------------------------------------------===//
+// MMX EMMS & FEMMS Instructions
+//===----------------------------------------------------------------------===//
+
+def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
+def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+//===----------------------------------------------------------------------===//
+// MMX Scalar Instructions
+//===----------------------------------------------------------------------===//
+
+// Data Transfer Instructions
+def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+let mayStore = 1 in
+def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg,
+ (outs GR64:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (load_mmx addr:$src))]>;
+def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (v1i64 VR64:$src), addr:$dst)]>;
+
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v1i64 (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))]>;
+
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (movl immAllZerosV,
+ (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movntq\t{$src, $dst|$dst, $src}",
+ [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
+
+let AddedComplexity = 15 in
+// movd to MMX register zero-extends
+def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+let AddedComplexity = 20 in
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v2i32 (X86vzmovl (v2i32
+ (scalar_to_vector (loadi32 addr:$src))))))]>;
+
+// Arithmetic Instructions
+
+// -- Addition
+defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>;
+defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>;
+defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>;
+defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>;
+
+defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
+defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
+
+defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
+defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+
+// -- Subtraction
+defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>;
+defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>;
+defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>;
+defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>;
+
+defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
+defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
+
+defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
+defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+
+// -- Multiplication
+defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
+
+defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
+defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
+defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+
+// -- Miscellanea
+defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+
+defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
+defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
+
+defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>;
+defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>;
+
+defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>;
+defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
+
+defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
+
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
+defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>;
+defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
+
+let isTwoAddress = 1 in {
+ def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
+ VR64:$src2)))]>;
+ def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
+ (load addr:$src2))))]>;
+}
+
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
+
+// Shift up / down and insert zeros.
+def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))),
+ (v1i64 (MMX_PSLLQri VR64:$src, imm:$amt))>;
+def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))),
+ (v1i64 (MMX_PSRLQri VR64:$src, imm:$amt))>;
+
+// Comparison Instructions
+defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
+defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
+defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>;
+
+defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
+defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
+defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
+
+// Conversion Instructions
+
+// -- Unpack Instructions
+let isTwoAddress = 1 in {
+ // Unpack High Packed Data Instructions
+ def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpckhbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpckhbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (mmx_unpckh VR64:$src1,
+ (bc_v8i8 (load_mmx addr:$src2)))))]>;
+
+ def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpckhwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpckhwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_unpckh VR64:$src1,
+ (bc_v4i16 (load_mmx addr:$src2)))))]>;
+
+ def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpckhdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpckhdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (mmx_unpckh VR64:$src1,
+ (bc_v2i32 (load_mmx addr:$src2)))))]>;
+
+ // Unpack Low Packed Data Instructions
+ def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpcklbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpcklbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (mmx_unpckl VR64:$src1,
+ (bc_v8i8 (load_mmx addr:$src2)))))]>;
+
+ def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_unpckl VR64:$src1,
+ (bc_v4i16 (load_mmx addr:$src2)))))]>;
+
+ def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpckldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpckldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (mmx_unpckl VR64:$src1,
+ (bc_v2i32 (load_mmx addr:$src2)))))]>;
+}
+
+// -- Pack Instructions
+defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
+defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
+defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
+
+// -- Shuffle Instructions
+def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
+def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
+ (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
+ (undef)))]>;
+
+// -- Conversion Instructions
+let neverHasSideEffects = 1 in {
+def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+ "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
+ "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+ "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
+ "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
+} // end neverHasSideEffects
+
+// Extract / Insert
+def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
+def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
+
+def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
+ (iPTR imm:$src2)))]>;
+let isTwoAddress = 1 in {
+ def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i16i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
+ GR32:$src2, (iPTR imm:$src3))))]>;
+ def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst,
+ (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3))))]>;
+}
+
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+// Misc.
+let Uses = [EDI] in
+def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
+let Uses = [RDI] in
+def MMX_MASKMOVQ64: MMXI64<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map zero vector to pxor.
+let isReMaterializable = 1 in {
+ def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins),
+ "pxor\t$dst, $dst",
+ [(set VR64:$dst, (v2i32 immAllZerosV))]>;
+ def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins),
+ "pcmpeqd\t$dst, $dst",
+ [(set VR64:$dst, (v2i32 immAllOnesV))]>;
+}
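+// The results of "pxor x, x" and "pcmpeqd x, x" do not depend on the prior
+// register contents, which is why these defs are isReMaterializable: the
+// constant can be recreated at any point instead of being spilled.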
+
+let Predicates = [HasMMX] in {
+ def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
+ def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
+ def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Store 64-bit integer vector values.
+def : Pat<(store (v8i8 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v4i16 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v2i32 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v2f32 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v1i64 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+
+// Bit convert.
+def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
+
+// 64-bit bit convert.
+def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v2f32 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v2f32 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+
+// Move scalar to MMX, zero-extended: movd into an MMX register
+// zero-extends the upper bits.
+let AddedComplexity = 15 in {
+ def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
+ def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
+}
+
+let AddedComplexity = 20 in {
+ def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
+ (MMX_MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
+ (MMX_MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
+ (MMX_MOVZDI2PDIrm addr:$src)>;
+}
+
+// Clear top half.
+let AddedComplexity = 15 in {
+ def : Pat<(v8i8 (X86vzmovl VR64:$src)),
+ (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+ def : Pat<(v4i16 (X86vzmovl VR64:$src)),
+ (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+ def : Pat<(v2i32 (X86vzmovl VR64:$src)),
+ (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+}
+
+// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
+// 8 or 16 bits matter.
+def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
+ (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
+ (MMX_MOVD64rr GR32:$src)>;
+
+// Patterns to perform canonical versions of vector shuffling.
+let AddedComplexity = 10 in {
+ def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
+ (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
+ def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
+ (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
+ def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
+ (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
+}
+
+let AddedComplexity = 10 in {
+ def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
+ (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
+ def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
+ (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
+ def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
+ (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
+}
+
+// Patterns to perform vector shuffling with a zeroed out vector.
+let AddedComplexity = 20 in {
+ def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
+ (v2i32 (scalar_to_vector (load_mmx addr:$src))))),
+ (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
+}
+
+// Some special case PANDN patterns.
+// FIXME: Get rid of these.
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+
+// Move MMX to lower 64-bit of XMM
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+// Move lower 64-bit of XMM to MMX.
+def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+
+// CMOV* - Used to implement the SELECT DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+ def CMOV_V1I64 : I<0, Pseudo,
+ (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond),
+ "#CMOV_V1I64 PSEUDO!",
+ [(set VR64:$dst,
+ (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond,
+ EFLAGS)))]>;
+}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
new file mode 100644
index 0000000..1fafa46
--- /dev/null
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -0,0 +1,3643 @@
+//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 SSE instruction set, defining the instructions
+// and the properties of the instructions that are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisFP<1>, SDTCisVT<3, i8>]>;
+
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pextrb : SDNode<"X86ISD::PEXTRB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
+def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
+def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
+def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+}
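+// The five MIOperandInfo sub-operands above spell out the usual X86
+// memory reference: base register, scale, index register, displacement,
+// and segment register.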
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// Like 'store', but always requires vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>;
+def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others.
+// FIXME: Actually implement support for targets that don't require the
+// alignment. This probably wants a subtarget predicate.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
+
+// SSSE3 uses MMX registers for some instructions, and those memory operands
+// aren't guaranteed to be aligned on a 16-byte boundary.
+// FIXME: 8-byte alignment for MMX reads is not required.
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
+def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
+def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def PSxLDQ_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getZExtValue() >> 3);
+}]>;
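+// For example, a 32-bit shift amount becomes a 4-byte PSLLDQ/PSRLDQ byte
+// count: 32 >> 3 == 4.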
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
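+// For example, the v4 reversal mask <3,2,1,0> encodes as 0x1B (0b00011011),
+// since result element i selects source element (imm >> (2*i)) & 3.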
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
+}]>;
+
+def movddup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def shufp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshufhw_imm>;
+
+def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshuflw_imm>;
+
+//===----------------------------------------------------------------------===//
+// SSE scalar FP Instructions
+//===----------------------------------------------------------------------===//
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+ def CMOV_FR32 : I<0, Pseudo,
+ (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+}
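+// Roughly, the custom inserter (see EmitInstrWithCustomInserter in the
+// X86 target lowering code) turns each of these pseudos into a diamond:
+//   thisMBB:  JCC sinkMBB          ; take $t when $cond holds
+//   copy0MBB: (fall through)       ; otherwise $f
+//   sinkMBB:  $dst = PHI [$f, copy0MBB], [$t, thisMBB]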
+
+//===----------------------------------------------------------------------===//
+// SSE1 Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (loadf32 addr:$src))]>;
+def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>;
+
+// Conversion instructions
+def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
+def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
+def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
+def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+
+// Match intrinsics which expect XMM operand(s).
+def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "cvtss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
+def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
+ "cvtss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_cvtss2si
+ (load addr:$src)))]>;
+
+// Match intrinsics which expect MM and XMM operand(s).
+def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvtps2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
+def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
+ "cvtps2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvtps2pi
+ (load addr:$src)))]>;
+def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvttps2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
+def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
+ "cvttps2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvttps2pi
+ (load addr:$src)))]>;
+let Constraints = "$src1 = $dst" in {
+ def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
+ "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
+ VR64:$src2))]>;
+ def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
+ "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
+ (load addr:$src2)))]>;
+}
+
+// Aliases for intrinsics
+def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_sse_cvttss2si VR128:$src))]>;
+def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_sse_cvttss2si (load addr:$src)))]>;
+
+let Constraints = "$src1 = $dst" in {
+ def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
+ GR32:$src2))]>;
+ def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
+ (loadi32 addr:$src2)))]>;
+}
+
+// Comparison instructions
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+ def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+}
+
+let Defs = [EFLAGS] in {
+def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
+ "ucomiss\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp FR32:$src1, FR32:$src2), (implicit EFLAGS)]>;
+def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
+ "ucomiss\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp FR32:$src1, (loadf32 addr:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let Constraints = "$src1 = $dst" in {
+ def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc),
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+let Defs = [EFLAGS] in {
+def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ucomiss\t{$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v4f32 VR128:$src1), VR128:$src2),
+ (implicit EFLAGS)]>;
+def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
+ "ucomiss\t{$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2)),
+ (implicit EFLAGS)]>;
+
+def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "comiss\t{$src2, $src1|$src1, $src2}",
+ [(X86comi (v4f32 VR128:$src1), VR128:$src2),
+ (implicit EFLAGS)]>;
+def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "comiss\t{$src2, $src1|$src1, $src2}",
+ [(X86comi (v4f32 VR128:$src1), (load addr:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Aliases of packed SSE1 instructions for scalar use. These all have names that
+// start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
+ "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+
+// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
+// disregarded.
+let neverHasSideEffects = 1 in
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+
+// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
+// disregarded.
+let canFoldAsLoad = 1 in
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in {
+ def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "andps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
+ def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "orps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
+ def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "xorps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
+}
+
+def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f128mem:$src2),
+ "andps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fand FR32:$src1,
+ (memopfsf32 addr:$src2)))]>;
+def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f128mem:$src2),
+ "orps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86for FR32:$src1,
+ (memopfsf32 addr:$src2)))]>;
+def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f128mem:$src2),
+ "xorps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1,
+ (memopfsf32 addr:$src2)))]>;
+
+let neverHasSideEffects = 1 in {
+def FsANDNPSrr : PSI<0x55, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ "andnps\t{$src2, $dst|$dst, $src2}", []>;
+let mayLoad = 1 in
+def FsANDNPSrm : PSI<0x55, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
+ "andnps\t{$src2, $dst|$dst, $src2}", []>;
+}
+}
+
+/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem, so there are a total of
+/// six "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg+reg.
+ def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>;
+
+ // Intrinsic operation, reg+mem.
+ def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1,
+ sse_load_f32:$src2))]>;
+}
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
+defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
+defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
+defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
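+// Each defm above concatenates its name with the def suffixes in the
+// multiclass, so e.g. 'defm ADD' yields the six instructions ADDSSrr,
+// ADDSSrm, ADDPSrr, ADDPSrm, ADDSSrr_Int and ADDSSrm_Int.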
+
+/// sse1_fp_binop_rm - Other SSE1 binops
+///
+/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+/// This provides a total of eight "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F32Int,
+ Intrinsic V4F32Int,
+ bit Commutable = 0> {
+
+ // Scalar operation, reg+reg.
+ def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, reg+mem.
+ def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1,
+ sse_load_f32:$src2))]>;
+
+ // Vector intrinsic operation, reg+reg.
+ def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, reg+mem.
+ def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, (memopv4f32 addr:$src2)))]>;
+}
+}
+
+defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
+ int_x86_sse_max_ss, int_x86_sse_max_ps>;
+defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
+ int_x86_sse_min_ss, int_x86_sse_min_ps>;
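+// These expand to eight instructions apiece, e.g. MAXSSrr, MAXSSrm,
+// MAXPSrr, MAXPSrm, MAXSSrr_Int, MAXSSrm_Int, MAXPSrr_Int and MAXPSrm_Int.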
+
+//===----------------------------------------------------------------------===//
+// SSE1 packed FP Instructions
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
+
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+
+let neverHasSideEffects = 1 in
+def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv4f32 addr:$src))]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>;
+
+// Intrinsic forms of MOVUPS load and store
+let canFoldAsLoad = 1 in
+def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
+def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+
+let Constraints = "$src1 = $dst" in {
+ let AddedComplexity = 20 in {
+ def MOVLPSrm : PSI<0x12, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "movlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (movlp VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
+ def MOVHPSrm : PSI<0x16, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "movhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (movhp VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
+ } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>;
+
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0, so the non-store version isn't too horrible.
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let AddedComplexity = 20 in {
+def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
+
+def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+} // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+let AddedComplexity = 20 in {
+def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+}
+
+
+
+// Arithmetic
+
+/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+///
+/// These four forms can each have a reg or a mem operand, so there are a
+/// total of eight "instructions".
+///
+multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F32Int,
+ Intrinsic V4F32Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg.
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, mem.
+ def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>;
+
+ // Vector operation, reg.
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, mem.
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+
+ // Vector intrinsic operation, reg
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, mem
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+// Square root.
+defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
+ int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
+ int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
+ int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
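+// As with the binops above, each defm yields eight instructions, e.g.
+// SQRTSSr, SQRTSSm, SQRTPSr, SQRTPSm, SQRTSSr_Int, SQRTSSm_Int,
+// SQRTPSr_Int and SQRTPSm_Int.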
+
+// Logical
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 1 in {
+ def ANDPSrr : PSI<0x54, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "andps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (and VR128:$src1, VR128:$src2)))]>;
+ def ORPSrr : PSI<0x56, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "orps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (or VR128:$src1, VR128:$src2)))]>;
+ def XORPSrr : PSI<0x57, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "xorps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (xor VR128:$src1, VR128:$src2)))]>;
+ }
+
+ def ANDPSrm : PSI<0x54, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "andps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def ORPSrm : PSI<0x56, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "orps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def XORPSrm : PSI<0x57, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "xorps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def ANDNPSrr : PSI<0x55, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "andnps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (and (xor VR128:$src1,
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)))]>;
+ def ANDNPSrm : PSI<0x55, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
+ "andnps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ (memopv2i64 addr:$src2))))]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+ def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
+ (memop addr:$src), imm:$cc))]>;
+}
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+// Shuffle and unpack instructions
+let Constraints = "$src1 = $dst" in {
+ let isConvertibleToThreeAddress = 1 in // Convert to pshufd
+ def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2, i8imm:$src3),
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
+ def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ f128mem:$src2, i8imm:$src3),
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v4f32 (shufp:$src3
+ VR128:$src1, (memopv4f32 addr:$src2))))]>;
+
+ let AddedComplexity = 10 in {
+ def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "unpckhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "unpckhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (unpckh VR128:$src1,
+ (memopv4f32 addr:$src2))))]>;
+
+ def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "unpcklps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "unpcklps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
+ } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+// Mask creation
+def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
+def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
+
+// Prefetch intrinsic.
+def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
+
+// Non-temporal stores
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+
+// Store fence (SSE1 provides only sfence; lfence/mfence are SSE2)
+def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
+
+// MXCSR register
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
+ "xorps\t$dst, $dst",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+
+let Predicates = [HasSSE1] in {
+ def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+ def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+ def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+ def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+}
+
+// FR32 to 128-bit vector conversion.
+let isAsCheapAsAMove = 1 in
+def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4f32 (scalar_to_vector FR32:$src)))]>;
+def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
+
+// FIXME: may not be able to eliminate this movss with coalescing because the
+// src and dest register classes are different. We really want to write this
+// pattern like this:
+// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+// (f32 FR32:$src)>;
+let isAsCheapAsAMove = 1 in
+def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store (f32 (vector_extract (v4f32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+
+// Move to lower bits of a VR128, leaving upper bits alone.
+// Three operand (but two address) aliases.
+let Constraints = "$src1 = $dst" in {
+let neverHasSideEffects = 1 in
+ def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}", []>;
+
+ let AddedComplexity = 15 in
+ def MOVLPSrr : SSI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movl VR128:$src1, VR128:$src2)))]>;
+}
+
+// Move to the lower bits of a VR128, zeroing the upper bits.
+// Loading from memory automatically zeroes the upper bits.
+let AddedComplexity = 20 in
+def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
+ (loadf32 addr:$src))))))]>;
+
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (MOVZSS2PSrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// SSE2 Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (loadf64 addr:$src))]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+// Conversion instructions
+def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
+def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround FR64:$src))]>;
+def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
+def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
+ "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
+def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
+ "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+
+// SSE2 instructions with XS prefix
+def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+ Requires<[HasSSE2]>;
+def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
+ Requires<[HasSSE2]>;
+
+// Match intrinsics which expect XMM operand(s).
+def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "cvtsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
+def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
+ "cvtsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvtsd2si
+ (load addr:$src)))]>;
+
+// Match intrinsics which expect MM and XMM operand(s).
+def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
+def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+ "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvtpd2pi
+ (memop addr:$src)))]>;
+def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
+def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+ "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvttpd2pi
+ (memop addr:$src)))]>;
+def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
+def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cvtpi2pd
+ (load addr:$src)))]>;
+
+// Aliases for intrinsics
+def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_sse2_cvttsd2si VR128:$src))]>;
+def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvttsd2si
+ (load addr:$src)))]>;
+
+// Comparison instructions
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+ def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
+}
+
+let Defs = [EFLAGS] in {
+def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
+ "ucomisd\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp FR64:$src1, FR64:$src2), (implicit EFLAGS)]>;
+def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
+ "ucomisd\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp FR64:$src1, (loadf64 addr:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let Constraints = "$src1 = $dst" in {
+ def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc),
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+let Defs = [EFLAGS] in {
+def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ucomisd\t{$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
+ (implicit EFLAGS)]>;
+def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
+ "ucomisd\t{$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2)),
+ (implicit EFLAGS)]>;
+
+def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "comisd\t{$src2, $src1|$src1, $src2}",
+ [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
+ (implicit EFLAGS)]>;
+def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "comisd\t{$src2, $src1|$src1, $src2}",
+ [(X86comi (v2f64 VR128:$src1), (load addr:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Aliases of packed SSE2 instructions for scalar use. These all have names that
+// start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
+ "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
+// disregarded.
+let neverHasSideEffects = 1 in
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+
+// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
+// disregarded.
+let canFoldAsLoad = 1 in
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in {
+ def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "andpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
+ def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "orpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
+ def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "xorpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
+}
+
+def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
+ "andpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fand FR64:$src1,
+ (memopfsf64 addr:$src2)))]>;
+def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
+ "orpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86for FR64:$src1,
+ (memopfsf64 addr:$src2)))]>;
+def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
+ "xorpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1,
+ (memopfsf64 addr:$src2)))]>;
+
+let neverHasSideEffects = 1 in {
+def FsANDNPDrr : PDI<0x55, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ "andnpd\t{$src2, $dst|$dst, $src2}", []>;
+let mayLoad = 1 in
+def FsANDNPDrm : PDI<0x55, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
+ "andnpd\t{$src2, $dst|$dst, $src2}", []>;
+}
+}
+
+/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem, so there are a total of
+/// six "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg+reg.
+ def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;
+
+ // Intrinsic operation, reg+mem.
+ def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1,
+ sse_load_f64:$src2))]>;
+}
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
+defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
+defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
+defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
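+// As in the SSE1 case, each defm produces six instructions, e.g. ADDSDrr,
+// ADDSDrm, ADDPDrr, ADDPDrm, ADDSDrr_Int and ADDSDrm_Int.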
+
+/// sse2_fp_binop_rm - Other SSE2 binops
+///
+/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+/// This provides a total of eight "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F64Int,
+ Intrinsic V2F64Int,
+ bit Commutable = 0> {
+
+ // Scalar operation, reg+reg.
+ def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, reg+mem.
+ def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1,
+ sse_load_f64:$src2))]>;
+
+ // Vector intrinsic operation, reg+reg.
+ def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, reg+mem.
+ def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1,
+ (memopv2f64 addr:$src2)))]>;
+}
+}
+
+defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
+ int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
+defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
+ int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
+
+//===----------------------------------------------------------------------===//
+// SSE2 packed FP Instructions
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
+
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let neverHasSideEffects = 1 in
+def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv2f64 addr:$src))]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
+
+// Intrinsic forms of MOVUPD load and store
+def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
+
+let Constraints = "$src1 = $dst" in {
+ let AddedComplexity = 20 in {
+ def MOVLPDrm : PDI<0x12, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "movlpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (movlp VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))]>;
+ def MOVHPDrm : PDI<0x16, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "movhpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (movhp VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))]>;
+ } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+// Extracting element 1 of a v2f64 is always custom-lowered to an unpack of
+// high to low followed by an extract of element 0, so the non-store version
+// isn't too horrible.
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>;
+
+// SSE2 instructions without OpSize prefix
+def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, Requires<[HasSSE2]>;
+
+// SSE2 instructions with XS prefix
+def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
+def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>;
+// SSE2 packed instructions with XS prefix
+def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))]>,
+ XS, Requires<[HasSSE2]>;
+
+// SSE2 packed instructions with XD prefix
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>;
+
+// SSE2 instructions without OpSize prefix
+def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ TB, Requires<[HasSSE2]>;
+
+def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+
+// Match intrinsics which expect XMM operand(s): alias instructions for the
+// conversion intrinsics.
+let Constraints = "$src1 = $dst" in {
+def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
+ "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
+ GR32:$src2))]>;
+def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
+ "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
+ (loadi32 addr:$src2)))]>;
+def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
+ VR128:$src2))]>;
+def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
+ (load addr:$src2)))]>;
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
+}
+
+// Arithmetic
+
+/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
+///
+/// In addition, we have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And we have a further variant for the full-vector intrinsic form.
+///
+/// These four forms can each have a reg or a mem operand, so there are a
+/// total of eight "instructions".
+///
+multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F64Int,
+ Intrinsic V2F64Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg.
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode FR64:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, mem.
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+
+ // Vector operation, reg.
+ def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, mem.
+ def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, mem
+ def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+}
+
+// Square root.
+defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
+ int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
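+
+// For illustration, the defm above expands sse2_fp_unop_rm into the eight
+// sqrt instructions SQRTSDr, SQRTSDm, SQRTPDr, SQRTPDm and their intrinsic
+// variants SQRTSDr_Int, SQRTSDm_Int, SQRTPDr_Int, SQRTPDm_Int.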
+
+// There are no f64 versions of the reciprocal approximation instructions.
+
+// Logical
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 1 in {
+ def ANDPDrr : PDI<0x54, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "andpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def ORPDrr : PDI<0x56, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "orpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (or (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def XORPDrr : PDI<0x57, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "xorpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (xor (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ }
+
+ def ANDPDrm : PDI<0x54, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "andpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def ORPDrm : PDI<0x56, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "orpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (or (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def XORPDrm : PDI<0x57, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "xorpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (xor (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def ANDNPDrr : PDI<0x55, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "andnpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def ANDNPDrm : PDI<0x55, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
+ "andnpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (memopv2i64 addr:$src2)))]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+ def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
+ (memop addr:$src), imm:$cc))]>;
+}
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+// Shuffle and unpack instructions
+let Constraints = "$src1 = $dst" in {
+ def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
+ def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ f128mem:$src2, i8imm:$src3),
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v2f64 (shufp:$src3
+ VR128:$src1, (memopv2f64 addr:$src2))))]>;
+
+ let AddedComplexity = 10 in {
+ def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (unpckh VR128:$src1,
+ (memopv2f64 addr:$src2))))]>;
+
+ def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
+ } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+
+//===----------------------------------------------------------------------===//
+// SSE integer instructions
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, mayLoad = 1 in
+def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
+let mayStore = 1 in
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
+let canFoldAsLoad = 1, mayLoad = 1 in
+def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ XS, Requires<[HasSSE2]>;
+let mayStore = 1 in
+def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ XS, Requires<[HasSSE2]>;
+
+// Intrinsic forms of MOVDQU load and store
+let canFoldAsLoad = 1 in
+def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, Requires<[HasSSE2]>;
+
+let Constraints = "$src1 = $dst" in {
+
+multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+}
+
+multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr,
+ Intrinsic IntId, Intrinsic IntId2> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+}
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)))))]>;
+}
+
+/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
+///
+/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
+/// to collapse (bitconvert VT to VT) into its operand.
+///
+multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>;
+}
+
+} // Constraints = "$src1 = $dst"
+
+// 128-bit Integer Arithmetic
+
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
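+
+// For example, each defm above yields a reg/reg and a reg/mem form; PADDB
+// expands to PADDBrr, matching (v16i8 (add VR128, VR128)), and PADDBrm,
+// which folds a load of the second operand.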
+
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+
+defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
+defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
+defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
+defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+
+defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
+defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
+defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
+defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
+
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
+
+defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
+defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
+
+defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+
+
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+
+defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
+defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
+defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
+
+defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
+defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
+defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
+
+defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
+defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
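+
+// Each shift defm above produces three forms via PDI_binop_rmi_int, e.g. for
+// PSLLW: PSLLWrr (shift count in an XMM register), PSLLWrm (shift count
+// loaded from memory), and PSLLWri (immediate shift count, encoded with the
+// second opcode and the given ImmForm).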
+
+// 128-bit logical shifts.
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSLLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSRLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+}
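+
+// Note: PSxLDQ_imm is defined earlier in this file; it presumably rewrites
+// the intrinsics' bit-count immediate into the byte count that PSLLDQ and
+// PSRLDQ actually encode (imm >> 3), while the _bs ("byte shift") intrinsics
+// already carry a byte count and are matched directly.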
+
+// Logical
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let Constraints = "$src1 = $dst" in {
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>;
+}
+
+// SSE2 Integer comparison
+defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
+defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
+defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
+defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (PCMPEQBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (PCMPEQBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (PCMPEQWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (PCMPEQWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (PCMPEQDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (PCMPEQDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (PCMPGTBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (PCMPGTBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (PCMPGTWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (PCMPGTWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (PCMPGTDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (PCMPGTDrm VR128:$src1, addr:$src2)>;
+
+
+// Pack instructions
+defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
+defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
+defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+
+// Shuffle and unpack instructions
+def PSHUFDri : PDIi8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (pshufd:$src2
+ VR128:$src1, (undef))))]>;
+def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (pshufd:$src2
+ (bc_v4i32(memopv2i64 addr:$src1)),
+ (undef))))]>;
+
+// SSE2 with ImmT == Imm8 and XS prefix.
+def PSHUFHWri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
+ (undef))))]>,
+ XS, Requires<[HasSSE2]>;
+def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (pshufhw:$src2
+ (bc_v8i16 (memopv2i64 addr:$src1)),
+ (undef))))]>,
+ XS, Requires<[HasSSE2]>;
+
+// SSE2 with ImmT == Imm8 and XD prefix.
+def PSHUFLWri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
+ (undef))))]>,
+ XD, Requires<[HasSSE2]>;
+def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (pshuflw:$src2
+ (bc_v8i16 (memopv2i64 addr:$src1)),
+ (undef))))]>,
+ XD, Requires<[HasSSE2]>;
+
+
+let Constraints = "$src1 = $dst" in {
+ def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+
+ def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckh VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckh VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckh VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+}
+
+// Extract / Insert
+def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>;
+let Constraints = "$src1 = $dst" in {
+ def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))]>;
+}
+
+// Mask creation
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+
+// Conditional store
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+// Non-temporal stores
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM5r, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM6r, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// TODO: custom-lower this so that the noop is never even generated.
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+ (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+ (i8 1)), (MFENCE)>;
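+
+// The five membarrier operands above encode (load-load, load-store,
+// store-load, store-store, device): any non-device barrier lowers to a noop,
+// a pure store-store device barrier to sfence, a pure load-load one to
+// lfence, and every other device barrier to a full mfence.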
+
+// Alias instruction that maps the all-ones vector to pcmpeqd for SSE.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
+ "pcmpeqd\t$dst, $dst",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+
+// FR64 to 128-bit vector conversion.
+let isAsCheapAsAMove = 1 in
+def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (scalar_to_vector FR64:$src)))]>;
+def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
+
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+
+def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>;
+
+def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+
+// SSE2 instructions with XS prefix
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+// FIXME: we may not be able to eliminate this movsd via coalescing, since the
+// src and dest register classes are different. We really want to write this
+// pattern like this:
+// def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+//           (f64 FR64:$src)>;
+let isAsCheapAsAMove = 1 in
+def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>;
+def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+
+
+// Move to the lower bits of a VR128, leaving the upper bits alone.
+// Three-operand (but two-address) aliases.
+let Constraints = "$src1 = $dst" in {
+ let neverHasSideEffects = 1 in
+ def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}", []>;
+
+ let AddedComplexity = 15 in
+ def MOVLPDrr : SDI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (movl VR128:$src1, VR128:$src2)))]>;
+}
+
+// Store / copy the lower 64 bits of an XMM register.
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+// Move to the lower bits of a VR128, zeroing the upper bits.
+// Loading from memory automatically zeroes the upper bits.
+let AddedComplexity = 20 in {
+def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
+ (loadf64 addr:$src))))))]>;
+
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (MOVZSD2PDrm addr:$src)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (MOVZSD2PDrm addr:$src)>;
+def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
+}
+
+// A movd / movq to an XMM register zero-extends the value.
+let AddedComplexity = 15 in {
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>;
+// This is X86-64 only.
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>;
+}
+
+let AddedComplexity = 20 in {
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>;
+
+def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+
+def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>, XS,
+ Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}
+
+// Move from XMM to XMM, clearing the upper 64 bits. Note that the IA-32
+// documentation has a bug here: movq xmm1, xmm2 does clear the high bits.
+let AddedComplexity = 15 in
+def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+let AddedComplexity = 20 in {
+def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE3 Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movshdup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (movshdup
+ VR128:$src, (undef))))]>;
+def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movshdup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (movshdup
+ (memopv4f32 addr:$src), (undef)))]>;
+
+def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movsldup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (movsldup
+ VR128:$src, (undef))))]>;
+def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movsldup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (movsldup
+ (memopv4f32 addr:$src), (undef)))]>;
+
+def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movddup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "movddup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
+ (undef))))]>;
+
+def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+let AddedComplexity = 5 in {
+def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (memopv2i64 addr:$src), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+}
+
+// Arithmetic
+let Constraints = "$src1 = $dst" in {
+ def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "addsubps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
+ VR128:$src2))]>;
+ def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "addsubps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
+ (memop addr:$src2)))]>;
+ def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "addsubpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
+ VR128:$src2))]>;
+ def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "addsubpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
+ (memop addr:$src2)))]>;
+}
+
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+
+// Horizontal ops
+class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
+class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
+class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
+class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
+
+let Constraints = "$src1 = $dst" in {
+ def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
+ def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
+ def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
+ def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
+ def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
+ def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
+ def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+ def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+}
+
+// Thread synchronization
+def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor",
+ [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
+def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef>, <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+ (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef>, <0, 0, 2, 2>
+let AddedComplexity = 15 in
+ def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+ (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+ def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===----------------------------------------------------------------------===//
+// SSSE3 Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
+multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
+}
+
+/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
+multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64
+ (bitconvert (memopv4i16 addr:$src))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
+multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64
+ (bitconvert (memopv2i32 addr:$src))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
+}
+
+defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb",
+ int_x86_ssse3_pabs_b,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw",
+ int_x86_ssse3_pabs_w,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd",
+ int_x86_ssse3_pabs_d,
+ int_x86_ssse3_pabs_d_128>;
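+
+// For example, PABSB expands to four instructions: PABSBrr64 and PABSBrm64
+// operating on the 64-bit MMX registers (VR64), plus PABSBrr128 and
+// PABSBrm128 operating on XMM registers (VR128).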
+
+/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
+let Constraints = "$src1 = $dst" in {
+ multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Commutable = 0> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv8i8 addr:$src2))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ }
+}
+
+/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
+let Constraints = "$src1 = $dst" in {
+ multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Commutable = 0> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv4i16 addr:$src2))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv8i16 addr:$src2))))]>, OpSize;
+ }
+}
+
+/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
+let Constraints = "$src1 = $dst" in {
+ multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Commutable = 0> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv2i32 addr:$src2))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv4i32 addr:$src2))))]>, OpSize;
+ }
+}
+
+defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw",
+ int_x86_ssse3_phadd_w,
+ int_x86_ssse3_phadd_w_128>;
+defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd",
+ int_x86_ssse3_phadd_d,
+ int_x86_ssse3_phadd_d_128>;
+defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw",
+ int_x86_ssse3_phadd_sw,
+ int_x86_ssse3_phadd_sw_128>;
+defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw",
+ int_x86_ssse3_phsub_w,
+ int_x86_ssse3_phsub_w_128>;
+defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd",
+ int_x86_ssse3_phsub_d,
+ int_x86_ssse3_phsub_d_128>;
+defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw",
+ int_x86_ssse3_phsub_sw,
+ int_x86_ssse3_phsub_sw_128>;
+defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw,
+ int_x86_ssse3_pmadd_ub_sw_128>;
+defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw,
+ int_x86_ssse3_pmul_hr_sw_128, 1>;
+defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
+ int_x86_ssse3_pshuf_b,
+ int_x86_ssse3_pshuf_b_128>;
+defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb",
+ int_x86_ssse3_psign_b,
+ int_x86_ssse3_psign_b_128>;
+defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw",
+ int_x86_ssse3_psign_w,
+ int_x86_ssse3_psign_w_128>;
+defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
+ int_x86_ssse3_psign_d,
+ int_x86_ssse3_psign_d_128>;
+
+let Constraints = "$src1 = $dst" in {
+ def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i16imm:$src3),
+ "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst,
+ (int_x86_ssse3_palign_r
+ VR64:$src1, VR64:$src2,
+ imm:$src3))]>;
+ def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
+ "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst,
+ (int_x86_ssse3_palign_r
+ VR64:$src1,
+ (bitconvert (memopv2i32 addr:$src2)),
+ imm:$src3))]>;
+
+ def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32imm:$src3),
+ "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_ssse3_palign_r_128
+ VR128:$src1, VR128:$src2,
+ imm:$src3))]>, OpSize;
+ def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
+ "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_ssse3_palign_r_128
+ VR128:$src1,
+ (bitconvert (memopv4i32 addr:$src2)),
+ imm:$src3))]>, OpSize;
+}
+
+def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after dag
+// combine. Since these loads aren't folded into the fextend, we have to
+// match the combination explicitly here.
+let Predicates = [HasSSE2] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (CVTSS2SDrm addr:$src)>;
+
+// bit_convert: bitcasts between 128-bit vector types are free, so these
+// patterns select no instruction and simply reuse the source register.
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
+
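+// The bitconvert patterns above record that reinterpreting one 128-bit
+// vector type as another is free: no instruction is selected, the bits are
+// simply viewed under a new type. A rough C++ model of that no-op cast
+// (an illustrative sketch, not LLVM API):
+//
+//   #include <cstring>
+//   template <typename To, typename From>
+//   To view_as(const From &Src) {          // sizeof(To) == sizeof(From)
+//     To Dst;
+//     std::memcpy(&Dst, &Src, sizeof(To)); // same 16 bytes, new element type
+//     return Dst;
+//   }
+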
+// Move a scalar to an XMM register, zero-extending the upper elements.
+let AddedComplexity = 15 in {
+// Zero a VR128, then do a MOVS{S|D} into the lower bits.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+}
+
+// Splat v2f64 / v2i64
+let AddedComplexity = 10 in {
+def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
+ (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
+ (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+// Special unary SHUFPSrri case.
+def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE1]>;
+let AddedComplexity = 5 in
+def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Unary v4f32 shuffle with PSHUF* in order to fold a load.
+def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[HasSSE2]>;
+
+// Special binary v4i32 shuffle cases with SHUFPS.
+def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Special binary v2i64 shuffle cases using SHUFPDrri.
+def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+
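+// SHUFFLE_get_shuf_imm above converts a shuffle mask into the 8-bit
+// immediate consumed by PSHUFD/SHUFPS/SHUFPD. As a sketch, assuming the
+// usual SSE encoding of four 2-bit lane selectors, element 0 in the low
+// bits:
+//
+//   unsigned packShufImm(unsigned M0, unsigned M1, unsigned M2, unsigned M3) {
+//     return (M0 & 3) | ((M1 & 3) << 2) | ((M2 & 3) << 4) | ((M3 & 3) << 6);
+//   }
+//
+// e.g. the splat-low mask <0,0,0,0> packs to 0x00, and <2,2,3,3> to 0xFA.
+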
+// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
+ (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
+ (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
+def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+}
+
+// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
+def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+
+let AddedComplexity = 15 in {
+// Setting the lowest element in the vector.
+def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
+def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+}
+
+// Set lowest element and zero upper elements.
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
+ (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+
+// Some special case pandn patterns.
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
+ (memop addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
+ (memop addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
+ (memop addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+
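+// The pandn patterns above exist because the DAG has no vector NOT node:
+// "~a" appears as "xor a, all-ones", and PANDN computes (~a) & b in one
+// instruction. A scalar model of the equivalence (per 64-bit half):
+//
+//   #include <cstdint>
+//   uint64_t pandn(uint64_t A, uint64_t B) {
+//     return (A ^ ~uint64_t(0)) & B;   // xor with all-ones == bitwise NOT,
+//   }                                  // so this is ~A & B
+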
+// vector -> vector casts
+def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
+ (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
+ (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
+
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>;
+def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>;
+def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
+def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>;
+
+def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+
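+// Why the PS moves are shorter for integer vectors: per the usual x86
+// encoding tables, the integer forms carry a mandatory prefix byte that
+// the packed-single forms lack (load forms shown):
+//
+//   movaps xmm, m128  ->    0F 28 /r   (2 opcode bytes + ModRM)
+//   movdqa xmm, m128  -> 66 0F 6F /r   (one byte longer)
+//   movups xmm, m128  ->    0F 10 /r
+//   movdqu xmm, m128  -> F3 0F 6F /r
+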
+//===----------------------------------------------------------------------===//
+// SSE4.1 Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
+ string OpcodeStr,
+ Intrinsic V4F32Int,
+ Intrinsic V2F64Int> {
+  // Vector intrinsic operation, reg
+ def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm_Int : SS4AIi8<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, reg
+ def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ OpSize;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int> {
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+}
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+
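+// The round immediate selects the behavior. A scalar sketch of roundsd
+// under the commonly documented imm8 layout (bits 1:0 are the round
+// control when bit 2 is clear; bit 2 set means "use MXCSR.RC"; bit 3
+// suppresses the precision exception):
+//
+//   #include <cmath>
+//   double roundsd_model(double X, unsigned Imm) {
+//     switch (Imm & 3) {
+//     case 0:  return std::nearbyint(X); // to nearest (even, default env)
+//     case 1:  return std::floor(X);     // toward -infinity
+//     case 2:  return std::ceil(X);      // toward +infinity
+//     default: return std::trunc(X);     // toward zero
+//     }
+//   }
+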
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
+
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Commutable = 0> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ }
+}
+
+defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq",
+ int_x86_sse41_pcmpeqq, 1>;
+defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw",
+ int_x86_sse41_packusdw, 0>;
+defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb",
+ int_x86_sse41_pminsb, 1>;
+defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd",
+ int_x86_sse41_pminsd, 1>;
+defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud",
+ int_x86_sse41_pminud, 1>;
+defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw",
+ int_x86_sse41_pminuw, 1>;
+defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb",
+ int_x86_sse41_pmaxsb, 1>;
+defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd",
+ int_x86_sse41_pmaxsd, 1>;
+defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
+ int_x86_sse41_pmaxud, 1>;
+defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
+ int_x86_sse41_pmaxuw, 1>;
+
+defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
+
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
+
+/// SS41I_binop_patint - SSE 4.1 binary operator with both a plain SDNode
+/// pattern and an intrinsic form.
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
+ SDNode OpNode, Intrinsic IntId128,
+ bit Commutable = 0> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode (OpVT VR128:$src1),
+ VR128:$src2))]>, OpSize {
+ let isCommutable = Commutable;
+ }
+ def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize;
+ def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, (memop addr:$src2)))]>,
+ OpSize;
+ }
+}
+defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
+ int_x86_sse41_pmulld, 1>;
+
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Commutable = 0> {
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ OpSize;
+ }
+}
+
+defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps",
+ int_x86_sse41_blendps, 0>;
+defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd",
+ int_x86_sse41_blendpd, 0>;
+defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw",
+ int_x86_sse41_pblendw, 0>;
+defm DPPS : SS41I_binop_rmi_int<0x40, "dpps",
+ int_x86_sse41_dpps, 1>;
+defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
+ int_x86_sse41_dppd, 1>;
+defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
+ int_x86_sse41_mpsadbw, 1>;
+
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+
+
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
+
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
+
+// Common patterns involving scalar load.
+def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+
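+// Semantics sketch for the sign/zero extension above: pmovsxbw/pmovzxbw
+// widen the low eight bytes of the source to eight words; the other
+// variants differ only in element widths.
+//
+//   #include <cstdint>
+//   void pmovsxbw_model(const int8_t Src[8], int16_t Dst[8]) {
+//     for (int i = 0; i < 8; ++i) Dst[i] = Src[i];  // sign-extend
+//   }
+//   void pmovzxbw_model(const uint8_t Src[8], uint16_t Dst[8]) {
+//     for (int i = 0; i < 8; ++i) Dst[i] = Src[i];  // zero-extend
+//   }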
+
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+  // Expecting an i16 load any-extended to an i32 value.
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
+ OpSize;
+}
+
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+/// SS41I_extract8 - SSE 4.1 extract 8 bits to a 32-bit reg or 8-bit mem
+multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
+
+
+/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
+multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
+
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to an int reg or memory
+/// destination
+multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+
+
+/// SS41I_extractf32 - SSE 4.1 extract a 32-bit FP value to an int reg or
+/// memory destination
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasSSE41]>;
+
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
+ }
+}
+
+defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
+ }
+}
+
+defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR32:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, (loadf32 addr:$src2),
+ imm:$src3))]>, OpSize;
+ }
+}
+
+defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+}
+
+def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+
+/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
+let Constraints = "$src1 = $dst" in {
+ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Commutable = 0> {
+ def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ }
+}
+
+defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (PCMPGTQrm VR128:$src1, addr:$src2)>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
new file mode 100644
index 0000000..f923106
--- /dev/null
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -0,0 +1,560 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "llvm/Function.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#endif
+
+void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned char *OldByte = (unsigned char *)Old;
+ *OldByte++ = 0xE9; // Emit JMP opcode.
+ unsigned *OldWord = (unsigned *)OldByte;
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)OldWord;
+ *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+}
+
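+// The patch above is a 5-byte near jump, "E9 rel32". The displacement is
+// measured from the end of the instruction, hence the "- 4" once the
+// opcode byte has been skipped. Equivalent arithmetic as a sketch:
+//
+//   int32_t jmpDisp(intptr_t Site, intptr_t Target) {
+//     return (int32_t)(Target - (Site + 5)); // Site points at the 0xE9
+//   }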
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// Check if building with -fPIC
+#if defined(__PIC__) && __PIC__ && defined(__linux__)
+#define ASMCALLSUFFIX "@PLT"
+#else
+#define ASMCALLSUFFIX
+#endif
+
+// For ELF targets, use a .size and .type directive, to let tools
+// know the extent of functions defined in assembler.
+#if defined(__ELF__)
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+#else
+# define SIZE(sym)
+# define TYPE_FUNCTION(sym)
+#endif
+
+// Provide a convenient way to disable the use of CFI directives.
+// This is needed for old/broken assemblers (for example, gas on
+// Darwin is pretty old and doesn't support these directives)
+#if defined(__APPLE__)
+# define CFI(x)
+#else
+// FIXME: Disable this until we really want to use it. Also, we will
+// need to add some workarounds for compilers that support only a subset
+// of these directives.
+# define CFI(x)
+#endif
+
+// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// callee-saved registers for the fastcc calling convention.
+extern "C" {
+#if defined(X86_64_JIT)
+# ifndef _MSC_VER
+ // No need to save EAX/EDX for X86-64.
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ // Save RBP
+ "pushq %rbp\n"
+ CFI(".cfi_def_cfa_offset 16\n")
+ CFI(".cfi_offset %rbp, -16\n")
+ // Save RSP
+ "movq %rsp, %rbp\n"
+ CFI(".cfi_def_cfa_register %rbp\n")
+ // Save all int arg registers
+ "pushq %rdi\n"
+ CFI(".cfi_rel_offset %rdi, 0\n")
+ "pushq %rsi\n"
+ CFI(".cfi_rel_offset %rsi, 8\n")
+ "pushq %rdx\n"
+ CFI(".cfi_rel_offset %rdx, 16\n")
+ "pushq %rcx\n"
+ CFI(".cfi_rel_offset %rcx, 24\n")
+ "pushq %r8\n"
+ CFI(".cfi_rel_offset %r8, 32\n")
+ "pushq %r9\n"
+ CFI(".cfi_rel_offset %r9, 40\n")
+    // Align stack on 16-byte boundary. RSP might not be properly aligned
+ // (8 byte) if this is called from an indirect stub.
+ "andq $-16, %rsp\n"
+ // Save all XMM arg registers
+ "subq $128, %rsp\n"
+ "movaps %xmm0, (%rsp)\n"
+ "movaps %xmm1, 16(%rsp)\n"
+ "movaps %xmm2, 32(%rsp)\n"
+ "movaps %xmm3, 48(%rsp)\n"
+ "movaps %xmm4, 64(%rsp)\n"
+ "movaps %xmm5, 80(%rsp)\n"
+ "movaps %xmm6, 96(%rsp)\n"
+ "movaps %xmm7, 112(%rsp)\n"
+ // JIT callee
+ "movq %rbp, %rdi\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rsi\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ // Restore all XMM arg registers
+ "movaps 112(%rsp), %xmm7\n"
+ "movaps 96(%rsp), %xmm6\n"
+ "movaps 80(%rsp), %xmm5\n"
+ "movaps 64(%rsp), %xmm4\n"
+ "movaps 48(%rsp), %xmm3\n"
+ "movaps 32(%rsp), %xmm2\n"
+ "movaps 16(%rsp), %xmm1\n"
+ "movaps (%rsp), %xmm0\n"
+ // Restore RSP
+ "movq %rbp, %rsp\n"
+ CFI(".cfi_def_cfa_register %rsp\n")
+ // Restore all int arg registers
+ "subq $48, %rsp\n"
+ CFI(".cfi_adjust_cfa_offset 48\n")
+ "popq %r9\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r9\n")
+ "popq %r8\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r8\n")
+ "popq %rcx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rcx\n")
+ "popq %rdx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdx\n")
+ "popq %rsi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rsi\n")
+ "popq %rdi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdi\n")
+ // Restore RBP
+ "popq %rbp\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rbp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+# else
+  // No inline assembler support on this platform. The routine is in an
+  // external file.
+ void X86CompilationCallback();
+
+# endif
+#elif defined (X86_32_JIT)
+# ifndef _MSC_VER
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+# if defined(__APPLE__)
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+# endif
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register %esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+
+ // Same as X86CompilationCallback but also saves XMM argument registers.
+ void X86CompilationCallback_SSE(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
+ TYPE_FUNCTION(X86CompilationCallback_SSE)
+ ASMPREFIX "X86CompilationCallback_SSE:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ CFI(".cfi_restore %xmm3\n")
+ "movaps 32(%esp), %xmm2\n"
+ CFI(".cfi_restore %xmm2\n")
+ "movaps 16(%esp), %xmm1\n"
+ CFI(".cfi_restore %xmm1\n")
+ "movaps (%esp), %xmm0\n"
+ CFI(".cfi_restore %xmm0\n")
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback_SSE)
+ );
+# else
+ void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+
+ _declspec(naked) void X86CompilationCallback(void) {
+ __asm {
+ push ebp
+ mov ebp, esp
+ push eax
+ push edx
+ push ecx
+ and esp, -16
+ mov eax, dword ptr [ebp+4]
+ mov dword ptr [esp+4], eax
+ mov dword ptr [esp], ebp
+ call X86CompilationCallback2
+ mov esp, ebp
+ sub esp, 12
+ pop ecx
+ pop edx
+ pop eax
+ pop ebp
+ ret
+ }
+ }
+
+# endif // _MSC_VER
+
+#else // Not an i386 host
+ void X86CompilationCallback() {
+ assert(0 && "Cannot call X86CompilationCallback() on a non-x86 arch!\n");
+ abort();
+ }
+#endif
+}
+
+/// X86CompilationCallback2 - This is the target-specific function invoked by the
+/// function stub when we did not know the real target of a call. This function
+/// must locate the start of the stub or call site and pass it into the JIT
+/// compiler function.
+extern "C" void ATTRIBUTE_USED
+X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+ // It's a stub if there is an interrupt marker after the call.
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD;
+
+  // The call instruction should have pushed the return address onto the stack...
+#if defined (X86_64_JIT)
+ RetAddr--; // Backtrack to the reference itself...
+#else
+ RetAddr -= 4; // Backtrack to the reference itself...
+#endif
+
+#if 0
+ DOUT << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n";
+#endif
+
+ // Sanity check to make sure this really is a call instruction.
+#if defined (X86_64_JIT)
+ assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!");
+ assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!");
+#else
+ assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
+#endif
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call.
+#if defined (X86_64_JIT)
+ if (!isStub)
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
+#else
+ *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
+#endif
+
+ if (isStub) {
+ // If this is a stub, rewrite the call into an unconditional branch
+ // instruction so that two return addresses are not pushed onto the stack
+ // when the requested function finally gets called. This also makes the
+    // 0xCD byte (interrupt) dead, so the marker doesn't affect anything.
+#if defined (X86_64_JIT)
+ // If the target address is within 32-bit range of the stub, use a
+ // PC-relative branch instead of loading the actual address. (This is
+ // considerably shorter than the 64-bit immediate load already there.)
+ // We assume here intptr_t is 64 bits.
+ intptr_t diff = NewVal-RetAddr+7;
+ if (diff >= -2147483648LL && diff <= 2147483647LL) {
+ *(unsigned char*)(RetAddr-0xc) = 0xE9;
+ *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff;
+ } else {
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
+ }
+#else
+ ((unsigned char*)RetAddr)[-1] = 0xE9;
+#endif
+ }
+
+ // Change the return address to reexecute the call instruction...
+#if defined (X86_64_JIT)
+ *RetAddrLoc -= 0xd;
+#else
+ *RetAddrLoc -= 5;
+#endif
+}
+
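+// On x86-32 the rewrite above back-patches the rel32 field of the original
+// call: RetAddr was backed up to point at the displacement, and a rel32 is
+// measured from the end of that 4-byte field. A sketch of the math:
+//
+//   void patchCallSite32(intptr_t DispAddr, intptr_t NewTarget) {
+//     *(int32_t *)DispAddr = (int32_t)(NewTarget - DispAddr - 4);
+//   }
+//
+// The saved return address is then rolled back 5 bytes (the length of
+// "call rel32") so the now-patched call re-executes with the real target.
+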
+TargetJITInfo::LazyResolverFn
+X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (!X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) {
+ // FIXME: support for AMD family of processors.
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 25) & 0x1)
+ return X86CompilationCallback_SSE;
+ }
+ }
+#endif
+
+ return X86CompilationCallback;
+}
+
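+// The probe above reads CPUID leaf 0 (vendor string assembled from
+// EBX,EDX,ECX into text.c) and then leaf 1, where SSE support is EDX bit
+// 25. A sketch of the same test using GCC/Clang's <cpuid.h> (assuming that
+// header is available; it is not what this file uses):
+//
+//   #include <cpuid.h>
+//   bool hostHasSSE() {
+//     unsigned A, B, C, D;
+//     if (!__get_cpuid(1, &A, &B, &C, &D)) return false;
+//     return (D >> 25) & 1;  // CPUID.1:EDX bit 25 == SSE
+//   }
+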
+void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE) {
+#if defined (X86_64_JIT)
+ JCE.startGVStub(GV, 8, 8);
+ JCE.emitWordLE((unsigned)(intptr_t)ptr);
+ JCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32));
+#else
+ JCE.startGVStub(GV, 4, 4);
+ JCE.emitWordLE((intptr_t)ptr);
+#endif
+ return JCE.finishGVStub(GV);
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ bool NotCC = (Fn != (void*)(intptr_t)X86CompilationCallback &&
+ Fn != (void*)(intptr_t)X86CompilationCallback_SSE);
+#else
+ bool NotCC = Fn != (void*)(intptr_t)X86CompilationCallback;
+#endif
+ if (NotCC) {
+#if defined (X86_64_JIT)
+ JCE.startGVStub(F, 13, 4);
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Fn);
+ JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // jmpq *r10
+ JCE.emitByte(2 | (4 << 3) | (3 << 6));
+#else
+ JCE.startGVStub(F, 5, 4);
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+#endif
+ return JCE.finishGVStub(F);
+ }
+
+#if defined (X86_64_JIT)
+ JCE.startGVStub(F, 14, 4);
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Fn);
+ JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // callq *r10
+ JCE.emitByte(2 | (2 << 3) | (3 << 6));
+#else
+ JCE.startGVStub(F, 6, 4);
+ JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
+
+ JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+#endif
+
+ JCE.emitByte(0xCD); // Interrupt - Just a marker identifying the stub!
+ return JCE.finishGVStub(F);
+}
+
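+// Byte layout of the 64-bit stubs emitted above, decoded per the usual
+// x86-64 encoding (0xB8+2 with REX.W+REX.B selects %r10; FF /4 is an
+// indirect jump, FF /2 an indirect call):
+//
+//   49 BA <imm64>   movabsq $Fn, %r10
+//   41 FF E2        jmpq  *%r10    ModRM 2|(4<<3)|(3<<6) -- direct stub
+//   41 FF D2        callq *%r10    ModRM 2|(2<<3)|(3<<6) -- lazy stub
+//   CD              interrupt marker, emitted only on the lazy stub
+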
+void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub,
+ JITCodeEmitter &JCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+ JCE.startGVStub(F, Stub, 5);
+ JCE.emitByte(0xE9);
+#if defined (X86_64_JIT)
+ assert(((((intptr_t)Fn-JCE.getCurrentPCValue()-5) << 32) >> 32) ==
+ ((intptr_t)Fn-JCE.getCurrentPCValue()-5)
+ && "PIC displacement does not fit in displacement field!");
+#endif
+ JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+ JCE.finishGVStub(F);
+}
+
+/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+/// specific basic block.
+uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
+#if defined(X86_64_JIT)
+ return BB - Entry;
+#else
+ return BB - PICBase;
+#endif
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
+
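+// Worked example for reloc_pcrel_word: if the rel32 field lives at
+// RelocPos and the symbol resolves to Target, the bytes in memory must
+// end up holding "Target - end of the 4-byte field", so the fixup is:
+//
+//   void applyPCRel32(void *RelocPos, intptr_t Target, intptr_t Cst) {
+//     intptr_t Delta = Target - (intptr_t)RelocPos - 4 - Cst;
+//     *(unsigned *)RelocPos += (unsigned)Delta; // added to the addend in place
+//   }
+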
+char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
+#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER)
+ TLSOffset -= size;
+ return TLSOffset;
+#else
+ assert(0 && "Cannot allocate thread local storage on this arch!\n");
+ return 0;
+#endif
+}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
new file mode 100644
index 0000000..6a4e214
--- /dev/null
+++ b/lib/Target/X86/X86JITInfo.h
@@ -0,0 +1,84 @@
+//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86JITINFO_H
+#define X86JITINFO_H
+
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86TargetMachine;
+
+ class X86JITInfo : public TargetJITInfo {
+ X86TargetMachine &TM;
+ uintptr_t PICBase;
+ char* TLSOffset;
+ public:
+ explicit X86JITInfo(X86TargetMachine &tm) : TM(tm) {
+ useGOT = 0;
+ TLSOffset = 0;
+ }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// emitFunctionStubAtAddr - Use the specified JITCodeEmitter object to
+ /// emit a small native function that simply calls Fn. Emit the stub into
+ /// the supplied buffer.
+ virtual void emitFunctionStubAtAddr(const Function* F, void *Fn,
+ void *Buffer, JITCodeEmitter &JCE);
+
+ /// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+ /// specific basic block.
+ virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// allocateThreadLocalMemory - Each target has its own way of
+ /// handling thread local variables. This method returns a value only
+ /// meaningful to the target.
+ virtual char* allocateThreadLocalMemory(size_t size);
+
+ /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
+ /// PIC jumptable entry.
+ void setPICBase(uintptr_t Base) { PICBase = Base; }
+ uintptr_t getPICBase() const { return PICBase; }
+ };
+}
+
+#endif
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
new file mode 100644
index 0000000..8a5ac2c
--- /dev/null
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -0,0 +1,112 @@
+//===-- X86MachineFunctionInfo.h - X86 machine function info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares X86-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MACHINEFUNCTIONINFO_H
+#define X86MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+enum NameDecorationStyle {
+ None,
+ StdCall,
+ FastCall
+};
+
+/// X86MachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private X86 target-specific information for each
+/// MachineFunction.
+class X86MachineFunctionInfo : public MachineFunctionInfo {
+  /// ForceFramePointer - True if the function is required to use a frame
+  /// pointer for reasons other than containing dynamic allocation or having
+  /// FP elimination turned off. For example, the Cygwin main function
+  /// contains stack-pointer re-alignment code, which requires a frame
+  /// pointer.
+ bool ForceFramePointer;
+
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+  /// BytesToPopOnReturn - Number of bytes the function pops on return.
+  /// Used on Windows for stdcall & fastcall name decoration.
+ unsigned BytesToPopOnReturn;
+
+ /// DecorationStyle - If the function requires additional name decoration,
+ /// DecorationStyle holds the right way to do so.
+ NameDecorationStyle DecorationStyle;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+  /// TailCallReturnAddrDelta - The delta by which the return-address stack
+  /// slot is moved. Used to create an area before the register spill area
+  /// on the stack to which the return address can safely be moved.
+ int TailCallReturnAddrDelta;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+  /// GlobalBaseReg - Keeps track of the virtual register initialized for
+  /// use as the global base register. This is used in some PIC relocation
+  /// models.
+ unsigned GlobalBaseReg;
+
+public:
+ X86MachineFunctionInfo() : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ DecorationStyle(None),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0) {}
+
+ X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ DecorationStyle(None),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
+ void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+ void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
+
+ NameDecorationStyle getDecorationStyle() const { return DecorationStyle; }
+ void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;}
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
+ void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+};
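+
+// Usage sketch (illustrative; mirrors how this info is queried elsewhere in
+// the X86 backend):
+//   X86MachineFunctionInfo *FI = MF.getInfo<X86MachineFunctionInfo>();
+//   if (FI->getForceFramePointer()) { /* keep EBP/RBP as the frame pointer */ }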
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
new file mode 100644
index 0000000..5af1fb1
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -0,0 +1,1280 @@
+//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on X86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
+ X86::ADJCALLSTACKDOWN64 :
+ X86::ADJCALLSTACKDOWN32,
+ tm.getSubtarget<X86Subtarget>().is64Bit() ?
+ X86::ADJCALLSTACKUP64 :
+ X86::ADJCALLSTACKUP32),
+ TM(tm), TII(tii) {
+ // Cache some information.
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+ IsWin64 = Subtarget->isTargetWin64();
+ StackAlign = TM.getFrameInfo()->getStackAlignment();
+ if (Is64Bit) {
+ SlotSize = 8;
+ StackPtr = X86::RSP;
+ FramePtr = X86::RBP;
+ } else {
+ SlotSize = 4;
+ StackPtr = X86::ESP;
+ FramePtr = X86::EBP;
+ }
+}
+
+// getDwarfRegNum - This function maps LLVM register identifiers to the
+// DWARF-specific numbering, used in debug info and exception tables.
+
+int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = DWARFFlavour::X86_64;
+ if (!Subtarget->is64Bit()) {
+ if (Subtarget->isTargetDarwin()) {
+ if (isEH)
+ Flavour = DWARFFlavour::X86_32_DarwinEH;
+ else
+ Flavour = DWARFFlavour::X86_32_Generic;
+ } else if (Subtarget->isTargetCygMing()) {
+      // Unsupported for now; just fall back to the generic flavour.
+ Flavour = DWARFFlavour::X86_32_Generic;
+ } else {
+ Flavour = DWARFFlavour::X86_32_Generic;
+ }
+ }
+
+ return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
+}
+
+// getX86RegNum - This function maps LLVM register identifiers to their X86
+// specific numbering, which is used in various places encoding instructions.
+//
+unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
+ switch(RegNo) {
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
+
+ case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
+ case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
+ return RegNo-X86::ST0;
+
+ case X86::XMM0: case X86::XMM8: case X86::MM0:
+ return 0;
+ case X86::XMM1: case X86::XMM9: case X86::MM1:
+ return 1;
+ case X86::XMM2: case X86::XMM10: case X86::MM2:
+ return 2;
+ case X86::XMM3: case X86::XMM11: case X86::MM3:
+ return 3;
+ case X86::XMM4: case X86::XMM12: case X86::MM4:
+ return 4;
+ case X86::XMM5: case X86::XMM13: case X86::MM5:
+ return 5;
+ case X86::XMM6: case X86::XMM14: case X86::MM6:
+ return 6;
+ case X86::XMM7: case X86::XMM15: case X86::MM7:
+ return 7;
+
+ default:
+ assert(isVirtualRegister(RegNo) && "Unknown physical register!");
+ assert(0 && "Register allocator hasn't allocated reg correctly yet!");
+ return 0;
+ }
+}
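+
+// For example (illustrative), getX86RegNum(X86::R9D) returns N86::ECX (1):
+// only the low three bits of the register number are encoded in the ModR/M
+// byte; the fourth bit for R8-R15 is carried in the REX prefix instead.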
+
+const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ if (Subtarget->is64Bit())
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &X86::CCRRegClass) {
+ if (Is64Bit)
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+ }
+ return NULL;
+}
+
+const unsigned *
+X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ bool callsEHReturn = false;
+
+ if (MF) {
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
+ }
+
+ static const unsigned CalleeSavedRegs32Bit[] = {
+ X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs32EHRet[] = {
+ X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64Bit[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64EHRet[] = {
+ X86::RAX, X86::RDX, X86::RBX, X86::R12,
+ X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegsWin64[] = {
+ X86::RBX, X86::RBP, X86::RDI, X86::RSI,
+ X86::R12, X86::R13, X86::R14, X86::R15,
+ X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9,
+ X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
+ X86::XMM14, X86::XMM15, 0
+ };
+
+ if (Is64Bit) {
+ if (IsWin64)
+ return CalleeSavedRegsWin64;
+ else
+ return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
+ } else {
+ return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
+ }
+}
+
+const TargetRegisterClass* const*
+X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ bool callsEHReturn = false;
+
+ if (MF) {
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
+ }
+
+ static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = {
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = {
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = {
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = {
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = {
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass, 0
+ };
+
+ if (Is64Bit) {
+ if (IsWin64)
+ return CalleeSavedRegClassesWin64;
+ else
+ return (callsEHReturn ?
+ CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit);
+ } else {
+ return (callsEHReturn ?
+ CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit);
+ }
+}
+
+BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ // Set the stack-pointer register and its aliases as reserved.
+ Reserved.set(X86::RSP);
+ Reserved.set(X86::ESP);
+ Reserved.set(X86::SP);
+ Reserved.set(X86::SPL);
+ // Set the frame-pointer register and its aliases as reserved if needed.
+ if (hasFP(MF)) {
+ Reserved.set(X86::RBP);
+ Reserved.set(X86::EBP);
+ Reserved.set(X86::BP);
+ Reserved.set(X86::BPL);
+ }
+ // Mark the x87 stack registers as reserved, since they don't
+ // behave normally with respect to liveness. We don't fully
+ // model the effects of x87 stack pushes and pops after
+ // stackification.
+ Reserved.set(X86::ST0);
+ Reserved.set(X86::ST1);
+ Reserved.set(X86::ST2);
+ Reserved.set(X86::ST3);
+ Reserved.set(X86::ST4);
+ Reserved.set(X86::ST5);
+ Reserved.set(X86::ST6);
+ Reserved.set(X86::ST7);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
+ unsigned MaxAlign = 0;
+ for (int i = FFI->getObjectIndexBegin(),
+ e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ unsigned Align = FFI->getObjectAlignment(i);
+ MaxAlign = std::max(MaxAlign, Align);
+ }
+
+ return MaxAlign;
+}
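+
+// For example (illustrative), a frame holding a 4-byte integer slot and a
+// 16-byte XMM spill slot yields MaxAlign == 16, ignoring any dead objects.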
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+
+ return (NoFramePointerElim ||
+ needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ (MMI && MMI->callsUnwindInit()));
+}
+
+bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // FIXME: Currently we don't support stack realignment for functions with
+ // variable-sized allocas
+ return (RealignStack &&
+ (MFI->getMaxAlignment() > StackAlign &&
+ !MFI->hasVarSizedObjects()));
+}
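+
+// Illustrative (assumption on the default alignment): with StackAlign == 4
+// on a 32-bit target, a function containing a 16-byte-aligned object needs
+// realignment, provided RealignStack is enabled and the function has no
+// variable-sized objects.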
+
+bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+int
+X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+ int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
+ uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+
+ if (needsStackRealignment(MF)) {
+ if (FI < 0)
+ // Skip the saved EBP
+ Offset += SlotSize;
+ else {
+ unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
+ assert( (-(Offset + StackSize)) % Align == 0);
+ Align = 0;
+ return Offset + StackSize;
+ }
+
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP
+ Offset += SlotSize;
+
+ // Skip the RETADDR move area
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
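+
+// Worked example (illustrative, tracing the arithmetic above): on a 32-bit
+// target with a frame pointer, SlotSize == 4 and an object at ObjectOffset
+// -12 gives -12 + 4 (return address slot) + 4 (saved EBP) = -4, i.e. the
+// object is addressed as [EBP - 4] (assuming no tail-call RETADDR area).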
+
+void X86RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+    // If the stack pointer can be changed after the prologue, turn the
+    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
+    // adjcallstackup instruction into an 'add ESP, <amt>'.
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
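+      // For example (illustrative), with StackAlign == 16 an Amount of 20
+      // rounds to (20 + 15) / 16 * 16 == 32, keeping the stack pointer
+      // properly aligned across the call.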
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
+ StackPtr).addReg(StackPtr).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount) {
+ unsigned Opc = (Amount < 128) ?
+ (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+ (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+ New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ unsigned Opc = (CalleeAmt < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
+ StackPtr).addReg(StackPtr).addImm(CalleeAmt);
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
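+
+// Illustrative transformation (assumption: 32-bit, StackAlign == 16, no
+// reserved call frame):
+//   ADJCALLSTACKDOWN32 20      ==>   sub esp, 32
+//   ...call...
+//   ADJCALLSTACKUP32 20, 0     ==>   add esp, 32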
+
+void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr;
+ if (needsStackRealignment(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
+ else
+ BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
+
+  // This must be part of a four-operand memory reference. Replace the
+  // FrameIndex operand with the chosen base register.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+  // Now add the frame object offset to the offset from the base register.
+ if (MI.getOperand(i+3).isImm()) {
+ // Offset is a 32-bit integer.
+ int Offset = getFrameIndexOffset(MF, FrameIndex) +
+ (int)(MI.getOperand(i+3).getImm());
+
+ MI.getOperand(i+3).ChangeToImmediate(Offset);
+ } else {
+ // Offset is symbolic. This is extremely rare.
+ uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
+ (uint64_t)MI.getOperand(i+3).getOffset();
+ MI.getOperand(i+3).setOffset(Offset);
+ }
+}
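+
+// Illustrative rewrite (hypothetical operands): a load such as
+//   MOV32rm %eax, <fi#1>, 1, %noreg, 8
+// becomes, once the frame index is resolved against EBP,
+//   MOV32rm %eax, %ebp, 1, %noreg, <offset of fi#1 + 8>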
+
+void
+X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
+ calculateMaxStackAlignment(FFI));
+
+ FFI->setMaxAlignment(MaxAlign);
+}
+
+void
+X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MF.getFrameInfo()->
+ CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1*SlotSize)+TailCallReturnAddrDelta);
+ }
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ (int)SlotSize * -2+
+ TailCallReturnAddrDelta);
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ FrameIdx = 0;
+ }
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
+ const TargetInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub
+ ? ((Offset < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
+ : ((Offset < 128) ?
+ (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+ (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
+ uint64_t Chunk = (1LL << 31) - 1;
+ DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+ DebugLoc::getUnknownLoc());
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(ThisVal);
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ Offset -= ThisVal;
+ }
+}
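+
+// For example (illustrative), emitSPUpdate(..., -20, false, TII) emits
+// 'sub esp, 20' via SUB32ri8 (since 20 < 128); larger adjustments are split
+// into chunks of at most (1LL << 31) - 1 bytes.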
+
+// mergeSPUpdatesUp - Merge the stack adjustment of the ADD/SUB of the stack
+// pointer immediately preceding the iterator into *NumBytes and erase it.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+// mergeSPUpdatesDown - Merge the stack adjustment of the ADD/SUB of the stack
+// pointer immediately following the iterator into *NumBytes and erase it.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+  // This optimization is currently disabled; the early return below makes
+  // the remainder of the function dead code.
+  return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Check the instruction before/after the passed
+/// instruction. If it is an ADD/SUB of the stack pointer, it is deleted and
+/// the stack adjustment is returned as a positive value for ADD and a
+/// negative one for SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ int Offset = 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
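+
+// For example (illustrative): if the instruction before MBBI is
+// 'sub esp, 12', calling mergeSPUpdates(MBB, MBBI, StackPtr, true) erases it
+// and returns -12, which the caller folds into its own adjustment.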
+
+void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
+ unsigned FrameLabelId,
+ unsigned ReadyLabelId) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ if (!MMI)
+ return;
+
+ uint64_t StackSize = MFI->getStackSize();
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
+
+  // Calculate the number of bytes used to store the return address.
+ int stackGrowth =
+ (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize());
+
+ if (StackSize) {
+ // Show update of SP.
+ if (hasFP(MF)) {
+ // Adjust SP
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize+stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+ } else {
+    // FIXME: Verify & implement for FP.
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+  // FIXME: This is a dirty hack. The code itself is a mess right now.
+  // It should be rewritten from scratch and generalized at some point.
+
+  // Determine the maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (unsigned I = 0, E = CSI.size(); I!=E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(CSI[I].getFrameIdx()));
+
+ // Calculate offsets
+ int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth;
+ for (unsigned I = 0, E = CSI.size(); I!=E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ Offset = (MaxOffset-Offset+saveAreaOffset);
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ if (hasFP(MF)) {
+ // Save FP
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+
+ MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+}
+
+
+void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function* Fn = MF.getFunction();
+ const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
+ !Fn->doesNotThrow() ||
+ UnwindTablesMandatory;
+ DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+ DebugLoc::getUnknownLoc());
+
+ // Prepare for frame info.
+ unsigned FrameLabelId = 0;
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Get desired stack alignment
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+
+ // Add RETADDR move area to callee saved frame size.
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ X86FI->setCalleeSavedFrameSize(
+ X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta));
+
+  // On x86-64, if the Red Zone is not disabled and this is a leaf function
+  // that uses at most 128 bytes of stack space and has no frame pointer,
+  // calls, or dynamic allocas, then we do not need to adjust the stack
+  // pointer (we fit in the Red Zone).
+ if (Is64Bit && !DisableRedZone &&
+ !needsStackRealignment(MF) &&
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->hasCalls()) { // No calls.
+ uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ if (hasFP(MF)) MinSize += SlotSize;
+ StackSize = std::max(MinSize,
+ StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ }
+
+  // Insert a stack pointer adjustment for later moving of the return
+  // address. This only applies to tail-call-optimized functions where the
+  // callee's argument stack size is bigger than the caller's.
+ if (TailCallReturnAddrDelta < 0) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
+ StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register... which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save EBP into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr, RegState::Kill);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
+ }
+
+ // Update EBP with the new base value...
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(FramePtr);
+
+ // Realign stack
+ if (needsStackRealignment(MF)) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
+ StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ unsigned ReadyLabelId = 0;
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer is ready.
+ ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
+ }
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: ESP -= numbytes
+ if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+      // Check whether EAX is live-in for this function.
+ bool isEAXAlive = false;
+ for (MachineRegisterInfo::livein_iterator
+ II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
+ unsigned Reg = II->first;
+ isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL);
+ }
+
+      // The function prologue calls _alloca to probe the stack when
+      // allocating more than 4K bytes in one go. Touching the stack at 4K
+      // increments is necessary to ensure that the guard pages used by the
+      // OS virtual memory manager are allocated in the correct sequence.
+ if (!isEAXAlive) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
+ } else {
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+ // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+ // allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes-4);
+ MBB.insert(MBBI, MI);
+ }
+ } else {
+      // If there is a SUB32ri of ESP immediately before this instruction,
+      // merge the two. This can be the case when tail call elimination is
+      // enabled and the callee has more arguments than the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
+ }
+ }
+
+ if (needsFrameMoves)
+ emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+}
+
+void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNdi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64:
+ case X86::TAILJMPd:
+ case X86::TAILJMPr:
+ case X86::TAILJMPm: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - CSSize;
+
+ // pop EBP.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ } else {
+ NumBytes = StackSize - CSSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != X86::POP32r && Opc != X86::POP64r &&
+ !PI->getDesc().isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+  // If dynamic alloca is used, reset ESP to point to the last callee-saved
+  // slot before popping the registers off. The same applies when the stack
+  // was realigned.
+ if (needsStackRealignment(MF)) {
+    // We cannot use LEA here because the stack pointer was realigned. We
+    // need to deallocate the local frame first.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
+ if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ MBB.insert(MBBI, MI);
+ } else
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+
+ } else {
+ // adjust stack pointer back: ESP += numbytes
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ }
+
+  // We're returning from the function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = prior(MBB.end());
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ // Tail call return: adjust the stack pointer and jump to callee
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+    // Incorporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+      // Check for a possible merge with the preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ else if (RetOpcode== X86::TCRETURNri64)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
+ else
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = prior(MBB.end());
+    // Check for a possible merge with the preceding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
+ }
+}
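+
+// Roughly, for a 32-bit function with a frame pointer the code above
+// produces an epilogue of the following shape (illustrative, not verbatim
+// output):
+//   add esp, <local frame size>
+//   pop <callee-saved registers>
+//   pop ebp
+//   ret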
+
+unsigned X86RegisterInfo::getRARegister() const {
+ if (Is64Bit)
+ return X86::RIP; // Should have dwarf #16
+ else
+ return X86::EIP; // Should have dwarf #8
+}
+
+unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? FramePtr : StackPtr;
+}
+
+void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+  // Calculate the number of bytes used to store the return address.
+ int stackGrowth = (Is64Bit ? -8 : -4);
+
+ // Initial state of the frame pointer is esp+4.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add return address to move list
+ MachineLocation CSDst(StackPtr, stackGrowth);
+ MachineLocation CSSrc(getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+unsigned X86RegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned X86RegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+namespace llvm {
+unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) {
+ switch (VT.getSimpleVT()) {
+ default: return Reg;
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+
+ return Reg;
+}
+}
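+
+// Usage examples (illustrative, tracing the switch above):
+//   getX86SubSuperRegister(X86::EAX, MVT::i16)       returns X86::AX
+//   getX86SubSuperRegister(X86::RAX, MVT::i8, true)  returns X86::AH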
+
+#include "X86GenRegisterInfo.inc"
+
+namespace {
+ struct VISIBILITY_HIDDEN MSAC : public MachineFunctionPass {
+ static char ID;
+ MSAC() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+
+ // Calculate max stack alignment of all already allocated stack objects.
+ unsigned MaxAlign = calculateMaxStackAlignment(FFI);
+
+      // Be over-conservative: scan over all vreg defs and find whether
+      // vector registers are used. If so, there is a chance that a vector
+      // register will be spilled, and thus the stack needs to be aligned
+      // properly.
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+ RegNum < RI.getLastVirtReg(); ++RegNum)
+ MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+ FFI->setMaxAlignment(MaxAlign);
+
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 Maximal Stack Alignment Calculator";
+ }
+ };
+
+ char MSAC::ID = 0;
+}
+
+FunctionPass*
+llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
new file mode 100644
index 0000000..33b9f5e
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -0,0 +1,163 @@
+//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "X86GenRegisterInfo.h.inc"
+
+namespace llvm {
+ class Type;
+ class TargetInstrInfo;
+ class X86TargetMachine;
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
+namespace X86 {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// X86RegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
+ };
+}
+
+/// DWARFFlavour - Flavour of DWARF register numbering.
+///
+namespace DWARFFlavour {
+ enum {
+ X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+ };
+}
+
+class X86RegisterInfo : public X86GenRegisterInfo {
+public:
+ X86TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+private:
+  /// Is64Bit - Is the target 64-bit?
+ ///
+ bool Is64Bit;
+
+  /// IsWin64 - Is the target one of the Win64 flavours?
+ ///
+ bool IsWin64;
+
+ /// SlotSize - Stack slot size in bytes.
+ ///
+ unsigned SlotSize;
+
+ /// StackAlign - Default stack alignment.
+ ///
+ unsigned StackAlign;
+
+ /// StackPtr - X86 physical register used as stack ptr.
+ ///
+ unsigned StackPtr;
+
+ /// FramePtr - X86 physical register used as frame ptr.
+ ///
+ unsigned FramePtr;
+
+public:
+ X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// getX86RegNum - Returns the native X86 register number for the given LLVM
+ /// register identifier.
+ static unsigned getX86RegNum(unsigned RegNo);
+
+ unsigned getStackAlignment() const { return StackAlign; }
+
+ /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
+ /// (created by TableGen) for target dependencies.
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ /// Code Generation virtual methods...
+ ///
+
+ /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
+ /// values.
+ const TargetRegisterClass *getPointerRegClass() const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+  /// in the specified class to or from. Returns NULL if it is possible to
+  /// copy directly between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ /// getCalleeSavedRegs - Return a null-terminated list of all of the
+ /// callee-save registers on this target.
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred
+ /// register classes to spill each callee-saved register with. The order and
+ /// length of this list match the getCalleeSavedRegs() list.
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ /// getReservedRegs - Returns a bitset indexed by physical register number
+  /// indicating if a register is a special register that has particular uses
+  /// and should be considered unavailable at all times, e.g. SP, RA. This is
+  /// used by the register scavenger to determine what registers are free.
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
+ bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void emitFrameMoves(MachineFunction &MF,
+ unsigned FrameLabelId, unsigned ReadyLabelId) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ int getFrameIndexOffset(MachineFunction &MF, int FI) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+// getX86SubSuperRegister - X86 utility function. It returns the sub- or
+// super-register of a specific X86 register,
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+unsigned getX86SubSuperRegister(unsigned, MVT, bool High=false);
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
new file mode 100644
index 0000000..d552cb3
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -0,0 +1,762 @@
+//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 Register file, defining the registers themselves,
+// aliases between the registers, and the register classes built out of the
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+let Namespace = "X86" in {
+
+ // In the register alias definitions below, we define which registers alias
+ // which others. We only specify which registers the small registers alias,
+ // because the register file generator is smart enough to figure out that
+  // AL aliases AX if we tell it that AX aliases AL (for example).
+
+  // DWARF numbering is different for 32-bit and 64-bit, and there are
+  // variations by target as well. Currently the first entry is for X86-64,
+  // the second for EH on X86-32/Darwin, and the third is the 'generic' one
+  // (X86-32/Linux and debug information on X86-32/Darwin).
+
+ // 8-bit registers
+ // Low registers
+ def AL : Register<"al">, DwarfRegNum<[0, 0, 0]>;
+ def DL : Register<"dl">, DwarfRegNum<[1, 2, 2]>;
+ def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>;
+ def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>;
+
+ // X86-64 only
+ def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>;
+ def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>;
+ def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>;
+ def SPL : Register<"spl">, DwarfRegNum<[7, 5, 4]>;
+ def R8B : Register<"r8b">, DwarfRegNum<[8, -2, -2]>;
+ def R9B : Register<"r9b">, DwarfRegNum<[9, -2, -2]>;
+ def R10B : Register<"r10b">, DwarfRegNum<[10, -2, -2]>;
+ def R11B : Register<"r11b">, DwarfRegNum<[11, -2, -2]>;
+ def R12B : Register<"r12b">, DwarfRegNum<[12, -2, -2]>;
+ def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>;
+ def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
+ def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
+
+ // High registers. On x86-64, these cannot be used in any instruction
+ // with a REX prefix.
+ def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>;
+ def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>;
+ def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>;
+ def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>;
+
+ // 16-bit registers
+ def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>;
+ def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>;
+ def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>;
+ def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>;
+ def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>;
+ def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>;
+ def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>;
+ def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
+ def IP : Register<"ip">, DwarfRegNum<[16]>;
+
+ // X86-64 only
+ def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
+ def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
+ def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
+ def R11W : RegisterWithSubRegs<"r11w", [R11B]>, DwarfRegNum<[11, -2, -2]>;
+ def R12W : RegisterWithSubRegs<"r12w", [R12B]>, DwarfRegNum<[12, -2, -2]>;
+ def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>;
+ def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>;
+ def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>;
+
+ // 32-bit registers
+ def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>;
+ def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>;
+ def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>;
+ def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[3, 3, 3]>;
+ def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[4, 6, 6]>;
+ def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>;
+ def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>;
+ def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
+
+ // X86-64 only
+ def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
+ def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
+ def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>;
+ def R11D : RegisterWithSubRegs<"r11d", [R11W]>, DwarfRegNum<[11, -2, -2]>;
+ def R12D : RegisterWithSubRegs<"r12d", [R12W]>, DwarfRegNum<[12, -2, -2]>;
+ def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
+ def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
+ def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
+
+ // 64-bit registers, X86-64 only
+ def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
+ def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
+ def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
+ def RBX : RegisterWithSubRegs<"rbx", [EBX]>, DwarfRegNum<[3, -2, -2]>;
+ def RSI : RegisterWithSubRegs<"rsi", [ESI]>, DwarfRegNum<[4, -2, -2]>;
+ def RDI : RegisterWithSubRegs<"rdi", [EDI]>, DwarfRegNum<[5, -2, -2]>;
+ def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
+ def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
+
+ def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
+ def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
+ def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
+ def R11 : RegisterWithSubRegs<"r11", [R11D]>, DwarfRegNum<[11, -2, -2]>;
+ def R12 : RegisterWithSubRegs<"r12", [R12D]>, DwarfRegNum<[12, -2, -2]>;
+ def R13 : RegisterWithSubRegs<"r13", [R13D]>, DwarfRegNum<[13, -2, -2]>;
+ def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
+ def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
+ def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
+
+ // MMX Registers. These are actually aliased to ST0 .. ST7
+ def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
+ def MM1 : Register<"mm1">, DwarfRegNum<[42, 30, 30]>;
+ def MM2 : Register<"mm2">, DwarfRegNum<[43, 31, 31]>;
+ def MM3 : Register<"mm3">, DwarfRegNum<[44, 32, 32]>;
+ def MM4 : Register<"mm4">, DwarfRegNum<[45, 33, 33]>;
+ def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
+ def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
+ def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
+
+ // Pseudo Floating Point registers
+ def FP0 : Register<"fp0">;
+ def FP1 : Register<"fp1">;
+ def FP2 : Register<"fp2">;
+ def FP3 : Register<"fp3">;
+ def FP4 : Register<"fp4">;
+ def FP5 : Register<"fp5">;
+ def FP6 : Register<"fp6">;
+
+ // XMM Registers, used by the various SSE instruction set extensions
+ def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
+ def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
+ def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
+ def XMM3: Register<"xmm3">, DwarfRegNum<[20, 24, 24]>;
+ def XMM4: Register<"xmm4">, DwarfRegNum<[21, 25, 25]>;
+ def XMM5: Register<"xmm5">, DwarfRegNum<[22, 26, 26]>;
+ def XMM6: Register<"xmm6">, DwarfRegNum<[23, 27, 27]>;
+ def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
+
+ // X86-64 only
+ def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
+ def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
+ def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
+ def XMM11: Register<"xmm11">, DwarfRegNum<[28, -2, -2]>;
+ def XMM12: Register<"xmm12">, DwarfRegNum<[29, -2, -2]>;
+ def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
+ def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
+ def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+
+ // Floating point stack registers
+ def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
+ def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>;
+ def ST2 : Register<"st(2)">, DwarfRegNum<[35, 14, 13]>;
+ def ST3 : Register<"st(3)">, DwarfRegNum<[36, 15, 14]>;
+ def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
+ def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
+ def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
+ def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
+
+ // Status flags register
+ def EFLAGS : Register<"flags">;
+
+ // Segment registers
+ def CS : Register<"cs">;
+ def DS : Register<"ds">;
+ def SS : Register<"ss">;
+ def ES : Register<"es">;
+ def FS : Register<"fs">;
+ def GS : Register<"gs">;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Subregister Set Definitions... now that we have all of the pieces, define the
+// subregisters for each register.
+//
+
+def x86_subreg_8bit : PatLeaf<(i32 1)>;
+def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
+def x86_subreg_16bit : PatLeaf<(i32 3)>;
+def x86_subreg_32bit : PatLeaf<(i32 4)>;
+
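+// Each SubRegSet index below corresponds to one of the x86_subreg_* leaf
+// values defined above (1 = low 8 bits, 2 = high 8 bits, 3 = low 16 bits,
+// 4 = low 32 bits).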
+def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
+ [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
+ R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def : SubRegSet<2, [AX, CX, DX, BX],
+ [AH, CH, DH, BH]>;
+
+def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+ [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
+ R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def : SubRegSet<2, [EAX, ECX, EDX, EBX],
+ [AH, CH, DH, BH]>;
+
+def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+ [AX, CX, DX, BX, SP, BP, SI, DI,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+
+def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
+ R8, R9, R10, R11, R12, R13, R14, R15],
+ [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
+ R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def : SubRegSet<2, [RAX, RCX, RDX, RBX],
+ [AH, CH, DH, BH]>;
+
+def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
+ R8, R9, R10, R11, R12, R13, R14, R15],
+ [AX, CX, DX, BX, SP, BP, SI, DI,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+
+def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
+ R8, R9, R10, R11, R12, R13, R14, R15],
+ [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// List call-clobbered registers before callee-save registers. RBX, RBP (and
+// R12, R13, R14, and R15 for X86-64) are callee-save registers.
+// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL, and
+// R8B, ... R15B.
+// Allocate R12 and R13 last, as these require an extra byte when
+// encoded in x86_64 instructions.
+// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
+// 64-bit mode. The main complication is that they cannot be encoded in an
+// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
+// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
+// cannot be encoded.
+def GR8 : RegisterClass<"X86", [i8], 8,
+ [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SPL or BPL.
+ static const unsigned X86_GR8_AO_64_fp[] = {
+ X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
+ X86::R8B, X86::R9B, X86::R10B, X86::R11B,
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B
+ };
+ // If not, just don't allocate SPL.
+ static const unsigned X86_GR8_AO_64[] = {
+ X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
+ X86::R8B, X86::R9B, X86::R10B, X86::R11B,
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
+ };
+    // In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
+ static const unsigned X86_GR8_AO_32[] = {
+ X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH
+ };
+
+ GR8Class::iterator
+ GR8Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_AO_32;
+ else if (RI->hasFP(MF))
+ return X86_GR8_AO_64_fp;
+ else
+ return X86_GR8_AO_64;
+ }
+
+ GR8Class::iterator
+ GR8Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned));
+ else if (RI->hasFP(MF))
+ return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned));
+ else
+ return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned));
+ }
+ }];
+}
+
+
+def GR16 : RegisterClass<"X86", [i16], 16,
+ [AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
+ let SubRegClassList = [GR8, GR8];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SP or BP.
+ static const unsigned X86_GR16_AO_64_fp[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
+ X86::R8W, X86::R9W, X86::R10W, X86::R11W,
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W
+ };
+ static const unsigned X86_GR16_AO_32_fp[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX
+ };
+ // If not, just don't allocate SP.
+ static const unsigned X86_GR16_AO_64[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
+ X86::R8W, X86::R9W, X86::R10W, X86::R11W,
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP
+ };
+ static const unsigned X86_GR16_AO_32[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP
+ };
+
+ GR16Class::iterator
+ GR16Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_64_fp;
+ else
+ return X86_GR16_AO_64;
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_32_fp;
+ else
+ return X86_GR16_AO_32;
+ }
+ }
+
+ GR16Class::iterator
+ GR16Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned));
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned));
+ }
+ }
+ }];
+}
+
+
+def GR32 : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+ let SubRegClassList = [GR8, GR8, GR16];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate ESP or EBP.
+ static const unsigned X86_GR32_AO_64_fp[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D
+ };
+ static const unsigned X86_GR32_AO_32_fp[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
+ };
+ // If not, just don't allocate ESP.
+ static const unsigned X86_GR32_AO_64[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
+ };
+ static const unsigned X86_GR32_AO_32[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
+ };
+
+ GR32Class::iterator
+ GR32Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_64_fp;
+ else
+ return X86_GR32_AO_64;
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_32_fp;
+ else
+ return X86_GR32_AO_32;
+ }
+ }
+
+ GR32Class::iterator
+ GR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned));
+ else
+ return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned));
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned));
+ else
+ return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned));
+ }
+ }
+ }];
+}
+
+
+def GR64 : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP]> {
+ let SubRegClassList = [GR8, GR8, GR16, GR32];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64Class::iterator
+ GR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return begin(); // None of these are allocatable in 32-bit.
+ if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
+ return end()-2; // If so, don't allocate RSP or RBP
+ else
+ return end()-1; // If not, just don't allocate RSP
+ }
+ }];
+}
+
+
+// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain just the "a", "b", "c", and "d"
+// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
+// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
+// and GR64_ABCD are classes for registers that support 8-bit h-register
+// operations.
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
+}
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
+}
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
+ let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H];
+}
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
+ let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+}
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
+ let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
+}
+
+// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs.
+// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes
+// of registers which do not by themselves require a REX prefix.
+def GR8_NOREX : RegisterClass<"X86", [i8], 8,
+ [AL, CL, DL, BL, AH, CH, DH, BH,
+ SIL, DIL, BPL, SPL]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SPL or BPL.
+ static const unsigned X86_GR8_NOREX_AO_64_fp[] = {
+ X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL
+ };
+ // If not, just don't allocate SPL.
+ static const unsigned X86_GR8_NOREX_AO_64[] = {
+ X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL
+ };
+    // In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
+ static const unsigned X86_GR8_NOREX_AO_32[] = {
+ X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH
+ };
+
+ GR8_NOREXClass::iterator
+ GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_NOREX_AO_32;
+ else if (RI->hasFP(MF))
+ return X86_GR8_NOREX_AO_64_fp;
+ else
+ return X86_GR8_NOREX_AO_64;
+ }
+
+ GR8_NOREXClass::iterator
+ GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_NOREX_AO_32 +
+ (sizeof(X86_GR8_NOREX_AO_32) / sizeof(unsigned));
+ else if (RI->hasFP(MF))
+ return X86_GR8_NOREX_AO_64_fp +
+ (sizeof(X86_GR8_NOREX_AO_64_fp) / sizeof(unsigned));
+ else
+ return X86_GR8_NOREX_AO_64 +
+ (sizeof(X86_GR8_NOREX_AO_64) / sizeof(unsigned));
+ }
+ }];
+}
+def GR16_NOREX : RegisterClass<"X86", [i16], 16,
+ [AX, CX, DX, SI, DI, BX, BP, SP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SP or BP.
+ static const unsigned X86_GR16_AO_fp[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX
+ };
+ // If not, just don't allocate SP.
+ static const unsigned X86_GR16_AO[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP
+ };
+
+ GR16_NOREXClass::iterator
+ GR16_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_fp;
+ else
+ return X86_GR16_AO;
+ }
+
+ GR16_NOREXClass::iterator
+ GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_fp+(sizeof(X86_GR16_AO_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO + (sizeof(X86_GR16_AO) / sizeof(unsigned));
+ }
+ }];
+}
+// GR32_NOREX - GR32 registers which do not require a REX prefix.
+def GR32_NOREX : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate ESP or EBP.
+ static const unsigned X86_GR32_NOREX_AO_fp[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
+ };
+ // If not, just don't allocate ESP.
+ static const unsigned X86_GR32_NOREX_AO[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
+ };
+
+ GR32_NOREXClass::iterator
+ GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR32_NOREX_AO_fp;
+ else
+ return X86_GR32_NOREX_AO;
+ }
+
+ GR32_NOREXClass::iterator
+ GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR32_NOREX_AO_fp +
+ (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned));
+ else
+ return X86_GR32_NOREX_AO +
+ (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned));
+ }
+ }];
+}
+
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate RSP or RBP.
+ static const unsigned X86_GR64_NOREX_AO_fp[] = {
+ X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX
+ };
+ // If not, just don't allocate RSP.
+ static const unsigned X86_GR64_NOREX_AO[] = {
+ X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP
+ };
+
+ GR64_NOREXClass::iterator
+ GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR64_NOREX_AO_fp;
+ else
+ return X86_GR64_NOREX_AO;
+ }
+
+ GR64_NOREXClass::iterator
+ GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR64_NOREX_AO_fp +
+ (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned));
+ else
+ return X86_GR64_NOREX_AO +
+ (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned));
+ }
+ }];
+}
+
+// A class to support the 'A' assembler constraint: EAX then EDX.
+def GRAD : RegisterClass<"X86", [i32], 32, [EAX, EDX]>;
+
+// Scalar SSE2 floating point registers.
+def FR32 : RegisterClass<"X86", [f32], 32,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FR32Class::iterator
+ FR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
+def FR64 : RegisterClass<"X86", [f64], 64,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FR64Class::iterator
+ FR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
+
+// FIXME: This sets up the floating point register files as though they are f64
+// values, though they really are f80 values. This will cause us to spill
+// values as 64-bit quantities instead of 80-bit quantities, which is much much
+// faster on common hardware. In reality, this should be controlled by a
+// command line option or something.
+
+def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+
+// Floating point stack registers (these are not allocatable by the
+// register allocator - the floating point stackifier is responsible
+// for transforming FPn allocations to STn registers)
+def RST : RegisterClass<"X86", [f80, f64, f32], 32,
+ [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ RSTClass::iterator
+ RSTClass::allocation_order_end(const MachineFunction &MF) const {
+ return begin();
+ }
+ }];
+}
+
+// Generic vector registers: VR64 and VR128.
+def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ VR128Class::iterator
+ VR128Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+}
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
new file mode 100644
index 0000000..b225f48
--- /dev/null
+++ b/lib/Target/X86/X86Relocations.h
@@ -0,0 +1,42 @@
+//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RELOCATIONS_H
+#define X86RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace X86 {
+ /// RelocationType - An enum for the x86 relocation codes. Note that
+ /// the terminology here doesn't follow x86 convention - word means
+ /// 32-bit and dword means 64-bit.
+ enum RelocationType {
+ // reloc_pcrel_word - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ // reloc_picrel_word - PIC base relative relocation, add the relocated
+ // value to the value already in memory, after we adjust it for where the
+ // PIC base is.
+ reloc_picrel_word = 1,
+
+ // reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
+ // add the relocated value to the value already in memory.
+ reloc_absolute_word = 2,
+ reloc_absolute_dword = 3
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
new file mode 100644
index 0000000..03ce1ae
--- /dev/null
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -0,0 +1,446 @@
+//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "X86Subtarget.h"
+#include "X86GenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#if defined(_MSC_VER)
+ #include <intrin.h>
+#endif
+
+static cl::opt<X86Subtarget::AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(
+ clEnumValN(X86Subtarget::ATT, "att", "Emit AT&T-style assembly"),
+ clEnumValN(X86Subtarget::Intel, "intel", "Emit Intel-style assembly"),
+ clEnumValEnd));
+
+
+/// True if accessing the GV requires an extra load. For Windows, dllimported
+/// symbols are indirect, loading the value at address GV rather than the
+/// value of GV itself. This means that the GlobalAddress must be in the base
+/// or index register of the address, not the GV offset field.
+bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const
+{
+ // FIXME: PIC
+ if (TM.getRelocationModel() != Reloc::Static &&
+ TM.getCodeModel() != CodeModel::Large) {
+ if (isTargetDarwin()) {
+ if (isDirectCall)
+ return false;
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+ if (GV->hasHiddenVisibility() &&
+ (Is64Bit || (!isDecl && !GV->hasCommonLinkage())))
+ // If symbol visibility is hidden, the extra load is not needed if
+ // target is x86-64 or the symbol is definitely defined in the current
+ // translation unit.
+ return false;
+ return !isDirectCall && (isDecl || GV->isWeakForLinker());
+ } else if (isTargetELF()) {
+ // Extra load is needed for all externally visible.
+ if (isDirectCall)
+ return false;
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else if (isTargetCygMing() || isTargetWindows()) {
+ return (GV->hasDLLImportLinkage());
+ }
+ }
+ return false;
+}
+
+/// True if accessing the GV requires a register. This is a superset of the
+/// cases where GVRequiresExtraLoad is true. Some variations of PIC require
+/// a register, but not an extra load.
+bool X86Subtarget::GVRequiresRegister(const GlobalValue *GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const
+{
+ if (GVRequiresExtraLoad(GV, TM, isDirectCall))
+ return true;
+  // The code below only needs to consider cases where GVRequiresExtraLoad
+ // returns false.
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return !isDirectCall &&
+ (GV->hasLocalLinkage() || GV->hasExternalLinkage());
+ return false;
+}
+
+/// getBZeroEntry - This function returns the name of a function which has an
+/// interface like the non-standard bzero function, if such a function exists on
+/// the current subtarget and it is considered preferable over memset with zero
+/// passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+ // Darwin 10 has a __bzero entry point for this purpose.
+ if (getDarwinVers() >= 10)
+ return "__bzero";
+
+ return 0;
+}
+
+/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+/// to an immediate address.
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+ if (Is64Bit)
+ return false;
+ return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
+}
+
+/// getSpecialAddressLatency - For targets where it is beneficial to
+/// backschedule instructions that compute addresses, return a value
+/// indicating the number of scheduling cycles of backscheduling that
+/// should be attempted.
+unsigned X86Subtarget::getSpecialAddressLatency() const {
+ // For x86 out-of-order targets, back-schedule address computations so
+ // that loads and stores aren't blocked.
+ // This value was chosen arbitrarily.
+ return 200;
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64)
+ #if defined(__GNUC__)
+    // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
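+    // As above, preserve ebx around cpuid; gcc may reserve it (e.g. as the
+    // PIC base register in 32-bit PIC code).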
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+void X86Subtarget::AutoDetectSubtargetFeatures() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
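+  // CPUID function 0 returns the vendor string in EBX, EDX, ECX order; the
+  // output pointers below are permuted so that text.c holds the string in
+  // its natural order (e.g. "GenuineIntel").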
+ if (X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ return;
+
+ X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+
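+  // Feature flags from CPUID function 1: EDX bit 23 = MMX, bit 25 = SSE,
+  // bit 26 = SSE2; ECX bit 0 = SSE3, bit 9 = SSSE3, bit 19 = SSE4.1,
+  // bit 20 = SSE4.2.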
+ if ((EDX >> 23) & 0x1) X86SSELevel = MMX;
+ if ((EDX >> 25) & 0x1) X86SSELevel = SSE1;
+ if ((EDX >> 26) & 0x1) X86SSELevel = SSE2;
+ if (ECX & 0x1) X86SSELevel = SSE3;
+ if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3;
+ if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
+ if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
+
+ bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+ bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+ if (IsIntel || IsAMD) {
+ // Determine if bit test memory instructions are slow.
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectFamilyModel(EAX, Family, Model);
+ IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+
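+    // Extended feature flags (CPUID function 0x80000001): EDX bit 29 is
+    // long mode (x86-64), ECX bit 6 is SSE4A.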
+ X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ HasX86_64 = (EDX >> 29) & 0x1;
+ HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
+ }
+}
+
+static const char *GetCurrentX86CPU() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ return "generic";
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectFamilyModel(EAX, Family, Model);
+
+ X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ bool Em64T = (EDX >> 29) & 0x1;
+ bool HasSSE3 = (ECX & 0x1);
+
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ switch (Family) {
+ case 3:
+ return "i386";
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 4: return "pentium-mmx";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 1: return "pentiumpro";
+ case 3:
+ case 5:
+ case 6: return "pentium2";
+ case 7:
+ case 8:
+ case 10:
+ case 11: return "pentium3";
+ case 9:
+ case 13: return "pentium-m";
+ case 14: return "yonah";
+ case 15:
+ case 22: // Celeron M 540
+ return "core2";
+      case 23: // 45nm: Penryn, Wolfdale, Yorkfield (XE)
+ return "penryn";
+ default: return "i686";
+ }
+ case 15: {
+ switch (Model) {
+ case 3:
+ case 4:
+ case 6: // same as 4, but 65nm
+ return (Em64T) ? "nocona" : "prescott";
+ case 26:
+ return "corei7";
+ case 28:
+ return "atom";
+ default:
+ return (Em64T) ? "x86-64" : "pentium4";
+ }
+ }
+
+ default:
+ return "generic";
+ }
+ } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 6:
+ case 7: return "k6";
+ case 8: return "k6-2";
+ case 9:
+ case 13: return "k6-3";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 4: return "athlon-tbird";
+ case 6:
+ case 7:
+ case 8: return "athlon-mp";
+ case 10: return "athlon-xp";
+ default: return "athlon";
+ }
+ case 15:
+ if (HasSSE3) {
+ switch (Model) {
+ default: return "k8-sse3";
+ }
+ } else {
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
+ }
+ }
+ case 16:
+ switch (Model) {
+ default: return "amdfam10";
+ }
+ default:
+ return "generic";
+ }
+ } else {
+ return "generic";
+ }
+}
+
+X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
+ : AsmFlavor(AsmWriterFlavor)
+ , PICStyle(PICStyles::None)
+ , X86SSELevel(NoMMXSSE)
+ , X863DNowLevel(NoThreeDNow)
+ , HasX86_64(false)
+ , IsBTMemSlow(false)
+ , DarwinVers(0)
+ , IsLinux(false)
+ , stackAlignment(8)
+ // FIXME: this is a known good value for Yonah. How about others?
+ , MaxInlineSizeThreshold(128)
+ , Is64Bit(is64Bit)
+ , TargetType(isELF) { // Default to ELF unless otherwise specified.
+
+ // Determine default and user specified characteristics
+ if (!FS.empty()) {
+ // If feature string is not empty, parse features string.
+ std::string CPU = GetCurrentX86CPU();
+ ParseSubtargetFeatures(FS, CPU);
+ // All X86-64 CPUs also have SSE2, however user might request no SSE via
+ // -mattr, so don't force SSELevel here.
+ } else {
+ // Otherwise, use CPUID to auto-detect feature set.
+ AutoDetectSubtargetFeatures();
+ // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
+ if (Is64Bit && X86SSELevel < SSE2)
+ X86SSELevel = SSE2;
+ }
+
+ // If requesting codegen for X86-64, make sure that 64-bit features
+ // are enabled.
+ if (Is64Bit)
+ HasX86_64 = true;
+
+ DOUT << "Subtarget features: SSELevel " << X86SSELevel
+ << ", 3DNowLevel " << X863DNowLevel
+ << ", 64bit " << HasX86_64 << "\n";
+ assert((!Is64Bit || HasX86_64) &&
+ "64-bit code requested on a subtarget that doesn't support it!");
+
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ const std::string& TT = M.getTargetTriple();
+ if (TT.length() > 5) {
+ size_t Pos;
+ if ((Pos = TT.find("-darwin")) != std::string::npos) {
+ TargetType = isDarwin;
+
+ // Compute the darwin version number.
+ if (isdigit(TT[Pos+7]))
+ DarwinVers = atoi(&TT[Pos+7]);
+ else
+ DarwinVers = 8; // Minimum supported darwin is Tiger.
+ } else if (TT.find("linux") != std::string::npos) {
+ // Linux doesn't imply ELF, but we don't currently support anything else.
+ TargetType = isELF;
+ IsLinux = true;
+ } else if (TT.find("cygwin") != std::string::npos) {
+ TargetType = isCygwin;
+ } else if (TT.find("mingw") != std::string::npos) {
+ TargetType = isMingw;
+ } else if (TT.find("win32") != std::string::npos) {
+ TargetType = isWindows;
+ } else if (TT.find("windows") != std::string::npos) {
+ TargetType = isWindows;
+    } else if (TT.find("-cl") != std::string::npos) {
+ TargetType = isDarwin;
+ DarwinVers = 9;
+ }
+ } else if (TT.empty()) {
+#if defined(__CYGWIN__)
+ TargetType = isCygwin;
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+ TargetType = isMingw;
+#elif defined(__APPLE__)
+ TargetType = isDarwin;
+#if __APPLE_CC__ > 5400
+ DarwinVers = 9; // GCC 5400+ is Leopard.
+#else
+ DarwinVers = 8; // Minimum supported darwin is Tiger.
+#endif
+
+#elif defined(_WIN32) || defined(_WIN64)
+ TargetType = isWindows;
+#elif defined(__linux__)
+ // Linux doesn't imply ELF, but we don't currently support anything else.
+ TargetType = isELF;
+ IsLinux = true;
+#endif
+ }
+
+ // If the asm syntax hasn't been overridden on the command line, use whatever
+ // the target wants.
+ if (AsmFlavor == X86Subtarget::Unset) {
+ AsmFlavor = (TargetType == isWindows)
+ ? X86Subtarget::Intel : X86Subtarget::ATT;
+ }
+
+ // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
+ // bit targets.
+ if (TargetType == isDarwin || Is64Bit)
+ stackAlignment = 16;
+
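+  // An explicit StackAlignment override (from TargetOptions) takes
+  // precedence over the target default.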
+ if (StackAlignment)
+ stackAlignment = StackAlignment;
+}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
new file mode 100644
index 0000000..46476f2
--- /dev/null
+++ b/lib/Target/X86/X86Subtarget.h
@@ -0,0 +1,224 @@
+//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SUBTARGET_H
+#define X86SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+class Module;
+class GlobalValue;
+class TargetMachine;
+
+namespace PICStyles {
+enum Style {
+ Stub, GOT, RIPRel, WinPIC, None
+};
+}
+
+class X86Subtarget : public TargetSubtarget {
+public:
+ enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1, Unset
+ };
+protected:
+ enum X86SSEEnum {
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
+ };
+
+ enum X863DNowEnum {
+ NoThreeDNow, ThreeDNow, ThreeDNowA
+ };
+
+ /// AsmFlavor - Which x86 asm dialect to use.
+ ///
+ AsmWriterFlavorTy AsmFlavor;
+
+ /// PICStyle - Which PIC style to use
+ ///
+ PICStyles::Style PICStyle;
+
+ /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
+ /// none supported.
+ X86SSEEnum X86SSELevel;
+
+ /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
+ ///
+ X863DNowEnum X863DNowLevel;
+
+ /// HasX86_64 - True if the processor supports X86-64 instructions.
+ ///
+ bool HasX86_64;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
+
+ /// HasSSE4A - True if the processor supports SSE4A instructions.
+ bool HasSSE4A;
+
+ /// DarwinVers - Nonzero if this is a darwin platform: the numeric
+ /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
+ unsigned char DarwinVers; // Is any darwin-x86 platform.
+
+ /// isLinux - true if this is a "linux" platform.
+ bool IsLinux;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
+ ///
+ unsigned MaxInlineSizeThreshold;
+
+private:
+ /// Is64Bit - True if the processor supports 64-bit instructions and module
+ /// pointer size is 64 bit.
+ bool Is64Bit;
+
+public:
+ enum {
+ isELF, isCygwin, isDarwin, isWindows, isMingw
+ } TargetType;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ X86Subtarget(const Module &M, const std::string &FS, bool is64Bit);
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
+ /// instruction.
+ void AutoDetectSubtargetFeatures();
+
+ bool is64Bit() const { return Is64Bit; }
+
+ PICStyles::Style getPICStyle() const { return PICStyle; }
+ void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
+
+ bool hasMMX() const { return X86SSELevel >= MMX; }
+ bool hasSSE1() const { return X86SSELevel >= SSE1; }
+ bool hasSSE2() const { return X86SSELevel >= SSE2; }
+ bool hasSSE3() const { return X86SSELevel >= SSE3; }
+ bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+ bool hasSSE41() const { return X86SSELevel >= SSE41; }
+ bool hasSSE42() const { return X86SSELevel >= SSE42; }
+ bool hasSSE4A() const { return HasSSE4A; }
+ bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+ bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+
+ bool isBTMemSlow() const { return IsBTMemSlow; }
+
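+  /// getAsmFlavor - Return the asm dialect in use, defaulting to AT&T (0)
+  /// when no flavor has been explicitly selected.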
+ unsigned getAsmFlavor() const {
+ return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
+ }
+
+ bool isFlavorAtt() const { return AsmFlavor == ATT; }
+ bool isFlavorIntel() const { return AsmFlavor == Intel; }
+
+ bool isTargetDarwin() const { return TargetType == isDarwin; }
+ bool isTargetELF() const {
+ return TargetType == isELF;
+ }
+ bool isTargetWindows() const { return TargetType == isWindows; }
+ bool isTargetMingw() const { return TargetType == isMingw; }
+ bool isTargetCygMing() const { return (TargetType == isMingw ||
+ TargetType == isCygwin); }
+ bool isTargetCygwin() const { return TargetType == isCygwin; }
+ bool isTargetWin64() const {
+ return (Is64Bit && (TargetType == isMingw || TargetType == isWindows));
+ }
+
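+  /// getDataLayout - Return the data layout string (endianness, pointer
+  /// size, and type alignments) appropriate for this subtarget.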
+ std::string getDataLayout() const {
+ const char *p;
+ if (is64Bit())
+ p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128";
+ else {
+ if (isTargetDarwin())
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
+ else
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
+ }
+ return std::string(p);
+ }
+
+ bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
+ bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
+ bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; }
+ bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
+  bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; }
+
+ /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
+ unsigned getDarwinVers() const { return DarwinVers; }
+
+ /// isLinux - Return true if the target is "Linux".
+ bool isLinux() const { return IsLinux; }
+
+ /// True if accessing the GV requires an extra load. For Windows, dllimported
+  /// symbols are indirect, loading the value at address GV rather than the
+ /// value of GV itself. This means that the GlobalAddress must be in the base
+ /// or index register of the address, not the GV offset field.
+ bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+
+ /// True if accessing the GV requires a register. This is a superset of the
+ /// cases where GVRequiresExtraLoad is true. Some variations of PIC require
+ /// a register, but not an extra load.
+ bool GVRequiresRegister(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+
+ /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+  /// to an immediate address.
+ bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+ /// This function returns the name of a function which has an interface
+ /// like the non-standard bzero function, if such a function exists on
+  /// the current subtarget and it is considered preferable over
+ /// memset with zero passed as the second argument. Otherwise it
+ /// returns null.
+ const char *getBZeroEntry() const;
+
+ /// getSpecialAddressLatency - For targets where it is beneficial to
+ /// backschedule instructions that compute addresses, return a value
+ /// indicating the number of scheduling cycles of backscheduling that
+ /// should be attempted.
+ unsigned getSpecialAddressLatency() const;
+};
+
+namespace X86 {
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
new file mode 100644
index 0000000..5dda5f4
--- /dev/null
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -0,0 +1,461 @@
+//===-- X86TargetAsmInfo.cpp - X86 asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "X86Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
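+// Pairs mapping GCC inline asm register/constraint names to their LLVM
+// single-letter constraint equivalents; the list is terminated by a null
+// pair.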
+const char *const llvm::x86_asm_table[] = {
+ "{si}", "S",
+ "{di}", "D",
+ "{ax}", "a",
+ "{cx}", "c",
+ "{memory}", "memory",
+ "{flags}", "",
+ "{dirflag}", "",
+ "{fpsr}", "",
+ "{cc}", "cc",
+ 0,0};
+
+X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
+ X86TargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+ const X86Subtarget* Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ AlignmentIsInBytes = false;
+ TextAlignFillValue = 0x90;
+ GlobalPrefix = "_";
+ if (!is64Bit)
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+ ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
+ PrivateGlobalPrefix = "L"; // Marker for constant pool idxs
+ LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata
+ BSSSection = 0; // no BSS section.
+ ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
+ if (TM.getRelocationModel() != Reloc::Static)
+ ConstantPoolSection = "\t.const_data";
+ else
+ ConstantPoolSection = "\t.const\n";
+ JumpTableDataSection = "\t.const\n";
+ CStringSection = "\t.cstring";
+  // FIXME: Why don't we always use this section?
+ if (is64Bit) {
+ SixteenByteConstantSection = getUnnamedSection("\t.literal16\n",
+ SectionFlags::Mergeable);
+ }
+ LCOMMDirective = "\t.lcomm\t";
+ SwitchToSectionDirective = "\t.section ";
+ StringConstantPrefix = "\1LC";
+ // Leopard and above support aligned common symbols.
+ COMMDirectiveTakesAlignment = (Subtarget->getDarwinVers() >= 9);
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ NonLocalEHFrameLabel = true;
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+ if (is64Bit) {
+ PersonalityPrefix = "";
+ PersonalitySuffix = "+4@GOTPCREL";
+ } else {
+ PersonalityPrefix = "L";
+ PersonalitySuffix = "$non_lazy_ptr";
+ }
+ NeedsIndirectEncoding = true;
+ InlineAsmStart = "## InlineAsm Start";
+ InlineAsmEnd = "## InlineAsm End";
+ CommentString = "##";
+ SetDirective = "\t.set";
+ PCSymbol = ".";
+ UsedDirective = "\t.no_dead_strip\t";
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ HiddenDirective = "\t.private_extern ";
+ ProtectedDirective = "\t.globl\t";
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+
+ SupportsDebugInformation = true;
+ NeedsSet = true;
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfDebugInlineSection = ".section __DWARF,__debug_inlined,regular,debug";
+ DwarfUsesInlineInfoSection = true;
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+
+  // Exception handling
+ SupportsExceptionHandling = true;
+ GlobalEHDirective = "\t.globl\t";
+ SupportsWeakOmittedEHFrame = false;
+ AbsoluteEHSectionOffsets = false;
+ DwarfEHFrameSection =
+ ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+ DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
+}
+
+unsigned
+X86DarwinTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ if (Reason == DwarfEncoding::Functions && Global)
+ return (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4);
+ else if (Reason == DwarfEncoding::CodeLabels || !Global)
+ return DW_EH_PE_pcrel;
+ else
+ return DW_EH_PE_absptr;
+}
+
+const char *
+X86DarwinTargetAsmInfo::getEHGlobalPrefix() const
+{
+ const X86Subtarget* Subtarget = &TM.getSubtarget<X86Subtarget>();
+ if (Subtarget->getDarwinVers() > 9)
+ return PrivateGlobalPrefix;
+ else
+ return "";
+}
+
+X86ELFTargetAsmInfo::X86ELFTargetAsmInfo(const X86TargetMachine &TM):
+ X86TargetAsmInfo<ELFTargetAsmInfo>(TM) {
+
+ CStringSection = ".rodata.str";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Debug Information
+ AbsoluteDebugSectionOffsets = true;
+ SupportsDebugInformation = true;
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",@progbits";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+
+  // Exception handling
+ SupportsExceptionHandling = true;
+ AbsoluteEHSectionOffsets = false;
+ DwarfEHFrameSection = "\t.section\t.eh_frame,\"aw\",@progbits";
+ DwarfExceptionSection = "\t.section\t.gcc_except_table,\"a\",@progbits";
+
+ // On Linux we must declare when we can use a non-executable stack.
+ if (TM.getSubtarget<X86Subtarget>().isLinux())
+ NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits";
+}
+
+unsigned
+X86ELFTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ CodeModel::Model CM = TM.getCodeModel();
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ unsigned Format = 0;
+
+ if (!is64Bit)
+ // 32 bit targets always encode pointers as 4 bytes
+ Format = DW_EH_PE_sdata4;
+ else {
+ // 64 bit targets encode pointers in 4 bytes iff:
+ // - code model is small OR
+ // - code model is medium and we're emitting externally visible symbols
+ // or any code symbols
+ if (CM == CodeModel::Small ||
+ (CM == CodeModel::Medium && (Global ||
+ Reason != DwarfEncoding::Data)))
+ Format = DW_EH_PE_sdata4;
+ else
+ Format = DW_EH_PE_sdata8;
+ }
+
+ if (Global)
+ Format |= DW_EH_PE_indirect;
+
+ return (Format | DW_EH_PE_pcrel);
+ } else {
+ if (is64Bit &&
+ (CM == CodeModel::Small ||
+ (CM == CodeModel::Medium && Reason != DwarfEncoding::Data)))
+ return DW_EH_PE_udata4;
+ else
+ return DW_EH_PE_absptr;
+ }
+}
+
+X86COFFTargetAsmInfo::X86COFFTargetAsmInfo(const X86TargetMachine &TM):
+ X86GenericTargetAsmInfo(TM) {
+
+ GlobalPrefix = "_";
+ LCOMMDirective = "\t.lcomm\t";
+ COMMDirectiveTakesAlignment = false;
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ StaticCtorsSection = "\t.section .ctors,\"aw\"";
+ StaticDtorsSection = "\t.section .dtors,\"aw\"";
+ HiddenDirective = NULL;
+ PrivateGlobalPrefix = "L"; // Prefix for private global symbols
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ AbsoluteDebugSectionOffsets = true;
+ AbsoluteEHSectionOffsets = false;
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = "\t.secrel32\t";
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"dr\"";
+ DwarfInfoSection = "\t.section\t.debug_info,\"dr\"";
+ DwarfLineSection = "\t.section\t.debug_line,\"dr\"";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"dr\"";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"dr\"";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"dr\"";
+ DwarfStrSection = "\t.section\t.debug_str,\"dr\"";
+ DwarfLocSection = "\t.section\t.debug_loc,\"dr\"";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\"";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"dr\"";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
+}
+
+unsigned
+X86COFFTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ CodeModel::Model CM = TM.getCodeModel();
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ unsigned Format = 0;
+
+ if (!is64Bit)
+ // 32 bit targets always encode pointers as 4 bytes
+ Format = DW_EH_PE_sdata4;
+ else {
+ // 64 bit targets encode pointers in 4 bytes iff:
+ // - code model is small OR
+ // - code model is medium and we're emitting externally visible symbols
+ // or any code symbols
+ if (CM == CodeModel::Small ||
+ (CM == CodeModel::Medium && (Global ||
+ Reason != DwarfEncoding::Data)))
+ Format = DW_EH_PE_sdata4;
+ else
+ Format = DW_EH_PE_sdata8;
+ }
+
+ if (Global)
+ Format |= DW_EH_PE_indirect;
+
+ return (Format | DW_EH_PE_pcrel);
+ } else {
+ if (is64Bit &&
+ (CM == CodeModel::Small ||
+ (CM == CodeModel::Medium && Reason != DwarfEncoding::Data)))
+ return DW_EH_PE_udata4;
+ else
+ return DW_EH_PE_absptr;
+ }
+}
+
+std::string
+X86COFFTargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV,
+ SectionKind::Kind kind) const {
+ switch (kind) {
+ case SectionKind::Text:
+ return ".text$linkonce" + GV->getName();
+ case SectionKind::Data:
+ case SectionKind::BSS:
+ case SectionKind::ThreadData:
+ case SectionKind::ThreadBSS:
+ return ".data$linkonce" + GV->getName();
+ case SectionKind::ROData:
+ case SectionKind::RODataMergeConst:
+ case SectionKind::RODataMergeStr:
+ return ".rdata$linkonce" + GV->getName();
+ default:
+ assert(0 && "Unknown section kind");
+ }
+  return "";  // Not reached, but don't construct std::string from NULL.
+}
+
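+// Print the COFF section flag suffix, e.g. ,"xw" for a writable code section.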
+std::string X86COFFTargetAsmInfo::printSectionFlags(unsigned flags) const {
+ std::string Flags = ",\"";
+
+ if (flags & SectionFlags::Code)
+ Flags += 'x';
+ if (flags & SectionFlags::Writeable)
+ Flags += 'w';
+
+ Flags += "\"";
+
+ return Flags;
+}
+
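+// X86WinTargetAsmInfo emits MASM-style assembly: ';' comments, db/dw/dd/dq
+// data directives, and explicit CODE/DATA segments instead of gas-style
+// section directives.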
+X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
+ X86GenericTargetAsmInfo(TM) {
+ GlobalPrefix = "_";
+ CommentString = ";";
+
+ PrivateGlobalPrefix = "$";
+ AlignDirective = "\talign\t";
+ ZeroDirective = "\tdb\t";
+ ZeroDirectiveSuffix = " dup(0)";
+ AsciiDirective = "\tdb\t";
+ AscizDirective = 0;
+ Data8bitsDirective = "\tdb\t";
+ Data16bitsDirective = "\tdw\t";
+ Data32bitsDirective = "\tdd\t";
+ Data64bitsDirective = "\tdq\t";
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+
+ TextSection = getUnnamedSection("_text", SectionFlags::Code);
+ DataSection = getUnnamedSection("_data", SectionFlags::Writeable);
+
+ JumpTableDataSection = NULL;
+ SwitchToSectionDirective = "";
+ TextSectionStartSuffix = "\tsegment 'CODE'";
+ DataSectionStartSuffix = "\tsegment 'DATA'";
+ SectionEndDirectiveSuffix = "\tends\n";
+}
+
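+// LowerToBSwap - Replace an inline-asm byte swap with the llvm.bswap
+// intrinsic, e.g.
+//   %r = call i32 asm "bswap $0", "=r,0"(i32 %x)
+// becomes
+//   %r = call i32 @llvm.bswap.i32(i32 %x)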
+template <class BaseTAI>
+bool X86TargetAsmInfo<BaseTAI>::LowerToBSwap(CallInst *CI) const {
+  // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
+
+ // Verify this is a simple bswap.
+ if (CI->getNumOperands() != 2 ||
+ CI->getType() != CI->getOperand(1)->getType() ||
+ !CI->getType()->isInteger())
+ return false;
+
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ const Type *Tys[] = { Ty };
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+
+ Value *Op = CI->getOperand(1);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
+
+template <class BaseTAI>
+bool X86TargetAsmInfo<BaseTAI>::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+
+ std::string AsmStr = IA->getAsmString();
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ std::vector<std::string> AsmPieces;
+ SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+
+ // bswap $0
+ if (AsmPieces.size() == 2 &&
+ (AsmPieces[0] == "bswap" ||
+ AsmPieces[0] == "bswapq" ||
+ AsmPieces[0] == "bswapl") &&
+ (AsmPieces[1] == "$0" ||
+ AsmPieces[1] == "${0:q}")) {
+ // No need to check constraints, nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ return LowerToBSwap(CI);
+ }
+ // rorw $$8, ${0:w} --> llvm.bswap.i16
+ if (CI->getType() == Type::Int16Ty &&
+ AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rorw" &&
+ AsmPieces[1] == "$$8," &&
+ AsmPieces[2] == "${0:w}" &&
+ IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
+ return LowerToBSwap(CI);
+ }
+ break;
+ case 3:
+ if (CI->getType() == Type::Int64Ty && Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ std::vector<std::string> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ return LowerToBSwap(CI);
+ }
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class X86TargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/X86/X86TargetAsmInfo.h b/lib/Target/X86/X86TargetAsmInfo.h
new file mode 100644
index 0000000..f89171d
--- /dev/null
+++ b/lib/Target/X86/X86TargetAsmInfo.h
@@ -0,0 +1,75 @@
+//=====-- X86TargetAsmInfo.h - X86 asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "X86TargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+ extern const char *const x86_asm_table[];
+
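+  // X86TargetAsmInfo is parameterized on its base class so the common X86
+  // asm properties can be layered over TargetAsmInfo, DarwinTargetAsmInfo or
+  // ELFTargetAsmInfo without resorting to multiple inheritance.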
+ template <class BaseTAI>
+ struct X86TargetAsmInfo : public BaseTAI {
+ explicit X86TargetAsmInfo(const X86TargetMachine &TM):
+ BaseTAI(TM) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+
+ BaseTAI::AsmTransCBE = x86_asm_table;
+ BaseTAI::AssemblerDialect = Subtarget->getAsmFlavor();
+ }
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ private:
+ bool LowerToBSwap(CallInst *CI) const;
+ };
+
+ typedef X86TargetAsmInfo<TargetAsmInfo> X86GenericTargetAsmInfo;
+
+ EXTERN_TEMPLATE_INSTANTIATION(class X86TargetAsmInfo<TargetAsmInfo>);
+
+ struct X86DarwinTargetAsmInfo : public X86TargetAsmInfo<DarwinTargetAsmInfo> {
+ explicit X86DarwinTargetAsmInfo(const X86TargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ virtual const char *getEHGlobalPrefix() const;
+ };
+
+ struct X86ELFTargetAsmInfo : public X86TargetAsmInfo<ELFTargetAsmInfo> {
+ explicit X86ELFTargetAsmInfo(const X86TargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ };
+
+ struct X86COFFTargetAsmInfo : public X86GenericTargetAsmInfo {
+ explicit X86COFFTargetAsmInfo(const X86TargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ virtual std::string UniqueSectionForGlobal(const GlobalValue* GV,
+ SectionKind::Kind kind) const;
+ virtual std::string printSectionFlags(unsigned flags) const;
+ };
+
+ struct X86WinTargetAsmInfo : public X86GenericTargetAsmInfo {
+ explicit X86WinTargetAsmInfo(const X86TargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
new file mode 100644
index 0000000..8264462
--- /dev/null
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -0,0 +1,317 @@
+//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// X86TargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int X86TargetMachineModule;
+int X86TargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<X86_32TargetMachine>
+X("x86", "32-bit X86: Pentium-Pro and above");
+static RegisterTarget<X86_64TargetMachine>
+Y("x86-64", "64-bit X86: EM64T and AMD64");
+
+// No assembler printer by default
+X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
+
+const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
+ if (Subtarget.isFlavorIntel())
+ return new X86WinTargetAsmInfo(*this);
+ else
+ switch (Subtarget.TargetType) {
+ case X86Subtarget::isDarwin:
+ return new X86DarwinTargetAsmInfo(*this);
+ case X86Subtarget::isELF:
+ return new X86ELFTargetAsmInfo(*this);
+ case X86Subtarget::isMingw:
+ case X86Subtarget::isCygwin:
+ return new X86COFFTargetAsmInfo(*this);
+ case X86Subtarget::isWindows:
+ return new X86WinTargetAsmInfo(*this);
+ default:
+ return new X86GenericTargetAsmInfo(*this);
+ }
+}
+
+unsigned X86_32TargetMachine::getJITMatchQuality() {
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned X86_64TargetMachine::getJITMatchQuality() {
+#if defined(__x86_64__) || defined(_M_AMD64)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "i[3-9]86-*".
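+  // (For example, "i686-pc-linux-gnu" is a strong match.)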
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+ TT[4] == '-' && TT[1] - '3' < 6)
+ return 20;
+ // If the target triple is something non-X86, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "x86_64-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' &&
+ TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-')
+ return 20;
+
+ // We strongly match "amd64-*".
+ if (TT.size() >= 6 && TT[0] == 'a' && TT[1] == 'm' && TT[2] == 'd' &&
+ TT[3] == '6' && TT[4] == '4' && TT[5] == '-')
+ return 20;
+
+ // If the target triple is something non-X86-64, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS)
+ : X86TargetMachine(M, FS, false) {
+}
+
+
+X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS)
+ : X86TargetMachine(M, FS, true) {
+}
+
+/// X86TargetMachine ctor - Create an X86 architecture model, either ILP32
+/// (32-bit) or 64-bit, depending on is64Bit.
+///
+X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
+ bool is64Bit)
+ : Subtarget(M, FS, is64Bit),
+ DataLayout(Subtarget.getDataLayout()),
+ FrameInfo(TargetFrameInfo::StackGrowsDown,
+ Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
+ DefRelocModel = getRelocationModel();
+ // FIXME: Correctly select PIC model for Win64 stuff
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isTargetDarwin() ||
+ (Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64()))
+ setRelocationModel(Reloc::DynamicNoPIC);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+
+  // ELF doesn't have a distinct dynamic-no-PIC model. Dynamic-no-PIC is
+  // defined as a model for code which may be used in static or dynamic
+  // executables but not necessarily a shared library. On ELF we implement
+  // this by using the Static model.
+ if (Subtarget.isTargetELF() &&
+ getRelocationModel() == Reloc::DynamicNoPIC)
+ setRelocationModel(Reloc::Static);
+
+ if (Subtarget.is64Bit()) {
+ // No DynamicNoPIC support under X86-64.
+ if (getRelocationModel() == Reloc::DynamicNoPIC)
+ setRelocationModel(Reloc::PIC_);
+ // Default X86-64 code model is small.
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+ }
+
+ if (Subtarget.isTargetCygMing())
+ Subtarget.setPICStyle(PICStyles::WinPIC);
+ else if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.is64Bit())
+ Subtarget.setPICStyle(PICStyles::RIPRel);
+ else
+ Subtarget.setPICStyle(PICStyles::Stub);
+ } else if (Subtarget.isTargetELF()) {
+ if (Subtarget.is64Bit())
+ Subtarget.setPICStyle(PICStyles::RIPRel);
+ else
+ Subtarget.setPICStyle(PICStyles::GOT);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createX86ISelDag(*this, OptLevel));
+
+ // If we're using Fast-ISel, clean up the mess.
+ if (EnableFastISel)
+ PM.add(createDeadMachineInstructionElimPass());
+
+ // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
+ PM.add(createX87FPRegKillInserterPass());
+
+ return false;
+}
+
+bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ PM.add(createX86MaxStackAlignmentCalculatorPass());
+ return false; // -print-machineinstr shouldn't print after this.
+}
+
+bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createX86FloatingPointStackifierPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+ return false;
+}
+
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ // FIXME: Move this to TargetJITInfo!
+ // On Darwin, do not override 64-bit setting made in X86TargetMachine().
+ if (DefRelocModel == Reloc::Default &&
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+ setRelocationModel(Reloc::Static);
+
+ // 64-bit JIT places everything in the same buffer except external functions.
+ // On Darwin, use small code model but hack the call instruction for
+ // externals. Elsewhere, do not assume globals are in the lower 4G.
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTargetDarwin())
+ setCodeModel(CodeModel::Small);
+ else
+ setCodeModel(CodeModel::Large);
+ }
+
+ PM.add(createX86CodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ // On Darwin, do not override 64-bit setting made in X86TargetMachine().
+ if (DefRelocModel == Reloc::Default &&
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+ setRelocationModel(Reloc::Static);
+
+ // 64-bit JIT places everything in the same buffer except external functions.
+ // On Darwin, use small code model but hack the call instruction for
+ // externals. Elsewhere, do not assume globals are in the lower 4G.
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTargetDarwin())
+ setCodeModel(CodeModel::Small);
+ else
+ setCodeModel(CodeModel::Large);
+ }
+
+ PM.add(createX86JITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ PM.add(createX86CodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ PM.add(createX86JITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+/// symbolicAddressesAreRIPRel - Return true if symbolic addresses are
+/// RIP-relative on this machine, taking into consideration the relocation
+/// model and subtarget. RIP-relative addresses cannot have a separate
+/// base or index register.
+bool X86TargetMachine::symbolicAddressesAreRIPRel() const {
+ return getRelocationModel() != Reloc::Static &&
+ Subtarget.isPICStyleRIPRel();
+}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
new file mode 100644
index 0000000..ecc1d39
--- /dev/null
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -0,0 +1,124 @@
+//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETMACHINE_H
+#define X86TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "X86.h"
+#include "X86ELFWriterInfo.h"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Subtarget.h"
+#include "X86ISelLowering.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+class X86TargetMachine : public LLVMTargetMachine {
+ X86Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ TargetFrameInfo FrameInfo;
+ X86InstrInfo InstrInfo;
+ X86JITInfo JITInfo;
+ X86TargetLowering TLInfo;
+ X86ELFWriterInfo ELFWriterInfo;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter library sets this function pointer at startup time if it is
+  // linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ X86TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
+public:
+ X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+
+ virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual X86JITInfo *getJITInfo() { return &JITInfo; }
+  virtual const X86Subtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual X86TargetLowering *getTargetLowering() const {
+ return const_cast<X86TargetLowering*>(&TLInfo);
+ }
+ virtual const X86RegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86ELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+ static unsigned getJITMatchQuality();
+
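+  // The asmprinter library calls this at startup, if it is linked in.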
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
+
+ // Set up the pass pipeline.
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+
+ /// symbolicAddressesAreRIPRel - Return true if symbolic addresses are
+ /// RIP-relative on this machine, taking into consideration the relocation
+ /// model and subtarget. RIP-relative addresses cannot have a separate
+ /// base or index register.
+ bool symbolicAddressesAreRIPRel() const;
+};
+
+/// X86_32TargetMachine - X86 32-bit target machine.
+///
+class X86_32TargetMachine : public X86TargetMachine {
+public:
+ X86_32TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+/// X86_64TargetMachine - X86 64-bit target machine.
+///
+class X86_64TargetMachine : public X86TargetMachine {
+public:
+ X86_64TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
new file mode 100644
index 0000000..a7aba14
--- /dev/null
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_TARGET_DEFINITIONS XCore.td)
+
+tablegen(XCoreGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(XCoreGenRegisterNames.inc -gen-register-enums)
+tablegen(XCoreGenRegisterInfo.inc -gen-register-desc)
+tablegen(XCoreGenInstrNames.inc -gen-instr-enums)
+tablegen(XCoreGenInstrInfo.inc -gen-instr-desc)
+tablegen(XCoreGenAsmWriter.inc -gen-asm-writer)
+tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
+tablegen(XCoreGenCallingConv.inc -gen-callingconv)
+tablegen(XCoreGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(XCore
+ XCoreAsmPrinter.cpp
+ XCoreFrameInfo.cpp
+ XCoreInstrInfo.cpp
+ XCoreISelDAGToDAG.cpp
+ XCoreISelLowering.cpp
+ XCoreRegisterInfo.cpp
+ XCoreSubtarget.cpp
+ XCoreTargetAsmInfo.cpp
+ XCoreTargetMachine.cpp
+ )
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
new file mode 100644
index 0000000..568df70
--- /dev/null
+++ b/lib/Target/XCore/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/XCore/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMXCore
+TARGET = XCore
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \
+ XCoreGenRegisterInfo.inc XCoreGenInstrNames.inc \
+ XCoreGenInstrInfo.inc XCoreGenAsmWriter.inc \
+ XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
+ XCoreGenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/XCore/README.txt b/lib/Target/XCore/README.txt
new file mode 100644
index 0000000..deaeb0f
--- /dev/null
+++ b/lib/Target/XCore/README.txt
@@ -0,0 +1,8 @@
+To-do
+-----
+
+* Instruction encodings
+* Tailcalls
+* Investigate loop alignment
+* Add builtins
+* Make better use of lmul / macc
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
new file mode 100644
index 0000000..5722b87
--- /dev/null
+++ b/lib/Target/XCore/XCore.h
@@ -0,0 +1,42 @@
+//===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// XCore back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_XCORE_H
+#define TARGET_XCORE_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class XCoreTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+ FunctionPass *createXCoreCodePrinterPass(raw_ostream &OS,
+ XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+} // end namespace llvm
+
+// Defines symbolic names for XCore registers. This defines a mapping from
+// register name to register number.
+//
+#include "XCoreGenRegisterNames.inc"
+
+// Defines symbolic names for the XCore instructions.
+//
+#include "XCoreGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
new file mode 100644
index 0000000..7a2dcdb
--- /dev/null
+++ b/lib/Target/XCore/XCore.td
@@ -0,0 +1,62 @@
+//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Descriptions
+//===----------------------------------------------------------------------===//
+
+include "XCoreRegisterInfo.td"
+include "XCoreInstrInfo.td"
+include "XCoreCallingConv.td"
+
+def XCoreInstrInfo : InstrInfo {
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// XCore Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureXS1A
+ : SubtargetFeature<"xs1a", "IsXS1A", "true",
+ "Enable XS1A instructions">;
+
+def FeatureXS1B
+ : SubtargetFeature<"xs1b", "IsXS1B", "true",
+ "Enable XS1B instructions">;
+
+//===----------------------------------------------------------------------===//
+// XCore processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", [FeatureXS1A]>;
+def : Proc<"xs1a-generic", [FeatureXS1A]>;
+def : Proc<"xs1b-generic", [FeatureXS1B]>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def XCore : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = XCoreInstrInfo;
+}
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
new file mode 100644
index 0000000..c9a6d8a
--- /dev/null
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -0,0 +1,472 @@
+//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the XAS-format XCore assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCore.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static cl::opt<std::string> FileDirective("xcore-file-directive", cl::Optional,
+ cl::desc("Output a file directive into the assembly file"),
+ cl::Hidden,
+ cl::value_desc("filename"),
+ cl::init(""));
+
+static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
+  cl::desc("Maximum number of threads (for emulated thread-local storage)"),
+ cl::Hidden,
+ cl::value_desc("number"),
+ cl::init(8));
+
+namespace {
+ class VISIBILITY_HIDDEN XCoreAsmPrinter : public AsmPrinter {
+ DwarfWriter *DW;
+ const XCoreSubtarget &Subtarget;
+ public:
+ explicit XCoreAsmPrinter(raw_ostream &O, XCoreTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V), DW(0),
+ Subtarget(*TM.getSubtargetImpl()) {}
+
+ virtual const char *getPassName() const {
+ return "XCore Assembly Printer";
+ }
+
+ void printMemOperand(const MachineInstr *MI, int opNum);
+ void printOperand(const MachineInstr *MI, int opNum);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ void emitFileDirective(const std::string &filename);
+ void emitGlobalDirective(const std::string &name);
+ void emitExternDirective(const std::string &name);
+
+ void emitArrayBound(const std::string &name, const GlobalVariable *GV);
+ void emitGlobal(const GlobalVariable *GV);
+
+ void emitFunctionStart(MachineFunction &MF);
+ void emitFunctionEnd(MachineFunction &MF);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printMachineInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ }
+ };
+} // end of anonymous namespace
+
+#include "XCoreGenAsmWriter.inc"
+
+/// createXCoreCodePrinterPass - Returns a pass that prints the XCore
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createXCoreCodePrinterPass(raw_ostream &o,
+ XCoreTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
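+// For example, the string "a\"b\n" is emitted as a\22b\0A.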
+static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << '\\'
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ }
+ }
+}
+
+void XCoreAsmPrinter::
+emitFileDirective(const std::string &name)
+{
+ O << "\t.file\t\"";
+ PrintEscapedString(name, O);
+ O << "\"\n";
+}
+
+void XCoreAsmPrinter::
+emitGlobalDirective(const std::string &name)
+{
+ O << TAI->getGlobalDirective() << name;
+ O << "\n";
+}
+
+void XCoreAsmPrinter::
+emitExternDirective(const std::string &name)
+{
+ O << "\t.extern\t" << name;
+ O << '\n';
+}
+
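+// Emit the symbolic constant name.globound, which records the number of
+// elements of an externally visible global array.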
+void XCoreAsmPrinter::
+emitArrayBound(const std::string &name, const GlobalVariable *GV)
+{
+ assert(((GV->hasExternalLinkage() ||
+ GV->hasWeakLinkage()) ||
+ GV->hasLinkOnceLinkage()) && "Unexpected linkage");
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(
+ cast<PointerType>(GV->getType())->getElementType()))
+ {
+ O << TAI->getGlobalDirective() << name << ".globound" << "\n";
+ O << TAI->getSetDirective() << name << ".globound" << ","
+ << ATy->getNumElements() << "\n";
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ O << TAI->getWeakDefDirective() << name << ".globound" << "\n";
+ }
+ }
+}
+
+void XCoreAsmPrinter::
+emitGlobal(const GlobalVariable *GV)
+{
+ const TargetData *TD = TM.getTargetData();
+
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ SwitchToSection(TAI->SectionForGlobal(GV));
+
+ std::string name = Mang->getValueName(GV);
+ Constant *C = GV->getInitializer();
+ unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+
+ // Mark the start of the global
+ O << "\t.cc_top " << name << ".data," << name << "\n";
+
+ switch (GV->getLinkage()) {
+ case GlobalValue::AppendingLinkage:
+ cerr << "AppendingLinkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalLinkage:
+ emitArrayBound(name, GV);
+ emitGlobalDirective(name);
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ O << TAI->getWeakDefDirective() << name << "\n";
+ }
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "Should not have any unmaterialized functions!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GV, 2);
+
+ unsigned Size = TD->getTypeAllocSize(C->getType());
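+    // Thread-local globals are emulated by replicating the data once per
+    // hardware thread, so that each thread can address its own copy.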
+ if (GV->isThreadLocal()) {
+ Size *= MaxThreads;
+ }
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << "," << Size << "\n";
+ }
+ O << name << ":\n";
+
+ EmitGlobalConstant(C);
+ if (GV->isThreadLocal()) {
+ for (unsigned i = 1; i < MaxThreads; ++i) {
+ EmitGlobalConstant(C);
+ }
+ }
+ if (Size < 4) {
+ // The ABI requires that unsigned scalar types smaller than 32 bits
+      // are padded to 32 bits.
+ EmitZeros(4 - Size);
+ }
+
+ // Mark the end of the global
+ O << "\t.cc_bottom " << name << ".data\n";
+ } else {
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+}
+
+/// Emit the directives on the start of functions
+void XCoreAsmPrinter::
+emitFunctionStart(MachineFunction &MF)
+{
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ // Mark the start of the function
+ O << "\t.cc_top " << CurrentFnName << ".function," << CurrentFnName << "\n";
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ emitGlobalDirective(CurrentFnName);
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ O << TAI->getGlobalDirective() << CurrentFnName << "\n";
+ O << TAI->getWeakDefDirective() << CurrentFnName << "\n";
+ break;
+ }
+  // Align to 1 << 1 = 2 bytes.
+ EmitAlignment(1, F, 1);
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << CurrentFnName << ",@function\n";
+ }
+ O << CurrentFnName << ":\n";
+}
+
+/// Emit the directives on the end of functions
+void XCoreAsmPrinter::
+emitFunctionEnd(MachineFunction &MF)
+{
+ // Mark the end of the function
+ O << "\t.cc_bottom " << CurrentFnName << ".function\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF)
+{
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out jump tables referenced by the function
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit the function start directives
+ emitFunctionStart(MF);
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+      printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printMachineInstruction(II);
+ }
+
+ // Each Basic Block is separated by a newline
+ O << '\n';
+ }
+
+ // Emit function end directives
+ emitFunctionEnd(MF);
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
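+/// printMemOperand - Print a base+offset operand pair. The "+offset" part
+/// is omitted when the immediate offset is zero.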
+void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum)
+{
+ printOperand(MI, opNum);
+
+ if (MI->getOperand(opNum+1).isImm()
+ && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1);
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ else
+ assert(0 && "not implemented");
+ break;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ {
+ const GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ assert(0 && "not implemented");
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Check for mov mnemonic
+ unsigned src, dst, srcSR, dstSR;
+ if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) {
+ O << "\tmov ";
+ O << TM.getRegisterInfo()->get(dst).AsmName;
+ O << ", ";
+ O << TM.getRegisterInfo()->get(src).AsmName;
+ O << "\n";
+ return;
+ }
+ if (printInstruction(MI)) {
+ return;
+ }
+ assert(0 && "Unhandled instruction in asm writer!");
+}
+
+bool XCoreAsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ if (!FileDirective.empty()) {
+ emitFileDirective(FileDirective);
+ }
+
+ // Print out type strings for external functions here
+ for (Module::const_iterator I = M.begin(), E = M.end();
+ I != E; ++I) {
+ if (I->isDeclaration() && !I->isIntrinsic()) {
+ switch (I->getLinkage()) {
+ default:
+ assert(0 && "Unexpected linkage");
+ case Function::ExternalWeakLinkage:
+ ExtWeakSymbols.insert(I);
+ // fallthrough
+ case Function::ExternalLinkage:
+ break;
+ }
+ }
+ }
+
+ // Emit initial debug information.
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "Dwarf Writer is not available");
+ DW->BeginModule(&M, getAnalysisIfAvailable<MachineModuleInfo>(),
+ O, this, TAI);
+ return Result;
+}
+
+bool XCoreAsmPrinter::doFinalization(Module &M) {
+
+ // Print out module-level global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ emitGlobal(I);
+ }
+
+ // Emit final debug information.
+ DW->EndModule();
+
+ return AsmPrinter::doFinalization(M);
+}
diff --git a/lib/Target/XCore/XCoreCallingConv.td b/lib/Target/XCore/XCoreCallingConv.td
new file mode 100644
index 0000000..8107e32
--- /dev/null
+++ b/lib/Target/XCore/XCoreCallingConv.td
@@ -0,0 +1,33 @@
+//===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for XCore architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XCore Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_XCore : CallingConv<[
+ // i32 are returned in registers R0, R1, R2, R3
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// XCore Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_XCore : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32], CCAssignToStack<4, 4>>
+]>;
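+
+// For example, a call f(i8 a, i32 b, i32 c, i32 d, i32 e) passes a (promoted
+// to i32), b, c and d in R0-R3, and e in a 4-byte aligned stack slot.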
diff --git a/lib/Target/XCore/XCoreFrameInfo.cpp b/lib/Target/XCore/XCoreFrameInfo.cpp
new file mode 100644
index 0000000..f50dc96
--- /dev/null
+++ b/lib/Target/XCore/XCoreFrameInfo.cpp
@@ -0,0 +1,27 @@
+//===-- XCoreFrameInfo.cpp - Frame info for XCore Target ---------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreFrameInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// XCoreFrameInfo:
+//===----------------------------------------------------------------------===//
+
+XCoreFrameInfo::XCoreFrameInfo(const TargetMachine &tm):
+ TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0)
+{
+ // Do nothing
+}
diff --git a/lib/Target/XCore/XCoreFrameInfo.h b/lib/Target/XCore/XCoreFrameInfo.h
new file mode 100644
index 0000000..2c67577
--- /dev/null
+++ b/lib/Target/XCore/XCoreFrameInfo.h
@@ -0,0 +1,34 @@
+//===-- XCoreFrameInfo.h - Frame info for XCore Target -----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREFRAMEINFO_H
+#define XCOREFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class XCoreFrameInfo: public TargetFrameInfo {
+
+ public:
+ XCoreFrameInfo(const TargetMachine &tm);
+
+ //! Stack slot size (4 bytes)
+ static int stackSlotSize() {
+ return 4;
+ }
+ };
+}
+
+#endif // XCOREFRAMEINFO_H
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
new file mode 100644
index 0000000..eed34a4
--- /dev/null
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -0,0 +1,230 @@
+//===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the XCore target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreISelLowering.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <set>
+using namespace llvm;
+
+/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class XCoreDAGToDAGISel : public SelectionDAGISel {
+ XCoreTargetLowering &Lowering;
+ const XCoreSubtarget &Subtarget;
+
+ public:
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM)
+ : SelectionDAGISel(TM),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ SDNode *Select(SDValue Op);
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Complex Pattern Selectors.
+ bool SelectADDRspii(SDValue Op, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRdpii(SDValue Op, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRcpii(SDValue Op, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "XCore DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "XCoreGenDAGISel.inc"
+ };
+} // end anonymous namespace
+
+/// createXCoreISelDag - This pass converts a legalized DAG into a
+/// XCore-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
+ return new XCoreDAGToDAGISel(TM);
+}
+
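+// SelectADDRspii - Match a stack-relative address: a frame index plus an
+// optional non-negative, word-aligned constant offset, e.g.
+// (add FrameIndex:i32<1>, 8).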
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+      // Constant word offset from an object in the data region
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+      // Constant word offset from an object in the constant pool
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void XCoreDAGToDAGISel::
+InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+
+ CurDAG->RemoveDeadNodes();
+}
+
+SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ MVT NVT = N->getValueType(0);
+ if (NVT == MVT::i32) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ if (Predicate_immMskBitp(N)) {
+ SDValue MskSize = Transform_msksize_xform(N);
+ return CurDAG->getTargetNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize);
+      } else if (!Predicate_immU16(N)) {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+ TLI.getPointerTy());
+ return CurDAG->getTargetNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
+ }
+ break;
+ }
+ case ISD::SMUL_LOHI: {
+ // FIXME fold addition into the macc instruction
+ if (!Subtarget.isXS1A()) {
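+        // MACCS computes hi:lo += a * b (signed); seeding the accumulator
+        // with zero yields the plain 64-bit product.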
+ SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) };
+ SDNode *ResNode = CurDAG->getTargetNode(XCore::MACCS_l4r, dl,
+ MVT::i32, MVT::i32, Ops, 4);
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
+ return NULL;
+ }
+ break;
+ }
+ case ISD::UMUL_LOHI: {
+ // FIXME fold addition into the macc / lmul instruction
+ SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ Zero, Zero };
+ SDNode *ResNode = CurDAG->getTargetNode(XCore::LMUL_l6r, dl, MVT::i32,
+ MVT::i32, Ops, 4);
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
+ return NULL;
+ }
+ case XCoreISD::LADD: {
+ if (!Subtarget.isXS1A()) {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(2) };
+ return CurDAG->getTargetNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ break;
+ }
+ case XCoreISD::LSUB: {
+ if (!Subtarget.isXS1A()) {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(2) };
+ return CurDAG->getTargetNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ break;
+ }
+ // Other cases are autogenerated.
+ }
+ }
+ return SelectCode(Op);
+}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
new file mode 100644
index 0000000..93c5f59
--- /dev/null
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -0,0 +1,934 @@
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-lower"
+
+#include "XCoreISelLowering.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/VectorExtras.h"
+#include <queue>
+#include <set>
+using namespace llvm;
+
+const char *XCoreTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case XCoreISD::BL : return "XCoreISD::BL";
+ case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
+ case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
+ case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
+ case XCoreISD::STWSP : return "XCoreISD::STWSP";
+ case XCoreISD::RETSP : return "XCoreISD::RETSP";
+ default : return NULL;
+ }
+}
+
+XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
+ : TargetLowering(XTM),
+ TM(XTM),
+ Subtarget(*XTM.getSubtargetImpl()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, XCore::GRRegsRegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setShiftAmountType(MVT::i32);
+ // shl X, 32 == 0
+ setShiftAmountFlavor(Extend);
+ setStackPointerRegisterToSaveRestore(XCore::SP);
+
+ setSchedulingPreference(SchedulingForRegPressure);
+
+  // Use i32 for setcc operation results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // XCore does not have the NodeTypes below.
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+
+ // Stop the combiner recombining select and set_cc
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // 64bit
+ if (!Subtarget.isXS1A()) {
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ }
+ if (Subtarget.isXS1A()) {
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ }
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // Bit Manipulation
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Expand jump tables for now
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ // RET must be custom lowered, to meet ABI requirements
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+ // Conversion of i64 -> double produces constantpool nodes
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Loads
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+ // Varargs
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ // Dynamic stack
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ // Debug
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+}
+
+SDValue XCoreTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode())
+ {
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ // FIXME: Remove these when LegalizeDAGTypes lands.
+ case ISD::ADD:
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ default:
+ assert(0 && "unimplemented operand");
+ return SDValue();
+ }
+}
+
+/// ReplaceNodeResults - Replace the results of a node that has an illegal
+/// result type with new values built out of custom code.
+void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default:
+ assert(0 && "Don't know how to custom expand this!");
+ return;
+ case ISD::ADD:
+ case ISD::SUB:
+ Results.push_back(ExpandADDSUB(N, DAG));
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
+ Op.getOperand(3), Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
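+/// getGlobalAddressWrapper - Wrap a global address in the wrapper node
+/// matching where the object lives: functions are addressed pc-relative,
+/// constant data cp-relative and mutable data dp-relative (a sketch of the
+/// intent; on the xs1a subtarget all data falls back to dp-relative).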
+SDValue XCoreTargetLowering::
+getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG)
+{
+ // FIXME there is no actual debug info here
+ DebugLoc dl = GA.getDebugLoc();
+ if (isa<Function>(GV)) {
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+ } else if (!Subtarget.isXS1A()) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine constness
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ bool isConst = GVar && GVar->isConstant();
+ if (isConst) {
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ }
+ }
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
+{
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ // If it's a debug information descriptor, don't mess with it.
+ if (DAG.isVerifiedDebugInfoDesc(Op))
+ return GA;
+ return getGlobalAddressWrapper(GA, GV, DAG);
+}
+
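+/// BuildGetId - Emit a call to the llvm.xcore.getid intrinsic, which
+/// yields the id of the thread executing the code.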
+static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
+}
+
+static inline bool isZeroLengthArray(const Type *Ty) {
+ const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+ return AT && (AT->getNumElements() == 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
+{
+ // FIXME there isn't really debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ // transform to label + getid() * size
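+  // Illustrative layout (not taken from this code): if each thread's copy
+  // of the object occupies 12 bytes, thread 2 addresses label + 2 * 12.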
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine size
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ if (! GVar) {
+ assert(0 && "Thread local object not a GlobalVariable?");
+ return SDValue();
+ }
+ const Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+ if (!Ty->isSized() || isZeroLengthArray(Ty)) {
+ cerr << "Size of thread local object " << GVar->getName()
+ << " is unknown\n";
+ abort();
+ }
+ SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
+ DAG.getConstant(Size, MVT::i32));
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
+}
+
+SDValue XCoreTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG)
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really debug info here
+ DebugLoc dl = CP->getDebugLoc();
+ if (Subtarget.isXS1A()) {
+ assert(0 && "Lowering of constant pool unimplemented");
+ return SDValue();
+ } else {
+ MVT PtrVT = Op.getValueType();
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry()) {
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ } else {
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ }
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
+ }
+}
+
+SDValue XCoreTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+{
+ // FIXME there isn't really debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
+}
+
+SDValue XCoreTargetLowering::
+ExpandADDSUB(SDNode *N, SelectionDAG &DAG)
+{
+ assert(N->getValueType(0) == MVT::i64 &&
+ (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Unknown operand to lower!");
+ assert(!Subtarget.isXS1A() && "Cannot custom lower ADD/SUB on xs1a");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Extract components
+ SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(0, MVT::i32));
+ SDValue LHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(1, MVT::i32));
+ SDValue RHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(0, MVT::i32));
+ SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(1, MVT::i32));
+
+ // Expand
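+  // Sketch of the expansion (LADD/LSUB produce the carry/borrow as result
+  // 0 and the 32-bit sum as result 1):
+  //   (carry, lo) = LADD(lhsl, rhsl, 0)
+  //   (_,     hi) = LADD(lhsh, rhsh, carry)
+  //   result      = build_pair(lo, hi)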
+ unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
+ XCoreISD::LSUB;
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Lo(Carry.getNode(), 1);
+
+ SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Hi(Ignored.getNode(), 1);
+ // Merge the pieces
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue XCoreTargetLowering::
+LowerVAARG(SDValue Op, SelectionDAG &DAG)
+{
+ assert(0 && "unimplemented");
+  // FIX Arguments passed by reference need an extra dereference.
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ MVT VT = Node->getValueType(0);
+ SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(1), V, 0);
+ // Increment the pointer, VAList, to the next vararg
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
+ DAG.getConstant(VT.getSizeInBits(),
+ getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0);
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerVASTART(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+ // vastart stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument
+ MachineFunction &MF = DAG.getMachineFunction();
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0);
+}
+
+SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ RegInfo->getFrameRegister(MF), MVT::i32);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//
+// The lowering operations for the calling convention work in this order:
+// LowerCALL (virt regs --> phys regs, virt regs --> stack)
+// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
+// LowerRET (virt regs --> phys regs)
+// LowerCALL (phys regs --> virt regs)
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// CALL Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore custom CALL implementation
+SDValue XCoreTargetLowering::
+LowerCALL(SDValue Op, SelectionDAG &DAG)
+{
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ unsigned CallingConv = TheCall->getCallingConv();
+ // For now, only CallingConv::C implemented
+ switch (CallingConv)
+ {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Op, DAG, CallingConv);
+ }
+}
+
+/// LowerCCCCallTo - function arguments are copied from virtual
+/// regs to (physical regs)/(stack frame), CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isTailCall, sret.
+SDValue XCoreTargetLowering::
+LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC)
+{
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ // The ABI dictates there should be one stack slot available to the callee
+ // on function entry (for saving lr).
+ CCInfo.AllocateStack(4, 4);
+
+ CCInfo.AnalyzeCallOperands(TheCall, CC_XCore);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+    // Arguments start after the first 5 operands of ISD::CALL
+ SDValue Arg = TheCall->getArg(i);
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that are passed in registers must be kept in the
+    // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ int Offset = VA.getLocMemOffset();
+
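+      // The STWSP immediate is a word offset, hence the division by 4.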
+ MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
+ Chain, Arg,
+ DAG.getConstant(Offset/4, MVT::i32)));
+ }
+ }
+
+  // Merge all the store nodes into a single TokenFactor, since the
+  // stores are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // XCoreBranchLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+ Op.getResNo());
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *XCoreTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_XCore);
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
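+  // Each CopyFromReg yields (value, chain, flag); the chain is kept and
+  // the flag is threaded into the next copy so the copies stay adjacent.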
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ ResultVals.push_back(Chain);
+
+ // Merge everything together with a MERGE_VALUES node.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+//===----------------------------------------------------------------------===//
+// FORMAL_ARGUMENTS Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore custom FORMAL_ARGUMENTS implementation
+SDValue XCoreTargetLowering::
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
+{
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch(CC)
+ {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Op, DAG);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: sret
+SDValue XCoreTargetLowering::
+LowerCCCArguments(SDValue Op, SelectionDAG &DAG)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_XCore);
+
+ unsigned StackSlotSize = XCoreFrameInfo::stackSlotSize();
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ unsigned LRSaveSize = StackSlotSize;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ MVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT()) {
+ default:
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << RegVT.getSimpleVT()
+ << "\n";
+ abort();
+ case MVT::i32:
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgValues.push_back(DAG.getCopyFromReg(Root, dl, VReg, RegVT));
+ }
+ } else {
+ // sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > StackSlotSize) {
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << VA.getLocVT().getSimpleVT()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize,
+ LRSaveSize + VA.getLocMemOffset());
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN, NULL, 0));
+ }
+ }
+
+ if (isVarArg) {
+    // Argument registers
+ static const unsigned ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+ };
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs,
+ array_lengthof(ArgRegs));
+ if (FirstVAReg < array_lengthof(ArgRegs)) {
+ SmallVector<SDValue, 4> MemOps;
+ int offset = 0;
+ // Save remaining registers, storing higher register numbers at a higher
+ // address
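+      // Illustrative example: with r0 and r1 holding fixed arguments, r3
+      // is spilled to the slot at offset 0 and r2 to the slot at -4, and
+      // VarArgsFrameIndex is left pointing at r2's slot.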
+      // Use a signed counter: an unsigned one would wrap past zero and
+      // overrun ArgRegs when FirstVAReg is 0.
+      for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
+        // Create a stack slot
+        int FI = MFI->CreateFixedObject(4, offset);
+        if (i == (int)FirstVAReg) {
+ XFI->setVarArgsFrameIndex(FI);
+ }
+ offset -= StackSlotSize;
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ // Move argument from phys reg -> virt reg
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(ArgRegs[i], VReg);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ // Move argument from virt reg -> stack
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else {
+      // This will point to the next argument passed via the stack.
+ XFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset()));
+ }
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ std::vector<MVT> RetVT(Op.getNode()->value_begin(),
+ Op.getNode()->value_end());
+ return DAG.getNode(ISD::MERGE_VALUES, dl, RetVT,
+ &ArgValues[0], ArgValues.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerRET(SDValue Op, SelectionDAG &DAG)
+{
+  // CCValAssign - represents the assignment of
+  // a return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+  // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_XCore);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ // The chain is always operand #0
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // The operands of ISD::RET are the chain followed by (value, flag)
+    // pairs, so operand i*2+1 is the i-th return value.
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+    // Guarantee that all emitted copies are stuck together by threading
+    // the flag, so nothing is scheduled between them.
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on XCore is always a "retsp 0"
+ if (Flag.getNode())
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == XCore::SELECT_CC) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII.get(XCore::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing mode description hooks
+//===----------------------------------------------------------------------===//
+
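+// Immediate operand predicates: "us" immediates are the short unsigned
+// range 0..11; the scaled variants accept that range multiplied by 2 or 4
+// for halfword and word addressing respectively.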
+static inline bool isImmUs(int64_t val)
+{
+ return (val >= 0 && val <= 11);
+}
+
+static inline bool isImmUs2(int64_t val)
+{
+ return (val%2 == 0 && isImmUs(val/2));
+}
+
+static inline bool isImmUs4(int64_t val)
+{
+ return (val%4 == 0 && isImmUs(val/4));
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ MVT VT = getValueType(Ty, true);
+ // Get expected value type after legalization
+ switch (VT.getSimpleVT()) {
+ // Legal load / stores
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ // Expand i1 -> i8
+ case MVT::i1:
+ VT = MVT::i8;
+ break;
+ // Everything else is lowered to words
+ default:
+ VT = MVT::i32;
+ break;
+ }
+ if (AM.BaseGV) {
+ return VT == MVT::i32 && !AM.HasBaseReg && AM.Scale == 0 &&
+ AM.BaseOffs%4 == 0;
+ }
+
+ switch (VT.getSimpleVT()) {
+ default:
+ return false;
+ case MVT::i8:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs(AM.BaseOffs);
+ }
+ return AM.Scale == 1 && AM.BaseOffs == 0;
+ case MVT::i16:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs2(AM.BaseOffs);
+ }
+ return AM.Scale == 2 && AM.BaseOffs == 0;
+ case MVT::i32:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs4(AM.BaseOffs);
+ }
+ // reg + reg<<2
+ return AM.Scale == 4 && AM.BaseOffs == 0;
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// XCore Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::vector<unsigned> XCoreTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const
+{
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2,
+ XCore::R3, XCore::R4, XCore::R5,
+ XCore::R6, XCore::R7, XCore::R8,
+ XCore::R9, XCore::R10, XCore::R11, 0);
+ break;
+ }
+ return std::vector<unsigned>();
+}
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
new file mode 100644
index 0000000..993ecbd
--- /dev/null
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -0,0 +1,123 @@
+//===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that XCore uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREISELLOWERING_H
+#define XCOREISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "XCore.h"
+
+namespace llvm {
+
+  // Forward declarations
+ class XCoreSubtarget;
+ class XCoreTargetMachine;
+
+ namespace XCoreISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END,
+
+ // Branch and link (call)
+ BL,
+
+ // pc relative address
+ PCRelativeWrapper,
+
+ // dp relative address
+ DPRelativeWrapper,
+
+ // cp relative address
+ CPRelativeWrapper,
+
+ // Store word to stack
+ STWSP,
+
+ // Corresponds to retsp instruction
+ RETSP,
+
+ // Corresponds to LADD instruction
+ LADD,
+
+ // Corresponds to LSUB instruction
+ LSUB
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class XCoreTargetLowering : public TargetLowering
+ {
+ public:
+
+ explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+    /// ReplaceNodeResults - Replace the results of a node that has an
+    /// illegal result type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+    /// getTargetNodeName - This method returns the name of a target-specific
+    /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const;
+
+ private:
+ const XCoreTargetMachine &TM;
+ const XCoreSubtarget &Subtarget;
+
+ // Lower Operand helpers
+ SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC);
+ SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode*TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
+ SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV,
+ SelectionDAG &DAG);
+
+ // Lower Operand specifics
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+
+ // Inline asm support
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ // Expand specifics
+ SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG);
+ };
+}
+
+#endif // XCOREISELLOWERING_H
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
new file mode 100644
index 0000000..8002c99
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -0,0 +1,120 @@
+//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "XCore";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// XCore pseudo instructions format
+class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern>;
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+
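+// Note: the encodings below are placeholders; every format currently
+// leaves the 32-bit Inst field zeroed and only the class names
+// distinguish the formats.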
+class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
new file mode 100644
index 0000000..504d202
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -0,0 +1,524 @@
+//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+#include "XCoreInstrInfo.h"
+#include "XCore.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "XCoreGenInstrInfo.inc"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace XCore {
+
+ // XCore Condition Codes
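+  // XCore conditional branches test a boolean register, so the only
+  // conditions are "register is true" and "register is false".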
+ enum CondCode {
+ COND_TRUE,
+ COND_FALSE,
+ COND_INVALID
+ };
+}
+}
+
+using namespace llvm;
+
+XCoreInstrInfo::XCoreInstrInfo(void)
+ : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)),
+ RI(*this) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register-to-register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool XCoreInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSR, unsigned &DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+
+ // We look for 4 kinds of patterns here:
+ // add dst, src, 0
+ // sub dst, src, 0
+ // or dst, src, src
+ // and dst, src, src
+ if ((MI.getOpcode() == XCore::ADD_2rus || MI.getOpcode() == XCore::SUB_2rus)
+ && isZeroImm(MI.getOperand(2))) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ } else if ((MI.getOpcode() == XCore::OR_3r || MI.getOpcode() == XCore::AND_3r)
+ && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned
+XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const{
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::LDWFI)
+ {
+    if ((MI->getOperand(1).isFI()) &&   // is a stack slot
+        (MI->getOperand(2).isImm()) &&  // the offset is an immediate
+        (isZeroImm(MI->getOperand(2)))) // and that offset is zero
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot being stored
+/// to. If not, return 0. This predicate must return 0 if the instruction
+/// has any side effects other than storing to the stack slot.
+unsigned
+XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::STWFI)
+ {
+    if ((MI->getOperand(1).isFI()) &&   // is a stack slot
+        (MI->getOperand(2).isImm()) &&  // the offset is an immediate
+        (isZeroImm(MI->getOperand(2)))) // and that offset is zero
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isInvariantLoad - Return true if the specified instruction (which is marked
+/// mayLoad) is loading from a location whose value is invariant across the
+/// function. For example, loading a value from the constant pool or from
+/// the argument area of a function, if it does not change. This should
+/// only return true if *all* loads the instruction does are invariant (if it
+/// does multiple loads).
+bool
+XCoreInstrInfo::isInvariantLoad(const MachineInstr *MI) const {
+  // Loads from constant pools and loads from invariant argument slots are
+  // invariant
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::LDWCP_ru6 || Opcode == XCore::LDWCP_lru6) {
+ return MI->getOperand(1).isCPI();
+ }
+ int FrameIndex;
+ if (isLoadFromStackSlot(MI, FrameIndex)) {
+ const MachineFrameInfo &MFI =
+ *MI->getParent()->getParent()->getFrameInfo();
+ return MFI.isFixedObjectIndex(FrameIndex) &&
+ MFI.isImmutableObjectIndex(FrameIndex);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
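+// Branch opcode predicates: IsBRU matches the unconditional branches,
+// IsBRT the branches taken when the condition register is true, and IsBRF
+// those taken when it is false (forward/backward, short/long encodings).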
+static inline bool IsBRU(unsigned BrOpc) {
+ return BrOpc == XCore::BRFU_u6
+ || BrOpc == XCore::BRFU_lu6
+ || BrOpc == XCore::BRBU_u6
+ || BrOpc == XCore::BRBU_lu6;
+}
+
+static inline bool IsBRT(unsigned BrOpc) {
+ return BrOpc == XCore::BRFT_ru6
+ || BrOpc == XCore::BRFT_lru6
+ || BrOpc == XCore::BRBT_ru6
+ || BrOpc == XCore::BRBT_lru6;
+}
+
+static inline bool IsBRF(unsigned BrOpc) {
+ return BrOpc == XCore::BRFF_ru6
+ || BrOpc == XCore::BRFF_lru6
+ || BrOpc == XCore::BRBF_ru6
+ || BrOpc == XCore::BRBF_lru6;
+}
+
+static inline bool IsCondBranch(unsigned BrOpc) {
+ return IsBRF(BrOpc) || IsBRT(BrOpc);
+}
+
+/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// the corresponding branch instruction opcode.
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+ if (IsBRT(BrOpc)) {
+ return XCore::COND_TRUE;
+ } else if (IsBRF(BrOpc)) {
+ return XCore::COND_FALSE;
+ } else {
+ return XCore::COND_INVALID;
+ }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::BRFT_lru6;
+ case XCore::COND_FALSE : return XCore::BRFF_lru6;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_TRUE into COND_FALSE.
+static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::COND_FALSE;
+ case XCore::COND_FALSE : return XCore::COND_TRUE;
+ }
+}
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with a conditional branch and it falls through to
+///    a successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with a conditional branch and an unconditional
+///    branch, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool
+XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (IsBRU(LastInst->getOpcode())) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+ if (BranchCode == XCore::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Conditional branch
+ // Block ends with fall-through condbranch.
+
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+ // If the block ends with conditional branch followed by unconditional,
+ // handle it.
+ if (BranchCode != XCore::COND_INVALID
+ && IsBRU(LastInst->getOpcode())) {
+
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(SecondLastInst->getOperand(0));
+
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (IsBRU(SecondLastInst->getOpcode()) &&
+ IsBRU(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned
+XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond)const{
+ // FIXME there should probably be a DebugLoc argument here
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Unexpected number of components!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch
+ BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ assert(Cond.size() == 2 && "Unexpected number of components!");
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(FBB);
+ return 2;
+}
+
+unsigned
+XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!IsCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC == SrcRC) {
+ if (DestRC == XCore::GRRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ if (SrcRC == XCore::RRegsRegisterClass && SrcReg == XCore::SP &&
+ DestRC == XCore::GRRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg)
+ .addImm(0);
+ return true;
+ }
+ if (DestRC == XCore::RRegsRegisterClass && DestReg == XCore::SP &&
+ SrcRC == XCore::GRRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
+ .addReg(SrcReg);
+ return true;
+ }
+ return false;
+}
+
+void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC) const
+{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::STWFI))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+void XCoreInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill, SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ assert(0 && "unimplemented\n");
+}
+
+void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const
+{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+void XCoreInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ assert(0 && "unimplemented\n");
+}
+
+bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const
+{
+ if (CSI.empty()) {
+ return true;
+ }
+ MachineFunction *MF = MBB.getParent();
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(it->getReg());
+
+ storeRegToStackSlot(MBB, MI, it->getReg(), true,
+ it->getFrameIdx(), it->getRegClass());
+ if (emitFrameMoves) {
+ unsigned SaveLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addImm(SaveLabelId);
+ XFI->getSpillLabels().push_back(
+ std::pair<unsigned, CalleeSavedInfo>(SaveLabelId, *it));
+ }
+ }
+ return true;
+}
+
+bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const
+{
+ bool AtStart = MI == MBB.begin();
+ MachineBasicBlock::iterator BeforeI = MI;
+ if (!AtStart)
+ --BeforeI;
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+
+ loadRegFromStackSlot(MBB, MI, it->getReg(),
+ it->getFrameIdx(),
+ it->getRegClass());
+ assert(MI != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert multiple
+ // instructions.
+ if (AtStart)
+ MI = MBB.begin();
+ else {
+ MI = BeforeI;
+ ++MI;
+ }
+ }
+ return true;
+}
+
+/// BlockHasNoFallThrough - Return true if the MachineBasicBlock cannot
+/// fall through into its successor block.
+bool XCoreInstrInfo::
+BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
+{
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case XCore::RETSP_u6: // Return.
+ case XCore::RETSP_lu6:
+ case XCore::BAU_1r: // Indirect branch.
+ case XCore::BRFU_u6: // Uncond branch.
+ case XCore::BRFU_lu6:
+ case XCore::BRBU_u6:
+ case XCore::BRBU_lu6:
+ return true;
+ default: return false;
+ }
+}
+
+/// ReverseBranchCondition - Invert the given branch condition in place;
+/// returns false since XCore branch conditions are always reversible.
+bool XCoreInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert((Cond.size() == 2) &&
+ "Invalid XCore branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
+ return false;
+}
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
new file mode 100644
index 0000000..0870886
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -0,0 +1,110 @@
+//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTRUCTIONINFO_H
+#define XCOREINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "XCoreRegisterInfo.h"
+
+namespace llvm {
+
+class XCoreInstrInfo : public TargetInstrInfoImpl {
+ const XCoreRegisterInfo RI;
+public:
+ XCoreInstrInfo(void);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stack slot being
+  /// stored to. If not, return 0. This predicate must return 0 if the
+  /// instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool isInvariantLoad(const MachineInstr *MI) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+
+ virtual bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
new file mode 100644
index 0000000..65cd4fe
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -0,0 +1,991 @@
+//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XCore instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Uses of CP and DP are not currently reflected in the patterns: having a
+// physical register as an operand prevents loop hoisting, and the values of
+// these registers never change during the life of a function anyway.
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass.
+//===----------------------------------------------------------------------===//
+
+include "XCoreInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasXS1A - This predicate is true when the target processor supports XS1A
+// instructions.
+def HasXS1A : Predicate<"Subtarget.isXS1A()">;
+
+// HasXS1B - This predicate is true when the target processor supports XS1B
+// instructions.
+def HasXS1B : Predicate<"Subtarget.isXS1B()">;
+
+//===----------------------------------------------------------------------===//
+// XCore specific DAG Nodes.
+//
+
+// Call
+def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def SDT_XCoreAddress : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def pcrelwrapper : SDNode<"XCoreISD::PCRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def dprelwrapper : SDNode<"XCoreISD::DPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
+def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
+ [SDNPHasChain]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def div4_xform : SDNodeXForm<imm, [{
+ // Transformation function: imm/4
+ assert(N->getZExtValue() % 4 == 0);
+ return getI32Imm(N->getZExtValue()/4);
+}]>;
+
+def msksize_xform : SDNodeXForm<imm, [{
+ // Transformation function: get the size of a mask
+ assert(isMask_32(N->getZExtValue()));
+ // look for the first non-zero bit
+ return getI32Imm(32 - CountLeadingZeros_32(N->getZExtValue()));
+}]>;
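+// For illustration: 0x00ff has 24 leading zeros, so msksize_xform yields
+// 32 - 24 = 8, the bit width consumed by the ZEXT_rus 'and' pattern below.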
+
+def neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(-value);
+}]>;
+
+def div4neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm/4
+ uint32_t value = N->getZExtValue();
+ assert(-value % 4 == 0);
+ return getI32Imm(-value/4);
+}]>;
+
+def immUs4Neg : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (-value)%4 == 0 && (-value)/4 <= 11;
+}]>;
+
+def immUs4 : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return value%4 == 0 && value/4 <= 11;
+}]>;
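+// Worked example: the byte offset 44 passes immUs4 (44 % 4 == 0, 44/4 == 11)
+// and div4_xform above scales it to the word operand 11, while 45 (misaligned)
+// and 48 (48/4 == 12 > 11) are rejected.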
+
+def immUsNeg : PatLeaf<(imm), [{
+ return -((uint32_t)N->getZExtValue()) <= 11;
+}]>;
+
+def immUs : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() <= 11;
+}]>;
+
+def immU6 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 6);
+}]>;
+
+def immU10 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 10);
+}]>;
+
+def immU16 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 16);
+}]>;
+
+def immU20 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 20);
+}]>;
+
+// FIXME check subtarget. Currently we check if the immediate
+// is in the common subset of legal immediate values for both
+// XS1A and XS1B.
+def immMskBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8)
+ || msksize == 16
+ || msksize == 24
+ || msksize == 32;
+}]>;
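+// E.g. 0x7f (msksize 7) and 0xffff (msksize 16) are accepted, while
+// 0x1ff (msksize 9) and 0x7f00 (not a low mask) are rejected.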
+
+// FIXME check subtarget. Currently we check if the immediate
+// is in the common subset of legal immediate values for both
+// XS1A and XS1B.
+def immBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 1 && value <= 8)
+ || value == 16
+ || value == 24
+ || value == 32;
+}]>;
+
+def lda16f : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 1))>;
+def lda16b : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 1))>;
+def ldawf : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 2))>;
+def ldawb : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 2))>;
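+// These fragments match scaled address arithmetic: lda16f/lda16b are
+// addr +/- 2*offset (halfwords) and ldawf/ldawb are addr +/- 4*offset
+// (words), mirroring the lda16/ldaw address instructions defined below.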
+
+// Instruction operand types
+def calltarget : Operand<i32>;
+def brtarget : Operand<OtherVT>;
+def pclabel : Operand<i32>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper],
+ []>;
+def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
+ []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+
+multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+multiclass F3R_2RUS_np<string OpcStr> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+}
+
+multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class F3R<string OpcStr, SDNode OpNode> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+class F3R_np<string OpcStr> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+// Three operand long
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+// Register - U6
+// Operand register - U6
+multiclass FRU6_LRU6_branch<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+}
+
+multiclass FRU6_LRU6_cp<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+}
+
+// U6
+multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU16:$b)]>;
+}
+
+multiclass FU6_LU6_np<string OpcStr> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// U10
+multiclass FU10_LU10_np<string OpcStr> {
+ def _u10: _FU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu10: _FLU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// Two operand short
+
+class F2R_np<string OpcStr> : _F2R<
+ (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"),
+ []>;
+
+// Two operand long
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
+ "${:comment} ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment} ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
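+// These pseudos are expanded by eliminateCallFramePseudoInstr in
+// XCoreRegisterInfo.cpp: ADJCALLSTACKDOWN becomes 'extsp <amt>' and
+// ADJCALLSTACKUP becomes 'ldaw sp, sp[<amt>]'.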
+
+def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDWFI $dst, $addr",
+ [(set GRRegs:$dst, (load ADDRspii:$addr))]>;
+
+def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDAWFI $dst, $addr",
+ [(set GRRegs:$dst, ADDRspii:$addr)]>;
+
+def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
+ "${:comment} STWFI $src, $addr",
+ [(store GRRegs:$src, ADDRspii:$addr)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in {
+ def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
+ (ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
+ "${:comment} SELECT_CC PSEUDO!",
+ [(set GRRegs:$dst,
+ (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+defm ADD : F3R_2RUS<"add", add>;
+defm SUB : F3R_2RUS<"sub", sub>;
+let neverHasSideEffects = 1 in {
+defm EQ : F3R_2RUS_np<"eq">;
+def LSS_3r : F3R_np<"lss">;
+def LSU_3r : F3R_np<"lsu">;
+}
+def AND_3r : F3R<"and", and>;
+def OR_3r : F3R<"or", or>;
+
+let mayLoad=1 in {
+def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]",
+ []>;
+
+def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]",
+ []>;
+}
+
+let mayStore=1 in {
+def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+
+def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+}
+
+defm SHL : F3R_2RBITP<"shl", shl>;
+defm SHR : F3R_2RBITP<"shr", srl>;
+// TODO tsetr
+
+// Three operand long
+def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]",
+ []>;
+
+def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ []>;
+
+def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<"mul", mul>;
+// Instructions which may trap are marked as side effecting.
+let hasSideEffects = 1 in {
+def DIVS_l3r : FL3R<"divs", sdiv>;
+def DIVU_l3r : FL3R<"divu", udiv>;
+def REMS_l3r : FL3R<"rems", srem>;
+def REMU_l3r : FL3R<"remu", urem>;
+}
+def XOR_l3r : FL3R<"xor", xor>;
+defm ASHR : FL3R_L2RBITP<"ashr", sra>;
+// TODO crc32, crc8, inpw, outpw
+let mayStore=1 in {
+def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]",
+ []>;
+
+def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]",
+ []>;
+}
+
+// Four operand long
+let Predicates = [HasXS1B], Constraints = "$src1 = $dst1,$src2 = $dst2" in {
+def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccu $dst1, $dst2, $src3, $src4",
+ []>;
+
+def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccs $dst1, $dst2, $src3, $src4",
+ []>;
+}
+
+// Five operand long
+
+let Predicates = [HasXS1B] in {
+def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldiv $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+}
+
+// Six operand long
+
+def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
+ []>;
+
+let Predicates = [HasXS1A] in
+def MACC_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "macc $dst1, $dst2, $src1, $src2, $src3, $src4",
+ []>;
+
+// Register - U6
+
+//let Uses = [DP] in ...
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ []>;
+
+let isReMaterializable = 1 in
+def LDAWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ [(set GRRegs:$dst, ADDRdpii:$a)]>;
+
+let mayLoad=1 in
+def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ []>;
+
+def LDWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ [(set GRRegs:$dst, (load ADDRdpii:$a))]>;
+
+let mayStore=1 in
+def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ []>;
+
+def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ [(store GRRegs:$val, ADDRdpii:$addr)]>;
+
+//let Uses = [CP] in ..
+let mayLoad = 1, isReMaterializable = 1 in
+defm LDWCP : FRU6_LRU6_cp<"ldw">;
+
+let Uses = [SP] in {
+let mayStore=1 in {
+def STWSP_ru6 : _FRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU6:$index)]>;
+
+def STWSP_lru6 : _FLRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU16:$index)]>;
+}
+
+let mayLoad=1 in {
+def LDWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+
+def LDWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+}
+
+let neverHasSideEffects = 1 in {
+def LDAWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_ru6_RRegs : _FRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6_RRegs : _FLRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+}
+}
+
+let isReMaterializable = 1 in {
+def LDC_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU16:$b)]>;
+}
+
+// Operand register - U6
+// TODO setc
+let isBranch = 1, isTerminator = 1 in {
+defm BRFT: FRU6_LRU6_branch<"bt">;
+defm BRBT: FRU6_LRU6_branch<"bt">;
+defm BRFF: FRU6_LRU6_branch<"bf">;
+defm BRBF: FRU6_LRU6_branch<"bf">;
+}
+
+// U6
+let Defs = [SP], Uses = [SP] in {
+let neverHasSideEffects = 1 in
+defm EXTSP : FU6_LU6_np<"extsp">;
+let mayStore = 1 in
+defm ENTSP : FU6_LU6_np<"entsp">;
+
+let isReturn = 1, isTerminator = 1, mayLoad = 1 in {
+defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
+}
+}
+
+// TODO extdp, kentsp, krestsp, blat, setsr
+// clrsr, getsr, kalli
+let isBranch = 1, isTerminator = 1 in {
+def BRBU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRBU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+}
+
+//let Uses = [CP] in ...
+let Predicates = [HasXS1B], Defs = [R11], neverHasSideEffects = 1,
+ isReMaterializable = 1 in
+def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ []>;
+
+let Predicates = [HasXS1B], Defs = [R11], isReMaterializable = 1 in
+def LDAWCP_lu6: _FLRU6<
+ (outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
+
+// U10
+// TODO ldwcpl, blacp
+
+let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
+def LDAP_u10 : _FU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10 : _FLU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR] in {
+def BL_u10 : _FU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU10:$target)]>;
+
+def BL_lu10 : _FLU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU20:$target)]>;
+}
+
+// Two operand short
+// TODO getr, getst
+def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b",
+ [(set GRRegs:$dst, (not GRRegs:$b))]>;
+
+def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b",
+ [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+
+// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
+// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
+// tsetmr, sext (reg), zext (reg)
+let isTwoAddress = 1 in {
+let neverHasSideEffects = 1 in
+def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ []>;
+
+def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+}
+
+let isReMaterializable = 1, neverHasSideEffects = 1 in
+def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size",
+ []>;
+
+def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
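+// The pattern above computes (1 << size) + 0xffffffff, i.e. (1 << size) - 1
+// in 32-bit arithmetic: for size == 4 this is 0x0000000f, the mask that
+// mkmsk builds.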
+
+// Two operand long
+// TODO settw, setclk, setrdy, setpsc, endin, peek,
+// getd, testlcl, tinitlr, getps, setps
+def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+
+def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+
+def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+
+// One operand short
+// TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp
+// bru, setdp, setcp, setv, setev, kcall
+// dgetreg
+let isBranch=1, isIndirectBranch=1, isTerminator=1 in
+def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
+ "bau $addr",
+ [(brind GRRegs:$addr)]>;
+
+let Defs=[SP], neverHasSideEffects=1 in
+def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
+ "set sp, $src",
+ []>;
+
+let isBarrier = 1, hasCtrlDep = 1 in
+def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallt $src",
+ []>;
+
+let isBarrier = 1, hasCtrlDep = 1 in
+def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallf $src",
+ []>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR] in {
+def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
+ "bla $addr",
+ [(XCoreBranchLink GRRegs:$addr)]>;
+}
+
+// Zero operand short
+// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
+// dentsp, drestsp
+
+let Defs = [R11] in
+def GETID_0R : _F0R<(outs), (ins),
+ "get r11, id",
+ [(set R11, (int_xcore_getid))]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>;
+
+/// sext_inreg
+def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
+def : Pat<(sext_inreg GRRegs:$b, i8), (SEXT_rus GRRegs:$b, 8)>;
+def : Pat<(sext_inreg GRRegs:$b, i16), (SEXT_rus GRRegs:$b, 16)>;
+
+/// loads
+def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
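+// No reg+imm form is defined here for the byte/halfword loads, so a plain
+// address uses the reg+reg form with a zero offset materialized by 'ldc 0'.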
+
+def : Pat<(sextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+          (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(load (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (LDW_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(load (add GRRegs:$addr, immUs4:$offset)),
+ (LDW_2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(load GRRegs:$addr), (LDW_2rus GRRegs:$addr, 0)>;
+
+/// anyext
+def : Pat<(extloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+def : Pat<(extloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+/// stores
+def : Pat<(truncstorei8 GRRegs:$val, (add GRRegs:$addr, GRRegs:$offset)),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei8 GRRegs:$val, GRRegs:$addr),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(truncstorei16 GRRegs:$val, (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(store GRRegs:$val, GRRegs:$addr),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, 0)>;
+
+/// cttz
+def : Pat<(cttz GRRegs:$src), (CLZ_l2r (BITREV_l2r GRRegs:$src))>;
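+// Bit reversal turns trailing zeros into leading zeros: e.g.
+// cttz 0x8 = clz(bitrev(0x8)) = clz(0x10000000) = 3.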
+
+/// trap
+def : Pat<(trap), (ECALLF_1r (LDC_ru6 0))>;
+
+///
+/// branch patterns
+///
+
+// unconditional branch
+def : Pat<(br bb:$addr), (BRFU_lu6 bb:$addr)>;
+
+// direct match equal/notequal zero brcond
+def : Pat<(brcond (setne GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 GRRegs:$lhs, bb:$dst)>;
+def : Pat<(brcond (seteq GRRegs:$lhs, 0), bb:$dst),
+ (BRFF_lru6 GRRegs:$lhs, bb:$dst)>;
+
+def : Pat<(brcond (setle GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setule GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setuge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, immUs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_2rus GRRegs:$lhs, immUs:$rhs), bb:$dst)>;
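+// The compound conditions above reuse lss/lsu with swapped operands and a
+// branch-on-false: e.g. (setle $lhs, $rhs) branches when (lss $rhs, $lhs)
+// yields 0.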
+
+// generic brcond pattern
+def : Pat<(brcond GRRegs:$cond, bb:$addr), (BRFT_lru6 GRRegs:$cond, bb:$addr)>;
+
+
+///
+/// Select patterns
+///
+
+// direct match equal/notequal zero select
+def : Pat<(select (setne GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (seteq GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(select (setle GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setule GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setuge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, immUs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_2rus GRRegs:$lhs, immUs:$rhs), GRRegs:$F, GRRegs:$T)>;
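+// Note the operand swap: the inverted conditions (seteq, setle, ...) reuse
+// the same SELECT_CC pseudo by exchanging the true and false values.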
+
+///
+/// setcc patterns, only matched when none of the above brcond
+/// patterns match
+///
+
+// setcc 2 register operands
+def : Pat<(setle GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+def : Pat<(setule GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+
+def : Pat<(setgt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$rhs, GRRegs:$lhs)>;
+def : Pat<(setugt GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$rhs, GRRegs:$lhs)>;
+
+def : Pat<(setge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+def : Pat<(setuge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(setlt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$lhs, GRRegs:$rhs)>;
+def : Pat<(setult GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+def : Pat<(setne GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (EQ_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
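+// setne is synthesized as eq(eq(lhs, rhs), 0), a double eq, presumably
+// because no direct not-equal instruction is available.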
+
+def : Pat<(seteq GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+// setcc reg/imm operands
+def : Pat<(seteq GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus GRRegs:$lhs, immUs:$rhs)>;
+def : Pat<(setne GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus (EQ_2rus GRRegs:$lhs, immUs:$rhs), 0)>;
+
+// misc
+def : Pat<(add GRRegs:$addr, immUs4:$offset),
+ (LDAWF_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(sub GRRegs:$addr, immUs4:$offset),
+ (LDAWB_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(and GRRegs:$val, immMskBitp:$mask),
+ (ZEXT_rus GRRegs:$val, (msksize_xform immMskBitp:$mask))>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : Pat<(add GRRegs:$src1, immUsNeg:$src2),
+ (SUB_2rus GRRegs:$src1, (neg_xform immUsNeg:$src2))>;
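+// E.g. (sub $x, 8) is canonicalized to (add $x, -8); immUsNeg accepts it
+// (-(-8) == 8 <= 11) and neg_xform recovers 8, selecting 'sub $x, 8'.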
+
+def : Pat<(add GRRegs:$src1, immUs4Neg:$src2),
+ (LDAWB_l2rus GRRegs:$src1, (div4neg_xform immUs4Neg:$src2))>;
+
+///
+/// Some peepholes
+///
+
+def : Pat<(mul GRRegs:$src, 3),
+ (LDA16F_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, 5),
+ (LDAWF_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, -3),
+ (LDAWB_l3r GRRegs:$src, GRRegs:$src)>;
+
+// ashr X, 32 is equivalent to ashr X, 31 on the XCore.
+def : Pat<(sra GRRegs:$src, 31),
+ (ASHR_l2rus GRRegs:$src, 32)>;
+
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
new file mode 100644
index 0000000..43adb0f
--- /dev/null
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//====- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares XCore-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMACHINEFUNCTIONINFO_H
+#define XCOREMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <vector>
+
+namespace llvm {
+
+// Forward declarations
+class Function;
+
+/// XCoreFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private XCore target-specific information for each MachineFunction.
+class XCoreFunctionInfo : public MachineFunctionInfo {
+private:
+ bool UsesLR;
+ int LRSpillSlot;
+ int FPSpillSlot;
+ int VarArgsFrameIndex;
+ std::vector<std::pair<unsigned, CalleeSavedInfo> > SpillLabels;
+
+public:
+ XCoreFunctionInfo() :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ XCoreFunctionInfo(MachineFunction &MF) :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ ~XCoreFunctionInfo() {}
+
+ void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+
+ void setUsesLR(bool val) { UsesLR = val; }
+ bool getUsesLR() const { return UsesLR; }
+
+ void setLRSpillSlot(int off) { LRSpillSlot = off; }
+ int getLRSpillSlot() const { return LRSpillSlot; }
+
+ void setFPSpillSlot(int off) { FPSpillSlot = off; }
+ int getFPSpillSlot() const { return FPSpillSlot; }
+
+  std::vector<std::pair<unsigned, CalleeSavedInfo> > &getSpillLabels() {
+ return SpillLabels;
+ }
+};
+} // End llvm namespace
+
+#endif // XCOREMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
new file mode 100644
index 0000000..82cd92d
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -0,0 +1,598 @@
+//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreRegisterInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
+ : XCoreGenRegisterInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+ TII(tii) {
+}
+
+// helper functions
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static const unsigned XCore_ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+};
+
+const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
+{
+ return XCore_ArgRegs;
+}
+
+unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
+{
+ return array_lengthof(XCore_ArgRegs);
+}
+
+bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF)
+{
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ return (MMI && MMI->hasDebugInfo()) ||
+ !MF.getFunction()->doesNotThrow() ||
+ UnwindTablesMandatory;
+}
+
+const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ XCore::R4, XCore::R5, XCore::R6, XCore::R7,
+ XCore::R8, XCore::R9, XCore::R10, XCore::LR,
+ 0
+ };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+XCoreRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
+ XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
+ XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
+ XCore::GRRegsRegisterClass, XCore::RRegsRegisterClass,
+ 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(XCore::CP);
+ Reserved.set(XCore::DP);
+ Reserved.set(XCore::SP);
+ Reserved.set(XCore::LR);
+ if (hasFP(MF)) {
+ Reserved.set(XCore::R10);
+ }
+ return Reserved;
+}
+
+bool
+XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ // TODO can we estimate stack size?
+ return hasFP(MF);
+}
+
+bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
+ return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void XCoreRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
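+      // E.g. with Align == 4, an Amount of 6 rounds up to (6+3)/4*4 == 8.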
+
+ assert(Amount%4 == 0);
+ Amount /= 4;
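+      // SP-relative operands are encoded in words, so a 40-byte
+      // adjustment becomes the operand 10.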
+
+ bool isU6 = isImmU6(Amount);
+
+ if (!isU6 && !isImmU16(Amount)) {
+ // FIX could emit multiple instructions in this case.
+ cerr << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+ abort();
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &FrameOp = MI.getOperand(i);
+ int FrameIndex = FrameOp.getIndex();
+
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int StackSize = MF.getFrameInfo()->getStackSize();
+
+ #ifndef NDEBUG
+ DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ DOUT << "<--------->\n";
+ MI.print(DOUT);
+ DOUT << "FrameIndex : " << FrameIndex << "\n";
+ DOUT << "FrameOffset : " << Offset << "\n";
+ DOUT << "StackSize : " << StackSize << "\n";
+ #endif
+
+ Offset += StackSize;
+
+ // fold constant into offset.
+ Offset += MI.getOperand(i + 1).getImm();
+ MI.getOperand(i + 1).ChangeToImmediate(0);
+
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+
+ #ifndef NDEBUG
+ DOUT << "Offset : " << Offset << "\n";
+ DOUT << "<--------->\n";
+ #endif
+
+ Offset/=4;
+
+ bool FP = hasFP(MF);
+
+ unsigned Reg = MI.getOperand(0).getReg();
+ bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
+
+ assert(XCore::GRRegsRegisterClass->contains(Reg) &&
+ "Unexpected register operand");
+
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ if (FP) {
+ bool isUs = isImmUs(Offset);
+ unsigned FramePtr = XCore::R10;
+
+ MachineInstr *New = 0;
+ if (!isUs) {
+ if (!RS) {
+ cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n";
+ abort();
+ }
+ unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
+ SPAdj);
+ loadConstant(MBB, II, ScratchReg, Offset, dl);
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
+ .addReg(FramePtr)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::STWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FramePtr)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::LDAWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
+ .addReg(FramePtr)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ default:
+ assert(0 && "Unexpected Opcode\n");
+ }
+ } else {
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
+ .addReg(FramePtr)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FramePtr)
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
+ .addReg(FramePtr)
+ .addImm(Offset);
+ break;
+ default:
+ assert(0 && "Unexpected Opcode\n");
+ }
+ }
+ } else {
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset)) {
+ // FIXME could make this work for LDWSP, LDAWSP.
+ cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n";
+ abort();
+ }
+
+ switch (MI.getOpcode()) {
+ int NewOpcode;
+ case XCore::LDWFI:
+ NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode))
+ .addReg(Reg, getKillRegState(isKill))
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ default:
+ assert(0 && "Unexpected Opcode\n");
+ }
+ }
+ // Erase old instruction.
+ MBB.erase(II);
+}
+
+void
+XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+ const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ if (LRUsed) {
+ MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+ bool isVarArg = MF.getFunction()->isVarArg();
+ int FrameIdx;
+ if (! isVarArg) {
+ // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0);
+ } else {
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ }
+    XFI->setUsesLR(true);
+ XFI->setLRSpillSlot(FrameIdx);
+ }
+ if (requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+ if (hasFP(MF)) {
+ // A callee save register is used to hold the FP.
+ // This needs saving / restoring in the epilogue / prologue.
+ XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+}
+
+void XCoreRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+
+}
+
+void XCoreRegisterInfo::
+loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const {
+ // TODO use mkmsk if possible.
+ if (!isImmU16(Value)) {
+ // TODO use constant pool.
+ cerr << "loadConstant value too big " << Value << "\n";
+ abort();
+ }
+ int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
+}
+
+void XCoreRegisterInfo::
+storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl) const {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset)) {
+ cerr << "storeToStack offset too big " << Offset << "\n";
+ abort();
+ }
+ int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode))
+ .addReg(SrcReg)
+ .addImm(Offset);
+}
+
+void XCoreRegisterInfo::
+loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl) const {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset)) {
+ cerr << "loadFromStack offset too big " << Offset << "\n";
+ abort();
+ }
+ int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+ .addImm(Offset);
+}
+
+void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ bool FP = hasFP(MF);
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ cerr << "emitPrologue Frame size too big: " << FrameSize << "\n";
+ abort();
+ }
+ bool emitFrameMoves = needsFrameMoves(MF);
+
+ // Do we need to allocate space on the stack?
+ if (FrameSize) {
+ bool saveLR = XFI->getUsesLR();
+ bool LRSavedOnEntry = false;
+ int Opcode;
+ if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+ Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+ MBB.addLiveIn(XCore::LR);
+ saveLR = false;
+ LRSavedOnEntry = true;
+ } else {
+ Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+ if (emitFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ unsigned FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
+
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+
+ if (LRSavedOnEntry) {
+ MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+ MachineLocation CSSrc(XCore::LR);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+ }
+ if (saveLR) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl);
+ MBB.addLiveIn(XCore::LR);
+
+ if (emitFrameMoves) {
+ unsigned SaveLRLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveLRLabelId);
+ MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+ MachineLocation CSSrc(XCore::LR);
+ MMI->getFrameMoves().push_back(MachineMove(SaveLRLabelId,
+ CSDst, CSSrc));
+ }
+ }
+ }
+
+ if (FP) {
+ // Save R10 to the stack.
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl);
+ // R10 is live-in. It is killed at the spill.
+ MBB.addLiveIn(XCore::R10);
+ if (emitFrameMoves) {
+ unsigned SaveR10LabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveR10LabelId);
+ MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+ MachineLocation CSSrc(XCore::R10);
+ MMI->getFrameMoves().push_back(MachineMove(SaveR10LabelId,
+ CSDst, CSSrc));
+ }
+ // Set the FP from the SP.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+ .addImm(0);
+ if (emitFrameMoves) {
+ // Show FP is now valid.
+ unsigned FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
+ MachineLocation SPDst(FramePtr);
+ MachineLocation SPSrc(MachineLocation::VirtualFP);
+ MMI->getFrameMoves().push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ std::vector<std::pair<unsigned, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ unsigned SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = CSI.getReg();
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+ }
+ }
+}
+
+void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ if (FP) {
+ // Restore the stack pointer.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+ .addReg(FramePtr);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ cerr << "emitEpilogue Frame size too big: " << FrameSize << "\n";
+ abort();
+ }
+
+ if (FrameSize) {
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+
+ if (FP) {
+ // Restore R10
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ FPSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl);
+ }
+ bool restoreLR = XFI->getUsesLR();
+ if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ LRSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl);
+ restoreLR = false;
+ }
+ if (restoreLR) {
+      // Fold epilogue into return instruction
+ assert(MBBI->getOpcode() == XCore::RETSP_u6
+ || MBBI->getOpcode() == XCore::RETSP_lu6);
+ int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+ MBB.erase(MBBI);
+ } else {
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
+ }
+ }
+}
+
+int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+unsigned XCoreRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ bool FP = hasFP(MF);
+
+ return FP ? XCore::R10 : XCore::SP;
+}
+
+unsigned XCoreRegisterInfo::getRARegister() const {
+ return XCore::LR;
+}
+
+void XCoreRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+#include "XCoreGenRegisterInfo.inc"
+
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
new file mode 100644
index 0000000..00b7caa
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -0,0 +1,94 @@
+//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREREGISTERINFO_H
+#define XCOREREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "XCoreGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const;
+
+ void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl) const;
+
+ void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl) const;
+
+public:
+ XCoreRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ //! Return the array of argument passing registers
+ /*!
+ \note The size of this array is returned by getArgRegsSize().
+ */
+ static const unsigned *getArgRegs(const MachineFunction *MF = 0);
+
+ //! Return the size of the argument passing register array
+ static unsigned getNumArgRegs(const MachineFunction *MF = 0);
+
+ //! Return whether to emit frame moves
+ static bool needsFrameMoves(const MachineFunction &MF);
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
new file mode 100644
index 0000000..62daf5d
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -0,0 +1,91 @@
+//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the XCore register file
+//===----------------------------------------------------------------------===//
+
+class XCoreReg<string n> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "XCore";
+}
+
+// Registers are identified with 4-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<4> num, string n> : XCoreReg<n> {
+ let Num = num;
+}
+
+// CPU registers
+def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+def CP : Ri<12, "cp">, DwarfRegNum<[12]>;
+def DP : Ri<13, "dp">, DwarfRegNum<[13]>;
+def SP : Ri<14, "sp">, DwarfRegNum<[14]>;
+def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
+
+// Register classes.
+//
+def GRRegs : RegisterClass<"XCore", [i32], 32,
+ // Return values and arguments
+ [R0, R1, R2, R3,
+ // Not preserved across procedure calls
+ R11,
+ // Callee save
+ R4, R5, R6, R7, R8, R9, R10]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GRRegsClass::iterator
+ GRRegsClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ GRRegsClass::iterator
+ GRRegsClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return end()-1; // don't allocate R10
+ else
+ return end();
+ }
+ }];
+}
+
+def RRegs : RegisterClass<"XCore", [i32], 32,
+ // Reserved
+ [CP, DP, SP, LR]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ RRegsClass::iterator
+ RRegsClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ RRegsClass::iterator
+ RRegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // No allocatable registers
+ return begin();
+ }
+ }];
+}
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
new file mode 100644
index 0000000..dc53da4
--- /dev/null
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -0,0 +1,28 @@
+//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCore specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreSubtarget.h"
+#include "XCore.h"
+#include "XCoreGenSubtarget.inc"
+using namespace llvm;
+
+XCoreSubtarget::XCoreSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS)
+ : IsXS1A(false),
+ IsXS1B(false)
+{
+ std::string CPU = "xs1a-generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
new file mode 100644
index 0000000..ff6475b
--- /dev/null
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -0,0 +1,46 @@
+//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESUBTARGET_H
+#define XCORESUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class XCoreSubtarget : public TargetSubtarget {
+ bool IsXS1A;
+ bool IsXS1B;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ XCoreSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS);
+
+ bool isXS1A() const { return IsXS1A; }
+ bool isXS1B() const { return IsXS1B; }
+
+  /// ParseSubtargetFeatures - Parses the features string, setting the
+  /// specified subtarget options. The definition of this function is
+  /// auto-generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.cpp b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
new file mode 100644
index 0000000..5513762
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
@@ -0,0 +1,201 @@
+//===-- XCoreTargetAsmInfo.cpp - XCore asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the XCoreTargetAsmInfo properties.
+// We use the small section flag for the CP relative and DP relative
+// flags. If a section is small and writable then it is DP relative. If a
+// section is small and not writable then it is CP relative.
+//
+//===----------------------------------------------------------------------===//
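+// For illustration (a sketch of the directives this policy translates to; the
+// section names below also appear later in this file):
+//   .section .dp.data,"awd",@progbits    # small + writable  -> 'd' (DP)
+//   .section .cp.rodata,"ac",@progbits   # small + read-only -> 'c' (CP)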
+
+#include "XCoreTargetAsmInfo.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
+ : ELFTargetAsmInfo(TM),
+ Subtarget(TM.getSubtargetImpl()) {
+ TextSection = getUnnamedSection("\t.text", SectionFlags::Code);
+ DataSection = getNamedSection("\t.dp.data", SectionFlags::Writeable |
+ SectionFlags::Small);
+ BSSSection_ = getNamedSection("\t.dp.bss", SectionFlags::Writeable |
+ SectionFlags::BSS | SectionFlags::Small);
+ if (Subtarget->isXS1A()) {
+ ReadOnlySection = getNamedSection("\t.dp.rodata", SectionFlags::None |
+ SectionFlags::Writeable |
+ SectionFlags::Small);
+ } else {
+ ReadOnlySection = getNamedSection("\t.cp.rodata", SectionFlags::None |
+ SectionFlags::Small);
+ }
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = 0;
+ ZeroDirective = "\t.space\t";
+ CommentString = "#";
+ ConstantPoolSection = "\t.section\t.cp.rodata,\"ac\",@progbits";
+ JumpTableDataSection = "\t.section\t.dp.data,\"awd\",@progbits";
+ PrivateGlobalPrefix = ".L";
+ AscizDirective = ".asciiz";
+ WeakDefDirective = "\t.weak\t";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+
+ // Debug
+ HasLEB128 = true;
+ AbsoluteDebugSectionOffsets = true;
+
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+ DwarfPubNamesSection = "\t.section\t.debug_pubnames,\"\",@progbits";
+ DwarfPubTypesSection = "\t.section\t.debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+}
+
+const Section*
+XCoreTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ {
+ if (!GVar->isWeakForLinker()) {
+ switch (Kind) {
+ case SectionKind::RODataMergeStr:
+ return MergeableStringSection(GVar);
+ case SectionKind::RODataMergeConst:
+ return getReadOnlySection();
+ case SectionKind::ThreadData:
+ return DataSection;
+ case SectionKind::ThreadBSS:
+ return getBSSSection_();
+ default:
+ break;
+ }
+ }
+ }
+ return ELFTargetAsmInfo::SelectSectionForGlobal(GV);
+}
+
+const Section*
+XCoreTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+ return MergeableConstSection(Ty);
+}
+
+const Section*
+XCoreTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const {
+ Constant *C = GV->getInitializer();
+ return MergeableConstSection(C->getType());
+}
+
+inline const Section*
+XCoreTargetAsmInfo::MergeableConstSection(const Type *Ty) const {
+ const TargetData *TD = TM.getTargetData();
+
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size == 4 || Size == 8 || Size == 16) {
+ std::string Name = ".cp.const" + utostr(Size);
+
+ return getNamedSection(Name.c_str(),
+ SectionFlags::setEntitySize(SectionFlags::Mergeable |
+ SectionFlags::Small,
+ Size));
+ }
+
+ return getReadOnlySection();
+}
+
+const Section* XCoreTargetAsmInfo::
+MergeableStringSection(const GlobalVariable *GV) const {
+  // FIXME: insert in the correct mergeable section
+ return getReadOnlySection();
+}
+
+unsigned XCoreTargetAsmInfo::
+SectionFlagsForGlobal(const GlobalValue *GV,
+ const char* Name) const {
+ unsigned Flags = ELFTargetAsmInfo::SectionFlagsForGlobal(GV, Name);
+ // Mask out unsupported flags
+ Flags &= ~(SectionFlags::Small | SectionFlags::TLS);
+
+ // Set CP / DP relative flags
+ if (GV) {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+ switch (Kind) {
+ case SectionKind::ThreadData:
+ case SectionKind::ThreadBSS:
+ case SectionKind::Data:
+ case SectionKind::BSS:
+ case SectionKind::SmallData:
+ case SectionKind::SmallBSS:
+ Flags |= SectionFlags::Small;
+ break;
+ case SectionKind::ROData:
+ case SectionKind::RODataMergeStr:
+ case SectionKind::SmallROData:
+ if (Subtarget->isXS1A()) {
+ Flags |= SectionFlags::Writeable;
+ }
+      Flags |= SectionFlags::Small;
+      break;
+    case SectionKind::RODataMergeConst:
+      Flags |= SectionFlags::Small;
+      break;
+    default:
+ break;
+ }
+ }
+
+ return Flags;
+}
+
+std::string XCoreTargetAsmInfo::
+printSectionFlags(unsigned flags) const {
+ std::string Flags = ",\"";
+
+ if (!(flags & SectionFlags::Debug))
+ Flags += 'a';
+ if (flags & SectionFlags::Code)
+ Flags += 'x';
+ if (flags & SectionFlags::Writeable)
+ Flags += 'w';
+ if (flags & SectionFlags::Mergeable)
+ Flags += 'M';
+ if (flags & SectionFlags::Strings)
+ Flags += 'S';
+ if (flags & SectionFlags::TLS)
+ Flags += 'T';
+ if (flags & SectionFlags::Small) {
+ if (flags & SectionFlags::Writeable)
+ Flags += 'd'; // DP relative
+ else
+ Flags += 'c'; // CP relative
+ }
+
+ Flags += "\",";
+
+ Flags += '@';
+
+ if (flags & SectionFlags::BSS)
+ Flags += "nobits";
+ else
+ Flags += "progbits";
+
+ if (unsigned entitySize = SectionFlags::getEntitySize(flags))
+ Flags += "," + utostr(entitySize);
+
+ return Flags;
+}
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.h b/lib/Target/XCore/XCoreTargetAsmInfo.h
new file mode 100644
index 0000000..79fd36a
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.h
@@ -0,0 +1,45 @@
+//=====-- XCoreTargetAsmInfo.h - XCore asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the XCoreTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETASMINFO_H
+#define XCORETARGETASMINFO_H
+
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declarations.
+ class XCoreTargetMachine;
+ class XCoreSubtarget;
+
+ class XCoreTargetAsmInfo : public ELFTargetAsmInfo {
+ private:
+ const XCoreSubtarget *Subtarget;
+ public:
+ explicit XCoreTargetAsmInfo(const XCoreTargetMachine &TM);
+
+ virtual const Section* SelectSectionForGlobal(const GlobalValue *GV) const;
+ virtual std::string printSectionFlags(unsigned flags) const;
+ const Section* MergeableConstSection(const GlobalVariable *GV) const;
+ inline const Section* MergeableConstSection(const Type *Ty) const;
+ const Section* MergeableStringSection(const GlobalVariable *GV) const;
+ virtual const Section*
+ SelectSectionForMachineConst(const Type *Ty) const;
+ virtual unsigned
+ SectionFlagsForGlobal(const GlobalValue *GV = NULL,
+ const char* name = NULL) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
new file mode 100644
index 0000000..5437c57
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -0,0 +1,71 @@
+//===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCore specific subclass of TargetMachine.
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetAsmInfo.h"
+#include "XCoreTargetMachine.h"
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// XCoreTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int XCoreTargetMachineModule;
+int XCoreTargetMachineModule = 0;
+
+namespace {
+ // Register the target.
+ RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
+}
+
+const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
+ return new XCoreTargetAsmInfo(*this);
+}
+
+/// XCoreTargetMachine ctor - Create an ILP32 architecture model
+///
+XCoreTargetMachine::XCoreTargetMachine(const Module &M, const std::string &FS)
+ : Subtarget(*this, M, FS),
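+    // Little-endian ("e"), 32-bit pointers, and 64-bit types aligned to
+    // 32 bits: the ILP32 model noted in the comment above.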
+ DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
+ "i16:16:32-i32:32:32-i64:32:32"),
+ InstrInfo(),
+ FrameInfo(*this),
+ TLInfo(*this) {
+}
+
+unsigned XCoreTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "xcore-")
+ return 20;
+
+ // Otherwise we don't match.
+ return 0;
+}
+
+bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createXCoreISelDag(*this));
+ return false;
+}
+
+bool XCoreTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ // Output assembly language.
+ PM.add(createXCoreCodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
new file mode 100644
index 0000000..2385aed
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -0,0 +1,63 @@
+//===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETMACHINE_H
+#define XCORETARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "XCoreFrameInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class XCoreTargetMachine : public LLVMTargetMachine {
+ XCoreSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ XCoreInstrInfo InstrInfo;
+ XCoreFrameInfo FrameInfo;
+ XCoreTargetLowering TLInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ XCoreTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual XCoreTargetLowering *getTargetLowering() const {
+ return const_cast<XCoreTargetLowering*>(&TLInfo);
+ }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Transforms/Hello/CMakeLists.txt b/lib/Transforms/Hello/CMakeLists.txt
new file mode 100644
index 0000000..b80d15b
--- /dev/null
+++ b/lib/Transforms/Hello/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library( LLVMHello
+ Hello.cpp
+ )
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
new file mode 100644
index 0000000..d07f613
--- /dev/null
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -0,0 +1,67 @@
+//===- Hello.cpp - Example code from "Writing an LLVM Pass" ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements two versions of the LLVM "Hello World" pass described
+// in docs/WritingAnLLVMPass.html
+//
+//===----------------------------------------------------------------------===//
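+// Usage (a sketch; the library path depends on your build configuration):
+//   opt -load path/to/LLVMHello.so -hello < input.bc > /dev/null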
+
+#define DEBUG_TYPE "hello"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(HelloCounter, "Counts number of functions greeted");
+
+namespace {
+ // Hello - The first implementation, without getAnalysisUsage.
+ struct Hello : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Hello() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ HelloCounter++;
+ std::string fname = F.getName();
+ EscapeString(fname);
+ cerr << "Hello: " << fname << "\n";
+ return false;
+ }
+ };
+}
+
+char Hello::ID = 0;
+static RegisterPass<Hello> X("hello", "Hello World Pass");
+
+namespace {
+ // Hello2 - The second implementation with getAnalysisUsage implemented.
+ struct Hello2 : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Hello2() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ HelloCounter++;
+ std::string fname = F.getName();
+ EscapeString(fname);
+ cerr << "Hello: " << fname << "\n";
+ return false;
+ }
+
+ // We don't modify the program, so we preserve all analyses
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+    }
+ };
+}
+
+char Hello2::ID = 0;
+static RegisterPass<Hello2>
+Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
diff --git a/lib/Transforms/Hello/Makefile b/lib/Transforms/Hello/Makefile
new file mode 100644
index 0000000..c5e75d4
--- /dev/null
+++ b/lib/Transforms/Hello/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Transforms/Hello/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMHello
+LOADABLE_MODULE = 1
+USEDLIBS =
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
new file mode 100644
index 0000000..2bb6428
--- /dev/null
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -0,0 +1,863 @@
+//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass promotes "by reference" arguments to be "by value" arguments. In
+// practice, this means looking for internal functions that have pointer
+// arguments. If it can prove, through the use of alias analysis, that an
+// argument is *only* loaded, then it can pass the value into the function
+// instead of the address of the value. This can cause recursive simplification
+// of code and lead to the elimination of allocas (especially in C++ template
+// code like the STL).
+//
+// This pass also handles aggregate arguments that are passed into a function,
+// scalarizing them if the elements of the aggregate are only loaded. Note that
+// by default it refuses to scalarize aggregates which would require passing in
+// more than three operands to the function, because passing thousands of
+// operands for a large array or structure is unprofitable! This limit can be
+// configured or disabled, however.
+//
+// Note that this transformation could also be done for arguments that are only
+// stored to (returning the value instead), but does not currently. This case
+// would be best handled when and if LLVM begins supporting multiple return
+// values from functions.
+//
+//===----------------------------------------------------------------------===//
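+// For example (an illustrative sketch, with made-up names), an internal
+// function such as
+//   define internal i32 @f(i32* %p) {      ; before: by reference
+//     %v = load i32* %p
+//     ret i32 %v
+//   }
+// is rewritten so each caller performs the load and passes the value:
+//   define internal i32 @f(i32 %p.val) {   ; after: by value
+//     ret i32 %p.val
+//   }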
+
+#define DEBUG_TYPE "argpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
+STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+
+namespace {
+ /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+ ///
+ struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetData>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned maxElements = 3)
+ : CallGraphSCCPass(&ID), maxElements(maxElements) {}
+
+ /// A vector used to hold the indices of a single GEP instruction
+ typedef std::vector<uint64_t> IndicesVector;
+
+ private:
+ bool PromoteArguments(CallGraphNode *CGN);
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+ Function *DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned maxElements;
+ };
+}
+
+char ArgPromotion::ID = 0;
+static RegisterPass<ArgPromotion>
+X("argpromotion", "Promote 'by reference' arguments to scalars");
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+ return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+ bool Changed = false, LocalChange;
+
+ do { // Iterate until we stop promoting from this SCC.
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ LocalChange |= PromoteArguments(SCC[i]);
+ Changed |= LocalChange; // Remember that we changed something.
+ } while (LocalChange);
+
+ return Changed;
+}
+
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+ Function *F = CGN->getFunction();
+
+ // Make sure that it is local to this module.
+ if (!F || !F->hasLocalLinkage()) return false;
+
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++ArgNo)
+ if (isa<PointerType>(I->getType()))
+ PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
+ if (PointerArgs.empty()) return false;
+
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers.
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
+ UI != E; ++UI) {
+ CallSite CS = CallSite::get(*UI);
+ if (!CS.getInstruction()) // "Taking the address" of the function
+ return false;
+
+ // Ensure that this call site is CALLING the function, not passing it as
+ // an argument.
+ if (!CS.isCallee(UI))
+ return false;
+ }
+
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument*, 8> ArgsToPromote;
+ SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+ for (unsigned i = 0; i != PointerArgs.size(); ++i) {
+ bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+
+ // If this is a byval argument, and if the aggregate type is small, just
+ // pass the elements, which is always safe.
+ Argument *PtrArg = PointerArgs[i].first;
+ if (isByVal) {
+ const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (maxElements > 0 && STy->getNumElements() > maxElements) {
+ DOUT << "argpromotion disable promoting argument '"
+ << PtrArg->getName() << "' because it would require adding more "
+ << "than " << maxElements << " arguments to the function.\n";
+ } else {
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ if (!STy->getElementType(i)->isSingleValueType()) {
+ AllSimple = false;
+ break;
+ }
+
+ // Safe to transform, don't even bother trying to "promote" it.
+          // Passing the elements as scalars will allow scalarrepl to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
+ }
+ }
+ }
+ }
+
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, isByVal))
+ ArgsToPromote.insert(PtrArg);
+ }
+
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return false;
+
+ Function *NewF = DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+
+ // Update the call graph to know that the function has been transformed.
+ getAnalysis<CallGraph>().changeFunction(F, NewF);
+ return true;
+}
+
+/// IsAlwaysValidPointer - Return true if the specified pointer is always legal
+/// to load.
+static bool IsAlwaysValidPointer(Value *V) {
+ if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true;
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V))
+ return IsAlwaysValidPointer(GEP->getOperand(0));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ return IsAlwaysValidPointer(CE->getOperand(0));
+
+ return false;
+}
+
+/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that
+/// all callees pass in a valid pointer for the specified function argument.
+static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
+ Function *Callee = Arg->getParent();
+
+ unsigned ArgNo = std::distance(Callee->arg_begin(),
+ Function::arg_iterator(Arg));
+
+  // Look at all call sites of the function. At this point we know we only
+  // have direct callers.
+ for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
+ UI != E; ++UI) {
+ CallSite CS = CallSite::get(*UI);
+ assert(CS.getInstruction() && "Should only have direct calls!");
+
+ if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
+ return false;
+ }
+ return true;
+}
+
+/// Returns true if Prefix is a prefix of Longer. That is, Longer has a size
+/// that is greater than or equal to the size of Prefix, and each element of
+/// Prefix equals the corresponding element of Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
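+///
+/// For example, IsPrefix({1,2}, {1,2,3}) and IsPrefix({1,2}, {1,2}) return
+/// true, while IsPrefix({1,3}, {1,2,3}) returns false.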
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+ const ArgPromotion::IndicesVector &Longer) {
+ if (Prefix.size() > Longer.size())
+ return false;
+ for (unsigned i = 0, e = Prefix.size(); i != e; ++i)
+ if (Prefix[i] != Longer[i])
+ return false;
+ return true;
+}
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+ std::set<ArgPromotion::IndicesVector> &Set) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This means
+ // it points to a prefix of Indices (possibly Indices itself), if such
+ // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of ToMark in Safe, ToMark is implicitly marked safe
+/// already. Furthermore, any indices that ToMark is itself a prefix of are
+/// removed from Safe (since they are implicitly safe because of ToMark now).
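+///
+/// For example (illustrative): with Safe = {{1,2}, {1,3}} and ToMark = {1},
+/// the call leaves Safe = {{1}}, since {1,2} and {1,3} are now implied.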
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+ std::set<ArgPromotion::IndicesVector> &Safe) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Safe.upper_bound(ToMark);
+ // Guard against the case where Safe is empty
+ if (Low != Safe.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This
+ // means it points to a prefix of Indices (possibly Indices itself), if
+ // such prefix exists.
+ if (Low != Safe.end()) {
+ if (IsPrefix(*Low, ToMark))
+      // If there is already a prefix of these indices (or exactly these
+      // indices) marked as safe, don't bother adding these indices.
+ return;
+
+  // Increment Low, so we can use it as an "insert before" hint.
+ ++Low;
+ }
+ // Insert
+ Low = Safe.insert(Low, ToMark);
+ ++Low;
+  // If ToMark is a prefix of longer index list(s), remove them.
+ std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+ while (Low != End && IsPrefix(ToMark, *Low)) {
+ std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+ ++Low;
+ Safe.erase(Remove);
+ }
+}
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to only promote up to three
+/// elements of the aggregate in order to avoid exploding the number of
+/// arguments passed in.
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
+ typedef std::set<IndicesVector> GEPIndicesSet;
+
+ // Quick exit for unused arguments
+ if (Arg->use_empty())
+ return true;
+
+ // We can only promote this argument if all of the uses are loads, or are GEP
+ // instructions (with constant indices) that are subsequently loaded.
+ //
+ // Promoting the argument causes it to be loaded in the caller
+ // unconditionally. This is only safe if we can prove that either the load
+  // would have happened in the callee anyway (i.e., there is a load in the
+  // entry block) or the pointer passed in at every call site is guaranteed to
+  // be valid.
+  //
+  // In the former case, invalid loads can happen, but would have happened
+  // anyway; in the latter case, invalid loads won't happen. This prevents us
+ // from introducing an invalid load that wouldn't have happened in the
+ // original code.
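+  // (Illustrative: in a callee like "if (c) use(*p);" the load of p is
+  // guarded, so hoisting it into every caller is only sound under one of the
+  // two conditions above.)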
+ //
+ // This set will contain all sets of indices that are loaded in the entry
+ // block, and thus are safe to unconditionally load in the caller.
+ GEPIndicesSet SafeToUnconditionallyLoad;
+
+ // This set contains all the sets of indices that we are planning to promote.
+ // This makes it possible to limit the number of arguments added.
+ GEPIndicesSet ToPromote;
+
+ // If the pointer is always valid, any load with first index 0 is valid.
+ if(isByVal || AllCalleesPassInValidPointerForArgument(Arg))
+ SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+ // First, iterate the entry block and mark loads of (geps of) arguments as
+ // safe.
+ BasicBlock *EntryBlock = Arg->getParent()->begin();
+ // Declare this here so we can reuse it
+ IndicesVector Indices;
+ for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
+ I != E; ++I)
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Value *V = LI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ V = GEP->getPointerOperand();
+ if (V == Arg) {
+ // This load actually loads (part of) Arg? Check the indices then.
+ Indices.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
+ Indices.push_back(CI->getSExtValue());
+ else
+ // We found a non-constant GEP index for this argument? Bail out
+ // right away, can't promote this argument at all.
+ return false;
+
+ // Indices checked out, mark them as safe
+ MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ Indices.clear();
+ }
+ } else if (V == Arg) {
+ // Direct loads are equivalent to a GEP with a single 0 index.
+ MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ }
+ }
+
+ // Now, iterate all uses of the argument to see if there are any uses that are
+ // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+ SmallVector<LoadInst*, 16> Loads;
+ IndicesVector Operands;
+ for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
+ UI != E; ++UI) {
+ Operands.clear();
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (LI->isVolatile()) return false; // Don't hack volatile loads
+ Loads.push_back(LI);
+ // Direct loads are equivalent to a GEP with a zero index and then a load.
+ Operands.push_back(0);
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ if (GEP->use_empty()) {
+ // Dead GEP's cause trouble later. Just remove them if we run into
+ // them.
+ getAnalysis<AliasAnalysis>().deleteValue(GEP);
+ GEP->eraseFromParent();
+        // TODO: This runs the above loop over and over again for dead GEPs.
+        // Couldn't we just increment the UI iterator earlier and erase the
+        // use?
+ return isSafeToPromoteArgument(Arg, isByVal);
+ }
+
+ // Ensure that all of the indices are constants.
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
+ i != e; ++i)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
+ Operands.push_back(C->getSExtValue());
+ else
+ return false; // Not a constant operand GEP!
+
+ // Ensure that the only users of the GEP are load instructions.
+ for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
+ UI != E; ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (LI->isVolatile()) return false; // Don't hack volatile loads
+ Loads.push_back(LI);
+ } else {
+ // Other uses than load?
+ return false;
+ }
+ } else {
+ return false; // Not a load or a GEP.
+ }
+
+ // Now, see if it is safe to promote this load / loads of this GEP. Loading
+ // is safe if Operands, or a prefix of Operands, is marked as safe.
+ if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ return false;
+
+ // See if we are already promoting a load with these indices. If not, check
+ // to make sure that we aren't promoting too many elements. If so, nothing
+ // to do.
+ if (ToPromote.find(Operands) == ToPromote.end()) {
+ if (maxElements > 0 && ToPromote.size() == maxElements) {
+ DOUT << "argpromotion not promoting argument '"
+ << Arg->getName() << "' because it would require adding more "
+ << "than " << maxElements << " arguments to the function.\n";
+ // We limit aggregate promotion to only promoting up to a fixed number
+ // of elements of the aggregate.
+ return false;
+ }
+ ToPromote.insert(Operands);
+ }
+ }
+
+ if (Loads.empty()) return true; // No users, this is a dead argument.
+
+ // Okay, now we know that the argument is only used by load instructions and
+ // it is safe to unconditionally perform all of them. Use alias analysis to
+ // check to see if the pointer is guaranteed to not be modified from entry of
+ // the function to each of the load instructions.
+
+ // Because there could be several/many load instructions, remember which
+ // blocks we know to be transparent to the load.
+ SmallPtrSet<BasicBlock*, 16> TranspBlocks;
+
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ TargetData &TD = getAnalysis<TargetData>();
+
+ for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
+ // Check to see if the load is invalidated from the start of the block to
+ // the load itself.
+ LoadInst *Load = Loads[i];
+ BasicBlock *BB = Load->getParent();
+
+ const PointerType *LoadTy =
+ cast<PointerType>(Load->getPointerOperand()->getType());
+ unsigned LoadSize = (unsigned)TD.getTypeStoreSize(LoadTy->getElementType());
+
+ if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
+ return false; // Pointer is invalidated!
+
+ // Now check every path from the entry block to the load for transparency.
+ // To do this, we perform a depth first search on the inverse CFG from the
+ // loading block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
+ I = idf_ext_begin(*PI, TranspBlocks),
+ E = idf_ext_end(*PI, TranspBlocks); I != E; ++I)
+ if (AA.canBasicBlockModify(**I, Arg, LoadSize))
+ return false;
+ }
+
+ // If the path from the entry of the function to each load is free of
+ // instructions that potentially invalidate the load, we can make the
+ // transformation!
+ return true;
+}
+
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+Function *ArgPromotion::DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<const Type*> Params;
+
+ typedef std::set<IndicesVector> ScalarizeTable;
+
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+  // Arguments that are directly loaded are represented here by an empty
+  // (zero-element) indices vector, to handle cases where there are both a
+  // direct load and GEP accesses.
+ //
+ std::map<Argument*, ScalarizeTable> ScalarizedElements;
+
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ std::map<IndicesVector, LoadInst*> OriginalLoads;
+
+ // Attributes - Keep track of the parameter attributes for the arguments
+ // that we are *not* promoting. For the ones that we do promote, the parameter
+  // attributes are lost.
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = PAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // First, determine the new argument list
+ unsigned ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIndex) {
+ if (ByValArgsToTransform.count(I)) {
+ // Simple byval argument? Just add all the struct element types.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ const StructType *STy = cast<StructType>(AgTy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Params.push_back(STy->getElementType(i));
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
+ } else if (I->use_empty()) {
+ // Dead argument (which are always marked as promotable)
+ ++NumArgumentsDead;
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads
+
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
+ IndicesVector Indices;
+ Indices.reserve(User->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(Indices);
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(User))
+ OrigLoad = L;
+ else
+ // Take any load, we will use it only to update Alias Analysis
+ OrigLoad = cast<LoadInst>(User->use_back());
+ OriginalLoads[Indices] = OrigLoad;
+ }
+
+ // Add a parameter to the function for each element passed in.
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ // not allowed to dereference ->begin() if size() is 0
+ Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
+ SI->begin(),
+ SI->end()));
+ assert(Params.back());
+ }
+
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
+ }
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ const Type *RetTy = FTy->getReturnType();
+
+ // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
+ // have zero fixed arguments.
+ bool ExtraArgHack = false;
+ if (Params.empty() && FTy->isVarArg()) {
+ ExtraArgHack = true;
+ Params.push_back(Type::Int32Ty);
+ }
+
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
+
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()));
+ AttributesVec.clear();
+
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Get the alias analysis information that we need to update to reflect our
+ // changes.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value*, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS = CallSite::get(F->use_back());
+ Instruction *Call = CS.getInstruction();
+ const AttrListPtr &CallPAL = CS.getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = CallPAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++AI, ++ArgIndex)
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ Args.push_back(*AI); // Unmodified argument
+
+ if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+
+ } else if (ByValArgsToTransform.count(I)) {
+ // Emit a GEP and load for each element of the struct.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ const StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 };
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::Int32Ty, i);
+ Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
+ (*AI)->getName()+"."+utostr(i),
+ Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse
+ std::vector<Value*> Ops;
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ Value *V = *AI;
+ LoadInst *OrigLoad = OriginalLoads[*SI];
+ if (!SI->empty()) {
+ Ops.reserve(SI->size());
+ const Type *ElTy = V->getType();
+ for (IndicesVector::const_iterator II = SI->begin(),
+ IE = SI->end(); II != IE; ++II) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ const Type *IdxTy = (isa<StructType>(ElTy) ? Type::Int32Ty : Type::Int64Ty);
+ Ops.push_back(ConstantInt::get(IdxTy, *II));
+ // Keep track of the type we're currently indexing
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
+ }
+ // And create a GEP to extract those indices
+ V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
+ V->getName()+".idx", Call);
+ Ops.clear();
+ AA.copyValue(OrigLoad->getOperand(0), V);
+ }
+ Args.push_back(new LoadInst(V, V->getName()+".val", Call));
+ AA.copyValue(OrigLoad, Args.back());
+ }
+ }
+
+ if (ExtraArgHack)
+ Args.push_back(Constant::getNullValue(Type::Int32Ty));
+
+ // Push any varargs arguments on the list
+ for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ Args.push_back(*AI);
+ if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = CallPAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(), "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ } else {
+ New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+ AttributesVec.clear();
+
+ // Update the alias analysis implementation to know that we are replacing
+ // the old call with a new one.
+ AA.replaceWithNewValue(Call, New);
+
+ // Update the callgraph to know that the callsite has been transformed.
+ CG[Call->getParent()->getParent()]->replaceCallSite(Call, New);
+
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments, also transferring over the names as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I) {
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ AA.replaceWithNewValue(I, I2);
+ ++I2;
+ continue;
+ }
+
+ if (ByValArgsToTransform.count(I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = NF->begin()->begin();
+
+ // Just add all the struct element types.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+ const StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 };
+
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::Int32Ty, i);
+ std::string Name = TheAlloca->getName()+"."+utostr(i);
+ Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+ Name, InsertPt);
+ I2->setName(I->getName()+"."+utostr(i));
+ new StoreInst(I2++, Idx, InsertPt);
+ }
+
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(I);
+ AA.replaceWithNewValue(I, TheAlloca);
+ continue;
+ }
+
+ if (I->use_empty()) {
+ AA.deleteValue(I);
+ continue;
+ }
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
+ assert(ArgIndices.begin()->empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName()+".val");
+ LI->replaceAllUsesWith(I2);
+ AA.replaceWithNewValue(LI, I2);
+ LI->eraseFromParent();
+ DOUT << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n";
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ *It != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
+ }
+
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
+ }
+ NewName += ".val";
+ TheArg->setName(NewName);
+
+ DOUT << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n";
+
+ // All of the uses must be load instructions. Replace them all with
+ // the argument specified by ArgNo.
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->use_back());
+ L->replaceAllUsesWith(TheArg);
+ AA.replaceWithNewValue(L, TheArg);
+ L->eraseFromParent();
+ }
+ AA.deleteValue(GEP);
+ GEP->eraseFromParent();
+ }
+ }
+
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
+ ++I2;
+ }
+
+ // Notify the alias analysis implementation that we inserted a new argument.
+ if (ExtraArgHack)
+ AA.copyValue(Constant::getNullValue(Type::Int32Ty), NF->arg_begin());
+
+ // Tell the alias analysis that the old function is about to disappear.
+ AA.replaceWithNewValue(F, NF);
+
+ // Now that the old function is dead, delete it.
+ F->eraseFromParent();
+ return NF;
+}
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
new file mode 100644
index 0000000..4b85e13
--- /dev/null
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -0,0 +1,25 @@
+add_llvm_library(LLVMipo
+ FunctionAttrs.cpp
+ ArgumentPromotion.cpp
+ ConstantMerge.cpp
+ DeadArgumentElimination.cpp
+ DeadTypeElimination.cpp
+ ExtractGV.cpp
+ GlobalDCE.cpp
+ GlobalOpt.cpp
+ IndMemRemoval.cpp
+ InlineAlways.cpp
+ Inliner.cpp
+ InlineSimple.cpp
+ Internalize.cpp
+ IPConstantPropagation.cpp
+ LoopExtractor.cpp
+ LowerSetJmp.cpp
+ MergeFunctions.cpp
+ PartialSpecialization.cpp
+ PruneEH.cpp
+ RaiseAllocations.cpp
+ StripDeadPrototypes.cpp
+ StripSymbols.cpp
+ StructRetPromotion.cpp
+ )
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
new file mode 100644
index 0000000..237e6db
--- /dev/null
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -0,0 +1,114 @@
+//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface to a pass that merges duplicate global
+// constants together into a single constant that is shared. This is useful
+// because some passes (e.g. TraceValues) insert a lot of string constants into
+// the program, regardless of whether or not an existing string is available.
+//
+// Algorithm: ConstantMerge builds up a map of available constants and
+// eliminates duplicates as it iterates over the module.
+//
+//===----------------------------------------------------------------------===//
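+// For example (an illustrative sketch):
+//   @s1 = internal constant [4 x i8] c"abc\00"
+//   @s2 = internal constant [4 x i8] c"abc\00"
+// has all uses of @s2 redirected to @s1, after which @s2 is deleted; this is
+// legal because the initializers match and @s2 has local linkage.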
+
+#define DEBUG_TYPE "constmerge"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumMerged, "Number of global constants merged");
+
+namespace {
+ struct VISIBILITY_HIDDEN ConstantMerge : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantMerge() : ModulePass(&ID) {}
+
+ // run - For this pass, process all of the globals in the module,
+ // eliminating duplicate constants.
+ //
+ bool runOnModule(Module &M);
+ };
+}
+
+char ConstantMerge::ID = 0;
+static RegisterPass<ConstantMerge>
+X("constmerge", "Merge Duplicate Global Constants");
+
+ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+bool ConstantMerge::runOnModule(Module &M) {
+ // Map unique constant/section pairs to globals. We don't want to merge
+ // globals in different sections.
+ std::map<std::pair<Constant*, std::string>, GlobalVariable*> CMap;
+
+ // Replacements - This vector contains a list of replacements to perform.
+ std::vector<std::pair<GlobalVariable*, GlobalVariable*> > Replacements;
+
+ bool MadeChange = false;
+
+ // Iterate constant merging while we are still making progress. Merging two
+ // constants together may allow us to merge other constants together if the
+ // second level constants have initializers which point to the globals that
+ // were just merged.
+ while (1) {
+ // First pass: identify all globals that can be merged together, filling in
+ // the Replacements vector. We cannot do the replacement in this pass
+ // because doing so may cause initializers of other globals to be rewritten,
+ // invalidating the Constant* pointers in CMap.
+ //
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // If this GV is dead, remove it.
+ GV->removeDeadConstantUsers();
+ if (GV->use_empty() && GV->hasLocalLinkage()) {
+ GV->eraseFromParent();
+ continue;
+ }
+
+ // Only process constants with initializers.
+ if (GV->isConstant() && GV->hasInitializer()) {
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *&Slot = CMap[std::make_pair(Init, GV->getSection())];
+
+ if (Slot == 0) { // Nope, add it to the map.
+ Slot = GV;
+ } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate!
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
+ }
+ }
+ }
+
+ if (Replacements.empty())
+ return MadeChange;
+ CMap.clear();
+
+    // Now that we have figured out which replacements must be made, do them all
+    // now. Doing the replacements here may invalidate the Constant* pointers
+    // that were in CMap, which is harmless since CMap has just been cleared.
+ for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+ // Eliminate any uses of the dead global...
+ Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
+
+ // Delete the global value from the module...
+ M.getGlobalList().erase(Replacements[i].first);
+ }
+
+ NumMerged += Replacements.size();
+ Replacements.clear();
+ }
+}
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
new file mode 100644
index 0000000..666db7e
--- /dev/null
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -0,0 +1,944 @@
+//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass deletes dead arguments from internal functions. Dead argument
+// elimination removes arguments which are directly dead, as well as arguments
+// only passed into function calls as dead arguments of other functions. This
+// pass also deletes dead return values in a similar way.
+//
+// This pass is often useful as a cleanup pass to run after aggressive
+// interprocedural passes, which add possibly-dead arguments or return values.
+//
+//===----------------------------------------------------------------------===//
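+
+// For illustration (hypothetical IR), in
+//
+//   define internal i32 @g(i32 %x, i32 %dead) {
+//     ret i32 %x
+//   }
+//
+// the second argument is never used, so this pass rewrites @g to take a
+// single argument and patches every call site to stop passing %dead.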
+
+#define DEBUG_TYPE "deadargelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constant.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Compiler.h"
+#include <map>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
+STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
+
+namespace {
+ /// DAE - The dead argument elimination pass.
+ ///
+ class VISIBILITY_HIDDEN DAE : public ModulePass {
+ public:
+
+ /// Struct that represents (part of) either a return value or a function
+ /// argument. Used so that arguments and return values can be used
+    /// interchangeably.
+ struct RetOrArg {
+ RetOrArg(const Function* F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
+ IsArg(IsArg) {}
+ const Function *F;
+ unsigned Idx;
+ bool IsArg;
+
+ /// Make RetOrArg comparable, so we can put it into a map.
+ bool operator<(const RetOrArg &O) const {
+ if (F != O.F)
+ return F < O.F;
+ else if (Idx != O.Idx)
+ return Idx < O.Idx;
+ else
+ return IsArg < O.IsArg;
+ }
+
+ /// Make RetOrArg comparable, so we can easily iterate the multimap.
+ bool operator==(const RetOrArg &O) const {
+ return F == O.F && Idx == O.Idx && IsArg == O.IsArg;
+ }
+
+ std::string getDescription() const {
+ return std::string((IsArg ? "Argument #" : "Return value #"))
+ + utostr(Idx) + " of function " + F->getName();
+ }
+ };
+
+    /// Liveness enum - During our initial pass over the program, we determine
+    /// that things are either alive or maybe alive. We don't mark anything
+    /// explicitly dead (even if we know it is), since anything that is never
+    /// marked alive and has no registered uses (in Uses) will simply end up
+    /// dead.
+ enum Liveness { Live, MaybeLive };
+
+ /// Convenience wrapper
+ RetOrArg CreateRet(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, false);
+ }
+ /// Convenience wrapper
+ RetOrArg CreateArg(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, true);
+ }
+
+ typedef std::multimap<RetOrArg, RetOrArg> UseMap;
+ /// This maps a return value or argument to any MaybeLive return values or
+ /// arguments it uses. This allows the MaybeLive values to be marked live
+ /// when any of its users is marked live.
+ /// For example (indices are left out for clarity):
+ /// - Uses[ret F] = ret G
+ /// This means that F calls G, and F returns the value returned by G.
+ /// - Uses[arg F] = ret G
+ /// This means that some function calls G and passes its result as an
+ /// argument to F.
+ /// - Uses[ret F] = arg F
+ /// This means that F returns one of its own arguments.
+ /// - Uses[arg F] = arg G
+ /// This means that G calls F and passes one of its own (G's) arguments
+ /// directly to F.
+ UseMap Uses;
+
+ typedef std::set<RetOrArg> LiveSet;
+ typedef std::set<const Function*> LiveFuncSet;
+
+ /// This set contains all values that have been determined to be live.
+ LiveSet LiveValues;
+    /// This set contains all functions that cannot be changed in any way.
+ LiveFuncSet LiveFunctions;
+
+ typedef SmallVector<RetOrArg, 5> UseVector;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DAE() : ModulePass(&ID) {}
+ bool runOnModule(Module &M);
+
+ virtual bool ShouldHackArguments() const { return false; }
+
+ private:
+ Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+ Liveness SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
+ unsigned RetValNum = 0);
+ Liveness SurveyUses(Value *V, UseVector &MaybeLiveUses);
+
+ void SurveyFunction(Function &F);
+ void MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses);
+ void MarkLive(const RetOrArg &RA);
+ void MarkLive(const Function &F);
+ void PropagateLiveness(const RetOrArg &RA);
+ bool RemoveDeadStuffFromFunction(Function *F);
+ bool DeleteDeadVarargs(Function &Fn);
+ };
+}
+
+
+char DAE::ID = 0;
+static RegisterPass<DAE>
+X("deadargelim", "Dead Argument Elimination");
+
+namespace {
+ /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+ /// deletes arguments to functions which are external. This is only for use
+ /// by bugpoint.
+ struct DAH : public DAE {
+ static char ID;
+ virtual bool ShouldHackArguments() const { return true; }
+ };
+}
+
+char DAH::ID = 0;
+static RegisterPass<DAH>
+Y("deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)");
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// DeleteDeadVarargs - If this is a function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
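+///
+/// For illustration (hypothetical IR), a call to such a function
+///
+///   %r = call i32 (i32, ...)* @f(i32 1, i32 2)
+///
+/// is rewritten to call a new non-varargs clone of @f:
+///
+///   %r = call i32 @f(i32 1)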
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+ assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+ if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+ // Ensure that the function is only directly called.
+ for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ++I) {
+ // If this use is anything other than a call site, give up.
+ CallSite CS = CallSite::get(*I);
+ Instruction *TheCall = CS.getInstruction();
+ if (!TheCall) return false; // Not a direct call site?
+
+ // The addr of this function is passed to the call.
+ if (!CS.isCallee(I)) return false;
+ }
+
+ // Okay, we know we can transform this function if safe. Scan its body
+ // looking for calls to llvm.vastart.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ }
+ }
+ }
+
+ // If we get here, there are no calls to llvm.vastart in the function body,
+ // remove the "..." and adjust all the calls.
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but doesn't have isVarArg set.
+ const FunctionType *FTy = Fn.getFunctionType();
+ std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false);
+ unsigned NumArgs = Params.size();
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, Fn.getLinkage());
+ NF->copyAttributesFrom(&Fn);
+ Fn.getParent()->getFunctionList().insert(&Fn, NF);
+ NF->takeName(&Fn);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!Fn.use_empty()) {
+ CallSite CS = CallSite::get(Fn.use_back());
+ Instruction *Call = CS.getInstruction();
+
+ // Pass all the same arguments.
+ Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs);
+
+ // Drop any attributes that were on the vararg arguments.
+ AttrListPtr PAL = CS.getAttributes();
+ if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
+ AttributesVec.push_back(PAL.getSlot(i));
+ if (Attributes FnAttrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end());
+ }
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(), "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(PAL);
+ } else {
+ New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(PAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+
+ if (!Call->use_empty())
+ Call->replaceAllUsesWith(New);
+
+ New->takeName(Call);
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments, also transferring over the names as well.
+  //
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+ // Move the name and users over to the new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ }
+
+ // Finally, nuke the old function.
+ Fn.eraseFromParent();
+ return true;
+}
+
+/// Convenience function that returns the number of return values. It returns 0
+/// for void functions and 1 for functions not returning a struct. It returns
+/// the number of struct elements for functions returning a struct.
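+/// For example (hypothetical signatures): it returns 0 for 'void @f()', 1 for
+/// 'i32 @f()', and 2 for '{ i32, float } @f()'.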
+static unsigned NumRetVals(const Function *F) {
+ if (F->getReturnType() == Type::VoidTy)
+ return 0;
+ else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ return STy->getNumElements();
+ else
+ return 1;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+ // We're live if our use or its Function is already marked as live.
+ if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+ return Live;
+
+ // We're maybe live otherwise, but remember that we must become live if
+ // Use becomes live.
+ MaybeLiveUses.push_back(Use);
+ return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeAlive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. This is used in the recursion, you should always leave
+/// it at 0.
+DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
+ unsigned RetValNum) {
+ Value *V = *U;
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+ // The value is returned from a function. It's only live when the
+ // function's return value is live. We use RetValNum here, for the case
+ // that U is really a use of an insertvalue instruction that uses the
+    // original Use.
+ RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+ && IV->hasIndices())
+ // The use we are examining is inserted into an aggregate. Our liveness
+ // depends on all uses of that aggregate, but if it is used as a return
+      // value, only the index at which we were inserted counts.
+ RetValNum = *IV->idx_begin();
+
+ // Note that if we are used as the aggregate operand to the insertvalue,
+ // we don't change RetValNum, but do survey all our uses.
+
+ Liveness Result = MaybeLive;
+ for (Value::use_iterator I = IV->use_begin(),
+         E = IV->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses, RetValNum);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+ }
+ CallSite CS = CallSite::get(V);
+ if (CS.getInstruction()) {
+ Function *F = CS.getCalledFunction();
+ if (F) {
+ // Used in a direct call.
+
+ // Find the argument number. We know for sure that this use is an
+      // argument, since if it were the callee operand this would be an
+      // indirect call, and we know we can't be looking at a value of the
+      // label type (for the invoke instruction).
+ unsigned ArgNo = CS.getArgumentNo(U.getOperandNo());
+
+ if (ArgNo >= F->getFunctionType()->getNumParams())
+ // The value is passed in through a vararg! Must be live.
+ return Live;
+
+ assert(CS.getArgument(ArgNo)
+ == CS.getInstruction()->getOperand(U.getOperandNo())
+ && "Argument is not where we expected it");
+
+ // Value passed to a normal call. It's only live when the corresponding
+ // argument to the called function turns out live.
+ RetOrArg Use = CreateArg(F, ArgNo);
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ }
+ // Used in any other way? Value must be live.
+ return Live;
+}
+
+/// SurveyUses - This looks at all the uses of the given value.
+/// Returns the Liveness deduced from the uses of this value.
+///
+/// Adds all uses that cause the result to be MaybeLive to MaybeLiveUses. If
+/// the result is Live, MaybeLiveUses might be modified but its content should
+/// be ignored (since it might not be complete).
+DAE::Liveness DAE::SurveyUses(Value *V, UseVector &MaybeLiveUses) {
+  // Assume it's dead (which will only hold if there are no uses at all).
+ Liveness Result = MaybeLive;
+ // Check each use.
+ for (Value::use_iterator I = V->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+}
+
+// SurveyFunction - This performs the initial survey of the specified function,
+// checking out whether or not it uses any of its incoming arguments or whether
+// any callers use the return value. This fills in the LiveValues set and Uses
+// map.
+//
+// We consider arguments of non-internal functions to be intrinsically alive as
+// well as arguments to functions which have their "address taken".
+//
+void DAE::SurveyFunction(Function &F) {
+ unsigned RetCount = NumRetVals(&F);
+ // Assume all return values are dead
+ typedef SmallVector<Liveness, 5> RetVals;
+ RetVals RetValLiveness(RetCount, MaybeLive);
+
+ typedef SmallVector<UseVector, 5> RetUses;
+ // These vectors map each return value to the uses that make it MaybeLive, so
+ // we can add those to the Uses map if the return value really turns out to be
+ // MaybeLive. Initialized to a list of RetCount empty lists.
+ RetUses MaybeLiveRetUses(RetCount);
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
+ != F.getFunctionType()->getReturnType()) {
+ // We don't support old style multiple return values.
+ MarkLive(F);
+ return;
+ }
+
+ if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+ MarkLive(F);
+ return;
+ }
+
+ DOUT << "DAE - Inspecting callers for fn: " << F.getName() << "\n";
+ // Keep track of the number of live retvals, so we can skip checks once all
+ // of them turn out to be live.
+ unsigned NumLiveRetVals = 0;
+ const Type *STy = dyn_cast<StructType>(F.getReturnType());
+ // Loop all uses of the function.
+ for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) {
+ // If the function is PASSED IN as an argument, its address has been
+ // taken.
+ CallSite CS = CallSite::get(*I);
+ if (!CS.getInstruction() || !CS.isCallee(I)) {
+ MarkLive(F);
+ return;
+ }
+
+    Instruction *TheCall = CS.getInstruction();
+
+ // If we end up here, we are looking at a direct call to our function.
+
+ // Now, check how our return value(s) is/are used in this caller. Don't
+ // bother checking return values if all of them are live already.
+ if (NumLiveRetVals != RetCount) {
+ if (STy) {
+ // Check all uses of the return value.
+ for (Value::use_iterator I = TheCall->use_begin(),
+ E = TheCall->use_end(); I != E; ++I) {
+ ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I);
+ if (Ext && Ext->hasIndices()) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
+ }
+ } else {
+          // Used by something other than extractvalue. Mark all return
+ // values as live.
+            for (unsigned i = 0; i != RetCount; ++i)
+ RetValLiveness[i] = Live;
+ NumLiveRetVals = RetCount;
+ break;
+ }
+ }
+ } else {
+ // Single return value
+ RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
+ if (RetValLiveness[0] == Live)
+ NumLiveRetVals = RetCount;
+ }
+ }
+ }
+
+ // Now we've inspected all callers, record the liveness of our return values.
+ for (unsigned i = 0; i != RetCount; ++i)
+ MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+
+ DOUT << "DAE - Inspecting args for fn: " << F.getName() << "\n";
+
+ // Now, check all of our arguments.
+ unsigned i = 0;
+ UseVector MaybeLiveArgUses;
+ for (Function::arg_iterator AI = F.arg_begin(),
+ E = F.arg_end(); AI != E; ++AI, ++i) {
+ // See what the effect of this use is (recording any uses that cause
+ // MaybeLive in MaybeLiveArgUses).
+ Liveness Result = SurveyUses(AI, MaybeLiveArgUses);
+ // Mark the result.
+ MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+ // Clear the vector again for the next iteration.
+ MaybeLiveArgUses.clear();
+ }
+}
+
+/// MarkValue - This function marks the liveness of RA depending on L. If L is
+/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
+/// such that RA will be marked live if any use in MaybeLiveUses gets marked
+/// live later on.
+void DAE::MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses) {
+ switch (L) {
+ case Live: MarkLive(RA); break;
+ case MaybeLive:
+ {
+ // Note any uses of this value, so this return value can be
+ // marked live whenever one of the uses becomes live.
+ for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
+ UE = MaybeLiveUses.end(); UI != UE; ++UI)
+ Uses.insert(std::make_pair(*UI, RA));
+ break;
+ }
+ }
+}
+
+/// MarkLive - Mark the given Function as alive, meaning that it cannot be
+/// changed in any way. Additionally, mark all of its arguments and return
+/// values as live, propagating that liveness through the Uses map.
+void DAE::MarkLive(const Function &F) {
+ DOUT << "DAE - Intrinsically live fn: " << F.getName() << "\n";
+ // Mark the function as live.
+ LiveFunctions.insert(&F);
+ // Mark all arguments as live.
+ for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+ PropagateLiveness(CreateArg(&F, i));
+ // Mark all return values as live.
+ for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+ PropagateLiveness(CreateRet(&F, i));
+}
+
+/// MarkLive - Mark the given return value or argument as live. Additionally,
+/// mark any values that are used by this value (according to Uses) live as
+/// well.
+void DAE::MarkLive(const RetOrArg &RA) {
+ if (LiveFunctions.count(RA.F))
+ return; // Function was already marked Live.
+
+ if (!LiveValues.insert(RA).second)
+ return; // We were already marked Live.
+
+ DOUT << "DAE - Marking " << RA.getDescription() << " live\n";
+ PropagateLiveness(RA);
+}
+
+/// PropagateLiveness - Given that RA is a live value, propagate its liveness
+/// to any other values it uses (according to Uses).
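+///
+/// For example, if Uses contains the pair (ret F -> ret G) because F returns
+/// the value returned by G, then marking 'ret F' live also marks 'ret G'
+/// live, which in turn propagates through anything recorded for 'ret G'.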
+void DAE::PropagateLiveness(const RetOrArg &RA) {
+ // We don't use upper_bound (or equal_range) here, because our recursive call
+ // to ourselves is likely to cause the upper_bound (which is the first value
+ // not belonging to RA) to become erased and the iterator invalidated.
+ UseMap::iterator Begin = Uses.lower_bound(RA);
+ UseMap::iterator E = Uses.end();
+ UseMap::iterator I;
+ for (I = Begin; I != E && I->first == RA; ++I)
+ MarkLive(I->second);
+
+ // Erase RA from the Uses map (from the lower bound to wherever we ended up
+ // after the loop).
+ Uses.erase(Begin, I);
+}
+
+// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
+// that are not in LiveValues. Transform the function and all of the callers of
+// the function to not have these arguments and return values.
+//
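+// For illustration (hypothetical IR): a function 'define internal { i32, i32 }
+// @h(...)' whose second struct element is never extracted by any caller is
+// rewritten to return a plain 'i32', and every call site is patched up with
+// extractvalue/insertvalue chains to match.
+//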
+bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+ // Don't modify fully live functions
+ if (LiveFunctions.count(F))
+ return false;
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has fewer arguments and a different return type.
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<const Type*> Params;
+
+ // Set up to build a new list of parameter attributes.
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // The existing function return attributes.
+ Attributes RAttrs = PAL.getRetAttributes();
+ Attributes FnAttrs = PAL.getFnAttributes();
+
+ // Find out the new return value.
+
+ const Type *RetTy = FTy->getReturnType();
+ const Type *NRetTy = NULL;
+ unsigned RetCount = NumRetVals(F);
+ // -1 means unused, other numbers are the new index
+ SmallVector<int, 5> NewRetIdxs(RetCount, -1);
+ std::vector<const Type*> RetTypes;
+ if (RetTy == Type::VoidTy) {
+ NRetTy = Type::VoidTy;
+ } else {
+ const StructType *STy = dyn_cast<StructType>(RetTy);
+ if (STy)
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(STy->getElementType(i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
+ } else {
+ ++NumRetValsEliminated;
+ DOUT << "DAE - Removing return value " << i << " from "
+ << F->getNameStart() << "\n";
+ }
+ }
+ else
+ // We used to return a single value.
+ if (LiveValues.erase(CreateRet(F, 0))) {
+ RetTypes.push_back(RetTy);
+ NewRetIdxs[0] = 0;
+ } else {
+ DOUT << "DAE - Removing return value from " << F->getNameStart()
+ << "\n";
+ ++NumRetValsEliminated;
+ }
+ if (RetTypes.size() > 1)
+ // More than one return type? Return a struct with them. Also, if we used
+ // to return a struct and didn't change the number of return values,
+ // return a struct again. This prevents changing {something} into
+ // something and {} into void.
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(RetTypes, STy->isPacked());
+ else if (RetTypes.size() == 1)
+ // One return type? Just a simple value then, but only if we didn't use to
+ // return a struct with that simple value before.
+ NRetTy = RetTypes.front();
+ else if (RetTypes.size() == 0)
+ // No return types? Make it void, but only if we didn't use to return {}.
+ NRetTy = Type::VoidTy;
+ }
+
+ assert(NRetTy && "No new return type found?");
+
+ // Remove any incompatible attributes, but only if we removed all return
+ // values. Otherwise, ensure that we don't have any conflicting attributes
+ // here. Currently, this should not be possible, but special handling might be
+ // required when new return value attributes are added.
+ if (NRetTy == Type::VoidTy)
+ RAttrs &= ~Attribute::typeIncompatible(NRetTy);
+ else
+ assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
+ && "Return attributes no longer compatible?");
+
+ if (RAttrs)
+ AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ // Remember which arguments are still alive.
+ SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+ // Construct the new parameter list from non-dead arguments. Also construct
+ // a new set of parameter attributes to correspond. Skip the first parameter
+ // attribute, since that belongs to the return value.
+ unsigned i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++i) {
+ RetOrArg Arg = CreateArg(F, i);
+ if (LiveValues.erase(Arg)) {
+ Params.push_back(I->getType());
+ ArgAlive[i] = true;
+
+      // Get the original parameter attributes (skipping the first one, which
+      // is for the return value).
+ if (Attributes Attrs = PAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
+ } else {
+ ++NumArgumentsEliminated;
+ DOUT << "DAE - Removing argument " << i << " (" << I->getNameStart()
+ << ") from " << F->getNameStart() << "\n";
+ }
+ }
+
+ if (FnAttrs != Attribute::None)
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end());
+
+ // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
+ // have zero fixed arguments.
+ //
+  // Note that we apply this hack for a vararg function that does not have any
+ // arguments anymore, but did have them before (so don't bother fixing
+ // functions that were already broken wrt CWriter).
+ bool ExtraArgHack = false;
+ if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) {
+ ExtraArgHack = true;
+ Params.push_back(Type::Int32Ty);
+ }
+
+ // Create the new function type based on the recomputed parameters.
+ FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+
+ // No change?
+ if (NFTy == FTy)
+ return false;
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, F->getLinkage());
+ NF->copyAttributesFrom(F);
+ NF->setAttributes(NewPAL);
+ // Insert the new function before the old function, so we won't be processing
+ // it again.
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!F->use_empty()) {
+ CallSite CS = CallSite::get(F->use_back());
+ Instruction *Call = CS.getInstruction();
+
+ AttributesVec.clear();
+ const AttrListPtr &CallPAL = CS.getAttributes();
+
+ // The call return attributes.
+ Attributes RAttrs = CallPAL.getRetAttributes();
+ Attributes FnAttrs = CallPAL.getFnAttributes();
+ // Adjust in case the function was changed to return void.
+ RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType());
+ if (RAttrs)
+ AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ // Declare these outside of the loops, so we can reuse them for the second
+ // loop, which loops the varargs.
+ CallSite::arg_iterator I = CS.arg_begin();
+ unsigned i = 0;
+ // Loop over those operands, corresponding to the normal arguments to the
+ // original function, and add those that are still alive.
+ for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
+ if (ArgAlive[i]) {
+ Args.push_back(*I);
+ // Get original parameter attributes, but skip return attributes.
+ if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ if (ExtraArgHack)
+ Args.push_back(UndefValue::get(Type::Int32Ty));
+
+ // Push any varargs arguments on the list. Don't forget their attributes.
+ for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+ Args.push_back(*I);
+ if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ if (FnAttrs != Attribute::None)
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end());
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(), "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ } else {
+ New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(NewCallPAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+
+ if (!Call->use_empty()) {
+ if (New->getType() == Call->getType()) {
+ // Return type not changed? Just replace users then.
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ } else if (New->getType() == Type::VoidTy) {
+ // Our return value has uses, but they will get removed later on.
+ // Replace by null for now.
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ } else {
+ assert(isa<StructType>(RetTy) &&
+ "Return type changed, but not into a void. The old return type"
+ " must have been a struct!");
+ Instruction *InsertPt = Call;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock::iterator IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP)) ++IP;
+ InsertPt = IP;
+ }
+
+ // We used to return a struct. Instead of doing smart stuff with all the
+ // uses of this struct, we will just rebuild it using
+ // extract/insertvalue chaining and let instcombine clean that up.
+ //
+ // Start out building up our return value from undef
+ Value *RetVal = llvm::UndefValue::get(RetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ Value *V;
+ if (RetTypes.size() > 1)
+ // We are still returning a struct, so extract the value from our
+ // return value
+ V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret",
+ InsertPt);
+ else
+ // We are now returning a single element, so just insert that
+ V = New;
+ // Insert the value at the old position
+ RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt);
+ }
+ // Now, replace all uses of the old call instruction with the return
+ // struct we built
+ Call->replaceAllUsesWith(RetVal);
+ New->takeName(Call);
+ }
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments, also transferring over the names as well.
+ i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++i)
+ if (ArgAlive[i]) {
+ // If this is a live argument, move the name and users over to the new
+ // version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ ++I2;
+ } else {
+ // If this argument is dead, replace any uses of it with null constants
+ // (these are guaranteed to become unused later on).
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ }
+
+ // If we change the return value of the function we must rewrite any return
+ // instructions. Check this now.
+ if (F->getReturnType() != NF->getReturnType())
+ for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Value *RetVal;
+
+ if (NFTy->getReturnType() == Type::VoidTy) {
+ RetVal = 0;
+ } else {
+ assert (isa<StructType>(RetTy));
+ // The original return value was a struct, insert
+ // extractvalue/insertvalue chains to extract only the values we need
+ // to return and insert them into our new result.
+          // This does generate messy code, but we'll leave it to instcombine
+          // to clean that up.
+ Value *OldRet = RI->getOperand(0);
+ // Start out building up our return value from undef
+ RetVal = llvm::UndefValue::get(NRetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
+ "oldret", RI);
+ if (RetTypes.size() > 1) {
+ // We're still returning a struct, so reinsert the value into
+ // our new return value at the new index
+
+ RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
+ "newret", RI);
+ } else {
+ // We are now only returning a simple value, so just return the
+ // extracted value.
+ RetVal = EV;
+ }
+ }
+ }
+ // Replace the return instruction with one returning the new return
+ // value (possibly 0 if we became void).
+ ReturnInst::Create(RetVal, RI);
+ BB->getInstList().erase(RI);
+ }
+
+ // Now that the old function is dead, delete it.
+ F->eraseFromParent();
+
+ return true;
+}
+
+bool DAE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // First pass: Do a simple check to see if any functions can have their "..."
+ // removed. We can do this if they never call va_start. This loop cannot be
+ // fused with the next loop, because deleting a function invalidates
+ // information computed while surveying other functions.
+ DOUT << "DAE - Deleting dead varargs\n";
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function &F = *I++;
+ if (F.getFunctionType()->isVarArg())
+ Changed |= DeleteDeadVarargs(F);
+ }
+
+  // Second phase: loop through the module, determining which arguments are live.
+ // We assume all arguments are dead unless proven otherwise (allowing us to
+ // determine that dead arguments passed into recursive functions are dead).
+ //
+ DOUT << "DAE - Determining liveness\n";
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ SurveyFunction(*I);
+
+ // Now, remove all dead arguments and return values from each function in
+  // turn.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    // Increment now, because the function will probably get removed (i.e.
+ // replaced by a new one).
+ Function *F = I++;
+ Changed |= RemoveDeadStuffFromFunction(F);
+ }
+ return Changed;
+}
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
new file mode 100644
index 0000000..85aed2b
--- /dev/null
+++ b/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -0,0 +1,107 @@
+//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to clean up the output of GCC. It eliminates names for types
+// that are unused in the entire translation unit, using the FindUsedTypes pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "deadtypeelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
+
+namespace {
+ struct VISIBILITY_HIDDEN DTE : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ DTE() : ModulePass(&ID) {}
+
+    // runOnModule - For this pass, it removes global symbol table
+    // entries for primitive types. These are never used for linking in GCC and
+    // they make the output uglier to look at, so we nuke them.
+    //
+ bool runOnModule(Module &M);
+
+ // getAnalysisUsage - This function needs FindUsedTypes to do its job...
+ //
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FindUsedTypes>();
+ }
+ };
+}
+
+char DTE::ID = 0;
+static RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination");
+
+ModulePass *llvm::createDeadTypeEliminationPass() {
+ return new DTE();
+}
+
+
+// ShouldNukeSymtabEntry - Return true if this module level symbol table entry
+// should be eliminated.
+//
+static inline bool ShouldNukeSymtabEntry(const Type *Ty){
+ // Nuke all names for primitive types!
+ if (Ty->isPrimitiveType() || Ty->isInteger())
+ return true;
+
+ // Nuke all pointers to primitive types as well...
+ if (const PointerType *PT = dyn_cast<PointerType>(Ty))
+ if (PT->getElementType()->isPrimitiveType() ||
+ PT->getElementType()->isInteger())
+ return true;
+
+ return false;
+}
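+
+// For example, symbol table entries naming 'i32' or 'i32*' are always nuked,
+// while a name for '{ i32, float }' survives only if FindUsedTypes reports
+// that the struct type is actually used somewhere in the module.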
+
+// run - For this pass, it removes global symbol table entries for primitive
+// types. These are never used for linking in GCC and they make the output
+// uglier to look at, so we nuke them. Also eliminate types that are never used
+// in the entire program as indicated by FindUsedTypes.
+//
+bool DTE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ TypeSymbolTable &ST = M.getTypeSymbolTable();
+ std::set<const Type *> UsedTypes = getAnalysis<FindUsedTypes>().getTypes();
+
+ // Check the symbol table for superfluous type entries...
+ //
+ // Grab the 'type' plane of the module symbol...
+ TypeSymbolTable::iterator TI = ST.begin();
+ TypeSymbolTable::iterator TE = ST.end();
+  while (TI != TE) {
+ // If this entry should be unconditionally removed, or if we detect that
+ // the type is not used, remove it.
+ const Type *RHS = TI->second;
+ if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) {
+ ST.remove(TI++);
+ ++NumKilled;
+ Changed = true;
+ } else {
+ ++TI;
+ // We only need to leave one name for each type.
+ UsedTypes.erase(RHS);
+ }
+ }
+
+ return Changed;
+}
+
+// vim: sw=2
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 0000000..0c529d2
--- /dev/null
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,173 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values from a module.
+//
+//===----------------------------------------------------------------------===//
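+//
+// Depending on how it is configured, this pass either deletes the named
+// global values from the module, or internalizes everything else and keeps
+// only the named values accessible.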
+
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Constants.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// @brief A pass to extract specific functions and their dependencies.
+ class VISIBILITY_HIDDEN GVExtractorPass : public ModulePass {
+ std::vector<GlobalValue*> Named;
+ bool deleteStuff;
+ bool reLink;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+    /// GVExtractorPass - If deleteS is true, this pass deletes the
+    /// specified global values. Otherwise, it deletes as much of the module
+    /// as possible, except for the global values specified.
+ ///
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true,
+ bool relinkCallees = false)
+ : ModulePass(&ID), Named(GVs), deleteStuff(deleteS),
+ reLink(relinkCallees) {}
+
+ bool runOnModule(Module &M) {
+ if (Named.size() == 0) {
+ return false; // Nothing to extract
+ }
+
+ if (deleteStuff)
+ return deleteGV();
+ M.setModuleInlineAsm("");
+ return isolateGV(M);
+ }
+
+ bool deleteGV() {
+ for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
+ GE = Named.end(); GI != GE; ++GI) {
+ if (Function* NamedFunc = dyn_cast<Function>(*GI)) {
+ // If we're in relinking mode, set linkage of all internal callees to
+        // external. This will allow us to extract the function and then link
+        // everything back together.
+ if (reLink) {
+ for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end();
+ B != BE; ++B) {
+ for (BasicBlock::iterator I = B->begin(), E = B->end();
+ I != E; ++I) {
+ if (CallInst* callInst = dyn_cast<CallInst>(&*I)) {
+ Function* Callee = callInst->getCalledFunction();
+ if (Callee && Callee->hasLocalLinkage())
+ Callee->setLinkage(GlobalValue::ExternalLinkage);
+ }
+ }
+ }
+ }
+
+ NamedFunc->setLinkage(GlobalValue::ExternalLinkage);
+ NamedFunc->deleteBody();
+ assert(NamedFunc->isDeclaration() && "This didn't make the function external!");
+ } else {
+ if (!(*GI)->isDeclaration()) {
+          cast<GlobalVariable>(*GI)->setInitializer(0); // Clear the initializer.
+ (*GI)->setLinkage(GlobalValue::ExternalLinkage);
+ }
+ }
+ }
+ return true;
+ }
+
+ bool isolateGV(Module &M) {
+ // Mark all globals internal
+ // FIXME: what should we do with private linkage?
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ }
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ }
+
+      // Make sure our results are globally accessible by putting them in the
+      // llvm.used array.
+ {
+ std::vector<Constant *> AUGs;
+ const Type *SBP= PointerType::getUnqual(Type::Int8Ty);
+ for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
+ GE = Named.end(); GI != GE; ++GI) {
+ (*GI)->setLinkage(GlobalValue::ExternalLinkage);
+ AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP));
+ }
+ ArrayType *AT = ArrayType::get(SBP, AUGs.size());
+ Constant *Init = ConstantArray::get(AT, AUGs);
+ GlobalValue *gv = new GlobalVariable(AT, false,
+ GlobalValue::AppendingLinkage,
+ Init, "llvm.used", &M);
+ gv->setSection("llvm.metadata");
+ }
+
+ // All of the functions may be used by global variables or the named
+      // globals. Loop through them and create new, external function
+      // declarations that can be "used", instead of ones with bodies.
+ std::vector<Function*> NewFunctions;
+
+ Function *Last = --M.end(); // Figure out where the last real fn is.
+
+ for (Module::iterator I = M.begin(); ; ++I) {
+ if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
+ Function *New = Function::Create(I->getFunctionType(),
+ GlobalValue::ExternalLinkage);
+ New->copyAttributesFrom(I);
+
+ // If it's not the named function, delete the body of the function
+ I->dropAllReferences();
+
+ M.getFunctionList().push_back(New);
+ NewFunctions.push_back(New);
+ New->takeName(I);
+ }
+
+ if (&*I == Last) break; // Stop after processing the last function
+ }
+
+ // Now that we have replacements all set up, loop through the module,
+ // deleting the old functions, replacing them with the newly created
+ // functions.
+ if (!NewFunctions.empty()) {
+ unsigned FuncNum = 0;
+ Module::iterator I = M.begin();
+ do {
+ if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
+ // Make everything that uses the old function use the new dummy fn
+ I->replaceAllUsesWith(NewFunctions[FuncNum++]);
+
+ Function *Old = I;
+ ++I; // Move the iterator to the new function
+
+ // Delete the old function!
+ M.getFunctionList().erase(Old);
+
+ } else {
+ ++I; // Skip the function we are extracting
+ }
+ } while (&*I != NewFunctions[0]);
+ }
+
+ return true;
+ }
+ };
+
+ char GVExtractorPass::ID = 0;
+}
+
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
+ bool deleteFn, bool relinkCallees) {
+ return new GVExtractorPass(GVs, deleteFn, relinkCallees);
+}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
new file mode 100644
index 0000000..e831524
--- /dev/null
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -0,0 +1,347 @@
+//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, looking for functions which do not access or only read
+// non-local memory, and marking them readnone/readonly. In addition,
+// it marks function arguments (of pointer type) 'nocapture' if a call
+// to the function does not create any copies of the pointer value that
+// outlive the call. This more or less means that the pointer is only
+// dereferenced, and not returned from the function or stored in a global.
+// This pass is implemented as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
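+
+// For illustration (hypothetical IR), a function such as
+//
+//   define i32 @get(i32* %p) {
+//     %v = load i32* %p
+//     ret i32 %v
+//   }
+//
+// only reads non-local memory and never captures %p, so this pass would mark
+// it 'readonly' and mark %p 'nocapture'.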
+
+#define DEBUG_TYPE "functionattrs"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumReadNone, "Number of functions marked readnone");
+STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+
+namespace {
+ struct VISIBILITY_HIDDEN FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(&ID) {}
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+
+ // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+ bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC);
+
+ // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+ bool AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC);
+
+ // IsFunctionMallocLike - Does this function allocate new memory?
+ bool IsFunctionMallocLike(Function *F,
+ SmallPtrSet<CallGraphNode*, 8> &) const;
+
+ // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+ bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ bool PointsToLocalMemory(Value *V);
+ };
+}
+
+char FunctionAttrs::ID = 0;
+static RegisterPass<FunctionAttrs>
+X("functionattrs", "Deduce function attributes");
+
+Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+
+
+/// PointsToLocalMemory - Returns whether the given pointer value points to
+/// memory that is local to the function. Global constants are considered
+/// local to all functions.
+bool FunctionAttrs::PointsToLocalMemory(Value *V) {
+ V = V->getUnderlyingObject();
+ // An alloca instruction defines local memory.
+ if (isa<AllocaInst>(V))
+ return true;
+ // A global constant counts as local memory for our purposes.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ // Could look through phi nodes and selects here, but it doesn't seem
+ // to be useful in practice.
+ return false;
+}
+
+/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
+ SmallPtrSet<CallGraphNode*, 8> SCCNodes;
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ SCCNodes.insert(SCC[i]);
+
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+
+ if (F == 0)
+ // External node - may write memory. Just give up.
+ return false;
+
+ if (F->doesNotAccessMemory())
+ // Already perfect!
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden()) {
+ if (!F->onlyReadsMemory())
+ // May write memory. Just give up.
+ return false;
+
+ ReadsMemory = true;
+ continue;
+ }
+
+ // Scan the function body for instructions that may read or write memory.
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS = CallSite::get(I);
+ if (CS.getInstruction()) {
+ // Ignore calls to functions in the same SCC.
+ if (SCCNodes.count(CG[CS.getCalledFunction()]))
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore loads from local memory.
+ if (PointsToLocalMemory(LI->getPointerOperand()))
+ continue;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore stores to local memory.
+ if (PointsToLocalMemory(SI->getPointerOperand()))
+ continue;
+ }
+
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return false;
+
+ if (isa<MallocInst>(I))
+ // MallocInst claims not to write memory! PR3754.
+ return false;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+ }
+
+ // Success! Functions in this SCC do not access memory, or only read memory.
+ // Give them the appropriate attribute.
+ bool MadeChange = false;
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+
+ if (F->doesNotAccessMemory())
+ // Already perfect!
+ continue;
+
+ if (F->onlyReadsMemory() && ReadsMemory)
+ // No change.
+ continue;
+
+ MadeChange = true;
+
+ // Clear out any existing attributes.
+ F->removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone);
+
+ // Add in the new attribute.
+ F->addAttribute(~0, ReadsMemory? Attribute::ReadOnly : Attribute::ReadNone);
+
+ if (ReadsMemory)
+ ++NumReadOnly;
+ else
+ ++NumReadNone;
+ }
+
+ return MadeChange;
+}
+
+/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {
+ bool Changed = false;
+
+ // Check each function in turn, determining which pointer arguments are not
+ // captured.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+
+ if (F == 0)
+      // External node - skip it.
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ continue;
+
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
+ if (isa<PointerType>(A->getType()) && !A->hasNoCaptureAttr() &&
+ !PointerMayBeCaptured(A, true)) {
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// IsFunctionMallocLike - A function is malloc-like if it returns either null
+/// or a pointer that doesn't alias any other pointer visible to the caller.
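+/// For example (hypothetical), a function that returns either null or a
+/// freshly allocated pointer is malloc-like, while one that may return its
+/// own pointer argument is not, since the caller can already see that pointer.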
+bool FunctionAttrs::IsFunctionMallocLike(Function *F,
+ SmallPtrSet<CallGraphNode*, 8> &SCCNodes) const {
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ UniqueVector<Value *> FlowsToReturn;
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
+
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i+1]; // UniqueVector[0] is reserved.
+
+ if (Constant *C = dyn_cast<Constant>(RetVal)) {
+ if (!C->isNullValue() && !isa<UndefValue>(C))
+ return false;
+
+ continue;
+ }
+
+ if (isa<Argument>(RetVal))
+ return false;
+
+ if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::GetElementPtr:
+ case Instruction::BitCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ } continue;
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ } continue;
+
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ case Instruction::Malloc:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() &&
+ SCCNodes.count(CG[CS.getCalledFunction()]))
+ break;
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
+ }
+
+ if (PointerMayBeCaptured(RetVal, false))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
+ SmallPtrSet<CallGraphNode*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ SCCNodes.insert(SCC[i]);
+
+ // Check each function in turn, determining which functions return noalias
+ // pointers.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+
+ if (F == 0)
+ // External node - noalias cannot be deduced for the whole SCC.
+ return false;
+
+ // Already noalias.
+ if (F->doesNotAlias(0))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate noalias return values, which are only applicable to
+ // pointer types.
+ if (!isa<PointerType>(F->getReturnType()))
+ continue;
+
+ if (!IsFunctionMallocLike(F, SCCNodes))
+ return false;
+ }
+
+ bool MadeChange = false;
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (F->doesNotAlias(0) || !isa<PointerType>(F->getReturnType()))
+ continue;
+
+ F->setDoesNotAlias(0);
+ ++NumNoAlias;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
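+
+// Note that the deduction above is all-or-nothing for the SCC: if any member
+// is external, overridable or not malloc-like, we bail out before annotating
+// anything, since the members may return each other's results.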
+
+bool FunctionAttrs::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+ bool Changed = AddReadAttrs(SCC);
+ Changed |= AddNoCaptureAttrs(SCC);
+ Changed |= AddNoAliasAttrs(SCC);
+ return Changed;
+}
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
new file mode 100644
index 0000000..db378b0
--- /dev/null
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -0,0 +1,227 @@
+//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate unreachable internal globals from the
+// program. It uses an aggressive algorithm, searching out globals that are
+// known to be alive. After it finds all of the globals which are needed, it
+// deletes whatever is left over. This allows it to delete recursive chunks of
+// the program which are unreachable.
+//
+//===----------------------------------------------------------------------===//
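+
+// For illustration (hypothetical C++): if nothing externally visible ever
+// calls chain() in
+//
+//   static int leaf()  { return 1; }
+//   static int chain() { return leaf(); }  // only caller of leaf()
+//
+// then neither function is marked alive and the whole chain is deleted at
+// once.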
+
+#define DEBUG_TYPE "globaldce"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of global aliases removed");
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+ struct VISIBILITY_HIDDEN GlobalDCE : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ GlobalDCE() : ModulePass(&ID) {}
+
+ // runOnModule - Do the GlobalDCE pass on the specified module, deleting
+ // anything that is provably dead.
+ //
+ bool runOnModule(Module &M);
+
+ private:
+ std::set<GlobalValue*> AliveGlobals;
+
+ /// GlobalIsNeeded - mark the specified global value as needed, and
+ /// recursively mark anything that it uses as also needed.
+ void GlobalIsNeeded(GlobalValue *GV);
+ void MarkUsedGlobalsAsNeeded(Constant *C);
+
+ bool SafeToDestroyConstant(Constant* C);
+ bool RemoveUnusedGlobalValue(GlobalValue &GV);
+ };
+}
+
+char GlobalDCE::ID = 0;
+static RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination");
+
+ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
+
+bool GlobalDCE::runOnModule(Module &M) {
+ bool Changed = false;
+ // Loop over the module, adding globals which are obviously necessary.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Functions with external linkage are needed if they have a body
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible & appending globals are needed if they have an
+ // initializer.
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible aliases are needed.
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ // Now that all globals which are needed are in the AliveGlobals set, we loop
+ // through the program, deleting those which are not alive.
+ //
+
+ // The first pass is to drop initializers of global variables which are dead.
+ std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadGlobalVars.push_back(I); // Keep track of dead globals
+ I->setInitializer(0);
+ }
+
+ // The second pass drops the bodies of functions which are dead...
+ std::vector<Function*> DeadFunctions;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadFunctions.push_back(I); // Keep track of dead globals
+ if (!I->isDeclaration())
+ I->deleteBody();
+ }
+
+ // The third pass drops targets of aliases which are dead...
+ std::vector<GlobalAlias*> DeadAliases;
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;
+ ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadAliases.push_back(I);
+ I->setAliasee(0);
+ }
+
+ if (!DeadFunctions.empty()) {
+ // Now that all interferences have been dropped, delete the actual objects
+ // themselves.
+ for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadFunctions[i]);
+ M.getFunctionList().erase(DeadFunctions[i]);
+ }
+ NumFunctions += DeadFunctions.size();
+ Changed = true;
+ }
+
+ if (!DeadGlobalVars.empty()) {
+ for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadGlobalVars[i]);
+ M.getGlobalList().erase(DeadGlobalVars[i]);
+ }
+ NumVariables += DeadGlobalVars.size();
+ Changed = true;
+ }
+
+ // Now delete any dead aliases.
+ if (!DeadAliases.empty()) {
+ for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadAliases[i]);
+ M.getAliasList().erase(DeadAliases[i]);
+ }
+ NumAliases += DeadAliases.size();
+ Changed = true;
+ }
+
+ // Make sure that all memory is released
+ AliveGlobals.clear();
+ return Changed;
+}
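+
+// Note on the ordering above: initializers, bodies and aliasees are dropped
+// before anything is erased so that mutually referencing dead globals (for
+// example two internal functions that call each other) no longer use one
+// another by the time the actual deletion happens.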
+
+/// GlobalIsNeeded - mark the specified global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+ std::set<GlobalValue*>::iterator I = AliveGlobals.find(G);
+
+ // If the global is already in the set, no need to reprocess it.
+ if (I != AliveGlobals.end()) return;
+
+ // Otherwise insert it now, so we do not infinitely recurse
+ AliveGlobals.insert(I, G);
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+ // If this is a global variable, we must make sure to add any global values
+ // referenced by the initializer to the alive set.
+ if (GV->hasInitializer())
+ MarkUsedGlobalsAsNeeded(GV->getInitializer());
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+ // The target of a global alias is needed.
+ MarkUsedGlobalsAsNeeded(GA->getAliasee());
+ } else {
+ // Otherwise this must be a function object. We have to scan the body of
+ // the function looking for constants and global values which are used as
+ // operands. Any operands of these types must be processed to ensure that
+ // any globals used will be marked as needed.
+ Function *F = cast<Function>(G);
+ // For all basic blocks...
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ // For all instructions...
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ // For all operands...
+ for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+ GlobalIsNeeded(GV);
+ else if (Constant *C = dyn_cast<Constant>(*U))
+ MarkUsedGlobalsAsNeeded(C);
+ }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ GlobalIsNeeded(GV);
+ else {
+ // Loop over all of the operands of the constant, adding any globals they
+ // use to the list of needed globals.
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
+ MarkUsedGlobalsAsNeeded(cast<Constant>(*I));
+ }
+}
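+
+// For illustration (hypothetical IR): an alive initializer such as
+//
+//   @table = global [1 x i8*] [i8* bitcast (i32 ()* @f to i8*)]
+//
+// reaches @f only through a constant expression; the recursion above walks
+// the bitcast's operands and marks @f as needed.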
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+ if (GV.use_empty()) return false;
+ GV.removeDeadConstantUsers();
+ return GV.use_empty();
+}
+
+// SafeToDestroyConstant - It is safe to destroy a constant iff it is itself
+// only used by other constants. Note that constants cannot be cyclic, so this
+// test is pretty easy to implement recursively.
+//
+bool GlobalDCE::SafeToDestroyConstant(Constant *C) {
+ for (Value::use_iterator I = C->use_begin(), E = C->use_end(); I != E; ++I)
+ if (Constant *User = dyn_cast<Constant>(*I)) {
+ if (!SafeToDestroyConstant(User)) return false;
+ } else {
+ return false;
+ }
+ return true;
+}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 0000000..2c01cc3
--- /dev/null
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,2485 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken. Where obviously safe, it marks read/write globals as constant, deletes
+// variables only stored to, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalopt"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumMarked , "Number of globals marked constant");
+STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
+STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
+STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
+STATISTIC(NumDeleted , "Number of globals deleted");
+STATISTIC(NumFnDeleted , "Number of functions deleted");
+STATISTIC(NumGlobUses , "Number of global uses devirtualized");
+STATISTIC(NumLocalized , "Number of globals localized");
+STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
+STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
+STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
+STATISTIC(NumNestRemoved , "Number of nest attributes removed");
+STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
+STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
+
+namespace {
+ struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ }
+ static char ID; // Pass identification, replacement for typeid
+ GlobalOpt() : ModulePass(&ID) {}
+
+ bool runOnModule(Module &M);
+
+ private:
+ GlobalVariable *FindGlobalCtors(Module &M);
+ bool OptimizeFunctions(Module &M);
+ bool OptimizeGlobalVars(Module &M);
+ bool OptimizeGlobalAliases(Module &M);
+ bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
+ bool ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI);
+ };
+}
+
+char GlobalOpt::ID = 0;
+static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");
+
+ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
+
+namespace {
+
+/// GlobalStatus - As we analyze each global, keep track of some information
+/// about it. If we find out that the address of the global is taken, none of
+/// this info will be accurate.
+struct VISIBILITY_HIDDEN GlobalStatus {
+ /// isLoaded - True if the global is ever loaded. If the global isn't ever
+ /// loaded it can be deleted.
+ bool isLoaded;
+
+ /// StoredType - Keep track of what stores to the global look like.
+ ///
+ enum StoredType {
+ /// NotStored - There is no store to this global. It can thus be marked
+ /// constant.
+ NotStored,
+
+ /// isInitializerStored - This global is stored to, but the only thing
+ /// stored is the constant it was initialized with. This is only tracked
+ /// for scalar globals.
+ isInitializerStored,
+
+ /// isStoredOnce - This global is stored to, but only its initializer and
+ /// one other value is ever stored to it. If this global isStoredOnce, we
+ /// track the value stored to it in StoredOnceValue below. This is only
+ /// tracked for scalar globals.
+ isStoredOnce,
+
+ /// isStored - This global is stored to by multiple values or something else
+ /// that we cannot track.
+ isStored
+ } StoredType;
+
+ /// StoredOnceValue - If only one value (besides the initializer constant) is
+ /// ever stored to this global, keep track of what value it is.
+ Value *StoredOnceValue;
+
+ /// AccessingFunction/HasMultipleAccessingFunctions - These start out
+ /// null/false. When the first accessing function is noticed, it is recorded.
+ /// When a second different accessing function is noticed,
+ /// HasMultipleAccessingFunctions is set to true.
+ Function *AccessingFunction;
+ bool HasMultipleAccessingFunctions;
+
+ /// HasNonInstructionUser - Set to true if this global has a user that is not
+ /// an instruction (e.g. a constant expr or GV initializer).
+ bool HasNonInstructionUser;
+
+ /// HasPHIUser - Set to true if this global has a user that is a PHI node.
+ bool HasPHIUser;
+
+ GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0),
+ AccessingFunction(0), HasMultipleAccessingFunctions(false),
+ HasNonInstructionUser(false), HasPHIUser(false) {}
+};
+
+}
+
+/// ConstantIsDead - Return true if the specified constant is (transitively)
+/// dead. The constant may be used by other constants (e.g. constant arrays and
+/// constant exprs) as long as they are dead, but it cannot be used by anything
+/// else.
+static bool ConstantIsDead(Constant *C) {
+ if (isa<GlobalValue>(C)) return false;
+
+ for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
+ if (Constant *CU = dyn_cast<Constant>(*UI)) {
+ if (!ConstantIsDead(CU)) return false;
+ } else
+ return false;
+ return true;
+}
+
+
+/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
+/// structure. If the global has its address taken, return true to indicate we
+/// can't do anything with it.
+///
+static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
+ SmallPtrSet<PHINode*, 16> &PHIUsers) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
+ GS.HasNonInstructionUser = true;
+
+ if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
+
+ } else if (Instruction *I = dyn_cast<Instruction>(*UI)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.isLoaded = true;
+ if (LI->isVolatile()) return true; // Don't hack on volatile loads.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V) return true;
+
+ if (SI->isVolatile()) return true; // Don't hack on volatile stores.
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::isStored) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){
+ Value *StoredVal = SI->getOperand(0);
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ // G = G
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
+ GS.StoredType = GlobalStatus::isStoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ }
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (isa<SelectInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PHIUsers.insert(PN)) // Not already visited.
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ GS.HasPHIUser = true;
+ } else if (isa<CmpInst>(I)) {
+ } else if (isa<MemTransferInst>(I)) {
+ if (I->getOperand(1) == V)
+ GS.StoredType = GlobalStatus::isStored;
+ if (I->getOperand(2) == V)
+ GS.isLoaded = true;
+ } else if (isa<MemSetInst>(I)) {
+ assert(I->getOperand(1) == V && "Memset only takes one pointer!");
+ GS.StoredType = GlobalStatus::isStored;
+ } else {
+ return true; // Any other non-load instruction might take the address!
+ }
+ } else if (Constant *C = dyn_cast<Constant>(*UI)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!ConstantIsDead(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+
+ return false;
+}
+
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
+ if (!CI) return 0;
+ unsigned IdxV = CI->getZExtValue();
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) {
+ if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV);
+ } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) {
+ if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV);
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
+ if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV);
+ } else if (isa<ConstantAggregateZero>(Agg)) {
+ if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (IdxV < STy->getNumElements())
+ return Constant::getNullValue(STy->getElementType(IdxV));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(Agg->getType())) {
+ return Constant::getNullValue(STy->getElementType());
+ }
+ } else if (isa<UndefValue>(Agg)) {
+ if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (IdxV < STy->getNumElements())
+ return UndefValue::get(STy->getElementType(IdxV));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(Agg->getType())) {
+ return UndefValue::get(STy->getElementType());
+ }
+ }
+ return 0;
+}
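+
+// For illustration: for Agg = { i32 1, i32 2 } and Idx = i32 1 this returns
+// the constant i32 2, while for a zeroinitializer or undef aggregate it
+// manufactures the matching null or undef element rather than indexing.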
+
+
+/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
+/// users of the global, cleaning up the obvious ones. This is largely just a
+/// quick scan over the use list to clean up the easy and obvious cruft. This
+/// returns true if it made a change.
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
+ User *U = *UI++;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (Init) {
+ // Replace the load with the initializer.
+ LI->replaceAllUsesWith(Init);
+ LI->eraseFromParent();
+ Changed = true;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Store must be unreachable or storing Init into the global.
+ SI->eraseFromParent();
+ Changed = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Constant *SubInit = 0;
+ if (Init)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit);
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<PointerType>(CE->getType())) {
+ // Pointer cast, delete any stores and memsets to the global.
+ Changed |= CleanupConstantGlobalUsers(CE, 0);
+ }
+
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // Do not transform "gepinst (gep constexpr (GV))" here, because forming
+ // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
+ // and will invalidate our notion of what Init is.
+ Constant *SubInit = 0;
+ if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ ConstantExpr *CE =
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
+ if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ }
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
+
+ if (GEP->use_empty()) {
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
+ if (MI->getRawDest() == V) {
+ MI->eraseFromParent();
+ Changed = true;
+ }
+
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ // If we have a chain of dead constantexprs or other things dangling from
+ // us, and if they are all dead, nuke them without remorse.
+ if (ConstantIsDead(C)) {
+ C->destroyConstant();
+ // This could have invalidated UI, start over from scratch.
+ CleanupConstantGlobalUsers(V, Init);
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
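+
+// For illustration (hypothetical IR): once @g with initializer i32 42 has
+// been marked constant, a user such as
+//
+//   %x = load i32* @g
+//
+// is replaced by i32 42 and erased, and any store to @g (necessarily
+// unreachable, or storing the initializer back) is simply deleted.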
+
+/// isSafeSROAElementUse - Return true if the specified instruction is a safe
+/// user of a derived expression from a global that we want to SROA.
+static bool isSafeSROAElementUse(Value *V) {
+ // We might have a dead and dangling constant hanging off of here.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantIsDead(C);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Loads are ok.
+ if (isa<LoadInst>(I)) return true;
+
+ // Stores *to* the pointer are ok.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getOperand(0) != V;
+
+ // Otherwise, it must be a GEP.
+ GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
+ if (GEPI == 0) return false;
+
+ if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
+ !cast<Constant>(GEPI->getOperand(1))->isNullValue())
+ return false;
+
+ for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
+ I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+
+/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
+/// Look at it and its uses and decide whether it is safe to SROA this global.
+///
+static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
+ // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
+ cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
+ return false;
+
+ // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
+ // don't like < 3 operand CE's, and we don't like non-constant integer
+ // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
+ // value of C.
+ if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+ !cast<Constant>(U->getOperand(1))->isNullValue() ||
+ !isa<ConstantInt>(U->getOperand(2)))
+ return false;
+
+ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+ ++GEPI; // Skip over the pointer index.
+
+ // If this is a use of an array allocation, do a bit more checking for sanity.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
+ uint64_t NumElements = AT->getNumElements();
+ ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
+
+ // Check to make sure that index falls within the array. If not,
+ // something funny is going on, so we won't do the optimization.
+ //
+ if (Idx->getZExtValue() >= NumElements)
+ return false;
+
+ // We cannot scalar repl this level of the array unless any array
+ // sub-indices are in-range constants. In particular, consider:
+ // A[0][i]. We cannot know that the user isn't doing invalid things like
+ // allowing i to index an out-of-range subscript that accesses A[1].
+ //
+ // Scalar replacing *just* the outer index of the array is probably not
+ // going to be a win anyway, so just give up.
+ for (++GEPI; // Skip array index.
+ GEPI != E && (isa<ArrayType>(*GEPI) || isa<VectorType>(*GEPI));
+ ++GEPI) {
+ uint64_t NumElements;
+ if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
+ NumElements = SubArrayTy->getNumElements();
+ else
+ NumElements = cast<VectorType>(*GEPI)->getNumElements();
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+ if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
+ return false;
+ }
+ }
+
+ for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
+/// is safe for us to perform this transformation.
+///
+static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ if (!IsUserOfGlobalSafeForSRA(*UI, GV))
+ return false;
+ }
+ return true;
+}
+
+
+/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
+/// variable. This opens the door for other optimizations by exposing the
+/// behavior of the program in a more fine-grained way. We have determined that
+/// this transformation is safe already. We return the first global variable we
+/// insert so that the caller can reprocess it.
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
+ // Make sure this global only has simple uses that we can SRA.
+ if (!GlobalUsersSafeToSRA(GV))
+ return 0;
+
+ assert(GV->hasLocalLinkage() && !GV->isConstant());
+ Constant *Init = GV->getInitializer();
+ const Type *Ty = Init->getType();
+
+ std::vector<GlobalVariable*> NewGlobals;
+ Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+
+ // Get the alignment of the global, either explicit or target-specific.
+ unsigned StartAlignment = GV->getAlignment();
+ if (StartAlignment == 0)
+ StartAlignment = TD.getABITypeAlignment(GV->getType());
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ NewGlobals.reserve(STy->getNumElements());
+ const StructLayout &Layout = *TD.getStructLayout(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Constant *In = getAggregateConstantElement(Init,
+ ConstantInt::get(Type::Int32Ty, i));
+ assert(In && "Couldn't get element of initializer?");
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+utostr(i),
+ (Module *)NULL,
+ GV->isThreadLocal(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ uint64_t FieldOffset = Layout.getElementOffset(i);
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
+ if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
+ NGV->setAlignment(NewAlign);
+ }
+ } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ unsigned NumElements = 0;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ NumElements = ATy->getNumElements();
+ else
+ NumElements = cast<VectorType>(STy)->getNumElements();
+
+ if (NumElements > 16 && GV->hasNUsesOrMore(16))
+ return 0; // It's not worth it.
+ NewGlobals.reserve(NumElements);
+
+ uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
+ unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Constant *In = getAggregateConstantElement(Init,
+ ConstantInt::get(Type::Int32Ty, i));
+ assert(In && "Couldn't get element of initializer?");
+
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+utostr(i),
+ (Module *)NULL,
+ GV->isThreadLocal(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ if (NewAlign > EltAlign)
+ NGV->setAlignment(NewAlign);
+ }
+ }
+
+ if (NewGlobals.empty())
+ return 0;
+
+ DOUT << "PERFORMING GLOBAL SRA ON: " << *GV;
+
+ Constant *NullInt = Constant::getNullValue(Type::Int32Ty);
+
+ // Loop over all of the uses of the global, replacing the constantexpr geps,
+ // with smaller constantexpr geps or direct references.
+ while (!GV->use_empty()) {
+ User *GEP = GV->use_back();
+ assert(((isa<ConstantExpr>(GEP) &&
+ cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
+ isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
+
+ // Ignore the first index operand, which must be zero or else the program is
+ // badly broken (undefined behavior). Use the second operand, which is the
+ // structure or array index.
+ unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
+
+ Value *NewPtr = NewGlobals[Val];
+
+ // Form a shorter GEP if needed.
+ if (GEP->getNumOperands() > 3) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
+ Idxs.push_back(CE->getOperand(i));
+ NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr),
+ &Idxs[0], Idxs.size());
+ } else {
+ GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
+ SmallVector<Value*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
+ Idxs.push_back(GEPI->getOperand(i));
+ NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
+ GEPI->getName()+"."+utostr(Val), GEPI);
+ }
+ }
+ GEP->replaceAllUsesWith(NewPtr);
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
+ GEPI->eraseFromParent();
+ else
+ cast<ConstantExpr>(GEP)->destroyConstant();
+ }
+
+ // Delete the old global, now that it is dead.
+ Globals.erase(GV);
+ ++NumSRA;
+
+ // Loop over the new globals array deleting any globals that are obviously
+ // dead. This can arise due to scalarization of a structure or an array that
+ // has elements that are dead.
+ unsigned FirstGlobal = 0;
+ for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
+ if (NewGlobals[i]->use_empty()) {
+ Globals.erase(NewGlobals[i]);
+ if (FirstGlobal == i) ++FirstGlobal;
+ }
+
+ return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
+}
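+
+// For illustration (hypothetical IR): running SRA over
+//
+//   @g = internal global { i32, i32 } zeroinitializer
+//
+// produces @g.0 and @g.1, and a use such as
+//
+//   getelementptr ({ i32, i32 }* @g, i32 0, i32 1)
+//
+// becomes a direct reference to @g.1.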
+
+/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
+/// value will trap if the value is dynamically null. PHIs keeps track of any
+/// phi nodes we've seen to avoid reprocessing them.
+static bool AllUsesOfValueWillTrapIfNull(Value *V,
+ SmallPtrSet<PHINode*, 8> &PHIs) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (isa<LoadInst>(*UI)) {
+ // Will trap.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ if (SI->getOperand(0) == V) {
+ //cerr << "NONTRAPPING USE: " << **UI;
+ return false; // Storing the value.
+ }
+ } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ if (CI->getOperand(0) != V) {
+ //cerr << "NONTRAPPING USE: " << **UI;
+ return false; // Not calling the ptr
+ }
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+ if (II->getOperand(0) != V) {
+ //cerr << "NONTRAPPING USE: " << **UI;
+ return false; // Not calling the ptr
+ }
+ } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) {
+ if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) {
+ if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
+ } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
+ // If we've already seen this phi node, ignore it, it has already been
+ // checked.
+ if (PHIs.insert(PN))
+ return AllUsesOfValueWillTrapIfNull(PN, PHIs);
+ } else if (isa<ICmpInst>(*UI) &&
+ isa<ConstantPointerNull>(UI->getOperand(1))) {
+ // Ignore setcc X, null
+ } else {
+ //cerr << "NONTRAPPING USE: " << **UI;
+ return false;
+ }
+ return true;
+}
+
+/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
+/// from GV will trap if the loaded value is null. Note that this also permits
+/// comparisons of the loaded value against null, as a special case.
+static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ SmallPtrSet<PHINode*, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (isa<StoreInst>(*UI)) {
+ // Ignore stores to the global.
+ } else {
+ // We don't know or understand this user, bail out.
+ //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI;
+ return false;
+ }
+
+ return true;
+}
+
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
+ Instruction *I = cast<Instruction>(*UI++);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LI->setOperand(0, NewV);
+ Changed = true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) == V) {
+ SI->setOperand(1, NewV);
+ Changed = true;
+ }
+ } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ if (I->getOperand(0) == V) {
+ // Calling through the pointer! Turn into a direct call, but be careful
+ // that the pointer is not also being passed as an argument.
+ I->setOperand(0, NewV);
+ Changed = true;
+ bool PassedAsArg = false;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i) == V) {
+ PassedAsArg = true;
+ I->setOperand(i, NewV);
+ }
+
+ if (PassedAsArg) {
+ // Being passed as an argument also. Be careful to not invalidate UI!
+ UI = V->use_begin();
+ }
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(CI,
+ ConstantExpr::getCast(CI->getOpcode(),
+ NewV, CI->getType()));
+ if (CI->use_empty()) {
+ Changed = true;
+ CI->eraseFromParent();
+ }
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ // Should handle GEP here.
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.reserve(GEPI->getNumOperands()-1);
+ for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+ i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(*i))
+ Idxs.push_back(C);
+ else
+ break;
+ if (Idxs.size() == GEPI->getNumOperands()-1)
+ Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
+ ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
+ Idxs.size()));
+ if (GEPI->use_empty()) {
+ Changed = true;
+ GEPI->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+
+/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
+/// value stored into it. If there are uses of the loaded value that would trap
+/// if the loaded value is dynamically null, then we know that those uses
+/// cannot be reachable with a null value, and we can optimize away the load.
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+ bool Changed = false;
+
+ // Keep track of whether we are able to remove all the uses of the global
+ // other than the store that defines it.
+ bool AllNonStoreUsesGone = true;
+
+ // Replace all uses of loads with uses of uses of the stored value.
+ for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
+ User *GlobalUser = *GUI++;
+ if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+ // If we were able to delete all uses of the loads
+ if (LI->use_empty()) {
+ LI->eraseFromParent();
+ Changed = true;
+ } else {
+ AllNonStoreUsesGone = false;
+ }
+ } else if (isa<StoreInst>(GlobalUser)) {
+ // Ignore the store that stores "LV" to the global.
+ assert(GlobalUser->getOperand(1) == GV &&
+ "Must be storing *to* the global");
+ } else {
+ AllNonStoreUsesGone = false;
+
+ // If we get here we could have other crazy uses that are transitively
+ // loaded.
+ assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+ isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!");
+ }
+ }
+
+ if (Changed) {
+ DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV;
+ ++NumGlobUses;
+ }
+
+ // If we nuked all of the loads, then none of the stores are needed either,
+ // nor is the global.
+ if (AllNonStoreUsesGone) {
+ DOUT << " *** GLOBAL NOW DEAD!\n";
+ CleanupConstantGlobalUsers(GV, 0);
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+ Changed = true;
+ }
+ return Changed;
+}
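+
+// For illustration (hypothetical IR): if the only value ever stored to @p is
+// the function @impl, then in
+//
+//   %fp = load void ()** @p
+//   call void %fp()        ; would trap if %fp were null
+//
+// the call can only execute after the store, so it is turned into a direct
+// call to @impl; once all loads are gone, the stores and @p itself go too.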
+
+/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
+/// instructions that are foldable.
+static void ConstantPropUsersOf(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
+ if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ if (Constant *NewC = ConstantFoldInstruction(I)) {
+ I->replaceAllUsesWith(NewC);
+
+ // Advance UI to the next non-I use to avoid invalidating it!
+ // Instructions could multiply use V.
+ while (UI != E && *UI == I)
+ ++UI;
+ I->eraseFromParent();
+ }
+}
+
+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result of
+/// the specified malloc. Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc. Instead, turn the
+/// malloc into a global, and any loads of GV as uses of the new global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+ MallocInst *MI) {
+ DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI;
+ ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize());
+
+ if (NElements->getZExtValue() != 1) {
+ // If we have an array allocation, transform it to a single element
+ // allocation to make the code below simpler.
+ Type *NewTy = ArrayType::get(MI->getAllocatedType(),
+ NElements->getZExtValue());
+ MallocInst *NewMI =
+ new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty),
+ MI->getAlignment(), MI->getName(), MI);
+ Value* Indices[2];
+ Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty);
+ Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
+ NewMI->getName()+".el0", MI);
+ MI->replaceAllUsesWith(NewGEP);
+ MI->eraseFromParent();
+ MI = NewMI;
+ }
+
+ // Create the new global variable. The contents of the malloc'd memory are
+ // undefined, so initialize with an undef value.
+ Constant *Init = UndefValue::get(MI->getAllocatedType());
+ GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false,
+ GlobalValue::InternalLinkage, Init,
+ GV->getName()+".body",
+ (Module *)NULL,
+ GV->isThreadLocal());
+ // FIXME: This new global should have the alignment returned by malloc. Code
+ // could depend on malloc returning large alignment (on the mac, 16 bytes) but
+ // this would only guarantee some lower alignment.
+ GV->getParent()->getGlobalList().insert(GV, NewGV);
+
+ // Anything that used the malloc now uses the global directly.
+ MI->replaceAllUsesWith(NewGV);
+
+ Constant *RepValue = NewGV;
+ if (NewGV->getType() != GV->getType()->getElementType())
+ RepValue = ConstantExpr::getBitCast(RepValue,
+ GV->getType()->getElementType());
+
+ // If there is a comparison against null, we will insert a global bool to
+ // keep track of whether the global was initialized yet or not.
+ GlobalVariable *InitBool =
+ new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(), GV->getName()+".init",
+ (Module *)NULL, GV->isThreadLocal());
+ bool InitBoolUsed = false;
+
+ // Loop over all uses of GV, processing them in turn.
+ std::vector<StoreInst*> Stores;
+ while (!GV->use_empty())
+ if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
+ while (!LI->use_empty()) {
+ Use &LoadUse = LI->use_begin().getUse();
+ if (!isa<ICmpInst>(LoadUse.getUser()))
+ LoadUse = RepValue;
+ else {
+ ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser());
+ // Replace the cmp X, 0 with a use of the bool value.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI);
+ InitBoolUsed = true;
+ switch (CI->getPredicate()) {
+ default: assert(0 && "Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ LV = ConstantInt::getFalse(); // X < null -> always false
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", CI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ CI->replaceAllUsesWith(LV);
+ CI->eraseFromParent();
+ }
+ }
+ LI->eraseFromParent();
+ } else {
+ StoreInst *SI = cast<StoreInst>(GV->use_back());
+ // The global is initialized when the store to it occurs.
+ new StoreInst(ConstantInt::getTrue(), InitBool, SI);
+ SI->eraseFromParent();
+ }
+
+ // If the initialization boolean was used, insert it, otherwise delete it.
+ if (!InitBoolUsed) {
+ while (!InitBool->use_empty()) // Delete initializations
+ cast<Instruction>(InitBool->use_back())->eraseFromParent();
+ delete InitBool;
+ } else
+ GV->getParent()->getGlobalList().insert(GV, InitBool);
+
+
+ // Now the GV is dead, nuke it and the malloc.
+ GV->eraseFromParent();
+ MI->eraseFromParent();
+
+ // To enable further optimizations, loop over all users of NewGV and try to
+ // constant prop them. This will promote GEP instructions with constant
+ // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+ ConstantPropUsersOf(NewGV);
+ if (RepValue != NewGV)
+ ConstantPropUsersOf(RepValue);
+
+ return NewGV;
+}
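+
+// For illustration (hypothetical IR): given
+//
+//   @p = internal global i32* null
+//   ...
+//   %m = malloc i32
+//   store i32* %m, i32** @p
+//
+// the code above creates "@p.body = internal global i32 undef" plus, if @p is
+// ever compared against null, a boolean "@p.init" that the store sets to
+// true; loads of @p are then rewritten to use @p.body directly.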
+
+/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
+/// to make sure that there are no complex uses of V. We permit simple things
+/// like dereferencing the pointer, but not storing through the address, unless
+/// it is to the specified global.
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
+ GlobalVariable *GV,
+ SmallPtrSet<PHINode*, 8> &PHIs) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *Inst = dyn_cast<Instruction>(*UI);
+ if (Inst == 0) return false;
+
+ if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
+ continue; // Fine, ignore.
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
+ return false; // Storing the pointer itself... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
+
+ if (isa<GetElementPtrInst>(Inst)) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(Inst)) {
+ // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
+ // cycles.
+ if (PHIs.insert(PN))
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ return false;
+ }
+ return true;
+}
+
+/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
+/// somewhere. Transform all uses of the allocation into loads from the
+/// global and uses of the resultant pointer. Further, delete the store into
+/// GV. This assumes that these values pass the
+/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+ GlobalVariable *GV) {
+ while (!Alloc->use_empty()) {
+ Instruction *U = cast<Instruction>(*Alloc->use_begin());
+ Instruction *InsertPt = U;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If this is the store of the allocation into the global, remove it.
+ if (SI->getOperand(1) == GV) {
+ SI->eraseFromParent();
+ continue;
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Insert the load in the corresponding predecessor, not right before the
+ // PHI.
+ InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
+ } else if (isa<BitCastInst>(U)) {
+ // Must be bitcast between the malloc and store to initialize the global.
+ ReplaceUsesOfMallocWithGlobal(U, GV);
+ U->eraseFromParent();
+ continue;
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ // If this is a "GEP bitcast" and the user is a store to the global, then
+ // just process it as a bitcast.
+ if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
+ if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
+ if (SI->getOperand(1) == GV) {
+ // Must be bitcast GEP between the malloc and store to initialize
+ // the global.
+ ReplaceUsesOfMallocWithGlobal(GEPI, GV);
+ GEPI->eraseFromParent();
+ continue;
+ }
+ }
+
+ // Insert a load from the global, and use it instead of the malloc.
+ Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+ U->replaceUsesOfWith(Alloc, NL);
+ }
+}
+
+/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
+/// of a load) are simple enough to perform heap SRA on. This permits GEPs
+/// that index through the array and struct field, icmps against null, and PHIs.
+static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
+ SmallPtrSet<PHINode*, 32> &LoadUsingPHIs,
+ SmallPtrSet<PHINode*, 32> &LoadUsingPHIsPerLoad) {
+ // We permit three kinds of users of the load: an icmp against the null
+ // pointer, a getelementptr of a specific form, and a PHI whose uses are
+ // themselves simple enough.
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Comparison against null is ok.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return false;
+ continue;
+ }
+
+ // getelementptr is also ok, but only a simple form.
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Must index into the array and into the struct.
+ if (GEPI->getNumOperands() < 3)
+ return false;
+
+ // Otherwise the GEP is ok.
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ if (!LoadUsingPHIsPerLoad.insert(PN))
+ // This means some phi nodes are dependent on each other.
+ // Avoid infinite looping!
+ return false;
+ if (!LoadUsingPHIs.insert(PN))
+ // If we have already analyzed this PHI, then it is safe.
+ continue;
+
+ // Make sure all uses of the PHI are simple enough to transform.
+ if (!LoadUsesSimpleEnoughForHeapSRA(PN,
+ LoadUsingPHIs, LoadUsingPHIsPerLoad))
+ return false;
+
+ continue;
+ }
+
+ // Otherwise we don't know what this is, not ok.
+ return false;
+ }
+
+ return true;
+}
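+
+// For illustration (hypothetical IR): for a value %v loaded from the global,
+// uses such as
+//
+//   %c  = icmp eq %struct.T* %v, null
+//   %f1 = getelementptr %struct.T* %v, i32 0, i32 1
+//
+// are accepted, as are PHIs whose own uses pass the same test; anything else
+// (a store of %v, passing %v to a call, ...) rejects the load for heap SRA.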
+
+
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
+/// GV are simple enough to perform HeapSRA, return true.
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
+ MallocInst *MI) {
+ SmallPtrSet<PHINode*, 32> LoadUsingPHIs;
+ SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad;
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
+ ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
+ LoadUsingPHIsPerLoad))
+ return false;
+ LoadUsingPHIsPerLoad.clear();
+ }
+
+ // If we reach here, we know that all uses of the loads and transitive uses
+ // (through PHI nodes) are simple enough to transform. However, we don't know
+ // that all the inputs to the PHI nodes are in the same equivalence sets.
+ // Check to verify that all operands of the PHIs are either PHIS that can be
+ // transformed, loads from GV, or MI itself.
+ for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(),
+ E = LoadUsingPHIs.end(); I != E; ++I) {
+ PHINode *PN = *I;
+ for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+ Value *InVal = PN->getIncomingValue(op);
+
+ // PHI of the stored value itself is ok.
+ if (InVal == MI) continue;
+
+ if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+ // One of the PHIs in our set is (optimistically) ok.
+ if (LoadUsingPHIs.count(InPN))
+ continue;
+ return false;
+ }
+
+ // Load from GV is ok.
+ if (LoadInst *LI = dyn_cast<LoadInst>(InVal))
+ if (LI->getOperand(0) == GV)
+ continue;
+
+ // FIXME: Should incoming undef or null pointer values be accepted here?
+
+ // Anything else is rejected.
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+ if (FieldNo >= FieldVals.size())
+ FieldVals.resize(FieldNo+1);
+
+ // If we already have this value, just reuse the previously scalarized
+ // version.
+ if (Value *FieldVal = FieldVals[FieldNo])
+ return FieldVal;
+
+ // Depending on what instruction this is, we have several cases.
+ Value *Result;
+ if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ // This is a scalarized version of the load from the global. Just create
+ // a new Load of the scalarized global.
+ Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues,
+ PHIsToRewrite),
+ LI->getName()+".f" + utostr(FieldNo), LI);
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // PN's type is pointer to struct. Make a new PHI of pointer to struct
+ // field.
+ const StructType *ST =
+ cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+
+ Result = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PN->getName()+".f"+utostr(FieldNo), PN);
+ PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+ } else {
+ assert(0 && "Unknown usable value");
+ Result = 0;
+ }
+
+ return FieldVals[FieldNo] = Result;
+}
+
+/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
+/// the load, rewrite the derived value to use the HeapSRoA'd load.
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ // If this is a comparison against null, handle it.
+ if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
+ assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
+ // If we have a setcc of the loaded pointer, we can use a setcc of any
+ // field.
+ Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ Value *New = new ICmpInst(SCI->getPredicate(), NPtr,
+ Constant::getNullValue(NPtr->getType()),
+ SCI->getName(), SCI);
+ SCI->replaceAllUsesWith(New);
+ SCI->eraseFromParent();
+ return;
+ }
+
+ // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
+ assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
+ && "Unexpected GEPI!");
+
+ // Load the pointer for this field.
+ unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
+ Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ // Create the new GEP idx vector.
+ SmallVector<Value*, 8> GEPIdx;
+ GEPIdx.push_back(GEPI->getOperand(1));
+ GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
+
+ Value *NGEPI = GetElementPtrInst::Create(NewPtr,
+ GEPIdx.begin(), GEPIdx.end(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NGEPI);
+ GEPI->eraseFromParent();
+ return;
+ }
+
+ // Recursively transform the users of PHI nodes. This will lazily create the
+ // PHIs that are needed for individual elements. Keep track of what PHIs we
+ // see in InsertedScalarizedValues so that we don't get infinite loops (very
+ // antisocial). If the PHI is already in InsertedScalarizedValues, it has
+ // already been seen first by another load, so its uses have already been
+ // processed.
+ PHINode *PN = cast<PHINode>(LoadUser);
+ bool Inserted;
+ DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
+ tie(InsertPos, Inserted) =
+ InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
+ if (!Inserted) return;
+
+ // If this is the first time we've seen this PHI, recursively process all
+ // users.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+}
+
+/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
+/// is a value loaded from the global. Eliminate all uses of Ptr, making them
+/// use FieldGlobals instead. All uses of loaded values satisfy
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+
+ if (Load->use_empty()) {
+ Load->eraseFromParent();
+ InsertedScalarizedValues.erase(Load);
+ }
+}
+
+/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
+ DOUT << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI;
+ const StructType *STy = cast<StructType>(MI->getAllocatedType());
+
+ // There is guaranteed to be at least one use of the malloc (storing
+ // it into GV). If there are other uses, change them to be uses of
+ // the global to simplify later code. This also deletes the store
+ // into GV.
+ ReplaceUsesOfMallocWithGlobal(MI, GV);
+
+ // Okay, at this point, there are no users of the malloc. Insert N
+ // new mallocs at the same place as MI, and N globals.
+ std::vector<Value*> FieldGlobals;
+ std::vector<MallocInst*> FieldMallocs;
+
+ for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+ const Type *FieldTy = STy->getElementType(FieldNo);
+ const Type *PFieldTy = PointerType::getUnqual(FieldTy);
+
+ GlobalVariable *NGV =
+ new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy),
+ GV->getName() + ".f" + utostr(FieldNo), GV,
+ GV->isThreadLocal());
+ FieldGlobals.push_back(NGV);
+
+ MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
+ MI->getName() + ".f" + utostr(FieldNo),MI);
+ FieldMallocs.push_back(NMI);
+ new StoreInst(NMI, NGV, MI);
+ }
+
+ // The tricky aspect of this transformation is handling the case when malloc
+ // fails. In the original code, malloc failing would set the result pointer
+ // of malloc to null. In this case, some mallocs could succeed and others
+ // could fail. As such, we emit code that looks like this:
+ // F0 = malloc(field0)
+ // F1 = malloc(field1)
+ // F2 = malloc(field2)
+ // if (F0 == 0 || F1 == 0 || F2 == 0) {
+ // if (F0) { free(F0); F0 = 0; }
+ // if (F1) { free(F1); F1 = 0; }
+ // if (F2) { free(F2); F2 = 0; }
+ // }
+ Value *RunningOr = 0;
+ for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+ Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull", MI);
+ if (!RunningOr)
+ RunningOr = Cond; // First seteq
+ else
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI);
+ }
+
+ // Split the basic block at the old malloc.
+ BasicBlock *OrigBB = MI->getParent();
+ BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont");
+
+ // Create the block to check the first condition. Put all these blocks at the
+ // end of the function as they are unlikely to be executed.
+ BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null",
+ OrigBB->getParent());
+
+ // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+ // branch on RunningOr.
+ OrigBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+ // Within the NullPtrBlock, we need to emit a comparison and branch for each
+ // pointer, because some may be null while others are not.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal,
+ Constant::getNullValue(GVVal->getType()),
+ "tmp", NullPtrBlock);
+ BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent());
+ BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent());
+ BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+
+ // Fill in FreeBlock.
+ new FreeInst(GVVal, FreeBlock);
+ new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+ FreeBlock);
+ BranchInst::Create(NextBlock, FreeBlock);
+
+ NullPtrBlock = NextBlock;
+ }
+
+ BranchInst::Create(ContBB, NullPtrBlock);
+
+ // MI is no longer needed, remove it.
+ MI->eraseFromParent();
+
+  /// InsertedScalarizedValues - As we process loads, if we can't immediately
+  /// update all uses of the load, keep track of the scalarized values
+  /// inserted for a given load.
+ DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ InsertedScalarizedValues[GV] = FieldGlobals;
+
+ std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+ // Okay, the malloc site is completely handled. All of the uses of GV are now
+ // loads, and all uses of those loads are simple. Rewrite them to use loads
+ // of the per-field globals instead.
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+ continue;
+ }
+
+ // Must be a store of null.
+ StoreInst *SI = cast<StoreInst>(User);
+ assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+ "Unexpected heap-sra user!");
+
+ // Insert a store of null into each global.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ Constant *Null = Constant::getNullValue(PT->getElementType());
+ new StoreInst(Null, FieldGlobals[i], SI);
+ }
+ // Erase the original store.
+ SI->eraseFromParent();
+ }
+
+ // While we have PHIs that are interesting to rewrite, do it.
+ while (!PHIsToRewrite.empty()) {
+ PHINode *PN = PHIsToRewrite.back().first;
+ unsigned FieldNo = PHIsToRewrite.back().second;
+ PHIsToRewrite.pop_back();
+ PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+ assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+ // Add all the incoming values. This can materialize more phis.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+ PHIsToRewrite);
+ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+ }
+ }
+
+ // Drop all inter-phi links and any loads that made it this far.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->dropAllReferences();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->dropAllReferences();
+ }
+
+ // Delete all the phis and loads now that inter-references are dead.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->eraseFromParent();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->eraseFromParent();
+ }
+
+ // The old global is now dead, remove it.
+ GV->eraseFromParent();
+
+ ++NumHeapSRA;
+ return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored into it, where that
+/// value is a malloc or a cast of a malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+ MallocInst *MI,
+ Module::global_iterator &GVI,
+ TargetData &TD) {
+ // If this is a malloc of an abstract type, don't touch it.
+ if (!MI->getAllocatedType()->isSized())
+ return false;
+
+ // We can't optimize this global unless all uses of it are *known* to be
+ // of the malloc value, not of the null initializer value (consider a use
+ // that compares the global's value against zero to see if the malloc has
+ // been reached). To do this, we check to see if all uses of the global
+ // would trap if the global were null: this proves that they must all
+ // happen after the malloc.
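+  // E.g. (illustrative): a use like "store i32 0, i32* %loadedptr" would trap
+  // if %loadedptr were null, so it must execute after the malloc is stored;
+  // a use like "icmp eq i32* %loadedptr, null" would not trap, could observe
+  // the null initializer, and therefore blocks the transformation.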
+ if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ return false;
+
+  // We can't optimize this if the malloc itself is used in a complex way,
+  // for example, being stored into multiple globals. We allow the malloc to
+  // be stored into the specified global, and to be loaded, setcc'd, and
+  // GEP'd; these are all uses we know how to rewrite to operate on the
+  // global instead.
+ {
+ SmallPtrSet<PHINode*, 8> PHIs;
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs))
+ return false;
+ }
+
+
+ // If we have a global that is only initialized with a fixed size malloc,
+ // transform the program to use global memory instead of malloc'd memory.
+ // This eliminates dynamic allocation, avoids an indirection accessing the
+ // data, and exposes the resultant global to further GlobalOpt.
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (NElements->getZExtValue()*
+ TD.getTypeAllocSize(MI->getAllocatedType()) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
+ return true;
+ }
+ }
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+ const Type *AllocTy = MI->getAllocatedType();
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (!MI->isArrayAllocation())
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
+    // If the structure has an unreasonable number of fields, leave it
+    // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) {
+
+ // If this is a fixed size array, transform the Malloc to be an alloc of
+ // structs. malloc [100 x struct],1 -> malloc struct, 100
+ if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) {
+ MallocInst *NewMI =
+ new MallocInst(AllocSTy,
+ ConstantInt::get(Type::Int32Ty, AT->getNumElements()),
+ "", MI);
+ NewMI->takeName(MI);
+ Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI);
+ MI->replaceAllUsesWith(Cast);
+ MI->eraseFromParent();
+ MI = NewMI;
+ }
+
+ GVI = PerformHeapAllocSRoA(GV, MI);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
+/// that only one value (besides its initializer) is ever stored to the global.
+static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ Module::global_iterator &GVI,
+ TargetData &TD) {
+ // Ignore no-op GEPs and bitcasts.
+ StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+ // If we are dealing with a pointer global that is initialized to null and
+ // only has one (non-null) value stored into it, then we can optimize any
+ // users of the loaded value (often calls and loads) that would trap if the
+ // value was null.
+ if (isa<PointerType>(GV->getInitializer()->getType()) &&
+ GV->getInitializer()->isNullValue()) {
+ if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+ if (GV->getInitializer()->getType() != SOVC->getType())
+ SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+ // Optimize away any trapping uses of the loaded value.
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+ return true;
+ } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
+ if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
+/// two values ever stored into GV are its initializer and OtherVal. See if we
+/// can shrink the global into a boolean and select between the two values
+/// whenever it is used. This exposes the values to other scalar optimizations.
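+///
+/// For example (hypothetical IR, for illustration only), given a global
+///   @G = internal global i32 0    ; only 0 and 42 are ever stored
+/// we create @G.b = internal global i1 false; stores of 42 become
+/// "store i1 true, i1* @G.b", and each load becomes:
+///   %b = load i1* @G.b
+///   %v = select i1 %b, i32 42, i32 0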
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+ const Type *GVElType = GV->getType()->getElementType();
+
+ // If GVElType is already i1, it is already shrunk. If the type of the GV is
+ // an FP value, pointer or vector, don't do this optimization because a select
+ // between them is very expensive and unlikely to lead to later
+ // simplification. In these cases, we typically end up with "cond ? v1 : v2"
+ // where v1 and v2 both require constant pool loads, a big loss.
+ if (GVElType == Type::Int1Ty || GVElType->isFloatingPoint() ||
+ isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
+ return false;
+
+  // Walk the use list of the global to check that all the uses are loads or
+  // stores. If there is anything else, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I)
+ if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+ return false;
+
+ DOUT << " *** SHRINKING TO BOOL: " << *GV;
+
+ // Create the new global, initializing it to false.
+ GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false,
+ GlobalValue::InternalLinkage, ConstantInt::getFalse(),
+ GV->getName()+".b",
+ (Module *)NULL,
+ GV->isThreadLocal());
+ GV->getParent()->getGlobalList().insert(GV, NewGV);
+
+ Constant *InitVal = GV->getInitializer();
+ assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!");
+
+ // If initialized to zero and storing one into the global, we can use a cast
+ // instead of a select to synthesize the desired value.
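+  // E.g. (illustrative): for a zero initializer and a stored value of one,
+  // "select i1 %b, i32 1, i32 0" is simply "zext i1 %b to i32".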
+ bool IsOneZero = false;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
+ IsOneZero = InitVal->isNullValue() && CI->isOne();
+
+ while (!GV->use_empty()) {
+ Instruction *UI = cast<Instruction>(GV->use_back());
+ if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // Change the store into a boolean store.
+ bool StoringOther = SI->getOperand(0) == OtherVal;
+ // Only do this if we weren't storing a loaded value.
+ Value *StoreVal;
+ if (StoringOther || SI->getOperand(0) == InitVal)
+ StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther);
+ else {
+ // Otherwise, we are storing a previously loaded copy. To do this,
+ // change the copy from copying the original value to just copying the
+ // bool.
+ Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
+
+        // If we've already replaced the input, StoredVal will be a cast or
+ // select instruction. If not, it will be a load of the original
+ // global.
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ assert(LI->getOperand(0) == GV && "Not a copy!");
+ // Insert a new load, to preserve the saved value.
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
+ } else {
+ assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
+ "This is not a form that we understand!");
+ StoreVal = StoredVal->getOperand(0);
+ assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
+ }
+ }
+ new StoreInst(StoreVal, NewGV, SI);
+ } else {
+ // Change the load into a load of bool then a select.
+ LoadInst *LI = cast<LoadInst>(UI);
+ LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
+ Value *NSI;
+ if (IsOneZero)
+ NSI = new ZExtInst(NLI, LI->getType(), "", LI);
+ else
+ NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
+ NSI->takeName(LI);
+ LI->replaceAllUsesWith(NSI);
+ }
+ UI->eraseFromParent();
+ }
+
+ GV->eraseFromParent();
+ return true;
+}
+
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI) {
+ SmallPtrSet<PHINode*, 16> PHIUsers;
+ GlobalStatus GS;
+ GV->removeDeadConstantUsers();
+
+ if (GV->use_empty()) {
+ DOUT << "GLOBAL DEAD: " << *GV;
+ GV->eraseFromParent();
+ ++NumDeleted;
+ return true;
+ }
+
+ if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
+#if 0
+ cerr << "Global: " << *GV;
+ cerr << " isLoaded = " << GS.isLoaded << "\n";
+ cerr << " StoredType = ";
+ switch (GS.StoredType) {
+ case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break;
+ case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break;
+ case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break;
+ case GlobalStatus::isStored: cerr << "stored\n"; break;
+ }
+ if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
+ cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n";
+ if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
+ cerr << " AccessingFunction = " << GS.AccessingFunction->getName()
+ << "\n";
+ cerr << " HasMultipleAccessingFunctions = "
+ << GS.HasMultipleAccessingFunctions << "\n";
+ cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n";
+ cerr << "\n";
+#endif
+
+    // If this is a first class global, it has only one accessing function,
+    // and that function is main (which we know is not recursive), we can
+    // replace the global with a local alloca in that function.
+ //
+    // NOTE: It doesn't make sense to promote non-single-value types since we
+    // are just replacing static memory with stack memory.
+ if (!GS.HasMultipleAccessingFunctions &&
+ GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GV->getType()->getElementType()->isSingleValueType() &&
+ GS.AccessingFunction->getName() == "main" &&
+ GS.AccessingFunction->hasExternalLinkage()) {
+ DOUT << "LOCALIZING GLOBAL: " << *GV;
+ Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
+ const Type* ElemTy = GV->getType()->getElementType();
+ // FIXME: Pass Global's alignment when globals have alignment
+ AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI);
+ if (!isa<UndefValue>(GV->getInitializer()))
+ new StoreInst(GV->getInitializer(), Alloca, FirstI);
+
+ GV->replaceAllUsesWith(Alloca);
+ GV->eraseFromParent();
+ ++NumLocalized;
+ return true;
+ }
+
+ // If the global is never loaded (but may be stored to), it is dead.
+ // Delete it now.
+ if (!GS.isLoaded) {
+ DOUT << "GLOBAL NEVER LOADED: " << *GV;
+
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, delete it.
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ Changed = true;
+ }
+ return Changed;
+
+ } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ DOUT << "MARKING CONSTANT: " << *GV;
+ GV->setConstant(true);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, just nuke it.
+ if (GV->use_empty()) {
+ DOUT << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n";
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+
+ ++NumMarked;
+ return true;
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV,
+ getAnalysis<TargetData>())) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
+ }
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ // If the initial value for the global was an undef value, and if only
+ // one other value was stored into it, we can just change the
+ // initializer to be the stored value, then delete all stores to the
+ // global. This allows us to mark it constant.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (isa<UndefValue>(GV->getInitializer())) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ if (GV->use_empty()) {
+ DOUT << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n";
+ GV->eraseFromParent();
+ ++NumDeleted;
+ } else {
+ GVI = GV;
+ }
+ ++NumSubstitute;
+ return true;
+ }
+
+ // Try to optimize globals based on the knowledge that only one value
+ // (besides its initializer) is ever stored to the global.
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
+ getAnalysis<TargetData>()))
+ return true;
+
+ // Otherwise, if the global was not a boolean, we can shrink it to be a
+ // boolean.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// OnlyCalledDirectly - Return true if the specified function is only called
+/// directly. In other words, its address is never taken.
+static bool OnlyCalledDirectly(Function *F) {
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ Instruction *User = dyn_cast<Instruction>(*UI);
+ if (!User) return false;
+ if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false;
+
+ // See if the function address is passed as an argument.
+ for (User::op_iterator i = User->op_begin() + 1, e = User->op_end();
+ i != e; ++i)
+ if (*i == F) return false;
+ }
+ return true;
+}
+
+/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
+/// function, changing them to FastCC.
+static void ChangeCalleesToFastCall(Function *F) {
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ CallSite User(cast<Instruction>(*UI));
+ User.setCallingConv(CallingConv::Fast);
+ }
+}
+
+static AttrListPtr StripNest(const AttrListPtr &Attrs) {
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
+ continue;
+
+    // There can be at most one Nest attribute, so remove it and return.
+ return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
+ }
+
+ return Attrs;
+}
+
+static void RemoveNestAttribute(Function *F) {
+ F->setAttributes(StripNest(F->getAttributes()));
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ CallSite User(cast<Instruction>(*UI));
+ User.setAttributes(StripNest(User.getAttributes()));
+ }
+}
+
+bool GlobalOpt::OptimizeFunctions(Module &M) {
+ bool Changed = false;
+ // Optimize functions.
+ for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
+ Function *F = FI++;
+ // Functions without names cannot be referenced outside this module.
+ if (!F->hasName() && !F->isDeclaration())
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->removeDeadConstantUsers();
+ if (F->use_empty() && (F->hasLocalLinkage() ||
+ F->hasLinkOnceLinkage())) {
+ M.getFunctionList().erase(F);
+ Changed = true;
+ ++NumFnDeleted;
+ } else if (F->hasLocalLinkage()) {
+ if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
+ OnlyCalledDirectly(F)) {
+ // If this function has C calling conventions, is not a varargs
+ // function, and is only called directly, promote it to use the Fast
+ // calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ OnlyCalledDirectly(F)) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool GlobalOpt::OptimizeGlobalVars(Module &M) {
+ bool Changed = false;
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+ // Global variables without names cannot be referenced outside this module.
+ if (!GV->hasName() && !GV->isDeclaration())
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ if (!GV->isConstant() && GV->hasLocalLinkage() &&
+ GV->hasInitializer())
+ Changed |= ProcessInternalGlobal(GV, GVI);
+ }
+ return Changed;
+}
+
+/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
+/// initializers have an init priority of 65535.
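+///
+/// A list we can handle looks like this (illustrative example):
+///   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+///                        [{ i32, void ()* } { i32 65535, void ()* @init }]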
+GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->getName() == "llvm.global_ctors") {
+      // Found it, verify it's an array of { i32, void ()* }.
+ const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType());
+ if (!ATy) return 0;
+ const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ if (!STy || STy->getNumElements() != 2 ||
+ STy->getElementType(0) != Type::Int32Ty) return 0;
+ const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
+ if (!PFTy) return 0;
+ const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
+ if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() ||
+ FTy->getNumParams() != 0)
+ return 0;
+
+ // Verify that the initializer is simple enough for us to handle.
+ if (!I->hasInitializer()) return 0;
+ ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
+ if (!CA) return 0;
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) {
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return 0;
+
+ // Init priority must be standard.
+ ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!CI || CI->getZExtValue() != 65535)
+ return 0;
+ } else {
+ return 0;
+ }
+
+ return I;
+ }
+ return 0;
+}
+
+/// ParseGlobalCtors - Given an llvm.global_ctors list that we can understand,
+/// return a vector of the constructor functions, with a null entry for the
+/// null terminator.
+static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<Function*> Result;
+ Result.reserve(CA->getNumOperands());
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// InstallGlobalCtors - Given an llvm.global_ctors list, install the
+/// specified array of constructors, returning the new global to use.
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
+ const std::vector<Function*> &Ctors) {
+ // If we made a change, reassemble the initializer list.
+ std::vector<Constant*> CSVals;
+ CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535));
+ CSVals.push_back(0);
+
+ // Create the new init list.
+ std::vector<Constant*> CAList;
+ for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
+ if (Ctors[i]) {
+ CSVals[1] = Ctors[i];
+ } else {
+ const Type *FTy = FunctionType::get(Type::VoidTy,
+ std::vector<const Type*>(), false);
+ const PointerType *PFTy = PointerType::getUnqual(FTy);
+ CSVals[1] = Constant::getNullValue(PFTy);
+ CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647);
+ }
+ CAList.push_back(ConstantStruct::get(CSVals));
+ }
+
+ // Create the array initializer.
+ const Type *StructTy =
+ cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()),
+ CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == GCL->getInitializer()->getType()) {
+ GCL->setInitializer(CA);
+ return GCL;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
+ GCL->getLinkage(), CA, "",
+ (Module *)NULL,
+ GCL->isThreadLocal());
+ GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+
+ if (Ctors.size())
+ return NGV;
+ else
+ return 0;
+}
+
+
+static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
+ Value *V) {
+ if (Constant *CV = dyn_cast<Constant>(V)) return CV;
+ Constant *R = ComputedValues[V];
+ assert(R && "Reference to an uncomputed value!");
+ return R;
+}
+
+/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
+/// enough for us to understand. In particular, if it is a cast of something,
+/// we punt. We basically just support direct accesses to globals and GEPs of
+/// globals. This should be kept up to date with CommitValueTo.
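+///
+/// Accepted forms are, for example (illustrative):
+///   @G                                              ; a defined global
+///   getelementptr ({ i32, i32 }* @G, i32 0, i32 1)  ; a constant GEP of one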
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage())
+ return false; // do not allow weak/linkonce/dllimport/dllexport linkage.
+ return !GV->isDeclaration(); // reject external globals.
+ }
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ // Handle a constantexpr gep.
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage())
+ return false; // do not allow weak/linkonce/dllimport/dllexport linkage.
+ return GV->hasInitializer() &&
+ ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+ return false;
+}
+
+/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
+/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
+/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
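+///
+/// E.g. (hypothetical): storing i32 7 through getelementptr(@G, 0, 1, 0) into
+///   @G = global { i32, [2 x i32] } { i32 1, [2 x i32] [i32 2, i32 3] }
+/// produces the initializer { i32 1, [2 x i32] [i32 7, i32 3] }.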
+static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
+ ConstantExpr *Addr, unsigned OpNo) {
+ // Base case of the recursion.
+ if (OpNo == Addr->getNumOperands()) {
+ assert(Val->getType() == Init->getType() && "Type mismatch!");
+ return Val;
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ std::vector<Constant*> Elts;
+
+ // Break up the constant into its elements.
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+ for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
+ } else if (isa<UndefValue>(Init)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(UndefValue::get(STy->getElementType(i)));
+ } else {
+ assert(0 && "This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ }
+
+ // Replace the element that we are supposed to.
+ ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
+ unsigned Idx = CU->getZExtValue();
+ assert(Idx < STy->getNumElements() && "Struct index out of range!");
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
+
+ // Return the modified struct.
+ return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked());
+ } else {
+ ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+ const ArrayType *ATy = cast<ArrayType>(Init->getType());
+
+ // Break up the array into elements.
+ std::vector<Constant*> Elts;
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ Constant *Elt = Constant::getNullValue(ATy->getElementType());
+ Elts.assign(ATy->getNumElements(), Elt);
+ } else if (isa<UndefValue>(Init)) {
+ Constant *Elt = UndefValue::get(ATy->getElementType());
+ Elts.assign(ATy->getNumElements(), Elt);
+ } else {
+ assert(0 && "This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ }
+
+ assert(CI->getZExtValue() < ATy->getNumElements());
+ Elts[CI->getZExtValue()] =
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+ return ConstantArray::get(ATy, Elts);
+ }
+}
+
+/// CommitValueTo - We have decided that Addr (which satisfies the predicate
+/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
+static void CommitValueTo(Constant *Val, Constant *Addr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ assert(GV->hasInitializer());
+ GV->setInitializer(Val);
+ return;
+ }
+
+ ConstantExpr *CE = cast<ConstantExpr>(Addr);
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+
+ Constant *Init = GV->getInitializer();
+ Init = EvaluateStoreInto(Init, Val, CE, 2);
+ GV->setInitializer(Init);
+}
+
+/// ComputeLoadResult - Return the value that would be computed by a load from
+/// P after the stores reflected by 'memory' have been performed. If we can't
+/// decide, return null.
+static Constant *ComputeLoadResult(Constant *P,
+ const DenseMap<Constant*, Constant*> &Memory) {
+  // If this memory location has recently been stored to, use the stored
+  // value: it is the most up-to-date.
+ DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
+ if (I != Memory.end()) return I->second;
+
+ // Access it.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (GV->hasInitializer())
+ return GV->getInitializer();
+ return 0;
+ }
+
+ // Handle a constantexpr getelementptr.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (GV->hasInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+
+ return 0; // don't know how to evaluate.
+}
+
+/// EvaluateFunction - Evaluate a call to function F, returning true if
+/// successful, false if we can't evaluate it. ActualArgs contains the actual
+/// argument values passed in for the function's formal parameters.
+static bool EvaluateFunction(Function *F, Constant *&RetVal,
+ const std::vector<Constant*> &ActualArgs,
+ std::vector<Function*> &CallStack,
+ DenseMap<Constant*, Constant*> &MutatedMemory,
+ std::vector<GlobalVariable*> &AllocaTmps) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
+
+ CallStack.push_back(F);
+
+ /// Values - As we compute SSA register values, we store their contents here.
+ DenseMap<Value*, Constant*> Values;
+
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ Values[AI] = ActualArgs[ArgNo];
+
+  /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+  /// we can evaluate any one basic block at most once. This set keeps track
+  /// of which blocks we have executed so we can detect loops.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurInst - The current instruction we're evaluating.
+ BasicBlock::iterator CurInst = F->begin()->begin();
+
+ // This is the main evaluation loop.
+ while (1) {
+ Constant *InstResult = 0;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (SI->isVolatile()) return false; // no volatile accesses.
+ Constant *Ptr = getVal(Values, SI->getOperand(1));
+ if (!isSimpleEnoughPointerToCommit(Ptr))
+ // If this is too complex for us to commit, reject it.
+ return false;
+ Constant *Val = getVal(Values, SI->getOperand(0));
+ MutatedMemory[Ptr] = Val;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
+ InstResult = ConstantExpr::get(BO->getOpcode(),
+ getVal(Values, BO->getOperand(0)),
+ getVal(Values, BO->getOperand(1)));
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
+ InstResult = ConstantExpr::getCompare(CI->getPredicate(),
+ getVal(Values, CI->getOperand(0)),
+ getVal(Values, CI->getOperand(1)));
+ } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
+ InstResult = ConstantExpr::getCast(CI->getOpcode(),
+ getVal(Values, CI->getOperand(0)),
+ CI->getType());
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
+ InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+ getVal(Values, SI->getOperand(1)),
+ getVal(Values, SI->getOperand(2)));
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
+ Constant *P = getVal(Values, GEP->getOperand(0));
+ SmallVector<Constant*, 8> GEPOps;
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(Values, *i));
+ InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+ if (LI->isVolatile()) return false; // no volatile accesses.
+ InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
+ MutatedMemory);
+ if (InstResult == 0) return false; // Could not evaluate load.
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
+ const Type *Ty = AI->getType()->getElementType();
+ AllocaTmps.push_back(new GlobalVariable(Ty, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(Ty),
+ AI->getName()));
+ InstResult = AllocaTmps.back();
+ } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CI)) {
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (isa<InlineAsm>(CI->getOperand(0))) return false;
+
+ // Resolve function pointers.
+ Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0)));
+ if (!Callee) return false; // Cannot resolve.
+
+ std::vector<Constant*> Formals;
+ for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
+ i != e; ++i)
+ Formals.push_back(getVal(Values, *i));
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(Callee, &Formals[0],
+ Formals.size())) {
+ InstResult = C;
+ } else {
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg())
+ return false;
+
+ Constant *RetVal;
+ // Execute the call, if successful, use the return value.
+ if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
+ MutatedMemory, AllocaTmps))
+ return false;
+ InstResult = RetVal;
+ }
+ } else if (isa<TerminatorInst>(CurInst)) {
+ BasicBlock *NewBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NewBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NewBB = SI->getSuccessor(SI->findCaseValue(Val));
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
+ if (RI->getNumOperands())
+ RetVal = getVal(Values, RI->getOperand(0));
+
+ CallStack.pop_back(); // return from fn.
+ return true; // We succeeded at evaluating this ctor!
+ } else {
+ // invoke, unwind, unreachable.
+ return false; // Cannot handle this terminator.
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NewBB))
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ BasicBlock *OldBB = CurInst->getParent();
+ CurInst = NewBB->begin();
+ PHINode *PN;
+ for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB));
+
+ // Do NOT increment CurInst. We know that the terminator had no value.
+ continue;
+ } else {
+ // Did not know how to evaluate this!
+ return false;
+ }
+
+ if (!CurInst->use_empty())
+ Values[CurInst] = InstResult;
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// EvaluateStaticConstructor - Evaluate the static constructor function F, if
+/// we can. Return true on success, false otherwise.
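+///
+/// E.g. (illustrative): a C++ file-scope definition "int X = foo();" where
+/// foo() just returns 42 compiles to a ctor that stores 42 into @X; if we can
+/// evaluate that ctor, @X's initializer becomes i32 42 and the ctor can be
+/// dropped from the llvm.global_ctors list.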
+static bool EvaluateStaticConstructor(Function *F) {
+ /// MutatedMemory - For each store we execute, we update this map. Loads
+ /// check this to get the most up-to-date value. If evaluation is successful,
+  /// this state is committed to the module.
+ DenseMap<Constant*, Constant*> MutatedMemory;
+
+ /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
+ /// to represent its body. This vector is needed so we can delete the
+ /// temporary globals when we are done.
+ std::vector<GlobalVariable*> AllocaTmps;
+
+ /// CallStack - This is used to detect recursion. In pathological situations
+ /// we could hit exponential behavior, but at least there is nothing
+ /// unbounded.
+ std::vector<Function*> CallStack;
+
+ // Call the function.
+ Constant *RetValDummy;
+ bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector<Constant*>(),
+ CallStack, MutatedMemory, AllocaTmps);
+ if (EvalSuccess) {
+ // We succeeded at evaluation: commit the result.
+ DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ << F->getName() << "' to " << MutatedMemory.size()
+ << " stores.\n";
+ for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
+ E = MutatedMemory.end(); I != E; ++I)
+ CommitValueTo(I->second, I->first);
+ }
+
+ // At this point, we are done interpreting. If we created any 'alloca'
+ // temporaries, release them now.
+ while (!AllocaTmps.empty()) {
+ GlobalVariable *Tmp = AllocaTmps.back();
+ AllocaTmps.pop_back();
+
+ // If there are still users of the alloca, the program is doing something
+ // silly, e.g. storing the address of the alloca somewhere and using it
+    // later. Since this is undefined, we'll just replace such uses with null.
+ if (!Tmp->use_empty())
+ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+ delete Tmp;
+ }
+
+ return EvalSuccess;
+}
+
+
+
+/// OptimizeGlobalCtorsList - Simplify and evaluate global ctors if possible.
+/// Return true if anything changed.
+bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
+ std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
+ bool MadeChange = false;
+ if (Ctors.empty()) return false;
+
+ // Loop over global ctors, optimizing them when we can.
+ for (unsigned i = 0; i != Ctors.size(); ++i) {
+ Function *F = Ctors[i];
+    // If we found a null terminator in the middle of the list, prune off the
+    // rest of the list.
+ if (F == 0) {
+ if (i != Ctors.size()-1) {
+ Ctors.resize(i+1);
+ MadeChange = true;
+ }
+ break;
+ }
+
+ // We cannot simplify external ctor functions.
+ if (F->empty()) continue;
+
+ // If we can evaluate the ctor at compile time, do.
+ if (EvaluateStaticConstructor(F)) {
+ Ctors.erase(Ctors.begin()+i);
+ MadeChange = true;
+ --i;
+ ++NumCtorsEvaluated;
+ continue;
+ }
+ }
+
+ if (!MadeChange) return false;
+
+ GCL = InstallGlobalCtors(GCL, Ctors);
+ return true;
+}
+
+bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
+ bool Changed = false;
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ Module::alias_iterator J = I++;
+ // Aliases without names cannot be referenced outside this module.
+ if (!J->hasName() && !J->isDeclaration())
+ J->setLinkage(GlobalValue::InternalLinkage);
+ // If the aliasee may change at link time, nothing can be done - bail out.
+ if (J->mayBeOverridden())
+ continue;
+
+ Constant *Aliasee = J->getAliasee();
+ GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ Target->removeDeadConstantUsers();
+ bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
+
+ // Make all users of the alias use the aliasee instead.
+ if (!J->use_empty()) {
+ J->replaceAllUsesWith(Aliasee);
+ ++NumAliasesResolved;
+ Changed = true;
+ }
+
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ if (!Target->hasLocalLinkage())
+ continue;
+
+ // The transform is only useful if the alias does not have internal linkage.
+ if (J->hasLocalLinkage())
+ continue;
+
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (!hasOneUse)
+ continue;
+
+ // Give the aliasee the name, linkage and other attributes of the alias.
+ Target->takeName(J);
+ Target->setLinkage(J->getLinkage());
+ Target->GlobalValue::copyAttributesFrom(J);
+
+ // Delete the alias.
+ M.getAliasList().erase(J);
+ ++NumAliasesRemoved;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool GlobalOpt::runOnModule(Module &M) {
+ bool Changed = false;
+
+  // Try to find the llvm.global_ctors list.
+ GlobalVariable *GlobalCtors = FindGlobalCtors(M);
+
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+    // Delete trivially dead functions and promote ccc functions to fastcc.
+ LocalChange |= OptimizeFunctions(M);
+
+ // Optimize global_ctors list.
+ if (GlobalCtors)
+ LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
+
+ // Optimize non-address-taken globals.
+ LocalChange |= OptimizeGlobalVars(M);
+
+ // Resolve aliases, when possible.
+ LocalChange |= OptimizeGlobalAliases(M);
+ Changed |= LocalChange;
+ }
+
+ // TODO: Move all global ctors functions to the end of the module for code
+ // layout.
+
+ return Changed;
+}
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
new file mode 100644
index 0000000..2dc8558
--- /dev/null
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -0,0 +1,277 @@
+//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an _extremely_ simple interprocedural constant
+// propagation pass. It could certainly be improved in many different ways,
+// like using a worklist. This pass makes arguments dead, but does not remove
+// them. The existing dead argument elimination pass should be run after this
+// to clean up the mess.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ipconstprop"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+STATISTIC(NumArgumentsProped, "Number of args turned into constants");
+STATISTIC(NumReturnValProped, "Number of return values turned into constants");
+
+namespace {
+ /// IPCP - The interprocedural constant propagation pass
+ ///
+ struct VISIBILITY_HIDDEN IPCP : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ IPCP() : ModulePass(&ID) {}
+
+ bool runOnModule(Module &M);
+ private:
+ bool PropagateConstantsIntoArguments(Function &F);
+ bool PropagateConstantReturn(Function &F);
+ };
+}
+
+char IPCP::ID = 0;
+static RegisterPass<IPCP>
+X("ipconstprop", "Interprocedural constant propagation");
+
+ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
+
+bool IPCP::runOnModule(Module &M) {
+ bool Changed = false;
+ bool LocalChange = true;
+
+ // FIXME: instead of using smart algorithms, we just iterate until we stop
+ // making changes.
+ while (LocalChange) {
+ LocalChange = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+        // Delete any dead constant users hanging off the function.
+ I->removeDeadConstantUsers();
+ if (I->hasLocalLinkage())
+ LocalChange |= PropagateConstantsIntoArguments(*I);
+ Changed |= PropagateConstantReturn(*I);
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+/// PropagateConstantsIntoArguments - Look at all uses of the specified
+/// function. If all uses are direct call sites, and all pass a particular
+/// constant in for an argument, propagate that constant in as the argument.
+///
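+/// E.g. (illustrative): if an internal function @f(i32 %x) is only ever
+/// called as "call void @f(i32 42)", every use of %x inside @f is replaced
+/// with the constant i32 42 (deadargelim then removes the dead argument).
+///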
+bool IPCP::PropagateConstantsIntoArguments(Function &F) {
+  if (F.arg_empty() || F.use_empty()) return false; // No args or users? Exit.
+
+ // For each argument, keep track of its constant value and whether it is a
+ // constant or not. The bool is driven to true when found to be non-constant.
+ SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants;
+ ArgumentConstants.resize(F.arg_size());
+
+ unsigned NumNonconstant = 0;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+    // If used by something other than a call or invoke instruction, do not
+    // transform.
+ if (!isa<CallInst>(*UI) && !isa<InvokeInst>(*UI))
+ return false;
+
+ CallSite CS = CallSite::get(cast<Instruction>(*UI));
+ if (!CS.isCallee(UI))
+ return false;
+
+ // Check out all of the potentially constant arguments. Note that we don't
+ // inspect varargs here.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Function::arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
+ ++i, ++AI, ++Arg) {
+
+ // If this argument is known non-constant, ignore it.
+ if (ArgumentConstants[i].second)
+ continue;
+
+ Constant *C = dyn_cast<Constant>(*AI);
+ if (C && ArgumentConstants[i].first == 0) {
+ ArgumentConstants[i].first = C; // First constant seen.
+ } else if (C && ArgumentConstants[i].first == C) {
+ // Still the constant value we think it is.
+ } else if (*AI == &*Arg) {
+ // Ignore recursive calls passing argument down.
+ } else {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+ ArgumentConstants[i].second = true;
+ }
+ }
+ }
+
+ // If we got to this point, there is a constant argument!
+ assert(NumNonconstant != ArgumentConstants.size());
+ bool MadeChange = false;
+ Function::arg_iterator AI = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
+ // Do we have a constant argument?
+ if (ArgumentConstants[i].second || AI->use_empty())
+ continue;
+
+ Value *V = ArgumentConstants[i].first;
+ if (V == 0) V = UndefValue::get(AI->getType());
+ AI->replaceAllUsesWith(V);
+ ++NumArgumentsProped;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+
+// Check to see if this function returns one or more constants. If so, replace
+// all callers that use those return values with the constant value. This will
+// leave in the actual return values and instructions, but deadargelim will
+// clean that up.
+//
+// Additionally if a function always returns one of its arguments directly,
+// callers will be updated to use the value they pass in directly instead of
+// using the return value.
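+//
+// E.g. (illustrative): if every "ret" in @f returns the constant i32 7, each
+// caller's use of the call result is replaced with 7; if @f always returns
+// its first argument, each caller instead uses the value it passed in.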
+bool IPCP::PropagateConstantReturn(Function &F) {
+ if (F.getReturnType() == Type::VoidTy)
+ return false; // No return value.
+
+ // If this function could be overridden later in the link stage, we can't
+ // propagate information about its results into callers.
+ if (F.mayBeOverridden())
+ return false;
+
+ // Check to see if this function returns a constant.
+ SmallVector<Value *,4> RetVals;
+ const StructType *STy = dyn_cast<StructType>(F.getReturnType());
+ if (STy)
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ RetVals.push_back(UndefValue::get(STy->getElementType(i)));
+ else
+ RetVals.push_back(UndefValue::get(F.getReturnType()));
+
+ unsigned NumNonConstant = 0;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
+ // Already found conflicting return values?
+ Value *RV = RetVals[i];
+ if (!RV)
+ continue;
+
+ // Find the returned value
+ Value *V;
+ if (!STy)
+ V = RI->getOperand(i);
+ else
+ V = FindInsertedValue(RI->getOperand(0), i);
+
+ if (V) {
+ // Ignore undefs, we can change them into anything
+ if (isa<UndefValue>(V))
+ continue;
+
+ // Try to see if all the rets return the same constant or argument.
+ if (isa<Constant>(V) || isa<Argument>(V)) {
+ if (isa<UndefValue>(RV)) {
+ // No value found yet? Try the current one.
+ RetVals[i] = V;
+ continue;
+ }
+ // Returning the same value? Good.
+ if (RV == V)
+ continue;
+ }
+ }
+ // Different or no known return value? Don't propagate this return
+ // value.
+ RetVals[i] = 0;
+ // All values non constant? Stop looking.
+ if (++NumNonConstant == RetVals.size())
+ return false;
+ }
+ }
+
+ // If we got here, the function returns at least one constant value. Loop
+ // over all users, replacing any uses of the return value with the returned
+ // constant.
+ bool MadeChange = false;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ CallSite CS = CallSite::get(*UI);
+ Instruction* Call = CS.getInstruction();
+
+ // Not a call instruction or a call instruction that's not calling F
+ // directly?
+ if (!Call || !CS.isCallee(UI))
+ continue;
+
+ // Call result not used?
+ if (Call->use_empty())
+ continue;
+
+ MadeChange = true;
+
+ if (STy == 0) {
+ Value* New = RetVals[0];
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Call->replaceAllUsesWith(New);
+ continue;
+ }
+
+ for (Value::use_iterator I = Call->use_begin(), E = Call->use_end();
+ I != E;) {
+ Instruction *Ins = dyn_cast<Instruction>(*I);
+
+ // Increment now, so we can remove the use
+ ++I;
+
+ // Not an instruction? Ignore
+ if (!Ins)
+ continue;
+
+ // Find the index of the retval to replace with
+ int index = -1;
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
+ if (EV->hasIndices())
+ index = *EV->idx_begin();
+
+ // If this use uses a specific return value, and we have a replacement,
+ // replace it.
+ if (index != -1) {
+ Value *New = RetVals[index];
+ if (New) {
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Ins->replaceAllUsesWith(New);
+ Ins->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ if (MadeChange) ++NumReturnValProped;
+ return MadeChange;
+}
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
new file mode 100644
index 0000000..43066076
--- /dev/null
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -0,0 +1,75 @@
+//===-- IPO.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMIPO.a, which provides
+// several transformations over the LLVM intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createArgumentPromotionPass());
+}
+
+void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantMergePass());
+}
+
+void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadArgEliminationPass());
+}
+
+void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadTypeEliminationPass());
+}
+
+void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionAttrsPass());
+}
+
+void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionInliningPass());
+}
+
+void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalDCEPass());
+}
+
+void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalOptimizerPass());
+}
+
+void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPConstantPropagationPass());
+}
+
+void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerSetJmpPass());
+}
+
+void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPruneEHPass());
+}
+
+void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createRaiseAllocationsPass());
+}
+
+void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripDeadPrototypesPass());
+}
+
+void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripSymbolsPass());
+}
diff --git a/lib/Transforms/IPO/IndMemRemoval.cpp b/lib/Transforms/IPO/IndMemRemoval.cpp
new file mode 100644
index 0000000..b55dea2
--- /dev/null
+++ b/lib/Transforms/IPO/IndMemRemoval.cpp
@@ -0,0 +1,89 @@
+//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds places where the memory allocation functions malloc and
+// free may escape into indirect calls. Some transforms are much easier (or
+// only possible) when malloc and free are never called indirectly, so this
+// pass finds the places where the address of a memory function is taken and
+// constructs a bounce function that calls the real function directly.
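+//
+// For illustration, the bounce function built for free looks roughly like:
+//   define linkonce void @free_llvm_bounce(i8* %p) {
+//   entry:
+//     free i8* %p
+//     ret void
+//   }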
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indmemrem"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumBounceSites, "Number of sites modified");
+STATISTIC(NumBounce , "Number of bounce functions created");
+
+namespace {
+ class VISIBILITY_HIDDEN IndMemRemPass : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ IndMemRemPass() : ModulePass(&ID) {}
+
+ virtual bool runOnModule(Module &M);
+ };
+} // end anonymous namespace
+
+char IndMemRemPass::ID = 0;
+static RegisterPass<IndMemRemPass>
+X("indmemrem","Indirect Malloc and Free Removal");
+
+bool IndMemRemPass::runOnModule(Module &M) {
+  // In theory, all direct calls of malloc and free should be promoted
+  // to intrinsics. Therefore, this pass finds where the address of free
+  // or malloc is taken and replaces those uses with bounce functions,
+  // ensuring that every malloc and free that can occur happens through
+  // an intrinsic.
+ bool changed = false;
+ if (Function* F = M.getFunction("free")) {
+ if (F->isDeclaration() && F->arg_size() == 1 && !F->use_empty()) {
+ Function* FN = Function::Create(F->getFunctionType(),
+ GlobalValue::LinkOnceAnyLinkage,
+ "free_llvm_bounce", &M);
+ BasicBlock* bb = BasicBlock::Create("entry",FN);
+ Instruction* R = ReturnInst::Create(bb);
+ new FreeInst(FN->arg_begin(), R);
+ ++NumBounce;
+ NumBounceSites += F->getNumUses();
+ F->replaceAllUsesWith(FN);
+ changed = true;
+ }
+ }
+ if (Function* F = M.getFunction("malloc")) {
+ if (F->isDeclaration() && F->arg_size() == 1 && !F->use_empty()) {
+ Function* FN = Function::Create(F->getFunctionType(),
+ GlobalValue::LinkOnceAnyLinkage,
+ "malloc_llvm_bounce", &M);
+ FN->setDoesNotAlias(0);
+ BasicBlock* bb = BasicBlock::Create("entry",FN);
+ Instruction* c = CastInst::CreateIntegerCast(
+ FN->arg_begin(), Type::Int32Ty, false, "c", bb);
+ Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb);
+ ReturnInst::Create(a, bb);
+ ++NumBounce;
+ NumBounceSites += F->getNumUses();
+ F->replaceAllUsesWith(FN);
+ changed = true;
+ }
+ }
+ return changed;
+}
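+
+// For reference, the malloc bounce function built above has roughly this
+// shape in IR (a sketch; the argument type follows the module's original
+// "malloc" declaration, here assumed to take an i64):
+//
+//   define linkonce i8* @malloc_llvm_bounce(i64 %n) {
+//   entry:
+//     %c = trunc i64 %n to i32
+//     %m = malloc i8, i32 %c
+//     ret i8* %m
+//   }
+//
+// All users of the address of @malloc are then redirected to the bounce
+// function, so the allocation happens through a direct MallocInst that
+// later passes can reason about.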
+
+ModulePass *llvm::createIndMemRemPass() {
+ return new IndMemRemPass();
+}
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
new file mode 100644
index 0000000..5f9ea54
--- /dev/null
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -0,0 +1,75 @@
+//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a custom inliner that handles only functions that
+// are marked as "always inline".
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/InlineCost.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+  // AlwaysInliner only inlines functions that are marked as "always inline".
+ class VISIBILITY_HIDDEN AlwaysInliner : public Inliner {
+ // Functions that are never inlined
+ SmallPtrSet<const Function*, 16> NeverInline;
+ InlineCostAnalyzer CA;
+ public:
+ // Use extremely low threshold.
+ AlwaysInliner() : Inliner(&ID, -2000000000) {}
+ static char ID; // Pass identification, replacement for typeid
+ InlineCost getInlineCost(CallSite CS) {
+ return CA.getInlineCost(CS, NeverInline);
+ }
+ float getInlineFudgeFactor(CallSite CS) {
+ return CA.getInlineFudgeFactor(CS);
+ }
+ void resetCachedCostInfo(Function *Caller) {
+ return CA.resetCachedCostInfo(Caller);
+ }
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, &NeverInline);
+ }
+ virtual bool doInitialization(CallGraph &CG);
+ };
+}
+
+char AlwaysInliner::ID = 0;
+static RegisterPass<AlwaysInliner>
+X("always-inline", "Inliner for always_inline functions");
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+// doInitialization - Initializes the set of functions that have not
+// been annotated with the "always inline" attribute.
+bool AlwaysInliner::doInitialization(CallGraph &CG) {
+ Module &M = CG.getModule();
+
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
+ NeverInline.insert(I);
+
+ return false;
+}
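+
+// With the -2000000000 threshold, the "Cost >= Threshold * FudgeFactor"
+// test in Inliner::shouldInline() (see Inliner.cpp) rejects essentially
+// every call site with a numeric cost; only InlineCost "always" results
+// survive, i.e. call sites whose callee looks like this sketch:
+//
+//   define internal i32 @helper(i32 %x) alwaysinline {
+//     ; ...
+//   }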
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 0000000..e107a00
--- /dev/null
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,106 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into callers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/InlineCost.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN SimpleInliner : public Inliner {
+ // Functions that are never inlined
+ SmallPtrSet<const Function*, 16> NeverInline;
+ InlineCostAnalyzer CA;
+ public:
+ SimpleInliner() : Inliner(&ID) {}
+ SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {}
+ static char ID; // Pass identification, replacement for typeid
+ InlineCost getInlineCost(CallSite CS) {
+ return CA.getInlineCost(CS, NeverInline);
+ }
+ float getInlineFudgeFactor(CallSite CS) {
+ return CA.getInlineFudgeFactor(CS);
+ }
+ void resetCachedCostInfo(Function *Caller) {
+ CA.resetCachedCostInfo(Caller);
+ }
+ virtual bool doInitialization(CallGraph &CG);
+ };
+}
+
+char SimpleInliner::ID = 0;
+static RegisterPass<SimpleInliner>
+X("inline", "Function Integration/Inlining");
+
+Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
+
+Pass *llvm::createFunctionInliningPass(int Threshold) {
+ return new SimpleInliner(Threshold);
+}
+
+// doInitialization - Initializes the set of functions that have been
+// annotated with the noinline attribute.
+bool SimpleInliner::doInitialization(CallGraph &CG) {
+
+ Module &M = CG.getModule();
+
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E; ++I)
+ if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
+ NeverInline.insert(I);
+
+ // Get llvm.noinline
+ GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
+
+ if (GV == 0)
+ return false;
+
+ // Don't crash on invalid code
+ if (!GV->hasInitializer())
+ return false;
+
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+
+ if (InitList == 0)
+ return false;
+
+ // Iterate over each element and add to the NeverInline set
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+
+ // Get Source
+ const Constant *Elt = InitList->getOperand(i);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
+ if (CE->getOpcode() == Instruction::BitCast)
+ Elt = CE->getOperand(0);
+
+ // Insert into set of functions to never inline
+ if (const Function *F = dyn_cast<Function>(Elt))
+ NeverInline.insert(F);
+ }
+
+ return false;
+}
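+
+// A sketch of the (legacy) llvm.noinline list consumed above, for a module
+// that wants to keep @f away from the inliner:
+//
+//   @llvm.noinline = appending global [1 x i8*]
+//                    [i8* bitcast (i32 (i32)* @f to i8*)]
+//
+// Each element is looked through one BitCast constant expression, and any
+// Function found is added to the NeverInline set.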
+
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
new file mode 100644
index 0000000..b382837
--- /dev/null
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -0,0 +1,278 @@
+//===- Inliner.cpp - Code common to all inliners --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics of inlining: visiting every call
+// without missing any and keeping the call graph up to date. The decisions
+// about which calls are profitable to inline are made elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
+ cl::desc("Control the amount of inlining to perform (default = 200)"));
+
+Inliner::Inliner(void *ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
+
+Inliner::Inliner(void *ID, int Threshold)
+ : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.addRequired<TargetData>();
+ CallGraphSCCPass::getAnalysisUsage(Info);
+}
+
+// InlineCallIfPossible - If it is possible to inline the specified call site,
+// do so and update the CallGraph for this operation.
+bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
+ const SmallPtrSet<Function*, 8> &SCCFunctions,
+ const TargetData &TD) {
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
+
+ if (!InlineFunction(CS, &CG, &TD)) return false;
+
+ // If the inlined function had a higher stack protection level than the
+ // calling function, then bump up the caller's stack protection level.
+ if (Callee->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+ !Caller->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtect);
+
+ // If we inlined the last possible call site to the function, delete the
+ // function body now.
+ if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
+ Callee->hasAvailableExternallyLinkage()) &&
+ !SCCFunctions.count(Callee)) {
+ DOUT << " -> Deleting dead function: " << Callee->getName() << "\n";
+ CallGraphNode *CalleeNode = CG[Callee];
+
+ // Remove any call graph edges from the callee to its callees.
+ CalleeNode->removeAllCalledFunctions();
+
+ resetCachedCostInfo(CalleeNode->getFunction());
+
+    // Remove the callee's node from the call graph and delete it.
+ delete CG.removeFunctionFromModule(CalleeNode);
+ ++NumDeleted;
+ }
+ return true;
+}
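+
+// The stack-protector propagation above, spelled out as a small table:
+//
+//   callee attribute   caller before   caller after inlining
+//   sspreq             (anything)      sspreq
+//   ssp                (none)          ssp
+//   ssp                sspreq          sspreq (unchanged)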
+
+/// shouldInline - Return true if the inliner should attempt to inline
+/// at the given CallSite.
+bool Inliner::shouldInline(CallSite CS) {
+ InlineCost IC = getInlineCost(CS);
+ float FudgeFactor = getInlineFudgeFactor(CS);
+
+ if (IC.isAlways()) {
+ DOUT << " Inlining: cost=always"
+ << ", Call: " << *CS.getInstruction();
+ return true;
+ }
+
+ if (IC.isNever()) {
+ DOUT << " NOT Inlining: cost=never"
+ << ", Call: " << *CS.getInstruction();
+ return false;
+ }
+
+ int Cost = IC.getValue();
+ int CurrentThreshold = InlineThreshold;
+ Function *Fn = CS.getCaller();
+ if (Fn && !Fn->isDeclaration()
+ && Fn->hasFnAttr(Attribute::OptimizeForSize)
+ && InlineThreshold != 50) {
+ CurrentThreshold = 50;
+ }
+
+ if (Cost >= (int)(CurrentThreshold * FudgeFactor)) {
+ DOUT << " NOT Inlining: cost=" << Cost
+ << ", Call: " << *CS.getInstruction();
+ return false;
+ } else {
+ DOUT << " Inlining: cost=" << Cost
+ << ", Call: " << *CS.getInstruction();
+ return true;
+ }
+}
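+
+// A worked example of the test above, using the default -inline-threshold=200
+// and an assumed fudge factor of 1.0: a call site with Cost = 180 satisfies
+// 180 < 200 * 1.0, so it is inlined. The same call site in a caller marked
+// OptimizeForSize drops the threshold to 50, and 180 >= 50 * 1.0, so the
+// call is left alone.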
+
+bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ TargetData &TD = getAnalysis<TargetData>();
+
+ SmallPtrSet<Function*, 8> SCCFunctions;
+ DOUT << "Inliner visiting SCC:";
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (F) SCCFunctions.insert(F);
+ DOUT << " " << (F ? F->getName() : "INDIRECTNODE");
+ }
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ std::vector<CallSite> CallSites;
+
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ if (Function *F = SCC[i]->getFunction())
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ CallSite CS = CallSite::get(I);
+ if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(I) &&
+ (!CS.getCalledFunction() ||
+ !CS.getCalledFunction()->isDeclaration()))
+ CallSites.push_back(CS);
+ }
+
+ DOUT << ": " << CallSites.size() << " call sites.\n";
+
+ // Now that we have all of the call sites, move the ones to functions in the
+ // current SCC to the end of the list.
+ unsigned FirstCallInSCC = CallSites.size();
+ for (unsigned i = 0; i < FirstCallInSCC; ++i)
+ if (Function *F = CallSites[i].getCalledFunction())
+ if (SCCFunctions.count(F))
+ std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+ // Now that we have all of the call sites, loop over them and inline them if
+ // it looks profitable to do so.
+ bool Changed = false;
+ bool LocalChange;
+ do {
+ LocalChange = false;
+    // The enclosing do/while loop exists because inlining can cause indirect
+    // calls to become direct calls, exposing new inlining opportunities.
+ for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi)
+ if (Function *Callee = CallSites[CSi].getCalledFunction()) {
+ // Calls to external functions are never inlinable.
+ if (Callee->isDeclaration()) {
+ if (SCC.size() == 1) {
+ std::swap(CallSites[CSi], CallSites.back());
+ CallSites.pop_back();
+ } else {
+ // Keep the 'in SCC / not in SCC' boundary correct.
+ CallSites.erase(CallSites.begin()+CSi);
+ }
+ --CSi;
+ continue;
+ }
+
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ CallSite CS = CallSites[CSi];
+ if (shouldInline(CS)) {
+ Function *Caller = CS.getCaller();
+ // Attempt to inline the function...
+ if (InlineCallIfPossible(CS, CG, SCCFunctions, TD)) {
+ // Remove any cached cost info for this caller, as inlining the
+ // callee has increased the size of the caller (which may be the
+ // same as the callee).
+ resetCachedCostInfo(Caller);
+
+ // Remove this call site from the list. If possible, use
+ // swap/pop_back for efficiency, but do not use it if doing so would
+ // move a call site to a function in this SCC before the
+ // 'FirstCallInSCC' barrier.
+ if (SCC.size() == 1) {
+ std::swap(CallSites[CSi], CallSites.back());
+ CallSites.pop_back();
+ } else {
+ CallSites.erase(CallSites.begin()+CSi);
+ }
+ --CSi;
+
+ ++NumInlined;
+ Changed = true;
+ LocalChange = true;
+ }
+ }
+ }
+ } while (LocalChange);
+
+ return Changed;
+}
+
+// doFinalization - Remove now-dead linkonce functions at the end of
+// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG);
+}
+
+/// removeDeadFunctions - Remove dead functions that are not included in the
+/// DNR (Do Not Remove) list.
+bool Inliner::removeDeadFunctions(CallGraph &CG,
+ SmallPtrSet<const Function *, 16> *DNR) {
+ std::set<CallGraphNode*> FunctionsToRemove;
+
+ // Scan for all of the functions, looking for ones that should now be removed
+ // from the program. Insert the dead ones in the FunctionsToRemove set.
+ for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
+ CallGraphNode *CGN = I->second;
+ if (Function *F = CGN ? CGN->getFunction() : 0) {
+ // If the only remaining users of the function are dead constants, remove
+ // them.
+ F->removeDeadConstantUsers();
+
+ if (DNR && DNR->count(F))
+ continue;
+
+ if ((F->hasLinkOnceLinkage() || F->hasLocalLinkage()) &&
+ F->use_empty()) {
+
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+ // node. These edges might have been made irrelegant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+        // Record the callee's node; it is removed from the call graph below.
+ FunctionsToRemove.insert(CGN);
+ }
+ }
+ }
+
+ // Now that we know which functions to delete, do so. We didn't want to do
+ // this inline, because that would invalidate our CallGraph::iterator
+ // objects. :(
+ bool Changed = false;
+ for (std::set<CallGraphNode*>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end(); I != E; ++I) {
+ resetCachedCostInfo((*I)->getFunction());
+ delete CG.removeFunctionFromModule(*I);
+ ++NumDeleted;
+ Changed = true;
+ }
+
+ return Changed;
+}
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 0000000..5093ae9
--- /dev/null
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,184 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for a
+// main function. If a main function is found, all other functions and all
+// global variables with initializers are marked as internal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "internalize"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of aliases internalized");
+STATISTIC(NumFunctions, "Number of functions internalized");
+STATISTIC(NumGlobals , "Number of global vars internalized");
+
+// APIFile - A file which contains a list of symbols that should not be
+// marked internal.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+ cl::desc("A list of symbol names to preserve"),
+ cl::CommaSeparated);
+
+namespace {
+ class VISIBILITY_HIDDEN InternalizePass : public ModulePass {
+ std::set<std::string> ExternalNames;
+    /// If no API symbols were specified and a main function is defined,
+    /// assume that main is the only API symbol.
+ bool AllButMain;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit InternalizePass(bool AllButMain = true);
+ explicit InternalizePass(const std::vector <const char *>& exportList);
+ void LoadFile(const char *Filename);
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<CallGraph>();
+ }
+ };
+} // end anonymous namespace
+
+char InternalizePass::ID = 0;
+static RegisterPass<InternalizePass>
+X("internalize", "Internalize Global Symbols");
+
+InternalizePass::InternalizePass(bool AllButMain)
+ : ModulePass(&ID), AllButMain(AllButMain){
+ if (!APIFile.empty()) // If a filename is specified, use it.
+ LoadFile(APIFile.c_str());
+ if (!APIList.empty()) // If a list is specified, use it as well.
+ ExternalNames.insert(APIList.begin(), APIList.end());
+}
+
+InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
+ : ModulePass(&ID), AllButMain(false){
+ for(std::vector<const char *>::const_iterator itr = exportList.begin();
+ itr != exportList.end(); itr++) {
+ ExternalNames.insert(*itr);
+ }
+}
+
+void InternalizePass::LoadFile(const char *Filename) {
+ // Load the APIFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ cerr << "WARNING: Internalize couldn't load file '" << Filename
+ << "'! Continuing as if it's empty.\n";
+ return; // Just continue as if the file were empty
+ }
+ while (In) {
+ std::string Symbol;
+ In >> Symbol;
+ if (!Symbol.empty())
+ ExternalNames.insert(Symbol);
+ }
+}
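+
+// The file read above is a plain whitespace-separated list of symbol names,
+// one token per symbol; for example (a sketch):
+//
+//   main
+//   my_public_entry
+//   lib_init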
+
+bool InternalizePass::runOnModule(Module &M) {
+ CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
+ CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
+
+ if (ExternalNames.empty()) {
+    // Return if we're not in 'all but main' mode and have no external API.
+ if (!AllButMain)
+ return false;
+    // If no list or file of symbols was specified, check to see if there is a
+    // "main" symbol defined in the module. If so, use it; otherwise do not
+    // internalize the module, as it is probably a library.
+ //
+ Function *MainFunc = M.getFunction("main");
+ if (MainFunc == 0 || MainFunc->isDeclaration())
+ return false; // No main found, must be a library...
+
+ // Preserve main, internalize all else.
+ ExternalNames.insert(MainFunc->getName());
+ }
+
+ bool Changed = false;
+
+ // Mark all functions not in the api as internal.
+ // FIXME: maybe use private linkage?
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && // Function must be defined here
+ !I->hasLocalLinkage() && // Can't already have internal linkage
+ !ExternalNames.count(I->getName())) {// Not marked to keep external?
+ I->setLinkage(GlobalValue::InternalLinkage);
+ // Remove a callgraph edge from the external node to this function.
+ if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+ Changed = true;
+ ++NumFunctions;
+ DOUT << "Internalizing func " << I->getName() << "\n";
+ }
+
+ // Never internalize the llvm.used symbol. It is used to implement
+ // attribute((used)).
+ ExternalNames.insert("llvm.used");
+
+ // Never internalize anchors used by the machine module info, else the info
+ // won't find them. (see MachineModuleInfo.)
+ ExternalNames.insert("llvm.dbg.compile_units");
+ ExternalNames.insert("llvm.dbg.global_variables");
+ ExternalNames.insert("llvm.dbg.subprograms");
+ ExternalNames.insert("llvm.global_ctors");
+ ExternalNames.insert("llvm.global_dtors");
+ ExternalNames.insert("llvm.noinline");
+ ExternalNames.insert("llvm.global.annotations");
+
+ // Mark all global variables with initializers that are not in the api as
+ // internal as well.
+ // FIXME: maybe use private linkage?
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasLocalLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumGlobals;
+ DOUT << "Internalized gvar " << I->getName() << "\n";
+ }
+
+ // Mark all aliases that are not in the api as internal as well.
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasInternalLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumAliases;
+ DOUT << "Internalized alias " << I->getName() << "\n";
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createInternalizePass(bool AllButMain) {
+ return new InternalizePass(AllButMain);
+}
+
+ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {
+ return new InternalizePass(el);
+}
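+
+// Typical invocations through opt — a sketch using the options declared
+// above:
+//
+//   opt -internalize -internalize-public-api-list=main,foo in.bc -o out.bc
+//   opt -internalize -internalize-public-api-file=syms.txt in.bc -o out.bc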
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
new file mode 100644
index 0000000..0c65443
--- /dev/null
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -0,0 +1,261 @@
+//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the ExtractLoop() scalar transformation to extract each
+// top-level loop into its own new function. If the loop is the ONLY loop in a
+// given function, it is not touched. This pass is most useful for debugging
+// via bugpoint.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-extract"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumExtracted, "Number of loops extracted");
+
+namespace {
+ // FIXME: This is not a function pass, but the PassManager doesn't allow
+ // Module passes to require FunctionPasses, so we can't get loop info if we're
+ // not a function pass.
+ struct VISIBILITY_HIDDEN LoopExtractor : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ unsigned NumLoops;
+
+ explicit LoopExtractor(unsigned numLoops = ~0)
+ : FunctionPass(&ID), NumLoops(numLoops) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ }
+ };
+}
+
+char LoopExtractor::ID = 0;
+static RegisterPass<LoopExtractor>
+X("loop-extract", "Extract loops into new functions");
+
+namespace {
+ /// SingleLoopExtractor - For bugpoint.
+ struct SingleLoopExtractor : public LoopExtractor {
+ static char ID; // Pass identification, replacement for typeid
+ SingleLoopExtractor() : LoopExtractor(1) {}
+ };
+} // End anonymous namespace
+
+char SingleLoopExtractor::ID = 0;
+static RegisterPass<SingleLoopExtractor>
+Y("loop-extract-single", "Extract at most one loop into a new function");
+
+// createLoopExtractorPass - This pass extracts all natural loops from the
+// program into a function if it can.
+//
+FunctionPass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+
+bool LoopExtractor::runOnFunction(Function &F) {
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+
+ // If this function has no loops, there is nothing to do.
+ if (LI.empty())
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ // If there is more than one top-level loop in this function, extract all of
+ // the loops.
+ bool Changed = false;
+ if (LI.end()-LI.begin() > 1) {
+ for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ Changed |= ExtractLoop(DT, *i) != 0;
+ ++NumExtracted;
+ }
+ } else {
+ // Otherwise there is exactly one top-level loop. If this function is more
+ // than a minimal wrapper around the loop, extract the loop.
+ Loop *TLL = *LI.begin();
+ bool ShouldExtractLoop = false;
+
+ // Extract the loop if the entry block doesn't branch to the loop header.
+ TerminatorInst *EntryTI = F.getEntryBlock().getTerminator();
+ if (!isa<BranchInst>(EntryTI) ||
+ !cast<BranchInst>(EntryTI)->isUnconditional() ||
+ EntryTI->getSuccessor(0) != TLL->getHeader())
+ ShouldExtractLoop = true;
+ else {
+ // Check to see if any exits from the loop are more than just return
+ // blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ TLL->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+ ShouldExtractLoop = true;
+ break;
+ }
+ }
+
+ if (ShouldExtractLoop) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ Changed |= ExtractLoop(DT, TLL) != 0;
+ ++NumExtracted;
+ } else {
+ // Okay, this function is a minimal container around the specified loop.
+ // If we extract the loop, we will continue to just keep extracting it
+ // infinitely... so don't extract it. However, if the loop contains any
+ // subloops, extract them.
+ for (Loop::iterator i = TLL->begin(), e = TLL->end(); i != e; ++i) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ Changed |= ExtractLoop(DT, *i) != 0;
+ ++NumExtracted;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+// program into a function if it can. This is used by bugpoint.
+//
+FunctionPass *llvm::createSingleLoopExtractorPass() {
+ return new SingleLoopExtractor();
+}
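+
+// Both variants are reachable through opt — a sketch:
+//
+//   opt -loop-extract in.bc -o out.bc         # extract every top-level loop
+//   opt -loop-extract-single in.bc -o out.bc  # extract at most one loop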
+
+
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of basic blocks to not extract"),
+ cl::Hidden);
+
+namespace {
+ /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
+ /// from the module into their own functions except for those specified by the
+ /// BlocksToNotExtract list.
+ class BlockExtractorPass : public ModulePass {
+ void LoadFile(const char *Filename);
+
+ std::vector<BasicBlock*> BlocksToNotExtract;
+ std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit BlockExtractorPass(const std::vector<BasicBlock*> &B)
+ : ModulePass(&ID), BlocksToNotExtract(B) {
+ if (!BlockFile.empty())
+ LoadFile(BlockFile.c_str());
+ }
+ BlockExtractorPass() : ModulePass(&ID) {}
+
+ bool runOnModule(Module &M);
+ };
+}
+
+char BlockExtractorPass::ID = 0;
+static RegisterPass<BlockExtractorPass>
+XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)");
+
+// createBlockExtractorPass - This pass extracts all blocks (except those
+// specified in the argument list) from the functions in the module.
+//
+ModulePass *llvm::createBlockExtractorPass(const std::vector<BasicBlock*> &BTNE)
+{
+ return new BlockExtractorPass(BTNE);
+}
+
+void BlockExtractorPass::LoadFile(const char *Filename) {
+ // Load the BlockFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ cerr << "WARNING: BlockExtractor couldn't load file '" << Filename
+ << "'!\n";
+ return;
+ }
+ while (In) {
+ std::string FunctionName, BlockName;
+ In >> FunctionName;
+ In >> BlockName;
+ if (!BlockName.empty())
+ BlocksToNotExtractByName.push_back(
+ std::make_pair(FunctionName, BlockName));
+ }
+}
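+
+// The block file read above holds whitespace-separated pairs of names,
+// function first and block second; for example (a sketch):
+//
+//   main entry
+//   foo bb3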
+
+bool BlockExtractorPass::runOnModule(Module &M) {
+ std::set<BasicBlock*> TranslatedBlocksToNotExtract;
+ for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
+ BasicBlock *BB = BlocksToNotExtract[i];
+ Function *F = BB->getParent();
+
+ // Map the corresponding function in this module.
+ Function *MF = M.getFunction(F->getName());
+ assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?");
+
+ // Figure out which index the basic block is in its function.
+ Function::iterator BBI = MF->begin();
+ std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
+ TranslatedBlocksToNotExtract.insert(BBI);
+ }
+
+ while (!BlocksToNotExtractByName.empty()) {
+    // There's no way to find BBs by name without looking at every BB inside
+    // every Function. Fortunately, this list is always empty except when
+    // used by bugpoint, in which case correctness is more important than
+    // performance.
+
+ std::string &FuncName = BlocksToNotExtractByName.back().first;
+ std::string &BlockName = BlocksToNotExtractByName.back().second;
+
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+ Function &F = *FI;
+ if (F.getName() != FuncName) continue;
+
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock &BB = *BI;
+ if (BB.getName() != BlockName) continue;
+
+ TranslatedBlocksToNotExtract.insert(BI);
+ }
+ }
+
+ BlocksToNotExtractByName.pop_back();
+ }
+
+ // Now that we know which blocks to not extract, figure out which ones we WANT
+ // to extract.
+ std::vector<BasicBlock*> BlocksToExtract;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (!TranslatedBlocksToNotExtract.count(BB))
+ BlocksToExtract.push_back(BB);
+
+ for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i)
+ ExtractBasicBlock(BlocksToExtract[i]);
+
+ return !BlocksToExtract.empty();
+}
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
new file mode 100644
index 0000000..dfc040b
--- /dev/null
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -0,0 +1,536 @@
+//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering of setjmp and longjmp to use the
+// LLVM invoke and unwind instructions as necessary.
+//
+// Lowering of longjmp is fairly trivial. We replace the call with a
+// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()".
+// This unwinds the stack for us, calling all of the destructors for
+// objects allocated on the stack.
+//
+// At a setjmp call, the basic block is split and the setjmp removed.
+// The calls in a function that contains a setjmp are converted to invokes
+// whose except part checks whether a longjmp exception was thrown and,
+// if so, whether it is handled in this function. If it is, the handler
+// receives the value returned by the longjmp and control goes to where the
+// basic block was split. Invoke instructions are handled similarly, with
+// the original except block being executed if the exception isn't a
+// longjmp handled by this function.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FIXME: This pass doesn't deal with PHI nodes just yet. That is,
+// we expect this to occur before SSAification is done. This would seem
+// to make sense, but in general, it might be a good idea to make this
+// pass invokable via the "opt" command at will.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowersetjmp"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(LongJmpsTransformed, "Number of longjmps transformed");
+STATISTIC(SetJmpsTransformed , "Number of setjmps transformed");
+STATISTIC(CallsTransformed , "Number of calls invokified");
+STATISTIC(InvokesTransformed , "Number of invokes modified");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // LowerSetJmp pass implementation.
+ class VISIBILITY_HIDDEN LowerSetJmp : public ModulePass,
+ public InstVisitor<LowerSetJmp> {
+ // LLVM library functions...
+ Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap
+ Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap
+ Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map
+ Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp
+ Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception
+ Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception
+ Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value
+
+ typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair;
+
+ // Keep track of those basic blocks reachable via a depth-first search of
+ // the CFG from a setjmp call. We only need to transform those "call" and
+ // "invoke" instructions that are reachable from the setjmp call site.
+ std::set<BasicBlock*> DFSBlocks;
+
+ // The setjmp map is going to hold information about which setjmps
+ // were called (each setjmp gets its own number) and with which
+ // buffer it was called.
+ std::map<Function*, AllocaInst*> SJMap;
+
+ // The rethrow basic block map holds the basic block to branch to if
+ // the exception isn't handled in the current function and needs to
+ // be rethrown.
+ std::map<const Function*, BasicBlock*> RethrowBBMap;
+
+ // The preliminary basic block map holds a basic block that grabs the
+ // exception and determines if it's handled by the current function.
+ std::map<const Function*, BasicBlock*> PrelimBBMap;
+
+ // The switch/value map holds a switch inst/call inst pair. The
+ // switch inst controls which handler (if any) gets called and the
+ // value is the value returned to that handler by the call to
+ // __llvm_sjljeh_get_longjmp_value.
+ std::map<const Function*, SwitchValuePair> SwitchValMap;
+
+ // A map of which setjmps we've seen so far in a function.
+ std::map<const Function*, unsigned> SetJmpIDMap;
+
+ AllocaInst* GetSetJmpMap(Function* Func);
+ BasicBlock* GetRethrowBB(Function* Func);
+ SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow);
+
+ void TransformLongJmpCall(CallInst* Inst);
+ void TransformSetJmpCall(CallInst* Inst);
+
+ bool IsTransformableFunction(const std::string& Name);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSetJmp() : ModulePass(&ID) {}
+
+ void visitCallInst(CallInst& CI);
+ void visitInvokeInst(InvokeInst& II);
+ void visitReturnInst(ReturnInst& RI);
+ void visitUnwindInst(UnwindInst& UI);
+
+ bool runOnModule(Module& M);
+ bool doInitialization(Module& M);
+ };
+} // end anonymous namespace
+
+char LowerSetJmp::ID = 0;
+static RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump");
+
+// run - Run the transformation on the program. We grab the function
+// prototypes for longjmp and setjmp. If they are used in the program,
+// then we can go directly to the places they're at and transform them.
+bool LowerSetJmp::runOnModule(Module& M) {
+ bool Changed = false;
+
+ // These are what the functions are called.
+ Function* SetJmp = M.getFunction("llvm.setjmp");
+ Function* LongJmp = M.getFunction("llvm.longjmp");
+
+ // This program doesn't have longjmp and setjmp calls.
+ if ((!LongJmp || LongJmp->use_empty()) &&
+ (!SetJmp || SetJmp->use_empty())) return false;
+
+ // Initialize some values and functions we'll need to transform the
+ // setjmp/longjmp functions.
+ doInitialization(M);
+
+ if (SetJmp) {
+ for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end();
+ B != E; ++B) {
+ BasicBlock* BB = cast<Instruction>(*B)->getParent();
+ for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks),
+ E = df_ext_end(BB, DFSBlocks); I != E; ++I)
+ /* empty */;
+ }
+
+ while (!SetJmp->use_empty()) {
+ assert(isa<CallInst>(SetJmp->use_back()) &&
+ "User of setjmp intrinsic not a call?");
+ TransformSetJmpCall(cast<CallInst>(SetJmp->use_back()));
+ Changed = true;
+ }
+ }
+
+ if (LongJmp)
+ while (!LongJmp->use_empty()) {
+ assert(isa<CallInst>(LongJmp->use_back()) &&
+ "User of longjmp intrinsic not a call?");
+ TransformLongJmpCall(cast<CallInst>(LongJmp->use_back()));
+ Changed = true;
+ }
+
+ // Now go through the affected functions and convert calls and invokes
+ // to new invokes...
+ for (std::map<Function*, AllocaInst*>::iterator
+ B = SJMap.begin(), E = SJMap.end(); B != E; ++B) {
+ Function* F = B->first;
+ for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) {
+ visit(*IB++);
+ if (IB != BB->end() && IB->getParent() != BB)
+ break; // The next instruction got moved to a different block!
+ }
+ }
+
+ DFSBlocks.clear();
+ SJMap.clear();
+ RethrowBBMap.clear();
+ PrelimBBMap.clear();
+ SwitchValMap.clear();
+ SetJmpIDMap.clear();
+
+ return Changed;
+}
+
+// doInitialization - For the lower setjmp/longjmp pass, this ensures that a
+// module contains declarations for the intrinsic functions we are going
+// to call to convert longjmp and setjmp calls.
+//
+// This function is always successful, unless it isn't.
+bool LowerSetJmp::doInitialization(Module& M)
+{
+ const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty);
+ const Type *SBPPTy = PointerType::getUnqual(SBPTy);
+
+ // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
+ // a description of the following library functions.
+
+ // void __llvm_sjljeh_init_setjmpmap(void**)
+ InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap",
+ Type::VoidTy, SBPPTy, (Type *)0);
+ // void __llvm_sjljeh_destroy_setjmpmap(void**)
+ DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap",
+ Type::VoidTy, SBPPTy, (Type *)0);
+
+ // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)
+ AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map",
+ Type::VoidTy, SBPPTy, SBPTy,
+ Type::Int32Ty, (Type *)0);
+
+  // void __llvm_sjljeh_throw_longjmp(void*, int)
+ ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp",
+ Type::VoidTy, SBPTy, Type::Int32Ty,
+ (Type *)0);
+
+ // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)
+ TryCatchLJ =
+ M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception",
+ Type::Int32Ty, SBPPTy, (Type *)0);
+
+ // bool __llvm_sjljeh_is_longjmp_exception()
+ IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception",
+ Type::Int1Ty, (Type *)0);
+
+ // int __llvm_sjljeh_get_longjmp_value()
+ GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value",
+ Type::Int32Ty, (Type *)0);
+ return true;
+}
+
+// IsTransformableFunction - Return true if the function name isn't one
+// of the ones we don't want transformed. Currently, don't transform any
+// "llvm.{setjmp,longjmp}" functions or any of the setjmp/longjmp error
+// handling functions (beginning with "__llvm_sjljeh_"); they don't throw
+// exceptions.
+bool LowerSetJmp::IsTransformableFunction(const std::string& Name) {
+ std::string SJLJEh("__llvm_sjljeh");
+
+ if (Name.size() > SJLJEh.size())
+ return std::string(Name.begin(), Name.begin() + SJLJEh.size()) != SJLJEh;
+
+ return true;
+}
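+
+// For example, "printf" and "foo" are transformable, while
+// "__llvm_sjljeh_throw_longjmp" matches the prefix check above and is left
+// untouched.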
+
+// TransformLongJmpCall - Transform a longjmp call into a call to the
+// internal __llvm_sjljeh_throw_longjmp function. It then takes care of
+// throwing the exception for us.
+void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
+{
+ const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty);
+
+ // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
+ // same parameters as "longjmp", except that the buffer is cast to a
+ // char*. It returns "void", so it doesn't need to replace any of
+ // Inst's uses and doesn't get a name.
+ CastInst* CI =
+ new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst);
+ SmallVector<Value *, 2> Args;
+ Args.push_back(CI);
+ Args.push_back(Inst->getOperand(2));
+ CallInst::Create(ThrowLongJmp, Args.begin(), Args.end(), "", Inst);
+
+ SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
+
+ // If the function has a setjmp call in it (they are transformed first)
+ // we should branch to the basic block that determines if this longjmp
+ // is applicable here. Otherwise, issue an unwind.
+ if (SVP.first)
+ BranchInst::Create(SVP.first->getParent(), Inst);
+ else
+ new UnwindInst(Inst);
+
+ // Remove all insts after the branch/unwind inst. Go from back to front to
+ // avoid replaceAllUsesWith if possible.
+ BasicBlock *BB = Inst->getParent();
+ Instruction *Removed;
+ do {
+ Removed = &BB->back();
+ // If the removed instructions have any users, replace them now.
+ if (!Removed->use_empty())
+ Removed->replaceAllUsesWith(UndefValue::get(Removed->getType()));
+ Removed->eraseFromParent();
+ } while (Removed != Inst);
+
+ ++LongJmpsTransformed;
+}
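+
+// Sketch of the rewrite for a longjmp in a function with no setjmp:
+//
+//   call void @llvm.longjmp(i8* %buf, i32 %val)
+//
+// becomes
+//
+//   %LJBuf = bitcast i8* %buf to i8*
+//   call void @__llvm_sjljeh_throw_longjmp(i8* %LJBuf, i32 %val)
+//   unwind                      ; or a branch to the setjmp decision block
+//
+// and every instruction after the original call is removed.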
+
+// GetSetJmpMap - Retrieve (create and initialize, if necessary) the
+// setjmp map. This map is going to hold information about which setjmps
+// were called (each setjmp gets its own number) and with which buffer it
+// was called. There can be only one!
+AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)
+{
+ if (SJMap[Func]) return SJMap[Func];
+
+ // Insert the setjmp map initialization before the first instruction in
+ // the function.
+ Instruction* Inst = Func->getEntryBlock().begin();
+ assert(Inst && "Couldn't find even ONE instruction in entry block!");
+
+ // Fill in the alloca and call to initialize the SJ map.
+ const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty);
+ AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);
+ CallInst::Create(InitSJMap, Map, "", Inst);
+ return SJMap[Func] = Map;
+}
+
+// GetRethrowBB - Only one rethrow basic block is needed per function.
+// If this is a longjmp exception but not handled in this block, this BB
+// performs the rethrow.
+BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)
+{
+ if (RethrowBBMap[Func]) return RethrowBBMap[Func];
+
+ // The basic block we're going to jump to if we need to rethrow the
+ // exception.
+ BasicBlock* Rethrow = BasicBlock::Create("RethrowExcept", Func);
+
+ // Fill in the "Rethrow" BB with a call to rethrow the exception. This
+ // is the last instruction in the BB since at this point the runtime
+ // should exit this function and go to the next function.
+ new UnwindInst(Rethrow);
+ return RethrowBBMap[Func] = Rethrow;
+}
+
+// GetSJSwitch - Return the switch statement that controls which handler
+// (if any) gets called and the value returned to that handler.
+LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
+ BasicBlock* Rethrow)
+{
+ if (SwitchValMap[Func].first) return SwitchValMap[Func];
+
+ BasicBlock* LongJmpPre = BasicBlock::Create("LongJmpBlkPre", Func);
+
+ // Keep track of the preliminary basic block for some of the other
+ // transformations.
+ PrelimBBMap[Func] = LongJmpPre;
+
+ // Grab the exception.
+ CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre);
+
+ // The "decision basic block" gets the number associated with the
+ // setjmp call returning to switch on and the value returned by
+ // longjmp.
+ BasicBlock* DecisionBB = BasicBlock::Create("LJDecisionBB", Func);
+
+ BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
+
+ // Fill in the "decision" basic block.
+ CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB);
+ CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum",
+ DecisionBB);
+
+ SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB);
+ return SwitchValMap[Func] = SwitchValuePair(SI, LJVal);
+}
+
+// TransformSetJmpCall - The setjmp call is a bit trickier to transform.
+// We're going to convert all setjmp calls to nops. Then all "call" and
+// "invoke" instructions in the function are converted to "invoke" where
+// the "except" branch is used when returning from a longjmp call.
+void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
+{
+ BasicBlock* ABlock = Inst->getParent();
+ Function* Func = ABlock->getParent();
+
+ // Add this setjmp to the setjmp map.
+ const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty);
+ CastInst* BufPtr =
+ new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst);
+ std::vector<Value*> Args =
+ make_vector<Value*>(GetSetJmpMap(Func), BufPtr,
+ ConstantInt::get(Type::Int32Ty,
+ SetJmpIDMap[Func]++), 0);
+ CallInst::Create(AddSJToMap, Args.begin(), Args.end(), "", Inst);
+
+ // We are guaranteed that there are no values live across basic blocks
+ // (because we are "not in SSA form" yet), but there can still be values live
+ // in basic blocks. Because of this, splitting the setjmp block can cause
+ // values above the setjmp to not dominate uses which are after the setjmp
+ // call. For all of these occasions, we must spill the value to the stack.
+ //
+ std::set<Instruction*> InstrsAfterCall;
+
+ // The call is probably very close to the end of the basic block, for the
+ // common usage pattern of: 'if (setjmp(...))', so keep track of the
+ // instructions after the call.
+ for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end();
+ I != E; ++I)
+ InstrsAfterCall.insert(I);
+
+ for (BasicBlock::iterator II = ABlock->begin();
+ II != BasicBlock::iterator(Inst); ++II)
+ // Loop over all of the uses of instruction. If any of them are after the
+ // call, "spill" the value to the stack.
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != ABlock ||
+ InstrsAfterCall.count(cast<Instruction>(*UI))) {
+ DemoteRegToStack(*II);
+ break;
+ }
+ InstrsAfterCall.clear();
+
+ // Change the setjmp call into a branch statement. We'll remove the
+ // setjmp call in a little bit. No worries.
+ BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst);
+ assert(SetJmpContBlock && "Couldn't split setjmp BB!!");
+
+ SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont");
+
+ // Add the SetJmpContBlock to the set of blocks reachable from a setjmp.
+ DFSBlocks.insert(SetJmpContBlock);
+
+ // This PHI node will be in the new block created from the
+ // splitBasicBlock call.
+ PHINode* PHI = PHINode::Create(Type::Int32Ty, "SetJmpReturn", Inst);
+
+ // Coming from a call to setjmp, the return is 0.
+ PHI->addIncoming(ConstantInt::getNullValue(Type::Int32Ty), ABlock);
+
+ // Add the case for this setjmp's number...
+ SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func));
+ SVP.first->addCase(ConstantInt::get(Type::Int32Ty, SetJmpIDMap[Func] - 1),
+ SetJmpContBlock);
+
+ // Value coming from the handling of the exception.
+ PHI->addIncoming(SVP.second, SVP.second->getParent());
+
+ // Replace all uses of this instruction with the PHI node created by
+ // the eradication of setjmp.
+ Inst->replaceAllUsesWith(PHI);
+ Inst->eraseFromParent();
+
+ ++SetJmpsTransformed;
+}
+
+// visitCallInst - This converts all LLVM call instructions into invoke
+// instructions. The except part of the invoke goes to the "LongJmpBlkPre"
+// block, which grabs the exception and proceeds to determine if it's a
+// longjmp exception or not.
+void LowerSetJmp::visitCallInst(CallInst& CI)
+{
+ if (CI.getCalledFunction())
+ if (!IsTransformableFunction(CI.getCalledFunction()->getName()) ||
+ CI.getCalledFunction()->isIntrinsic()) return;
+
+ BasicBlock* OldBB = CI.getParent();
+
+ // If not reachable from a setjmp call, don't transform.
+ if (!DFSBlocks.count(OldBB)) return;
+
+ BasicBlock* NewBB = OldBB->splitBasicBlock(CI);
+ assert(NewBB && "Couldn't split BB of \"call\" instruction!!");
+ DFSBlocks.insert(NewBB);
+ NewBB->setName("Call2Invoke");
+
+ Function* Func = OldBB->getParent();
+
+ // Construct the new "invoke" instruction.
+ TerminatorInst* Term = OldBB->getTerminator();
+ std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end());
+ InvokeInst* II =
+ InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
+ Params.begin(), Params.end(), CI.getName(), Term);
+ II->setCallingConv(CI.getCallingConv());
+ II->setAttributes(CI.getAttributes());
+
+ // Replace the old call inst with the invoke inst and remove the call.
+ CI.replaceAllUsesWith(II);
+ CI.eraseFromParent();
+
+ // The old terminator is useless now that we have the invoke inst.
+ Term->eraseFromParent();
+ ++CallsTransformed;
+}
+
+// visitInvokeInst - Converting the "invoke" instruction is fairly
+// straightforward. The old exception part is replaced by a query asking
+// whether this is a longjmp exception. If it is, control goes to the longjmp
+// exception blocks. Otherwise, control is passed to the old exception block.
+void LowerSetJmp::visitInvokeInst(InvokeInst& II)
+{
+ if (II.getCalledFunction())
+ if (!IsTransformableFunction(II.getCalledFunction()->getName()) ||
+ II.getCalledFunction()->isIntrinsic()) return;
+
+ BasicBlock* BB = II.getParent();
+
+ // If not reachable from a setjmp call, don't transform.
+ if (!DFSBlocks.count(BB)) return;
+
+ BasicBlock* ExceptBB = II.getUnwindDest();
+
+ Function* Func = BB->getParent();
+ BasicBlock* NewExceptBB = BasicBlock::Create("InvokeExcept", Func);
+
+ // If this is a longjmp exception, then branch to the preliminary BB of
+ // the longjmp exception handling. Otherwise, go to the old exception.
+ CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept",
+ NewExceptBB);
+
+ BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB);
+
+ II.setUnwindDest(NewExceptBB);
+ ++InvokesTransformed;
+}
+
+// visitReturnInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitReturnInst(ReturnInst &RI) {
+ Function* Func = RI.getParent()->getParent();
+ CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI);
+}
+
+// visitUnwindInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitUnwindInst(UnwindInst &UI) {
+ Function* Func = UI.getParent()->getParent();
+ CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI);
+}
+
+ModulePass *llvm::createLowerSetJmpPass() {
+ return new LowerSetJmp();
+}
+
diff --git a/lib/Transforms/IPO/Makefile b/lib/Transforms/IPO/Makefile
new file mode 100644
index 0000000..5c42374
--- /dev/null
+++ b/lib/Transforms/IPO/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/IPO/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMipo
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
new file mode 100644
index 0000000..17bc2d4
--- /dev/null
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -0,0 +1,377 @@
+//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergeable and folds them.
+//
+// A Function will not be analyzed if:
+// * it is overridable at runtime (except for weak linkage), or
+// * it is used by anything other than the callee parameter of a call/invoke
+//
+// A hash is computed from the function, based on its type and number of
+// basic blocks.
+//
+// Once all hashes are computed, we perform an expensive equality comparison
+// on each function pair. This takes n^2/2 comparisons per bucket, so it's
+// important that the hash function be high quality. The equality comparison
+// iterates through each instruction in each basic block.
+//
+// When a match is found, the functions are folded. We can only fold two
+// functions when we know that the definition of one of them is not
+// overridable.
+// * fold a function marked internal by replacing all of its users.
+// * fold extern or weak functions by replacing them with a global alias
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * fold vector<T*>::push_back and vector<S*>::push_back.
+//
+// These two functions have different types, but in a way that doesn't matter
+// to us. As long as we never see an S or T itself, using S* and S** is the
+// same as using a T* and T**.
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such implementations.
+//
+//===----------------------------------------------------------------------===//
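+// Illustrative example (hypothetical IR, not from this file):
+//
+//   define internal i32 @foo(i32 %x) { %y = add i32 %x, 1  ret i32 %y }
+//   define internal i32 @bar(i32 %x) { %y = add i32 %x, 1  ret i32 %y }
+//
+// The two bodies compare equal, so @bar is erased and all of its callers are
+// rewritten to call @foo. Had @bar been external or weak instead, it would
+// have been kept visible as a global alias pointing at @foo.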
+
+#define DEBUG_TYPE "mergefunc"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <map>
+#include <vector>
+using namespace llvm;
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumMergeFails, "Number of identical function pairings not merged");
+
+namespace {
+ struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ MergeFunctions() : ModulePass((intptr_t)&ID) {}
+
+ bool runOnModule(Module &M);
+ };
+}
+
+char MergeFunctions::ID = 0;
+static RegisterPass<MergeFunctions>
+X("mergefunc", "Merge Functions");
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
+
+static unsigned long hash(const Function *F) {
+ return F->size() ^ reinterpret_cast<unsigned long>(F->getType());
+ //return F->size() ^ F->arg_size() ^ F->getReturnType();
+}
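+// Note: the hash above only mixes the basic-block count with the (uniqued)
+// type pointer, so two functions with the same type and the same number of
+// blocks always land in the same bucket and are then compared by equals()
+// below.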
+
+static bool compare(const Value *V, const Value *U) {
+ assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) &&
+ "Must not compare basic blocks.");
+
+ assert(V->getType() == U->getType() &&
+ "Two of the same operation have operands of different type.");
+
+ // TODO: If the constant is an expression of F, we should accept that it's
+ // equal to the same expression in terms of G.
+ if (isa<Constant>(V))
+ return V == U;
+
+ // The caller has ensured that ValueMap[V] != U. Since Arguments are
+ // pre-loaded into the ValueMap, and Instructions are added as we go, we know
+  // that this can only be a mismatch.
+ if (isa<Instruction>(V) || isa<Argument>(V))
+ return false;
+
+ if (isa<InlineAsm>(V) && isa<InlineAsm>(U)) {
+ const InlineAsm *IAF = cast<InlineAsm>(V);
+ const InlineAsm *IAG = cast<InlineAsm>(U);
+ return IAF->getAsmString() == IAG->getAsmString() &&
+ IAF->getConstraintString() == IAG->getConstraintString();
+ }
+
+ return false;
+}
+
+static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
+ DenseMap<const Value *, const Value *> &ValueMap,
+ DenseMap<const Value *, const Value *> &SpeculationMap) {
+  // Speculatively add it anyway. If it's false, we'll notice a difference
+  // later, and this won't matter.
+ ValueMap[BB1] = BB2;
+
+ BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
+ BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+
+ do {
+ if (!FI->isSameOperationAs(const_cast<Instruction *>(&*GI)))
+ return false;
+
+ if (FI->getNumOperands() != GI->getNumOperands())
+ return false;
+
+ if (ValueMap[FI] == GI) {
+ ++FI, ++GI;
+ continue;
+ }
+
+ if (ValueMap[FI] != NULL)
+ return false;
+
+ for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+ Value *OpF = FI->getOperand(i);
+ Value *OpG = GI->getOperand(i);
+
+ if (ValueMap[OpF] == OpG)
+ continue;
+
+ if (ValueMap[OpF] != NULL)
+ return false;
+
+ assert(OpF->getType() == OpG->getType() &&
+ "Two of the same operation has operands of different type.");
+
+ if (OpF->getValueID() != OpG->getValueID())
+ return false;
+
+ if (isa<PHINode>(FI)) {
+ if (SpeculationMap[OpF] == NULL)
+ SpeculationMap[OpF] = OpG;
+ else if (SpeculationMap[OpF] != OpG)
+ return false;
+ continue;
+ } else if (isa<BasicBlock>(OpF)) {
+ assert(isa<TerminatorInst>(FI) &&
+ "BasicBlock referenced by non-Terminator non-PHI");
+ // This call changes the ValueMap, hence we can't use
+ // Value *& = ValueMap[...]
+ if (!equals(cast<BasicBlock>(OpF), cast<BasicBlock>(OpG), ValueMap,
+ SpeculationMap))
+ return false;
+ } else {
+ if (!compare(OpF, OpG))
+ return false;
+ }
+
+ ValueMap[OpF] = OpG;
+ }
+
+ ValueMap[FI] = GI;
+ ++FI, ++GI;
+ } while (FI != FE && GI != GE);
+
+ return FI == FE && GI == GE;
+}
+
+static bool equals(const Function *F, const Function *G) {
+ // We need to recheck everything, but check the things that weren't included
+ // in the hash first.
+
+ if (F->getAttributes() != G->getAttributes())
+ return false;
+
+ if (F->hasGC() != G->hasGC())
+ return false;
+
+ if (F->hasGC() && F->getGC() != G->getGC())
+ return false;
+
+ if (F->hasSection() != G->hasSection())
+ return false;
+
+ if (F->hasSection() && F->getSection() != G->getSection())
+ return false;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (F->getCallingConv() != G->getCallingConv())
+ return false;
+
+ // TODO: We want to permit cases where two functions take T* and S* but
+ // only load or store them into T** and S**.
+ if (F->getType() != G->getType())
+ return false;
+
+ DenseMap<const Value *, const Value *> ValueMap;
+ DenseMap<const Value *, const Value *> SpeculationMap;
+ ValueMap[F] = G;
+
+ assert(F->arg_size() == G->arg_size() &&
+ "Identical functions have a different number of args.");
+
+ for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
+ fe = F->arg_end(); fi != fe; ++fi, ++gi)
+ ValueMap[fi] = gi;
+
+ if (!equals(&F->getEntryBlock(), &G->getEntryBlock(), ValueMap,
+ SpeculationMap))
+ return false;
+
+ for (DenseMap<const Value *, const Value *>::iterator
+ I = SpeculationMap.begin(), E = SpeculationMap.end(); I != E; ++I) {
+ if (ValueMap[I->first] != I->second)
+ return false;
+ }
+
+ return true;
+}
+
+static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
+ if (FnVec[i]->mayBeOverridden() && !FnVec[j]->mayBeOverridden())
+ std::swap(FnVec[i], FnVec[j]);
+
+ Function *F = FnVec[i];
+ Function *G = FnVec[j];
+
+ if (!F->mayBeOverridden()) {
+ if (G->hasLocalLinkage()) {
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ G->replaceAllUsesWith(F);
+ G->eraseFromParent();
+ ++NumFunctionsMerged;
+ return true;
+ }
+
+ if (G->hasExternalLinkage() || G->hasWeakLinkage()) {
+ GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
+ F, G->getParent());
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+ ++NumFunctionsMerged;
+ return true;
+ }
+ }
+
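+  // Sketch of the weak/weak case below: neither body may simply be deleted,
+  // because another module could override either symbol at link time.
+  // Instead, F's body is renamed "folded.<name>", given external linkage,
+  // and both original names become weak aliases pointing at it.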
+ if (F->hasWeakLinkage() && G->hasWeakLinkage()) {
+ GlobalAlias *GA_F = new GlobalAlias(F->getType(), F->getLinkage(), "",
+ 0, F->getParent());
+ GA_F->takeName(F);
+ GA_F->setVisibility(F->getVisibility());
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ F->replaceAllUsesWith(GA_F);
+ F->setName("folded." + GA_F->getName());
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ GA_F->setAliasee(F);
+
+ GlobalAlias *GA_G = new GlobalAlias(G->getType(), G->getLinkage(), "",
+ F, G->getParent());
+ GA_G->takeName(G);
+ GA_G->setVisibility(G->getVisibility());
+ G->replaceAllUsesWith(GA_G);
+ G->eraseFromParent();
+
+ ++NumFunctionsMerged;
+ return true;
+ }
+
+ DOUT << "Failed on " << F->getName() << " and " << G->getName() << "\n";
+
+ ++NumMergeFails;
+ return false;
+}
+
+static bool hasAddressTaken(User *U) {
+ for (User::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) {
+ User *Use = *I;
+
+ // 'call (bitcast @F to ...)' happens a lot.
+ while (isa<ConstantExpr>(Use) && Use->hasOneUse()) {
+ Use = *Use->use_begin();
+ }
+
+ if (isa<ConstantExpr>(Use)) {
+ if (hasAddressTaken(Use))
+ return true;
+ }
+
+ if (!isa<CallInst>(Use) && !isa<InvokeInst>(Use))
+ return true;
+
+ // Make sure we aren't passing U as a parameter to call instead of the
+ // callee.
+ if (CallSite(cast<Instruction>(Use)).hasArgument(U))
+ return true;
+ }
+
+ return false;
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+ bool Changed = false;
+
+ std::map<unsigned long, std::vector<Function *> > FnMap;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration() || F->isIntrinsic())
+ continue;
+
+ if (!F->hasLocalLinkage() && !F->hasExternalLinkage() &&
+ !F->hasWeakLinkage())
+ continue;
+
+ if (hasAddressTaken(F))
+ continue;
+
+ FnMap[hash(F)].push_back(F);
+ }
+
+  // TODO: instead of running in a loop, we could also fold functions in
+  // callgraph order. Constructing the callgraph probably isn't cheaper than
+  // just running in a loop.
+
+ bool LocalChanged;
+ do {
+ LocalChanged = false;
+ for (std::map<unsigned long, std::vector<Function *> >::iterator
+ I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
+ DOUT << "size: " << FnMap.size() << "\n";
+ std::vector<Function *> &FnVec = I->second;
+ DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n";
+
+ for (int i = 0, e = FnVec.size(); i != e; ++i) {
+ for (int j = i + 1; j != e; ++j) {
+ bool isEqual = equals(FnVec[i], FnVec[j]);
+
+ DOUT << " " << FnVec[i]->getName()
+ << (isEqual ? " == " : " != ")
+ << FnVec[j]->getName() << "\n";
+
+ if (isEqual) {
+ if (fold(FnVec, i, j)) {
+ LocalChanged = true;
+ FnVec.erase(FnVec.begin() + j);
+ --j, --e;
+ }
+ }
+ }
+ }
+
+ }
+ Changed |= LocalChanged;
+ } while (LocalChanged);
+
+ return Changed;
+}
diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp
new file mode 100644
index 0000000..0e1fdb9
--- /dev/null
+++ b/lib/Transforms/IPO/PartialSpecialization.cpp
@@ -0,0 +1,191 @@
+//===-- PartialSpecialization.cpp - Specialize for common constants--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds function arguments that are often a common constant and
+// specializes a version of the called function for that constant.
+//
+// This pass simply does the cloning for functions it specializes. It depends
+// on IPSCCP and DAE to clean up the results.
+//
+// The initial heuristic favors constant arguments that are used in control
+// flow.
+//
+//===----------------------------------------------------------------------===//
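+// Illustrative example (hypothetical code, not from this file): for
+//
+//   void draw(Shape *s, int aa) { if (aa) { ... } else { ... } }
+//
+// where a sufficient fraction of a sufficient number of calls pass aa == 0,
+// the pass clones draw() with aa bound to the constant 0 and gives the clone
+// internal linkage; IPSCCP and DAE are expected to fold the branch and drop
+// the dead parameter afterwards.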
+
+#define DEBUG_TYPE "partialspecialization"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseSet.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(numSpecialized, "Number of specialized functions created");
+
+// A function must be called at least this many times before specialization
+// is considered.
+static const int CallsMin = 5;
+
+// At least 10% of the calls must pass the same constant for it to be worth
+// specializing on.
+static const double ConstValPercent = .1;
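+// Worked example of the thresholds (illustrative): with total == 40 direct
+// calls, a constant must appear at more than 40 * 0.1 == 4 call sites, and
+// at fewer than all 40, before runOnModule below will specialize on it.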
+
+namespace {
+ class VISIBILITY_HIDDEN PartSpec : public ModulePass {
+ void scanForInterest(Function&, SmallVector<int, 6>&);
+ int scanDistribution(Function&, int, std::map<Constant*, int>&);
+ public :
+ static char ID; // Pass identification, replacement for typeid
+ PartSpec() : ModulePass(&ID) {}
+ bool runOnModule(Module &M);
+ };
+}
+
+char PartSpec::ID = 0;
+static RegisterPass<PartSpec>
+X("partialspecialization", "Partial Specialization");
+
+// Specialize F by replacing the arguments (keys) in replacements with the
+// constants (values). Replace all calls to F with those constants with
+// a call to the specialized function. Returns the specialized function
+static Function*
+SpecializeFunction(Function* F,
+ DenseMap<const Value*, Value*>& replacements) {
+ // arg numbers of deleted arguments
+ DenseSet<unsigned> deleted;
+ for (DenseMap<const Value*, Value*>::iterator
+ repb = replacements.begin(), repe = replacements.end();
+ repb != repe; ++repb)
+ deleted.insert(cast<Argument>(repb->first)->getArgNo());
+
+ Function* NF = CloneFunction(F, replacements);
+ NF->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(NF);
+
+ for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
+ ii != ee; ) {
+ Value::use_iterator i = ii;
+ ++ii;
+ if (isa<CallInst>(i) || isa<InvokeInst>(i)) {
+ CallSite CS(cast<Instruction>(i));
+ if (CS.getCalledFunction() == F) {
+
+ SmallVector<Value*, 6> args;
+ for (unsigned x = 0; x < CS.arg_size(); ++x)
+ if (!deleted.count(x))
+ args.push_back(CS.getArgument(x));
+ Value* NCall;
+ if (CallInst *CI = dyn_cast<CallInst>(i)) {
+ NCall = CallInst::Create(NF, args.begin(), args.end(),
+ CI->getName(), CI);
+ cast<CallInst>(NCall)->setTailCall(CI->isTailCall());
+ cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv());
+ } else {
+ InvokeInst *II = cast<InvokeInst>(i);
+ NCall = InvokeInst::Create(NF, II->getNormalDest(),
+ II->getUnwindDest(),
+ args.begin(), args.end(),
+ II->getName(), II);
+ cast<InvokeInst>(NCall)->setCallingConv(II->getCallingConv());
+ }
+ CS.getInstruction()->replaceAllUsesWith(NCall);
+ CS.getInstruction()->eraseFromParent();
+ }
+ }
+ }
+ return NF;
+}
+
+
+bool PartSpec::runOnModule(Module &M) {
+ bool Changed = false;
+ for (Module::iterator I = M.begin(); I != M.end(); ++I) {
+ Function &F = *I;
+ if (F.isDeclaration() || F.mayBeOverridden()) continue;
+ SmallVector<int, 6> interestingArgs;
+ scanForInterest(F, interestingArgs);
+
+ // Find the first interesting Argument that we can specialize on
+ // If there are multiple interesting Arguments, then those will be found
+ // when processing the cloned function.
+ bool breakOuter = false;
+ for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
+ std::map<Constant*, int> distribution;
+ int total = scanDistribution(F, interestingArgs[x], distribution);
+ if (total > CallsMin)
+ for (std::map<Constant*, int>::iterator ii = distribution.begin(),
+ ee = distribution.end(); ii != ee; ++ii)
+ if (total > ii->second && ii->first &&
+ ii->second > total * ConstValPercent) {
+ DenseMap<const Value*, Value*> m;
+ Function::arg_iterator arg = F.arg_begin();
+ for (int y = 0; y < interestingArgs[x]; ++y)
+ ++arg;
+ m[&*arg] = ii->first;
+ SpecializeFunction(&F, m);
+ ++numSpecialized;
+ breakOuter = true;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// scanForInterest - This function decides which arguments would be worth
+/// specializing on.
+void PartSpec::scanForInterest(Function& F, SmallVector<int, 6>& args) {
+ for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
+ ii != ee; ++ii) {
+ for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
+ ui != ue; ++ui) {
+
+ bool interesting = false;
+
+ if (isa<CmpInst>(ui)) interesting = true;
+ else if (isa<CallInst>(ui))
+ interesting = ui->getOperand(0) == ii;
+ else if (isa<InvokeInst>(ui))
+ interesting = ui->getOperand(0) == ii;
+ else if (isa<SwitchInst>(ui)) interesting = true;
+ else if (isa<BranchInst>(ui)) interesting = true;
+
+ if (interesting) {
+ args.push_back(std::distance(F.arg_begin(), ii));
+ break;
+ }
+ }
+ }
+}
+
+/// scanDistribution - Construct a histogram of the constant values passed
+/// to F in argument position arg.
+int PartSpec::scanDistribution(Function& F, int arg,
+ std::map<Constant*, int>& dist) {
+ bool hasIndirect = false;
+ int total = 0;
+ for(Value::use_iterator ii = F.use_begin(), ee = F.use_end();
+ ii != ee; ++ii)
+ if ((isa<CallInst>(ii) || isa<InvokeInst>(ii))
+ && ii->getOperand(0) == &F) {
+ ++dist[dyn_cast<Constant>(ii->getOperand(arg + 1))];
+ ++total;
+ } else
+ hasIndirect = true;
+
+ // Preserve the original address taken function even if all other uses
+ // will be specialized.
+ if (hasIndirect) ++total;
+ return total;
+}
+
+ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); }
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
new file mode 100644
index 0000000..2b52f46
--- /dev/null
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -0,0 +1,255 @@
+//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, turning invoke instructions into calls, iff the callee cannot
+// throw an exception, and marking functions 'nounwind' if they cannot throw.
+// It implements this as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
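+// Illustrative example (hypothetical IR, not from this file): once @g is
+// known not to throw,
+//
+//   invoke void @g() to label %cont unwind label %lpad
+//
+// is rewritten by SimplifyFunction below as
+//
+//   call void @g()
+//   br label %cont
+//
+// after which %lpad may become dead and is deleted.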
+
+#define DEBUG_TYPE "prune-eh"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of invokes removed");
+STATISTIC(NumUnreach, "Number of noreturn calls optimized");
+
+namespace {
+ struct VISIBILITY_HIDDEN PruneEH : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ PruneEH() : CallGraphSCCPass(&ID) {}
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+
+ bool SimplifyFunction(Function *F);
+ void DeleteBasicBlock(BasicBlock *BB);
+ };
+}
+
+char PruneEH::ID = 0;
+static RegisterPass<PruneEH>
+X("prune-eh", "Remove unused exception handling info");
+
+Pass *llvm::createPruneEHPass() { return new PruneEH(); }
+
+
+bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+ SmallPtrSet<CallGraphNode *, 8> SCCNodes;
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool MadeChange = false;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ SCCNodes.insert(SCC[i]);
+
+ // First pass, scan all of the functions in the SCC, simplifying them
+ // according to what we know.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ if (Function *F = SCC[i]->getFunction())
+ MadeChange |= SimplifyFunction(F);
+
+ // Next, check to see if any callees might throw or if there are any external
+ // functions in this SCC: if so, we cannot prune any functions in this SCC.
+ // Definitions that are weak and not declared non-throwing might be
+ // overridden at linktime with something that throws, so assume that.
+ // If this SCC includes the unwind instruction, we KNOW it throws, so
+ // obviously the SCC might throw.
+ //
+ bool SCCMightUnwind = false, SCCMightReturn = false;
+ for (unsigned i = 0, e = SCC.size();
+ (!SCCMightUnwind || !SCCMightReturn) && i != e; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (F == 0) {
+ SCCMightUnwind = true;
+ SCCMightReturn = true;
+ } else if (F->isDeclaration() || F->mayBeOverridden()) {
+ SCCMightUnwind |= !F->doesNotThrow();
+ SCCMightReturn |= !F->doesNotReturn();
+ } else {
+ bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
+ bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+
+ if (!CheckUnwind && !CheckReturn)
+ continue;
+
+ // Check to see if this function performs an unwind or calls an
+ // unwinding function.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) {
+ // Uses unwind!
+ SCCMightUnwind = true;
+ } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
+ SCCMightReturn = true;
+ }
+
+ // Invoke instructions don't allow unwinding to continue, so we are
+ // only interested in call instructions.
+ if (CheckUnwind && !SCCMightUnwind)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->doesNotThrow()) {
+ // This call cannot throw.
+ } else if (Function *Callee = CI->getCalledFunction()) {
+ CallGraphNode *CalleeNode = CG[Callee];
+ // If the callee is outside our current SCC then we may
+ // throw because it might.
+ if (!SCCNodes.count(CalleeNode)) {
+ SCCMightUnwind = true;
+ break;
+ }
+ } else {
+ // Indirect call, it might throw.
+ SCCMightUnwind = true;
+ break;
+ }
+ }
+ if (SCCMightUnwind && SCCMightReturn) break;
+ }
+ }
+ }
+
+ // If the SCC doesn't unwind or doesn't throw, note this fact.
+ if (!SCCMightUnwind || !SCCMightReturn)
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ Attributes NewAttributes = Attribute::None;
+
+ if (!SCCMightUnwind)
+ NewAttributes |= Attribute::NoUnwind;
+ if (!SCCMightReturn)
+ NewAttributes |= Attribute::NoReturn;
+
+ const AttrListPtr &PAL = SCC[i]->getFunction()->getAttributes();
+ const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes);
+ if (PAL != NPAL) {
+ MadeChange = true;
+ SCC[i]->getFunction()->setAttributes(NPAL);
+ }
+ }
+
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ // Convert any invoke instructions to non-throwing functions in this node
+ // into call instructions with a branch. This makes the exception blocks
+ // dead.
+ if (Function *F = SCC[i]->getFunction())
+ MadeChange |= SimplifyFunction(F);
+ }
+
+ return MadeChange;
+}
+
+
+// SimplifyFunction - Given information about callees, simplify the specified
+// function if we have invokes to non-unwinding functions or code after calls to
+// no-return functions.
+bool PruneEH::SimplifyFunction(Function *F) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ CallGraphNode *CGN = CG[F];
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(), "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+
+ // Anything that used the value produced by the invoke instruction
+ // now uses the value produced by the call instruction.
+ II->replaceAllUsesWith(Call);
+ BasicBlock *UnwindBlock = II->getUnwindDest();
+ UnwindBlock->removePredecessor(II->getParent());
+
+ // Fix up the call graph.
+ CGN->replaceCallSite(II, Call);
+
+ // Insert a branch to the normal destination right before the
+ // invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ BB->getInstList().pop_back();
+
+ // If the unwind block is now dead, nuke it.
+ if (pred_begin(UnwindBlock) == pred_end(UnwindBlock))
+ DeleteBasicBlock(UnwindBlock); // Delete the new BB.
+
+ ++NumRemoved;
+ MadeChange = true;
+ }
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+ // This call calls a function that cannot return. Insert an
+ // unreachable instruction after it and simplify the code. Do this
+ // by splitting the BB, adding the unreachable, then deleting the
+ // new BB.
+ BasicBlock *New = BB->splitBasicBlock(I);
+
+ // Remove the uncond branch and add an unreachable.
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB);
+
+ DeleteBasicBlock(New); // Delete the new BB.
+ MadeChange = true;
+ ++NumUnreach;
+ break;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// DeleteBasicBlock - remove the specified basic block from the program,
+/// updating the callgraph to reflect any now-obsolete edges due to calls that
+/// exist in the BB.
+void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+ assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ CallGraphNode *CGN = CG[BB->getParent()];
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
+ --I;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<DbgInfoIntrinsic>(I))
+ CGN->removeCallEdgeFor(CI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ CGN->removeCallEdgeFor(II);
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // Get the list of successors of this block.
+ std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB));
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ Succs[i]->removePredecessor(BB);
+
+ BB->eraseFromParent();
+}
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
new file mode 100644
index 0000000..a81bbdb
--- /dev/null
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -0,0 +1,251 @@
+//===- RaiseAllocations.cpp - Convert @malloc & @free calls to insts ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RaiseAllocations pass, which converts malloc and
+// free calls into malloc and free instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "raiseallocs"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumRaised, "Number of allocations raised");
+
+namespace {
+ // RaiseAllocations - Turn @malloc and @free calls into the appropriate
+ // instruction.
+ //
+ class VISIBILITY_HIDDEN RaiseAllocations : public ModulePass {
+ Function *MallocFunc; // Functions in the module we are processing
+ Function *FreeFunc; // Initialized by doPassInitializationVirt
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ RaiseAllocations()
+ : ModulePass(&ID), MallocFunc(0), FreeFunc(0) {}
+
+ // doPassInitialization - For the raise allocations pass, this finds a
+ // declaration for malloc and free if they exist.
+ //
+ void doInitialization(Module &M);
+
+ // run - This method does the actual work of converting instructions over.
+ //
+ bool runOnModule(Module &M);
+ };
+} // end anonymous namespace
+
+char RaiseAllocations::ID = 0;
+static RegisterPass<RaiseAllocations>
+X("raiseallocs", "Raise allocations from calls to instructions");
+
+// createRaiseAllocationsPass - The interface to this file...
+ModulePass *llvm::createRaiseAllocationsPass() {
+ return new RaiseAllocations();
+}
+
+
+// If the module has a symbol table, it may contain references to the malloc
+// and free functions. If this is the case, grab the function pointers that
+// the module is using.
+//
+// Lookup @malloc and @free in the symbol table, for later use. If they don't
+// exist, or are not external, we do not worry about converting calls to that
+// function into the appropriate instruction.
+//
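+// For example, each of these common C declarations is accepted below
+// (the first one assuming a 64-bit size_t):
+//
+//   void *malloc(unsigned long);   // matches i8* (i64)
+//   void *malloc(unsigned);        // matches i8* (i32)
+//   void *malloc();                // matches i8* (...)
+//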
+void RaiseAllocations::doInitialization(Module &M) {
+
+ // Get Malloc and free prototypes if they exist!
+ MallocFunc = M.getFunction("malloc");
+ if (MallocFunc) {
+ const FunctionType* TyWeHave = MallocFunc->getFunctionType();
+
+ // Get the expected prototype for malloc
+ const FunctionType *Malloc1Type =
+ FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
+ std::vector<const Type*>(1, Type::Int64Ty), false);
+
+    // Check to see if we got the expected malloc
+ if (TyWeHave != Malloc1Type) {
+ // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc
+ // This handles the common declaration of: 'void *malloc(unsigned);'
+ const FunctionType *Malloc2Type =
+ FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
+ std::vector<const Type*>(1, Type::Int32Ty), false);
+ if (TyWeHave != Malloc2Type) {
+ // Check to see if the prototype is missing, giving us
+ // sbyte*(...) * malloc
+ // This handles the common declaration of: 'void *malloc();'
+ const FunctionType *Malloc3Type =
+ FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
+ std::vector<const Type*>(), true);
+ if (TyWeHave != Malloc3Type)
+ // Give up
+ MallocFunc = 0;
+ }
+ }
+ }
+
+ FreeFunc = M.getFunction("free");
+ if (FreeFunc) {
+ const FunctionType* TyWeHave = FreeFunc->getFunctionType();
+
+ // Get the expected prototype for void free(i8*)
+ const FunctionType *Free1Type = FunctionType::get(Type::VoidTy,
+ std::vector<const Type*>(1, PointerType::getUnqual(Type::Int8Ty)), false);
+
+ if (TyWeHave != Free1Type) {
+ // Check to see if the prototype was forgotten, giving us
+ // void (...) * free
+ // This handles the common forward declaration of: 'void free();'
+ const FunctionType* Free2Type = FunctionType::get(Type::VoidTy,
+ std::vector<const Type*>(),true);
+
+ if (TyWeHave != Free2Type) {
+ // One last try, check to see if we can find free as
+ // int (...)* free. This handles the case where NOTHING was declared.
+ const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty,
+ std::vector<const Type*>(),true);
+
+ if (TyWeHave != Free3Type) {
+ // Give up.
+ FreeFunc = 0;
+ }
+ }
+ }
+ }
+
+ // Don't mess with locally defined versions of these functions...
+ if (MallocFunc && !MallocFunc->isDeclaration()) MallocFunc = 0;
+ if (FreeFunc && !FreeFunc->isDeclaration()) FreeFunc = 0;
+}
+
+// run - Transform calls into instructions...
+//
+bool RaiseAllocations::runOnModule(Module &M) {
+ // Find the malloc/free prototypes...
+ doInitialization(M);
+
+ bool Changed = false;
+
+ // First, process all of the malloc calls...
+ if (MallocFunc) {
+ std::vector<User*> Users(MallocFunc->use_begin(), MallocFunc->use_end());
+ std::vector<Value*> EqPointers; // Values equal to MallocFunc
+ while (!Users.empty()) {
+ User *U = Users.back();
+ Users.pop_back();
+
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
+ CallSite CS = CallSite::get(I);
+ if (CS.getInstruction() && !CS.arg_empty() &&
+ (CS.getCalledFunction() == MallocFunc ||
+ std::find(EqPointers.begin(), EqPointers.end(),
+ CS.getCalledValue()) != EqPointers.end())) {
+
+ Value *Source = *CS.arg_begin();
+
+ // If no prototype was provided for malloc, we may need to cast the
+ // source size.
+ if (Source->getType() != Type::Int32Ty)
+ Source =
+ CastInst::CreateIntegerCast(Source, Type::Int32Ty, false/*ZExt*/,
+ "MallocAmtCast", I);
+
+ MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I);
+ MI->takeName(I);
+ I->replaceAllUsesWith(MI);
+
+ // If the old instruction was an invoke, add an unconditional branch
+ // before the invoke, which will become the new terminator.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ BranchInst::Create(II->getNormalDest(), I);
+
+ // Delete the old call site
+ I->eraseFromParent();
+ Changed = true;
+ ++NumRaised;
+ }
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) {
+ Users.insert(Users.end(), GV->use_begin(), GV->use_end());
+ EqPointers.push_back(GV);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->isCast()) {
+ Users.insert(Users.end(), CE->use_begin(), CE->use_end());
+ EqPointers.push_back(CE);
+ }
+ }
+ }
+ }
+
+ // Next, process all free calls...
+ if (FreeFunc) {
+ std::vector<User*> Users(FreeFunc->use_begin(), FreeFunc->use_end());
+ std::vector<Value*> EqPointers; // Values equal to FreeFunc
+
+ while (!Users.empty()) {
+ User *U = Users.back();
+ Users.pop_back();
+
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
+ if (isa<InvokeInst>(I))
+ continue;
+ CallSite CS = CallSite::get(I);
+ if (CS.getInstruction() && !CS.arg_empty() &&
+ (CS.getCalledFunction() == FreeFunc ||
+ std::find(EqPointers.begin(), EqPointers.end(),
+ CS.getCalledValue()) != EqPointers.end())) {
+
+ // If no prototype was provided for free, we may need to cast the
+ // source pointer. This should be really uncommon, but it's necessary
+ // just in case we are dealing with weird code like this:
+ // free((long)ptr);
+ //
+ Value *Source = *CS.arg_begin();
+ if (!isa<PointerType>(Source->getType()))
+ Source = new IntToPtrInst(Source,
+ PointerType::getUnqual(Type::Int8Ty),
+ "FreePtrCast", I);
+ new FreeInst(Source, I);
+
+ // If the old instruction was an invoke, add an unconditional branch
+ // before the invoke, which will become the new terminator.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ BranchInst::Create(II->getNormalDest(), I);
+
+ // Delete the old call site
+ if (I->getType() != Type::VoidTy)
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ Changed = true;
+ ++NumRaised;
+ }
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) {
+ Users.insert(Users.end(), GV->use_begin(), GV->use_end());
+ EqPointers.push_back(GV);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->isCast()) {
+ Users.insert(Users.end(), CE->use_begin(), CE->use_end());
+ EqPointers.push_back(CE);
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 0000000..a94d78e
--- /dev/null
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,72 @@
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations, and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
+//
+//===----------------------------------------------------------------------===//
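+// For example (illustrative): a leftover 'declare i32 @printf(i8*, ...)'
+// with no remaining uses is erased, as is an unused external global
+// variable declaration.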
+
+#define DEBUG_TYPE "strip-dead-prototypes"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
+
+namespace {
+
+/// @brief Pass to remove unused function declarations.
+class VISIBILITY_HIDDEN StripDeadPrototypesPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesPass() : ModulePass(&ID) { }
+ virtual bool runOnModule(Module &M);
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesPass::ID = 0;
+static RegisterPass<StripDeadPrototypesPass>
+X("strip-dead-prototypes", "Strip Unused Function Prototypes");
+
+bool StripDeadPrototypesPass::runOnModule(Module &M) {
+ bool MadeChange = false;
+
+ // Erase dead function prototypes.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function *F = I++;
+ // Function must be a prototype and unused.
+ if (F->isDeclaration() && F->use_empty()) {
+ F->eraseFromParent();
+ ++NumDeadPrototypes;
+ MadeChange = true;
+ }
+ }
+
+ // Erase dead global var prototypes.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ) {
+ GlobalVariable *GV = I++;
+ // Global must be a prototype and unused.
+ if (GV->isDeclaration() && GV->use_empty())
+ GV->eraseFromParent();
+ }
+
+ // Return an indication of whether we changed anything or not.
+ return MadeChange;
+}
+
+ModulePass *llvm::createStripDeadPrototypesPass() {
+ return new StripDeadPrototypesPass();
+}
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
new file mode 100644
index 0000000..ab8fe5f
--- /dev/null
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -0,0 +1,415 @@
+//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The StripSymbols transformation implements code stripping. Specifically, it
+// can delete:
+//
+// * names for virtual registers
+// * symbols for internal globals and functions
+// * debug information
+//
+// Note that this transformation makes code much less readable, so it should
+// only be used in situations where the 'strip' utility would be used, such as
+// reducing code size or making it harder to reverse engineer code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN StripSymbols : public ModulePass {
+ bool OnlyDebugInfo;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripSymbols(bool ODI = false)
+ : ModulePass(&ID), OnlyDebugInfo(ODI) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class VISIBILITY_HIDDEN StripNonDebugSymbols : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripNonDebugSymbols()
+ : ModulePass(&ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class VISIBILITY_HIDDEN StripDebugDeclare : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDebugDeclare()
+ : ModulePass(&ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char StripSymbols::ID = 0;
+static RegisterPass<StripSymbols>
+X("strip", "Strip all symbols from a module");
+
+ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
+ return new StripSymbols(OnlyDebugInfo);
+}
+
+char StripNonDebugSymbols::ID = 0;
+static RegisterPass<StripNonDebugSymbols>
+Y("strip-nondebug", "Strip all symbols, except dbg symbols, from a module");
+
+ModulePass *llvm::createStripNonDebugSymbolsPass() {
+ return new StripNonDebugSymbols();
+}
+
+char StripDebugDeclare::ID = 0;
+static RegisterPass<StripDebugDeclare>
+Z("strip-debug-declare", "Strip all llvm.dbg.declare intrinsics");
+
+ModulePass *llvm::createStripDebugDeclarePass() {
+ return new StripDebugDeclare();
+}
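+// These passes are exposed through opt by their registered names, e.g.
+// (illustrative invocations):
+//
+//   opt -strip in.bc -o out.bc
+//   opt -strip-nondebug in.bc -o out.bc
+//   opt -strip-debug-declare in.bc -o out.bc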
+
+/// OnlyUsedBy - Return true if V is only used by Usr.
+static bool OnlyUsedBy(Value *V, Value *Usr) {
+ for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ User *U = *I;
+ if (U != Usr)
+ return false;
+ }
+ return true;
+}
+
+static void RemoveDeadConstant(Constant *C) {
+ assert(C->use_empty() && "Constant is not dead!");
+ SmallPtrSet<Constant *, 4> Operands;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ if (isa<DerivedType>(C->getOperand(i)->getType()) &&
+ OnlyUsedBy(C->getOperand(i), C))
+ Operands.insert(C->getOperand(i));
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
+ GV->eraseFromParent();
+ }
+ else if (!isa<Function>(C))
+ if (isa<CompositeType>(C->getType()))
+ C->destroyConstant();
+
+ // If the constant referenced anything, see if we can delete it as well.
+ for (SmallPtrSet<Constant *, 4>::iterator OI = Operands.begin(),
+ OE = Operands.end(); OI != OE; ++OI)
+ RemoveDeadConstant(*OI);
+}
+
+// Strip the symbol table of its names.
+//
+static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
+ for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
+ Value *V = VI->getValue();
+ ++VI;
+ if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
+ if (!PreserveDbgInfo || strncmp(V->getNameStart(), "llvm.dbg", 8))
+ // Set name to "", removing from symbol table!
+ V->setName("");
+ }
+ }
+}
+
+// Strip the symbol table of its names.
+static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {
+ for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) {
+ if (PreserveDbgInfo && strncmp(TI->first.c_str(), "llvm.dbg", 8) == 0)
+ ++TI;
+ else
+ ST.remove(TI++);
+ }
+}
+
+/// Find values that are marked as llvm.used.
+void findUsedValues(Module &M,
+ SmallPtrSet<const GlobalValue*, 8>& llvmUsedValues) {
+ if (GlobalVariable *LLVMUsed = M.getGlobalVariable("llvm.used")) {
+ llvmUsedValues.insert(LLVMUsed);
+ // Collect values that are preserved as per explicit request.
+ // llvm.used is used to list these values.
+ if (ConstantArray *Inits =
+ dyn_cast<ConstantArray>(LLVMUsed->getInitializer())) {
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(Inits->getOperand(i)))
+ llvmUsedValues.insert(GV);
+ else if (ConstantExpr *CE =
+ dyn_cast<ConstantExpr>(Inits->getOperand(i)))
+ if (CE->getOpcode() == Instruction::BitCast)
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(CE->getOperand(0)))
+ llvmUsedValues.insert(GV);
+ }
+ }
+ }
+}
+
+/// StripSymbolNames - Strip symbol names.
+bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
+
+ SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
+ findUsedValues(M, llvmUsedValues);
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8))
+ I->setName(""); // Internal symbols can't participate in linkage
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8))
+ I->setName(""); // Internal symbols can't participate in linkage
+ StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
+ }
+
+ // Remove all names from types.
+ StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo);
+
+ return true;
+}
+
+// StripDebugInfo - Strip debug info in the module if it exists.
+// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
+// llvm.dbg.region.end calls, and any globals they point to if now dead.
+bool StripDebugInfo(Module &M) {
+
+ SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
+ findUsedValues(M, llvmUsedValues);
+
+ // Delete all dbg variables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
+ if (!GV) continue;
+ if (!GV->use_empty() && llvmUsedValues.count(I) == 0) {
+ if (strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0) {
+ GV->replaceAllUsesWith(UndefValue::get(GV->getType()));
+ }
+ }
+ }
+
+ Function *FuncStart = M.getFunction("llvm.dbg.func.start");
+ Function *StopPoint = M.getFunction("llvm.dbg.stoppoint");
+ Function *RegionStart = M.getFunction("llvm.dbg.region.start");
+ Function *RegionEnd = M.getFunction("llvm.dbg.region.end");
+ Function *Declare = M.getFunction("llvm.dbg.declare");
+
+ std::vector<Constant*> DeadConstants;
+
+ // Remove all of the calls to the debugger intrinsics, and remove them from
+ // the module.
+ if (FuncStart) {
+ while (!FuncStart->use_empty()) {
+ CallInst *CI = cast<CallInst>(FuncStart->use_back());
+ Value *Arg = CI->getOperand(1);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg))
+ DeadConstants.push_back(C);
+ }
+ FuncStart->eraseFromParent();
+ }
+ if (StopPoint) {
+ while (!StopPoint->use_empty()) {
+ CallInst *CI = cast<CallInst>(StopPoint->use_back());
+ Value *Arg = CI->getOperand(3);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg))
+ DeadConstants.push_back(C);
+ }
+ StopPoint->eraseFromParent();
+ }
+ if (RegionStart) {
+ while (!RegionStart->use_empty()) {
+ CallInst *CI = cast<CallInst>(RegionStart->use_back());
+ Value *Arg = CI->getOperand(1);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg))
+ DeadConstants.push_back(C);
+ }
+ RegionStart->eraseFromParent();
+ }
+ if (RegionEnd) {
+ while (!RegionEnd->use_empty()) {
+ CallInst *CI = cast<CallInst>(RegionEnd->use_back());
+ Value *Arg = CI->getOperand(1);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg))
+ DeadConstants.push_back(C);
+ }
+ RegionEnd->eraseFromParent();
+ }
+ if (Declare) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ Value *Arg1 = CI->getOperand(1);
+ Value *Arg2 = CI->getOperand(2);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg1->use_empty()) {
+ if (Constant *C = dyn_cast<Constant>(Arg1))
+ DeadConstants.push_back(C);
+ else
+ RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+ }
+ if (Arg2->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg2))
+ DeadConstants.push_back(C);
+ }
+ Declare->eraseFromParent();
+ }
+
+ // llvm.dbg.compile_units and llvm.dbg.subprograms are marked as linkonce
+ // but since we are removing all debug information, make them internal now.
+ // FIXME: Use private linkage maybe?
+ if (Constant *C = M.getNamedGlobal("llvm.dbg.compile_units"))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ GV->setLinkage(GlobalValue::InternalLinkage);
+
+ if (Constant *C = M.getNamedGlobal("llvm.dbg.subprograms"))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ GV->setLinkage(GlobalValue::InternalLinkage);
+
+ if (Constant *C = M.getNamedGlobal("llvm.dbg.global_variables"))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ GV->setLinkage(GlobalValue::InternalLinkage);
+
+ // Delete all dbg variables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
+ if (!GV) continue;
+ if (GV->use_empty() && llvmUsedValues.count(I) == 0
+ && (!GV->hasSection()
+ || strcmp(GV->getSection().c_str(), "llvm.metadata") == 0))
+ DeadConstants.push_back(GV);
+ }
+
+ if (DeadConstants.empty())
+ return false;
+
+ // Delete any internal globals that were only used by the debugger intrinsics.
+ while (!DeadConstants.empty()) {
+ Constant *C = DeadConstants.back();
+ DeadConstants.pop_back();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasLocalLinkage())
+ RemoveDeadConstant(GV);
+ }
+ else
+ RemoveDeadConstant(C);
+ }
+
+ // Remove all llvm.dbg types.
+ TypeSymbolTable &ST = M.getTypeSymbolTable();
+ for (TypeSymbolTable::iterator TI = ST.begin(), TE = ST.end(); TI != TE; ) {
+ if (!strncmp(TI->first.c_str(), "llvm.dbg.", 9))
+ ST.remove(TI++);
+ else
+ ++TI;
+ }
+
+ return true;
+}
+
+bool StripSymbols::runOnModule(Module &M) {
+ bool Changed = false;
+ Changed |= StripDebugInfo(M);
+ if (!OnlyDebugInfo)
+ Changed |= StripSymbolNames(M, false);
+ return Changed;
+}
+
+bool StripNonDebugSymbols::runOnModule(Module &M) {
+ return StripSymbolNames(M, true);
+}
+
+bool StripDebugDeclare::runOnModule(Module &M) {
+
+ Function *Declare = M.getFunction("llvm.dbg.declare");
+ std::vector<Constant*> DeadConstants;
+
+ if (Declare) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ Value *Arg1 = CI->getOperand(1);
+ Value *Arg2 = CI->getOperand(2);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg1->use_empty()) {
+ if (Constant *C = dyn_cast<Constant>(Arg1))
+ DeadConstants.push_back(C);
+ else
+ RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+ }
+ if (Arg2->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg2))
+ DeadConstants.push_back(C);
+ }
+ Declare->eraseFromParent();
+ }
+
+ // Delete all llvm.dbg.global_variables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
+ if (!GV) continue;
+ if (GV->use_empty() && GV->hasName()
+ && strncmp(GV->getNameStart(), "llvm.dbg.global_variable", 24) == 0)
+ DeadConstants.push_back(GV);
+ }
+
+ while (!DeadConstants.empty()) {
+ Constant *C = DeadConstants.back();
+ DeadConstants.pop_back();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasLocalLinkage())
+ RemoveDeadConstant(GV);
+ }
+ else
+ RemoveDeadConstant(C);
+ }
+
+ return true;
+}
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
new file mode 100644
index 0000000..9f54388
--- /dev/null
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -0,0 +1,351 @@
+//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds functions that return a struct (using a pointer to the struct
+// as the first argument of the function, marked with the 'sret' attribute) and
+// replaces them with a new function that simply returns each of the elements of
+// that struct (using multiple return values).
+//
+// This pass works under a number of conditions:
+// 1. The returned struct must not contain other structs
+// 2. The returned struct must only be used to load values from
+// 3. The placeholder struct passed in is the result of an alloca
+//
+//===----------------------------------------------------------------------===//
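+// Illustrative example (hypothetical IR, not from this file): given
+//
+//   %pair = type { i32, i32 }
+//   define internal void @f(%pair* sret %out, i32 %n) { ... }
+//
+// where every caller passes a fresh alloca and only loads from it, the
+// function is rewritten as
+//
+//   define internal %pair @f(i32 %n) { ... }
+//
+// returning the struct directly, and the sret pointer argument disappears.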
+
+#define DEBUG_TYPE "sretpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumRejectedSRETUses, "Number of srets rejected due to unexpected uses");
+STATISTIC(NumSRET, "Number of srets promoted");
+namespace {
+ /// SRETPromotion - This pass removes sret parameter and updates
+ /// function to use multiple return value.
+ ///
+ struct VISIBILITY_HIDDEN SRETPromotion : public CallGraphSCCPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+ static char ID; // Pass identification, replacement for typeid
+ SRETPromotion() : CallGraphSCCPass(&ID) {}
+
+ private:
+ bool PromoteReturn(CallGraphNode *CGN);
+ bool isSafeToUpdateAllCallers(Function *F);
+ Function *cloneFunctionBody(Function *F, const StructType *STy);
+ void updateCallSites(Function *F, Function *NF);
+ bool nestedStructType(const StructType *STy);
+ };
+}
+
+char SRETPromotion::ID = 0;
+static RegisterPass<SRETPromotion>
+X("sretpromotion", "Promote sret arguments to multiple ret values");
+
+Pass *llvm::createStructRetPromotionPass() {
+ return new SRETPromotion();
+}
+
+bool SRETPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+ bool Changed = false;
+
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ Changed |= PromoteReturn(SCC[i]);
+
+ return Changed;
+}
+
+/// PromoteReturn - This method promotes a function that uses a StructRet
+/// parameter into a function that uses multiple return values.
+bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
+ Function *F = CGN->getFunction();
+
+ if (!F || F->isDeclaration() || !F->hasLocalLinkage())
+ return false;
+
+  // Make sure that the function returns a struct.
+ if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn())
+ return false;
+
+ DOUT << "SretPromotion: Looking at sret function " << F->getNameStart() << "\n";
+
+ assert (F->getReturnType() == Type::VoidTy && "Invalid function return type");
+ Function::arg_iterator AI = F->arg_begin();
+ const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
+ assert (FArgType && "Invalid sret parameter type");
+ const llvm::StructType *STy =
+ dyn_cast<StructType>(FArgType->getElementType());
+ assert (STy && "Invalid sret parameter element type");
+
+ // Check if it is ok to perform this promotion.
+  if (!isSafeToUpdateAllCallers(F)) {
+ DOUT << "SretPromotion: Not all callers can be updated\n";
+ NumRejectedSRETUses++;
+ return false;
+ }
+
+ DOUT << "SretPromotion: sret argument will be promoted\n";
+ NumSRET++;
+ // [1] Replace use of sret parameter
+ AllocaInst *TheAlloca = new AllocaInst (STy, NULL, "mrv",
+ F->getEntryBlock().begin());
+ Value *NFirstArg = F->arg_begin();
+ NFirstArg->replaceAllUsesWith(TheAlloca);
+
+ // [2] Find and replace ret instructions
+ for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+ for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *I = BI;
+ ++BI;
+ if (isa<ReturnInst>(I)) {
+ Value *NV = new LoadInst(TheAlloca, "mrv.ld", I);
+ ReturnInst *NR = ReturnInst::Create(NV, I);
+ I->replaceAllUsesWith(NR);
+ I->eraseFromParent();
+ }
+ }
+
+ // [3] Create the new function body and insert it into the module.
+ Function *NF = cloneFunctionBody(F, STy);
+
+ // [4] Update all call sites to use new function
+ updateCallSites(F, NF);
+
+ F->eraseFromParent();
+ getAnalysis<CallGraph>().changeFunction(F, NF);
+ return true;
+}
+
+// Check if it is ok to perform this promotion.
+bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
+
+ if (F->use_empty())
+ // No users. OK to modify signature.
+ return true;
+
+ for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end();
+ FnUseI != FnUseE; ++FnUseI) {
+    // If the function is passed in as an argument to (possibly) another
+    // function, we can't change it!
+ CallSite CS = CallSite::get(*FnUseI);
+ Instruction *Call = CS.getInstruction();
+    // If the function is used by something other than a call or invoke
+    // instruction, we can't change it!
+ if (!Call || !CS.isCallee(FnUseI))
+ return false;
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Value *FirstArg = *AI;
+
+ if (!isa<AllocaInst>(FirstArg))
+ return false;
+
+ // Check FirstArg's users.
+ for (Value::use_iterator ArgI = FirstArg->use_begin(),
+ ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) {
+
+      // If a user of FirstArg is a CallInst that does not correspond to the
+      // current call site, then this function F is not suitable for sret
+      // promotion.
+ if (CallInst *CI = dyn_cast<CallInst>(ArgI)) {
+ if (CI != Call)
+ return false;
+ }
+      // If a user of FirstArg is a GEP, then all of the GEP's users must be
+      // LoadInsts; otherwise this function F is not suitable for sret
+      // promotion.
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(ArgI)) {
+ // TODO : Use dom info and insert PHINodes to collect get results
+ // from multiple call sites for this GEP.
+ if (GEP->getParent() != Call->getParent())
+ return false;
+ for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end();
+ GEPI != GEPE; ++GEPI)
+ if (!isa<LoadInst>(GEPI))
+ return false;
+ }
+ // Any other FirstArg users make this function unsuitable for sret
+ // promotion.
+ else
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// cloneFunctionBody - Create a new function based on F and insert it into
+/// the module. Remove the first argument. Use STy as the return type for
+/// the new function.
+Function *SRETPromotion::cloneFunctionBody(Function *F,
+ const StructType *STy) {
+
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<const Type*> Params;
+
+ // Attributes - Keep track of the parameter attributes for the arguments.
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = PAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // Skip first argument.
+ Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ ++I;
+ // Attribute index 0 is reserved for the return type; index 1 belongs to
+ // the first (sret) argument, which is being removed.
+ unsigned ParamIndex = 2;
+ while (I != E) {
+ Params.push_back(I->getType());
+ if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
+ ++I;
+ ++ParamIndex;
+ }
+
+ // Add any fn attributes.
+ if (Attributes attrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+
+ FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg());
+ Function *NF = Function::Create(NFTy, F->getLinkage());
+ NF->takeName(F);
+ NF->copyAttributesFrom(F);
+ NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()));
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Replace arguments
+ I = F->arg_begin();
+ E = F->arg_end();
+ Function::arg_iterator NI = NF->arg_begin();
+ ++I;
+ while (I != E) {
+ I->replaceAllUsesWith(NI);
+ NI->takeName(I);
+ ++I;
+ ++NI;
+ }
+
+ return NF;
+}
+
+/// updateCallSites - Update all sites that call F to use NF.
+void SRETPromotion::updateCallSites(Function *F, Function *NF) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ SmallVector<Value*, 16> Args;
+
+ // Attributes - Keep track of the parameter attributes for the arguments.
+ SmallVector<AttributeWithIndex, 8> ArgAttrsVec;
+
+ while (!F->use_empty()) {
+ CallSite CS = CallSite::get(*F->use_begin());
+ Instruction *Call = CS.getInstruction();
+
+ const AttrListPtr &PAL = F->getAttributes();
+ // Add any return attributes.
+ if (Attributes attrs = PAL.getRetAttributes())
+ ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // Copy the arguments, skipping the first (sret) one.
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ Value *FirstCArg = *AI;
+ ++AI;
+ // Attribute index 0 is reserved for the return type; index 1 belongs to
+ // the first (sret) argument, which is being removed.
+ unsigned ParamIndex = 2;
+ while (AI != AE) {
+ Args.push_back(*AI);
+ if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
+ ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
+ ++ParamIndex;
+ ++AI;
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = PAL.getFnAttributes())
+ ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end());
+
+ // Build new call instruction.
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(), "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(NewPAL);
+ } else {
+ New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(NewPAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+ ArgAttrsVec.clear();
+ New->takeName(Call);
+
+ // Update the callgraph to know that the callsite has been transformed.
+ CG[Call->getParent()->getParent()]->replaceCallSite(Call, New);
+
+ // Update all users of the sret parameter to extract values via extractvalue.
+ for (Value::use_iterator UI = FirstCArg->use_begin(),
+ UE = FirstCArg->use_end(); UI != UE; ) {
+ User *U2 = *UI++;
+ CallInst *C2 = dyn_cast<CallInst>(U2);
+ if (C2 && (C2 == Call))
+ continue;
+ else if (GetElementPtrInst *UGEP = dyn_cast<GetElementPtrInst>(U2)) {
+ ConstantInt *Idx = dyn_cast<ConstantInt>(UGEP->getOperand(2));
+ assert(Idx && "Unexpected getelementptr index!");
+ Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
+ "evi", UGEP);
+ while(!UGEP->use_empty()) {
+ // isSafeToUpdateAllCallers has checked that all GEP uses are
+ // LoadInsts
+ LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
+ L->replaceAllUsesWith(GR);
+ L->eraseFromParent();
+ }
+ UGEP->eraseFromParent();
+ }
+ else assert(0 && "Unexpected sret parameter use");
+ }
+ Call->eraseFromParent();
+ }
+}
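+
+// As an illustration (the struct type and value names here are made up),
+// promotion rewrites a call site such as
+//
+//   %res = alloca { i32, i32 }
+//   call void @foo({ i32, i32 }* %res)        ; sret parameter
+//   %p = getelementptr { i32, i32 }* %res, i32 0, i32 1
+//   %v = load i32* %p
+//
+// into
+//
+//   %mrv = call { i32, i32 } @foo()
+//   %v = extractvalue { i32, i32 } %mrv, 1
+//
+// while the callee itself gains an alloca that is loaded and returned in
+// place of the old "ret void".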
+
+/// nestedStructType - Return true if STy includes any
+/// other aggregate types
+bool SRETPromotion::nestedStructType(const StructType *STy) {
+ unsigned Num = STy->getNumElements();
+ for (unsigned i = 0; i < Num; i++) {
+ const Type *Ty = STy->getElementType(i);
+ if (!Ty->isSingleValueType() && Ty != Type::VoidTy)
+ return true;
+ }
+ return false;
+}
diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp
new file mode 100644
index 0000000..2bd9809
--- /dev/null
+++ b/lib/Transforms/Instrumentation/BlockProfiling.cpp
@@ -0,0 +1,126 @@
+//===- BlockProfiling.cpp - Insert counters for block profiling -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for basic block or
+// function profiling. This is the most basic form of profiling, which can tell
+// which blocks are hot, but cannot reliably detect hot paths through the CFG.
+// Block profiling counts the number of times each basic block executes, and
+// function profiling counts the number of times each function is called.
+//
+// Note that this implementation is very naive. Control equivalent regions of
+// the CFG should not require duplicate counters, but we do put duplicate
+// counters in.
+//
+//===----------------------------------------------------------------------===//
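+
+// Schematically, for function profiling the pass below produces (a sketch;
+// the names match the code):
+//
+//   @FuncProfCounters = internal global [N x i32] zeroinitializer
+//   ; entry of the i'th function:  FuncProfCounters[i] += 1
+//   ; start of main:               call @llvm_start_func_profiling(...)
+//
+// Block profiling is analogous, with one counter per basic block.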
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "RSProfiling.h"
+#include "ProfilingUtils.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN FunctionProfiler : public RSProfilers_std {
+ public:
+ static char ID;
+ bool runOnModule(Module &M);
+ };
+}
+
+char FunctionProfiler::ID = 0;
+
+static RegisterPass<FunctionProfiler>
+X("insert-function-profiling",
+ "Insert instrumentation for function profiling");
+static RegisterAnalysisGroup<RSProfilers> XG(X);
+
+ModulePass *llvm::createFunctionProfilerPass() {
+ return new FunctionProfiler();
+}
+
+bool FunctionProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ cerr << "WARNING: cannot insert function profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ unsigned NumFunctions = 0;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration())
+ ++NumFunctions;
+
+ const Type *ATy = ArrayType::get(Type::Int32Ty, NumFunctions);
+ GlobalVariable *Counters =
+ new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "FuncProfCounters", &M);
+
+ // Instrument all of the functions...
+ unsigned i = 0;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration())
+ // Insert counter at the start of the function
+ IncrementCounterInBlock(&I->getEntryBlock(), i++, Counters);
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_func_profiling", Counters);
+ return true;
+}
+
+
+namespace {
+ class BlockProfiler : public RSProfilers_std {
+ bool runOnModule(Module &M);
+ public:
+ static char ID;
+ };
+}
+
+char BlockProfiler::ID = 0;
+static RegisterPass<BlockProfiler>
+Y("insert-block-profiling", "Insert instrumentation for block profiling");
+static RegisterAnalysisGroup<RSProfilers> YG(Y);
+
+ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); }
+
+bool BlockProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ cerr << "WARNING: cannot insert block profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ unsigned NumBlocks = 0;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ NumBlocks += I->size();
+
+ const Type *ATy = ArrayType::get(Type::Int32Ty, NumBlocks);
+ GlobalVariable *Counters =
+ new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "BlockProfCounters", &M);
+
+ // Instrument all of the blocks...
+ unsigned i = 0;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB)
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(BB, i++, Counters);
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters);
+ return true;
+}
+
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
new file mode 100644
index 0000000..d7c518d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMInstrumentation
+ BlockProfiling.cpp
+ EdgeProfiling.cpp
+ ProfilingUtils.cpp
+ RSProfiling.cpp
+ )
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
new file mode 100644
index 0000000..0831f3b
--- /dev/null
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -0,0 +1,101 @@
+//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+// Note that this implementation is very naive. We insert a counter for *every*
+// edge in the program, instead of using control flow information to prune the
+// number of counters inserted.
+//
+//===----------------------------------------------------------------------===//
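+
+// For instance, given a conditional branch A -> {B, C} where B also has a
+// second predecessor, the edge A->B is critical: a counter in A would also
+// count A->C, and a counter in B would also count B's other incoming edge.
+// Splitting inserts a fresh block on A->B, which can hold a counter that
+// counts exactly that one edge.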
+
+#include "ProfilingUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN EdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ EdgeProfiler() : ModulePass(&ID) {}
+ };
+}
+
+char EdgeProfiler::ID = 0;
+static RegisterPass<EdgeProfiler>
+X("insert-edge-profiling", "Insert instrumentation for edge profiling");
+
+ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
+
+bool EdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ cerr << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ std::set<BasicBlock*> BlocksToInstrument;
+ unsigned NumEdges = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ BlocksToInstrument.insert(BB);
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+
+ const Type *ATy = ArrayType::get(Type::Int32Ty, NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "EdgeProfCounters", &M);
+
+ // Instrument all of the edges...
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
+ // Okay, we have to add a counter for each outgoing edge. If the
+ // outgoing edge is not critical, don't split it; just insert the counter
+ // in the source or destination of the edge.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ // If the edge is critical, split it.
+ SplitCriticalEdge(TI, s, this);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If we
+ // only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(BB, i++, Counters);
+ } else {
+ // Insert counter at the start of the successor block
+ IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
+ }
+ }
+ }
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/lib/Transforms/Instrumentation/Makefile b/lib/Transforms/Instrumentation/Makefile
new file mode 100644
index 0000000..6cbc7a9
--- /dev/null
+++ b/lib/Transforms/Instrumentation/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/Instrumentation/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMInstrumentation
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
new file mode 100644
index 0000000..48071f1
--- /dev/null
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -0,0 +1,120 @@
+//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
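+
+// The profiling runtime entry point inserted into main has, in C terms, the
+// signature implied by the getOrInsertFunction call below:
+//
+//   int llvm_start_xxx_profiling(int argc, char **argv,
+//                                unsigned *counters, unsigned num);
+//
+// where xxx stands for whatever FnName each instrumentation pass passes in.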
+
+#include "ProfilingUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+
+void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Array) {
+ const Type *ArgVTy =
+ PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty));
+ const PointerType *UIntPtr = PointerType::getUnqual(Type::Int32Ty);
+ Module &M = *MainFn->getParent();
+ Constant *InitFn = M.getOrInsertFunction(FnName, Type::Int32Ty, Type::Int32Ty,
+ ArgVTy, UIntPtr, Type::Int32Ty,
+ (Type *)0);
+
+ // This could force argc and argv into programs that wouldn't otherwise have
+ // them, but instead we just pass null values in.
+ std::vector<Value*> Args(4);
+ Args[0] = Constant::getNullValue(Type::Int32Ty);
+ Args[1] = Constant::getNullValue(ArgVTy);
+
+ // Skip over any allocas in the entry block.
+ BasicBlock *Entry = MainFn->begin();
+ BasicBlock::iterator InsertPos = Entry->begin();
+ while (isa<AllocaInst>(InsertPos)) ++InsertPos;
+
+ std::vector<Constant*> GEPIndices(2, Constant::getNullValue(Type::Int32Ty));
+ unsigned NumElements = 0;
+ if (Array) {
+ Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
+ GEPIndices.size());
+ NumElements =
+ cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
+ } else {
+ // If this profiling instrumentation doesn't have a constant array, just
+ // pass null.
+ Args[2] = ConstantPointerNull::get(UIntPtr);
+ }
+ Args[3] = ConstantInt::get(Type::Int32Ty, NumElements);
+
+ Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
+ "newargc", InsertPos);
+
+ // If argc or argv are not available in main, just pass null values in.
+ Function::arg_iterator AI;
+ switch (MainFn->arg_size()) {
+ default:
+ case 2:
+ AI = MainFn->arg_begin(); ++AI;
+ if (AI->getType() != ArgVTy) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
+ false);
+ InitCall->setOperand(2,
+ CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
+ } else {
+ InitCall->setOperand(2, AI);
+ }
+ /* FALL THROUGH */
+
+ case 1:
+ AI = MainFn->arg_begin();
+ // If the program looked at argc, have it look at the return value of the
+ // init call instead.
+ if (AI->getType() != Type::Int32Ty) {
+ Instruction::CastOps opcode;
+ if (!AI->use_empty()) {
+ opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
+ AI->replaceAllUsesWith(
+ CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
+ }
+ opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true);
+ InitCall->setOperand(1,
+ CastInst::Create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall));
+ } else {
+ AI->replaceAllUsesWith(InitCall);
+ InitCall->setOperand(1, AI);
+ }
+
+ case 0: break;
+ }
+}
+
+void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray) {
+ // Insert the increment after any alloca or PHI instructions...
+ BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
+ while (isa<AllocaInst>(InsertPos))
+ ++InsertPos;
+
+ // Create the getelementptr constant expression
+ std::vector<Constant*> Indices(2);
+ Indices[0] = Constant::getNullValue(Type::Int32Ty);
+ Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum);
+ Constant *ElementPtr =
+ ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
+
+ // Load, increment and store the value back.
+ Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
+ Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
+ ConstantInt::get(Type::Int32Ty, 1),
+ "NewFuncCounter", InsertPos);
+ new StoreInst(NewVal, ElementPtr, InsertPos);
+}
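+
+// The sequence emitted at the insertion point is, schematically:
+//
+//   %OldFuncCounter = load i32* <&CounterArray[CounterNum]>
+//   %NewFuncCounter = add i32 %OldFuncCounter, 1
+//   store i32 %NewFuncCounter, i32* <&CounterArray[CounterNum]>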
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h
new file mode 100644
index 0000000..94efffe
--- /dev/null
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -0,0 +1,31 @@
+//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROFILINGUTILS_H
+#define PROFILINGUTILS_H
+
+namespace llvm {
+ class Function;
+ class GlobalValue;
+ class BasicBlock;
+
+ void InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Arr = 0);
+ void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray);
+}
+
+#endif
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
new file mode 100644
index 0000000..c6cf4df
--- /dev/null
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -0,0 +1,653 @@
+//===- RSProfiling.cpp - Various profiling using random sampling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These passes implement random-sampling-based profiling. Different methods
+// of choosing when to sample are supported, as well as different types of
+// profiling. This is done as two passes. The first is a sequence of profiling
+// passes which insert profiling into the program, and remember what they
+// inserted.
+//
+// The second stage duplicates all instructions in a function, ignoring the
+// profiling code, then connects the two versions together at the entry and at
+// backedges. At each connection point a choice is made as to whether to jump
+// to the profiled code (take a sample) or execute the unprofiled code.
+//
+// It is highly recommended that after this pass one runs mem2reg and adce
+// (instcombine, load-vn, gdce, and dse are also good to run afterwards).
+//
+// This design is intended to make the profiling passes independent of the RS
+// framework, but any profiling pass that implements the RSProfiling interface
+// is compatible with the rs framework (and thus can be sampled)
+//
+// TODO: obviously the block and function profiling are almost identical to the
+// existing ones, so they can be unified (especially since these passes are valid
+// without the rs framework).
+// TODO: Fix choice code so that frequency is not hard coded
+//
+//===----------------------------------------------------------------------===//
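+
+// Sketch of one transformed backedge B -> A (primes denote the unprofiled
+// duplicates): a new block C is placed on B -> A and falls through into the
+// duplicate A', so a sampled iteration drops back into unprofiled code, while
+// the corresponding C' on B' -> A' holds the Chooser-controlled conditional
+// that either jumps to A (take a sample) or stays in A'.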
+
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "RSProfiling.h"
+#include <set>
+#include <map>
+#include <queue>
+using namespace llvm;
+
+namespace {
+ enum RandomMeth {
+ GBV, GBVO, HOSTCC
+ };
+}
+
+static cl::opt<RandomMeth> RandomMethod("profile-randomness",
+ cl::desc("How to randomly choose to profile:"),
+ cl::values(
+ clEnumValN(GBV, "global", "global counter"),
+ clEnumValN(GBVO, "ra_global",
+ "register allocated global counter"),
+ clEnumValN(HOSTCC, "rdcc", "cycle counter"),
+ clEnumValEnd));
+
+namespace {
+ /// NullProfilerRS - The basic profiler that does nothing. It is the default
+ /// profiler and thus terminates RSProfiler chains. It is useful for
+ /// measuring framework overhead
+ class VISIBILITY_HIDDEN NullProfilerRS : public RSProfilers {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ bool isProfiling(Value* v) {
+ return false;
+ }
+ bool runOnModule(Module &M) {
+ return false;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
+static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
+ "Measure profiling framework overhead");
+static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
+
+namespace {
+ /// Chooser - Something that chooses when to take a sample of the profiled code
+ class VISIBILITY_HIDDEN Chooser {
+ public:
+ /// ProcessChoicePoint - is called for each basic block inserted to choose
+ /// between normal and sample code
+ virtual void ProcessChoicePoint(BasicBlock*) = 0;
+ /// PrepFunction - is called once per function before other work is done.
+ /// This gives the opportunity to insert new allocas and such.
+ virtual void PrepFunction(Function*) = 0;
+ virtual ~Chooser() {}
+ };
+
+ //Things that implement sampling policies
+ //A global value that is read-mod-stored to choose when to sample.
+ //A sample is taken when the global counter hits 0
+ class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
+ GlobalVariable* Counter;
+ Value* ResetValue;
+ const Type* T;
+ public:
+ GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
+ virtual ~GlobalRandomCounter();
+ virtual void PrepFunction(Function* F);
+ virtual void ProcessChoicePoint(BasicBlock* bb);
+ };
+
+ //Same as GRC, but allows register allocation of the global counter
+ class VISIBILITY_HIDDEN GlobalRandomCounterOpt : public Chooser {
+ GlobalVariable* Counter;
+ Value* ResetValue;
+ AllocaInst* AI;
+ const Type* T;
+ public:
+ GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
+ virtual ~GlobalRandomCounterOpt();
+ virtual void PrepFunction(Function* F);
+ virtual void ProcessChoicePoint(BasicBlock* bb);
+ };
+
+ //Use the cycle counter intrinsic as a source of pseudo randomness when
+ //deciding when to sample.
+ class VISIBILITY_HIDDEN CycleCounter : public Chooser {
+ uint64_t rm;
+ Constant *F;
+ public:
+ CycleCounter(Module& m, uint64_t resetmask);
+ virtual ~CycleCounter();
+ virtual void PrepFunction(Function* F);
+ virtual void ProcessChoicePoint(BasicBlock* bb);
+ };
+
+ /// ProfilerRS - Insert the random sampling framework
+ struct VISIBILITY_HIDDEN ProfilerRS : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ProfilerRS() : FunctionPass(&ID) {}
+
+ std::map<Value*, Value*> TransCache;
+ std::set<BasicBlock*> ChoicePoints;
+ Chooser* c;
+
+ //Translate and duplicate values for the new profile-free version of the code
+ Value* Translate(Value* v);
+ //Duplicate an entire function (without profiling)
+ void Duplicate(Function& F, RSProfilers& LI);
+ //Called once for each backedge; handles the insertion of choice points and
+ //the interconnection of the two versions of the code
+ void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
+ bool runOnFunction(Function& F);
+ bool doInitialization(Module &M);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ };
+}
+
+static RegisterPass<ProfilerRS>
+X("insert-rs-profiling-framework",
+ "Insert random sampling instrumentation framework");
+
+char RSProfilers::ID = 0;
+char NullProfilerRS::ID = 0;
+char ProfilerRS::ID = 0;
+
+//Local utilities
+static void ReplacePhiPred(BasicBlock* btarget,
+ BasicBlock* bold, BasicBlock* bnew);
+
+static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);
+
+template<class T>
+static void recBackEdge(BasicBlock* bb, T& BackEdges,
+ std::map<BasicBlock*, int>& color,
+ std::map<BasicBlock*, int>& depth,
+ std::map<BasicBlock*, int>& finish,
+ int& time);
+
+//find the back edges and where they go to
+template<class T>
+static void getBackEdges(Function& F, T& BackEdges);
+
+
+///////////////////////////////////////
+// Methods of choosing when to profile
+///////////////////////////////////////
+
+GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
+ uint64_t resetval) : T(t) {
+ ConstantInt* Init = ConstantInt::get(T, resetval);
+ ResetValue = Init;
+ Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
+ Init, "RandomSteeringCounter", &M);
+}
+
+GlobalRandomCounter::~GlobalRandomCounter() {}
+
+void GlobalRandomCounter::PrepFunction(Function* F) {}
+
+void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
+ BranchInst* t = cast<BranchInst>(bb->getTerminator());
+
+ //decrement counter
+ LoadInst* l = new LoadInst(Counter, "counter", t);
+
+ ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
+ "countercc", t);
+
+ Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
+ "counternew", t);
+ new StoreInst(nv, Counter, t);
+ t->setCondition(s);
+
+ //reset counter
+ BasicBlock* oldnext = t->getSuccessor(0);
+ BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(),
+ oldnext);
+ TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
+ t->setSuccessor(0, resetblock);
+ new StoreInst(ResetValue, Counter, t2);
+ ReplacePhiPred(oldnext, bb, resetblock);
+}
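+
+// After rewriting, a choice point ends, roughly, with:
+//
+//   %counter    = load i32* @RandomSteeringCounter
+//   %countercc  = icmp eq i32 %counter, 0
+//   %counternew = sub i32 %counter, 1
+//   store i32 %counternew, i32* @RandomSteeringCounter
+//   br i1 %countercc, label %reset, label %<old successor>
+//
+// with the %reset block storing the reset value back before continuing.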
+
+GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t,
+ uint64_t resetval)
+ : AI(0), T(t) {
+ ConstantInt* Init = ConstantInt::get(T, resetval);
+ ResetValue = Init;
+ Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
+ Init, "RandomSteeringCounter", &M);
+}
+
+GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
+
+void GlobalRandomCounterOpt::PrepFunction(Function* F) {
+ //make a local temporary to cache the global
+ BasicBlock& bb = F->getEntryBlock();
+ BasicBlock::iterator InsertPt = bb.begin();
+ AI = new AllocaInst(T, 0, "localcounter", InsertPt);
+ LoadInst* l = new LoadInst(Counter, "counterload", InsertPt);
+ new StoreInst(l, AI, InsertPt);
+
+ //around every call, invoke, unwind, and return, copy the local counter
+ //to/from the global variable
+ for(Function::iterator fib = F->begin(), fie = F->end();
+ fib != fie; ++fib)
+ for(BasicBlock::iterator bib = fib->begin(), bie = fib->end();
+ bib != bie; ++bib)
+ if (isa<CallInst>(bib)) {
+ LoadInst* l = new LoadInst(AI, "counter", bib);
+ new StoreInst(l, Counter, bib);
+ l = new LoadInst(Counter, "counter", ++bib);
+ new StoreInst(l, AI, bib--);
+ } else if (isa<InvokeInst>(bib)) {
+ LoadInst* l = new LoadInst(AI, "counter", bib);
+ new StoreInst(l, Counter, bib);
+
+ BasicBlock* bb = cast<InvokeInst>(bib)->getNormalDest();
+ BasicBlock::iterator i = bb->getFirstNonPHI();
+ l = new LoadInst(Counter, "counter", i);
+ new StoreInst(l, AI, i); //refresh the local counter on the normal path too
+
+ bb = cast<InvokeInst>(bib)->getUnwindDest();
+ i = bb->getFirstNonPHI();
+ l = new LoadInst(Counter, "counter", i);
+ new StoreInst(l, AI, i);
+ } else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) {
+ LoadInst* l = new LoadInst(AI, "counter", bib);
+ new StoreInst(l, Counter, bib);
+ }
+}
+
+void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
+ BranchInst* t = cast<BranchInst>(bb->getTerminator());
+
+ //decrement counter
+ LoadInst* l = new LoadInst(AI, "counter", t);
+
+ ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
+ "countercc", t);
+
+ Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
+ "counternew", t);
+ new StoreInst(nv, AI, t);
+ t->setCondition(s);
+
+ //reset counter
+ BasicBlock* oldnext = t->getSuccessor(0);
+ BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(),
+ oldnext);
+ TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
+ t->setSuccessor(0, resetblock);
+ new StoreInst(ResetValue, AI, t2);
+ ReplacePhiPred(oldnext, bb, resetblock);
+}
+
+
+CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) {
+ F = Intrinsic::getDeclaration(&m, Intrinsic::readcyclecounter);
+}
+
+CycleCounter::~CycleCounter() {}
+
+void CycleCounter::PrepFunction(Function* F) {}
+
+void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
+ BranchInst* t = cast<BranchInst>(bb->getTerminator());
+
+ CallInst* c = CallInst::Create(F, "rdcc", t);
+ BinaryOperator* b =
+ BinaryOperator::CreateAnd(c, ConstantInt::get(Type::Int64Ty, rm),
+ "mrdcc", t);
+
+ ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b,
+ ConstantInt::get(Type::Int64Ty, 0),
+ "mrdccc", t);
+
+ t->setCondition(s);
+}
+
+///////////////////////////////////////
+// Profiling:
+///////////////////////////////////////
+bool RSProfilers_std::isProfiling(Value* v) {
+ if (profcode.find(v) != profcode.end())
+ return true;
+ //else
+ RSProfilers& LI = getAnalysis<RSProfilers>();
+ return LI.isProfiling(v);
+}
+
+void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray) {
+ // Insert the increment after any alloca or PHI instructions...
+ BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
+ while (isa<AllocaInst>(InsertPos))
+ ++InsertPos;
+
+ // Create the getelementptr constant expression
+ std::vector<Constant*> Indices(2);
+ Indices[0] = Constant::getNullValue(Type::Int32Ty);
+ Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum);
+ Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray,
+ &Indices[0], 2);
+
+ // Load, increment and store the value back.
+ Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
+ profcode.insert(OldVal);
+ Value *NewVal = BinaryOperator::CreateAdd(OldVal,
+ ConstantInt::get(Type::Int32Ty, 1),
+ "NewCounter", InsertPos);
+ profcode.insert(NewVal);
+ profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
+}
+
+void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const {
+ //grab any outstanding profiler, or get the null one
+ AU.addRequired<RSProfilers>();
+}
+
+///////////////////////////////////////
+// RS Framework
+///////////////////////////////////////
+
+Value* ProfilerRS::Translate(Value* v) {
+ if(TransCache[v])
+ return TransCache[v];
+
+ if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) {
+ if (bb == &bb->getParent()->getEntryBlock())
+ TransCache[bb] = bb; //don't translate entry block
+ else
+ TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(),
+ bb->getParent(), NULL);
+ return TransCache[bb];
+ } else if (Instruction* i = dyn_cast<Instruction>(v)) {
+ //we have already translated this
+ //do not translate entry block allocas
+ if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) {
+ TransCache[i] = i;
+ return i;
+ } else {
+ //translate this
+ Instruction* i2 = i->clone();
+ if (i->hasName())
+ i2->setName("dup_" + i->getName());
+ TransCache[i] = i2;
+ //NumNewInst++;
+ for (unsigned x = 0; x < i2->getNumOperands(); ++x)
+ i2->setOperand(x, Translate(i2->getOperand(x)));
+ return i2;
+ }
+ } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) {
+ TransCache[v] = v;
+ return v;
+ }
+ assert(0 && "Value not handled");
+ return 0;
+}
+
+void ProfilerRS::Duplicate(Function& F, RSProfilers& LI)
+{
+ //perform a breadth first search, building up a duplicate of the code
+ std::queue<BasicBlock*> worklist;
+ std::set<BasicBlock*> seen;
+
+ //This loop ensures proper BB order, to help performance
+ for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib)
+ worklist.push(fib);
+ while (!worklist.empty()) {
+ Translate(worklist.front());
+ worklist.pop();
+ }
+
+ //remember that reg2mem created a new entry block we don't want to duplicate
+ worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0));
+ seen.insert(&F.getEntryBlock());
+
+ while (!worklist.empty()) {
+ BasicBlock* bb = worklist.front();
+ worklist.pop();
+ if(seen.find(bb) == seen.end()) {
+ BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb));
+ BasicBlock::InstListType& instlist = bbtarget->getInstList();
+ for (BasicBlock::iterator iib = bb->begin(), iie = bb->end();
+ iib != iie; ++iib) {
+ //NumOldInst++;
+ if (!LI.isProfiling(&*iib)) {
+ Instruction* i = cast<Instruction>(Translate(iib));
+ instlist.insert(bbtarget->end(), i);
+ }
+ }
+ //update search state
+ seen.insert(bb);
+ TerminatorInst* ti = bb->getTerminator();
+ for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) {
+ BasicBlock* bbs = ti->getSuccessor(x);
+ if (seen.find(bbs) == seen.end()) {
+ worklist.push(bbs);
+ }
+ }
+ }
+ }
+}
+
+void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) {
+ //given a backedge from B -> A, and translations A' and B',
+ //a: insert C and C'
+ //b: add branches in C to A and A' and in C' to A and A'
+ //c: mod terminators@B, replace A with C
+ //d: mod terminators@B', replace A' with C'
+ //e: mod phis@A for pred B to be pred C
+ // if multiple entries, simplify to one
+ //f: mod phis@A' for pred B' to be pred C'
+ // if multiple entries, simplify to one
+ //g: for all phis@A with pred C using x
+ // add in edge from C' using x'
+ // add in edge from C using x in A'
+
+ //a:
+ Function::iterator BBN = src; ++BBN;
+ BasicBlock* bbC = BasicBlock::Create("choice", &F, BBN);
+ //ChoicePoints.insert(bbC);
+ BBN = cast<BasicBlock>(Translate(src));
+ BasicBlock* bbCp = BasicBlock::Create("choice", &F, ++BBN);
+ ChoicePoints.insert(bbCp);
+
+ //b:
+ BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC);
+ BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)),
+ ConstantInt::get(Type::Int1Ty, true), bbCp);
+ //c:
+ {
+ TerminatorInst* iB = src->getTerminator();
+ for (unsigned x = 0; x < iB->getNumSuccessors(); ++x)
+ if (iB->getSuccessor(x) == dst)
+ iB->setSuccessor(x, bbC);
+ }
+ //d:
+ {
+ TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator()));
+ for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x)
+ if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst)))
+ iBp->setSuccessor(x, bbCp);
+ }
+ //e:
+ ReplacePhiPred(dst, src, bbC);
+ //src could be a switch, in which case we are replacing several edges with one
+ //thus collapse those edges into the Phi
+ CollapsePhi(dst, bbC);
+ //f:
+ ReplacePhiPred(cast<BasicBlock>(Translate(dst)),
+ cast<BasicBlock>(Translate(src)),bbCp);
+ CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp);
+ //g:
+ for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie;
+ ++ib)
+ if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
+ for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
+ if(bbC == phi->getIncomingBlock(x)) {
+ phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp);
+ cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x),
+ bbC);
+ }
+ phi->removeIncomingValue(bbC);
+ }
+}
+
+bool ProfilerRS::runOnFunction(Function& F) {
+ if (!F.isDeclaration()) {
+ std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges;
+ RSProfilers& LI = getAnalysis<RSProfilers>();
+
+ getBackEdges(F, BackEdges);
+ Duplicate(F, LI);
+ //assume that stuff worked. now connect the duplicated basic blocks
+ //with the originals in such a way as to preserve ssa. yuk!
+ for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator
+ ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib)
+ ProcessBackEdge(ib->first, ib->second, F);
+
+ //oh, and add the edge from the reg2mem created entry node to the
+ //duplicated second node
+ TerminatorInst* T = F.getEntryBlock().getTerminator();
+ ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0),
+ cast<BasicBlock>(
+ Translate(T->getSuccessor(0))),
+ ConstantInt::get(Type::Int1Ty,
+ true)));
+
+ //do whatever is needed now that the function is duplicated
+ c->PrepFunction(&F);
+
+ //add entry node to choice points
+ ChoicePoints.insert(&F.getEntryBlock());
+
+ for (std::set<BasicBlock*>::iterator
+ ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii)
+ c->ProcessChoicePoint(*ii);
+
+ ChoicePoints.clear();
+ TransCache.clear();
+
+ return true;
+ }
+ return false;
+}
+
+bool ProfilerRS::doInitialization(Module &M) {
+ switch (RandomMethod) {
+ case GBV:
+ c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1);
+ break;
+ case GBVO:
+ c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1);
+ break;
+ case HOSTCC:
+ c = new CycleCounter(M, (1 << 14) - 1);
+ break;
+ }
+ return true;
+}
+
+void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<RSProfilers>();
+ AU.addRequiredID(DemoteRegisterToMemoryID);
+}
+
+///////////////////////////////////////
+// Utilities:
+///////////////////////////////////////
+static void ReplacePhiPred(BasicBlock* btarget,
+ BasicBlock* bold, BasicBlock* bnew) {
+ for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
+ ib != ie; ++ib)
+ if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
+ for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
+ if(bold == phi->getIncomingBlock(x))
+ phi->setIncomingBlock(x, bnew);
+ }
+}
+
+static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) {
+ for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
+ ib != ie; ++ib)
+ if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
+ std::map<BasicBlock*, Value*> counter;
+ for(unsigned i = 0; i < phi->getNumIncomingValues(); ) {
+ if (counter[phi->getIncomingBlock(i)]) {
+ assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]);
+ phi->removeIncomingValue(i, false);
+ } else {
+ counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i);
+ ++i;
+ }
+ }
+ }
+}
+
+template<class T>
+static void recBackEdge(BasicBlock* bb, T& BackEdges,
+ std::map<BasicBlock*, int>& color,
+ std::map<BasicBlock*, int>& depth,
+ std::map<BasicBlock*, int>& finish,
+ int& time)
+{
+ color[bb] = 1;
+ ++time;
+ depth[bb] = time;
+ TerminatorInst* t= bb->getTerminator();
+ for(unsigned i = 0; i < t->getNumSuccessors(); ++i) {
+ BasicBlock* bbnew = t->getSuccessor(i);
+ if (color[bbnew] == 0)
+ recBackEdge(bbnew, BackEdges, color, depth, finish, time);
+ else if (color[bbnew] == 1) {
+ BackEdges.insert(std::make_pair(bb, bbnew));
+ //NumBackEdges++;
+ }
+ }
+ color[bb] = 2;
+ ++time;
+ finish[bb] = time;
+}
+
+
+
+//find the back edges and where they go to
+template<class T>
+static void getBackEdges(Function& F, T& BackEdges) {
+ std::map<BasicBlock*, int> color;
+ std::map<BasicBlock*, int> depth;
+ std::map<BasicBlock*, int> finish;
+ int time = 0;
+ recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
+ DOUT << F.getName() << " " << BackEdges.size() << "\n";
+}
+
+
+//Creation functions
+ModulePass* llvm::createNullProfilerRSPass() {
+ return new NullProfilerRS();
+}
+
+FunctionPass* llvm::createRSProfilingPass() {
+ return new ProfilerRS();
+}
diff --git a/lib/Transforms/Instrumentation/RSProfiling.h b/lib/Transforms/Instrumentation/RSProfiling.h
new file mode 100644
index 0000000..8bbe7c7
--- /dev/null
+++ b/lib/Transforms/Instrumentation/RSProfiling.h
@@ -0,0 +1,31 @@
+//===- RSProfiling.h - Various profiling using random sampling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// See notes in RSProfiling.cpp
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/RSProfiling.h"
+#include <set>
+
+namespace llvm {
+ /// RSProfilers_std - a simple support class for profilers that handles most
+ /// of the work of chaining and tracking inserted code.
+ struct RSProfilers_std : public RSProfilers {
+ static char ID;
+ std::set<Value*> profcode;
+ // Look up values in profcode
+ virtual bool isProfiling(Value* v);
+ // handles required chaining
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ // places counter updates in basic blocks and records added instructions in
+ // profcode
+ void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray);
+ };
+}
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
new file mode 100644
index 0000000..5fe1eeb
--- /dev/null
+++ b/lib/Transforms/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Transforms/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello
+
+include $(LEVEL)/Makefile.config
+
+# No support for plugins on windows targets
+ifeq ($(OS), $(filter $(OS), Cygwin MingW))
+ PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS))
+endif
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
new file mode 100644
index 0000000..9c55f66
--- /dev/null
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -0,0 +1,98 @@
+//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Aggressive Dead Code Elimination pass. This pass
+// optimistically assumes that all instructions are dead until proven otherwise,
+// allowing it to eliminate dead computations that other DCE passes do not
+// catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
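+
+// For example, a loop that only feeds a value never used afterwards, such as
+//
+//   for (i = 0; i != n; ++i) sum += a[i];   // 'sum' unused after the loop
+//
+// has its loads and adds removed: they are not reachable from any live root,
+// even though each iteration "uses" the value produced by the previous one,
+// which defeats a simple use-count-based DCE.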
+
+#define DEBUG_TYPE "adce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+ struct VISIBILITY_HIDDEN ADCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCE() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function& F);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.setPreservesCFG();
+ }
+
+ };
+}
+
+char ADCE::ID = 0;
+static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination");
+
+bool ADCE::runOnFunction(Function& F) {
+ SmallPtrSet<Instruction*, 128> alive;
+ SmallVector<Instruction*, 128> worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isa<TerminatorInst>(I.getInstructionIterator()) ||
+ isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+ I->mayHaveSideEffects()) {
+ alive.insert(I.getInstructionIterator());
+ worklist.push_back(I.getInstructionIterator());
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!worklist.empty()) {
+ Instruction* curr = worklist.back();
+ worklist.pop_back();
+
+ for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
+ OI != OE; ++OI)
+ if (Instruction* Inst = dyn_cast<Instruction>(OI))
+ if (alive.insert(Inst))
+ worklist.push_back(Inst);
+ }
+
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the worklist vector here for memory efficiency.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (!alive.count(I.getInstructionIterator())) {
+ worklist.push_back(I.getInstructionIterator());
+ I->dropAllReferences();
+ }
+
+ for (SmallVector<Instruction*, 128>::iterator I = worklist.begin(),
+ E = worklist.end(); I != E; ++I) {
+ NumRemoved++;
+ (*I)->eraseFromParent();
+ }
+
+ return !worklist.empty();
+}
+
+FunctionPass *llvm::createAggressiveDCEPass() {
+ return new ADCE();
+}
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
new file mode 100644
index 0000000..fb9b880
--- /dev/null
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -0,0 +1,148 @@
+//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a very simple profile guided basic block placement
+// algorithm. The idea is to put frequently executed blocks together at the
+// start of the function, and hopefully increase the number of fall-through
+// conditional branches. If there is no profile information for a particular
+// function, this pass basically orders blocks in depth-first order.
+//
+// The algorithm implemented here is basically "Algo1" from "Profile Guided Code
+// Positioning" by Pettis and Hansen, except that it uses basic block counts
+// instead of edge counts. This should be improved in many ways, but is very
+// simple for now.
+//
+// Basically we "place" the entry block, then loop over all successors in a DFO,
+// placing the most frequently executed successor until we run out of blocks. I
+// told you this was _extremely_ simplistic. :) This is also much slower than it
+// could be. When it becomes important, this pass will be rewritten to use a
+// better algorithm, and then we can worry about efficiency.
+//
+//===----------------------------------------------------------------------===//
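+
+// For example, if the profile says the 'else' arm of a diamond is the hot
+// successor, that block is spliced directly after its predecessor so the
+// conditional branch can fall through into it, while the cold arm sinks
+// toward the end of the function.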
+
+#define DEBUG_TYPE "block-placement"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Scalar.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumMoved, "Number of basic blocks moved");
+
+namespace {
+ struct VISIBILITY_HIDDEN BlockPlacement : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BlockPlacement() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<ProfileInfo>();
+ //AU.addPreserved<ProfileInfo>(); // Does this work?
+ }
+ private:
+ /// PI - The profile information that is guiding us.
+ ///
+ ProfileInfo *PI;
+
+ /// NumMovedBlocks - Every time we move a block, increment this counter.
+ ///
+ unsigned NumMovedBlocks;
+
+ /// PlacedBlocks - Every time we place a block, remember it so we don't get
+ /// into infinite loops.
+ std::set<BasicBlock*> PlacedBlocks;
+
+ /// InsertPos - This is an iterator to the next place we want to insert a
+ /// block.
+ Function::iterator InsertPos;
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void PlaceBlocks(BasicBlock *BB);
+ };
+}
+
+char BlockPlacement::ID = 0;
+static RegisterPass<BlockPlacement>
+X("block-placement", "Profile Guided Basic Block Placement");
+
+FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
+
+bool BlockPlacement::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+ NumMovedBlocks = 0;
+ InsertPos = F.begin();
+
+ // Recursively place all blocks.
+ PlaceBlocks(F.begin());
+
+ PlacedBlocks.clear();
+ NumMoved += NumMovedBlocks;
+ return NumMovedBlocks != 0;
+}
+
+
+/// PlaceBlocks - Recursively place the specified blocks and any unplaced
+/// successors.
+void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
+ assert(!PlacedBlocks.count(BB) && "Already placed this block!");
+ PlacedBlocks.insert(BB);
+
+ // Place the specified block.
+ if (&*InsertPos != BB) {
+ // Use splice to move the block into the right place. This avoids having to
+ // remove the block from the function and then re-add it, which would cause
+ // a bunch of entirely pointless symbol table traffic.
+ Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
+ Blocks.splice(InsertPos, Blocks, BB);
+
+ ++NumMovedBlocks;
+ } else {
+ // This block is already in the right place, we don't have to do anything.
+ ++InsertPos;
+ }
+
+ // Keep placing successors until we run out of ones to place. Note that this
+ // loop is very inefficient (N^2) for blocks with many successors, like switch
+ // statements. FIXME!
+ while (1) {
+ // Okay, now place any unplaced successors.
+ succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+
+ // Scan for the first unplaced successor.
+ for (; SI != E && PlacedBlocks.count(*SI); ++SI)
+ /*empty*/;
+ if (SI == E) return; // No more successors to place.
+
+ unsigned MaxExecutionCount = PI->getExecutionCount(*SI);
+ BasicBlock *MaxSuccessor = *SI;
+
+ // Scan for more frequently executed successors
+ for (; SI != E; ++SI)
+ if (!PlacedBlocks.count(*SI)) {
+ unsigned Count = PI->getExecutionCount(*SI);
+ if (Count > MaxExecutionCount ||
+ // Prefer to not disturb the code.
+ (Count == MaxExecutionCount && *SI == &*InsertPos)) {
+ MaxExecutionCount = Count;
+ MaxSuccessor = *SI;
+ }
+ }
+
+ // Now that we picked the maximally executed successor, place it.
+ PlaceBlocks(MaxSuccessor);
+ }
+}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
new file mode 100644
index 0000000..7a7c48b
--- /dev/null
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -0,0 +1,33 @@
+add_llvm_library(LLVMScalarOpts
+ ADCE.cpp
+ BasicBlockPlacement.cpp
+ CodeGenPrepare.cpp
+ CondPropagate.cpp
+ ConstantProp.cpp
+ DCE.cpp
+ DeadStoreElimination.cpp
+ GVN.cpp
+ GVNPRE.cpp
+ IndVarSimplify.cpp
+ InstructionCombining.cpp
+ JumpThreading.cpp
+ LICM.cpp
+ LoopDeletion.cpp
+ LoopIndexSplit.cpp
+ LoopRotation.cpp
+ LoopStrengthReduce.cpp
+ LoopUnroll.cpp
+ LoopUnswitch.cpp
+ MemCpyOptimizer.cpp
+ PredicateSimplifier.cpp
+ Reassociate.cpp
+ Reg2Mem.cpp
+ SCCP.cpp
+ Scalar.cpp
+ ScalarReplAggregates.cpp
+ SimplifyCFGPass.cpp
+ SimplifyHalfPowrLibCalls.cpp
+ SimplifyLibCalls.cpp
+ TailDuplication.cpp
+ TailRecursionElimination.cpp
+ )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
new file mode 100644
index 0000000..342b1e5
--- /dev/null
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -0,0 +1,873 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in it's
+// basic-block-at-a-time approach. It should eventually be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegenprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
+ cl::init(false), cl::Hidden);
+
+namespace {
+ class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *TLI;
+
+ /// BackEdges - Keep a set of all the loop back edges.
+ ///
+ SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit CodeGenPrepare(const TargetLowering *tli = 0)
+ : FunctionPass(&ID), TLI(tli) {}
+ bool runOnFunction(Function &F);
+
+ private:
+ bool EliminateMostlyEmptyBlocks(Function &F);
+ bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void EliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool OptimizeBlock(BasicBlock &BB);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
+ DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+ DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeExtUses(Instruction *I);
+ void findLoopBackEdges(const Function &F);
+ };
+}
+
+char CodeGenPrepare::ID = 0;
+static RegisterPass<CodeGenPrepare> X("codegenprepare",
+ "Optimize for code generation");
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
+ return new CodeGenPrepare(TLI);
+}
+
+/// findLoopBackEdges - Do a DFS walk to find loop back edges.
+///
+void CodeGenPrepare::findLoopBackEdges(const Function &F) {
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
+ FindFunctionBackedges(F, Edges);
+
+ BackEdges.insert(Edges.begin(), Edges.end());
+}
+
+
+bool CodeGenPrepare::runOnFunction(Function &F) {
+ bool EverMadeChange = false;
+
+ // First pass, eliminate blocks that contain only PHI nodes and an
+ // unconditional branch.
+ EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+
+ // Now find loop back edges.
+ findLoopBackEdges(F);
+
+ bool MadeChange = true;
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ MadeChange |= OptimizeBlock(*BB);
+ EverMadeChange |= MadeChange;
+ }
+ return EverMadeChange;
+}
+
+/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
+/// debug info directives, and an unconditional branch. Passes before isel
+/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
+/// isel. Start by eliminating these blocks so we can split edges the way we
+/// want them split.
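+///
+/// For example (an illustrative sketch added for exposition; the block and
+/// value names are hypothetical):
+///   bb:                                ; preds = %x, %y
+///     %p = phi i32 [ %a, %x ], [ %b, %y ]
+///     br label %dest
+/// can be merged into %dest when %p is only used by phi nodes in %dest.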
+bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+ bool MadeChange = false;
+ // Note that this intentionally skips the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ continue;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI;
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ continue;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ continue;
+
+ if (!CanMergeBlocks(BB, DestBB))
+ continue;
+
+ EliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
+/// single uncond branch between them, and BB contains no other non-phi
+/// instructions.
+bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+ const BasicBlock *DestBB) const {
+ // We only want to eliminate blocks whose phi nodes are used by phi nodes in
+  // the successor. If there are more complex conditions (e.g. preheaders),
+ // don't mess around with them.
+ BasicBlock::const_iterator BBI = BB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != DestBB || !isa<PHINode>(User))
+ return false;
+      // If User is a PHINode inside DestBB, check its incoming values. If an
+      // incoming value is not from BB, then this is a complex condition
+      // (e.g. preheaders) we want to avoid here.
+ if (User->getParent() == DestBB) {
+ if (const PHINode *UPN = dyn_cast<PHINode>(User))
+ for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
+ Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
+ if (Insn && Insn->getParent() == BB &&
+ Insn->getParent() != UPN->getIncomingBlock(I))
+ return false;
+ }
+ }
+ }
+ }
+
+ // If BB and DestBB contain any common predecessors, then the phi nodes in BB
+ // and DestBB may have conflicting incoming values for the block. If so, we
+ // can't merge the block.
+ const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
+ if (!DestBBPN) return true; // no conflict.
+
+ // Collect the preds of BB.
+ SmallPtrSet<const BasicBlock*, 16> BBPreds;
+ if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ // It is faster to get preds from a PHI than with pred_iterator.
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ BBPreds.insert(BBPN->getIncomingBlock(i));
+ } else {
+ BBPreds.insert(pred_begin(BB), pred_end(BB));
+ }
+
+ // Walk the preds of DestBB.
+ for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
+ if (BBPreds.count(Pred)) { // Common predecessor?
+ BBI = DestBB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ const Value *V1 = PN->getIncomingValueForBlock(Pred);
+ const Value *V2 = PN->getIncomingValueForBlock(BB);
+
+ // If V2 is a phi node in BB, look up what the mapped value will be.
+ if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
+ if (V2PN->getParent() == BB)
+ V2 = V2PN->getIncomingValueForBlock(Pred);
+
+ // If there is a conflict, bail out.
+ if (V1 != V2) return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
+/// an unconditional branch in it.
+void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ BasicBlock *DestBB = BI->getSuccessor(0);
+
+ DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;
+
+ // If the destination block has a single pred, then this is a trivial edge,
+ // just collapse it.
+ if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+ if (SinglePred != DestBB) {
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(DestBB);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
+ return;
+ }
+ }
+
+ // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
+ // to handle the new incoming edges it is about to have.
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = DestBB->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ // Remove the incoming value for BB, and remember it.
+ Value *InVal = PN->removeIncomingValue(BB, false);
+
+ // Two options: either the InVal is a phi node defined in BB or it is some
+ // value that dominates BB.
+ PHINode *InValPhi = dyn_cast<PHINode>(InVal);
+ if (InValPhi && InValPhi->getParent() == BB) {
+ // Add all of the input values of the input PHI as inputs of this phi.
+ for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InValPhi->getIncomingValue(i),
+ InValPhi->getIncomingBlock(i));
+ } else {
+ // Otherwise, add one instance of the dominating value for each edge that
+ // we will be adding.
+ if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ PN->addIncoming(InVal, *PI);
+ }
+ }
+ }
+
+ // The PHIs are now updated, change everything that refers to BB to use
+ // DestBB and remove BB.
+ BB->replaceAllUsesWith(DestBB);
+ BB->eraseFromParent();
+
+ DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
+}
+
+
+/// SplitEdgeNicely - Split the critical edge from TI to its specified
+/// successor if it will improve codegen. We only do this if the successor has
+/// phi nodes (otherwise critical edges are ok). If there is already another
+/// predecessor of the succ that is empty (and thus has no phi nodes), use it
+/// instead of introducing a new block.
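+///
+/// For example (illustrative): when splitting the edge from TIBB to Dest, if
+/// another predecessor of Dest is an empty block that just does
+/// "br label %Dest" and feeds Dest's phis the same values TIBB would, the
+/// terminator is retargeted to that block instead of a new ".critedge" one.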
+static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
+ SmallSet<std::pair<const BasicBlock*,
+ const BasicBlock*>, 8> &BackEdges,
+ Pass *P) {
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ assert(isa<PHINode>(Dest->begin()) &&
+ "This should only be called if Dest has a PHI!");
+
+ // Do not split edges to EH landing pads.
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) {
+ if (Invoke->getSuccessor(1) == Dest)
+ return;
+ }
+
+ // As a hack, never split backedges of loops. Even though the copy for any
+ // PHIs inserted on the backedge would be dead for exits from the loop, we
+ // assume that the cost of *splitting* the backedge would be too high.
+ if (BackEdges.count(std::make_pair(TIBB, Dest)))
+ return;
+
+ if (!FactorCommonPreds) {
+ /// TIPHIValues - This array is lazily computed to determine the values of
+ /// PHIs in Dest that TI would provide.
+ SmallVector<Value*, 32> TIPHIValues;
+
+ // Check to see if Dest has any blocks that can be used as a split edge for
+ // this terminator.
+ for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ // To be usable, the pred has to end with an uncond branch to the dest.
+ BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredBr || !PredBr->isUnconditional())
+ continue;
+ // Must be empty other than the branch and debug info.
+ BasicBlock::iterator I = Pred->begin();
+ while (isa<DbgInfoIntrinsic>(I))
+ I++;
+ if (dyn_cast<Instruction>(I) != PredBr)
+ continue;
+ // Cannot be the entry block; its label does not get emitted.
+ if (Pred == &(Dest->getParent()->getEntryBlock()))
+ continue;
+
+ // Finally, since we know that Dest has phi nodes in it, we have to make
+ // sure that jumping to Pred will have the same effect as going to Dest in
+ // terms of PHI values.
+ PHINode *PN;
+ unsigned PHINo = 0;
+ bool FoundMatch = true;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
+ if (PHINo == TIPHIValues.size())
+ TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
+
+ // If the PHI entry doesn't work, we can't use this pred.
+ if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
+ FoundMatch = false;
+ break;
+ }
+ }
+
+ // If we found a workable predecessor, change TI to branch to Succ.
+ if (FoundMatch) {
+ Dest->removePredecessor(TIBB);
+ TI->setSuccessor(SuccNum, Pred);
+ return;
+ }
+ }
+
+ SplitCriticalEdge(TI, SuccNum, P, true);
+ return;
+ }
+
+ PHINode *PN;
+ SmallVector<Value*, 8> TIPHIValues;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
+
+ SmallVector<BasicBlock*, 8> IdenticalPreds;
+ for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (BackEdges.count(std::make_pair(Pred, Dest)))
+ continue;
+    if (Pred == TIBB)
+ IdenticalPreds.push_back(Pred);
+ else {
+ bool Identical = true;
+ unsigned PHINo = 0;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
+ if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
+ Identical = false;
+ break;
+ }
+ if (Identical)
+ IdenticalPreds.push_back(Pred);
+ }
+ }
+
+ assert(!IdenticalPreds.empty());
+ SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
+ ".critedge", P);
+}
+
+
+/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
+/// copy (e.g. it's casting from one pointer type to another, int->uint, or
+/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+///
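+/// For example (an illustrative sketch; the names are hypothetical):
+///   entry:
+///     %p = bitcast i8* %q to i32*
+///     br label %use
+///   use:
+///     %v = load i32* %p
+/// A copy of the bitcast is inserted into %use next to the load, so both
+/// values live in the same block and coalesce into one virtual register.
+///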
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+  // Get the source and destination value types to check for a noop copy.
+ MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(CI->getType());
+
+  // An fp<->int conversion is not a noop copy.
+ if (SrcVT.isInteger() != DstVT.isInteger())
+ return false;
+
+ // If this is an extension, it will be a zero or sign extension, which
+ // isn't a noop.
+ if (SrcVT.bitsLT(DstVT)) return false;
+
+ // If these values will be promoted, find out what they will be promoted
+ // to. This helps us consider truncates on PPC as noop copies when they
+ // are.
+ if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
+ SrcVT = TLI.getTypeToTransformTo(SrcVT);
+ if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
+ DstVT = TLI.getTypeToTransformTo(DstVT);
+
+ // If, after promotion, these are the same types, this is a noop copy.
+ if (SrcVT != DstVT)
+ return false;
+
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCasts - Only insert a cast in each block once.
+ DenseMap<BasicBlock*, CastInst*> InsertedCasts;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this cast is used in. For PHI's this is the
+ // appropriate predecessor block.
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ UserBB = PN->getIncomingBlock(UI);
+ }
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // If this user is in the same block as the cast, don't change the cast.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cast into this block, use it.
+ CastInst *&InsertedCast = InsertedCasts[UserBB];
+
+ if (!InsertedCast) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedCast =
+ CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
+ InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cast with a use of the new cast.
+ TheUse = InsertedCast;
+ }
+
+ // If we removed all uses, nuke the cast.
+ if (CI->use_empty()) {
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// the number of virtual registers that must be created and coalesced. This is
+/// a clear win except on targets with multiple condition code registers
+/// (PowerPC), where it might lose; some adjustment may be wanted there.
+///
+/// Return true if any changes are made.
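+///
+/// For example (illustrative, hypothetical names): if
+///   bb0:  %c = icmp eq i32 %a, %b
+///   bb1:  br i1 %c, label %t, label %f
+/// a copy of the icmp is placed into bb1, letting isel fold the compare and
+/// branch together instead of carrying %c in a register across the edge.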
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCmp - Only insert a cmp in each block once.
+ DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ // Figure out which BB this cmp is used in.
+ BasicBlock *UserBB = User->getParent();
+
+ // If this user is in the same block as the cmp, don't change the cmp.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cmp into this block, use it.
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+ if (!InsertedCmp) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedCmp =
+ CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
+ CI->getOperand(1), "", InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cmp with a use of the new cmp.
+ TheUse = InsertedCmp;
+ }
+
+ // If we removed all uses, nuke the cmp.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
+
+/// IsNonLocalValue - Return true if the specified value is defined in a
+/// different basic block than BB.
+static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() != BB;
+ return false;
+}
+
+/// OptimizeMemoryInst - Load and store instructions often have addressing
+/// modes that can do significant amounts of computation. As such,
+/// instruction selection will try to get the load or store to do as much
+/// computation as possible for the program. The problem is that isel can only
+/// see within a single block. As such, we sink as much legal addressing mode
+/// stuff into the block as possible.
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands.
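+///
+/// For example (an illustrative sketch; names are hypothetical): if
+///   %a = getelementptr i32* %base, i64 %i
+/// is computed in another block and loaded from here, the address is rebuilt
+/// beside the load as "sunkaddr" ptrtoint/mul/add/inttoptr instructions that
+/// isel can fold into the load's addressing mode.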
+bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ const Type *AccessTy,
+ DenseMap<Value*,Value*> &SunkAddrs) {
+ // Figure out what addressing mode will be built up for this operation.
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
+ AddrModeInsts, *TLI);
+
+  // Check to see if any of the instructions subsumed by this addr mode are
+ // non-local to I's BB.
+ bool AnyNonLocal = false;
+ for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
+ if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
+ AnyNonLocal = true;
+ break;
+ }
+ }
+
+ // If all the instructions matched are already in this BB, don't do anything.
+ if (!AnyNonLocal) {
+ DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n");
+ return false;
+ }
+
+ // Insert this computation right after this user. Since our caller is
+  // scanning from the top of the BB to the bottom, any reuse of the expr is
+ // guaranteed to happen later.
+ BasicBlock::iterator InsertPt = MemoryInst;
+
+  // Now that we have determined the addressing expression we want to use and
+  // know that we have to sink it into this block, check to see if we have
+  // already done this for some other load/store instr in this block. If so,
+  // reuse the computation.
+ Value *&SunkAddr = SunkAddrs[Addr];
+ if (SunkAddr) {
+ DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ if (SunkAddr->getType() != Addr->getType())
+ SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
+ } else {
+ DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();
+
+ Value *Result = 0;
+ // Start with the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (isa<PointerType>(V->getType())) {
+ V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ } else {
+ V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ }
+ if (AddrMode.Scale != 1)
+ V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
+ AddrMode.Scale),
+ "sunkaddr", InsertPt);
+ Result = V;
+ }
+
+ // Add in the base register.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType() != IntPtrTy)
+ V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ // Add in the BaseGV if present.
+ if (AddrMode.BaseGV) {
+ Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
+ InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ if (Result == 0)
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ else
+ SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
+ }
+
+ MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
+
+ if (Addr->use_empty())
+ RecursivelyDeleteTriviallyDeadInstructions(Addr);
+ return true;
+}
+
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeMemoryInst to sink their address computations into the block when
+/// possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+ DenseMap<Value*,Value*> &SunkAddrs) {
+ bool MadeChange = false;
+ InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ std::vector<InlineAsm::ConstraintInfo>
+ ConstraintInfos = IA->ParseConstraints();
+
+ /// ConstraintOperands - Information about all of the constraints.
+ std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.
+ push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
+ TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.isIndirect)
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI->ComputeConstraintToUse(OpInfo, SDValue(),
+ OpInfo.ConstraintType == TargetLowering::C_Memory);
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.isIndirect) {
+ Value *OpVal = OpInfo.CallOperandVal;
+ MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
+ }
+ }
+
+ return MadeChange;
+}
+
+bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+ BasicBlock *DefBB = I->getParent();
+
+  // If both the result of the {s|z}xt and its source are live out, rewrite
+  // all other uses of the source with the result of the extension.
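+  // For example (illustrative, hypothetical names): given
+  //   %s = zext i16 %x to i32
+  // where both %x and %s are used in other blocks, those uses of %x are
+  // rewritten to "trunc i32 %s to i16" so only %s is live out of this block.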
+ Value *Src = I->getOperand(0);
+ if (Src->hasOneUse())
+ return false;
+
+ // Only do this xform if truncating is free.
+ if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+ return false;
+
+ // Only safe to perform the optimization if the source is also defined in
+ // this block.
+ if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+ return false;
+
+ bool DefIsLiveOut = false;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ DefIsLiveOut = true;
+ break;
+ }
+ if (!DefIsLiveOut)
+ return false;
+
+  // Make sure none of the uses are PHI nodes.
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ // Be conservative. We don't want this xform to end up introducing
+ // reloads just before load / store instructions.
+ if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
+ return false;
+ }
+
+  // InsertedTruncs - Only insert one trunc in each block.
+ DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+
+ // Both src and def are live in this block. Rewrite the use.
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+ if (!InsertedTrunc) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+ }
+
+ // Replace a use of the {s|z}ext source with a use of the result.
+ TheUse = InsertedTrunc;
+
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+ bool MadeChange = false;
+
+ // Split all critical edges where the dest block has a PHI.
+ TerminatorInst *BBTI = BB.getTerminator();
+ if (BBTI->getNumSuccessors() > 1) {
+ for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *SuccBB = BBTI->getSuccessor(i);
+ if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+ SplitEdgeNicely(BBTI, i, BackEdges, this);
+ }
+ }
+
+ // Keep track of non-local addresses that have been sunk into this block.
+ // This allows us to avoid inserting duplicate code for blocks with multiple
+ // load/stores of the same address.
+ DenseMap<Value*, Value*> SunkAddrs;
+
+ for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
+ Instruction *I = BBI++;
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+      // evaluation in a block other than the one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ continue;
+
+ bool Change = false;
+ if (TLI) {
+ Change = OptimizeNoopCopyExpression(CI, *TLI);
+ MadeChange |= Change;
+ }
+
+ if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
+ MadeChange |= OptimizeExtUses(I);
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+ MadeChange |= OptimizeCmpExpression(CI);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (TLI)
+ MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
+ SunkAddrs);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (TLI)
+ MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType(),
+ SunkAddrs);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+        // A GEP with all zero indices is equivalent to a bitcast of its
+        // pointer operand.
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ MadeChange = true;
+ BBI = NC;
+ }
+ } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
+      // If we found an inline asm expression, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue()))
+ if (const TargetAsmInfo *TAI =
+ TLI->getTargetMachine().getTargetAsmInfo()) {
+ if (TAI->ExpandInlineAsm(CI)) {
+ BBI = BB.begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ } else
+ // Sink address computing for memory operands into the block.
+ MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp
new file mode 100644
index 0000000..c85d031
--- /dev/null
+++ b/lib/Transforms/Scalar/CondPropagate.cpp
@@ -0,0 +1,295 @@
+//===-- CondPropagate.cpp - Propagate Conditional Expressions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass propagates information about conditional expressions through the
+// program, allowing it to eliminate conditional branches in some cases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "condprop"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+
+STATISTIC(NumBrThread, "Number of CFG edges threaded through branches");
+STATISTIC(NumSwThread, "Number of CFG edges threaded through switches");
+
+namespace {
+ struct VISIBILITY_HIDDEN CondProp : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CondProp() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ //AU.addRequired<DominanceFrontier>();
+ }
+
+ private:
+ bool MadeChange;
+ SmallVector<BasicBlock *, 4> DeadBlocks;
+ void SimplifyBlock(BasicBlock *BB);
+ void SimplifyPredecessors(BranchInst *BI);
+ void SimplifyPredecessors(SwitchInst *SI);
+ void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB);
+ bool RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI);
+ };
+}
+
+char CondProp::ID = 0;
+static RegisterPass<CondProp> X("condprop", "Conditional Propagation");
+
+FunctionPass *llvm::createCondPropagationPass() {
+ return new CondProp();
+}
+
+bool CondProp::runOnFunction(Function &F) {
+ bool EverMadeChange = false;
+ DeadBlocks.clear();
+
+ // While we are simplifying blocks, keep iterating.
+ do {
+ MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E;)
+ SimplifyBlock(BB++);
+ EverMadeChange = EverMadeChange || MadeChange;
+ } while (MadeChange);
+
+ if (EverMadeChange) {
+ while (!DeadBlocks.empty()) {
+ BasicBlock *BB = DeadBlocks.back(); DeadBlocks.pop_back();
+ DeleteDeadBlock(BB);
+ }
+ }
+ return EverMadeChange;
+}
+
+void CondProp::SimplifyBlock(BasicBlock *BB) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // If this is a conditional branch based on a phi node that is defined in
+ // this block, see if we can simplify predecessors of this block.
+ if (BI->isConditional() && isa<PHINode>(BI->getCondition()) &&
+ cast<PHINode>(BI->getCondition())->getParent() == BB)
+ SimplifyPredecessors(BI);
+
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (isa<PHINode>(SI->getCondition()) &&
+ cast<PHINode>(SI->getCondition())->getParent() == BB)
+ SimplifyPredecessors(SI);
+ }
+
+ // If possible, simplify the terminator of this block.
+ if (ConstantFoldTerminator(BB))
+ MadeChange = true;
+
+ // If this block ends with an unconditional branch and the only successor has
+ // only this block as a predecessor, merge the two blocks together.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ if (BI->isUnconditional() && BI->getSuccessor(0)->getSinglePredecessor() &&
+ BB != BI->getSuccessor(0)) {
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // If Succ has any PHI nodes, they are all single-entry PHI's. Eliminate
+ // them.
+ FoldSingleEntryPHINodes(Succ);
+
+ // Remove BI.
+ BI->eraseFromParent();
+
+ // Move over all of the instructions.
+ BB->getInstList().splice(BB->end(), Succ->getInstList());
+
+ // Any phi nodes that had entries for Succ now have entries from BB.
+ Succ->replaceAllUsesWith(BB);
+
+ // Succ is now dead, but we cannot delete it without potentially
+ // invalidating iterators elsewhere. Just insert an unreachable
+ // instruction in it and delete this block later on.
+ new UnreachableInst(Succ);
+ DeadBlocks.push_back(Succ);
+ MadeChange = true;
+ }
+}
+
+// SimplifyPredecessors(branches) - We know that BI is a conditional branch
+// based on a PHI node defined in this block. If the phi node contains constant
+// operands, then the blocks corresponding to those operands can be modified to
+// jump directly to the destination instead of going through this block.
+void CondProp::SimplifyPredecessors(BranchInst *BI) {
+  // TODO: We currently only handle the most trivial case, where the PHI node has
+ // one use (the branch), and is the only instruction besides the branch and dbg
+ // intrinsics in the block.
+ PHINode *PN = cast<PHINode>(BI->getCondition());
+
+ if (PN->getNumIncomingValues() == 1) {
+ // Eliminate single-entry PHI nodes.
+ FoldSingleEntryPHINodes(PN->getParent());
+ return;
+ }
+
+
+ if (!PN->hasOneUse()) return;
+
+ BasicBlock *BB = BI->getParent();
+ if (&*BB->begin() != PN)
+ return;
+ BasicBlock::iterator BBI = BB->begin();
+ BasicBlock::iterator BBE = BB->end();
+ while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
+ if (&*BBI != BI)
+ return;
+
+ // Ok, we have this really simple case, walk the PHI operands, looking for
+ // constants. Walk from the end to remove operands from the end when
+ // possible, and to avoid invalidating "i".
+ for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) {
+ Value *InVal = PN->getIncomingValue(i-1);
+ if (!RevectorBlockTo(PN->getIncomingBlock(i-1), InVal, BI))
+ continue;
+
+ ++NumBrThread;
+
+ // If there were two predecessors before this simplification, or if the
+ // PHI node contained all the same value except for the one we just
+ // substituted, the PHI node may be deleted. Don't iterate through it the
+ // last time.
+ if (BI->getCondition() != PN) return;
+ }
+}
+
+// SimplifyPredecessors(switch) - We know that SI is switch based on a PHI node
+// defined in this block. If the phi node contains constant operands, then the
+// blocks corresponding to those operands can be modified to jump directly to
+// the destination instead of going through this block.
+void CondProp::SimplifyPredecessors(SwitchInst *SI) {
+  // TODO: We currently only handle the most trivial case, where the PHI node has
+ // one use (the branch), and is the only instruction besides the branch and
+ // dbg intrinsics in the block.
+ PHINode *PN = cast<PHINode>(SI->getCondition());
+ if (!PN->hasOneUse()) return;
+
+ BasicBlock *BB = SI->getParent();
+ if (&*BB->begin() != PN)
+ return;
+ BasicBlock::iterator BBI = BB->begin();
+ BasicBlock::iterator BBE = BB->end();
+ while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
+ if (&*BBI != SI)
+ return;
+
+ bool RemovedPreds = false;
+
+ // Ok, we have this really simple case, walk the PHI operands, looking for
+ // constants. Walk from the end to remove operands from the end when
+ // possible, and to avoid invalidating "i".
+ for (unsigned i = PN->getNumIncomingValues(); i != 0; --i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) {
+ // If we have a constant, forward the edge from its current to its
+ // ultimate destination.
+ unsigned DestCase = SI->findCaseValue(CI);
+ RevectorBlockTo(PN->getIncomingBlock(i-1),
+ SI->getSuccessor(DestCase));
+ ++NumSwThread;
+ RemovedPreds = true;
+
+ // If there were two predecessors before this simplification, or if the
+ // PHI node contained all the same value except for the one we just
+ // substituted, the PHI node may be deleted. Don't iterate through it the
+ // last time.
+ if (SI->getCondition() != PN) return;
+ }
+}
+
+
+// RevectorBlockTo - Revector the unconditional branch at the end of FromBB to
+// the ToBB block, which is one of the successors of its current successor.
+void CondProp::RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB) {
+ BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
+ assert(FromBr->isUnconditional() && "FromBB should end with uncond br!");
+
+ // Get the old block we are threading through.
+ BasicBlock *OldSucc = FromBr->getSuccessor(0);
+
+ // OldSucc had multiple successors. If ToBB has multiple predecessors, then
+ // the edge between them would be critical, which we already took care of.
+  // If ToBB has a single-operand PHI node, take care of it here.
+ FoldSingleEntryPHINodes(ToBB);
+
+ // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
+ OldSucc->removePredecessor(FromBB);
+
+ // Change FromBr to branch to the new destination.
+ FromBr->setSuccessor(0, ToBB);
+
+ MadeChange = true;
+}
+
+bool CondProp::RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI){
+ BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
+ if (!FromBr->isUnconditional())
+ return false;
+
+ // Get the old block we are threading through.
+ BasicBlock *OldSucc = FromBr->getSuccessor(0);
+
+ // If the condition is a constant, simply revector the unconditional branch at
+ // the end of FromBB to one of the successors of its current successor.
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) {
+ BasicBlock *ToBB = BI->getSuccessor(CB->isZero());
+
+ // OldSucc had multiple successors. If ToBB has multiple predecessors, then
+ // the edge between them would be critical, which we already took care of.
+    // If ToBB has a single-operand PHI node, take care of it here.
+ FoldSingleEntryPHINodes(ToBB);
+
+ // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
+ OldSucc->removePredecessor(FromBB);
+
+ // Change FromBr to branch to the new destination.
+ FromBr->setSuccessor(0, ToBB);
+ } else {
+ BasicBlock *Succ0 = BI->getSuccessor(0);
+ // Do not perform transform if the new destination has PHI nodes. The
+ // transform will add new preds to the PHI's.
+ if (isa<PHINode>(Succ0->begin()))
+ return false;
+
+ BasicBlock *Succ1 = BI->getSuccessor(1);
+ if (isa<PHINode>(Succ1->begin()))
+ return false;
+
+ // Insert the new conditional branch.
+ BranchInst::Create(Succ0, Succ1, Cond, FromBr);
+
+ FoldSingleEntryPHINodes(Succ0);
+ FoldSingleEntryPHINodes(Succ1);
+
+ // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
+ OldSucc->removePredecessor(FromBB);
+
+ // Delete the old branch.
+ FromBr->eraseFromParent();
+ }
+
+ MadeChange = true;
+ return true;
+}
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
new file mode 100644
index 0000000..b933488
--- /dev/null
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -0,0 +1,90 @@
+//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements constant propagation and merging:
+//
+// Specifically, this:
+// * Converts instructions like "add int 1, 2" into 3
+//
+// Notice that:
+//    * This pass has a habit of making definitions dead. It is a good idea
+// to run a DIE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constprop"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constant.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInstKilled, "Number of instructions killed");
+
+namespace {
+ struct VISIBILITY_HIDDEN ConstantPropagation : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantPropagation() : FunctionPass(&ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char ConstantPropagation::ID = 0;
+static RegisterPass<ConstantPropagation>
+X("constprop", "Simple constant propagation");
+
+FunctionPass *llvm::createConstantPropagationPass() {
+ return new ConstantPropagation();
+}
+
+
+bool ConstantPropagation::runOnFunction(Function &F) {
+ // Initialize the worklist to all of the instructions ready to process...
+ std::set<Instruction*> WorkList;
+ for(inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+ WorkList.insert(&*i);
+ }
+ bool Changed = false;
+
+ while (!WorkList.empty()) {
+ Instruction *I = *WorkList.begin();
+ WorkList.erase(WorkList.begin()); // Get an element from the worklist...
+
+ if (!I->use_empty()) // Don't muck with dead instructions...
+ if (Constant *C = ConstantFoldInstruction(I)) {
+ // Add all of the users of this instruction to the worklist, they might
+ // be constant propagatable now...
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ WorkList.insert(cast<Instruction>(*UI));
+
+ // Replace all of the uses of a variable with uses of the constant.
+ I->replaceAllUsesWith(C);
+
+ // Remove the dead instruction.
+ WorkList.erase(I);
+ I->eraseFromParent();
+
+ // We made a change to the function...
+ Changed = true;
+ ++NumInstKilled;
+ }
+ }
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
new file mode 100644
index 0000000..8bb504c
--- /dev/null
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -0,0 +1,133 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead inst elimination and dead code elimination.
+//
+// Dead Inst Elimination performs a single pass over the function removing
+// instructions that are obviously dead. Dead Code Elimination is similar, but
+// it rechecks instructions that were used by removed instructions to see if
+// they are newly dead.
+//
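+// For example (illustrative): in
+//   %a = add i32 %x, 1     ; only used by %b
+//   %b = mul i32 %a, 2     ; unused
+// a single DIE sweep removes %b; DCE's worklist then also notices that %a
+// has become dead and removes it.
+//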
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
+STATISTIC(DCEEliminated, "Number of insts removed");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadInstElimination pass implementation
+ //
+ struct VISIBILITY_HIDDEN DeadInstElimination : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+ DeadInstElimination() : BasicBlockPass(&ID) {}
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
+ Instruction *Inst = DI++;
+ if (isInstructionTriviallyDead(Inst)) {
+ Inst->eraseFromParent();
+ Changed = true;
+ ++DIEEliminated;
+ }
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DeadInstElimination::ID = 0;
+static RegisterPass<DeadInstElimination>
+X("die", "Dead Instruction Elimination");
+
+Pass *llvm::createDeadInstEliminationPass() {
+ return new DeadInstElimination();
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadCodeElimination pass implementation
+ //
+ struct DCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DCE() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DCE::ID = 0;
+static RegisterPass<DCE> Y("dce", "Dead Code Elimination");
+
+bool DCE::runOnFunction(Function &F) {
+ // Start out with all of the instructions in the worklist...
+ std::vector<Instruction*> WorkList;
+ for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
+ WorkList.push_back(&*i);
+
+ // Loop over the worklist finding instructions that are dead. If they are
+ // dead make them drop all of their uses, making other instructions
+ // potentially dead, and work until the worklist is empty.
+ //
+ bool MadeChange = false;
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.back();
+ WorkList.pop_back();
+
+ if (isInstructionTriviallyDead(I)) { // If the instruction is dead.
+ // Loop over all of the values that the instruction uses, if there are
+ // instructions being used, add them to the worklist, because they might
+ // go dead after this one is removed.
+ //
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *Used = dyn_cast<Instruction>(*OI))
+ WorkList.push_back(Used);
+
+ // Remove the instruction.
+ I->eraseFromParent();
+
+ // Remove the instruction from the worklist if it still exists in it.
+ for (std::vector<Instruction*>::iterator WI = WorkList.begin();
+ WI != WorkList.end(); ) {
+ if (*WI == I)
+ WI = WorkList.erase(WI);
+ else
+ ++WI;
+ }
+
+ MadeChange = true;
+ ++DCEEliminated;
+ }
+ }
+ return MadeChange;
+}
+
+FunctionPass *llvm::createDeadCodeEliminationPass() {
+ return new DCE();
+}
+
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
new file mode 100644
index 0000000..b923c92
--- /dev/null
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -0,0 +1,461 @@
+//===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a trivial dead store elimination that only considers
+// basic-block local redundant stores.
+//
+// FIXME: This should eventually be extended to be a post-dominator tree
+// traversal. Doing so would be pretty trivial.
+//
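+// For example (illustrative): in
+//   store i32 1, i32* %p
+//   store i32 2, i32* %p
+// the first store is dead, since the second overwrites it before any
+// intervening load, and this pass deletes it.
+//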
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumFastStores, "Number of stores deleted");
+STATISTIC(NumFastOther , "Number of other instrs removed");
+
+namespace {
+ struct VISIBILITY_HIDDEN DSE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DSE() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ Changed |= runOnBasicBlock(*I);
+ return Changed;
+ }
+
+ bool runOnBasicBlock(BasicBlock &BB);
+ bool handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep);
+ bool handleEndBlock(BasicBlock &BB);
+ bool RemoveUndeadPointers(Value* Ptr, uint64_t killPointerSize,
+ BasicBlock::iterator& BBI,
+ SmallPtrSet<Value*, 64>& deadPointers);
+ void DeleteDeadInstruction(Instruction *I,
+ SmallPtrSet<Value*, 64> *deadPointers = 0);
+
+
+    // getAnalysisUsage - We require the dominator tree, target data, alias
+    // analysis, and memory dependence analysis, and preserve them where
+    // possible.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetData>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+ };
+}
+
+char DSE::ID = 0;
+static RegisterPass<DSE> X("dse", "Dead Store Elimination");
+
+FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+ TargetData &TD = getAnalysis<TargetData>();
+
+ bool MadeChange = false;
+
+ // Do a top-down walk on the BB
+ for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
+ Instruction *Inst = BBI++;
+
+    // If we find a store or a free, get its memory dependence.
+ if (!isa<StoreInst>(Inst) && !isa<FreeInst>(Inst))
+ continue;
+
+ // Don't molest volatile stores or do queries that will return "clobber".
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ if (SI->isVolatile())
+ continue;
+
+ MemDepResult InstDep = MD.getDependency(Inst);
+
+ // Ignore non-local stores.
+ // FIXME: cross-block DSE would be fun. :)
+ if (InstDep.isNonLocal()) continue;
+
+ // Handle frees whose dependencies are non-trivial.
+ if (FreeInst *FI = dyn_cast<FreeInst>(Inst)) {
+ MadeChange |= handleFreeWithNonTrivialDependency(FI, InstDep);
+ continue;
+ }
+
+ StoreInst *SI = cast<StoreInst>(Inst);
+
+ // If not a definite must-alias dependency, ignore it.
+ if (!InstDep.isDef())
+ continue;
+
+ // If this is a store-store dependence, then the previous store is dead so
+ // long as this store is at least as big as it.
+ if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
+ if (TD.getTypeStoreSize(DepStore->getOperand(0)->getType()) <=
+ TD.getTypeStoreSize(SI->getOperand(0)->getType())) {
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepStore);
+ NumFastStores++;
+ MadeChange = true;
+
+ if (BBI != BB.begin())
+ --BBI;
+ continue;
+ }
+
+ // If we're storing the same value back to a pointer that we just
+ // loaded from, then the store can be removed.
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad) {
+ DeleteDeadInstruction(SI);
+ if (BBI != BB.begin())
+ --BBI;
+ NumFastStores++;
+ MadeChange = true;
+ continue;
+ }
+ }
+ }
+
+ // If this block ends in a return, unwind, or unreachable, all allocas are
+ // dead at its end, which means stores to them are also dead.
+ if (BB.getTerminator()->getNumSuccessors() == 0)
+ MadeChange |= handleEndBlock(BB);
+
+ return MadeChange;
+}
+
+/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
+/// dependency is a store to a field of that structure.
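+///
+/// For example (illustrative, hypothetical names): in
+///   store i32 0, i32* %field      ; %field points into %obj
+///   free i8* %obj
+/// the store is dead because the whole object is freed immediately
+/// afterward, so it can be deleted.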
+bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ StoreInst *Dependency = dyn_cast_or_null<StoreInst>(Dep.getInst());
+ if (!Dependency || Dependency->isVolatile())
+ return false;
+
+ Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject();
+
+ // Check for aliasing.
+ if (AA.alias(F->getPointerOperand(), 1, DepPointer, 1) !=
+ AliasAnalysis::MustAlias)
+ return false;
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency);
+ NumFastStores++;
+ return true;
+}
+
+/// handleEndBlock - Remove dead stores to stack-allocated locations in the
+/// function end block. Ex:
+/// %A = alloca i32
+/// ...
+/// store i32 1, i32* %A
+/// ret void
+bool DSE::handleEndBlock(BasicBlock &BB) {
+ TargetData &TD = getAnalysis<TargetData>();
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ bool MadeChange = false;
+
+ // Pointers alloca'd in this function are dead in the end block
+ SmallPtrSet<Value*, 64> deadPointers;
+
+ // Find all of the alloca'd pointers in the entry block.
+ BasicBlock *Entry = BB.getParent()->begin();
+ for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ deadPointers.insert(AI);
+
+ // Treat byval arguments the same, stores to them are dead at the end of the
+ // function.
+ for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
+ AE = BB.getParent()->arg_end(); AI != AE; ++AI)
+ if (AI->hasByValAttr())
+ deadPointers.insert(AI);
+
+ // Scan the basic block backwards
+ for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
+ --BBI;
+
+ // If we find a store whose pointer is dead.
+ if (StoreInst* S = dyn_cast<StoreInst>(BBI)) {
+ if (!S->isVolatile()) {
+ // See through pointer-to-pointer bitcasts
+ Value* pointerOperand = S->getPointerOperand()->getUnderlyingObject();
+
+ // Alloca'd pointers or byval arguments (which are functionally like
+ // alloca's) are valid candidates for removal.
+ if (deadPointers.count(pointerOperand)) {
+ // DCE instructions only used to calculate that store.
+ BBI++;
+ DeleteDeadInstruction(S, &deadPointers);
+ NumFastStores++;
+ MadeChange = true;
+ }
+ }
+
+ continue;
+ }
+
+ // We can also remove memcpy's to local variables at the end of a function.
+ if (MemCpyInst *M = dyn_cast<MemCpyInst>(BBI)) {
+ Value *dest = M->getDest()->getUnderlyingObject();
+
+ if (deadPointers.count(dest)) {
+ BBI++;
+ DeleteDeadInstruction(M, &deadPointers);
+ NumFastOther++;
+ MadeChange = true;
+ continue;
+ }
+
+ // Because a memcpy is also a load, we can't skip it if we didn't remove
+ // it.
+ }
+
+ Value* killPointer = 0;
+ uint64_t killPointerSize = ~0UL;
+
+ // If we encounter a use of the pointer, it is no longer considered dead
+ if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ // However, if this load is unused and not volatile, we can go ahead and
+ // remove it, and not have to worry about it making our pointer undead!
+ if (L->use_empty() && !L->isVolatile()) {
+ BBI++;
+ DeleteDeadInstruction(L, &deadPointers);
+ NumFastOther++;
+ MadeChange = true;
+ continue;
+ }
+
+ killPointer = L->getPointerOperand();
+ } else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) {
+ killPointer = V->getOperand(0);
+ } else if (isa<MemCpyInst>(BBI) &&
+ isa<ConstantInt>(cast<MemCpyInst>(BBI)->getLength())) {
+ killPointer = cast<MemCpyInst>(BBI)->getSource();
+ killPointerSize = cast<ConstantInt>(
+ cast<MemCpyInst>(BBI)->getLength())->getZExtValue();
+ } else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) {
+ deadPointers.erase(A);
+
+ // Dead alloca's can be DCE'd when we reach them
+ if (A->use_empty()) {
+ BBI++;
+ DeleteDeadInstruction(A, &deadPointers);
+ NumFastOther++;
+ MadeChange = true;
+ }
+
+ continue;
+ } else if (CallSite::get(BBI).getInstruction() != 0) {
+ // If this call does not access memory, it can't
+ // be undeadifying any of our pointers.
+ CallSite CS = CallSite::get(BBI);
+ if (AA.doesNotAccessMemory(CS))
+ continue;
+
+ unsigned modRef = 0;
+ unsigned other = 0;
+
+ // Remove any pointers made undead by the call from the dead set
+ std::vector<Value*> dead;
+ for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
+ E = deadPointers.end(); I != E; ++I) {
+ // HACK: if we detect that our AA is imprecise, it's not
+ // worth it to scan the rest of the deadPointers set. Just
+ // assume that the AA will return ModRef for everything, and
+ // go ahead and bail.
+ if (modRef >= 16 && other == 0) {
+ deadPointers.clear();
+ return MadeChange;
+ }
+
+ // Get size information for the alloca
+ unsigned pointerSize = ~0U;
+ if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
+ if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
+ pointerSize = C->getZExtValue() *
+ TD.getTypeAllocSize(A->getAllocatedType());
+ } else {
+ const PointerType* PT = cast<PointerType>(
+ cast<Argument>(*I)->getType());
+ pointerSize = TD.getTypeAllocSize(PT->getElementType());
+ }
+
+ // See if the call site touches it
+ AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, pointerSize);
+
+ if (A == AliasAnalysis::ModRef)
+ modRef++;
+ else
+ other++;
+
+ if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
+ dead.push_back(*I);
+ }
+
+ for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end();
+ I != E; ++I)
+ deadPointers.erase(*I);
+
+ continue;
+ } else if (isInstructionTriviallyDead(BBI)) {
+ // For any non-memory-affecting non-terminators, DCE them as we reach them
+ Instruction *Inst = BBI;
+ BBI++;
+ DeleteDeadInstruction(Inst, &deadPointers);
+ NumFastOther++;
+ MadeChange = true;
+ continue;
+ }
+
+ if (!killPointer)
+ continue;
+
+ killPointer = killPointer->getUnderlyingObject();
+
+ // Deal with undead pointers
+ MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI,
+ deadPointers);
+ }
+
+ return MadeChange;
+}
+
+/// RemoveUndeadPointers - Check for uses of a pointer that make it
+/// undead when scanning for dead stores to alloca's.
+bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,
+ BasicBlock::iterator &BBI,
+ SmallPtrSet<Value*, 64>& deadPointers) {
+ TargetData &TD = getAnalysis<TargetData>();
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // If the kill pointer can be easily reduced to an alloca,
+ // don't bother doing extraneous AA queries.
+ if (deadPointers.count(killPointer)) {
+ deadPointers.erase(killPointer);
+ return false;
+ }
+
+ // A global can't be in the dead pointer set.
+ if (isa<GlobalValue>(killPointer))
+ return false;
+
+ bool MadeChange = false;
+
+ SmallVector<Value*, 16> undead;
+
+ for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
+ E = deadPointers.end(); I != E; ++I) {
+ // Get size information for the alloca or argument.
+ unsigned pointerSize = ~0U;
+ if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
+ if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
+ pointerSize = C->getZExtValue() *
+ TD.getTypeAllocSize(A->getAllocatedType());
+ } else {
+ const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType());
+ pointerSize = TD.getTypeAllocSize(PT->getElementType());
+ }
+
+ // See if this pointer could alias it
+ AliasAnalysis::AliasResult A = AA.alias(*I, pointerSize,
+ killPointer, killPointerSize);
+
+ // If it must-alias and a store, we can delete it
+ if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) {
+ StoreInst* S = cast<StoreInst>(BBI);
+
+ // Remove it!
+ BBI++;
+ DeleteDeadInstruction(S, &deadPointers);
+ NumFastStores++;
+ MadeChange = true;
+
+ continue;
+
+ // Otherwise, it is undead
+ } else if (A != AliasAnalysis::NoAlias)
+ undead.push_back(*I);
+ }
+
+ for (SmallVector<Value*, 16>::iterator I = undead.begin(), E = undead.end();
+ I != E; ++I)
+ deadPointers.erase(*I);
+
+ return MadeChange;
+}
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+void DSE::DeleteDeadInstruction(Instruction *I,
+ SmallPtrSet<Value*, 64> *ValueSet) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+ --NumFastOther;
+
+ // Before we touch this instruction, remove it from memdep!
+ MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+ while (!NowDeadInsts.empty()) {
+ Instruction *DeadInst = NowDeadInsts.back();
+ NowDeadInsts.pop_back();
+
+ ++NumFastOther;
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // MemDep, which needs to know the operands and needs it to be in the
+ // function.
+ MDA.removeInstruction(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ if (ValueSet) ValueSet->erase(DeadInst);
+ }
+}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
new file mode 100644
index 0000000..733dfa9
--- /dev/null
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -0,0 +1,1738 @@
+//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global value numbering to eliminate fully redundant
+// instructions. It also performs simple dead load elimination.
+//
+// Note that this pass does the value numbering itself; it does not use the
+// ValueNumbering analysis passes.
+//
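+// As an illustrative example (hypothetical IR, not from the sources): given
+//
+//   %x = add i32 %a, %b
+//   %y = add i32 %a, %b
+//
+// both adds receive the same value number, so %y is replaced by %x and
+// deleted.
+//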
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gvn"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cstdio>
+using namespace llvm;
+
+STATISTIC(NumGVNInstr, "Number of instructions deleted");
+STATISTIC(NumGVNLoad, "Number of loads deleted");
+STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
+STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumPRELoad, "Number of loads PRE'd");
+
+static cl::opt<bool> EnablePRE("enable-pre",
+ cl::init(true), cl::Hidden);
+cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// ValueTable Class
+//===----------------------------------------------------------------------===//
+
+/// This class holds the mapping between values and value numbers. It is used
+/// as an efficient mechanism to determine the expression-wise equivalence of
+/// two values.
+namespace {
+ struct VISIBILITY_HIDDEN Expression {
+ enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM,
+ FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
+ ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
+ ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
+ FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
+ FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
+ FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
+ SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
+ FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
+ PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
+ EMPTY, TOMBSTONE };
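+ // EMPTY and TOMBSTONE never correspond to real instructions; they exist
+ // only to serve as the DenseMap empty and tombstone sentinel keys below.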
+
+ ExpressionOpcode opcode;
+ const Type* type;
+ uint32_t firstVN;
+ uint32_t secondVN;
+ uint32_t thirdVN;
+ SmallVector<uint32_t, 4> varargs;
+ Value* function;
+
+ Expression() { }
+ Expression(ExpressionOpcode o) : opcode(o) { }
+
+ bool operator==(const Expression &other) const {
+ if (opcode != other.opcode)
+ return false;
+ else if (opcode == EMPTY || opcode == TOMBSTONE)
+ return true;
+ else if (type != other.type)
+ return false;
+ else if (function != other.function)
+ return false;
+ else if (firstVN != other.firstVN)
+ return false;
+ else if (secondVN != other.secondVN)
+ return false;
+ else if (thirdVN != other.thirdVN)
+ return false;
+ else {
+ if (varargs.size() != other.varargs.size())
+ return false;
+
+ for (size_t i = 0; i < varargs.size(); ++i)
+ if (varargs[i] != other.varargs[i])
+ return false;
+
+ return true;
+ }
+ }
+
+ bool operator!=(const Expression &other) const {
+ return !(*this == other);
+ }
+ };
+
+ class VISIBILITY_HIDDEN ValueTable {
+ private:
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+ AliasAnalysis* AA;
+ MemoryDependenceAnalysis* MD;
+ DominatorTree* DT;
+
+ uint32_t nextValueNumber;
+
+ Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
+ Expression::ExpressionOpcode getOpcode(CmpInst* C);
+ Expression::ExpressionOpcode getOpcode(CastInst* C);
+ Expression create_expression(BinaryOperator* BO);
+ Expression create_expression(CmpInst* C);
+ Expression create_expression(ShuffleVectorInst* V);
+ Expression create_expression(ExtractElementInst* C);
+ Expression create_expression(InsertElementInst* V);
+ Expression create_expression(SelectInst* V);
+ Expression create_expression(CastInst* C);
+ Expression create_expression(GetElementPtrInst* G);
+ Expression create_expression(CallInst* C);
+ Expression create_expression(Constant* C);
+ public:
+ ValueTable() : nextValueNumber(1) { }
+ uint32_t lookup_or_add(Value* V);
+ uint32_t lookup(Value* V) const;
+ void add(Value* V, uint32_t num);
+ void clear();
+ void erase(Value* v);
+ unsigned size();
+ void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+ AliasAnalysis *getAliasAnalysis() const { return AA; }
+ void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+ void setDomTree(DominatorTree* D) { DT = D; }
+ uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+ void verifyRemoved(const Value *) const;
+ };
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<Expression> {
+ static inline Expression getEmptyKey() {
+ return Expression(Expression::EMPTY);
+ }
+
+ static inline Expression getTombstoneKey() {
+ return Expression(Expression::TOMBSTONE);
+ }
+
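+ // Combine every field with a multiply-by-37 rolling hash; the two pointer
+ // fields are pre-mixed by xoring shifted copies of themselves so pointer
+ // alignment does not leave the low bits constant.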
+ static unsigned getHashValue(const Expression e) {
+ unsigned hash = e.opcode;
+
+ hash = e.firstVN + hash * 37;
+ hash = e.secondVN + hash * 37;
+ hash = e.thirdVN + hash * 37;
+
+ hash = ((unsigned)((uintptr_t)e.type >> 4) ^
+ (unsigned)((uintptr_t)e.type >> 9)) +
+ hash * 37;
+
+ for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
+ E = e.varargs.end(); I != E; ++I)
+ hash = *I + hash * 37;
+
+ hash = ((unsigned)((uintptr_t)e.function >> 4) ^
+ (unsigned)((uintptr_t)e.function >> 9)) +
+ hash * 37;
+
+ return hash;
+ }
+ static bool isEqual(const Expression &LHS, const Expression &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return true; }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable Internal Functions
+//===----------------------------------------------------------------------===//
+Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
+ switch(BO->getOpcode()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ assert(0 && "Binary operator with unknown opcode?");
+ case Instruction::Add: return Expression::ADD;
+ case Instruction::Sub: return Expression::SUB;
+ case Instruction::Mul: return Expression::MUL;
+ case Instruction::UDiv: return Expression::UDIV;
+ case Instruction::SDiv: return Expression::SDIV;
+ case Instruction::FDiv: return Expression::FDIV;
+ case Instruction::URem: return Expression::UREM;
+ case Instruction::SRem: return Expression::SREM;
+ case Instruction::FRem: return Expression::FREM;
+ case Instruction::Shl: return Expression::SHL;
+ case Instruction::LShr: return Expression::LSHR;
+ case Instruction::AShr: return Expression::ASHR;
+ case Instruction::And: return Expression::AND;
+ case Instruction::Or: return Expression::OR;
+ case Instruction::Xor: return Expression::XOR;
+ }
+}
+
+Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
+ if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) {
+ switch (C->getPredicate()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ assert(0 && "Comparison with unknown predicate?");
+ case ICmpInst::ICMP_EQ: return Expression::ICMPEQ;
+ case ICmpInst::ICMP_NE: return Expression::ICMPNE;
+ case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
+ case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
+ case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
+ case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
+ case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
+ case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
+ case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
+ case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
+ }
+ }
+ assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare");
+ switch (C->getPredicate()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ assert(0 && "Comparison with unknown predicate?");
+ case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
+ case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
+ case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
+ case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
+ case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
+ case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
+ case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
+ case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
+ case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
+ case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
+ case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
+ case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
+ case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
+ case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+ }
+}
+
+Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
+ switch(C->getOpcode()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ assert(0 && "Cast operator with unknown opcode?");
+ case Instruction::Trunc: return Expression::TRUNC;
+ case Instruction::ZExt: return Expression::ZEXT;
+ case Instruction::SExt: return Expression::SEXT;
+ case Instruction::FPToUI: return Expression::FPTOUI;
+ case Instruction::FPToSI: return Expression::FPTOSI;
+ case Instruction::UIToFP: return Expression::UITOFP;
+ case Instruction::SIToFP: return Expression::SITOFP;
+ case Instruction::FPTrunc: return Expression::FPTRUNC;
+ case Instruction::FPExt: return Expression::FPEXT;
+ case Instruction::PtrToInt: return Expression::PTRTOINT;
+ case Instruction::IntToPtr: return Expression::INTTOPTR;
+ case Instruction::BitCast: return Expression::BITCAST;
+ }
+}
+
+Expression ValueTable::create_expression(CallInst* C) {
+ Expression e;
+
+ e.type = C->getType();
+ e.firstVN = 0;
+ e.secondVN = 0;
+ e.thirdVN = 0;
+ e.function = C->getCalledFunction();
+ e.opcode = Expression::CALL;
+
+ for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+ I != E; ++I)
+ e.varargs.push_back(lookup_or_add(*I));
+
+ return e;
+}
+
+Expression ValueTable::create_expression(BinaryOperator* BO) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(BO->getOperand(0));
+ e.secondVN = lookup_or_add(BO->getOperand(1));
+ e.thirdVN = 0;
+ e.function = 0;
+ e.type = BO->getType();
+ e.opcode = getOpcode(BO);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(CmpInst* C) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(C->getOperand(0));
+ e.secondVN = lookup_or_add(C->getOperand(1));
+ e.thirdVN = 0;
+ e.function = 0;
+ e.type = C->getType();
+ e.opcode = getOpcode(C);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(CastInst* C) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(C->getOperand(0));
+ e.secondVN = 0;
+ e.thirdVN = 0;
+ e.function = 0;
+ e.type = C->getType();
+ e.opcode = getOpcode(C);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ShuffleVectorInst* S) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(S->getOperand(0));
+ e.secondVN = lookup_or_add(S->getOperand(1));
+ e.thirdVN = lookup_or_add(S->getOperand(2));
+ e.function = 0;
+ e.type = S->getType();
+ e.opcode = Expression::SHUFFLE;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ExtractElementInst* E) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(E->getOperand(0));
+ e.secondVN = lookup_or_add(E->getOperand(1));
+ e.thirdVN = 0;
+ e.function = 0;
+ e.type = E->getType();
+ e.opcode = Expression::EXTRACT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(InsertElementInst* I) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(I->getOperand(0));
+ e.secondVN = lookup_or_add(I->getOperand(1));
+ e.thirdVN = lookup_or_add(I->getOperand(2));
+ e.function = 0;
+ e.type = I->getType();
+ e.opcode = Expression::INSERT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(SelectInst* I) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(I->getCondition());
+ e.secondVN = lookup_or_add(I->getTrueValue());
+ e.thirdVN = lookup_or_add(I->getFalseValue());
+ e.function = 0;
+ e.type = I->getType();
+ e.opcode = Expression::SELECT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(GetElementPtrInst* G) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(G->getPointerOperand());
+ e.secondVN = 0;
+ e.thirdVN = 0;
+ e.function = 0;
+ e.type = G->getType();
+ e.opcode = Expression::GEP;
+
+ for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
+ I != E; ++I)
+ e.varargs.push_back(lookup_or_add(*I));
+
+ return e;
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable External Functions
+//===----------------------------------------------------------------------===//
+
+/// add - Insert a value into the table with a specified value number.
+void ValueTable::add(Value* V, uint32_t num) {
+ valueNumbering.insert(std::make_pair(V, num));
+}
+
+/// lookup_or_add - Returns the value number for the specified value, assigning
+/// it a new number if it did not have one before.
+uint32_t ValueTable::lookup_or_add(Value* V) {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ return VI->second;
+
+ if (CallInst* C = dyn_cast<CallInst>(V)) {
+ if (AA->doesNotAccessMemory(C)) {
+ Expression e = create_expression(C);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (AA->onlyReadsMemory(C)) {
+ Expression e = create_expression(C);
+
+ if (expressionNumbering.find(e) == expressionNumbering.end()) {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+
+ MemDepResult local_dep = MD->getDependency(C);
+
+ if (!local_dep.isDef() && !local_dep.isNonLocal()) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+
+ if (local_dep.isDef()) {
+ CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
+
+ if (local_cdep->getNumOperands() != C->getNumOperands()) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+
+ for (unsigned i = 1; i < C->getNumOperands(); ++i) {
+ uint32_t c_vn = lookup_or_add(C->getOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(local_cdep);
+ valueNumbering.insert(std::make_pair(V, v));
+ return v;
+ }
+
+ // Non-local case.
+ const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+ MD->getNonLocalCallDependency(CallSite(C));
+ // FIXME: call/call dependencies for readonly calls should return def, not
+ // clobber! Move the checking logic to MemDep!
+ CallInst* cdep = 0;
+
+ // Check to see if we have a single dominating call instruction that is
+ // identical to C.
+ for (unsigned i = 0, e = deps.size(); i != e; ++i) {
+ const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
+ // Ignore non-local dependencies.
+ if (I->second.isNonLocal())
+ continue;
+
+ // We don't handle non-dependencies. If we already have a call, reject
+ // instruction dependencies.
+ if (I->second.isClobber() || cdep != 0) {
+ cdep = 0;
+ break;
+ }
+
+ CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
+ // FIXME: All duplicated with non-local case.
+ if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
+ cdep = NonLocalDepCall;
+ continue;
+ }
+
+ cdep = 0;
+ break;
+ }
+
+ if (!cdep) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+
+ if (cdep->getNumOperands() != C->getNumOperands()) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ for (unsigned i = 1; i < C->getNumOperands(); ++i) {
+ uint32_t c_vn = lookup_or_add(C->getOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(cdep);
+ valueNumbering.insert(std::make_pair(V, v));
+ return v;
+
+ } else {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
+ Expression e = create_expression(BO);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (CmpInst* C = dyn_cast<CmpInst>(V)) {
+ Expression e = create_expression(C);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (SelectInst* U = dyn_cast<SelectInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (CastInst* U = dyn_cast<CastInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+}
+
+/// lookup - Returns the value number of the specified value. Fails if
+/// the value has not yet been numbered.
+uint32_t ValueTable::lookup(Value* V) const {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+}
+
+/// clear - Remove all entries from the ValueTable
+void ValueTable::clear() {
+ valueNumbering.clear();
+ expressionNumbering.clear();
+ nextValueNumber = 1;
+}
+
+/// erase - Remove a value from the value numbering
+void ValueTable::erase(Value* V) {
+ valueNumbering.erase(V);
+}
+
+/// verifyRemoved - Verify that the value is removed from all internal data
+/// structures.
+void ValueTable::verifyRemoved(const Value *V) const {
+ for (DenseMap<Value*, uint32_t>::iterator
+ I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
+ assert(I->first != V && "Inst still occurs in value numbering map!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// GVN Pass
+//===----------------------------------------------------------------------===//
+
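+// ValueNumberScope - A per-block table mapping value numbers to their leader
+// value in that block, chained through 'parent' to the scope of the block's
+// immediate dominator so that lookups can walk up the dominator tree.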
+namespace {
+ struct VISIBILITY_HIDDEN ValueNumberScope {
+ ValueNumberScope* parent;
+ DenseMap<uint32_t, Value*> table;
+
+ ValueNumberScope(ValueNumberScope* p) : parent(p) { }
+ };
+}
+
+namespace {
+
+ class VISIBILITY_HIDDEN GVN : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ GVN() : FunctionPass(&ID) { }
+
+ private:
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+
+ ValueTable VN;
+ DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+
+ typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType;
+ PhiMapType phiMap;
+
+
+ // This transformation requires dominator info.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<AliasAnalysis>();
+ }
+
+ // Helper functions
+ // FIXME: eliminate or document these better
+ bool processLoad(LoadInst* L,
+ SmallVectorImpl<Instruction*> &toErase);
+ bool processInstruction(Instruction* I,
+ SmallVectorImpl<Instruction*> &toErase);
+ bool processNonLocalLoad(LoadInst* L,
+ SmallVectorImpl<Instruction*> &toErase);
+ bool processBlock(BasicBlock* BB);
+ Value *GetValueForBlock(BasicBlock *BB, Instruction* orig,
+ DenseMap<BasicBlock*, Value*> &Phis,
+ bool top_level = false);
+ void dump(DenseMap<uint32_t, Value*>& d);
+ bool iterateOnFunction(Function &F);
+ Value* CollapsePhi(PHINode* p);
+ bool isSafeReplacement(PHINode* p, Instruction* inst);
+ bool performPRE(Function& F);
+ Value* lookupNumber(BasicBlock* BB, uint32_t num);
+ bool mergeBlockIntoPredecessor(BasicBlock* BB);
+ Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno);
+ void cleanupGlobalSets();
+ void verifyRemoved(const Instruction *I) const;
+ };
+
+ char GVN::ID = 0;
+}
+
+// createGVNPass - The public interface to this file...
+FunctionPass *llvm::createGVNPass() { return new GVN(); }
+
+static RegisterPass<GVN> X("gvn",
+ "Global Value Numbering");
+
+void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+ printf("{\n");
+ for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
+ E = d.end(); I != E; ++I) {
+ printf("%d\n", I->first);
+ I->second->dump();
+ }
+ printf("}\n");
+}
+
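+/// CollapsePhi - If this PHI trivially reduces to a single value (all of its
+/// incoming values are the same), return that value when it is safe to use as
+/// a replacement for the PHI; otherwise return null.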
+Value* GVN::CollapsePhi(PHINode* p) {
+ Value* constVal = p->hasConstantValue();
+ if (!constVal) return 0;
+
+ Instruction* inst = dyn_cast<Instruction>(constVal);
+ if (!inst)
+ return constVal;
+
+ if (DT->dominates(inst, p))
+ if (isSafeReplacement(p, inst))
+ return inst;
+ return 0;
+}
+
+bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) {
+ if (!isa<PHINode>(inst))
+ return true;
+
+ for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
+ UI != E; ++UI)
+ if (PHINode* use_phi = dyn_cast<PHINode>(UI))
+ if (use_phi->getParent() == inst->getParent())
+ return false;
+
+ return true;
+}
+
+/// GetValueForBlock - Get the value to use within the specified basic block.
+/// Available values are in Phis.
+Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig,
+ DenseMap<BasicBlock*, Value*> &Phis,
+ bool top_level) {
+
+ // If we have already computed this value, return the previously computed val.
+ DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB);
+ if (V != Phis.end() && !top_level) return V->second;
+
+ // If the block is unreachable, just return undef, since this path
+ // can't actually occur at runtime.
+ if (!DT->isReachableFromEntry(BB))
+ return Phis[BB] = UndefValue::get(orig->getType());
+
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ Value *ret = GetValueForBlock(Pred, orig, Phis);
+ Phis[BB] = ret;
+ return ret;
+ }
+
+ // Get the number of predecessors of this block so we can reserve space later.
+ // If there is already a PHI in it, use the #preds from it, otherwise count.
+ // Getting it from the PHI is constant time.
+ unsigned NumPreds;
+ if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin()))
+ NumPreds = ExistingPN->getNumIncomingValues();
+ else
+ NumPreds = std::distance(pred_begin(BB), pred_end(BB));
+
+ // Otherwise, this block has multiple predecessors, so we need to insert a
+ // PHI node. Do so now, then get values to fill in its incoming values.
+ PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle",
+ BB->begin());
+ PN->reserveOperandSpace(NumPreds);
+
+ Phis.insert(std::make_pair(BB, PN));
+
+ // Fill in the incoming values for the block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ Value* val = GetValueForBlock(*PI, orig, Phis);
+ PN->addIncoming(val, *PI);
+ }
+
+ VN.getAliasAnalysis()->copyValue(orig, PN);
+
+ // Attempt to collapse PHI nodes that are trivially redundant
+ Value* v = CollapsePhi(PN);
+ if (!v) {
+ // Cache our phi construction results
+ if (LoadInst* L = dyn_cast<LoadInst>(orig))
+ phiMap[L->getPointerOperand()].insert(PN);
+ else
+ phiMap[orig].insert(PN);
+
+ return PN;
+ }
+
+ PN->replaceAllUsesWith(v);
+ if (isa<PointerType>(v->getType()))
+ MD->invalidateCachedPointerInfo(v);
+
+ for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(),
+ E = Phis.end(); I != E; ++I)
+ if (I->second == PN)
+ I->second = v;
+
+ DEBUG(cerr << "GVN removed: " << *PN);
+ MD->removeInstruction(PN);
+ PN->eraseFromParent();
+ DEBUG(verifyRemoved(PN));
+
+ Phis[BB] = v;
+ return v;
+}
+
+/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// we're analyzing is fully available in the specified block. As we go, keep
+/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
+/// map is actually a four-state map with the following values:
+/// 0) we know the block *is not* fully available.
+/// 1) we know the block *is* fully available.
+/// 2) we do not know whether the block is fully available or not, but we are
+/// currently speculating that it will be.
+/// 3) we are speculating for this block and have used that to speculate for
+/// other blocks.
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
+ DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
+ // Optimistically assume that the block is fully available and check to see
+ // if we already know about this block in one lookup.
+ std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+ FullyAvailableBlocks.insert(std::make_pair(BB, 2));
+
+ // If the entry already existed for this block, return the precomputed value.
+ if (!IV.second) {
+ // If this is a speculative "available" value, mark it as being used for
+ // speculation of other blocks.
+ if (IV.first->second == 2)
+ IV.first->second = 3;
+ return IV.first->second != 0;
+ }
+
+ // Otherwise, see if it is fully available in all predecessors.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // If this block has no predecessors, it isn't live-in here.
+ if (PI == PE)
+ goto SpeculationFailure;
+
+ for (; PI != PE; ++PI)
+ // If the value isn't fully available in one of our predecessors, then it
+ // isn't fully available in this block either. Undo our previous
+ // optimistic assumption and bail out.
+ if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+ goto SpeculationFailure;
+
+ return true;
+
+// SpeculationFailure - If we get here, we found out that this is not, after
+// all, a fully-available block. We have a problem if we speculated on this and
+// used the speculation to mark other blocks as available.
+SpeculationFailure:
+ char &BBVal = FullyAvailableBlocks[BB];
+
+ // If we didn't speculate on this, just return with it set to false.
+ if (BBVal == 2) {
+ BBVal = 0;
+ return false;
+ }
+
+ // If we did speculate on this value, we could have blocks set to 1 that are
+ // incorrect. Walk the (transitive) successors of this block and mark them as
+ // 0 if set to one.
+ SmallVector<BasicBlock*, 32> BBWorklist;
+ BBWorklist.push_back(BB);
+
+ while (!BBWorklist.empty()) {
+ BasicBlock *Entry = BBWorklist.pop_back_val();
+ // Note that this sets blocks to 0 (unavailable) if they happen to not
+ // already be in FullyAvailableBlocks. This is safe.
+ char &EntryVal = FullyAvailableBlocks[Entry];
+ if (EntryVal == 0) continue; // Already unavailable.
+
+ // Mark as unavailable.
+ EntryVal = 0;
+
+ for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
+ BBWorklist.push_back(*I);
+ }
+
+ return false;
+}
+
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
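+///
+/// As an illustrative example (hypothetical IR, not from the sources), in
+///   if (c) { store i32 1, i32* %p } else { store i32 2, i32* %p }
+///   %v = load i32* %p
+/// both stores feed the load, so the load is fully redundant and can be
+/// replaced by a phi of 1 and 2 at the join point.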
+bool GVN::processNonLocalLoad(LoadInst *LI,
+ SmallVectorImpl<Instruction*> &toErase) {
+ // Find the non-local dependencies of the load.
+ SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
+ MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
+ Deps);
+ //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI);
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ if (Deps.size() > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (Deps.size() == 1 && Deps[0].second.isClobber())
+ return false;
+
+ // Filter out useless results (non-locals, etc). Keep track of the blocks
+ // where we have a value available in repl, also keep track of whether we see
+ // dependencies that produce an unknown value for the load (such as a call
+ // that could potentially clobber the load).
+ SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock;
+ SmallVector<BasicBlock*, 16> UnavailableBlocks;
+
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ BasicBlock *DepBB = Deps[i].first;
+ MemDepResult DepInfo = Deps[i].second;
+
+ if (DepInfo.isClobber()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ Instruction *DepInst = DepInfo.getInst();
+
+ // Loading the allocation -> undef.
+ if (isa<AllocationInst>(DepInst)) {
+ ValuesPerBlock.push_back(std::make_pair(DepBB,
+ UndefValue::get(LI->getType())));
+ continue;
+ }
+
+ if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) {
+ // Reject loads and stores that are to the same address but are of
+ // different types.
+ // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
+ // of bitfield access, it would be interesting to optimize for it at some
+ // point.
+ if (S->getOperand(0)->getType() != LI->getType()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0)));
+
+ } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) {
+ if (LD->getType() != LI->getType()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ ValuesPerBlock.push_back(std::make_pair(DepBB, LD));
+ } else {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty()) return false;
+
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ // Use cached PHI construction information from previous runs
+ SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
+ // FIXME: What does phiMap do? Are we positive it isn't getting invalidated?
+ for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
+ I != E; ++I) {
+ if ((*I)->getParent() == LI->getParent()) {
+ DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI);
+ LI->replaceAllUsesWith(*I);
+ if (isa<PointerType>((*I)->getType()))
+ MD->invalidateCachedPointerInfo(*I);
+ toErase.push_back(LI);
+ NumGVNLoad++;
+ return true;
+ }
+
+ ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
+ }
+
+ DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI);
+
+ DenseMap<BasicBlock*, Value*> BlockReplValues;
+ BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
+ // Perform PHI construction.
+ Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
+ LI->replaceAllUsesWith(v);
+
+ if (isa<PHINode>(v))
+ v->takeName(LI);
+ if (isa<PointerType>(v->getType()))
+ MD->invalidateCachedPointerInfo(v);
+ toErase.push_back(LI);
+ NumGVNLoad++;
+ return true;
+ }
+
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ // Okay, we have *some* definitions of the value. This means that the value
+ // is available in some of our (transitive) predecessors. Let's think about
+ // doing PRE of this load. This will involve inserting a new load into the
+ // predecessor when it's not available. We could do this in general, but
+ // prefer to not increase code size. As such, we only do this when we know
+ // that we only have to insert *one* load (which means we're basically moving
+ // the load, not inserting a new one).
+
+ SmallPtrSet<BasicBlock *, 4> Blockers;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ Blockers.insert(UnavailableBlocks[i]);
+
+ // Let's find the first basic block with more than one predecessor. Walk
+ // backwards through predecessors if needed.
+ BasicBlock *LoadBB = LI->getParent();
+ BasicBlock *TmpBB = LoadBB;
+
+ bool isSinglePred = false;
+ while (TmpBB->getSinglePredecessor()) {
+ isSinglePred = true;
+ TmpBB = TmpBB->getSinglePredecessor();
+ if (!TmpBB) // If we haven't found any, bail now.
+ return false;
+ if (TmpBB == LoadBB) // Infinite (unreachable) loop.
+ return false;
+ if (Blockers.count(TmpBB))
+ return false;
+ }
+
+ assert(TmpBB);
+ LoadBB = TmpBB;
+
+ // If we have a repl set with LI itself in it, this means we have a loop where
+ // at least one of the values is LI. Since this means that we won't be able
+ // to eliminate LI even if we insert uses in the other predecessors, we will
+ // end up increasing code size. Reject this by scanning for LI.
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ if (ValuesPerBlock[i].second == LI)
+ return false;
+
+ if (isSinglePred) {
+ bool isHot = false;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].second))
+ // "Hot" Instruction is in some loop (because it dominates its dep.
+ // instruction).
+ if (DT->dominates(LI, I)) {
+ isHot = true;
+ break;
+ }
+
+ // We are interested only in "hot" instructions. We don't want to do any
+ // mis-optimizations here.
+ if (!isHot)
+ return false;
+ }
+
+ // Okay, we have some hope :). Check to see if the loaded value is fully
+ // available in all but one predecessor.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ BasicBlock *UnavailablePred = 0;
+
+ DenseMap<BasicBlock*, char> FullyAvailableBlocks;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ FullyAvailableBlocks[ValuesPerBlock[i].first] = true;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+
+ for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
+ PI != E; ++PI) {
+ if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+ continue;
+
+ // If this load is not available in multiple predecessors, reject it.
+ if (UnavailablePred && UnavailablePred != *PI)
+ return false;
+ UnavailablePred = *PI;
+ }
+
+ assert(UnavailablePred != 0 &&
+ "Fully available value should be eliminated above!");
+
+ // If the loaded pointer is a PHI node defined in this block, do PHI
+ // translation to get its value in the predecessor.
+ Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);
+
+ // Make sure the value is live in the predecessor. If it was defined by a
+ // non-PHI instruction in this block, we don't know how to recompute it above.
+ if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
+ if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
+ DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
+ << *LPInst << *LI << "\n");
+ return false;
+ }
+
+ // We don't currently handle critical edges :(
+ if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
+ DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
+ << UnavailablePred->getName() << "': " << *LI);
+ return false;
+ }
+
+ // Okay, we can eliminate this load by inserting a reload in the predecessor
+ // and using PHI construction to get the value in the other predecessors, do
+ // it.
+ DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI);
+
+ Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
+ for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
+ I != E; ++I)
+ ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
+
+ DenseMap<BasicBlock*, Value*> BlockReplValues;
+ BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
+ BlockReplValues[UnavailablePred] = NewLoad;
+
+ // Perform PHI construction.
+ Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
+ LI->replaceAllUsesWith(v);
+ if (isa<PHINode>(v))
+ v->takeName(LI);
+ if (isa<PointerType>(v->getType()))
+ MD->invalidateCachedPointerInfo(v);
+ toErase.push_back(LI);
+ NumPRELoad++;
+ return true;
+}
+
+/// processLoad - Attempt to eliminate a load, first by eliminating it
+/// locally, and then attempting non-local elimination if that fails.
+bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+ if (L->isVolatile())
+ return false;
+
+ Value* pointer = L->getPointerOperand();
+
+ // ... to a pointer that has been loaded from before...
+ MemDepResult dep = MD->getDependency(L);
+
+ // If the value isn't available, don't do anything!
+ if (dep.isClobber()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction would be too slow
+ DOUT << "GVN: load ";
+ WriteAsOperand(*DOUT.stream(), L);
+ Instruction *I = dep.getInst();
+ DOUT << " is clobbered by " << *I;
+ );
+ return false;
+ }
+
+ // If it is defined in another block, try harder.
+ if (dep.isNonLocal())
+ return processNonLocalLoad(L, toErase);
+
+ Instruction *DepInst = dep.getInst();
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
+ // Only forward substitute stores to loads of the same type.
+ // FIXME: Could do better!
+ if (DepSI->getPointerOperand()->getType() != pointer->getType())
+ return false;
+
+ // Remove it!
+ L->replaceAllUsesWith(DepSI->getOperand(0));
+ if (isa<PointerType>(DepSI->getOperand(0)->getType()))
+ MD->invalidateCachedPointerInfo(DepSI->getOperand(0));
+ toErase.push_back(L);
+ NumGVNLoad++;
+ return true;
+ }
+
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
+ // Only forward substitute stores to loads of the same type.
+ // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian.
+ if (DepLI->getType() != L->getType())
+ return false;
+
+ // Remove it!
+ L->replaceAllUsesWith(DepLI);
+ if (isa<PointerType>(DepLI->getType()))
+ MD->invalidateCachedPointerInfo(DepLI);
+ toErase.push_back(L);
+ NumGVNLoad++;
+ return true;
+ }
+
+ // If this load really doesn't depend on anything, then we must be loading an
+ // undef value. This can happen when loading from a fresh allocation with no
+ // intervening stores, for example.
+ if (isa<AllocationInst>(DepInst)) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ toErase.push_back(L);
+ NumGVNLoad++;
+ return true;
+ }
+
+ return false;
+}
+
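+/// lookupNumber - Look up the leader for a value number in BB's local scope,
+/// then walk the parent chain (the immediate dominators) until a binding is
+/// found or the chain is exhausted.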
+Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) {
+ DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
+ if (I == localAvail.end())
+ return 0;
+
+ ValueNumberScope* locals = I->second;
+
+ while (locals) {
+ DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num);
+ if (I != locals->table.end())
+ return I->second;
+ else
+ locals = locals->parent;
+ }
+
+ return 0;
+}
+
+/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination
+/// by inheritance from the dominator fails, see if we can perform phi
+/// construction to eliminate the redundancy.
+Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) {
+ BasicBlock* BaseBlock = orig->getParent();
+
+ SmallPtrSet<BasicBlock*, 4> Visited;
+ SmallVector<BasicBlock*, 8> Stack;
+ Stack.push_back(BaseBlock);
+
+ DenseMap<BasicBlock*, Value*> Results;
+
+ // Walk backwards through our predecessors, looking for instances of the
+ // value number we're looking for. Instances are recorded in the Results
+ // map, which is then used to perform phi construction.
+ while (!Stack.empty()) {
+ BasicBlock* Current = Stack.back();
+ Stack.pop_back();
+
+ // If we've walked all the way to a proper dominator, then give up. Cases
+ // where the instance is in the dominator will have been caught by the fast
+ // path, and any cases that require phi construction further than this are
+ // probably not worth it anyways. Note that this is a SIGNIFICANT compile
+ // time improvement.
+ if (DT->properlyDominates(Current, orig->getParent())) return 0;
+
+ DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA =
+ localAvail.find(Current);
+ if (LA == localAvail.end()) return 0;
+ DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno);
+
+ if (V != LA->second->table.end()) {
+ // Found an instance, record it.
+ Results.insert(std::make_pair(Current, V->second));
+ continue;
+ }
+
+ // If we reach the beginning of the function, then give up.
+ if (pred_begin(Current) == pred_end(Current))
+ return 0;
+
+ for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current);
+ PI != PE; ++PI)
+ if (Visited.insert(*PI))
+ Stack.push_back(*PI);
+ }
+
+ // If we didn't find instances, give up. Otherwise, perform phi construction.
+ if (Results.size() == 0)
+ return 0;
+ else
+ return GetValueForBlock(BaseBlock, orig, Results, true);
+}
+
+/// processInstruction - When calculating availability, handle an instruction
+/// by inserting it into the appropriate sets
+bool GVN::processInstruction(Instruction *I,
+ SmallVectorImpl<Instruction*> &toErase) {
+ if (LoadInst* L = dyn_cast<LoadInst>(I)) {
+ bool changed = processLoad(L, toErase);
+
+ if (!changed) {
+ unsigned num = VN.lookup_or_add(L);
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, L));
+ }
+
+ return changed;
+ }
+
+ uint32_t nextNum = VN.getNextUnusedValueNumber();
+ unsigned num = VN.lookup_or_add(I);
+
+ if (BranchInst* BI = dyn_cast<BranchInst>(I)) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+
+ if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ return false;
+
+ Value* branchCond = BI->getCondition();
+ uint32_t condVN = VN.lookup_or_add(branchCond);
+
+ BasicBlock* trueSucc = BI->getSuccessor(0);
+ BasicBlock* falseSucc = BI->getSuccessor(1);
+
+ if (trueSucc->getSinglePredecessor())
+ localAvail[trueSucc]->table[condVN] = ConstantInt::getTrue();
+ if (falseSucc->getSinglePredecessor())
+ localAvail[falseSucc]->table[condVN] = ConstantInt::getFalse();
+
+ return false;
+
+ // Allocations are always uniquely numbered, so we can save time and memory
+ // by fast-failing them.
+ } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+ return false;
+ }
+
+ // Collapse PHI nodes
+ if (PHINode* p = dyn_cast<PHINode>(I)) {
+ Value* constVal = CollapsePhi(p);
+
+ if (constVal) {
+ for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
+ PI != PE; ++PI)
+ PI->second.erase(p);
+
+ p->replaceAllUsesWith(constVal);
+ if (isa<PointerType>(constVal->getType()))
+ MD->invalidateCachedPointerInfo(constVal);
+ VN.erase(p);
+
+ toErase.push_back(p);
+ } else {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+ }
+
+ // If the number we were assigned was a brand new VN, then we don't
+ // need to do a lookup to see if the number already exists
+ // somewhere in the domtree: it can't!
+ } else if (num == nextNum) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+
+ // Perform fast-path value-number based elimination of values inherited from
+ // dominators.
+ } else if (Value* repl = lookupNumber(I->getParent(), num)) {
+ // Remove it!
+ VN.erase(I);
+ I->replaceAllUsesWith(repl);
+ if (isa<PointerType>(repl->getType()))
+ MD->invalidateCachedPointerInfo(repl);
+ toErase.push_back(I);
+ return true;
+
+#if 0
+ // Perform slow-path value-number based elimination with phi construction.
+ } else if (Value* repl = AttemptRedundancyElimination(I, num)) {
+ // Remove it!
+ VN.erase(I);
+ I->replaceAllUsesWith(repl);
+ if (isa<PointerType>(repl->getType()))
+ MD->invalidateCachedPointerInfo(repl);
+ toErase.push_back(I);
+ return true;
+#endif
+ } else {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+ }
+
+ return false;
+}
+
+/// runOnFunction - This is the main transformation entry point for a function.
+bool GVN::runOnFunction(Function& F) {
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+ VN.setMemDep(MD);
+ VN.setDomTree(DT);
+
+ bool changed = false;
+ bool shouldContinue = true;
+
+ // Merge unconditional branches, allowing PRE to catch more
+ // optimization opportunities.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
+ BasicBlock* BB = FI;
+ ++FI;
+ bool removedBlock = MergeBlockIntoPredecessor(BB, this);
+ if (removedBlock) NumGVNBlocks++;
+
+ changed |= removedBlock;
+ }
+
+ unsigned Iteration = 0;
+
+ while (shouldContinue) {
+ DEBUG(cerr << "GVN iteration: " << Iteration << "\n");
+ shouldContinue = iterateOnFunction(F);
+ changed |= shouldContinue;
+ ++Iteration;
+ }
+
+ if (EnablePRE) {
+ bool PREChanged = true;
+ while (PREChanged) {
+ PREChanged = performPRE(F);
+ changed |= PREChanged;
+ }
+ }
+ // FIXME: Should perform GVN again after PRE does something. PRE can move
+ // computations into blocks where they become fully redundant. Note that
+ // we can't do this until PRE's critical edge splitting updates memdep.
+ // Actually, when this happens, we should just fully integrate PRE into GVN.
+
+ cleanupGlobalSets();
+
+ return changed;
+}
+
+
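+/// processBlock - Value-number each instruction in BB in order, eagerly
+/// erasing any instructions that processInstruction reports as redundant.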
+bool GVN::processBlock(BasicBlock* BB) {
+ // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
+ // incrementing BI before processing an instruction).
+ SmallVector<Instruction*, 8> toErase;
+ bool changed_function = false;
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE;) {
+ changed_function |= processInstruction(BI, toErase);
+ if (toErase.empty()) {
+ ++BI;
+ continue;
+ }
+
+ // If we need some instructions deleted, do it now.
+ NumGVNInstr += toErase.size();
+
+ // Avoid iterator invalidation.
+ bool AtStart = BI == BB->begin();
+ if (!AtStart)
+ --BI;
+
+ for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I) {
+ DEBUG(cerr << "GVN removed: " << **I);
+ MD->removeInstruction(*I);
+ (*I)->eraseFromParent();
+ DEBUG(verifyRemoved(*I));
+ }
+ toErase.clear();
+
+ if (AtStart)
+ BI = BB->begin();
+ else
+ ++BI;
+ }
+
+ return changed_function;
+}
+
+/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// control flow patterns and attempts to perform simple PRE at the join point.
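+///
+/// Illustrative diamond (hypothetical IR, not from the sources): if %a + %b
+/// is available in one predecessor of the join block but not in the other,
+/// clone the add into the predecessor that lacks it and replace the add at
+/// the join point with a phi of the two predecessor values.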
+bool GVN::performPRE(Function& F) {
+ bool Changed = false;
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+ DenseMap<BasicBlock*, Value*> predMap;
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock* CurrentBlock = *DI;
+
+ // Nothing to PRE in the entry block.
+ if (CurrentBlock == &F.getEntryBlock()) continue;
+
+ for (BasicBlock::iterator BI = CurrentBlock->begin(),
+ BE = CurrentBlock->end(); BI != BE; ) {
+ Instruction *CurInst = BI++;
+
+ if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
+ isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) ||
+ CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
+ isa<DbgInfoIntrinsic>(CurInst))
+ continue;
+
+ uint32_t valno = VN.lookup(CurInst);
+
+ // Look for the predecessors for PRE opportunities. We're
+ // only trying to solve the basic diamond case, where
+ // a value is computed in the successor and one predecessor,
+ // but not the other. We also explicitly disallow cases
+ // where the successor is its own predecessor, because they're
+ // more complicated to get right.
+ unsigned numWith = 0;
+ unsigned numWithout = 0;
+ BasicBlock* PREPred = 0;
+ predMap.clear();
+
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ // We're not interested in PRE where the block is its
+ // own predecessor, or in blocks with predecessors
+ // that are not reachable.
+ if (*PI == CurrentBlock) {
+ numWithout = 2;
+ break;
+ } else if (!localAvail.count(*PI)) {
+ numWithout = 2;
+ break;
+ }
+
+ DenseMap<uint32_t, Value*>::iterator predV =
+ localAvail[*PI]->table.find(valno);
+ if (predV == localAvail[*PI]->table.end()) {
+ PREPred = *PI;
+ numWithout++;
+ } else if (predV->second == CurInst) {
+ numWithout = 2;
+ } else {
+ predMap[*PI] = predV->second;
+ numWith++;
+ }
+ }
+
+ // Don't do PRE when it might increase code size, i.e. when
+ // we would need to insert instructions in more than one pred.
+ if (numWithout != 1 || numWith == 0)
+ continue;
+
+ // We can't do PRE safely on a critical edge, so instead we schedule
+ // the edge to be split and perform the PRE the next time we iterate
+ // on the function.
+ unsigned succNum = 0;
+ for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
+ i != e; ++i)
+ if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
+ succNum = i;
+ break;
+ }
+
+ if (isCriticalEdge(PREPred->getTerminator(), succNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum));
+ continue;
+ }
+
+ // Instantiate the expression in the predecessor that lacked it.
+ // Because we are going top-down through the block, all value numbers
+ // will be available in the predecessor by the time we need them. Any
+ // that weren't originally present will have been instantiated earlier
+ // in this loop.
+ Instruction* PREInstr = CurInst->clone();
+ bool success = true;
+ for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
+ Value *Op = PREInstr->getOperand(i);
+ if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+ continue;
+
+ if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
+ PREInstr->setOperand(i, V);
+ } else {
+ success = false;
+ break;
+ }
+ }
+
+ // Fail out if we encounter an operand that is not available in
+ // the PRE predecessor. This is typically because of loads which
+ // are not value numbered precisely.
+ if (!success) {
+ delete PREInstr;
+ DEBUG(verifyRemoved(PREInstr));
+ continue;
+ }
+
+ PREInstr->insertBefore(PREPred->getTerminator());
+ PREInstr->setName(CurInst->getName() + ".pre");
+ predMap[PREPred] = PREInstr;
+ VN.add(PREInstr, valno);
+ NumGVNPRE++;
+
+ // Update the availability map to include the new instruction.
+ localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr));
+
+ // Create a PHI to make the value available in this block.
+ PHINode* Phi = PHINode::Create(CurInst->getType(),
+ CurInst->getName() + ".pre-phi",
+ CurrentBlock->begin());
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI)
+ Phi->addIncoming(predMap[*PI], *PI);
+
+ VN.add(Phi, valno);
+ localAvail[CurrentBlock]->table[valno] = Phi;
+
+ CurInst->replaceAllUsesWith(Phi);
+ if (isa<PointerType>(Phi->getType()))
+ MD->invalidateCachedPointerInfo(Phi);
+ VN.erase(CurInst);
+
+ DEBUG(cerr << "GVN PRE removed: " << *CurInst);
+      MD->removeInstruction(CurInst);
+      DEBUG(verifyRemoved(CurInst));
+      CurInst->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
+ I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
+ SplitCriticalEdge(I->first, I->second, this);
+
+ return Changed || toSplit.size();
+}
+
+/// iterateOnFunction - Executes one iteration of GVN
+bool GVN::iterateOnFunction(Function &F) {
+ cleanupGlobalSets();
+
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ if (DI->getIDom())
+ localAvail[DI->getBlock()] =
+ new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
+ else
+ localAvail[DI->getBlock()] = new ValueNumberScope(0);
+ }
+
+ // Top-down walk of the dominator tree
+ bool changed = false;
+#if 0
+ // Needed for value numbering with phi construction to work.
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
+ RE = RPOT.end(); RI != RE; ++RI)
+ changed |= processBlock(*RI);
+#else
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI)
+ changed |= processBlock(DI->getBlock());
+#endif
+
+ return changed;
+}
+
+void GVN::cleanupGlobalSets() {
+ VN.clear();
+ phiMap.clear();
+
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
+ delete I->second;
+ localAvail.clear();
+}
+
+/// verifyRemoved - Verify that the specified instruction does not occur in our
+/// internal data structures.
+void GVN::verifyRemoved(const Instruction *Inst) const {
+ VN.verifyRemoved(Inst);
+
+ // Walk through the PHI map to make sure the instruction isn't hiding in there
+ // somewhere.
+ for (PhiMapType::iterator
+ I = phiMap.begin(), E = phiMap.end(); I != E; ++I) {
+ assert(I->first != Inst && "Inst is still a key in PHI map!");
+
+ for (SmallPtrSet<Instruction*, 4>::iterator
+ II = I->second.begin(), IE = I->second.end(); II != IE; ++II) {
+ assert(*II != Inst && "Inst is still a value in PHI map!");
+ }
+ }
+
+ // Walk through the value number scope to make sure the instruction isn't
+ // ferreted away in it.
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
+ const ValueNumberScope *VNS = I->second;
+
+ while (VNS) {
+ for (DenseMap<uint32_t, Value*>::iterator
+ II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
+ assert(II->second != Inst && "Inst still in value numbering scope!");
+ }
+
+ VNS = VNS->parent;
+ }
+ }
+}
diff --git a/lib/Transforms/Scalar/GVNPRE.cpp b/lib/Transforms/Scalar/GVNPRE.cpp
new file mode 100644
index 0000000..e3b0937
--- /dev/null
+++ b/lib/Transforms/Scalar/GVNPRE.cpp
@@ -0,0 +1,1885 @@
+//===- GVNPRE.cpp - Eliminate redundant values and expressions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a hybrid of global value numbering and partial redundancy
+// elimination, known as GVN-PRE. It performs partial redundancy elimination on
+// values, rather than lexical expressions, allowing a more comprehensive view
+// the optimization. It replaces redundant values with uses of earlier
+// occurences of the same value. While this is beneficial in that it eliminates
+// unneeded computation, it also increases register pressure by creating large
+// live ranges, and should be used with caution on platforms that are very
+// sensitive to register pressure.
+//
+// Note that this pass does the value numbering itself, it does not use the
+// ValueNumbering analysis passes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gvnpre"
+#include "llvm/Value.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+#include <deque>
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ValueTable Class
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Expression - A canonicalized form of an instruction, used by ValueTable
+/// below as an efficient mechanism to determine the expression-wise
+/// equivalence of two values.
+
+struct Expression {
+ enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM,
+ FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
+ ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
+ ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
+ FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
+ FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
+ FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
+ SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
+ FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
+ PTRTOINT, INTTOPTR, BITCAST, GEP, EMPTY,
+ TOMBSTONE };
+
+ ExpressionOpcode opcode;
+ const Type* type;
+ uint32_t firstVN;
+ uint32_t secondVN;
+ uint32_t thirdVN;
+ SmallVector<uint32_t, 4> varargs;
+
+ Expression() { }
+ explicit Expression(ExpressionOpcode o) : opcode(o) { }
+
+ bool operator==(const Expression &other) const {
+ if (opcode != other.opcode)
+ return false;
+ else if (opcode == EMPTY || opcode == TOMBSTONE)
+ return true;
+ else if (type != other.type)
+ return false;
+ else if (firstVN != other.firstVN)
+ return false;
+ else if (secondVN != other.secondVN)
+ return false;
+ else if (thirdVN != other.thirdVN)
+ return false;
+ else {
+ if (varargs.size() != other.varargs.size())
+ return false;
+
+ for (size_t i = 0; i < varargs.size(); ++i)
+ if (varargs[i] != other.varargs[i])
+ return false;
+
+ return true;
+ }
+ }
+
+  bool operator!=(const Expression &other) const {
+    return !(*this == other);
+  }
+};
+
+}
+
+namespace {
+ class VISIBILITY_HIDDEN ValueTable {
+ private:
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+
+ uint32_t nextValueNumber;
+
+ Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
+ Expression::ExpressionOpcode getOpcode(CmpInst* C);
+ Expression::ExpressionOpcode getOpcode(CastInst* C);
+ Expression create_expression(BinaryOperator* BO);
+ Expression create_expression(CmpInst* C);
+ Expression create_expression(ShuffleVectorInst* V);
+ Expression create_expression(ExtractElementInst* C);
+ Expression create_expression(InsertElementInst* V);
+ Expression create_expression(SelectInst* V);
+ Expression create_expression(CastInst* C);
+ Expression create_expression(GetElementPtrInst* G);
+ public:
+ ValueTable() { nextValueNumber = 1; }
+ uint32_t lookup_or_add(Value* V);
+ uint32_t lookup(Value* V) const;
+ void add(Value* V, uint32_t num);
+ void clear();
+ void erase(Value* v);
+ unsigned size();
+ };
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<Expression> {
+ static inline Expression getEmptyKey() {
+ return Expression(Expression::EMPTY);
+ }
+
+ static inline Expression getTombstoneKey() {
+ return Expression(Expression::TOMBSTONE);
+ }
+
+ static unsigned getHashValue(const Expression e) {
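+    // A simple polynomial rolling hash (base 37). For example (hypothetical
+    // value numbers), a binary op over value numbers 3 and 5 hashes to
+    // roughly ((opcode * 37 + 3) * 37 + 5) * 37 + 0, further folded with the
+    // type pointer bits and any varargs below.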
+ unsigned hash = e.opcode;
+
+ hash = e.firstVN + hash * 37;
+ hash = e.secondVN + hash * 37;
+ hash = e.thirdVN + hash * 37;
+
+ hash = ((unsigned)((uintptr_t)e.type >> 4) ^
+ (unsigned)((uintptr_t)e.type >> 9)) +
+ hash * 37;
+
+ for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
+ E = e.varargs.end(); I != E; ++I)
+ hash = *I + hash * 37;
+
+ return hash;
+ }
+ static bool isEqual(const Expression &LHS, const Expression &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return true; }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable Internal Functions
+//===----------------------------------------------------------------------===//
+Expression::ExpressionOpcode
+ ValueTable::getOpcode(BinaryOperator* BO) {
+ switch(BO->getOpcode()) {
+ case Instruction::Add:
+ return Expression::ADD;
+ case Instruction::Sub:
+ return Expression::SUB;
+ case Instruction::Mul:
+ return Expression::MUL;
+ case Instruction::UDiv:
+ return Expression::UDIV;
+ case Instruction::SDiv:
+ return Expression::SDIV;
+ case Instruction::FDiv:
+ return Expression::FDIV;
+ case Instruction::URem:
+ return Expression::UREM;
+ case Instruction::SRem:
+ return Expression::SREM;
+ case Instruction::FRem:
+ return Expression::FREM;
+ case Instruction::Shl:
+ return Expression::SHL;
+ case Instruction::LShr:
+ return Expression::LSHR;
+ case Instruction::AShr:
+ return Expression::ASHR;
+ case Instruction::And:
+ return Expression::AND;
+ case Instruction::Or:
+ return Expression::OR;
+ case Instruction::Xor:
+ return Expression::XOR;
+
+ // THIS SHOULD NEVER HAPPEN
+ default:
+ assert(0 && "Binary operator with unknown opcode?");
+ return Expression::ADD;
+ }
+}
+
+Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
+ if (C->getOpcode() == Instruction::ICmp) {
+ switch (C->getPredicate()) {
+ case ICmpInst::ICMP_EQ:
+ return Expression::ICMPEQ;
+ case ICmpInst::ICMP_NE:
+ return Expression::ICMPNE;
+ case ICmpInst::ICMP_UGT:
+ return Expression::ICMPUGT;
+ case ICmpInst::ICMP_UGE:
+ return Expression::ICMPUGE;
+ case ICmpInst::ICMP_ULT:
+ return Expression::ICMPULT;
+ case ICmpInst::ICMP_ULE:
+ return Expression::ICMPULE;
+ case ICmpInst::ICMP_SGT:
+ return Expression::ICMPSGT;
+ case ICmpInst::ICMP_SGE:
+ return Expression::ICMPSGE;
+ case ICmpInst::ICMP_SLT:
+ return Expression::ICMPSLT;
+ case ICmpInst::ICMP_SLE:
+ return Expression::ICMPSLE;
+
+ // THIS SHOULD NEVER HAPPEN
+ default:
+ assert(0 && "Comparison with unknown predicate?");
+ return Expression::ICMPEQ;
+ }
+ } else {
+ switch (C->getPredicate()) {
+ case FCmpInst::FCMP_OEQ:
+ return Expression::FCMPOEQ;
+ case FCmpInst::FCMP_OGT:
+ return Expression::FCMPOGT;
+ case FCmpInst::FCMP_OGE:
+ return Expression::FCMPOGE;
+ case FCmpInst::FCMP_OLT:
+ return Expression::FCMPOLT;
+ case FCmpInst::FCMP_OLE:
+ return Expression::FCMPOLE;
+ case FCmpInst::FCMP_ONE:
+ return Expression::FCMPONE;
+ case FCmpInst::FCMP_ORD:
+ return Expression::FCMPORD;
+ case FCmpInst::FCMP_UNO:
+ return Expression::FCMPUNO;
+ case FCmpInst::FCMP_UEQ:
+ return Expression::FCMPUEQ;
+ case FCmpInst::FCMP_UGT:
+ return Expression::FCMPUGT;
+ case FCmpInst::FCMP_UGE:
+ return Expression::FCMPUGE;
+ case FCmpInst::FCMP_ULT:
+ return Expression::FCMPULT;
+ case FCmpInst::FCMP_ULE:
+ return Expression::FCMPULE;
+ case FCmpInst::FCMP_UNE:
+ return Expression::FCMPUNE;
+
+ // THIS SHOULD NEVER HAPPEN
+ default:
+ assert(0 && "Comparison with unknown predicate?");
+ return Expression::FCMPOEQ;
+ }
+ }
+}
+
+Expression::ExpressionOpcode
+ ValueTable::getOpcode(CastInst* C) {
+ switch(C->getOpcode()) {
+ case Instruction::Trunc:
+ return Expression::TRUNC;
+ case Instruction::ZExt:
+ return Expression::ZEXT;
+ case Instruction::SExt:
+ return Expression::SEXT;
+ case Instruction::FPToUI:
+ return Expression::FPTOUI;
+ case Instruction::FPToSI:
+ return Expression::FPTOSI;
+ case Instruction::UIToFP:
+ return Expression::UITOFP;
+ case Instruction::SIToFP:
+ return Expression::SITOFP;
+ case Instruction::FPTrunc:
+ return Expression::FPTRUNC;
+ case Instruction::FPExt:
+ return Expression::FPEXT;
+ case Instruction::PtrToInt:
+ return Expression::PTRTOINT;
+ case Instruction::IntToPtr:
+ return Expression::INTTOPTR;
+ case Instruction::BitCast:
+ return Expression::BITCAST;
+
+ // THIS SHOULD NEVER HAPPEN
+ default:
+ assert(0 && "Cast operator with unknown opcode?");
+ return Expression::BITCAST;
+ }
+}
+
+Expression ValueTable::create_expression(BinaryOperator* BO) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(BO->getOperand(0));
+ e.secondVN = lookup_or_add(BO->getOperand(1));
+ e.thirdVN = 0;
+ e.type = BO->getType();
+ e.opcode = getOpcode(BO);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(CmpInst* C) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(C->getOperand(0));
+ e.secondVN = lookup_or_add(C->getOperand(1));
+ e.thirdVN = 0;
+ e.type = C->getType();
+ e.opcode = getOpcode(C);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(CastInst* C) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(C->getOperand(0));
+ e.secondVN = 0;
+ e.thirdVN = 0;
+ e.type = C->getType();
+ e.opcode = getOpcode(C);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ShuffleVectorInst* S) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(S->getOperand(0));
+ e.secondVN = lookup_or_add(S->getOperand(1));
+ e.thirdVN = lookup_or_add(S->getOperand(2));
+ e.type = S->getType();
+ e.opcode = Expression::SHUFFLE;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ExtractElementInst* E) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(E->getOperand(0));
+ e.secondVN = lookup_or_add(E->getOperand(1));
+ e.thirdVN = 0;
+ e.type = E->getType();
+ e.opcode = Expression::EXTRACT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(InsertElementInst* I) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(I->getOperand(0));
+ e.secondVN = lookup_or_add(I->getOperand(1));
+ e.thirdVN = lookup_or_add(I->getOperand(2));
+ e.type = I->getType();
+ e.opcode = Expression::INSERT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(SelectInst* I) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(I->getCondition());
+ e.secondVN = lookup_or_add(I->getTrueValue());
+ e.thirdVN = lookup_or_add(I->getFalseValue());
+ e.type = I->getType();
+ e.opcode = Expression::SELECT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(GetElementPtrInst* G) {
+ Expression e;
+
+ e.firstVN = lookup_or_add(G->getPointerOperand());
+ e.secondVN = 0;
+ e.thirdVN = 0;
+ e.type = G->getType();
+ e.opcode = Expression::GEP;
+
+ for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
+ I != E; ++I)
+ e.varargs.push_back(lookup_or_add(*I));
+
+ return e;
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable External Functions
+//===----------------------------------------------------------------------===//
+
+/// lookup_or_add - Returns the value number for the specified value, assigning
+/// it a new number if it did not have one before.
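+///
+/// Each handled instruction kind is canonicalized into an Expression key, so
+/// structurally identical expressions share a value number; anything
+/// unhandled (loads, calls, etc.) receives a fresh number of its own.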
+uint32_t ValueTable::lookup_or_add(Value* V) {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ return VI->second;
+
+ if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
+ Expression e = create_expression(BO);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (CmpInst* C = dyn_cast<CmpInst>(V)) {
+ Expression e = create_expression(C);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (SelectInst* U = dyn_cast<SelectInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (CastInst* U = dyn_cast<CastInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
+ Expression e = create_expression(U);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+}
+
+/// lookup - Returns the value number of the specified value. Fails if
+/// the value has not yet been numbered.
+uint32_t ValueTable::lookup(Value* V) const {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ return VI->second;
+ else
+ assert(0 && "Value not numbered?");
+
+ return 0;
+}
+
+/// add - Add the specified value with the given value number, removing
+/// its old number, if any
+void ValueTable::add(Value* V, uint32_t num) {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ valueNumbering.erase(VI);
+ valueNumbering.insert(std::make_pair(V, num));
+}
+
+/// clear - Remove all entries from the ValueTable
+void ValueTable::clear() {
+ valueNumbering.clear();
+ expressionNumbering.clear();
+ nextValueNumber = 1;
+}
+
+/// erase - Remove a value from the value numbering
+void ValueTable::erase(Value* V) {
+ valueNumbering.erase(V);
+}
+
+/// size - Return an upper bound on the assigned value numbers, suitable
+/// for sizing bit vectors indexed by value number
+unsigned ValueTable::size() {
+  // NOTE: zero is never assigned, so this is one more than the count
+  return nextValueNumber;
+}
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// ValueNumberedSet Class
+//===----------------------------------------------------------------------===//
+
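+// A set of Values paired with a BitVector indexed by value number. Note that
+// insert() does not set the corresponding bit: callers pair insert() with an
+// explicit set() once the value number is known (see val_replace below).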
+class ValueNumberedSet {
+ private:
+ SmallPtrSet<Value*, 8> contents;
+ BitVector numbers;
+ public:
+ ValueNumberedSet() { numbers.resize(1); }
+ ValueNumberedSet(const ValueNumberedSet& other) {
+ numbers = other.numbers;
+ contents = other.contents;
+ }
+
+ typedef SmallPtrSet<Value*, 8>::iterator iterator;
+
+ iterator begin() { return contents.begin(); }
+ iterator end() { return contents.end(); }
+
+ bool insert(Value* v) { return contents.insert(v); }
+ void insert(iterator I, iterator E) { contents.insert(I, E); }
+ void erase(Value* v) { contents.erase(v); }
+ unsigned count(Value* v) { return contents.count(v); }
+ size_t size() { return contents.size(); }
+
+ void set(unsigned i) {
+ if (i >= numbers.size())
+ numbers.resize(i+1);
+
+ numbers.set(i);
+ }
+
+ void operator=(const ValueNumberedSet& other) {
+ contents = other.contents;
+ numbers = other.numbers;
+ }
+
+ void reset(unsigned i) {
+ if (i < numbers.size())
+ numbers.reset(i);
+ }
+
+ bool test(unsigned i) {
+ if (i >= numbers.size())
+ return false;
+
+ return numbers.test(i);
+ }
+
+ void clear() {
+ contents.clear();
+ numbers.clear();
+ }
+};
+
+}
+
+//===----------------------------------------------------------------------===//
+// GVNPRE Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ class VISIBILITY_HIDDEN GVNPRE : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ GVNPRE() : FunctionPass(&ID) {}
+
+ private:
+ ValueTable VN;
+ SmallVector<Instruction*, 8> createdExpressions;
+
+ DenseMap<BasicBlock*, ValueNumberedSet> availableOut;
+ DenseMap<BasicBlock*, ValueNumberedSet> anticipatedIn;
+ DenseMap<BasicBlock*, ValueNumberedSet> generatedPhis;
+
+    // This transformation requires dominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequired<UnifyFunctionExitNodes>();
+ AU.addRequired<DominatorTree>();
+ }
+
+    // Helper functions
+ // FIXME: eliminate or document these better
+    void dump(ValueNumberedSet& s) const;
+    void clean(ValueNumberedSet& set);
+    Value* find_leader(ValueNumberedSet& vals, uint32_t v);
+    Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ);
+    void phi_translate_set(ValueNumberedSet& anticIn, BasicBlock* pred,
+                           BasicBlock* succ, ValueNumberedSet& out);
+
+    void topo_sort(ValueNumberedSet& set,
+                   SmallVector<Value*, 8>& vec);
+
+    void cleanup();
+    bool elimination();
+
+    void val_insert(ValueNumberedSet& s, Value* v);
+    void val_replace(ValueNumberedSet& s, Value* v);
+    bool dependsOnInvoke(Value* V);
+ void buildsets_availout(BasicBlock::iterator I,
+ ValueNumberedSet& currAvail,
+ ValueNumberedSet& currPhis,
+ ValueNumberedSet& currExps,
+ SmallPtrSet<Value*, 16>& currTemps);
+ bool buildsets_anticout(BasicBlock* BB,
+ ValueNumberedSet& anticOut,
+ SmallPtrSet<BasicBlock*, 8>& visited);
+ unsigned buildsets_anticin(BasicBlock* BB,
+ ValueNumberedSet& anticOut,
+ ValueNumberedSet& currExps,
+ SmallPtrSet<Value*, 16>& currTemps,
+ SmallPtrSet<BasicBlock*, 8>& visited);
+    void buildsets(Function& F);
+
+    void insertion_pre(Value* e, BasicBlock* BB,
+                       DenseMap<BasicBlock*, Value*>& avail,
+                       std::map<BasicBlock*, ValueNumberedSet>& new_sets);
+    unsigned insertion_mergepoint(SmallVector<Value*, 8>& workList,
+                                  df_iterator<DomTreeNode*>& D,
+                                  std::map<BasicBlock*, ValueNumberedSet>& new_sets);
+    bool insertion(Function& F);
+
+ };
+
+ char GVNPRE::ID = 0;
+
+}
+
+// createGVNPREPass - The public interface to this file...
+FunctionPass *llvm::createGVNPREPass() { return new GVNPRE(); }
+
+static RegisterPass<GVNPRE> X("gvnpre",
+ "Global Value Numbering/Partial Redundancy Elimination");
+
+
+STATISTIC(NumInsertedVals, "Number of values inserted");
+STATISTIC(NumInsertedPhis, "Number of PHI nodes inserted");
+STATISTIC(NumEliminated, "Number of redundant instructions eliminated");
+
+/// find_leader - Given a set and a value number, return the first
+/// element of the set with that value number, or 0 if no such element
+/// is present
+Value* GVNPRE::find_leader(ValueNumberedSet& vals, uint32_t v) {
+ if (!vals.test(v))
+ return 0;
+
+ for (ValueNumberedSet::iterator I = vals.begin(), E = vals.end();
+ I != E; ++I)
+ if (v == VN.lookup(*I))
+ return *I;
+
+ assert(0 && "No leader found, but present bit is set?");
+ return 0;
+}
+
+/// val_insert - Insert a value into a set only if there is not a value
+/// with the same value number already in the set
+void GVNPRE::val_insert(ValueNumberedSet& s, Value* v) {
+ uint32_t num = VN.lookup(v);
+ if (!s.test(num))
+ s.insert(v);
+}
+
+/// val_replace - Insert a value into a set, replacing any values already in
+/// the set that have the same value number
+void GVNPRE::val_replace(ValueNumberedSet& s, Value* v) {
+ if (s.count(v)) return;
+
+ uint32_t num = VN.lookup(v);
+ Value* leader = find_leader(s, num);
+ if (leader != 0)
+ s.erase(leader);
+ s.insert(v);
+ s.set(num);
+}
+
+/// phi_translate - Given a value, its parent block, and a predecessor of its
+/// parent, translate the value into a form legal for the predecessor block.
+/// This means translating its operands (and recursively, their operands)
+/// through any phi nodes in the parent into values available in the
+/// predecessor.
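+///
+/// For example (hypothetical): if succ contains "%x = phi [ %x1, pred ], ..."
+/// and V is "%t = add %x, %y", translating %t into pred yields the expression
+/// "add %x1, %y", or its existing leader in AVAIL_OUT[pred] if one exists.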
+Value* GVNPRE::phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ) {
+ if (V == 0)
+ return 0;
+
+ // Unary Operations
+ if (CastInst* U = dyn_cast<CastInst>(V)) {
+ Value* newOp1 = 0;
+ if (isa<Instruction>(U->getOperand(0)))
+ newOp1 = phi_translate(U->getOperand(0), pred, succ);
+ else
+ newOp1 = U->getOperand(0);
+
+ if (newOp1 == 0)
+ return 0;
+
+ if (newOp1 != U->getOperand(0)) {
+ Instruction* newVal = 0;
+ if (CastInst* C = dyn_cast<CastInst>(U))
+ newVal = CastInst::Create(C->getOpcode(),
+ newOp1, C->getType(),
+ C->getName()+".expr");
+
+ uint32_t v = VN.lookup_or_add(newVal);
+
+ Value* leader = find_leader(availableOut[pred], v);
+ if (leader == 0) {
+ createdExpressions.push_back(newVal);
+ return newVal;
+ } else {
+ VN.erase(newVal);
+ delete newVal;
+ return leader;
+ }
+ }
+
+ // Binary Operations
+  } else if (isa<BinaryOperator>(V) || isa<CmpInst>(V) ||
+ isa<ExtractElementInst>(V)) {
+ User* U = cast<User>(V);
+
+ Value* newOp1 = 0;
+ if (isa<Instruction>(U->getOperand(0)))
+ newOp1 = phi_translate(U->getOperand(0), pred, succ);
+ else
+ newOp1 = U->getOperand(0);
+
+ if (newOp1 == 0)
+ return 0;
+
+ Value* newOp2 = 0;
+ if (isa<Instruction>(U->getOperand(1)))
+ newOp2 = phi_translate(U->getOperand(1), pred, succ);
+ else
+ newOp2 = U->getOperand(1);
+
+ if (newOp2 == 0)
+ return 0;
+
+ if (newOp1 != U->getOperand(0) || newOp2 != U->getOperand(1)) {
+ Instruction* newVal = 0;
+ if (BinaryOperator* BO = dyn_cast<BinaryOperator>(U))
+ newVal = BinaryOperator::Create(BO->getOpcode(),
+ newOp1, newOp2,
+ BO->getName()+".expr");
+ else if (CmpInst* C = dyn_cast<CmpInst>(U))
+ newVal = CmpInst::Create(C->getOpcode(),
+ C->getPredicate(),
+ newOp1, newOp2,
+ C->getName()+".expr");
+ else if (ExtractElementInst* E = dyn_cast<ExtractElementInst>(U))
+ newVal = new ExtractElementInst(newOp1, newOp2, E->getName()+".expr");
+
+ uint32_t v = VN.lookup_or_add(newVal);
+
+ Value* leader = find_leader(availableOut[pred], v);
+ if (leader == 0) {
+ createdExpressions.push_back(newVal);
+ return newVal;
+ } else {
+ VN.erase(newVal);
+ delete newVal;
+ return leader;
+ }
+ }
+
+ // Ternary Operations
+ } else if (isa<ShuffleVectorInst>(V) || isa<InsertElementInst>(V) ||
+ isa<SelectInst>(V)) {
+ User* U = cast<User>(V);
+
+ Value* newOp1 = 0;
+ if (isa<Instruction>(U->getOperand(0)))
+ newOp1 = phi_translate(U->getOperand(0), pred, succ);
+ else
+ newOp1 = U->getOperand(0);
+
+ if (newOp1 == 0)
+ return 0;
+
+ Value* newOp2 = 0;
+ if (isa<Instruction>(U->getOperand(1)))
+ newOp2 = phi_translate(U->getOperand(1), pred, succ);
+ else
+ newOp2 = U->getOperand(1);
+
+ if (newOp2 == 0)
+ return 0;
+
+ Value* newOp3 = 0;
+ if (isa<Instruction>(U->getOperand(2)))
+ newOp3 = phi_translate(U->getOperand(2), pred, succ);
+ else
+ newOp3 = U->getOperand(2);
+
+ if (newOp3 == 0)
+ return 0;
+
+ if (newOp1 != U->getOperand(0) ||
+ newOp2 != U->getOperand(1) ||
+ newOp3 != U->getOperand(2)) {
+ Instruction* newVal = 0;
+ if (ShuffleVectorInst* S = dyn_cast<ShuffleVectorInst>(U))
+ newVal = new ShuffleVectorInst(newOp1, newOp2, newOp3,
+ S->getName() + ".expr");
+ else if (InsertElementInst* I = dyn_cast<InsertElementInst>(U))
+ newVal = InsertElementInst::Create(newOp1, newOp2, newOp3,
+ I->getName() + ".expr");
+ else if (SelectInst* I = dyn_cast<SelectInst>(U))
+ newVal = SelectInst::Create(newOp1, newOp2, newOp3,
+ I->getName() + ".expr");
+
+ uint32_t v = VN.lookup_or_add(newVal);
+
+ Value* leader = find_leader(availableOut[pred], v);
+ if (leader == 0) {
+ createdExpressions.push_back(newVal);
+ return newVal;
+ } else {
+ VN.erase(newVal);
+ delete newVal;
+ return leader;
+ }
+ }
+
+ // Varargs operators
+ } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
+ Value* newOp1 = 0;
+ if (isa<Instruction>(U->getPointerOperand()))
+ newOp1 = phi_translate(U->getPointerOperand(), pred, succ);
+ else
+ newOp1 = U->getPointerOperand();
+
+ if (newOp1 == 0)
+ return 0;
+
+ bool changed_idx = false;
+ SmallVector<Value*, 4> newIdx;
+ for (GetElementPtrInst::op_iterator I = U->idx_begin(), E = U->idx_end();
+ I != E; ++I)
+ if (isa<Instruction>(*I)) {
+ Value* newVal = phi_translate(*I, pred, succ);
+ newIdx.push_back(newVal);
+ if (newVal != *I)
+ changed_idx = true;
+ } else {
+ newIdx.push_back(*I);
+ }
+
+ if (newOp1 != U->getPointerOperand() || changed_idx) {
+ Instruction* newVal =
+ GetElementPtrInst::Create(newOp1,
+ newIdx.begin(), newIdx.end(),
+ U->getName()+".expr");
+
+ uint32_t v = VN.lookup_or_add(newVal);
+
+ Value* leader = find_leader(availableOut[pred], v);
+ if (leader == 0) {
+ createdExpressions.push_back(newVal);
+ return newVal;
+ } else {
+ VN.erase(newVal);
+ delete newVal;
+ return leader;
+ }
+ }
+
+ // PHI Nodes
+ } else if (PHINode* P = dyn_cast<PHINode>(V)) {
+ if (P->getParent() == succ)
+ return P->getIncomingValueForBlock(pred);
+ }
+
+ return V;
+}
+
+/// phi_translate_set - Perform phi translation on every element of a set
+void GVNPRE::phi_translate_set(ValueNumberedSet& anticIn,
+ BasicBlock* pred, BasicBlock* succ,
+ ValueNumberedSet& out) {
+ for (ValueNumberedSet::iterator I = anticIn.begin(),
+ E = anticIn.end(); I != E; ++I) {
+ Value* V = phi_translate(*I, pred, succ);
+ if (V != 0 && !out.test(VN.lookup_or_add(V))) {
+ out.insert(V);
+ out.set(VN.lookup(V));
+ }
+ }
+}
+
+/// dependsOnInvoke - Test if a value has a phi node as an operand, any of
+/// whose inputs is an invoke instruction. If this is true, we cannot safely
+/// PRE the instruction or anything that depends on it.
+bool GVNPRE::dependsOnInvoke(Value* V) {
+ if (PHINode* p = dyn_cast<PHINode>(V)) {
+ for (PHINode::op_iterator I = p->op_begin(), E = p->op_end(); I != E; ++I)
+ if (isa<InvokeInst>(*I))
+ return true;
+ return false;
+ } else {
+ return false;
+ }
+}
+
+/// clean - Remove all non-opaque values from the set whose operands are not
+/// themselves in the set, as well as all values that depend on invokes (see
+/// above)
+void GVNPRE::clean(ValueNumberedSet& set) {
+ SmallVector<Value*, 8> worklist;
+ worklist.reserve(set.size());
+ topo_sort(set, worklist);
+
+ for (unsigned i = 0; i < worklist.size(); ++i) {
+ Value* v = worklist[i];
+
+ // Handle unary ops
+ if (CastInst* U = dyn_cast<CastInst>(v)) {
+ bool lhsValid = !isa<Instruction>(U->getOperand(0));
+ lhsValid |= set.test(VN.lookup(U->getOperand(0)));
+ if (lhsValid)
+ lhsValid = !dependsOnInvoke(U->getOperand(0));
+
+ if (!lhsValid) {
+ set.erase(U);
+ set.reset(VN.lookup(U));
+ }
+
+ // Handle binary ops
+ } else if (isa<BinaryOperator>(v) || isa<CmpInst>(v) ||
+ isa<ExtractElementInst>(v)) {
+ User* U = cast<User>(v);
+
+ bool lhsValid = !isa<Instruction>(U->getOperand(0));
+ lhsValid |= set.test(VN.lookup(U->getOperand(0)));
+ if (lhsValid)
+ lhsValid = !dependsOnInvoke(U->getOperand(0));
+
+ bool rhsValid = !isa<Instruction>(U->getOperand(1));
+ rhsValid |= set.test(VN.lookup(U->getOperand(1)));
+ if (rhsValid)
+ rhsValid = !dependsOnInvoke(U->getOperand(1));
+
+ if (!lhsValid || !rhsValid) {
+ set.erase(U);
+ set.reset(VN.lookup(U));
+ }
+
+ // Handle ternary ops
+ } else if (isa<ShuffleVectorInst>(v) || isa<InsertElementInst>(v) ||
+ isa<SelectInst>(v)) {
+ User* U = cast<User>(v);
+
+ bool lhsValid = !isa<Instruction>(U->getOperand(0));
+ lhsValid |= set.test(VN.lookup(U->getOperand(0)));
+ if (lhsValid)
+ lhsValid = !dependsOnInvoke(U->getOperand(0));
+
+ bool rhsValid = !isa<Instruction>(U->getOperand(1));
+ rhsValid |= set.test(VN.lookup(U->getOperand(1)));
+ if (rhsValid)
+ rhsValid = !dependsOnInvoke(U->getOperand(1));
+
+ bool thirdValid = !isa<Instruction>(U->getOperand(2));
+ thirdValid |= set.test(VN.lookup(U->getOperand(2)));
+ if (thirdValid)
+ thirdValid = !dependsOnInvoke(U->getOperand(2));
+
+ if (!lhsValid || !rhsValid || !thirdValid) {
+ set.erase(U);
+ set.reset(VN.lookup(U));
+ }
+
+ // Handle varargs ops
+ } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(v)) {
+ bool ptrValid = !isa<Instruction>(U->getPointerOperand());
+ ptrValid |= set.test(VN.lookup(U->getPointerOperand()));
+ if (ptrValid)
+ ptrValid = !dependsOnInvoke(U->getPointerOperand());
+
+ bool varValid = true;
+ for (GetElementPtrInst::op_iterator I = U->idx_begin(), E = U->idx_end();
+ I != E; ++I)
+ if (varValid) {
+ varValid &= !isa<Instruction>(*I) || set.test(VN.lookup(*I));
+ varValid &= !dependsOnInvoke(*I);
+ }
+
+ if (!ptrValid || !varValid) {
+ set.erase(U);
+ set.reset(VN.lookup(U));
+ }
+ }
+ }
+}
+
+/// topo_sort - Given a set of values, sort them by topological
+/// order into the provided vector.
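+///
+/// "Topological" here means that the leader of each operand's value number
+/// (when it is in the set) is emitted before its users; the sort is an
+/// iterative DFS rather than a recursive one.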
+void GVNPRE::topo_sort(ValueNumberedSet& set, SmallVector<Value*, 8>& vec) {
+ SmallPtrSet<Value*, 16> visited;
+ SmallVector<Value*, 8> stack;
+ for (ValueNumberedSet::iterator I = set.begin(), E = set.end();
+ I != E; ++I) {
+ if (visited.count(*I) == 0)
+ stack.push_back(*I);
+
+ while (!stack.empty()) {
+ Value* e = stack.back();
+
+ // Handle unary ops
+ if (CastInst* U = dyn_cast<CastInst>(e)) {
+ Value* l = find_leader(set, VN.lookup(U->getOperand(0)));
+
+ if (l != 0 && isa<Instruction>(l) &&
+ visited.count(l) == 0)
+ stack.push_back(l);
+ else {
+ vec.push_back(e);
+ visited.insert(e);
+ stack.pop_back();
+ }
+
+ // Handle binary ops
+ } else if (isa<BinaryOperator>(e) || isa<CmpInst>(e) ||
+ isa<ExtractElementInst>(e)) {
+ User* U = cast<User>(e);
+ Value* l = find_leader(set, VN.lookup(U->getOperand(0)));
+ Value* r = find_leader(set, VN.lookup(U->getOperand(1)));
+
+ if (l != 0 && isa<Instruction>(l) &&
+ visited.count(l) == 0)
+ stack.push_back(l);
+ else if (r != 0 && isa<Instruction>(r) &&
+ visited.count(r) == 0)
+ stack.push_back(r);
+ else {
+ vec.push_back(e);
+ visited.insert(e);
+ stack.pop_back();
+ }
+
+ // Handle ternary ops
+ } else if (isa<InsertElementInst>(e) || isa<ShuffleVectorInst>(e) ||
+ isa<SelectInst>(e)) {
+ User* U = cast<User>(e);
+ Value* l = find_leader(set, VN.lookup(U->getOperand(0)));
+ Value* r = find_leader(set, VN.lookup(U->getOperand(1)));
+ Value* m = find_leader(set, VN.lookup(U->getOperand(2)));
+
+ if (l != 0 && isa<Instruction>(l) &&
+ visited.count(l) == 0)
+ stack.push_back(l);
+ else if (r != 0 && isa<Instruction>(r) &&
+ visited.count(r) == 0)
+ stack.push_back(r);
+ else if (m != 0 && isa<Instruction>(m) &&
+ visited.count(m) == 0)
+ stack.push_back(m);
+ else {
+ vec.push_back(e);
+ visited.insert(e);
+ stack.pop_back();
+ }
+
+ // Handle vararg ops
+ } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(e)) {
+ Value* p = find_leader(set, VN.lookup(U->getPointerOperand()));
+
+ if (p != 0 && isa<Instruction>(p) &&
+ visited.count(p) == 0)
+ stack.push_back(p);
+ else {
+ bool push_va = false;
+ for (GetElementPtrInst::op_iterator I = U->idx_begin(),
+ E = U->idx_end(); I != E; ++I) {
+ Value * v = find_leader(set, VN.lookup(*I));
+ if (v != 0 && isa<Instruction>(v) && visited.count(v) == 0) {
+ stack.push_back(v);
+ push_va = true;
+ }
+ }
+
+ if (!push_va) {
+ vec.push_back(e);
+ visited.insert(e);
+ stack.pop_back();
+ }
+ }
+
+ // Handle opaque ops
+ } else {
+ visited.insert(e);
+ vec.push_back(e);
+ stack.pop_back();
+ }
+ }
+
+ stack.clear();
+ }
+}
+
+/// dump - Dump a set of values to standard error
+void GVNPRE::dump(ValueNumberedSet& s) const {
+ DOUT << "{ ";
+ for (ValueNumberedSet::iterator I = s.begin(), E = s.end();
+ I != E; ++I) {
+ DOUT << "" << VN.lookup(*I) << ": ";
+ DEBUG((*I)->dump());
+ }
+ DOUT << "}\n\n";
+}
+
+/// elimination - Phase 3 of the main algorithm. Perform full redundancy
+/// elimination by walking the dominator tree and removing any instruction that
+/// is dominated by another instruction with the same value number.
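+///
+/// For example (hypothetical): if "%a = add i32 %x, %y" dominates
+/// "%b = add i32 %x, %y", every use of %b is rewritten to use %a and %b is
+/// erased.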
+bool GVNPRE::elimination() {
+ bool changed_function = false;
+
+ SmallVector<std::pair<Instruction*, Value*>, 8> replace;
+ SmallVector<Instruction*, 8> erase;
+
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
+ E = df_end(DT.getRootNode()); DI != E; ++DI) {
+ BasicBlock* BB = DI->getBlock();
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+
+ if (isa<BinaryOperator>(BI) || isa<CmpInst>(BI) ||
+ isa<ShuffleVectorInst>(BI) || isa<InsertElementInst>(BI) ||
+ isa<ExtractElementInst>(BI) || isa<SelectInst>(BI) ||
+ isa<CastInst>(BI) || isa<GetElementPtrInst>(BI)) {
+
+ if (availableOut[BB].test(VN.lookup(BI)) &&
+ !availableOut[BB].count(BI)) {
+ Value *leader = find_leader(availableOut[BB], VN.lookup(BI));
+ if (Instruction* Instr = dyn_cast<Instruction>(leader))
+ if (Instr->getParent() != 0 && Instr != BI) {
+ replace.push_back(std::make_pair(BI, leader));
+ erase.push_back(BI);
+ ++NumEliminated;
+ }
+ }
+ }
+ }
+ }
+
+ while (!replace.empty()) {
+ std::pair<Instruction*, Value*> rep = replace.back();
+ replace.pop_back();
+ rep.first->replaceAllUsesWith(rep.second);
+ changed_function = true;
+ }
+
+ for (SmallVector<Instruction*, 8>::iterator I = erase.begin(),
+ E = erase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed_function;
+}
+
+/// cleanup - Delete any extraneous values that were created to represent
+/// expressions without leaders.
+void GVNPRE::cleanup() {
+ while (!createdExpressions.empty()) {
+ Instruction* I = createdExpressions.back();
+ createdExpressions.pop_back();
+
+ delete I;
+ }
+}
+
+/// buildsets_availout - When calculating availability, handle an instruction
+/// by inserting it into the appropriate sets
+void GVNPRE::buildsets_availout(BasicBlock::iterator I,
+ ValueNumberedSet& currAvail,
+ ValueNumberedSet& currPhis,
+ ValueNumberedSet& currExps,
+ SmallPtrSet<Value*, 16>& currTemps) {
+ // Handle PHI nodes
+ if (PHINode* p = dyn_cast<PHINode>(I)) {
+ unsigned num = VN.lookup_or_add(p);
+
+ currPhis.insert(p);
+ currPhis.set(num);
+
+ // Handle unary ops
+ } else if (CastInst* U = dyn_cast<CastInst>(I)) {
+ Value* leftValue = U->getOperand(0);
+
+ unsigned num = VN.lookup_or_add(U);
+
+ if (isa<Instruction>(leftValue))
+ if (!currExps.test(VN.lookup(leftValue))) {
+ currExps.insert(leftValue);
+ currExps.set(VN.lookup(leftValue));
+ }
+
+ if (!currExps.test(num)) {
+ currExps.insert(U);
+ currExps.set(num);
+ }
+
+ // Handle binary ops
+ } else if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+ isa<ExtractElementInst>(I)) {
+ User* U = cast<User>(I);
+ Value* leftValue = U->getOperand(0);
+ Value* rightValue = U->getOperand(1);
+
+ unsigned num = VN.lookup_or_add(U);
+
+ if (isa<Instruction>(leftValue))
+ if (!currExps.test(VN.lookup(leftValue))) {
+ currExps.insert(leftValue);
+ currExps.set(VN.lookup(leftValue));
+ }
+
+ if (isa<Instruction>(rightValue))
+ if (!currExps.test(VN.lookup(rightValue))) {
+ currExps.insert(rightValue);
+ currExps.set(VN.lookup(rightValue));
+ }
+
+ if (!currExps.test(num)) {
+ currExps.insert(U);
+ currExps.set(num);
+ }
+
+ // Handle ternary ops
+ } else if (isa<InsertElementInst>(I) || isa<ShuffleVectorInst>(I) ||
+ isa<SelectInst>(I)) {
+ User* U = cast<User>(I);
+ Value* leftValue = U->getOperand(0);
+ Value* rightValue = U->getOperand(1);
+ Value* thirdValue = U->getOperand(2);
+
+    unsigned num = VN.lookup_or_add(U);
+
+ if (isa<Instruction>(leftValue))
+ if (!currExps.test(VN.lookup(leftValue))) {
+ currExps.insert(leftValue);
+ currExps.set(VN.lookup(leftValue));
+ }
+ if (isa<Instruction>(rightValue))
+ if (!currExps.test(VN.lookup(rightValue))) {
+ currExps.insert(rightValue);
+ currExps.set(VN.lookup(rightValue));
+ }
+ if (isa<Instruction>(thirdValue))
+ if (!currExps.test(VN.lookup(thirdValue))) {
+ currExps.insert(thirdValue);
+ currExps.set(VN.lookup(thirdValue));
+ }
+
+ if (!currExps.test(num)) {
+ currExps.insert(U);
+ currExps.set(num);
+ }
+
+ // Handle vararg ops
+ } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(I)) {
+ Value* ptrValue = U->getPointerOperand();
+
+    unsigned num = VN.lookup_or_add(U);
+
+ if (isa<Instruction>(ptrValue))
+ if (!currExps.test(VN.lookup(ptrValue))) {
+ currExps.insert(ptrValue);
+ currExps.set(VN.lookup(ptrValue));
+ }
+
+ for (GetElementPtrInst::op_iterator OI = U->idx_begin(), OE = U->idx_end();
+ OI != OE; ++OI)
+ if (isa<Instruction>(*OI) && !currExps.test(VN.lookup(*OI))) {
+ currExps.insert(*OI);
+ currExps.set(VN.lookup(*OI));
+ }
+
+ if (!currExps.test(VN.lookup(U))) {
+ currExps.insert(U);
+ currExps.set(num);
+ }
+
+ // Handle opaque ops
+ } else if (!I->isTerminator()){
+ VN.lookup_or_add(I);
+
+ currTemps.insert(I);
+ }
+
+ if (!I->isTerminator())
+ if (!currAvail.test(VN.lookup(I))) {
+ currAvail.insert(I);
+ currAvail.set(VN.lookup(I));
+ }
+}
+
+/// buildsets_anticout - When walking the CFG in postorder, calculate the
+/// ANTIC_OUT set as a function of the ANTIC_IN sets of the block's successors.
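+///
+/// Informally, as a sketch of the dataflow equation (not a quotation from the
+/// GVN-PRE literature):
+///   one successor S:  ANTIC_OUT[BB] = phi_translate(ANTIC_IN[S])
+///   otherwise:        ANTIC_OUT[BB] = intersection of ANTIC_IN over succs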
+bool GVNPRE::buildsets_anticout(BasicBlock* BB,
+ ValueNumberedSet& anticOut,
+ SmallPtrSet<BasicBlock*, 8>& visited) {
+ if (BB->getTerminator()->getNumSuccessors() == 1) {
+ if (BB->getTerminator()->getSuccessor(0) != BB &&
+ visited.count(BB->getTerminator()->getSuccessor(0)) == 0) {
+ return true;
+ }
+ else {
+ phi_translate_set(anticipatedIn[BB->getTerminator()->getSuccessor(0)],
+ BB, BB->getTerminator()->getSuccessor(0), anticOut);
+ }
+ } else if (BB->getTerminator()->getNumSuccessors() > 1) {
+ BasicBlock* first = BB->getTerminator()->getSuccessor(0);
+ for (ValueNumberedSet::iterator I = anticipatedIn[first].begin(),
+ E = anticipatedIn[first].end(); I != E; ++I) {
+ anticOut.insert(*I);
+ anticOut.set(VN.lookup(*I));
+ }
+
+ for (unsigned i = 1; i < BB->getTerminator()->getNumSuccessors(); ++i) {
+ BasicBlock* currSucc = BB->getTerminator()->getSuccessor(i);
+ ValueNumberedSet& succAnticIn = anticipatedIn[currSucc];
+
+ SmallVector<Value*, 16> temp;
+
+ for (ValueNumberedSet::iterator I = anticOut.begin(),
+ E = anticOut.end(); I != E; ++I)
+ if (!succAnticIn.test(VN.lookup(*I)))
+ temp.push_back(*I);
+
+ for (SmallVector<Value*, 16>::iterator I = temp.begin(), E = temp.end();
+ I != E; ++I) {
+ anticOut.erase(*I);
+ anticOut.reset(VN.lookup(*I));
+ }
+ }
+ }
+
+ return false;
+}
+
+/// buildsets_anticin - Walk the CFG in postorder, calculating ANTIC_OUT for
+/// each block. ANTIC_IN is then a function of ANTIC_OUT and the GEN
+/// sets populated in buildsets_availout.
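+///
+/// Informally: ANTIC_IN[BB] = clean((ANTIC_OUT[BB] union EXP_GEN[BB]) minus
+/// TMP_GEN[BB]), taken over value numbers. The return value encodes the
+/// outcome: 0 = deferred (a needed successor was not yet visited), 1 =
+/// recomputed but unchanged, 2 = recomputed and changed.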
+unsigned GVNPRE::buildsets_anticin(BasicBlock* BB,
+ ValueNumberedSet& anticOut,
+ ValueNumberedSet& currExps,
+ SmallPtrSet<Value*, 16>& currTemps,
+ SmallPtrSet<BasicBlock*, 8>& visited) {
+ ValueNumberedSet& anticIn = anticipatedIn[BB];
+ unsigned old = anticIn.size();
+
+ bool defer = buildsets_anticout(BB, anticOut, visited);
+ if (defer)
+ return 0;
+
+ anticIn.clear();
+
+ for (ValueNumberedSet::iterator I = anticOut.begin(),
+ E = anticOut.end(); I != E; ++I) {
+ anticIn.insert(*I);
+ anticIn.set(VN.lookup(*I));
+ }
+ for (ValueNumberedSet::iterator I = currExps.begin(),
+ E = currExps.end(); I != E; ++I) {
+ if (!anticIn.test(VN.lookup(*I))) {
+ anticIn.insert(*I);
+ anticIn.set(VN.lookup(*I));
+ }
+ }
+
+ for (SmallPtrSet<Value*, 16>::iterator I = currTemps.begin(),
+ E = currTemps.end(); I != E; ++I) {
+ anticIn.erase(*I);
+ anticIn.reset(VN.lookup(*I));
+ }
+
+ clean(anticIn);
+ anticOut.clear();
+
+ if (old != anticIn.size())
+ return 2;
+ else
+ return 1;
+}
+
+/// buildsets - Phase 1 of the main algorithm. Construct the AVAIL_OUT
+/// and the ANTIC_IN sets.
+void GVNPRE::buildsets(Function& F) {
+ DenseMap<BasicBlock*, ValueNumberedSet> generatedExpressions;
+ DenseMap<BasicBlock*, SmallPtrSet<Value*, 16> > generatedTemporaries;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ // Phase 1, Part 1: calculate AVAIL_OUT
+
+ // Top-down walk of the dominator tree
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
+ E = df_end(DT.getRootNode()); DI != E; ++DI) {
+
+ // Get the sets to update for this block
+ ValueNumberedSet& currExps = generatedExpressions[DI->getBlock()];
+ ValueNumberedSet& currPhis = generatedPhis[DI->getBlock()];
+ SmallPtrSet<Value*, 16>& currTemps = generatedTemporaries[DI->getBlock()];
+ ValueNumberedSet& currAvail = availableOut[DI->getBlock()];
+
+ BasicBlock* BB = DI->getBlock();
+
+ // A block inherits AVAIL_OUT from its dominator
+ if (DI->getIDom() != 0)
+ currAvail = availableOut[DI->getIDom()->getBlock()];
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI)
+ buildsets_availout(BI, currAvail, currPhis, currExps,
+ currTemps);
+
+ }
+
+ // Phase 1, Part 2: calculate ANTIC_IN
+
+ SmallPtrSet<BasicBlock*, 8> visited;
+ SmallPtrSet<BasicBlock*, 4> block_changed;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ block_changed.insert(FI);
+
+ bool changed = true;
+ unsigned iterations = 0;
+
+ while (changed) {
+ changed = false;
+ ValueNumberedSet anticOut;
+
+ // Postorder walk of the CFG
+ for (po_iterator<BasicBlock*> BBI = po_begin(&F.getEntryBlock()),
+ BBE = po_end(&F.getEntryBlock()); BBI != BBE; ++BBI) {
+ BasicBlock* BB = *BBI;
+
+ if (block_changed.count(BB) != 0) {
+        unsigned ret = buildsets_anticin(BB, anticOut,
+                                         generatedExpressions[BB],
+                                         generatedTemporaries[BB], visited);
+
+ if (ret == 0) {
+ changed = true;
+ continue;
+ } else {
+ visited.insert(BB);
+
+ if (ret == 2)
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+ block_changed.insert(*PI);
+ }
+ else
+ block_changed.erase(BB);
+
+ changed |= (ret == 2);
+ }
+ }
+ }
+
+ iterations++;
+ }
+}
+
+/// insertion_pre - When a partial redundancy has been identified, eliminate it
+/// by inserting appropriate values into the predecessors and a phi node in
+/// the main block
+void GVNPRE::insertion_pre(Value* e, BasicBlock* BB,
+ DenseMap<BasicBlock*, Value*>& avail,
+ std::map<BasicBlock*, ValueNumberedSet>& new_sets) {
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ Value* e2 = avail[*PI];
+ if (!availableOut[*PI].test(VN.lookup(e2))) {
+ User* U = cast<User>(e2);
+
+ Value* s1 = 0;
+ if (isa<BinaryOperator>(U->getOperand(0)) ||
+ isa<CmpInst>(U->getOperand(0)) ||
+ isa<ShuffleVectorInst>(U->getOperand(0)) ||
+ isa<ExtractElementInst>(U->getOperand(0)) ||
+ isa<InsertElementInst>(U->getOperand(0)) ||
+ isa<SelectInst>(U->getOperand(0)) ||
+ isa<CastInst>(U->getOperand(0)) ||
+ isa<GetElementPtrInst>(U->getOperand(0)))
+ s1 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(0)));
+ else
+ s1 = U->getOperand(0);
+
+ Value* s2 = 0;
+
+ if (isa<BinaryOperator>(U) ||
+ isa<CmpInst>(U) ||
+ isa<ShuffleVectorInst>(U) ||
+ isa<ExtractElementInst>(U) ||
+ isa<InsertElementInst>(U) ||
+ isa<SelectInst>(U)) {
+ if (isa<BinaryOperator>(U->getOperand(1)) ||
+ isa<CmpInst>(U->getOperand(1)) ||
+ isa<ShuffleVectorInst>(U->getOperand(1)) ||
+ isa<ExtractElementInst>(U->getOperand(1)) ||
+ isa<InsertElementInst>(U->getOperand(1)) ||
+ isa<SelectInst>(U->getOperand(1)) ||
+ isa<CastInst>(U->getOperand(1)) ||
+ isa<GetElementPtrInst>(U->getOperand(1))) {
+ s2 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(1)));
+ } else {
+ s2 = U->getOperand(1);
+ }
+ }
+
+ // Ternary Operators
+ Value* s3 = 0;
+ if (isa<ShuffleVectorInst>(U) ||
+ isa<InsertElementInst>(U) ||
+ isa<SelectInst>(U)) {
+ if (isa<BinaryOperator>(U->getOperand(2)) ||
+ isa<CmpInst>(U->getOperand(2)) ||
+ isa<ShuffleVectorInst>(U->getOperand(2)) ||
+ isa<ExtractElementInst>(U->getOperand(2)) ||
+ isa<InsertElementInst>(U->getOperand(2)) ||
+ isa<SelectInst>(U->getOperand(2)) ||
+ isa<CastInst>(U->getOperand(2)) ||
+ isa<GetElementPtrInst>(U->getOperand(2))) {
+ s3 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(2)));
+ } else {
+ s3 = U->getOperand(2);
+ }
+ }
+
+ // Vararg operators
+ SmallVector<Value*, 4> sVarargs;
+ if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(U)) {
+ for (GetElementPtrInst::op_iterator OI = G->idx_begin(),
+ OE = G->idx_end(); OI != OE; ++OI) {
+ if (isa<BinaryOperator>(*OI) ||
+ isa<CmpInst>(*OI) ||
+ isa<ShuffleVectorInst>(*OI) ||
+ isa<ExtractElementInst>(*OI) ||
+ isa<InsertElementInst>(*OI) ||
+ isa<SelectInst>(*OI) ||
+ isa<CastInst>(*OI) ||
+ isa<GetElementPtrInst>(*OI)) {
+ sVarargs.push_back(find_leader(availableOut[*PI],
+ VN.lookup(*OI)));
+ } else {
+ sVarargs.push_back(*OI);
+ }
+ }
+ }
+
+ Value* newVal = 0;
+ if (BinaryOperator* BO = dyn_cast<BinaryOperator>(U))
+ newVal = BinaryOperator::Create(BO->getOpcode(), s1, s2,
+ BO->getName()+".gvnpre",
+ (*PI)->getTerminator());
+ else if (CmpInst* C = dyn_cast<CmpInst>(U))
+ newVal = CmpInst::Create(C->getOpcode(), C->getPredicate(), s1, s2,
+ C->getName()+".gvnpre",
+ (*PI)->getTerminator());
+ else if (ShuffleVectorInst* S = dyn_cast<ShuffleVectorInst>(U))
+ newVal = new ShuffleVectorInst(s1, s2, s3, S->getName()+".gvnpre",
+ (*PI)->getTerminator());
+ else if (InsertElementInst* S = dyn_cast<InsertElementInst>(U))
+ newVal = InsertElementInst::Create(s1, s2, s3, S->getName()+".gvnpre",
+ (*PI)->getTerminator());
+ else if (ExtractElementInst* S = dyn_cast<ExtractElementInst>(U))
+ newVal = new ExtractElementInst(s1, s2, S->getName()+".gvnpre",
+ (*PI)->getTerminator());
+ else if (SelectInst* S = dyn_cast<SelectInst>(U))
+ newVal = SelectInst::Create(s1, s2, s3, S->getName()+".gvnpre",
+ (*PI)->getTerminator());
+ else if (CastInst* C = dyn_cast<CastInst>(U))
+ newVal = CastInst::Create(C->getOpcode(), s1, C->getType(),
+ C->getName()+".gvnpre",
+ (*PI)->getTerminator());
+ else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(U))
+ newVal = GetElementPtrInst::Create(s1, sVarargs.begin(), sVarargs.end(),
+ G->getName()+".gvnpre",
+ (*PI)->getTerminator());
+
+ VN.add(newVal, VN.lookup(U));
+
+ ValueNumberedSet& predAvail = availableOut[*PI];
+ val_replace(predAvail, newVal);
+ val_replace(new_sets[*PI], newVal);
+ predAvail.set(VN.lookup(newVal));
+
+ DenseMap<BasicBlock*, Value*>::iterator av = avail.find(*PI);
+ if (av != avail.end())
+ avail.erase(av);
+ avail.insert(std::make_pair(*PI, newVal));
+
+ ++NumInsertedVals;
+ }
+ }
+
+ PHINode* p = 0;
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (p == 0)
+ p = PHINode::Create(avail[*PI]->getType(), "gvnpre-join", BB->begin());
+
+ p->addIncoming(avail[*PI], *PI);
+ }
+
+ VN.add(p, VN.lookup(e));
+ val_replace(availableOut[BB], p);
+ availableOut[BB].set(VN.lookup(e));
+ generatedPhis[BB].insert(p);
+ generatedPhis[BB].set(VN.lookup(e));
+ new_sets[BB].insert(p);
+ new_sets[BB].set(VN.lookup(e));
+
+ ++NumInsertedPhis;
+}
+
+/// insertion_mergepoint - When walking the dom tree, check at each merge
+/// block for the possibility of a partial redundancy. If present, eliminate it
+unsigned GVNPRE::insertion_mergepoint(SmallVector<Value*, 8>& workList,
+ df_iterator<DomTreeNode*>& D,
+                                      std::map<BasicBlock*, ValueNumberedSet>& new_sets) {
+ bool changed_function = false;
+ bool new_stuff = false;
+
+ BasicBlock* BB = D->getBlock();
+ for (unsigned i = 0; i < workList.size(); ++i) {
+ Value* e = workList[i];
+
+ if (isa<BinaryOperator>(e) || isa<CmpInst>(e) ||
+ isa<ExtractElementInst>(e) || isa<InsertElementInst>(e) ||
+ isa<ShuffleVectorInst>(e) || isa<SelectInst>(e) || isa<CastInst>(e) ||
+ isa<GetElementPtrInst>(e)) {
+ if (availableOut[D->getIDom()->getBlock()].test(VN.lookup(e)))
+ continue;
+
+ DenseMap<BasicBlock*, Value*> avail;
+ bool by_some = false;
+ bool all_same = true;
+ Value * first_s = 0;
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;
+ ++PI) {
+ Value *e2 = phi_translate(e, *PI, BB);
+ Value *e3 = find_leader(availableOut[*PI], VN.lookup(e2));
+
+ if (e3 == 0) {
+ DenseMap<BasicBlock*, Value*>::iterator av = avail.find(*PI);
+ if (av != avail.end())
+ avail.erase(av);
+ avail.insert(std::make_pair(*PI, e2));
+ all_same = false;
+ } else {
+ DenseMap<BasicBlock*, Value*>::iterator av = avail.find(*PI);
+ if (av != avail.end())
+ avail.erase(av);
+ avail.insert(std::make_pair(*PI, e3));
+
+ by_some = true;
+ if (first_s == 0)
+ first_s = e3;
+ else if (first_s != e3)
+ all_same = false;
+ }
+ }
+
+ if (by_some && !all_same &&
+ !generatedPhis[BB].test(VN.lookup(e))) {
+ insertion_pre(e, BB, avail, new_sets);
+
+ changed_function = true;
+ new_stuff = true;
+ }
+ }
+ }
+
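+  // Pack the two outcomes into a bitmask for the caller: bit 0 means the
+  // function was changed, bit 1 means new values became available and another
+  // insertion pass is worthwhile.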
+ unsigned retval = 0;
+ if (changed_function)
+ retval += 1;
+ if (new_stuff)
+ retval += 2;
+
+ return retval;
+}
+
+/// insertion - Phase 2 of the main algorithm. Walk the dominator tree looking
+/// for merge points. When one is found, check for a partial redundancy. If one
+/// is present, eliminate it. Repeat this walk until no changes are made.
+bool GVNPRE::insertion(Function& F) {
+ bool changed_function = false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ std::map<BasicBlock*, ValueNumberedSet> new_sets;
+ bool new_stuff = true;
+ while (new_stuff) {
+ new_stuff = false;
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
+ E = df_end(DT.getRootNode()); DI != E; ++DI) {
+ BasicBlock* BB = DI->getBlock();
+
+ if (BB == 0)
+ continue;
+
+ ValueNumberedSet& availOut = availableOut[BB];
+ ValueNumberedSet& anticIn = anticipatedIn[BB];
+
+ // Replace leaders with leaders inherited from dominator
+ if (DI->getIDom() != 0) {
+ ValueNumberedSet& dom_set = new_sets[DI->getIDom()->getBlock()];
+ for (ValueNumberedSet::iterator I = dom_set.begin(),
+ E = dom_set.end(); I != E; ++I) {
+ val_replace(new_sets[BB], *I);
+ val_replace(availOut, *I);
+ }
+ }
+
+ // If there is more than one predecessor...
+ if (pred_begin(BB) != pred_end(BB) && ++pred_begin(BB) != pred_end(BB)) {
+ SmallVector<Value*, 8> workList;
+ workList.reserve(anticIn.size());
+ topo_sort(anticIn, workList);
+
+ unsigned result = insertion_mergepoint(workList, DI, new_sets);
+ if (result & 1)
+ changed_function = true;
+ if (result & 2)
+ new_stuff = true;
+ }
+ }
+ }
+
+ return changed_function;
+}
+
+// GVNPRE::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool GVNPRE::runOnFunction(Function &F) {
+ // Clean out global sets from any previous functions
+ VN.clear();
+ createdExpressions.clear();
+ availableOut.clear();
+ anticipatedIn.clear();
+ generatedPhis.clear();
+
+ bool changed_function = false;
+
+ // Phase 1: BuildSets
+ // This phase calculates the AVAIL_OUT and ANTIC_IN sets
+ buildsets(F);
+
+ // Phase 2: Insert
+ // This phase inserts values to make partially redundant values
+ // fully redundant
+ changed_function |= insertion(F);
+
+ // Phase 3: Eliminate
+ // This phase performs trivial full redundancy elimination
+ changed_function |= elimination();
+
+ // Phase 4: Cleanup
+ // This phase cleans up values that were created solely
+ // as leaders for expressions
+ cleanup();
+
+ return changed_function;
+}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
new file mode 100644
index 0000000..ca7aa7b
--- /dev/null
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -0,0 +1,880 @@
+//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into simpler forms suitable for subsequent
+// analysis and transformation.
+//
+// This transformation makes the following changes to each loop with an
+// identifiable induction variable:
+// 1. All loops are transformed to have a SINGLE canonical induction variable
+// which starts at zero and steps by one.
+// 2. The canonical induction variable is guaranteed to be the first PHI node
+// in the loop header block.
+// 3. Any pointer arithmetic recurrences are raised to use array subscripts.
+//
+// If the trip count of a loop is computable, this pass also makes the following
+// changes:
+// 1. The exit condition for the loop is canonicalized to compare the
+// induction value against the exit value. This turns loops like:
+// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
+// 2. Any use outside of the loop of an expression derived from the indvar
+// is changed to compute the derived value outside of the loop, eliminating
+// the dependence on the exit value of the induction variable. If the only
+// purpose of the loop is to compute the exit value of some derived
+// expression, this transformation will make the loop dead.
+//
+// This transformation should be followed by strength reduction after all of the
+// desired loop transformations have been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumRemoved , "Number of aux indvars removed");
+STATISTIC(NumInserted, "Number of canonical indvars added");
+STATISTIC(NumReplaced, "Number of exit values replaced");
+STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+
+namespace {
+ class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass {
+ IVUsers *IU;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ bool Changed;
+ public:
+
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify() : LoopPass(&ID) {}
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LCSSAID);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<IVUsers>();
+ AU.addPreservedID(LCSSAID);
+ AU.setPreservesCFG();
+ }
+
+ private:
+
+ void RewriteNonIntegerIVs(Loop *L);
+
+ ICmpInst *LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount,
+ Value *IndVar,
+ BasicBlock *ExitingBlock,
+ BranchInst *BI,
+ SCEVExpander &Rewriter);
+ void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
+
+ void RewriteIVExpressions(Loop *L, const Type *LargestType,
+ SCEVExpander &Rewriter);
+
+ void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
+
+ void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter);
+
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ };
+}
+
+char IndVarSimplify::ID = 0;
+static RegisterPass<IndVarSimplify>
+X("indvars", "Canonicalize Induction Variables");
+
+Pass *llvm::createIndVarSimplifyPass() {
+ return new IndVarSimplify();
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable. This pass is able to rewrite the exit tests of any loop where the
+/// SCEV analysis can determine a loop-invariant trip count of the loop, which
+/// is actually a much broader range than just linear tests.
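+///
+/// For example (an illustrative sketch): for a loop whose backedge-taken
+/// count SCEV evaluates to %n - 1, the exit branch is rewritten to test
+///   %exitcond = icmp ne i32 %indvar.next, %n
+/// against the incremented canonical induction variable.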
+ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
+ SCEVHandle BackedgeTakenCount,
+ Value *IndVar,
+ BasicBlock *ExitingBlock,
+ BranchInst *BI,
+ SCEVExpander &Rewriter) {
+  // If the exiting block is not the same as the backedge block, we must compare
+  // against the preincremented value; otherwise we prefer to compare against
+  // the post-incremented value.
+ Value *CmpIndVar;
+ SCEVHandle RHS = BackedgeTakenCount;
+ if (ExitingBlock == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // If this addition may overflow, we have to be more pessimistic and
+ // cast the induction variable before doing the add.
+ SCEVHandle Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
+ SCEVHandle N =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getIntegerSCEV(1, BackedgeTakenCount->getType()));
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+ } else {
+ // Potential overflow. Cast before doing the add.
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ RHS = SE->getAddExpr(RHS,
+ SE->getIntegerSCEV(1, IndVar->getType()));
+ }
+
+ // The BackedgeTaken expression contains the number of times that the
+ // backedge branches to the loop header. This is one less than the
+ // number of times the loop executes, so use the incremented indvar.
+ CmpIndVar = L->getCanonicalInductionVariableIncrement();
+ } else {
+ // We have to use the preincremented value...
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ CmpIndVar = IndVar;
+ }
+
+ // Expand the code for the iteration count into the preheader of the loop.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(),
+ Preheader->getTerminator());
+
+ // Insert a new icmp_ne or icmp_eq instruction before the branch.
+ ICmpInst::Predicate Opcode;
+ if (L->contains(BI->getSuccessor(0)))
+ Opcode = ICmpInst::ICMP_NE;
+ else
+ Opcode = ICmpInst::ICMP_EQ;
+
+ DOUT << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar // includes a newline
+ << " op:\t"
+ << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *RHS << "\n";
+
+ ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
+
+ Instruction *OrigCond = cast<Instruction>(BI->getCondition());
+ // It's tempting to use replaceAllUsesWith here to fully replace the old
+ // comparison, but that's not immediately safe, since users of the old
+ // comparison may not be dominated by the new comparison. Instead, just
+ // update the branch to use the new comparison; in the common case this
+  // will make the old comparison dead.
+ BI->setCondition(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
+
+ ++NumLFTR;
+ Changed = true;
+ return Cond;
+}
+
+/// RewriteLoopExitValues - Check to see if this loop has a computable
+/// loop-invariant execution count. If so, this means that we can compute the
+/// final value of any expressions that are recurrent in the loop, and
+/// substitute the exit values from the loop into any instructions outside of
+/// the loop that use the final values of the current expressions.
+///
+/// This is mostly redundant with the regular IndVarSimplify activities that
+/// happen later, except that it's more powerful in some cases, because it's
+/// able to brute-force evaluate arbitrary instructions as long as they have
+/// constant operands at the beginning of the loop.
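+///
+/// For example (an illustrative sketch): in
+///   for (i = 0; i != 10; ++i) sum += 2;
+///   use(sum);
+/// the trip count is 10, so the use outside the loop can be rewritten to
+/// use(20), potentially leaving the loop itself dead.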
+void IndVarSimplify::RewriteLoopExitValues(Loop *L,
+ const SCEV *BackedgeTakenCount) {
+  // Verify the input to the pass is already in LCSSA form.
+ assert(L->isLCSSAForm());
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+
+ // Scan all of the instructions in the loop, looking at those that have
+ // extra-loop users and which are recurrences.
+ SCEVExpander Rewriter(*SE);
+
+ // We insert the code into the preheader of the loop if the loop contains
+ // multiple exit blocks, or in the exit block if there is exactly one.
+ BasicBlock *BlockToInsertInto;
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() == 1)
+ BlockToInsertInto = ExitBlocks[0];
+ else
+ BlockToInsertInto = Preheader;
+ BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI();
+
+ std::map<Instruction*, Value*> ExitValues;
+
+ // Find all values that are computed inside the loop, but used outside of it.
+ // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
+ // the exit blocks of the loop to find them.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBB = ExitBlocks[i];
+
+    // If there are no PHI nodes in this exit block, then no values defined
+    // inside the loop are used on this path; skip it.
+ PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
+ if (!PN) continue;
+
+ unsigned NumPreds = PN->getNumIncomingValues();
+
+ // Iterate over all of the PHI nodes.
+ BasicBlock::iterator BBI = ExitBB->begin();
+ while ((PN = dyn_cast<PHINode>(BBI++))) {
+ if (PN->use_empty())
+ continue; // dead use, don't replace it
+ // Iterate over all of the values in all the PHI nodes.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+        // If the value being merged in is not an integer or pointer, or is
+        // not defined in the loop, skip it.
+ Value *InVal = PN->getIncomingValue(i);
+ if (!isa<Instruction>(InVal) ||
+ // SCEV only supports integer expressions for now.
+ (!isa<IntegerType>(InVal->getType()) &&
+ !isa<PointerType>(InVal->getType())))
+ continue;
+
+ // If this pred is for a subloop, not L itself, skip it.
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
+ continue; // The Block is in a subloop, skip it.
+
+ // Check that InVal is defined in the loop.
+ Instruction *Inst = cast<Instruction>(InVal);
+ if (!L->contains(Inst->getParent()))
+ continue;
+
+ // Okay, this instruction has a user outside of the current loop
+ // and varies predictably *inside* the loop. Evaluate the value it
+ // contains when the loop exits, if possible.
+ SCEVHandle ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ if (!ExitValue->isLoopInvariant(L))
+ continue;
+
+ Changed = true;
+ ++NumReplaced;
+
+        // See if we already computed the exit value for the instruction; if
+        // so, just reuse it.
+ Value *&ExitVal = ExitValues[Inst];
+ if (!ExitVal)
+ ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), InsertPt);
+
+ DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
+ << " LoopVal = " << *Inst << "\n";
+
+ PN->setIncomingValue(i, ExitVal);
+
+ // If this instruction is dead now, delete it.
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
+        // See if this is a single-entry LCSSA PHI node. If so, we can (and
+        // have to) remove the PHI entirely. This is safe, because ExitVal
+        // won't be variant in the loop, so we don't need an LCSSA phi node
+        // anymore.
+ if (NumPreds == 1) {
+ PN->replaceAllUsesWith(ExitVal);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ break;
+ }
+ }
+ }
+ }
+}
+
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+ // First step. Check to see if there are any floating-point recurrences.
+ // If there are, change them into integer recurrences, permitting analysis by
+ // the SCEV routines.
+ //
+ BasicBlock *Header = L->getHeader();
+
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i]))
+ HandleFloatingPointIV(L, PN);
+
+  // If the loop previously had a floating-point IV, ScalarEvolution
+  // may not have been able to compute a trip count. Now that we've done some
+  // rewriting, the trip count may be computable.
+ if (Changed)
+ SE->forgetLoopBackedgeTakenCount(L);
+}
+
+bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ IU = &getAnalysis<IVUsers>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ Changed = false;
+
+ // If there are any floating-point recurrences, attempt to
+ // transform them to use integer recurrences.
+ RewriteNonIntegerIVs(L);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
+ SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+
+ // Check to see if this loop has a computable loop-invariant execution count.
+ // If so, this means that we can compute the final value of any expressions
+ // that are recurrent in the loop, and substitute the exit values from the
+ // loop into any instructions outside of the loop that use the final values of
+ // the current expressions.
+ //
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ RewriteLoopExitValues(L, BackedgeTakenCount);
+
+ // Compute the type of the largest recurrence expression, and decide whether
+ // a canonical induction variable should be inserted.
+ const Type *LargestType = 0;
+ bool NeedCannIV = false;
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ LargestType = BackedgeTakenCount->getType();
+ LargestType = SE->getEffectiveSCEVType(LargestType);
+ // If we have a known trip count and a single exit block, we'll be
+ // rewriting the loop exit test condition below, which requires a
+ // canonical induction variable.
+ if (ExitingBlock)
+ NeedCannIV = true;
+ }
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ SCEVHandle Stride = IU->StrideOrder[i];
+ const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
+ if (!LargestType ||
+ SE->getTypeSizeInBits(Ty) >
+ SE->getTypeSizeInBits(LargestType))
+ LargestType = Ty;
+
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+
+ if (!SI->second->Users.empty())
+ NeedCannIV = true;
+ }
+
+ // Create a rewriter object which we'll use to transform the code with.
+ SCEVExpander Rewriter(*SE);
+
+  // Now that we know the largest of the induction variable expressions
+ // in this loop, insert a canonical induction variable of the largest size.
+ Value *IndVar = 0;
+ if (NeedCannIV) {
+ IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+ ++NumInserted;
+ Changed = true;
+ DOUT << "INDVARS: New CanIV: " << *IndVar;
+ }
+
+ // If we have a trip count expression, rewrite the loop's exit condition
+ // using it. We can currently only handle loops with a single exit.
+ ICmpInst *NewICmp = 0;
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) {
+ assert(NeedCannIV &&
+ "LinearFunctionTestReplace requires a canonical induction variable");
+ // Can't rewrite non-branch yet.
+ if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
+ NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ ExitingBlock, BI, Rewriter);
+ }
+
+ Rewriter.setInsertionPoint(Header->getFirstNonPHI());
+
+ // Rewrite IV-derived expressions. Clears the rewriter cache.
+ RewriteIVExpressions(L, LargestType, Rewriter);
+
+ // The Rewriter may only be used for isInsertedInstruction queries from this
+ // point on.
+
+ // Loop-invariant instructions in the preheader that aren't used in the
+ // loop may be sunk below the loop to reduce register pressure.
+ SinkUnusedInvariants(L, Rewriter);
+
+ // Reorder instructions to avoid use-before-def conditions.
+ FixUsesBeforeDefs(L, Rewriter);
+
+ // For completeness, inform IVUsers of the IV use in the newly-created
+ // loop exit test instruction.
+ if (NewICmp)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
+
+ // Clean up dead instructions.
+ DeleteDeadPHIs(L->getHeader());
+ // Check a post-condition.
+ assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!");
+ return Changed;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
+ SCEVExpander &Rewriter) {
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
+ //
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ SCEVHandle Stride = IU->StrideOrder[i];
+
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ ilist<IVStrideUse> &List = SI->second->Users;
+ for (ilist<IVStrideUse>::iterator UI = List.begin(),
+ E = List.end(); UI != E; ++UI) {
+ SCEVHandle Offset = UI->getOffset();
+ Value *Op = UI->getOperandValToReplace();
+ Instruction *User = UI->getUser();
+ bool isSigned = UI->isSigned();
+
+ // Compute the final addrec to expand into code.
+ SCEVHandle AR = IU->getReplacementExpr(*UI);
+
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+      // polynomial evaluations inside of the loop, and the strength reduction
+      // pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!Stride->isLoopInvariant(L) &&
+ !isa<SCEVConstant>(AR) &&
+ L->contains(User->getParent()))
+ continue;
+
+ Value *NewVal = 0;
+ if (AR->isLoopInvariant(L)) {
+ BasicBlock::iterator I = Rewriter.getInsertionPoint();
+ // Expand loop-invariant values in the loop preheader. They will
+ // be sunk to the exit block later, if possible.
+ NewVal =
+ Rewriter.expandCodeFor(AR, LargestType,
+ L->getLoopPreheader()->getTerminator());
+ Rewriter.setInsertionPoint(I);
+ ++NumReplaced;
+ } else {
+ const Type *IVTy = Offset->getType();
+ const Type *UseTy = Op->getType();
+
+ // Promote the Offset and Stride up to the canonical induction
+ // variable's bit width.
+ SCEVHandle PromotedOffset = Offset;
+ SCEVHandle PromotedStride = Stride;
+ if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) {
+ // It doesn't matter for correctness whether zero or sign extension
+ // is used here, since the value is truncated away below, but if the
+ // value is signed, sign extension is more likely to be folded.
+ if (isSigned) {
+ PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType);
+ PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType);
+ } else {
+ PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType);
+ // If the stride is obviously negative, use sign extension to
+ // produce things like x-1 instead of x+255.
+ if (isa<SCEVConstant>(PromotedStride) &&
+ cast<SCEVConstant>(PromotedStride)
+ ->getValue()->getValue().isNegative())
+ PromotedStride = SE->getSignExtendExpr(PromotedStride,
+ LargestType);
+ else
+ PromotedStride = SE->getZeroExtendExpr(PromotedStride,
+ LargestType);
+ }
+ }
+
+ // Create the SCEV representing the offset from the canonical
+ // induction variable, still in the canonical induction variable's
+ // type, so that all expanded arithmetic is done in the same type.
+ SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType),
+ PromotedStride, L);
+ // Add the PromotedOffset as a separate step, because it may not be
+ // loop-invariant.
+ NewAR = SE->getAddExpr(NewAR, PromotedOffset);
+
+ // Expand the addrec into instructions.
+ Value *V = Rewriter.expandCodeFor(NewAR);
+
+ // Insert an explicit cast if necessary to truncate the value
+ // down to the original stride type. This is done outside of
+ // SCEVExpander because in SCEV expressions, a truncate of an
+ // addrec is always folded.
+ if (LargestType != IVTy) {
+ if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType))
+ NewAR = SE->getTruncateExpr(NewAR, IVTy);
+ if (Rewriter.isInsertedExpression(NewAR))
+ V = Rewriter.expandCodeFor(NewAR);
+ else {
+ V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false,
+ IVTy, false),
+ V, IVTy);
+ assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) &&
+ "LargestType wasn't actually the largest type!");
+ // Force the rewriter to use this trunc whenever this addrec
+ // appears so that it doesn't insert new phi nodes or
+ // arithmetic in a different type.
+ Rewriter.addInsertedValue(V, NewAR);
+ }
+ }
+
+ DOUT << "INDVARS: Made offset-and-trunc IV for offset "
+ << *IVTy << " " << *Offset << ": ";
+ DEBUG(WriteAsOperand(*DOUT, V, false));
+ DOUT << "\n";
+
+ // Now expand it into actual Instructions and patch it into place.
+ NewVal = Rewriter.expandCodeFor(AR, UseTy);
+ }
+
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+ DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op
+ << " into = " << *NewVal << "\n";
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
+ }
+ }
+
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted in the loop below, causing the AssertingVH in the cache to
+ // trigger.
+ Rewriter.clear();
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty()) {
+ Instruction *Inst = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+ if (Inst)
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+}
+
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
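+///
+/// For example (an illustrative sketch): if the rewriter emitted
+///   %cnt = mul i32 %n, 4
+/// in the preheader and %cnt is used only in the exit block, the mul is moved
+/// into the exit block so it does not hold a register across the loop body.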
+void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ Instruction *NonPHI = ExitBlock->getFirstNonPHI();
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock::iterator I = Preheader->getTerminator();
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader. Only
+ // consider those new instructions.
+ if (!Rewriter.isInsertedInstruction(I))
+ break;
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ BasicBlock *UseBB = cast<Instruction>(UI)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(UI)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+ // If there is, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+ // Otherwise, sink it to the exit block.
+ Instruction *ToMove = I;
+ bool Done = false;
+ if (I != Preheader->begin())
+ --I;
+ else
+ Done = true;
+ ToMove->moveBefore(NonPHI);
+ if (Done)
+ break;
+ }
+}
+
+/// Re-schedule the inserted instructions to put defs before uses. This
+/// fixes problems that arise when SCEV expressions contain loop-variant
+/// values unrelated to the induction variable which are defined inside the
+/// loop. FIXME: It would be better to insert instructions in the right
+/// place so that this step isn't needed.
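+///
+/// For example (an illustrative sketch): if expansion emitted
+///   %sum = add i32 %x, %y
+/// at a point that precedes the definition of %y inside the loop, this pass
+/// moves %sum (and, recursively, its dependents) to just after that
+/// definition.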
+void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) {
+ // Visit all the blocks in the loop in pre-order dom-tree dfs order.
+ DominatorTree *DT = &getAnalysis<DominatorTree>();
+ std::map<Instruction *, unsigned> NumPredsLeft;
+ SmallVector<DomTreeNode *, 16> Worklist;
+ Worklist.push_back(DT->getNode(L->getHeader()));
+ do {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I)
+ if (L->contains((*I)->getBlock()))
+ Worklist.push_back(*I);
+ BasicBlock *BB = Node->getBlock();
+ // Visit all the instructions in the block top down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Count the number of operands that aren't properly dominating.
+ unsigned NumPreds = 0;
+ if (Rewriter.isInsertedInstruction(I) && !isa<PHINode>(I))
+ for (User::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (Instruction *Inst = dyn_cast<Instruction>(OI))
+ if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst))
+ ++NumPreds;
+ NumPredsLeft[I] = NumPreds;
+ // Notify uses of the position of this instruction, and move the
+ // users (and their dependents, recursively) into place after this
+ // instruction if it is their last outstanding operand.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Instruction *Inst = cast<Instruction>(UI);
+ std::map<Instruction *, unsigned>::iterator Z = NumPredsLeft.find(Inst);
+ if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) {
+ SmallVector<Instruction *, 4> UseWorkList;
+ UseWorkList.push_back(Inst);
+ BasicBlock::iterator InsertPt = I;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InsertPt))
+ InsertPt = II->getNormalDest()->begin();
+ else
+ ++InsertPt;
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ do {
+ Instruction *Use = UseWorkList.pop_back_val();
+ Use->moveBefore(InsertPt);
+ NumPredsLeft.erase(Use);
+ for (Value::use_iterator IUI = Use->use_begin(),
+ IUE = Use->use_end(); IUI != IUE; ++IUI) {
+ Instruction *IUIInst = cast<Instruction>(IUI);
+ if (L->contains(IUIInst->getParent()) &&
+ Rewriter.isInsertedInstruction(IUIInst) &&
+ !isa<PHINode>(IUIInst))
+ UseWorkList.push_back(IUIInst);
+ }
+ } while (!UseWorkList.empty());
+ }
+ }
+ }
+ } while (!Worklist.empty());
+}
+
+/// Return true if it is OK to use SIToFPInst for an induction variable
+/// with given initial and exit values.
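+/// For example (an illustrative sketch): with InitV = 0.0 and ExitV = 100.0
+/// the iteration range is far below the signed 32-bit maximum, so SIToFPInst
+/// is acceptable; a negative initial or exit value likewise forces the
+/// signed form.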
+static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,
+ uint64_t intIV, uint64_t intEV) {
+
+ if (InitV.getValueAPF().isNegative() || ExitV.getValueAPF().isNegative())
+ return true;
+
+ // If the iteration range can be handled by SIToFPInst then use it.
+ APInt Max = APInt::getSignedMaxValue(32);
+ if (Max.getZExtValue() > static_cast<uint64_t>(abs64(intEV - intIV)))
+ return true;
+
+ return false;
+}
+
+/// convertToInt - Convert APF to an integer, if possible.
+static bool convertToInt(const APFloat &APF, uint64_t *intVal) {
+
+ bool isExact = false;
+ if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+ if (APF.convertToInteger(intVal, 32, APF.isNegative(),
+ APFloat::rmTowardZero, &isExact)
+ != APFloat::opOK)
+ return false;
+ if (!isExact)
+ return false;
+ return true;
+
+}
+
+/// HandleFloatingPointIV - If the loop has a floating-point induction variable
+/// then insert a corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
+///
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
+
+ unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
+
+ // Check incoming value.
+  ConstantFP *InitValue =
+    dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge));
+ if (!InitValue) return;
+ uint64_t newInitValue = Type::Int32Ty->getPrimitiveSizeInBits();
+ if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
+ return;
+
+  // Check IV increment. Reject this PH if the increment operation is not
+  // an add or the increment value cannot be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
+ if (!Incr) return;
+ if (Incr->getOpcode() != Instruction::Add) return;
+ ConstantFP *IncrValue = NULL;
+ unsigned IncrVIndex = 1;
+ if (Incr->getOperand(1) == PH)
+ IncrVIndex = 0;
+ IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex));
+ if (!IncrValue) return;
+ uint64_t newIncrValue = Type::Int32Ty->getPrimitiveSizeInBits();
+ if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue))
+ return;
+
+  // Check Incr uses. One user is PH and the other user is the exit condition
+  // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find exit condition.
+ FCmpInst *EC = dyn_cast<FCmpInst>(U1);
+ if (!EC)
+ EC = dyn_cast<FCmpInst>(U2);
+ if (!EC) return;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(EC->getParent()->getTerminator())) {
+ if (!BI->isConditional()) return;
+ if (BI->getCondition() != EC) return;
+ }
+
+  // Find the exit value. If the exit value cannot be represented as an
+  // integer then do not handle this floating point PH.
+ ConstantFP *EV = NULL;
+ unsigned EVIndex = 1;
+ if (EC->getOperand(1) == Incr)
+ EVIndex = 0;
+ EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex));
+ if (!EV) return;
+ uint64_t intEV = Type::Int32Ty->getPrimitiveSizeInBits();
+ if (!convertToInt(EV->getValueAPF(), &intEV))
+ return;
+
+ // Find new predicate for integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (EC->getPredicate()) {
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ:
+ NewPred = CmpInst::ICMP_EQ;
+ break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ NewPred = CmpInst::ICMP_UGT;
+ break;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE:
+ NewPred = CmpInst::ICMP_UGE;
+ break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ NewPred = CmpInst::ICMP_ULT;
+ break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE:
+ NewPred = CmpInst::ICMP_ULE;
+ break;
+ default:
+ break;
+ }
+ if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return;
+
+ // Insert new integer induction variable.
+ PHINode *NewPHI = PHINode::Create(Type::Int32Ty,
+ PH->getName()+".int", PH);
+ NewPHI->addIncoming(ConstantInt::get(Type::Int32Ty, newInitValue),
+ PH->getIncomingBlock(IncomingEdge));
+
+ Value *NewAdd = BinaryOperator::CreateAdd(NewPHI,
+ ConstantInt::get(Type::Int32Ty,
+ newIncrValue),
+ Incr->getName()+".int", Incr);
+ NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));
+
+  // The back edge is edge 1 of NewPHI, whatever it may have been in the
+  // original PHI.
+ ConstantInt *NewEV = ConstantInt::get(Type::Int32Ty, intEV);
+ Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV);
+ Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1));
+ ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(),
+ EC->getParent()->getTerminator());
+
+  // In the following deletions, PH may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PH;
+
+  // Delete the old floating-point exit comparison instruction.
+ NewEC->takeName(EC);
+ EC->replaceAllUsesWith(NewEC);
+ RecursivelyDeleteTriviallyDeadInstructions(EC);
+
+  // Delete the old floating-point increment instruction.
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+  // Replace the floating-point induction variable, if it isn't already
+  // deleted. Give SIToFPInst preference over UIToFPInst because it is
+  // faster on widely used platforms.
+ if (WeakPH && !PH->use_empty()) {
+ if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) {
+ SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv",
+ PH->getParent()->getFirstNonPHI());
+ PH->replaceAllUsesWith(Conv);
+ } else {
+ UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv",
+ PH->getParent()->getFirstNonPHI());
+ PH->replaceAllUsesWith(Conv);
+ }
+ RecursivelyDeleteTriviallyDeadInstructions(PH);
+ }
+
+ // Add a new IVUsers entry for the newly-created integer PHI.
+ IU->AddUsersIfInteresting(NewPHI);
+}
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
new file mode 100644
index 0000000..e6f854f
--- /dev/null
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -0,0 +1,12919 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simpler
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+// %Y = add i32 %X, 1
+// %Z = add i32 %Y, 1
+// into:
+// %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+// 1. If a binary operator has a constant operand, it is moved to the RHS
+// 2. Bitwise operators with constant operands are always grouped so that
+// shifts are performed first, then or's, then and's, then xor's.
+// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+// 4. All cmp instructions on boolean values are replaced with logical ops
+// 5. add X, X is represented as (X*2) => (X << 1)
+// 6. Multiplies with a power-of-two constant argument are transformed into
+// shifts.
+// ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <climits>
+#include <sstream>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+
+namespace {
+ class VISIBILITY_HIDDEN InstCombiner
+ : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction*> {
+ // Worklist of all of the instructions that need to be simplified.
+ SmallVector<Instruction*, 256> Worklist;
+ DenseMap<Instruction*, unsigned> WorklistMap;
+ TargetData *TD;
+ bool MustPreserveLCSSA;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ InstCombiner() : FunctionPass(&ID) {}
+
+ /// AddToWorkList - Add the specified instruction to the worklist if it
+ /// isn't already in it.
+ void AddToWorkList(Instruction *I) {
+ if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second)
+ Worklist.push_back(I);
+ }
+
+ // RemoveFromWorkList - remove I from the worklist if it exists.
+ void RemoveFromWorkList(Instruction *I) {
+ DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+ if (It == WorklistMap.end()) return; // Not in worklist.
+
+ // Don't bother moving everything down, just null out the slot.
+ Worklist[It->second] = 0;
+
+ WorklistMap.erase(It);
+ }
+
+ Instruction *RemoveOneFromWorkList() {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ WorklistMap.erase(I);
+ return I;
+ }
+
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(Value &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ AddToWorkList(cast<Instruction>(*UI));
+ }
+
+ /// AddUsesToWorkList - When an instruction is simplified, add operands to
+ /// the work lists because they might get more simplified now.
+ ///
+ void AddUsesToWorkList(Instruction &I) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
+ AddToWorkList(Op);
+ }
+
+ /// AddSoonDeadInstToWorklist - The specified instruction is about to become
+ /// dead. Add all of its operands to the worklist, turning them into
+ /// undef's to reduce the number of uses of those instructions.
+ ///
+ /// Return the specified operand before it is turned into an undef.
+ ///
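+    /// For example (a sketch, assuming both operands are instructions): for a
+    /// soon-dead "%s = add i32 %a, %b", calling this with op == 0 queues %a
+    /// and %b for revisiting, rewrites both operands of %s to undef, and
+    /// returns the original %a.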
+ Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) {
+ Value *R = I.getOperand(op);
+
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i)) {
+ AddToWorkList(Op);
+ // Set the operand to undef to drop the use.
+ *i = UndefValue::get(Op->getType());
+ }
+
+ return R;
+ }
+
+ public:
+ virtual bool runOnFunction(Function &F);
+
+ bool DoOneIteration(Function &F, unsigned ItNum);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ AU.addPreservedID(LCSSAID);
+ AU.setPreservesCFG();
+ }
+
+ TargetData &getTargetData() const { return *TD; }
+
+ // Visitation implementation - Implement instruction combining for different
+ // instruction types. The semantics are as follows:
+ // Return Value:
+ // null - No change was made
+ // I - Change was made, I is still valid, I may be dead though
+ // otherwise - Change was made, replace I with returned instruction
+ //
+ Instruction *visitAdd(BinaryOperator &I);
+ Instruction *visitSub(BinaryOperator &I);
+ Instruction *visitMul(BinaryOperator &I);
+ Instruction *visitURem(BinaryOperator &I);
+ Instruction *visitSRem(BinaryOperator &I);
+ Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
+ Instruction *commonRemTransforms(BinaryOperator &I);
+ Instruction *commonIRemTransforms(BinaryOperator &I);
+ Instruction *commonDivTransforms(BinaryOperator &I);
+ Instruction *commonIDivTransforms(BinaryOperator &I);
+ Instruction *visitUDiv(BinaryOperator &I);
+ Instruction *visitSDiv(BinaryOperator &I);
+ Instruction *visitFDiv(BinaryOperator &I);
+ Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+ Instruction *visitAnd(BinaryOperator &I);
+ Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C);
+ Instruction *visitOr (BinaryOperator &I);
+ Instruction *visitXor(BinaryOperator &I);
+ Instruction *visitShl(BinaryOperator &I);
+ Instruction *visitAShr(BinaryOperator &I);
+ Instruction *visitLShr(BinaryOperator &I);
+ Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
+ Instruction *visitFCmpInst(FCmpInst &I);
+ Instruction *visitICmpInst(ICmpInst &I);
+ Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHS,
+ ConstantInt *RHS);
+ Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+
+ Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I);
+ Instruction *commonCastTransforms(CastInst &CI);
+ Instruction *commonIntCastTransforms(CastInst &CI);
+ Instruction *commonPointerCastTransforms(CastInst &CI);
+ Instruction *visitTrunc(TruncInst &CI);
+ Instruction *visitZExt(ZExtInst &CI);
+ Instruction *visitSExt(SExtInst &CI);
+ Instruction *visitFPTrunc(FPTruncInst &CI);
+ Instruction *visitFPExt(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
+ Instruction *visitUIToFP(CastInst &CI);
+ Instruction *visitSIToFP(CastInst &CI);
+ Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitIntToPtr(IntToPtrInst &CI);
+ Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+ Instruction *visitCallInst(CallInst &CI);
+ Instruction *visitInvokeInst(InvokeInst &II);
+ Instruction *visitPHINode(PHINode &PN);
+ Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
+ Instruction *visitAllocationInst(AllocationInst &AI);
+ Instruction *visitFreeInst(FreeInst &FI);
+ Instruction *visitLoadInst(LoadInst &LI);
+ Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitBranchInst(BranchInst &BI);
+ Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertElementInst(InsertElementInst &IE);
+ Instruction *visitExtractElementInst(ExtractElementInst &EI);
+ Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
+
+ // visitInstruction - Specify what to return for unhandled instructions...
+ Instruction *visitInstruction(Instruction &I) { return 0; }
+
+ private:
+ Instruction *visitCallSite(CallSite CS);
+ bool transformConstExprCastCall(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform = true);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
+ DbgDeclareInst *hasOneUsePlusDeclare(Value *V);
+
+
+ public:
+ // InsertNewInstBefore - insert an instruction New before instruction Old
+ // in the program. Add the new instruction to the worklist.
+ //
+ Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ assert(New && New->getParent() == 0 &&
+ "New instruction already inserted into a basic block!");
+ BasicBlock *BB = Old.getParent();
+ BB->getInstList().insert(&Old, New); // Insert inst
+ AddToWorkList(New);
+ return New;
+ }
+
+ /// InsertCastBefore - Insert a cast of V to TY before the instruction POS.
+ /// This also adds the cast to the worklist. Finally, this returns the
+ /// cast.
+ Value *InsertCastBefore(Instruction::CastOps opc, Value *V, const Type *Ty,
+ Instruction &Pos) {
+ if (V->getType() == Ty) return V;
+
+ if (Constant *CV = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(opc, CV, Ty);
+
+ Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
+ AddToWorkList(C);
+ return C;
+ }
+
+ Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
+ return InsertCastBefore(Instruction::BitCast, V, Ty, Pos);
+ }
+
+
+ // ReplaceInstUsesWith - This method is to be used when an instruction is
+    // found to be dead, replaceable with another preexisting expression. Here
+ // we add all uses of I to the worklist, replace all uses of I with the new
+ // value, then return I, so that the inst combiner will know that I was
+ // modified.
+ //
+ Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ AddUsersToWorkList(I); // Add all modified instrs to worklist
+ if (&I != V) {
+ I.replaceAllUsesWith(V);
+ return &I;
+ } else {
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ return &I;
+ }
+ }
+
+ // EraseInstFromFunction - When dealing with an instruction that has side
+ // effects or produces a void value, we can't rely on DCE to delete the
+ // instruction. Instead, visit methods should return the value returned by
+ // this function.
+ Instruction *EraseInstFromFunction(Instruction &I) {
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ AddUsesToWorkList(I);
+ RemoveFromWorkList(&I);
+ I.eraseFromParent();
+      return 0;  // Don't do anything with the erased instruction.
+ }
+
+ void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
+
+ private:
+
+ /// SimplifyCommutative - This performs a few simplifications for
+ /// commutative operators.
+ bool SimplifyCommutative(BinaryOperator &I);
+
+ /// SimplifyCompare - This reorders the operands of a CmpInst to get them in
+ /// most-complex to least-complex order.
+ bool SimplifyCompare(CmpInst &I);
+
+ /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
+ /// based on the demanded bits.
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth);
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth=0);
+
+ /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+ /// SimplifyDemandedBits knows about. See if the instruction has any
+ /// properties that allow us to simplify its operands.
+ bool SimplifyDemandedInstructionBits(Instruction &Inst);
+
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts, unsigned Depth = 0);
+
+ // FoldOpIntoPhi - Given a binary operator or cast instruction which has a
+ // PHI node as operand #0, see if we can fold the instruction into the PHI
+ // (which is only possible if all operands to the PHI are constants).
+ Instruction *FoldOpIntoPhi(Instruction &I);
+
+ // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+ // operator and they all are only used by the PHI, PHI together their
+ // inputs, and do the operation once, to the result of the PHI.
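+    // For example (an illustrative sketch): a PHI whose incoming values are
+    // two "trunc i32 ... to i8" instructions, each used only by the PHI,
+    // becomes a PHI over the i32 inputs followed by a single trunc of the
+    // PHI result.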
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+
+
+ Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+ ConstantInt *AndRHS, BinaryOperator &TheAnd);
+
+ Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+ bool isSub, Instruction &I);
+ Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside, Instruction &IB);
+ Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
+ Instruction *MatchBSwap(BinaryOperator &I);
+ bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
+
+
+ Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
+
+ bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+ unsigned CastOpc, int &NumCastsRemoved);
+ unsigned GetOrEnforceKnownAlignment(Value *V,
+ unsigned PrefAlign = 0);
+
+ };
+}
+
+char InstCombiner::ID = 0;
+static RegisterPass<InstCombiner>
+X("instcombine", "Combine redundant instructions");
+
+// getComplexity: Assign a complexity or rank value to LLVM Values...
+// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
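+// For example, in "add i32 %x, 1" where %x is an argument, %x ranks 3 while
+// the constant 1 ranks 1, so canonicalization keeps the constant on the RHS.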
+static unsigned getComplexity(Value *V) {
+ if (isa<Instruction>(V)) {
+ if (BinaryOperator::isNeg(V) || BinaryOperator::isNot(V))
+ return 3;
+ return 4;
+ }
+ if (isa<Argument>(V)) return 3;
+ return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+}
+
+// isOnlyUse - Return true if this instruction will be deleted if we stop using
+// it.
+static bool isOnlyUse(Value *V) {
+ return V->hasOneUse() || isa<Constant>(V);
+}
+
+// getPromotedType - Return the specified type promoted as it would be to pass
+// through a va_arg area...
+static const Type *getPromotedType(const Type *Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (ITy->getBitWidth() < 32)
+ return Type::Int32Ty;
+ }
+ return Ty;
+}
+
+/// getBitCastOperand - If the specified operand is a CastInst, a constant
+/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
+/// operand value, otherwise return null.
+static Value *getBitCastOperand(Value *V) {
+ if (BitCastInst *I = dyn_cast<BitCastInst>(V))
+ // BitCastInst?
+ return I->getOperand(0);
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ // GetElementPtrInst?
+ if (GEP->hasAllZeroIndices())
+ return GEP->getOperand(0);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::BitCast)
+ // BitCast ConstantExp?
+ return CE->getOperand(0);
+ else if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // GetElementPtr ConstantExp?
+ for (User::op_iterator I = CE->op_begin() + 1, E = CE->op_end();
+ I != E; ++I) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(I);
+ if (!CI || !CI->isZero())
+ // Any non-zero indices? Not cast-like.
+ return 0;
+ }
+ // All-zero indices? This is just like casting.
+ return CE->getOperand(0);
+ }
+ }
+ return 0;
+}
+
+/// This function is a wrapper around CastInst::isEliminableCastPair. It
+/// simply extracts arguments and returns what that function returns.
+static Instruction::CastOps
+isEliminableCastPair(
+ const CastInst *CI, ///< The first cast instruction
+ unsigned opcode, ///< The opcode of the second cast instruction
+ const Type *DstTy, ///< The target type for the second cast instruction
+ TargetData *TD ///< The target data for pointer size
+) {
+
+ const Type *SrcTy = CI->getOperand(0)->getType(); // A from above
+ const Type *MidTy = CI->getType(); // B from above
+
+ // Get the opcodes of the two Cast instructions
+ Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+
+ unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
+ DstTy, TD->getIntPtrType());
+
+ // We don't want to form an inttoptr or ptrtoint that converts to an integer
+ // type that differs from the pointer size.
+ if ((Res == Instruction::IntToPtr && SrcTy != TD->getIntPtrType()) ||
+ (Res == Instruction::PtrToInt && DstTy != TD->getIntPtrType()))
+ Res = 0;
+
+ return Instruction::CastOps(Res);
+}
+
+/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
+/// in any code being generated. It does not require codegen if V is simple
+/// enough or if the cast can be folded into other casts.
+static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V,
+ const Type *Ty, TargetData *TD) {
+ if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+ // If this is another cast that can be eliminated, it isn't codegen either.
+ if (const CastInst *CI = dyn_cast<CastInst>(V))
+ if (isEliminableCastPair(CI, opcode, Ty, TD))
+ return false;
+ return true;
+}
+
+// SimplifyCommutative - This performs a few simplifications for commutative
+// operators:
+//
+// 1. Order operands such that they are listed from right (least complex) to
+// left (most complex). This puts constants before unary operators before
+// binary operators.
+//
+// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
+// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+//
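+// For example (an illustrative sketch): transform 2 turns
+// (add (add %x, 1), 2) into (add %x, 3), and transform 3 turns
+// (add (add %x, 1), (add %y, 2)) into (add (add %x, %y), 3).
+//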
+bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
+ bool Changed = false;
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ if (!I.isAssociative()) return Changed;
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
+ if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
+ if (isa<Constant>(I.getOperand(1))) {
+ Constant *Folded = ConstantExpr::get(I.getOpcode(),
+ cast<Constant>(I.getOperand(1)),
+ cast<Constant>(Op->getOperand(1)));
+ I.setOperand(0, Op->getOperand(0));
+ I.setOperand(1, Folded);
+ return true;
+ } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1)))
+ if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
+ isOnlyUse(Op) && isOnlyUse(Op1)) {
+ Constant *C1 = cast<Constant>(Op->getOperand(1));
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+ Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
+ Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
+ Op1->getOperand(0),
+ Op1->getName(), &I);
+ AddToWorkList(New);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ return true;
+ }
+ }
+ return Changed;
+}
+
+/// SimplifyCompare - For a CmpInst this function just orders the operands
+/// so that they are listed from right (least complex) to left (most complex).
+/// This puts constants before unary operators before binary operators.
+bool InstCombiner::SimplifyCompare(CmpInst &I) {
+ if (getComplexity(I.getOperand(0)) >= getComplexity(I.getOperand(1)))
+ return false;
+ I.swapOperands();
+ // Compare instructions are not associative so there's nothing else we can do.
+ return true;
+}
+
+// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
+// if the LHS is a constant zero (which is the 'negate' form).
+//
+static inline Value *dyn_castNegVal(Value *V) {
+ if (BinaryOperator::isNeg(V))
+ return BinaryOperator::getNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantExpr::getNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isInteger())
+ return ConstantExpr::getNeg(C);
+
+ return 0;
+}
+
+static inline Value *dyn_castNotVal(Value *V) {
+ if (BinaryOperator::isNot(V))
+ return BinaryOperator::getNotArgument(V);
+
+ // Constants can be considered to be not'ed values...
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantInt::get(~C->getValue());
+ return 0;
+}
+
+// dyn_castFoldableMul - If this value is a multiply that can be folded into
+// other computations (because it has a constant operand), return the
+// non-constant operand of the multiply, and set CST to point to the multiplier.
+// Otherwise, return null.
+//
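+// For example, given a single-use "%m = shl i32 %x, 3", this returns %x and
+// sets CST to 8, since the shift is equivalent to multiplying by 1 << 3.
+//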
+static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
+ if (V->hasOneUse() && V->getType()->isInteger())
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (I->getOpcode() == Instruction::Mul)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
+ return I->getOperand(0);
+ if (I->getOpcode() == Instruction::Shl)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
+ // The multiplier is really 1 << CST.
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ uint32_t CSTVal = CST->getLimitedValue(BitWidth);
+ CST = ConstantInt::get(APInt(BitWidth, 1).shl(CSTVal));
+ return I->getOperand(0);
+ }
+ }
+ return 0;
+}
+
+/// dyn_castGetElementPtr - If this is a getelementptr instruction or constant
+/// expression, return it.
+static User *dyn_castGetElementPtr(Value *V) {
+ if (isa<GetElementPtrInst>(V)) return cast<User>(V);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ return cast<User>(V);
+  return 0;
+}
+
+/// getOpcode - If this is an Instruction or a ConstantExpr, return the
+/// opcode value. Otherwise return UserOp1.
+static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getOpcode();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ return CE->getOpcode();
+ // Use UserOp1 to mean there's no opcode.
+ return Instruction::UserOp1;
+}
+
+/// AddOne - Add one to a ConstantInt
+static ConstantInt *AddOne(ConstantInt *C) {
+ APInt Val(C->getValue());
+ return ConstantInt::get(++Val);
+}
+/// SubOne - Subtract one from a ConstantInt
+static ConstantInt *SubOne(ConstantInt *C) {
+ APInt Val(C->getValue());
+ return ConstantInt::get(--Val);
+}
+/// Add - Add two ConstantInts together
+static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) {
+ return ConstantInt::get(C1->getValue() + C2->getValue());
+}
+/// And - Bitwise AND two ConstantInts together
+static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) {
+ return ConstantInt::get(C1->getValue() & C2->getValue());
+}
+/// Subtract - Subtract one ConstantInt from another
+static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) {
+ return ConstantInt::get(C1->getValue() - C2->getValue());
+}
+/// Multiply - Multiply two ConstantInts together
+static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) {
+ return ConstantInt::get(C1->getValue() * C2->getValue());
+}
+/// MultiplyOverflows - True if the multiply cannot be represented in an
+/// integer of this size.
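+/// For example, in 8 bits, 16 * 16 = 256 overflows both signed and unsigned,
+/// while 16 * 7 = 112 fits in both.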
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
+ uint32_t W = C1->getBitWidth();
+ APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
+ if (sign) {
+ LHSExt.sext(W * 2);
+ RHSExt.sext(W * 2);
+ } else {
+ LHSExt.zext(W * 2);
+ RHSExt.zext(W * 2);
+ }
+
+ APInt MulExt = LHSExt * RHSExt;
+
+ if (sign) {
+ APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+ APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+ return MulExt.slt(Min) || MulExt.sgt(Max);
+ } else
+ return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
+}
+
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
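+/// For example, if I is 'and X, 255' and only the low four bits of the
+/// result are demanded, the constant is shrunk to 15.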
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+ APInt Demanded) {
+ assert(I && "No instruction?");
+ assert(OpNo < I->getNumOperands() && "Operand index too large");
+
+ // If the operand is not a constant integer, nothing to do.
+ ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
+ if (!OpC) return false;
+
+ // If there are no bits set that aren't demanded, nothing to do.
+ Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ if ((~Demanded & OpC->getValue()) == 0)
+ return false;
+
+ // This instruction is producing bits that are not demanded. Shrink the RHS.
+ Demanded &= OpC->getValue();
+ I->setOperand(OpNo, ConstantInt::get(Demanded));
+ return true;
+}
+
+// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+// set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
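+// For example, for 4-bit values with KnownZero = 0100 and KnownOne = 0001,
+// the unknown bits are 1010; since the sign bit is unknown, Min = 1001 (-7)
+// and Max = 0011 (+3).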
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
+ const APInt& KnownOne,
+ APInt& Min, APInt& Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+ // bit if it is unknown.
+ Min = KnownOne;
+ Max = KnownOne|UnknownBits;
+
+ if (UnknownBits.isNegative()) { // Sign bit is unknown
+ Min.set(Min.getBitWidth()-1);
+ Max.clear(Max.getBitWidth()-1);
+ }
+}
+
+// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
+// a set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
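+// For the same 4-bit example as above (KnownZero = 0100, KnownOne = 0001),
+// Min = 0001 (1) and Max = 1011 (11).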
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+ const APInt &KnownOne,
+ APInt &Min, APInt &Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when the unknown bits are all zeros.
+ Min = KnownOne;
+ // The maximum value is when the unknown bits are all ones.
+ Max = KnownOne|UnknownBits;
+}
+
+/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+/// SimplifyDemandedBits knows about. See if the instruction has any
+/// properties that allow us to simplify its operands.
+bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ unsigned BitWidth = cast<IntegerType>(Inst.getType())->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
+ KnownZero, KnownOne, 0);
+ if (V == 0) return false;
+ if (V == &Inst) return true;
+ ReplaceInstUsesWith(Inst, V);
+ return true;
+}
+
+/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
+/// specified instruction operand if possible, updating it in place. It returns
+/// true if it made any change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
+ KnownZero, KnownOne, Depth);
+ if (NewVal == 0) return false;
+ U.set(NewVal);
+ return true;
+}
+
+
+/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
+/// value based on the demanded bits. When this function is called, it is known
+/// that only the bits set in DemandedMask of the result of V are ever used
+/// downstream. Consequently, depending on the mask and V, it may be possible
+/// to replace V with a constant or one of its operands. In such cases, this
+/// function does the replacement and returns the simplified value. In all
+/// other cases, it returns null after analyzing the expression and setting
+/// KnownOne to all the bits that are known to be one in the expression and
+/// KnownZero to all the bits that are known to be zero in the expression.
+/// These are provided to potentially allow the
+/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
+/// the expression. KnownOne and KnownZero always follow the invariant that
+/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
+/// the bits in KnownOne and KnownZero may only be accurate for those bits set
+/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
+/// and KnownOne must all be the same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification. This returns V itself if it did some simplification of V's
+/// operands based on the information about what bits are demanded. This returns
+/// some other non-null value if it found out that V is equal to another value
+/// in the context where the specified bits are demanded, but not for all users.
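+///
+/// For example, if V is 'and X, 0xFF00' and DemandedMask covers only the low
+/// byte, every demanded bit of V is known zero, so V can be replaced by the
+/// zero constant of its type.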
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ assert(V != 0 && "Null pointer of Value???");
+ assert(Depth <= 6 && "Limit Search Depth");
+ uint32_t BitWidth = DemandedMask.getBitWidth();
+ const Type *VTy = V->getType();
+ assert((TD || !isa<PointerType>(VTy)) &&
+ "SimplifyDemandedBits needs to know bit widths!");
+ assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) &&
+ (!isa<IntegerType>(VTy) ||
+ VTy->getPrimitiveSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne \
+ must have same BitWidth");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return 0;
+ }
+ if (isa<ConstantPointerNull>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne.clear();
+ KnownZero = DemandedMask;
+ return 0;
+ }
+
+ KnownZero.clear();
+ KnownOne.clear();
+ if (DemandedMask == 0) { // Not demanding any bits from V.
+ if (isa<UndefValue>(V))
+ return 0;
+ return UndefValue::get(VTy);
+ }
+
+ if (Depth == 6) // Limit search depth.
+ return 0;
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ return 0; // Only analyze instructions.
+ }
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
+ if (Depth != 0 && !I->hasOneUse()) {
+    // Despite the fact that we can't simplify this instruction in all users'
+    // contexts, we can at least compute the known zero/one bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ } else if (I->getOpcode() == Instruction::Or) {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ // If either the LHS or the RHS are One, the result is One.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+ }
+
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0;
+ }
+
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits so that we can try to simplify the
+ // operands. This allows visitTruncInst (for example) to simplify the
+ // operand of a trunc without duplicating all the logic below.
+ if (Depth == 0 && !V->hasOneUse())
+ DemandedMask = APInt::getAllOnesValue(BitWidth);
+
+ switch (I->getOpcode()) {
+ default:
+ ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
+ case Instruction::And:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ return I;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ RHSKnownOne &= LHSKnownOne;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ RHSKnownZero |= LHSKnownZero;
+ break;
+ case Instruction::Or:
+ // If either the LHS or the RHS are One, the result is One.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ RHSKnownZero &= LHSKnownZero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ RHSKnownOne |= LHSKnownOne;
+ break;
+ case Instruction::Xor: {
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) |
+ (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) |
+ (RHSKnownOne & LHSKnownZero);
+
+ // If all of the demanded bits are known to be zero on one side or the
+ // other, turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstBefore(Or, *I);
+ }
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ // all known
+ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
+ Constant *AndC = ConstantInt::get(~RHSKnownOne & DemandedMask);
+ Instruction *And =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ return InsertNewInstBefore(And, *I);
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ RHSKnownZero = KnownZeroOut;
+ RHSKnownOne = KnownOneOut;
+ break;
+ }
+ case Instruction::Select:
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+ ShrinkDemandedConstant(I, 2, DemandedMask))
+ return I;
+
+ // Only known if known in both the LHS and RHS.
+ RHSKnownOne &= LHSKnownOne;
+ RHSKnownZero &= LHSKnownZero;
+ break;
+ case Instruction::Trunc: {
+ unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ DemandedMask.zext(truncBf);
+ RHSKnownZero.zext(truncBf);
+ RHSKnownOne.zext(truncBf);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ DemandedMask.trunc(BitWidth);
+ RHSKnownZero.trunc(BitWidth);
+ RHSKnownOne.trunc(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ break;
+ }
+ case Instruction::BitCast:
+ if (!I->getOperand(0)->getType()->isInteger())
+      return 0;    // vector->int or fp->int?
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ break;
+ case Instruction::ZExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+
+ DemandedMask.trunc(SrcBitWidth);
+ RHSKnownZero.trunc(SrcBitWidth);
+ RHSKnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ DemandedMask.zext(BitWidth);
+ RHSKnownZero.zext(BitWidth);
+ RHSKnownOne.zext(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ // The top bits are known to be zero.
+ RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+
+ APInt InputDemandedBits = DemandedMask &
+ APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+ APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if ((NewBits & DemandedMask) != 0)
+ InputDemandedBits.set(SrcBitWidth-1);
+
+ InputDemandedBits.trunc(SrcBitWidth);
+ RHSKnownZero.trunc(SrcBitWidth);
+ RHSKnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ InputDemandedBits.zext(BitWidth);
+ RHSKnownZero.zext(BitWidth);
+ RHSKnownOne.zext(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, or if the NewBits are not demanded
+ // convert this into a zero extension.
+ if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ // Convert to ZExt cast
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+ return InsertNewInstBefore(NewCast, *I);
+ } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set
+ RHSKnownOne |= NewBits;
+ }
+ break;
+ }
+ case Instruction::Add: {
+    // Figure out what the input bits are. If the top bits of the add result
+    // are not demanded, then the add doesn't demand them from its input
+    // either.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+
+ // If there is a constant on the RHS, there are a variety of xformations
+ // we can do.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // If null, this should be simplified elsewhere. Some of the xforms here
+ // won't work if the RHS is zero.
+ if (RHS->isZero())
+ break;
+
+ // If the top bit of the output is demanded, demand everything from the
+ // input. Otherwise, we demand all the input bits except NLZ top bits.
+ APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
+
+ // Find information about known zero/one bits in the input.
+ if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // If the RHS of the add has bits set that can't affect the input, reduce
+ // the constant.
+ if (ShrinkDemandedConstant(I, 1, InDemandedBits))
+ return I;
+
+ // Avoid excess work.
+ if (LHSKnownZero == 0 && LHSKnownOne == 0)
+ break;
+
+ // Turn it into OR if input bits are zero.
+ if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstBefore(Or, *I);
+ }
+
+ // We can say something about the output known-zero and known-one bits,
+ // depending on potential carries from the input constant and the
+ // unknowns. For example if the LHS is known to have at most the 0x0F0F0
+ // bits set and the RHS constant is 0x01001, then we know we have a known
+ // one mask of 0x00001 and a known zero mask of 0xE0F0E.
+
+ // To compute this, we first compute the potential carry bits. These are
+ // the bits which may be modified. I'm not aware of a better way to do
+ // this scan.
+ const APInt &RHSVal = RHS->getValue();
+ APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
+
+ // Now that we know which bits have carries, compute the known-1/0 sets.
+
+ // Bits are known one if they are known zero in one operand and one in the
+ // other, and there is no input carry.
+ RHSKnownOne = ((LHSKnownZero & RHSVal) |
+ (LHSKnownOne & ~RHSVal)) & ~CarryBits;
+
+ // Bits are known zero if they are known zero in both operands and there
+ // is no input carry.
+ RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
+ } else {
+ // If the high-bits of this ADD are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+        // Right fill the mask of bits for this ADD to demand the most
+        // significant demanded bit and all those below it.
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ }
+ break;
+ }
+ case Instruction::Sub:
+ // If the high-bits of this SUB are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+      // Right fill the mask of bits for this SUB to demand the most
+      // significant demanded bit and all those below it.
+ uint32_t NLZ = DemandedMask.countLeadingZeros();
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // the known zeros and ones.
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
+ case Instruction::Shl:
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ RHSKnownZero <<= ShiftAmt;
+ RHSKnownOne <<= ShiftAmt;
+ // low bits known zero.
+ if (ShiftAmt)
+ RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ // For a logical shift right
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Unsigned shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
+ RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt);
+ if (ShiftAmt) {
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ RHSKnownZero |= HighBits; // high bits known zero.
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), I->getOperand(1), I->getName());
+ return InsertNewInstBefore(NewVal, *I);
+ }
+
+ // If the sign bit is the only bit demanded by this ashr, then there is no
+ // need to do it, the shift doesn't change the high bit.
+ if (DemandedMask.isSignBit())
+ return I->getOperand(0);
+
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Signed shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ // If any of the "high bits" are demanded, we should set the sign bit as
+ // demanded.
+ if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+ DemandedMaskIn.set(BitWidth-1);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
+ RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt);
+
+ // Handle the sign bits.
+ APInt SignBit(APInt::getSignBit(BitWidth));
+ // Adjust to where it is now in the mask.
+ SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
+ (HighBits & ~DemandedMask) == HighBits) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), SA, I->getName());
+ return InsertNewInstBefore(NewVal, *I);
+ } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one.
+ RHSKnownOne |= HighBits;
+ }
+ }
+ break;
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+        if (DemandedMask.ult(RA)) // srem won't affect demanded bits
+ return I->getOperand(0);
+
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ LHSKnownZero |= ~LowBits;
+
+ KnownZero |= LHSKnownZero & DemandedMask;
+
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ }
+ }
+ break;
+ case Instruction::URem: {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
+ KnownZero2, KnownOne2, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return I;
+
+    // KnownZero2 now describes the divisor (the operand simplified last);
+    // a urem result is always smaller than its divisor, so the result
+    // shares the divisor's known-zero high bits.
+    unsigned Leaders = KnownZero2.countLeadingOnes();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
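+        // For example, for a 32-bit bswap where only bits 8-15 of the result
+        // are demanded (NLZ = 16, NTZ = 8), ResultBit is 8 and InputBit is
+        // 16, so we emit an 'lshr' by 8.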
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ return InsertNewInstBefore(NewVal, *I);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ }
+ }
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
+ }
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ Constant *C = ConstantInt::get(RHSKnownOne);
+ if (isa<PointerType>(V->getType()))
+ C = ConstantExpr::getIntToPtr(C, V->getType());
+ return C;
+ }
+  return 0;
+}
+
+
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
+/// any number of elements. DemandedElts contains the set of elements that are
+/// actually used by the caller. This method analyzes which elements of the
+/// operand are undef and returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified, then the resultant value is
+/// returned. This returns null if no change was made.
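+///
+/// For example, an insertelement into a lane that DemandedElts does not
+/// include can be dropped entirely in favor of its vector operand.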
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts,
+ unsigned Depth) {
+ unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ APInt EltMask(APInt::getAllOnesValue(VWidth));
+ assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+ if (isa<UndefValue>(V)) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return 0;
+ } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+ UndefElts = EltMask;
+ return UndefValue::get(V->getType());
+ }
+
+ UndefElts = 0;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Undef = UndefValue::get(EltTy);
+
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i)
+ if (!DemandedElts[i]) { // If not demanded, set to undef.
+ Elts.push_back(Undef);
+ UndefElts.set(i);
+ } else if (isa<UndefValue>(CP->getOperand(i))) { // Already undef.
+ Elts.push_back(Undef);
+ UndefElts.set(i);
+ } else { // Otherwise, defined.
+ Elts.push_back(CP->getOperand(i));
+ }
+
+ // If we changed the constant, return it.
+ Constant *NewCP = ConstantVector::get(Elts);
+ return NewCP != CP ? NewCP : 0;
+ } else if (isa<ConstantAggregateZero>(V)) {
+ // Simplify the CAZ to a ConstantVector where the non-demanded elements are
+ // set to undef.
+
+ // Check if this is identity. If so, return 0 since we are not simplifying
+ // anything.
+    if (DemandedElts == EltMask)
+ return 0;
+
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Zero = Constant::getNullValue(EltTy);
+ Constant *Undef = UndefValue::get(EltTy);
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = DemandedElts[i] ? Zero : Undef;
+ Elts.push_back(Elt);
+ }
+ UndefElts = DemandedElts ^ EltMask;
+ return ConstantVector::get(Elts);
+ }
+
+ // Limit search depth.
+ if (Depth == 10)
+ return 0;
+
+  // If multiple users are using the root value, proceed with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
+ // TODO: Just compute the UndefElts information recursively.
+ return 0;
+
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0; // Only analyze instructions.
+
+ bool MadeChange = false;
+ APInt UndefElts2(VWidth, 0);
+ Value *TmpV;
+ switch (I->getOpcode()) {
+ default: break;
+
+ case Instruction::InsertElement: {
+    // If this is a variable index, we don't know which element it overwrites,
+    // so demand exactly the same input as we produce.
+ ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (Idx == 0) {
+ // Note that we can't propagate undef elt info, because we don't know
+ // which elt is getting updated.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+ }
+
+ // If this is inserting an element that isn't demanded, remove this
+ // insertelement.
+ unsigned IdxNo = Idx->getZExtValue();
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo])
+ return AddSoonDeadInstToWorklist(*I, 0);
+
+ // Otherwise, the element inserted overwrites whatever was there, so the
+ // input demanded set is simpler than the output set.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clear(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ // The inserted element is defined.
+ UndefElts.clear(IdxNo);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ uint64_t LHSVWidth =
+ cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (DemandedElts[i]) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal != -1u) {
+ assert(MaskVal < LHSVWidth * 2 &&
+ "shufflevector mask index out of range!");
+ if (MaskVal < LHSVWidth)
+ LeftDemanded.set(MaskVal);
+ else
+ RightDemanded.set(MaskVal - LHSVWidth);
+ }
+ }
+ }
+
+ APInt UndefElts4(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+ UndefElts4, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ APInt UndefElts3(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+ UndefElts3, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ bool NewUndefElts = false;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u) {
+ UndefElts.set(i);
+ } else if (MaskVal < LHSVWidth) {
+ if (UndefElts4[MaskVal]) {
+ NewUndefElts = true;
+ UndefElts.set(i);
+ }
+ } else {
+ if (UndefElts3[MaskVal - LHSVWidth]) {
+ NewUndefElts = true;
+ UndefElts.set(i);
+ }
+ }
+ }
+
+ if (NewUndefElts) {
+ // Add additional discovered undefs.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (UndefElts[i])
+ Elts.push_back(UndefValue::get(Type::Int32Ty));
+ else
+ Elts.push_back(ConstantInt::get(Type::Int32Ty,
+ Shuffle->getMaskValue(i)));
+ }
+ I->setOperand(2, ConstantVector::get(Elts));
+ MadeChange = true;
+ }
+ break;
+ }
+ case Instruction::BitCast: {
+ // Vector->vector casts only.
+ const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ if (!VTy) break;
+ unsigned InVWidth = VTy->getNumElements();
+ APInt InputDemandedElts(InVWidth, 0);
+ unsigned Ratio;
+
+ if (VWidth == InVWidth) {
+ // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+ // elements as are demanded of us.
+ Ratio = 1;
+ InputDemandedElts = DemandedElts;
+ } else if (VWidth > InVWidth) {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the result than there are in the source,
+ // then an input element is live if any of the corresponding output
+ // elements are live.
+ Ratio = VWidth/InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ if (DemandedElts[OutIdx])
+ InputDemandedElts.set(OutIdx/Ratio);
+ }
+ } else {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the source than there are in the result,
+ // then an input element is live if the corresponding output element is
+ // live.
+ Ratio = InVWidth/VWidth;
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (DemandedElts[InIdx/Ratio])
+ InputDemandedElts.set(InIdx);
+ }
+
+    // Simplify the input vector based on the demanded elements.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) {
+ I->setOperand(0, TmpV);
+ MadeChange = true;
+ }
+
+ UndefElts = UndefElts2;
+ if (VWidth > InVWidth) {
+ assert(0 && "Unimp");
+ // If there are more elements in the result than there are in the source,
+ // then an output element is undef if the corresponding input element is
+ // undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (UndefElts2[OutIdx/Ratio])
+ UndefElts.set(OutIdx);
+ } else if (VWidth < InVWidth) {
+ assert(0 && "Unimp");
+ // If there are more elements in the source than there are in the result,
+ // then a result element is undef if all of the corresponding input
+ // elements are undef.
+ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (!UndefElts2[InIdx]) // Not undef?
+ UndefElts.clear(InIdx/Ratio); // Clear undef bit.
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+    // Note that div/rem are absent from this list: they demand all of their
+    // input elements, since simplifying an operand to undef could introduce
+    // a divide by zero.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+
+ case Instruction::Call: {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II) break;
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // Binary vector operations that work column-wise. A dest element is a
+ // function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
+
+ // If only the low elt is demanded and this is a scalarizable intrinsic,
+ // scalarize it now.
+ if (DemandedElts == 1) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ // TODO: Lower MIN/MAX/ABS/etc
+ Value *LHS = II->getOperand(1);
+ Value *RHS = II->getOperand(2);
+ // Extract the element as scalars.
+ LHS = InsertNewInstBefore(new ExtractElementInst(LHS, 0U,"tmp"), *II);
+ RHS = InsertNewInstBefore(new ExtractElementInst(RHS, 0U,"tmp"), *II);
+
+ switch (II->getIntrinsicID()) {
+ default: assert(0 && "Case stmts out of sync!");
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateSub(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_mul_sd:
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateMul(LHS, RHS,
+ II->getName()), *II);
+ break;
+ }
+
+ Instruction *New =
+ InsertElementInst::Create(UndefValue::get(II->getType()), TmpV, 0U,
+ II->getName());
+ InsertNewInstBefore(New, *II);
+ AddSoonDeadInstToWorklist(*II, 0);
+ return New;
+ }
+ }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+ }
+ break;
+ }
+ }
+ return MadeChange ? I : 0;
+}
+
+
+/// AssociativeOpt - Perform an optimization on an associative operator. This
+/// function is designed to check a chain of associative operators for a
+/// potential to apply a certain optimization. Since the optimization may be
+/// applicable if the expression was reassociated, this checks the chain, then
+/// reassociates the expression as necessary to expose the optimization
+/// opportunity. This makes use of a special Functor, which must define
+/// 'shouldApply' and 'apply' methods.
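+///
+/// For example, the AddRHS functor below uses this to rewrite ((Y + X) + X)
+/// as (Y + (X << 1)): the chain is reassociated so that both copies of X
+/// meet, and 'apply' then performs the X + X --> X << 1 fold.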
+///
+template<typename Functor>
+static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
+ unsigned Opcode = Root.getOpcode();
+ Value *LHS = Root.getOperand(0);
+
+ // Quick check, see if the immediate LHS matches...
+ if (F.shouldApply(LHS))
+ return F.apply(Root);
+
+ // Otherwise, if the LHS is not of the same opcode as the root, return.
+ Instruction *LHSI = dyn_cast<Instruction>(LHS);
+ while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) {
+ // Should we apply this transform to the RHS?
+ bool ShouldApply = F.shouldApply(LHSI->getOperand(1));
+
+ // If not to the RHS, check to see if we should apply to the LHS...
+ if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) {
+ cast<BinaryOperator>(LHSI)->swapOperands(); // Make the LHS the RHS
+ ShouldApply = true;
+ }
+
+ // If the functor wants to apply the optimization to the RHS of LHSI,
+ // reassociate the expression from ((? op A) op B) to (? op (A op B))
+ if (ShouldApply) {
+ // Now all of the instructions are in the current basic block, go ahead
+ // and perform the reassociation.
+ Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));
+
+ // First move the selected RHS to the LHS of the root...
+ Root.setOperand(0, LHSI->getOperand(1));
+
+ // Make what used to be the LHS of the root be the user of the root...
+ Value *ExtraOperand = TmpLHSI->getOperand(1);
+ if (&Root == TmpLHSI) {
+ Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));
+ return 0;
+ }
+ Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI
+ TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root
+ BasicBlock::iterator ARI = &Root; ++ARI;
+ TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root
+ ARI = Root;
+
+ // Now propagate the ExtraOperand down the chain of instructions until we
+ // get to LHSI.
+ while (TmpLHSI != LHSI) {
+ Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
+ // Move the instruction to immediately before the chain we are
+ // constructing to avoid breaking dominance properties.
+ NextLHSI->moveBefore(ARI);
+ ARI = NextLHSI;
+
+ Value *NextOp = NextLHSI->getOperand(1);
+ NextLHSI->setOperand(1, ExtraOperand);
+ TmpLHSI = NextLHSI;
+ ExtraOperand = NextOp;
+ }
+
+ // Now that the instructions are reassociated, have the functor perform
+ // the transformation...
+ return F.apply(Root);
+ }
+
+ LHSI = dyn_cast<Instruction>(LHSI->getOperand(0));
+ }
+ return 0;
+}
+
+namespace {
+
+// AddRHS - Implements: X + X --> X << 1
+struct AddRHS {
+ Value *RHS;
+ AddRHS(Value *rhs) : RHS(rhs) {}
+ bool shouldApply(Value *LHS) const { return LHS == RHS; }
+ Instruction *apply(BinaryOperator &Add) const {
+ return BinaryOperator::CreateShl(Add.getOperand(0),
+ ConstantInt::get(Add.getType(), 1));
+ }
+};
+
+// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2)
+// iff C1&C2 == 0
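+// For example, (A & 0xF0) + (B & 0x0F) --> (A & 0xF0) | (B & 0x0F).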
+struct AddMaskingAnd {
+ Constant *C2;
+ AddMaskingAnd(Constant *c) : C2(c) {}
+ bool shouldApply(Value *LHS) const {
+ ConstantInt *C1;
+ return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
+ ConstantExpr::getAnd(C1, C2)->isNullValue();
+ }
+ Instruction *apply(BinaryOperator &Add) const {
+ return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
+ }
+};
+
+}
+
+static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+ InstCombiner *IC) {
+ if (CastInst *CI = dyn_cast<CastInst>(&I)) {
+ return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I);
+ }
+
+ // Figure out if the constant is the left or the right argument.
+ bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+ Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+ if (Constant *SOC = dyn_cast<Constant>(SO)) {
+ if (ConstIsRHS)
+ return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+ return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+ }
+
+ Value *Op0 = SO, *Op1 = ConstOperand;
+ if (!ConstIsRHS)
+ std::swap(Op0, Op1);
+ Instruction *New;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
+ else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+ New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ else {
+ assert(0 && "Unknown binary instruction type!");
+ abort();
+ }
+ return IC->InsertNewInstBefore(New, I);
+}
+
+// FoldOpIntoSelect - Given an instruction with a select as one operand and a
+// constant as the other operand, try to fold the binary operator into the
+// select arguments. This also works for Cast instructions, which obviously do
+// not have a second operand.
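+// For example, 'add (select C, 1, 2), 8' becomes 'select C, 9, 10', with
+// both arms folded to constants.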
+static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
+ InstCombiner *IC) {
+ // Don't modify shared select instructions
+ if (!SI->hasOneUse()) return 0;
+ Value *TV = SI->getOperand(1);
+ Value *FV = SI->getOperand(2);
+
+ if (isa<Constant>(TV) || isa<Constant>(FV)) {
+ // Bool selects with constant operands can be folded to logical ops.
+ if (SI->getType() == Type::Int1Ty) return 0;
+
+ Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);
+ Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC);
+
+ return SelectInst::Create(SI->getCondition(), SelectTrueVal,
+ SelectFalseVal);
+ }
+ return 0;
+}
+
+
+/// FoldOpIntoPhi - Given a binary operator or cast instruction which has a PHI
+/// node as operand #0, see if we can fold the instruction into the PHI (which
+/// is only possible if all operands to the PHI are constants).
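+/// For example, 'add (phi [1, BB0], [2, BB1]), 8' becomes
+/// 'phi [9, BB0], [10, BB1]'.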
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
+ PHINode *PN = cast<PHINode>(I.getOperand(0));
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (!PN->hasOneUse() || NumPHIValues == 0) return 0;
+
+ // Check to see if all of the operands of the PHI are constants. If there is
+  // one non-constant value, remember the BB it is in. If there is more than
+  // one or if *it* is a PHI, bail out.
+ BasicBlock *NonConstBB = 0;
+ for (unsigned i = 0; i != NumPHIValues; ++i)
+ if (!isa<Constant>(PN->getIncomingValue(i))) {
+ if (NonConstBB) return 0; // More than one non-const value.
+ if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi.
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the incoming non-constant value is in I's block, we have an infinite
+ // loop.
+ if (NonConstBB == I.getParent())
+ return 0;
+ }
+
+ // If there is exactly one non-constant value, we can insert a copy of the
+ // operation in that block. However, if this is a critical edge, we would be
+  // inserting the computation on some other paths (e.g. inside a loop). Only
+ // do this if the pred block is unconditionally branching into the phi block.
+ if (NonConstBB) {
+ BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
+ if (!BI || !BI->isUnconditional()) return 0;
+ }
+
+ // Okay, we can do the transformation: create the new PHI node.
+ PHINode *NewPN = PHINode::Create(I.getType(), "");
+ NewPN->reserveOperandSpace(PN->getNumOperands()/2);
+ InsertNewInstBefore(NewPN, *PN);
+ NewPN->takeName(PN);
+
+ // Next, add all of the operands to the PHI.
+ if (I.getNumOperands() == 2) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ InV = BinaryOperator::Create(BO->getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp",
+ NonConstBB->getTerminator());
+ else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+ InV = CmpInst::Create(CI->getOpcode(),
+ CI->getPredicate(),
+ PN->getIncomingValue(i), C, "phitmp",
+ NonConstBB->getTerminator());
+ else
+ assert(0 && "Unknown binop!");
+
+ AddToWorkList(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else {
+ CastInst *CI = cast<CastInst>(&I);
+ const Type *RetTy = CI->getType();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
+ I.getType(), "phitmp",
+ NonConstBB->getTerminator());
+ AddToWorkList(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ }
+ return ReplaceInstUsesWith(I, NewPN);
+}
+
+
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+  // Add has the property that adding any two 2's complement numbers can
+  // produce at most one carry into the sign bit. As such, if LHS and RHS
+  // each have at least two sign bits, we know the addition of the two
+  // values will sign extend fine.
+ if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ return true;
+
+
+ // If one of the operands only has one non-zero bit, and if the other operand
+ // has a known-zero bit in a more significant place than it (not including the
+ // sign bit) the ripple may go up to and fill the zero, but won't change the
+ // sign. For example, (X & ~4) + 1.
+
+ // TODO: Implement.
+
+ return false;
+}
+
+
+Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // X + undef -> undef
+ if (isa<UndefValue>(RHS))
+ return ReplaceInstUsesWith(I, RHS);
+
+ // X + 0 --> X
+ if (!I.getType()->isFPOrFPVector()) { // NOTE: -0 + +0 = +0.
+ if (RHSC->isNullValue())
+ return ReplaceInstUsesWith(I, LHS);
+ } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+ (I.getType())->getValueAPF()))
+ return ReplaceInstUsesWith(I, LHS);
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
+ // X + (signbit) --> X ^ signbit
+ const APInt& Val = CI->getValue();
+ uint32_t BitWidth = Val.getBitWidth();
+ if (Val == APInt::getSignBit(BitWidth))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(i1) - 1 -> select i1, 0, -1
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (CI->isAllOnesValue() &&
+ ZI->getOperand(0)->getType() == Type::Int1Ty)
+ return SelectInst::Create(ZI->getOperand(0),
+ Constant::getNullValue(I.getType()),
+ ConstantInt::getAllOnesValue(I.getType()));
+ }
+
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ ConstantInt *XorRHS = 0;
+ Value *XorLHS = 0;
+ if (isa<ConstantInt>(RHSC) &&
+ match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits();
+ const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
+
+ uint32_t Size = TySizeBits / 2;
+ APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
+ APInt CFF80Val(-C0080Val);
+ do {
+ if (TySizeBits > Size) {
+ // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
+ // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
+ if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
+ (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
+ // This is a sign extend if the top bits are known zero.
+ if (!MaskedValueIsZero(XorLHS,
+ APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
+ Size = 0; // Not a sign ext, but can't be any others either.
+ break;
+ }
+ }
+ Size >>= 1;
+ C0080Val = APIntOps::lshr(C0080Val, Size);
+ CFF80Val = APIntOps::ashr(CFF80Val, Size);
+ } while (Size >= 1);
+
+ // FIXME: This shouldn't be necessary. When the backends can handle types
+ // with funny bit widths then this switch statement should be removed. It
+ // is just here to get the size of the "middle" type back up to something
+ // that the back ends can handle.
+ const Type *MiddleType = 0;
+ switch (Size) {
+ default: break;
+ case 32: MiddleType = Type::Int32Ty; break;
+ case 16: MiddleType = Type::Int16Ty; break;
+ case 8: MiddleType = Type::Int8Ty; break;
+ }
+ if (MiddleType) {
+ Instruction *NewTrunc = new TruncInst(XorLHS, MiddleType, "sext");
+ InsertNewInstBefore(NewTrunc, I);
+ return new SExtInst(NewTrunc, I.getType(), I.getName());
+ }
+ }
+ }
+
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // X + X --> X << 1
+ if (I.getType()->isInteger()) {
+ if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result;
+
+ if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
+ if (RHSI->getOpcode() == Instruction::Sub)
+ if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B
+ return ReplaceInstUsesWith(I, RHSI->getOperand(0));
+ }
+ if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
+ if (LHSI->getOpcode() == Instruction::Sub)
+ if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B
+ return ReplaceInstUsesWith(I, LHSI->getOperand(0));
+ }
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
+ if (LHS->getType()->isIntOrIntVector()) {
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum");
+ InsertNewInstBefore(NewAdd, I);
+ return BinaryOperator::CreateNeg(NewAdd);
+ }
+ }
+
+ return BinaryOperator::CreateSub(RHS, LHSV);
+ }
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castNegVal(RHS))
+ return BinaryOperator::CreateSub(LHS, V);
+
+ ConstantInt *C2;
+ if (Value *X = dyn_castFoldableMul(LHS, C2)) {
+ if (X == RHS) // X*C + X --> X * (C+1)
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
+
+ // X*C1 + X*C2 --> X * (C1+C2)
+ ConstantInt *C1;
+ if (X == dyn_castFoldableMul(RHS, C1))
+ return BinaryOperator::CreateMul(X, Add(C1, C2));
+ }
+
+ // X + X*C --> X * (C+1)
+ if (dyn_castFoldableMul(RHS, C2) == LHS)
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
+
+ // X + ~X --> -1 since ~X = -X-1
+ if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
+ if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
+ if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
+ return R;
+
+ // A+B --> A|B iff A and B have no bits set in common.
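+ // For example (illustrative), (X & 0xF0) + (Y & 0x0F) can never carry, so
+ // it computes the same value as (X & 0xF0) | (Y & 0x0F).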
+ if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ if (LHSKnownZero != 0) {
+ APInt RHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // No bits in common -> bitwise or.
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+ }
+
+ // W*X + Y*Z --> W * (X+Z) iff W == Y
+ if (I.getType()->isIntOrIntVector()) {
+ Value *W, *X, *Y, *Z;
+ if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
+ match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
+ if (W != Y) {
+ if (W == Z) {
+ std::swap(Y, Z);
+ } else if (Y == X) {
+ std::swap(W, X);
+ } else if (X == Z) {
+ std::swap(Y, Z);
+ std::swap(W, X);
+ }
+ }
+
+ if (W == Y) {
+ Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z,
+ LHS->getName()), I);
+ return BinaryOperator::CreateMul(W, NewAdd);
+ }
+ }
+ }
+
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
+ Value *X = 0;
+ if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
+
+ // (X & FF00) + xx00 -> (X+xx00) & FF00
+ if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
+ Constant *Anded = And(CRHS, C2);
+ if (Anded == CRHS) {
+ // See if all bits from the first bit set in the Add RHS up are included
+ // in the mask. First, get the rightmost bit.
+ const APInt& AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
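+ // e.g. (illustrative, i32) for AddRHSV == 0x0300: the lowest set bit is
+ // 0x0100, so AddRHSHighBits becomes ~0x00FF == 0xFFFFFF00.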
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS,
+ LHS->getName()), I);
+ return BinaryOperator::CreateAnd(NewAdd, C2);
+ }
+ }
+ }
+
+ // Try to fold constant add into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+ }
+
+ // add (cast *A to intptrtype) B -->
+ // cast (GEP (cast *A to i8*) B) to intptrtype
+ {
+ CastInst *CI = dyn_cast<CastInst>(LHS);
+ Value *Other = RHS;
+ if (!CI) {
+ CI = dyn_cast<CastInst>(RHS);
+ Other = LHS;
+ }
+ if (CI && CI->getType()->isSized() &&
+ (CI->getType()->getPrimitiveSizeInBits() ==
+ TD->getIntPtrType()->getPrimitiveSizeInBits())
+ && isa<PointerType>(CI->getOperand(0)->getType())) {
+ unsigned AS =
+ cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace();
+ Value *I2 = InsertBitCastBefore(CI->getOperand(0),
+ PointerType::get(Type::Int8Ty, AS), I);
+ I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I);
+ return new PtrToIntInst(I2, CI->getType());
+ }
+ }
+
+ // add (select X 0 (sub n A)) A --> select X A n
+ {
+ SelectInst *SI = dyn_cast<SelectInst>(LHS);
+ Value *A = RHS;
+ if (!SI) {
+ SI = dyn_cast<SelectInst>(RHS);
+ A = LHS;
+ }
+ if (SI && SI->hasOneUse()) {
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+ Value *N;
+
+ // Can we fold the add into the argument of the select?
+ // We check both true and false select arguments for a matching subtract.
+ if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the true select value.
+ return SelectInst::Create(SI->getCondition(), N, A);
+ if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the false select value.
+ return SelectInst::Create(SI->getCondition(), A, N);
+ }
+ }
+
+ // Check for X+0.0. Simplify it to X if we know X is not -0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+ return ReplaceInstUsesWith(I, LHS);
+
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
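+ // e.g. (illustrative) add i32 (sext i8 %x), 5 --> sext (add i8 %x, 5),
+ // valid because sext(trunc(5)) round-trips and the narrow add is checked
+ // not to overflow.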
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new, smaller add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),
+ "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ // Check for (add double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+ // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),
+ "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Op0 == Op1 && // sub X, X -> 0
+ !I.getType()->isFPOrFPVector())
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // If this is a 'B = x-(-A)', change to B = x+A...
+ if (Value *V = dyn_castNegVal(Op1))
+ return BinaryOperator::CreateAdd(Op0, V);
+
+ if (isa<UndefValue>(Op0))
+ return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ // Replace (-1 - A) with (~A)...
+ if (C->isAllOnesValue())
+ return BinaryOperator::CreateNot(Op1);
+
+ // C - ~X == X + (1+C)
+ Value *X = 0;
+ if (match(Op1, m_Not(m_Value(X))))
+ return BinaryOperator::CreateAdd(X, AddOne(C));
+
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
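+ // For i32 (illustrative): X >>u 31 is 0 or 1, so its negation is 0 or -1,
+ // which is exactly X >>s 31; the other direction is symmetric.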
+ if (C->isZero()) {
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
+ if (SI->getOpcode() == Instruction::LShr) {
+ if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ // Check to see if we are shifting out everything but the sign bit.
+ if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
+ SI->getType()->getPrimitiveSizeInBits()-1) {
+ // Ok, the transformation is safe. Insert AShr.
+ return BinaryOperator::Create(Instruction::AShr,
+ SI->getOperand(0), CU, SI->getName());
+ }
+ }
+ }
+ else if (SI->getOpcode() == Instruction::AShr) {
+ if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ // Check to see if we are shifting out everything but the sign bit.
+ if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
+ SI->getType()->getPrimitiveSizeInBits()-1) {
+ // Ok, the transformation is safe. Insert LShr.
+ return BinaryOperator::CreateLShr(
+ SI->getOperand(0), CU, SI->getName());
+ }
+ }
+ }
+ }
+ }
+
+ // Try to fold constant sub into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+ }
+
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+ if (Op1I->getOpcode() == Instruction::Add &&
+ !Op0->getType()->isFPOrFPVector()) {
+ if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
+ return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName());
+ else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
+ return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName());
+ else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
+ // C1-(X+C2) --> (C1-C2)-X
+ return BinaryOperator::CreateSub(Subtract(CI1, CI2),
+ Op1I->getOperand(0));
+ }
+ }
+
+ if (Op1I->hasOneUse()) {
+ // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
+ // is not used by anyone else...
+ //
+ if (Op1I->getOpcode() == Instruction::Sub &&
+ !Op1I->getType()->isFPOrFPVector()) {
+ // Swap the two operands of the subexpr...
+ Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
+ Op1I->setOperand(0, IIOp1);
+ Op1I->setOperand(1, IIOp0);
+
+ // Create the new top level add instruction...
+ return BinaryOperator::CreateAdd(Op0, Op1);
+ }
+
+ // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
+ //
+ if (Op1I->getOpcode() == Instruction::And &&
+ (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
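+ // getOperand(0)==Op0 yields index 1 and vice versa, selecting whichever
+ // operand of the AND is not Op0.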
+ Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
+
+ Value *NewNot =
+ InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I);
+ return BinaryOperator::CreateAnd(Op0, NewNot);
+ }
+
+ // 0 - (X sdiv C) -> (X sdiv -C)
+ if (Op1I->getOpcode() == Instruction::SDiv)
+ if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
+ if (CSI->isZero())
+ if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
+ return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
+ ConstantExpr::getNeg(DivRHS));
+
+ // X - X*C --> X * (1-C)
+ ConstantInt *C2 = 0;
+ if (dyn_castFoldableMul(Op1I, C2) == Op0) {
+ Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
+ return BinaryOperator::CreateMul(Op0, CP1);
+ }
+ }
+ }
+
+ if (!Op0->getType()->isFPOrFPVector())
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
+ return ReplaceInstUsesWith(I, Op0I->getOperand(1));
+ else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
+ return ReplaceInstUsesWith(I, Op0I->getOperand(0));
+ } else if (Op0I->getOpcode() == Instruction::Sub) {
+ if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
+ return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
+ }
+ }
+
+ ConstantInt *C1;
+ if (Value *X = dyn_castFoldableMul(Op0, C1)) {
+ if (X == Op1) // X*C - X --> X * (C-1)
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
+
+ ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
+ if (X == dyn_castFoldableMul(Op1, C2))
+ return BinaryOperator::CreateMul(X, Subtract(C1, C2));
+ }
+ return 0;
+}
+
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit. If so, set TrueIfSigned to whether
+/// the comparison is true when the input value is negative (i.e. its sign
+/// bit is set).
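+/// For example (illustrative, i8): "X u> 127" is a sign bit check with
+/// TrueIfSigned == true, since it holds exactly when bit 7 of X is set.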
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+ bool &TrueIfSigned) {
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS->isZero();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
+ TrueIfSigned = true;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == high-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS->getValue() ==
+ APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS->getValue().isSignBit();
+ default:
+ return false;
+ }
+}
+
+Instruction *InstCombiner::visitMul(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0);
+
+ if (isa<UndefValue>(I.getOperand(1))) // undef * X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // Simplify mul instructions with a constant RHS...
+ if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+ if (SI->getOpcode() == Instruction::Shl)
+ if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+ return BinaryOperator::CreateMul(SI->getOperand(0),
+ ConstantExpr::getShl(CI, ShOp));
+
+ if (CI->isZero())
+ return ReplaceInstUsesWith(I, Op1); // X * 0 == 0
+ if (CI->equalsInt(1)) // X * 1 == X
+ return ReplaceInstUsesWith(I, Op0);
+ if (CI->isAllOnesValue()) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ const APInt& Val = cast<ConstantInt>(CI)->getValue();
+ if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
+ return BinaryOperator::CreateShl(Op0,
+ ConstantInt::get(Op0->getType(), Val.logBase2()));
+ }
+ } else if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1F->isNullValue())
+ return ReplaceInstUsesWith(I, Op1);
+
+ // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
+ // ANSI says we can drop signals, so we can do this anyway." (from GCC)
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ } else if (isa<VectorType>(Op1->getType())) {
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op1);
+
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+ if (Op1V->isAllOnesValue()) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
+ if (CI->equalsInt(1))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
+ if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
+ isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) {
+ // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
+ Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0),
+ Op1, "tmp");
+ InsertNewInstBefore(Add, I);
+ Value *C1C2 = ConstantExpr::getMul(Op1,
+ cast<Constant>(Op0I->getOperand(1)));
+ return BinaryOperator::CreateAdd(Add, C1C2);
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
+ return BinaryOperator::CreateMul(Op0v, Op1v);
+
+ // (X / Y) * Y = X - (X % Y)
+ // (X / Y) * -Y = (X % Y) - X
+ {
+ Value *Op1 = I.getOperand(1);
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
+ if (!BO ||
+ (BO->getOpcode() != Instruction::UDiv &&
+ BO->getOpcode() != Instruction::SDiv)) {
+ Op1 = Op0;
+ BO = dyn_cast<BinaryOperator>(I.getOperand(1));
+ }
+ Value *Neg = dyn_castNegVal(Op1);
+ if (BO && BO->hasOneUse() &&
+ (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) &&
+ (BO->getOpcode() == Instruction::UDiv ||
+ BO->getOpcode() == Instruction::SDiv)) {
+ Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
+
+ Instruction *Rem;
+ if (BO->getOpcode() == Instruction::UDiv)
+ Rem = BinaryOperator::CreateURem(Op0BO, Op1BO);
+ else
+ Rem = BinaryOperator::CreateSRem(Op0BO, Op1BO);
+
+ InsertNewInstBefore(Rem, I);
+ Rem->takeName(BO);
+
+ if (Op1BO == Op1)
+ return BinaryOperator::CreateSub(Op0BO, Rem);
+ else
+ return BinaryOperator::CreateSub(Rem, Op0BO);
+ }
+ }
+
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateAnd(Op0, I.getOperand(1));
+
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // See if we can simplify things based on how the boolean was originally
+ // formed.
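+ // e.g. (illustrative, i32): X * zext(Y s< 0) becomes (ashr Y, 31) & X;
+ // the arithmetic shift smears the sign bit into an all-zero/all-one mask.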
+ CastInst *BoolCast = 0;
+ if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0))
+ if (CI->getOperand(0)->getType() == Type::Int1Ty)
+ BoolCast = CI;
+ if (!BoolCast)
+ if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(1)))
+ if (CI->getOperand(0)->getType() == Type::Int1Ty)
+ BoolCast = CI;
+ if (BoolCast) {
+ if (ICmpInst *SCI = dyn_cast<ICmpInst>(BoolCast->getOperand(0))) {
+ Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1);
+ const Type *SCOpTy = SCIOp0->getType();
+ bool TIS = false;
+
+ // If the icmp is true iff the sign bit of X is set, then convert this
+ // multiply into a shift/and combination.
+ if (isa<ConstantInt>(SCIOp1) &&
+ isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) &&
+ TIS) {
+ // Shift the X value right to turn it into "all signbits".
+ Constant *Amt = ConstantInt::get(SCIOp0->getType(),
+ SCOpTy->getPrimitiveSizeInBits()-1);
+ Value *V =
+ InsertNewInstBefore(
+ BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt,
+ BoolCast->getOperand(0)->getName()+
+ ".mask"), I);
+
+ // If the multiply type is not the same as the source type, sign extend
+ // or truncate to the multiply type.
+ if (I.getType() != V->getType()) {
+ uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits();
+ uint32_t DstBits = I.getType()->getPrimitiveSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc));
+ V = InsertCastBefore(opcode, V, I.getType(), I);
+ }
+
+ Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
+ return BinaryOperator::CreateAnd(V, OtherOp);
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
+/// instruction.
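+/// Since dividing or taking a remainder by zero is undefined, a select
+/// divisor with a constant-zero arm can be assumed to take the other arm;
+/// e.g. udiv X, (Cond ? 0 : Y) becomes udiv X, Y, and Cond can be assumed
+/// false in other users above this point.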
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we can replace the operand of the div/rem with 'Y' with no
+ // problem. However, the select, or the condition of the select, may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select and its condition each have only a single use, don't
+ // bother with this; exit early.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ AddToWorkList(BBI);
+ } else if (*I == SelectCond) {
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue() :
+ ConstantInt::getFalse();
+ AddToWorkList(BBI);
+ }
+ }
+
+ // Once we have passed the instruction itself, quit looking for it.
+ if (&*BBI == SI)
+ SI = 0;
+ if (&*BBI == SelectCond)
+ SelectCond = 0;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (SelectCond == 0 && SI == 0)
+ break;
+ }
+ return true;
+}
+
+
+/// This function implements the transforms on div instructions that work
+/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
+/// used by the visitors to those instructions.
+/// @brief Transforms common to all three div instructions
+Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // undef / X -> 0 for integer.
+ // undef / X -> undef for FP (the undef could be a snan).
+ if (isa<UndefValue>(Op0)) {
+ if (Op0->getType()->isFPOrFPVector())
+ return ReplaceInstUsesWith(I, Op0);
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+
+ // X / undef -> undef
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1);
+
+ return 0;
+}
+
+/// This function implements the transforms common to both integer division
+/// instructions (udiv and sdiv). It is called by the visitors to those integer
+/// division instructions.
+/// @brief Common integer divide transforms
+Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // (sdiv X, X) --> 1 (udiv X, X) --> 1
+ if (Op0 == Op1) {
+ if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
+ ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+ std::vector<Constant*> Elts(Ty->getNumElements(), CI);
+ return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+ }
+
+ ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+ return ReplaceInstUsesWith(I, CI);
+ }
+
+ if (Instruction *Common = commonDivTransforms(I))
+ return Common;
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // div X, 1 == X
+ if (RHS->equalsInt(1))
+ return ReplaceInstUsesWith(I, Op0);
+
+ // (X / C1) / C2 -> X / (C1*C2)
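+ // e.g. (X udiv 10) udiv 20 --> X udiv 200. If C1*C2 does not fit in the
+ // type, X is necessarily smaller than the combined divisor, so the
+ // quotient is 0.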
+ if (Instruction *LHS = dyn_cast<Instruction>(Op0))
+ if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
+ if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
+ if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ else
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ Multiply(RHS, LHSRHS));
+ }
+
+ if (!RHS->isZero()) { // avoid X udiv 0
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+ }
+
+ // 0 / X == 0, we don't need to preserve faults!
+ if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
+ if (LHS->equalsInt(0))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // For i1, division by zero is undefined, so this must be division by one.
+ if (I.getType() == Type::Int1Ty)
+ return ReplaceInstUsesWith(I, Op0);
+
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+ if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
+ // div X, 1 == X
+ if (X->isOne())
+ return ReplaceInstUsesWith(I, Op0);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+ // X udiv 2^C -> X >> C
+ // Check to see if this is an unsigned division with an exact power of 2,
+ // if so, convert to a right shift.
+ if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
+ return BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+
+ // X udiv C, where C >= signbit
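+ // The quotient here can only be 0 or 1 (2*C would already overflow), so
+ // e.g. for i8 (illustrative), X udiv 200 becomes (X u< 200) ? 0 : 1.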
+ if (C->getValue().isNegative()) {
+ Value *IC = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_ULT, Op0, C),
+ I);
+ return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
+ }
+ }
+
+ // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+ if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
+ if (RHSI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(RHSI->getOperand(0))) {
+ const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
+ if (C1.isPowerOf2()) {
+ Value *N = RHSI->getOperand(1);
+ const Type *NTy = N->getType();
+ if (uint32_t C2 = C1.logBase2()) {
+ Constant *C2V = ConstantInt::get(NTy, C2);
+ N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I);
+ }
+ return BinaryOperator::CreateLShr(Op0, N);
+ }
+ }
+ }
+
+ // udiv X, (select Cond, C1, C2) -->
+ // select Cond, (lshr X, log2(C1)), (lshr X, log2(C2))
+ // where C1 and C2 are powers of two.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+ if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+ const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
+ if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
+ // Compute the shift amounts
+ uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
+ // Construct the "on true" case of the select
+ Constant *TC = ConstantInt::get(Op0->getType(), TSA);
+ Instruction *TSI = BinaryOperator::CreateLShr(
+ Op0, TC, SI->getName()+".t");
+ TSI = InsertNewInstBefore(TSI, I);
+
+ // Construct the "on false" case of the select
+ Constant *FC = ConstantInt::get(Op0->getType(), FSA);
+ Instruction *FSI = BinaryOperator::CreateLShr(
+ Op0, FC, SI->getName()+".f");
+ FSI = InsertNewInstBefore(FSI, I);
+
+ // Construct the select instruction and return it.
+ return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // sdiv X, -1 == -X
+ if (RHS->isAllOnesValue())
+ return BinaryOperator::CreateNeg(Op0);
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a udiv.
+ if (I.getType()->isInteger()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+ return commonDivTransforms(I);
+}
+
+/// This function implements the transforms on rem instructions that work
+/// regardless of the kind of rem instruction it is (urem, srem, or frem). It
+/// is used by the visitors to those instructions.
+/// @brief Transforms common to all three rem instructions
+Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op0)) { // undef % X -> 0 for integer
+ if (I.getType()->isFPOrFPVector())
+ return ReplaceInstUsesWith(I, Op0); // undef % X -> undef for FP (could be SNaN)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ return 0;
+}
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Instruction *common = commonRemTransforms(I))
+ return common;
+
+ // 0 % X == 0 for integer, we don't need to preserve faults!
+ if (Constant *LHS = dyn_cast<Constant>(Op0))
+ if (LHS->isNullValue())
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // X % 0 == undef, we don't need to preserve faults!
+ if (RHS->equalsInt(0))
+ return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
+
+ if (RHS->equalsInt(1)) // X % 1 == 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+ } else if (isa<PHINode>(Op0I)) {
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // See if we can fold away this rem instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Instruction *common = commonIRemTransforms(I))
+ return common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // X urem 2^C -> X & (2^C-1)
+ // Check to see if this is an unsigned remainder with an exact power of 2,
+ // if so, convert to a bitwise and.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue().isPowerOf2())
+ return BinaryOperator::CreateAnd(Op0, SubOne(C));
+ }
+
+ if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ if (RHSI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(RHSI->getOperand(0))) {
+ if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
+ Constant *N1 = ConstantInt::getAllOnesValue(I.getType());
+ Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1,
+ "tmp"), I);
+ return BinaryOperator::CreateAnd(Op0, Add);
+ }
+ }
+ }
+
+ // urem X, (select Cond, C1, C2) --> select Cond, (and X, C1-1), (and X, C2-1)
+ // where C1 and C2 are powers of two.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
+ if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+ if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+ // STO == 0 and SFO == 0 handled above.
+ if ((STO->getValue().isPowerOf2()) &&
+ (SFO->getValue().isPowerOf2())) {
+ Value *TrueAnd = InsertNewInstBefore(
+ BinaryOperator::CreateAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
+ Value *FalseAnd = InsertNewInstBefore(
+ BinaryOperator::CreateAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
+ return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer rem common cases
+ if (Instruction *common = commonIRemTransforms(I))
+ return common;
+
+ if (Value *RHSNeg = dyn_castNegVal(Op1))
+ if (!isa<Constant>(RHSNeg) ||
+ (isa<ConstantInt>(RHSNeg) &&
+ cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
+ // X % -Y -> X % Y
+ AddUsesToWorkList(I);
+ I.setOperand(1, RHSNeg);
+ return &I;
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a urem.
+ if (I.getType()->isInteger()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
+ }
+ }
+
+ // If it's a constant vector, flip any negative values positive.
+ if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
+ unsigned VWidth = RHSV->getNumOperands();
+
+ bool hasNegative = false;
+ for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+ if (RHS->getValue().isNegative())
+ hasNegative = true;
+
+ if (hasNegative) {
+ std::vector<Constant *> Elts(VWidth);
+ for (unsigned i = 0; i != VWidth; ++i) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+ if (RHS->getValue().isNegative())
+ Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+ else
+ Elts[i] = RHS;
+ }
+ }
+
+ Constant *NewRHSV = ConstantVector::get(Elts);
+ if (NewRHSV != RHSV) {
+ AddUsesToWorkList(I);
+ I.setOperand(1, NewRHSV);
+ return &I;
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+ return commonRemTransforms(I);
+}
+
+// isOneBitSet - Return true if there is exactly one bit set in the specified
+// constant.
+static bool isOneBitSet(const ConstantInt *CI) {
+ return CI->getValue().isPowerOf2();
+}
+
+// isHighOnes - Return true if the constant is of the form 1+0+.
+// This is the same as lowones(~X).
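+// For example (i16, illustrative): 0xFF00 qualifies, since ~0xFF00 + 1 ==
+// 0x0100 is a power of two, while 0xF00F does not.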
+static bool isHighOnes(const ConstantInt *CI) {
+ return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign. It is illegal to mix them, e.g. (A u< B) | (A s> B).
+///
+/// Three bits are used to represent the condition, as follows:
+/// 0 A > B
+/// 1 A == B
+/// 2 A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
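+/// Logical operators then act bitwise on these codes; e.g. combining
+/// (A == B) with (A < B) by OR gives 010 | 100 = 110, i.e. A <= B.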
+///
+static unsigned getICmpCode(const ICmpInst *ICI) {
+ switch (ICI->getPredicate()) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ assert(0 && "Invalid ICmp predicate!");
+ return 0;
+ }
+}
+
+/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
+/// predicate into a three bit mask. It also returns whether it is an ordered
+/// predicate by reference.
+static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
+ isOrdered = false;
+ switch (CC) {
+ case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
+ case FCmpInst::FCMP_UNO: return 0; // 000
+ case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
+ case FCmpInst::FCMP_UGT: return 1; // 001
+ case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
+ case FCmpInst::FCMP_UEQ: return 2; // 010
+ case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
+ case FCmpInst::FCMP_UGE: return 3; // 011
+ case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
+ case FCmpInst::FCMP_ULT: return 4; // 100
+ case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
+ case FCmpInst::FCMP_UNE: return 5; // 101
+ case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
+ case FCmpInst::FCMP_ULE: return 6; // 110
+ // True -> 7
+ default:
+ // Not expecting FCMP_FALSE or FCMP_TRUE here.
+ assert(0 && "Unexpected FCmp predicate!");
+ return 0;
+ }
+}
+
+/// getICmpValue - This is the complement of getICmpCode, which turns a
+/// three-bit code and two operands into either a constant true or false, or
+/// a brand new ICmp instruction. The sign is passed in to determine which kind
+/// of predicate to use in the new icmp instruction.
+static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
+ switch (code) {
+ default: assert(0 && "Illegal ICmp code!");
+ case 0: return ConstantInt::getFalse();
+ case 1:
+ if (sign)
+ return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
+ else
+ return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
+ case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS);
+ case 3:
+ if (sign)
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
+ else
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
+ case 4:
+ if (sign)
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
+ else
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
+ case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS);
+ case 6:
+ if (sign)
+ return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
+ else
+ return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
+ case 7: return ConstantInt::getTrue();
+ }
+}
+
+/// getFCmpValue - This is the complement of getFCmpCode, which turns a
+/// three-bit code and two operands into either a constant true or a brand
+/// new FCmp instruction. isordered is passed in to determine which kind of
+/// predicate to use in the new fcmp instruction.
+static Value *getFCmpValue(bool isordered, unsigned code,
+ Value *LHS, Value *RHS) {
+ switch (code) {
+ default: assert(0 && "Illegal FCmp code!");
+ case 0:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
+ case 1:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
+ case 2:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
+ case 3:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
+ case 4:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
+ case 5:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
+ case 6:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
+ case 7: return ConstantInt::getTrue();
+ }
+}
+
+/// PredicatesFoldable - Return true if both predicates match sign or if at
+/// least one of them is an equality comparison (which is signless).
+static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (ICmpInst::isSignedPredicate(p1) == ICmpInst::isSignedPredicate(p2)) ||
+ (ICmpInst::isSignedPredicate(p1) && ICmpInst::isEquality(p2)) ||
+ (ICmpInst::isSignedPredicate(p2) && ICmpInst::isEquality(p1));
+}
+
+namespace {
+// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
+struct FoldICmpLogical {
+ InstCombiner &IC;
+ Value *LHS, *RHS;
+ ICmpInst::Predicate pred;
+ FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI)
+ : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)),
+ pred(ICI->getPredicate()) {}
+ bool shouldApply(Value *V) const {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(V))
+ if (PredicatesFoldable(pred, ICI->getPredicate()))
+ return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) ||
+ (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS));
+ return false;
+ }
+ Instruction *apply(Instruction &Log) const {
+ ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0));
+ if (ICI->getOperand(0) != LHS) {
+ assert(ICI->getOperand(1) == LHS);
+ ICI->swapOperands(); // Swap the LHS and RHS of the ICmp
+ }
+
+ ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1));
+ unsigned LHSCode = getICmpCode(ICI);
+ unsigned RHSCode = getICmpCode(RHSICI);
+ unsigned Code;
+ switch (Log.getOpcode()) {
+ case Instruction::And: Code = LHSCode & RHSCode; break;
+ case Instruction::Or: Code = LHSCode | RHSCode; break;
+ case Instruction::Xor: Code = LHSCode ^ RHSCode; break;
+ default: assert(0 && "Illegal logical opcode!"); return 0;
+ }
+
+ bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) ||
+ ICmpInst::isSignedPredicate(ICI->getPredicate());
+
+ Value *RV = getICmpValue(isSigned, Code, LHS, RHS);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value...
+ return IC.ReplaceInstUsesWith(Log, RV);
+ }
+};
+} // end anonymous namespace
+
+// OptAndOp - This handles expressions of the form ((val OP C1) & C2), where
+// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+// guaranteed to be a binary operator.
+Instruction *InstCombiner::OptAndOp(Instruction *Op,
+ ConstantInt *OpRHS,
+ ConstantInt *AndRHS,
+ BinaryOperator &TheAnd) {
+ Value *X = Op->getOperand(0);
+ Constant *Together = 0;
+ if (!Op->isShift())
+ Together = And(AndRHS, OpRHS);
+
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ if (Op->hasOneUse()) {
+ // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ Instruction *And = BinaryOperator::CreateAnd(X, AndRHS);
+ InsertNewInstBefore(And, TheAnd);
+ And->takeName(Op);
+ return BinaryOperator::CreateXor(And, Together);
+ }
+ break;
+ case Instruction::Or:
+ if (Together == AndRHS) // (X | C) & C --> C
+ return ReplaceInstUsesWith(TheAnd, AndRHS);
+
+ if (Op->hasOneUse() && Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Instruction *Or = BinaryOperator::CreateOr(X, Together);
+ InsertNewInstBefore(Or, TheAnd);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+ break;
+ case Instruction::Add:
+ if (Op->hasOneUse()) {
+ // Adding one to a single-bit bit-field should be turned into an XOR
+ // of the bit. The first thing to check is whether this AND is with a
+ // single-bit constant.
+ const APInt& AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
+
+ // If there is only one bit set...
+ if (isOneBitSet(cast<ConstantInt>(AndRHS))) {
+ // Ok, at this point, we know that we are masking the result of the
+ // ADD down to exactly one bit. If the constant we are adding has
+ // no bits set below this bit, then we can eliminate the ADD.
+ const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
+
+ // Check to see if any bits below the one bit set in AndRHSV are set.
+ if ((AddRHS & (AndRHSV-1)) == 0) {
+ // If not, the only thing that can affect the output of the AND is
+ // the bit specified by AndRHSV. If that bit is set, the effect of
+ // the XOR is to toggle the bit. If it is clear, then the ADD has
+ // no effect.
+ if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+ TheAnd.setOperand(0, X);
+ return &TheAnd;
+ } else {
+ // Pull the XOR out of the AND.
+ Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS);
+ InsertNewInstBefore(NewAnd, TheAnd);
+ NewAnd->takeName(Op);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
+ }
+ }
+ }
+ }
+ break;
+
+ case Instruction::Shl: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(AndRHS->getValue() & ShlMask);
+
+ if (CI->getValue() == ShlMask) {
+ // Masking out bits that the shift already masks
+ return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
+ } else if (CI != AndRHS) { // Reducing bits set in and.
+ TheAnd.setOperand(1, CI);
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::LShr:
+ {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now! This only applies to
+ // unsigned shifts, because a signed shr may bring in set bits!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(AndRHS->getValue() & ShrMask);
+
+ if (CI->getValue() == ShrMask) {
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op);
+ } else if (CI != AndRHS) {
+ TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::AShr:
+ // Signed shr.
+ // See if this is shifting in some sign extension, then masking it out
+ // with an and.
+ if (Op->hasOneUse()) {
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ Constant *C = ConstantInt::get(AndRHS->getValue() & ShrMask);
+ if (C == AndRHS) { // Masking out bits shifted in.
+ // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+ // Make the argument unsigned.
+ Value *ShVal = Op->getOperand(0);
+ ShVal = InsertNewInstBefore(
+ BinaryOperator::CreateLShr(ShVal, OpRHS,
+ Op->getName()), TheAnd);
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+ }
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more
+/// efficient (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned
+/// indicates whether to treat V, Lo, and Hi as signed or not. IB is the
+/// location to insert new instructions.
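+/// For example (illustrative, unsigned): the inside test 5 <= V && V < 10
+/// becomes (V - 5) u< 5, a single subtract plus one unsigned compare.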
+Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside,
+ Instruction &IB) {
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+ ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+ "Lo is not <= Hi in range emission code!");
+
+ if (Inside) {
+ if (Lo == Hi) // Trivially false.
+ return new ICmpInst(ICmpInst::ICMP_NE, V, V);
+
+ // V >= Min && V < Hi --> V < Hi
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+ return new ICmpInst(pred, V, Hi);
+ }
+
+ // Emit V-Lo <u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Lo);
+ Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
+ InsertNewInstBefore(Add, IB);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
+ }
+
+ if (Lo == Hi) // Trivially true.
+ return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
+
+ // V < Min || V >= Hi -> V > Hi-1
+ Hi = SubOne(cast<ConstantInt>(Hi));
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ return new ICmpInst(pred, V, Hi);
+ }
+
+ // Emit V-Lo >u Hi-1-Lo
+ // Note that Hi has already had one subtracted from it, above.
+ ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+ Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
+ InsertNewInstBefore(Add, IB);
+ Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
+}
+
+// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
+// any number of 0s on either side. The 1s are allowed to wrap from LSB to
+// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+// not, since all 1s are not contiguous.
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+ const APInt& V = Val->getValue();
+ uint32_t BitWidth = Val->getType()->getBitWidth();
+ if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
+
+ // look for the first zero bit after the run of ones
+ MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+ // look for the first non-zero bit
+ ME = V.getActiveBits();
+ return true;
+}
+
+/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
+/// where isSub determines whether the operator is a sub. If we can fold one of
+/// the following xforms:
+///
+/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
+/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+///
+/// return (A +/- B).
+///
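+/// For example (i16, illustrative): ((A | 0xFF00) + B) & 0x00FF folds to
+/// (A + B) & 0x00FF, because bits above the mask can only carry upward.
+///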
+Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
+ ConstantInt *Mask, bool isSub,
+ Instruction &I) {
+ Instruction *LHSI = dyn_cast<Instruction>(LHS);
+ if (!LHSI || LHSI->getNumOperands() != 2 ||
+ !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
+
+ ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
+
+ switch (LHSI->getOpcode()) {
+ default: return 0;
+ case Instruction::And:
+ if (And(N, Mask) == Mask) {
+ // If the AndRHS is a power of two minus one (0+1+), this is simple.
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) ==
+ Mask->getValue().getBitWidth())
+ break;
+
+ // Otherwise, if Mask is 0+1+0+, and if B is known to be zero in the
+ // low 0+ part, we don't need any explicit masks to take those bits out
+ // of A. If that is all N is, ignore it.
+ uint32_t MB = 0, ME = 0;
+ if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
+ uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
+ if (MaskedValueIsZero(RHS, Mask))
+ break;
+ }
+ }
+ return 0;
+ case Instruction::Or:
+ case Instruction::Xor:
+ // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
+ && And(N, Mask)->isZero())
+ break;
+ return 0;
+ }
+
+ Instruction *New;
+ if (isSub)
+ New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold");
+ else
+ New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold");
+ return InsertNewInstBefore(New, I);
+}
+
+/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ Value *Val, *Val2;
+ ConstantInt *LHSCst, *RHSCst;
+ ICmpInst::Predicate LHSCC, RHSCC;
+
+ // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
+ if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
+ !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
+ return 0;
+
+ // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2
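+ // e.g. (A u< 8) & (B u< 8) --> (A|B) u< 8: a value is below 8 exactly
+ // when bits 3 and up are clear, and OR just unions the set bits.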
+ if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT &&
+ LHSCst->getValue().isPowerOf2()) {
+ Instruction *NewOr = BinaryOperator::CreateOr(Val, Val2);
+ InsertNewInstBefore(NewOr, I);
+ return new ICmpInst(LHSCC, NewOr, LHSCst);
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) & (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (ICmpInst::isSignedPredicate(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ ICmpInst::isSignedPredicate(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and ANDing the result
+ // together. Because of the above check, we know that we only have
+ // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here. We also know
+ // (from the FoldICmpLogical check above) that the two constants
+ // are not equal and that the larger constant is on the RHS.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false
+ case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
+ case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
+ case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
+ return ReplaceInstUsesWith(I, LHS);
+ }
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_ULT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
+ return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
+ break; // (X != 13 & X u< 15) -> no change
+ case ICmpInst::ICMP_SLT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
+ return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
+ break; // (X != 13 & X s< 15) -> no change
+ case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
+ case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_NE:
+ if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
+ Val->getName()+".off");
+ InsertNewInstBefore(Add, I);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Add,
+ ConstantInt::get(Add->getType(), 1));
+ }
+ break; // (X != 13 & X != 15) -> no change
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
+ case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false
+ case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
+ case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+ return new ICmpInst(LHSCC, Val, RHSCst);
+ break; // (X u> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true, I);
+ case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+ return new ICmpInst(LHSCC, Val, RHSCst);
+ break; // (X s> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true, I);
+ case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+
+ return 0;
+}
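+
+ // A sketch of the range case above (illustrative): for i32 %x,
+ //   %a = icmp ugt i32 %x, 13
+ //   %b = icmp ult i32 %x, 15
+ //   %c = and i1 %a, %b
+ // collapses to %off = add i32 %x, -14; %c = icmp ult i32 %off, 1,
+ // a single range test around 14.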
+
+
+Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op1)) // X & undef -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // and X, X = X
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(I, Op1);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (!isa<VectorType>(I.getType())) {
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ } else {
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+ if (CP->isAllOnesValue()) // X & <-1,-1> -> X
+ return ReplaceInstUsesWith(I, I.getOperand(0));
+ } else if (isa<ConstantAggregateZero>(Op1)) {
+ return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
+ }
+ }
+
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+ const APInt& AndRHSMask = AndRHS->getValue();
+ APInt NotAndRHS(~AndRHSMask);
+
+ // Optimize a variety of ((val OP C1) & C2) combinations...
+ if (isa<BinaryOperator>(Op0)) {
+ Instruction *Op0I = cast<Instruction>(Op0);
+ Value *Op0LHS = Op0I->getOperand(0);
+ Value *Op0RHS = Op0I->getOperand(1);
+ switch (Op0I->getOpcode()) {
+ case Instruction::Xor:
+ case Instruction::Or:
+ // If the mask is only needed on one incoming arm, push it up.
+ if (Op0I->hasOneUse()) {
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+ // Not masking anything out for the LHS, move to RHS.
+ Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
+ InsertNewInstBefore(NewRHS, I);
+ return BinaryOperator::Create(
+ cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS);
+ }
+ if (!isa<Constant>(Op0RHS) &&
+ MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+ // Not masking anything out for the RHS, move to LHS.
+ Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
+ InsertNewInstBefore(NewLHS, I);
+ return BinaryOperator::Create(
+ cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS);
+ }
+ }
+
+ break;
+ case Instruction::Add:
+ // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+ if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
+ break;
+
+ case Instruction::Sub:
+ // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+
+ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
+ // has 1's for all bits that the subtraction with A might affect.
+ if (Op0I->hasOneUse()) {
+ uint32_t BitWidth = AndRHSMask.getBitWidth();
+ uint32_t Zeros = AndRHSMask.countLeadingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
+
+ ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
+ if (!(A && A->isZero()) && // avoid infinite recursion.
+ MaskedValueIsZero(Op0LHS, Mask)) {
+ Instruction *NewNeg = BinaryOperator::CreateNeg(Op0RHS);
+ InsertNewInstBefore(NewNeg, I);
+ return BinaryOperator::CreateAnd(NewNeg, AndRHS);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
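+ // (Illustrative: any nonzero shift moves the set bit away from bit 0,
+ // so the result is 1 exactly when x == 0.)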
+ if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+ Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS,
+ Constant::getNullValue(I.getType()));
+ InsertNewInstBefore(NewICmp, I);
+ return new ZExtInst(NewICmp, I.getType());
+ }
+ break;
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+ if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
+ return Res;
+ } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
+ // If this is an integer truncation or change from signed-to-unsigned, and
+ // if the source is an and/or with immediate, transform it. This
+ // frequently occurs for bitfield accesses.
+ if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
+ if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
+ CastOp->getNumOperands() == 2)
+ if (ConstantInt *AndCI = dyn_cast<ConstantInt>(CastOp->getOperand(1))) {
+ if (CastOp->getOpcode() == Instruction::And) {
+ // Change: and (cast (and X, C1) to T), C2
+ // into : and (cast X to T), trunc_or_bitcast(C1)&C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Instruction *NewCast = CastInst::CreateTruncOrBitCast(
+ CastOp->getOperand(0), I.getType(),
+ CastOp->getName()+".shrunk");
+ NewCast = InsertNewInstBefore(NewCast, I);
+ // trunc_or_bitcast(C1)&C2
+ Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
+ } else if (CastOp->getOpcode() == Instruction::Or) {
+ // Change: and (cast (or X, C1) to T), C2
+ // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
+ Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+ if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) // trunc(C1)&C2
+ return ReplaceInstUsesWith(I, AndRHS);
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ Value *Op0NotVal = dyn_castNotVal(Op0);
+ Value *Op1NotVal = dyn_castNotVal(Op1);
+
+ if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // (~A & ~B) == (~(A | B)) - De Morgan's Law
+ if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
+ Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ InsertNewInstBefore(Or, I);
+ return BinaryOperator::CreateNot(Or);
+ }
+
+ {
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ if (match(Op0, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) // (A | ?) & A --> A
+ return ReplaceInstUsesWith(I, Op1);
+
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ }
+
+ if (match(Op1, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0 || B == Op0) // A & (A | ?) --> A
+ return ReplaceInstUsesWith(I, Op0);
+
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ }
+
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1) { // (A^B)&A -> A&(A^B)
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ } else if (B == Op1) { // (A^B)&B -> B&(B^A)
+ cast<BinaryOperator>(Op0)->swapOperands();
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ }
+ }
+
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == Op0) { // B&(A^B) -> B&(B^A)
+ cast<BinaryOperator>(Op1)->swapOperands();
+ std::swap(A, B);
+ }
+ if (A == Op0) { // A&(A^B) -> A & ~B
+ Instruction *NotB = BinaryOperator::CreateNot(B, "tmp");
+ InsertNewInstBefore(NotB, I);
+ return BinaryOperator::CreateAnd(A, NotB);
+ }
+ }
+
+ // (A&((~A)|B)) -> A&B
+ if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
+ match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(A, Op1);
+ if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
+ match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(A, Op0);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
+ // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
+ if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
+ return R;
+
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
+ if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
+ return Res;
+ }
+
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ // Only do this if the casts both really cause code to be generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType(), TD) &&
+ ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType(), TD)) {
+ Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+
+ // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Instruction *NewOp =
+ InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0),
+ SI1->getOperand(0),
+ SI0->getName()), I);
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // If and'ing two fcmp instructions, try to combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+ RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // false.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0),
+ RHS->getOperand(0));
+ }
+ } else {
+ Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS;
+ FCmpInst::Predicate Op0CC, Op1CC;
+ if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) &&
+ match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) {
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ else if (Op0CC == FCmpInst::FCMP_FALSE ||
+ Op1CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ else if (Op0CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, Op1);
+ else if (Op1CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, Op0);
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op1Pred == 0) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Pred, Op1Pred);
+ std::swap(Op0Ordered, Op1Ordered);
+ }
+ if (Op0Pred == 0) {
+ // uno && ueq -> uno && (uno || eq) -> ueq
+ // ord && olt -> ord && (ord && lt) -> olt
+ if (Op0Ordered == Op1Ordered)
+ return ReplaceInstUsesWith(I, Op1);
+ // uno && oeq -> uno && (ord && eq) -> false
+ // uno && ord -> false
+ if (!Op0Ordered)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ // ord && ueq -> ord && (uno || eq) -> oeq
+ return cast<Instruction>(getFCmpValue(true, Op1Pred,
+ Op0LHS, Op0RHS));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+/// CollectBSwapParts - Analyze the specified subexpression and see if it is
+/// capable of providing pieces of a bswap. The subexpression provides pieces
+/// of a bswap if it is proven that each of the non-zero bytes in the output of
+/// the expression came from the corresponding "byte swapped" byte in some other
+/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
+/// we know that the expression deposits the low byte of %X into the high byte
+/// of the bswap result and that all other bytes are zero. This expression is
+/// accepted, the high byte of ByteValues is set to %X to indicate a correct
+/// match.
+///
+/// This function returns true if the match was unsuccessful and false if it
+/// succeeded.
+/// On entry to the function the "OverallLeftShift" is a signed integer value
+/// indicating the number of bytes that the subexpression is later shifted. For
+/// example, if the expression is later right shifted by 16 bits, the
+/// OverallLeftShift value would be -2 on entry. This is used to specify which
+/// byte of ByteValues is actually being set.
+///
+/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
+/// byte is masked to zero by a user. For example, in (X & 255), X will be
+/// processed with a bytemask of 1. Because the bytemask is 32 bits wide, this
+/// limits the function to values of up to 32 bytes (256 bits). ByteMask is
+/// always in the local (OverallLeftShift) coordinate space.
+///
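+/// As a worked illustration (not part of the original algorithm notes): for
+/// an i32 input, ByteMask starts as 0xF; recursing through "lshr i32 %X, 8"
+/// shifts the mask to 0xE, and a subsequent "and" with 0x0000FF00 would then
+/// clear every remaining mask bit except the one for byte 1.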
+static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
+ SmallVector<Value*, 8> &ByteValues) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If this is an or instruction, it may be an inner node of the bswap.
+ if (I->getOpcode() == Instruction::Or) {
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues) ||
+ CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical shift by a constant multiple of 8, recurse with
+ // OverallLeftShift and ByteMask adjusted.
+ if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+ unsigned ShAmt =
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined and of a byte value.
+ if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+ return true;
+
+ unsigned ByteShift = ShAmt >> 3;
+ if (I->getOpcode() == Instruction::Shl) {
+ // X << 2 -> collect(X, +2)
+ OverallLeftShift += ByteShift;
+ ByteMask >>= ByteShift;
+ } else {
+ // X >>u 2 -> collect(X, -2)
+ OverallLeftShift -= ByteShift;
+ ByteMask <<= ByteShift;
+ ByteMask &= (~0U >> (32-ByteValues.size()));
+ }
+
+ if (OverallLeftShift >= (int)ByteValues.size()) return true;
+ if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical 'and' with a mask that clears bytes, clear the
+ // corresponding bytes in ByteMask.
+ if (I->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(I->getOperand(1))) {
+ // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
+ unsigned NumBytes = ByteValues.size();
+ APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+ const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+ for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
+ // If this byte is masked out by a later operation, we don't care what
+ // the and mask is.
+ if ((ByteMask & (1 << i)) == 0)
+ continue;
+
+ // If the AndMask is all zeros for this byte, clear the bit.
+ APInt MaskB = AndMask & Byte;
+ if (MaskB == 0) {
+ ByteMask &= ~(1U << i);
+ continue;
+ }
+
+ // If the AndMask is not all ones for this byte, it's not a bytezap.
+ if (MaskB != Byte)
+ return true;
+
+ // Otherwise, this byte is kept.
+ }
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+ }
+
+ // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
+ // the input value to the bswap. Some observations: 1) if more than one byte
+ // is demanded from this input, then it could not be successfully assembled
+ // into a byteswap. At least one of the two bytes would not be aligned
+ // with its ultimate destination.
+ if (!isPowerOf2_32(ByteMask)) return true;
+ unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
+
+ // 2) The input and ultimate destinations must line up: if byte 3 of an i32
+ // is demanded, it needs to go into byte 0 of the result. This means that the
+ // byte needs to be shifted until it lands in the right byte bucket. The
+ // shift amount depends on the position: if the byte is coming from the high
+ // part of the value (e.g. byte 3) then it must be shifted right. If from the
+ // low part, it must be shifted left.
+ unsigned DestByteNo = InputByteNo + OverallLeftShift;
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+
+ // If the destination byte value is already defined, the values are or'd
+ // together, which isn't a bswap (unless it's an or of the same bits).
+ if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
+ return true;
+ ByteValues[DestByteNo] = V;
+ return false;
+}
+
+/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
+/// If so, insert the new bswap intrinsic and return it.
+Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
+ const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ if (!ITy || ITy->getBitWidth() % 16 ||
+ // ByteMask only allows up to 32-byte values.
+ ITy->getBitWidth() > 32*8)
+ return 0; // Can only bswap pairs of bytes. Can't do vectors.
+
+ /// ByteValues - For each byte of the result, we keep track of which value
+ /// defines each byte.
+ SmallVector<Value*, 8> ByteValues;
+ ByteValues.resize(ITy->getBitWidth()/8);
+
+ // Try to find all the pieces corresponding to the bswap.
+ uint32_t ByteMask = ~0U >> (32-ByteValues.size());
+ if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+ return 0;
+
+ // Pull out the value that provides byte 0 of the result.
+ Value *V = ByteValues[0];
+ if (V == 0) return 0; // Didn't find a byte? Must be zero.
+
+ // Check to make sure that all of the bytes come from the same value.
+ for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
+ if (ByteValues[i] != V)
+ return 0;
+ const Type *Tys[] = { ITy };
+ Module *M = I.getParent()->getParent()->getParent();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+ return CallInst::Create(F, V);
+}
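+
+ // For instance (illustrative), this turns the canonical i16 idiom
+ //   %hi = shl i16 %x, 8
+ //   %lo = lshr i16 %x, 8
+ //   %r  = or i16 %hi, %lo
+ // into a single call to the llvm.bswap.i16 intrinsic.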
+
+/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
+/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1); if
+/// so, we can simplify this expression to "cond ? C : D" or "cond ? C : B".
+static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
+ Value *C, Value *D) {
+ // If A is not a select of -1/0, this cannot match.
+ Value *Cond = 0;
+ if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
+ return 0;
+
+ // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
+ if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
+ return SelectInst::Create(Cond, C, B);
+ if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+ // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
+ if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
+ return SelectInst::Create(Cond, C, D);
+ if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ return 0;
+}
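+
+ // A minimal sketch of the pattern (illustrative): with
+ //   %a = select i1 %cond, i32 -1, i32 0
+ //   %d = select i1 %cond, i32 0, i32 -1
+ // the expression (%a & %c) | (%b & %d) yields %c when %cond is true and
+ // %b when it is false, i.e. select %cond, %c, %b.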
+
+/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ Value *Val, *Val2;
+ ConstantInt *LHSCst, *RHSCst;
+ ICmpInst::Predicate LHSCC, RHSCC;
+
+ // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
+ if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
+ !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
+ return 0;
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) | (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (ICmpInst::isSignedPredicate(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ ICmpInst::isSignedPredicate(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and or'ing the result
+ // together. Because of the above check, we know that we only have
+ // ICMP_EQ, ICMP_NE, ICMP_[US]LT, and ICMP_[US]GT here. We also know
+ // (from the FoldICmpLogical check above) that the two constants are
+ // not equal.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ if (LHSCst == SubOne(RHSCst)) { // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
+ Val->getName()+".off");
+ InsertNewInstBefore(Add, I);
+ AddCST = Subtract(AddOne(RHSCst), LHSCst);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
+ }
+ break; // (X == 13 | X == 15) -> no change
+ case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
+ case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
+ return ReplaceInstUsesWith(I, RHS);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
+ case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
+ case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
+ case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
+ // If RHSCst is UMAXINT, "X u> UMAXINT" is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(false))
+ return ReplaceInstUsesWith(I, LHS);
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false, I);
+ case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+ // If RHSCst is SMAXINT, "X s> SMAXINT" is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(true))
+ return ReplaceInstUsesWith(I, LHS);
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false, I);
+ case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+ return 0;
+}
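+
+ // Worked example for the equality case above (illustrative): in
+ // (X == 13 | X == 14), the add X, -13 yields 0 for X == 13, 1 for
+ // X == 14, and (with wrapping) at least 2 for anything else, so the
+ // whole 'or' becomes icmp ult (add X, -13), 2.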
+
+/// FoldOrWithConstants - This helper function folds:
+///
+/// ((A | B) & C1) | (B & C2)
+///
+/// into:
+///
+/// (A & C1) | B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1) return 0;
+
+ Value *V1 = 0;
+ ConstantInt *CI2 = 0;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue()) return 0;
+
+ if (V1 == A || V1 == B) {
+ Instruction *NewOp =
+ InsertNewInstBefore(BinaryOperator::CreateAnd((V1 == A) ? B : A, CI1), I);
+ return BinaryOperator::CreateOr(NewOp, V1);
+ }
+
+ return 0;
+}
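+
+ // For example (illustrative): with C1 == 1 and C2 == -2 (1 ^ -2 == -1),
+ // ((A | B) & 1) | (B & -2) keeps bit 0 from A|B and all other bits from
+ // B, which is exactly (A & 1) | B.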
+
+Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op1)) // X | undef -> -1
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ // or X, X = X
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(I, Op0);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (!isa<VectorType>(I.getType())) {
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ } else if (isa<ConstantAggregateZero>(Op1)) {
+ return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+ if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
+ return ReplaceInstUsesWith(I, I.getOperand(1));
+ }
+
+ // or X, -1 == -1; the cases below fold (X & C1) | C2 and (X ^ C1) | C2.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *C1 = 0; Value *X = 0;
+ // (X & C1) | C2 --> (X | C2) & (C1|C2)
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
+ Instruction *Or = BinaryOperator::CreateOr(X, RHS);
+ InsertNewInstBefore(Or, I);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateAnd(Or,
+ ConstantInt::get(RHS->getValue() | C1->getValue()));
+ }
+
+ // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
+ Instruction *Or = BinaryOperator::CreateOr(X, RHS);
+ InsertNewInstBefore(Or, I);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateXor(Or,
+ ConstantInt::get(C1->getValue() & ~RHS->getValue()));
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ Value *A = 0, *B = 0;
+ ConstantInt *C1 = 0, *C2 = 0;
+
+ if (match(Op0, m_And(m_Value(A), m_Value(B))))
+ if (A == Op1 || B == Op1) // (A & ?) | A --> A
+ return ReplaceInstUsesWith(I, Op1);
+ if (match(Op1, m_And(m_Value(A), m_Value(B))))
+ if (A == Op0 || B == Op0) // A | (A & ?) --> A
+ return ReplaceInstUsesWith(I, Op0);
+
+ // (A | B) | C and A | (B | C) -> bswap if possible.
+ // (A >> B) | (C << D) and (A << B) | (C >> D) -> bswap if possible.
+ if (match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value())) ||
+ (match(Op0, m_Shift(m_Value(), m_Value())) &&
+ match(Op1, m_Shift(m_Value(), m_Value())))) {
+ if (Instruction *BSwap = MatchBSwap(I))
+ return BSwap;
+ }
+
+ // (X^C)|Y -> (X|Y)^C iff Y&C == 0
+ if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op1, C1->getValue())) {
+ Instruction *NOr = BinaryOperator::CreateOr(A, Op1);
+ InsertNewInstBefore(NOr, I);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // Y|(X^C) -> (X|Y)^C iff Y&C == 0
+ if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op0, C1->getValue())) {
+ Instruction *NOr = BinaryOperator::CreateOr(A, Op0);
+ InsertNewInstBefore(NOr, I);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // (A & C)|(B & D)
+ Value *C = 0, *D = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ Value *V1 = 0, *V2 = 0, *V3 = 0;
+ C1 = dyn_cast<ConstantInt>(C);
+ C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2) { // (A & C1)|(B & C2)
+ // If we have: ((V + N) & C1) | (V & C2)
+ // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ // replace with V+N.
+ if (C1->getValue() == ~C2->getValue()) {
+ if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
+ match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ }
+ // Or commutes, try both ways.
+ if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
+ match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ }
+ }
+ V1 = 0; V2 = 0; V3 = 0;
+ }
+
+ // Check to see if we have any common things being and'ed. If so, find the
+ // terms for V1 & (V2|V3).
+ if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
+ if (A == B) // (A & C)|(A & D) == A & (C|D)
+ V1 = A, V2 = C, V3 = D;
+ else if (A == D) // (A & C)|(B & A) == A & (B|C)
+ V1 = A, V2 = B, V3 = C;
+ else if (C == B) // (A & C)|(C & D) == C & (A|D)
+ V1 = C, V2 = A, V3 = D;
+ else if (C == D) // (A & C)|(B & C) == C & (A|B)
+ V1 = C, V2 = A, V3 = B;
+
+ if (V1) {
+ Value *Or =
+ InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I);
+ return BinaryOperator::CreateAnd(V1, Or);
+ }
+ }
+
+ // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants
+ if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+ return Match;
+
+ // ((A&~B)|(~A&B)) -> A^B
+ if ((match(C, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, D);
+ // ((~B&A)|(~A&B)) -> A^B
+ if ((match(A, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, D);
+ // ((A&~B)|(B&~A)) -> A^B
+ if ((match(C, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, B);
+ // ((~B&A)|(B&~A)) -> A^B
+ if ((match(A, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, B);
+ }
+
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Instruction *NewOp =
+ InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0),
+ SI1->getOperand(0),
+ SI0->getName()), I);
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
+ match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
+ match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
+ if (Ret) return Ret;
+ }
+
+ if (match(Op0, m_Not(m_Value(A)))) { // ~A | Op1
+ if (A == Op1) // ~A | A == -1
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+ } else {
+ A = 0;
+ }
+ // Note, A is still live here!
+ if (match(Op1, m_Not(m_Value(B)))) { // Op0 | ~B
+ if (Op0 == B)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
+ Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B,
+ I.getName()+".demorgan"), I);
+ return BinaryOperator::CreateNot(And);
+ }
+ }
+
+ // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
+ if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
+ return R;
+
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
+ return Res;
+ }
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
+ !isa<ICmpInst>(Op1C->getOperand(0))) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType(), TD) &&
+ ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType(), TD)) {
+ Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+ }
+
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0),
+ RHS->getOperand(0));
+ }
+ } else {
+ Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS;
+ FCmpInst::Predicate Op0CC, Op1CC;
+ if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) &&
+ match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) {
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ else if (Op0CC == FCmpInst::FCMP_TRUE ||
+ Op1CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ else if (Op0CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, Op1);
+ else if (Op1CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, Op0);
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op0Ordered == Op1Ordered) {
+ // If both are ordered or unordered, return a new fcmp with
+ // or'ed predicates.
+ Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred,
+ Op0LHS, Op0RHS);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value...
+ return ReplaceInstUsesWith(I, RV);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+namespace {
+
+// XorSelf - Implements: X ^ X --> 0
+struct XorSelf {
+ Value *RHS;
+ XorSelf(Value *rhs) : RHS(rhs) {}
+ bool shouldApply(Value *LHS) const { return LHS == RHS; }
+ Instruction *apply(BinaryOperator &Xor) const {
+ return &Xor;
+ }
+};
+
+}
+
+Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op1)) {
+ if (isa<UndefValue>(Op0))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
+ }
+
+ // xor X, X = 0, even if X is nested in a sequence of Xor's.
+ if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
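+ // The self-assignment below only exists to silence an unused-variable
+ // warning when asserts are compiled out (NDEBUG builds).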
+ assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result;
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (!isa<VectorType>(I.getType())) {
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ } else if (isa<ConstantAggregateZero>(Op1)) {
+ return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
+ }
+
+ // Is this a ~ operation?
+ if (Value *NotOp = dyn_castNotVal(&I)) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+ // ~(~X | Y) --> (X & ~Y) - De Morgan's Law
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+ if (Op0I->getOpcode() == Instruction::And ||
+ Op0I->getOpcode() == Instruction::Or) {
+ if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+ Instruction *NotY =
+ BinaryOperator::CreateNot(Op0I->getOperand(1),
+ Op0I->getOperand(1)->getName()+".not");
+ InsertNewInstBefore(NotY, I);
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ else
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
+ }
+ }
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) {
+ // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
+ return new ICmpInst(ICI->getInversePredicate(),
+ ICI->getOperand(0), ICI->getOperand(1));
+
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
+ return new FCmpInst(FCI->getInversePredicate(),
+ FCI->getOperand(0), FCI->getOperand(1));
+ }
+
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+ if (RHS == ConstantExpr::getCast(Opcode, ConstantInt::getTrue(),
+ Op0C->getDestTy())) {
+ Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
+ CI->getOpcode(), CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1)), I);
+ NewCI->takeName(CI);
+ return CastInst::Create(Opcode, NewCI, Op0C->getType());
+ }
+ }
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ // ~(c-X) == X-c-1 == X+(-c-1)
+ if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ // ~(X+c) --> (-c-1)-X
+ if (RHS->isAllOnesValue()) {
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
+ Op0I->getOperand(0));
+ } else if (RHS->getValue().isSignBit()) {
+ // (X + C) ^ signbit -> (X + C + signbit)
+ Constant *C = ConstantInt::get(RHS->getValue() + Op0CI->getValue());
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+
+ }
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1^C2) iff (X&C1) == 0
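+ // E.g. (illustrative): if X's two low bits are known zero, then
+ // (X|3)^5 == X^6, i.e. X^(C1^C2).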
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = And(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ AddToWorkList(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
+ if (X == Op1)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
+ if (X == Op0)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+ if (Op1I) {
+ Value *A, *B;
+ if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0) { // B^(B|A) == (A|B)^B
+ Op1I->swapOperands();
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
+ return ReplaceInstUsesWith(I, B); // A^(A^B) == B
+ } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
+ return ReplaceInstUsesWith(I, A); // A^(B^A) == B
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){
+ if (A == Op0) { // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ }
+ }
+
+ BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+ if (Op0I) {
+ Value *A, *B;
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && Op0I->hasOneUse()) {
+ if (A == Op1) // (B|A)^B == (A|B)^B
+ std::swap(A, B);
+ if (B == Op1) { // (A|B)^B == A & ~B
+ Instruction *NotB =
+ InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I);
+ return BinaryOperator::CreateAnd(A, NotB);
+ }
+ } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
+ return ReplaceInstUsesWith(I, B); // (A^B)^A == B
+ } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
+ return ReplaceInstUsesWith(I, A); // (B^A)^A == B
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){
+ if (A == Op1) // (A&B)^A -> (B&A)^A
+ std::swap(A, B);
+ if (B == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ Instruction *N =
+ InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I);
+ return BinaryOperator::CreateAnd(N, Op1);
+ }
+ }
+ }
+
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ if (Op0I && Op1I && Op0I->isShift() &&
+ Op0I->getOpcode() == Op1I->getOpcode() &&
+ Op0I->getOperand(1) == Op1I->getOperand(1) &&
+ (Op0I->hasOneUse() || Op1I->hasOneUse())) {
+ Instruction *NewOp =
+ InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0),
+ Op1I->getOperand(0),
+ Op0I->getName()), I);
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+ Op1I->getOperand(1));
+ }
+
+ if (Op0I && Op1I) {
+ Value *A, *B, *C, *D;
+ // (A & B)^(A | B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | B)^(A & B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+
+ // (A & B)^(C & D)
+ if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
+ match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ // (X & Y)^(X & Z) -> (Y^Z) & X
+ Value *X = 0, *Y = 0, *Z = 0;
+ if (A == C)
+ X = A, Y = B, Z = D;
+ else if (A == D)
+ X = A, Y = B, Z = C;
+ else if (B == C)
+ X = B, Y = A, Z = D;
+ else if (B == D)
+ X = B, Y = A, Z = C;
+
+ if (X) {
+ Instruction *NewOp =
+ InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I);
+ return BinaryOperator::CreateAnd(NewOp, X);
+ }
+ }
+ }
+
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
+ return R;
+
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ // Only do this if the casts both really cause code to be generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType(), TD) &&
+ ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType(), TD)) {
+ Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
+/// overflowed for this type.
+static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
+ ConstantInt *In2, bool IsSigned = false) {
+ Result = cast<ConstantInt>(Add(In1, In2));
+
+ if (IsSigned) {
+ if (In2->getValue().isNegative())
+ return Result->getValue().sgt(In1->getValue());
+ return Result->getValue().slt(In1->getValue());
+ }
+ return Result->getValue().ult(In1->getValue());
+}
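+
+ // E.g. (illustrative, i8): 100 + 50 wraps to -106; In2 is positive and
+ // -106 s< 100, so signed overflow is reported.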
+
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
+ ConstantInt *In2, bool IsSigned = false) {
+ Result = cast<ConstantInt>(Subtract(In1, In2));
+
+ if (IsSigned) {
+ if (In2->getValue().isNegative())
+ return Result->getValue().slt(In1->getValue());
+ return Result->getValue().sgt(In1->getValue());
+ }
+ return Result->getValue().ugt(In1->getValue());
+}
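+
+ // E.g. (illustrative, i8): -100 - 50 wraps to 106; In2 is positive and
+ // 106 s> -100, so signed overflow is reported.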
+
+/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
+/// code necessary to compute the offset from the base pointer (without adding
+/// in the base pointer). Return the result as a signed integer of intptr size.
+static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
+ TargetData &TD = IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ const Type *IntPtrTy = TD.getIntPtrType();
+ Value *Result = Constant::getNullValue(IntPtrTy);
+
+ // Build a mask for high order bits.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+ if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
+ Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
+ else
+ Result = IC.InsertNewInstBefore(
+ BinaryOperator::CreateAdd(Result,
+ ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs"), I);
+ continue;
+ }
+
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+ Scale = ConstantExpr::getMul(OC, Scale);
+ if (Constant *RC = dyn_cast<Constant>(Result))
+ Result = ConstantExpr::getAdd(RC, Scale);
+ else {
+ // Emit an add instruction.
+ Result = IC.InsertNewInstBefore(
+ BinaryOperator::CreateAdd(Result, Scale,
+ GEP->getName()+".offs"), I);
+ }
+ continue;
+ }
+ // Convert to correct type.
+ if (Op->getType() != IntPtrTy) {
+ if (Constant *OpC = dyn_cast<Constant>(Op))
+ Op = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true);
+ else
+ Op = IC.InsertNewInstBefore(CastInst::CreateIntegerCast(Op, IntPtrTy,
+ true,
+ Op->getName()+".c"), I);
+ }
+ if (Size != 1) {
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ if (Constant *OpC = dyn_cast<Constant>(Op))
+ Op = ConstantExpr::getMul(OpC, Scale);
+ else // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale,
+ GEP->getName()+".idx"), I);
+ }
+
+ // Emit an add instruction.
+ if (isa<Constant>(Op) && isa<Constant>(Result))
+ Result = ConstantExpr::getAdd(cast<Constant>(Op),
+ cast<Constant>(Result));
+ else
+ Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result,
+ GEP->getName()+".offs"), I);
+ }
+ return Result;
+}
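+
+ // A sketch of the emitted code (illustrative, assuming i32 elements and
+ // 32-bit pointers): for "getelementptr [10 x i32]* %A, i32 0, i32 %i" the
+ // offset is materialized as "mul i32 %i, 4" plus an add of the constant
+ // part, which later instcombine iterations can fold further.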
+
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This latter form is less amenable to optimization though, and we are
+/// allowed to generate the first by knowing that pointer arithmetic doesn't
+/// overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+ InstCombiner &IC) {
+ TargetData &TD = IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If there are, give up
+ // and let the caller emit the offset the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+    // Cast to the intptr type in case a truncation occurs. If an extension is
+    // needed, we don't need to bother extending: the extension won't affect
+    // where the computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+ VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
+ VariableIdx->getNameStart(), &I);
+ return VariableIdx;
+ }
+
+  // Otherwise, there is a constant offset. The computation we will do will be
+  // modulo the pointer size, so get that size.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType();
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+ true /*SExt*/,
+ VariableIdx->getNameStart(), &I);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else. At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
+ assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
+
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
+
+ Value *PtrBase = GEPLHS->getOperand(0);
+ if (PtrBase == RHS) {
+ // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow. See if we can output an optimized form.
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS, I, *this);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+ Constant::getNullValue(Offset->getType()));
+ } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
+ // If the base pointers are different, but the indices are the same, just
+ // compare the base pointer.
+ if (PtrBase != GEPRHS->getOperand(0)) {
+ bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+ IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+ GEPRHS->getOperand(0)->getType();
+ if (IndicesTheSame)
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ IndicesTheSame = false;
+ break;
+ }
+
+ // If all indices are the same, just compare the base pointers.
+ if (IndicesTheSame)
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+ GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+ // Otherwise, the base pointers are different and the indices are
+ // different, bail out.
+ return 0;
+ }
+
+ // If one of the GEPs has all zero indices, recurse.
+ bool AllZeros = true;
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPLHS->getOperand(i)) ||
+ !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+ ICmpInst::getSwappedPredicate(Cond), I);
+
+ // If the other GEP has all zero indices, recurse.
+ AllZeros = true;
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPRHS->getOperand(i)) ||
+ !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+ if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+ // If the GEPs only differ by one index, compare it.
+ unsigned NumDifferences = 0; // Keep track of # differences.
+ unsigned DiffOperand = 0; // The operand that differs.
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+ GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+ // Irreconcilable differences.
+ NumDifferences = 2;
+ break;
+ } else {
+ if (NumDifferences++) break;
+ DiffOperand = i;
+ }
+ }
+
+ if (NumDifferences == 0) // SAME GEP?
+ return ReplaceInstUsesWith(I, // No comparison is needed here.
+ ConstantInt::get(Type::Int1Ty,
+ ICmpInst::isTrueWhenEqual(Cond)));
+
+ else if (NumDifferences == 1) {
+ Value *LHSV = GEPLHS->getOperand(DiffOperand);
+ Value *RHSV = GEPRHS->getOperand(DiffOperand);
+ // Make sure we do a signed comparison here.
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+ }
+ }
+
+ // Only lower this if the icmp is the only user of the GEP or if we expect
+ // the result to fold to a constant!
+ if ((isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)) ---> (OFFSET1 cmp OFFSET2)
+ Value *L = EmitGEPOffset(GEPLHS, I, *this);
+ Value *R = EmitGEPOffset(GEPRHS, I, *this);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+ }
+ }
+ return 0;
+}
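+// Illustrative example of the fold above (an editor's sketch, not from the
+// patch): when two GEPs share a base pointer and differ in a single index,
+//   icmp ult (gep i32* %P, i64 %i), (gep i32* %P, i64 %j)
+// becomes a signed compare of the indices:
+//   icmp slt i64 %i, %j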
+
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return 0;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return 0; // Unknown.
+
+  // Check that the input is converted from an integer type that is small
+  // enough to preserve all bits. TODO: check here for "known" sign bits.
+  // This would allow us to handle, e.g., (fptosi (x >>s 62) to float) if x
+  // is i64.
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+
+ // If this is a uitofp instruction, we need an extra bit to hold the sign.
+ bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+ if (LHSUnsigned)
+ ++InputSize;
+
+ // If the conversion would lose info, don't hack on this.
+ if ((int)InputSize > MantissaWidth)
+ return 0;
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+  // not a NaN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: assert(0 && "Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+ break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+ break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+ break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case FCmpInst::FCMP_ORD:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ case FCmpInst::FCMP_UNO:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ }
+
+ const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+
+ if (!LHSUnsigned) {
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ }
+ } else {
+ // If the RHS value is > UnsignedMax, fold the comparison. This handles
+ // +INF and large values.
+ APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+ UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+ APFloat::rmNearestTiesToEven);
+ if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
+ Pred == ICmpInst::ICMP_ULE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ }
+ }
+
+ if (!LHSUnsigned) {
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ }
+ }
+
+ // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+ // [0, UMAX], but it may still be fractional. See if it is fractional by
+ // casting the FP value to the integer value and back, checking for equality.
+ // Don't do this for zero, because -0.0 is not fractional.
+ Constant *RHSInt = LHSUnsigned
+ ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero()) {
+ bool Equal = LHSUnsigned
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ if (!Equal) {
+          // If we had a comparison against a fractional value, we have to
+          // adjust the compare predicate and sometimes the value. RHSInt is
+          // RHSC rounded towards zero at this point.
+ switch (Pred) {
+ default: assert(0 && "Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ case ICmpInst::ICMP_ULE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> false
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ break;
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // (float)int < -4.4 --> false
+ // (float)int < 4.4 --> int <= 4
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ Pred = ICmpInst::ICMP_ULE;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> true
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // (float)int >= -4.4 --> true
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ Pred = ICmpInst::ICMP_UGT;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
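+// Illustrative example (an editor's sketch, not from the patch): a float has
+// a 24-bit mantissa, so an i16 input converts losslessly, and
+//   fcmp olt (sitofp i16 %x to float), 4.4
+// becomes, after the fractional-value predicate adjustment above,
+//   icmp sle i16 %x, 4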
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+ bool Changed = SimplifyCompare(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Fold trivial predicates.
+ if (I.getPredicate() == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ if (I.getPredicate() == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+
+ // Simplify 'fcmp pred X, X'
+ if (Op0 == Op1) {
+ switch (I.getPredicate()) {
+ default: assert(0 && "Unknown predicate!");
+ case FCmpInst::FCMP_UEQ: // True if unordered or equal
+ case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal
+ case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ case FCmpInst::FCMP_OGT: // True if ordered and greater than
+ case FCmpInst::FCMP_OLT: // True if ordered and less than
+ case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+
+ case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
+ case FCmpInst::FCMP_ULT: // True if unordered or less than
+ case FCmpInst::FCMP_UGT: // True if unordered or greater than
+ case FCmpInst::FCMP_UNE: // True if unordered or not equal
+ // Canonicalize these to be 'fcmp uno %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_UNO);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+
+ case FCmpInst::FCMP_ORD: // True if ordered (no nans)
+ case FCmpInst::FCMP_OEQ: // True if ordered and equal
+ case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
+ case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
+ // Canonicalize these to be 'fcmp ord %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_ORD);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+ }
+ }
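+  // For example (illustrative): "fcmp ult float %x, %x" can only be true if
+  // %x is a NaN, so it canonicalizes to "fcmp uno float %x, 0.0".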
+
+ if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef
+ return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ // If the constant is a nan, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and...
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ assert(FCmpInst::isUnordered(I.getPredicate()) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ }
+ }
+
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Select:
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (LHSI->hasOneUse()) {
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ // Fold the known value into the constant operand.
+ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op2 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
+ LHSI->getOperand(2), RHSC,
+ I.getName()), I);
+ } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ // Fold the known value into the constant operand.
+ Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op1 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
+ LHSI->getOperand(1), RHSC,
+ I.getName()), I);
+ }
+ }
+
+ if (Op1)
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ break;
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+ bool Changed = SimplifyCompare(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ const Type *Ty = Op0->getType();
+
+ // icmp X, X
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+ I.isTrueWhenEqual()));
+
+ if (isa<UndefValue>(Op1)) // X icmp undef -> undef
+ return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
+
+ // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
+ // addresses never equal each other! We already know that Op0 != Op1.
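+  // For example (illustrative): "icmp eq i8* %stack_buf, @global_var" folds
+  // to false, given %stack_buf is an alloca and @global_var is a global.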
+ if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
+ isa<ConstantPointerNull>(Op0)) &&
+ (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
+ isa<ConstantPointerNull>(Op1)))
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+ !I.isTrueWhenEqual()));
+
+ // icmp's with boolean values can always be turned into bitwise operations
+ if (Ty == Type::Int1Ty) {
+ switch (I.getPredicate()) {
+ default: assert(0 && "Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
+ Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp");
+ InsertNewInstBefore(Xor, I);
+ return BinaryOperator::CreateNot(Xor);
+ }
+    case ICmpInst::ICMP_NE:                  // icmp ne i1 A, B -> A^B
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ case ICmpInst::ICMP_UGT:
+ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
+ // FALL THROUGH
+    case ICmpInst::ICMP_ULT: { // icmp ult i1 A, B -> ~A & B
+ Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
+ InsertNewInstBefore(Not, I);
+ return BinaryOperator::CreateAnd(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGT:
+ std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
+ Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
+ InsertNewInstBefore(Not, I);
+ return BinaryOperator::CreateAnd(Not, Op0);
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(Op0, Op1); // Change icmp uge -> icmp ule
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
+ Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
+ InsertNewInstBefore(Not, I);
+ return BinaryOperator::CreateOr(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(Op0, Op1); // Change icmp sge -> icmp sle
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
+ Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
+ InsertNewInstBefore(Not, I);
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+ }
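+  // For example (illustrative): "icmp ult i1 %a, %b" is true only for
+  // %a == 0, %b == 1, so it becomes "and (xor i1 %a, true), %b".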
+
+ unsigned BitWidth = 0;
+ if (TD)
+ BitWidth = TD->getTypeSizeInBits(Ty);
+ else if (isa<IntegerType>(Ty))
+ BitWidth = Ty->getPrimitiveSizeInBits();
+
+ bool isSignBit = false;
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A = 0, *B = 0;
+
+ // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isNullValue() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+ // (icmp cond A B) if cond is equality
+ return new ICmpInst(I.getPredicate(), A, B);
+ }
+
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below.
+ switch (I.getPredicate()) {
+ default: break;
+ case ICmpInst::ICMP_ULE:
+ if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
+ case ICmpInst::ICMP_SLE:
+ if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
+ case ICmpInst::ICMP_UGE:
+ if (CI->isMinValue(false)) // A >=u MIN -> TRUE
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return new ICmpInst(ICmpInst::ICMP_UGT, Op0, SubOne(CI));
+ case ICmpInst::ICMP_SGE:
+ if (CI->isMinValue(true)) // A >=s MIN -> TRUE
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
+ }
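+    // For example (illustrative): "icmp sle i32 %x, 41" becomes
+    // "icmp slt i32 %x, 42", so later code only has to handle lt/gt forms.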
+
+ // If this comparison is a normal comparison, it demands all
+ // bits, if it is a sign bit comparison, it only demands the sign bit.
+ bool UnusedBit;
+ isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+ }
+
+ // See if we can fold the comparison based on range information we can get
+ // by checking whether bits are known to be zero or one in the input.
+ if (BitWidth != 0) {
+ APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+ if (SimplifyDemandedBits(I.getOperandUse(0),
+ isSignBit ? APInt::getSignBit(BitWidth)
+ : APInt::getAllOnesValue(BitWidth),
+ Op0KnownZero, Op0KnownOne, 0))
+ return &I;
+ if (SimplifyDemandedBits(I.getOperandUse(1),
+ APInt::getAllOnesValue(BitWidth),
+ Op1KnownZero, Op1KnownOne, 0))
+ return &I;
+
+ // Given the known and unknown bits, compute a range that the LHS could be
+ // in. Compute the Min, Max and RHS values based on the known bits. For the
+ // EQ and NE we use unsigned values.
+ APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+ APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+ if (ICmpInst::isSignedPredicate(I.getPredicate())) {
+ ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ } else {
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ }
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Just constant fold this now so
+ // that code below can assume that Min != Max.
+ if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+ return new ICmpInst(I.getPredicate(), ConstantInt::get(Op0Min), Op1);
+ if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+ return new ICmpInst(I.getPredicate(), Op0, ConstantInt::get(Op1Min));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
+ switch (I.getPredicate()) {
+ default: assert(0 && "Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ break;
+ case ICmpInst::ICMP_NE:
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CI->isMinValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ ConstantInt::getAllOnesValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+        if (Op1Min == Op0Max-1)        // A >u C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CI->isMaxValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ ConstantInt::getNullValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+      if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(B)
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+ if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ break;
+ case ICmpInst::ICMP_SLE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+ if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ break;
+ case ICmpInst::ICMP_UGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+ if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ break;
+ case ICmpInst::ICMP_ULE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+ if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ break;
+ }
+
+ // Turn a signed comparison into an unsigned one if both operands
+ // are known to have the same sign.
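+    // For example (illustrative): if the sign bits of %x and %y are both
+    // known zero, "icmp slt i32 %x, %y" becomes "icmp ult i32 %x, %y".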
+ if (I.isSignedPredicate() &&
+ ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+ (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ }
+
+ // Test if the ICmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return 0;
+
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+ return Res;
+ }
+
+ // Handle icmp with constant (but not simple integer constant) RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::GetElementPtr:
+ if (RHSC->isNullValue()) {
+ // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+ bool isAllZeros = true;
+ for (unsigned i = 1, e = LHSI->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(LHSI->getOperand(i)) ||
+ !cast<Constant>(LHSI->getOperand(i))->isNullValue()) {
+ isAllZeros = false;
+ break;
+ }
+ if (isAllZeros)
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ }
+ break;
+
+ case Instruction::PHI:
+        // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (LHSI->hasOneUse()) {
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ // Fold the known value into the constant operand.
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ // Insert a new ICmp of the other select operand.
+ Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
+ LHSI->getOperand(2), RHSC,
+ I.getName()), I);
+ } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ // Fold the known value into the constant operand.
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ // Insert a new ICmp of the other select operand.
+ Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
+ LHSI->getOperand(1), RHSC,
+ I.getName()), I);
+ }
+ }
+
+ if (Op1)
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ break;
+ }
+ case Instruction::Malloc:
+ // If we have (malloc != null), and if the malloc has a single use, we
+ // can assume it is successful and remove the malloc.
+ if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
+ AddToWorkList(LHSI);
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+ !I.isTrueWhenEqual()));
+ }
+ break;
+ }
+ }
+
+ // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
+ if (User *GEP = dyn_castGetElementPtr(Op0))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+ return NI;
+ if (User *GEP = dyn_castGetElementPtr(Op1))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+ ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ return NI;
+
+ // Test to see if the operands of the icmp are casted versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, we do so
+ // now.
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+ if (isa<PointerType>(Op0->getType()) &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // We keep moving the cast from the left operand over to the right
+ // operand, where it can often be eliminated completely.
+ Op0 = CI->getOperand(0);
+
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
+ // so eliminate it as well.
+ if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = CI2->getOperand(0);
+
+ // If Op1 is a constant, we can fold the cast into the constant.
+ if (Op0->getType() != Op1->getType()) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+ } else {
+ // Otherwise, cast the RHS right before the icmp
+ Op1 = InsertBitCastBefore(Op1, Op0->getType(), I);
+ }
+ }
+ return new ICmpInst(I.getPredicate(), Op0, Op1);
+ }
+ }
+
+ if (isa<CastInst>(Op0)) {
+ // Handle the special case of: icmp (cast bool to X), <cst>
+ // This comes up when you have code like
+ // int X = A < B;
+ // if (X) ...
+ // For generality, we handle any zero-extension of any operand comparison
+ // with a constant or another cast from the same type.
+ if (isa<ConstantInt>(Op1) || isa<CastInst>(Op1))
+ if (Instruction *R = visitICmpInstWithCastAndCast(I))
+ return R;
+ }
+
+ // See if it's the same type of instruction on the left and right.
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+ if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
+ Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSignedPredicate()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+
+ if (CI->getValue().isMaxSignedValue()) {
+ ICmpInst::Predicate Pred = I.isSignedPredicate()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
+ break;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Instruction *And1 = BinaryOperator::CreateAnd(Op0I->getOperand(0),
+ Mask);
+ Instruction *And2 = BinaryOperator::CreateAnd(Op1I->getOperand(0),
+ Mask);
+ InsertNewInstBefore(And1, I);
+ InsertNewInstBefore(And2, I);
+ return new ICmpInst(I.getPredicate(), And1, And2);
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ // ~x < ~y --> y < x
+ { Value *A, *B;
+ if (match(Op0, m_Not(m_Value(A))) &&
+ match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ }
+
+ if (I.isEquality()) {
+ Value *A, *B, *C, *D;
+
+ // -x == -y --> x == y
+ if (match(Op0, m_Neg(m_Value(A))) &&
+ match(Op1, m_Neg(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), A, B);
+
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+ // A^c1 == C^c2 --> A == C^(c1^c2)
+ ConstantInt *C1, *C2;
+ if (match(B, m_ConstantInt(C1)) &&
+ match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+ Constant *NC = ConstantInt::get(C1->getValue() ^ C2->getValue());
+ Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp");
+ return new ICmpInst(I.getPredicate(), A,
+ InsertNewInstBefore(Xor, I));
+ }
+
+ // A^B == A^D -> B == D
+ if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+ if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+ if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+ if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+ }
+ }
+
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ // (A-B) == A -> B == 0
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B,
+ Constant::getNullValue(B->getType()));
+
+ // A == (A-B) -> B == 0
+ if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B,
+ Constant::getNullValue(B->getType()));
+
+ // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
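+    // For example (illustrative):
+    //   icmp eq (and i32 %x, %z), (and i32 %y, %z)
+    //     --> icmp eq (and (xor i32 %x, %y), %z), 0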
+ if (Op0->hasOneUse() && Op1->hasOneUse() &&
+ match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_And(m_Value(C), m_Value(D)))) {
+ Value *X = 0, *Y = 0, *Z = 0;
+
+ if (A == C) {
+ X = B; Y = D; Z = A;
+ } else if (A == D) {
+ X = B; Y = C; Z = A;
+ } else if (B == C) {
+ X = A; Y = D; Z = B;
+ } else if (B == D) {
+ X = A; Y = C; Z = B;
+ }
+
+ if (X) { // Build (X^Y) & Z
+ Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I);
+ Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I);
+ I.setOperand(0, Op1);
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
+ return &I;
+ }
+ }
+ }
+ return Changed ? &I : 0;
+}
+
+
+/// FoldICmpDivCst - Fold "icmp pred ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS) {
+ ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+ const APInt &CmpRHSV = CmpRHS->getValue();
+
+ // FIXME: If the operand types don't match the type of the divide
+ // then don't attempt this transform. The code below doesn't have the
+ // logic to deal with a signed divide and an unsigned compare (and
+ // vice versa). This is because (x /s C1) <s C2 produces different
+ // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
+ // if it finds it.
+ bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate())
+ return 0;
+ if (DivRHS->isZero())
+ return 0; // The ProdOV computation fails on divide by zero.
+ if (DivIsSigned && DivRHS->isAllOnesValue())
+ return 0; // The overflow computation also screws up here
+ if (DivRHS->isOne())
+ return 0; // Not worth bothering, and eliminates some funny cases
+ // with INT_MIN.
+
+ // Compute Prod = CI * DivRHS. We are essentially solving an equation
+ // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CI). By solving for X we can turn this into a range check
+ // instead of computing a divide.
+ ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+
+ // Determine if the product overflows by seeing if the product is
+ // not equal to the divide. Make sure we do the same kind of divide
+ // as in the LHS instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+ // Get the ICmp opcode
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+
+ // Figure out the interval that is being checked. For example, a comparison
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // Compute this interval based on the constants involved and the signedness of
+ // the compare/divide. This computes a half-open interval, keeping track of
+  // whether either value in the interval overflows. After analysis, each
+  // overflow variable is set to 0 if its corresponding bound variable is
+  // valid, -1 if it overflowed off the bottom end, or +1 if it overflowed off
+  // the top end.
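+  // For example (illustrative), "X /u 5 == 3" gives Prod = 15 and the
+  // half-open interval [15, 20), so the equality becomes a range test of X
+  // against [15, 20).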
+ int LoOverflow = 0, HiOverflow = 0;
+ ConstantInt *LoBound = 0, *HiBound = 0;
+
+ if (!DivIsSigned) { // udiv
+ // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod;
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+ if (CmpRHSV == 0) { // (X / pos) op 0
+ // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
+ LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
+ HiBound = DivRHS;
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+ } else { // (X / pos) op neg
+ // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
+ HiBound = AddOne(Prod);
+ LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow) {
+ ConstantInt* DivNeg = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg,
+ true) ? -1 : 0;
+ }
+ }
+ } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+ if (CmpRHSV == 0) { // (X / neg) op 0
+ // e.g. X/-5 op 0 --> [-4, 5)
+ LoBound = AddOne(DivRHS);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ if (HiBound == DivRHS) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
+ }
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ // e.g. X/-5 op 3 --> [-19, -14)
+ HiBound = AddOne(Prod);
+ HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow)
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow = HiOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+ }
+
+ // Dividing by a negative swaps the condition. LT <-> GT
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ Value *X = DivI->getOperand(0);
+ switch (Pred) {
+ default: assert(0 && "Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+ else if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ else if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ else
+ return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+ else if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ else if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ else
+ return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+ else if (HiOverflow == -1) // High bound less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ else
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHSI,
+ ConstantInt *RHS) {
+ const APInt &RHSV = RHS->getValue();
+
+ switch (LHSI->getOpcode()) {
+ case Instruction::Trunc:
+ if (ICI.isEquality() && LHSI->hasOneUse()) {
+ // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+ // of the high bits truncated out of x are known.
+ unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+ SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
+ APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+ ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If all the high bits are known, we can do this xform.
+ if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+ // Pull in the high bits from known-ones set.
+ APInt NewRHS(RHS->getValue());
+ NewRHS.zext(SrcBits);
+ NewRHS |= KnownOne;
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantInt::get(NewRHS));
+ }
+ }
+ break;
+
+ case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
+ if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+      // If this is a comparison that tests the signbit (X < 0) or (X > -1),
+ // fold the xor.
+ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+ (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+ Value *CompareVal = LHSI->getOperand(0);
+
+ // If the sign bit of the XorCST is not set, there is no change to
+ // the operation, just stop using the Xor.
+ if (!XorCST->getValue().isNegative()) {
+ ICI.setOperand(0, CompareVal);
+ AddToWorkList(LHSI);
+ return &ICI;
+ }
+
+ // Was the old condition true if the operand is positive?
+ bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+ // If so, the new one isn't.
+ isTrueIfPositive ^= true;
+
+ if (isTrueIfPositive)
+ return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS));
+ else
+ return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, AddOne(RHS));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
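+        // For example (illustrative):
+        //   icmp ult (xor i32 %x, -2147483648), 10
+        //     --> icmp slt i32 %x, -2147483638
+        // since flipping the sign bit maps unsigned order onto signed order.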
+ if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+ const APInt &SignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(RHSV ^ SignBit));
+ }
+
+ // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+ if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+ const APInt &NotSignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ Pred = ICI.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(RHSV ^ NotSignBit));
+ }
+ }
+ }
+ break;
+ case Instruction::And: // (icmp pred (and X, AndCST), RHS)
+ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+ LHSI->getOperand(0)->hasOneUse()) {
+ ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+
+ // If the LHS is an AND of a truncating cast, we can widen the
+ // and/compare to be the input width without changing the value
+ // produced, eliminating a cast.
+ if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+ // We can do this transformation if either the AND constant does not
+ // have its sign bit set or if it is an equality comparison.
+ // Extending a relational comparison when we're checking the sign
+ // bit would not work.
+ if (Cast->hasOneUse() &&
+ (ICI.isEquality() ||
+ (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
+ uint32_t BitWidth =
+ cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
+ APInt NewCST = AndCST->getValue();
+ NewCST.zext(BitWidth);
+ APInt NewCI = RHSV;
+ NewCI.zext(BitWidth);
+ Instruction *NewAnd =
+ BinaryOperator::CreateAnd(Cast->getOperand(0),
+ ConstantInt::get(NewCST),LHSI->getName());
+ InsertNewInstBefore(NewAnd, ICI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantInt::get(NewCI));
+ }
+ }
+
+ // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+ // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
+ // happens a LOT in code produced by the C front-end, for bitfield
+ // access.
+ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+ if (Shift && !Shift->isShift())
+ Shift = 0;
+
+ ConstantInt *ShAmt;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+ const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
+ const Type *AndTy = AndCST->getType(); // Type of the and.
+
+      // We can fold this as long as we can't shift unknown bits
+      // into the mask. This can only happen with signed right
+      // shifts, as they sign-extend.
+ if (ShAmt) {
+ bool CanFold = Shift->isLogicalShift();
+ if (!CanFold) {
+ // To test for the bad case of the signed shr, see if any
+ // of the bits shifted in could be tested after the mask.
+ uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+ int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+
+ uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+ if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+ AndCST->getValue()) == 0)
+ CanFold = true;
+ }
+
+ if (CanFold) {
+ Constant *NewCst;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+ else
+ NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+ // Check to see if we are shifting out any of the bits being
+ // compared.
+ if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != RHS) {
+ // If we shifted bits out, the fold is not going to work out.
+ // As a special case, check to see if this means that the
+ // result is always true or false now.
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+ } else {
+ ICI.setOperand(1, NewCst);
+ Constant *NewAndCST;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+ else
+ NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+ LHSI->setOperand(1, NewAndCST);
+ LHSI->setOperand(0, Shift->getOperand(0));
+ AddToWorkList(Shift); // Shift is dead.
+ AddUsesToWorkList(ICI);
+ return &ICI;
+ }
+ }
+ }
+
+      // Turn ((X >> Y) & C) == 0  into  (X & (C << Y)) == 0.  The latter is
+ // preferable because it allows the C<<Y expression to be hoisted out
+ // of a loop if Y is invariant and X is not.
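+      // For example (illustrative):
+      //   icmp eq (and (lshr i32 %x, %y), 7), 0
+      //     --> icmp eq (and i32 %x, (shl i32 7, %y)), 0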
+ if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+ ICI.isEquality() && !Shift->isArithmeticShift() &&
+ !isa<Constant>(Shift->getOperand(0))) {
+ // Compute C << Y.
+ Value *NS;
+ if (Shift->getOpcode() == Instruction::LShr) {
+ NS = BinaryOperator::CreateShl(AndCST,
+ Shift->getOperand(1), "tmp");
+ } else {
+ // Insert a logical shift.
+ NS = BinaryOperator::CreateLShr(AndCST,
+ Shift->getOperand(1), "tmp");
+ }
+ InsertNewInstBefore(cast<Instruction>(NS), ICI);
+
+ // Compute X & (C << Y).
+ Instruction *NewAnd =
+ BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+ InsertNewInstBefore(NewAnd, ICI);
+
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+ }
+ break;
+
+ case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt) break;
+
+ uint32_t TypeBits = RHSV.getBitWidth();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ if (ICI.isEquality()) {
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ Constant *Comp =
+ ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
+ if (Comp != RHS) {// Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
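+        // For example (illustrative): "icmp eq (shl i32 %x, 3), 40"
+        //   --> "icmp eq (and i32 %x, 536870911), 5".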
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ Constant *Mask =
+ ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
+
+ Instruction *AndI =
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
+ Mask, LHSI->getName()+".mask");
+ Value *And = InsertNewInstBefore(AndI, ICI);
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantInt::get(RHSV.lshr(ShAmtVal)));
+ }
+ }
+
+ // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+ bool TrueIfSigned = false;
+ if (LHSI->hasOneUse() &&
+ isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+ // (X << 31) <s 0 --> (X&1) != 0
+ Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) <<
+ (TypeBits-ShAmt->getZExtValue()-1));
+ Instruction *AndI =
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
+ Mask, LHSI->getName()+".mask");
+ Value *And = InsertNewInstBefore(AndI, ICI);
+
+ return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+ And, Constant::getNullValue(And->getType()));
+ }
+ break;
+ }
+
+ case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
+ case Instruction::AShr: {
+ // Only handle equality comparisons of shift-by-constant.
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt || !ICI.isEquality()) break;
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = RHSV.getBitWidth();
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = RHSV << ShAmtVal;
+ if (LHSI->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (LHSI->hasOneUse() &&
+ MaskedValueIsZero(LHSI->getOperand(0),
+ APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getShl(RHS, ShAmt));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(Val);
+
+ Instruction *AndI =
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
+ Mask, LHSI->getName()+".mask");
+ Value *And = InsertNewInstBefore(AndI, ICI);
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantExpr::getShl(RHS, ShAmt));
+ }
+ break;
+ }
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // Fold: icmp pred ([us]div X, C1), C2 -> range test
+ // Fold this div into the comparison, producing a range check.
+    // Determine, based on the divide type, what range is being
+    // checked.  If there is an overflow on the low or high side, remember
+    // it; otherwise compute the range [low, hi) bounding the new value.
+ // See: InsertRangeTest above for the kinds of replacements possible.
+ if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+ DivRHS))
+ return R;
+ break;
+
+ case Instruction::Add:
+    // Fold: icmp pred (add X, C1), C2
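+    // For example (illustrative): "icmp ult (add i32 %x, 5), 5" has the
+    // range [0, 5) - 5 = [-5, 0), whose upper bound is the minimum value,
+    // so it becomes "icmp uge i32 %x, -5".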
+
+ if (!ICI.isEquality()) {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+ .subtract(LHSV);
+
+ if (ICI.isSignedPredicate()) {
+ if (CR.getLower().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+ ConstantInt::get(CR.getUpper()));
+ } else if (CR.getUpper().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+ ConstantInt::get(CR.getLower()));
+ }
+ } else {
+ if (CR.getLower().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+ ConstantInt::get(CR.getUpper()));
+ } else if (CR.getUpper().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+ ConstantInt::get(CR.getLower()));
+ }
+ }
+ }
+ break;
+ }
+
+ // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+ if (ICI.isEquality()) {
+ bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+ // the second operand is a constant, simplify a bit.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+ switch (BO->getOpcode()) {
+ case Instruction::SRem:
+ // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+        if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&
+            BO->hasOneUse()) {
+ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+ if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
+ Instruction *NewRem =
+ BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1),
+ BO->getName());
+ InsertNewInstBefore(NewRem, ICI);
+ return new ICmpInst(ICI.getPredicate(), NewRem,
+ Constant::getNullValue(BO->getType()));
+ }
+ }
+ break;
+ case Instruction::Add:
+ // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+ if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ Subtract(RHS, BOp1C));
+ } else if (RHSV == 0) {
+ // Replace ((add A, B) != 0) with (A != -B) if A or B is
+ // efficiently invertible, or if the add has just this one use.
+ Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+ if (Value *NegVal = dyn_castNegVal(BOp1))
+ return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+ else if (Value *NegVal = dyn_castNegVal(BOp0))
+ return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+ else if (BO->hasOneUse()) {
+ Instruction *Neg = BinaryOperator::CreateNeg(BOp1);
+ InsertNewInstBefore(Neg, ICI);
+ Neg->takeName(BO);
+ return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+ }
+ }
+ break;
+ case Instruction::Xor:
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getXor(RHS, BOC));
+
+ // FALLTHROUGH
+ case Instruction::Sub:
+ // Replace (([sub|xor] A, B) != 0) with (A != B)
+ if (RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ break;
+
+ case Instruction::Or:
+ // If bits are being or'd in that are not present in the constant we
+ // are comparing against, then the comparison could never succeed!
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ Constant *NotCI = ConstantExpr::getNot(RHS);
+ if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty,
+ isICMP_NE));
+ }
+ break;
+
+ case Instruction::And:
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // If bits are being compared against that are and'd out, then the
+ // comparison can never succeed!
+ if ((RHSV & ~BOC->getValue()) != 0)
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty,
+ isICMP_NE));
+
+ // If we have ((X & C) == C), turn it into ((X & C) != 0).
+ if (RHS == BOC && RHSV.isPowerOf2())
+ return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+ ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+
+          // Replace ((and X, (1 << size(X)-1)) != 0) with (X s< 0).
+ if (BOC->getValue().isSignBit()) {
+ Value *X = BO->getOperand(0);
+ Constant *Zero = Constant::getNullValue(X->getType());
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(pred, X, Zero);
+ }
+
+ // ((X & ~7) == 0) --> X < 8
+ if (RHSV == 0 && isHighOnes(BOC)) {
+ Value *X = BO->getOperand(0);
+ Constant *NegX = ConstantExpr::getNeg(BOC);
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(pred, X, NegX);
+ }
+        }
+        break;
+      default: break;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+ // Handle icmp {eq|ne} <intrinsic>, intcst.
+ if (II->getIntrinsicID() == Intrinsic::bswap) {
+ AddToWorkList(II);
+ ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(1, ConstantInt::get(RHSV.byteSwap()));
+ return &ICI;
+ }
+ }
+ }
+ return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+ const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+ Value *LHSCIOp = LHSCI->getOperand(0);
+ const Type *SrcTy = LHSCIOp->getType();
+ const Type *DestTy = LHSCI->getType();
+ Value *RHSCIOp;
+
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // integer type is the same size as the pointer type.
+ if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+ getTargetData().getPointerSizeInBits() ==
+ cast<IntegerType>(DestTy)->getBitWidth()) {
+ Value *RHSOp = 0;
+ if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+ } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI);
+ }
+
+ if (RHSOp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+ }
+
+ // The code below only handles extension cast instructions, so far.
+ // Enforce this.
+ if (LHSCI->getOpcode() != Instruction::ZExt &&
+ LHSCI->getOpcode() != Instruction::SExt)
+ return 0;
+
+ bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+ bool isSignedCmp = ICI.isSignedPredicate();
+
+ if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+ // Not an extension from the same type?
+ RHSCIOp = CI->getOperand(0);
+ if (RHSCIOp->getType() != LHSCIOp->getType())
+ return 0;
+
+ // If the signedness of the two casts doesn't agree (i.e. one is a sext
+ // and the other is a zext), then we can't handle this.
+ if (CI->getOpcode() != LHSCI->getOpcode())
+ return 0;
+
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedCmp && isSignedExt)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+ }
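+  // For example, the cast-of-cast path above turns
+  //   icmp ult (zext i8 %a to i32), (zext i8 %b to i32)
+  // into (icmp ult i8 %a, %b).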
+
+ // If we aren't dealing with a constant on the RHS, exit early
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+ if (!CI)
+ return 0;
+
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DestTy.
+ Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
+
+ // If the re-extended constant didn't change...
+ if (Res2 == CI) {
+ // Make sure that sign of the Cmp and the sign of the Cast are the same.
+ // For example, we might have:
+ // %A = sext short %X to uint
+ // %B = icmp ugt uint %A, 1330
+ // It is incorrect to transform this into
+ // %B = icmp ugt short %X, 1330
+  // because %A may have a negative value.
+ //
+ // However, we allow this when the compare is EQ/NE, because they are
+ // signless.
+ if (isSignedExt == isSignedCmp || ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+ return 0;
+ }
+
+ // The re-extended constant changed so the constant cannot be represented
+ // in the shorter type. Consequently, we cannot emit a simple comparison.
+
+  // First, handle some easy cases. We know the result cannot be equal at this
+  // point, so handle the ICI.isEquality() cases first.
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
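+  // For example, 300 does not round-trip through i8, so
+  // (icmp eq (zext i8 %x to i32), 300) folds directly to false.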
+
+  // Evaluate the comparison for LT (we invert for GT below). The LE and GE
+  // cases should have been folded away previously and should not reach here.
+ Value *Result;
+ if (isSignedCmp) {
+ // We're performing a signed comparison.
+ if (cast<ConstantInt>(CI)->getValue().isNegative())
+ Result = ConstantInt::getFalse(); // X < (small) --> false
+ else
+ Result = ConstantInt::getTrue(); // X < (large) --> true
+ } else {
+ // We're performing an unsigned comparison.
+ if (isSignedExt) {
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = ConstantInt::getAllOnesValue(SrcTy);
+ Result = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_SGT, LHSCIOp,
+ NegOne, ICI.getName()), ICI);
+ } else {
+ // Unsigned extend & unsigned compare -> always true.
+ Result = ConstantInt::getTrue();
+ }
+ }
+
+ // Finally, return the value computed.
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
+ ICI.getPredicate() == ICmpInst::ICMP_SLT)
+ return ReplaceInstUsesWith(ICI, Result);
+
+ assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
+ ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
+ "ICmp should be folded!");
+ if (Constant *CI = dyn_cast<Constant>(Result))
+ return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+ return BinaryOperator::CreateNot(Result);
+}
+
+Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ return commonShiftTransforms(I);
+}
+
+Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ return commonShiftTransforms(I);
+}
+
+Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0);
+
+  // ashr int -1, X = -1 (for any arithmetic shift right of ~0)
+ if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
+ if (CSI->isAllOnesValue())
+ return ReplaceInstUsesWith(I, CSI);
+
+ // See if we can turn a signed shr into an unsigned shr.
+ if (!isa<VectorType>(I.getType())) {
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
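+    // e.g. %s = sext i1 %b to i32 has 32 sign bits, so (ashr %s, %n) is %s.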
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // shl X, 0 == X and shr X, 0 == X
+ // shl 0, X == 0 and shr 0, X == 0
+ if (Op1 == Constant::getNullValue(Op1->getType()) ||
+ Op0 == Constant::getNullValue(Op0->getType()))
+ return ReplaceInstUsesWith(I, Op0);
+
+ if (isa<UndefValue>(Op0)) {
+ if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
+ return ReplaceInstUsesWith(I, Op0);
+ else // undef << X -> 0, undef >>u X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+ if (isa<UndefValue>(Op1)) {
+ if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X
+ return ReplaceInstUsesWith(I, Op0);
+ else // X << undef, X >>u undef -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+
+ // See if we can fold away this shift.
+ if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Try to fold constant and into select arguments.
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+ if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
+ return Res;
+ return 0;
+}
+
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I) {
+ bool isLeftShift = I.getOpcode() == Instruction::Shl;
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits();
+
+  // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate an
+  // ashr of a signed value; it is clamped to a shift by TypeBits-1 instead.
+ //
+ if (Op1->uge(TypeBits)) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ else {
+ I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
+ return &I;
+ }
+ }
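+  // e.g. on i32, (shl i32 %X, 34) and (lshr i32 %X, 34) fold to zero, while
+  // (ashr i32 %X, 34) becomes (ashr i32 %X, 31).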
+
+ // ((X*C1) << C2) == (X * (C1 << C2))
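+  // e.g. ((%X * 3) << 2) becomes (%X * 12).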
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
+ if (BO->getOpcode() == Instruction::Mul && isLeftShift)
+ if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
+ return BinaryOperator::CreateMul(BO->getOperand(0),
+ ConstantExpr::getShl(BOOp, Op1));
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI, this))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+ if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+ Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+ // place. Don't try to do this transformation in this case. Also, we
+ // require that the input operand is a shift-by-constant so that we have
+ // confidence that the shifts will get folded together. We could do this
+ // xform in more cases, but it is unlikely to be profitable.
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ isa<ConstantInt>(TrOp->getOperand(1))) {
+ // Okay, we'll do this xform. Make the shift of shift.
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt,
+ I.getName());
+ InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2)
+
+ // For logical shifts, the truncation has the effect of making the high
+ // part of the register be zeros. Emulate this by inserting an AND to
+ // clear the top bits as needed. This 'and' will usually be zapped by
+ // other xforms later if dead.
+ unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+ APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+      // The mask we constructed says what the trunc would do if it occurred
+      // between the shifts. We want to know the effect *after* the second
+ // shift. We know that it is a logical shift by a constant, so adjust the
+ // mask as appropriate.
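+      // e.g. for an i32 -> i16 trunc with shift2 = (shl 8): MaskV starts as
+      // 0x0000FFFF and becomes 0x00FFFF00 after the adjustment below.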
+ if (I.getOpcode() == Instruction::Shl)
+ MaskV <<= Op1->getZExtValue();
+ else {
+ assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+ MaskV = MaskV.lshr(Op1->getZExtValue());
+ }
+
+ Instruction *And = BinaryOperator::CreateAnd(NSh, ConstantInt::get(MaskV),
+ TI->getName());
+ InsertNewInstBefore(And, I); // shift1 & 0x00FF
+
+ // Return the value truncated to the interesting size.
+ return new TruncInst(And, I.getType());
+ }
+ }
+
+ if (Op0->hasOneUse()) {
+ if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ Value *V1, *V2;
+ ConstantInt *CC;
+ switch (Op0BO->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // These operators commute.
+ // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))){
+ Instruction *YS = BinaryOperator::CreateShl(
+ Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ InsertNewInstBefore(YS, I); // (Y << C)
+ Instruction *X =
+ BinaryOperator::Create(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
+ InsertNewInstBefore(X, I); // (X + (Y << C))
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
+ Value *Op0BOOp1 = Op0BO->getOperand(1);
+ if (isLeftShift && Op0BOOp1->hasOneUse() &&
+ match(Op0BOOp1,
+ m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
+ m_ConstantInt(CC))) &&
+ cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+ Instruction *YS = BinaryOperator::CreateShl(
+ Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ InsertNewInstBefore(YS, I); // (Y << C)
+ Instruction *XM =
+ BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ InsertNewInstBefore(XM, I); // X & (CC << C)
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+ }
+ }
+
+ // FALL THROUGH.
+ case Instruction::Sub: {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))){
+ Instruction *YS = BinaryOperator::CreateShl(
+ Op0BO->getOperand(1), Op1,
+ Op0BO->getName());
+ InsertNewInstBefore(YS, I); // (Y << C)
+ Instruction *X =
+ BinaryOperator::Create(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
+ InsertNewInstBefore(X, I); // (X + (Y << C))
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0),
+ m_And(m_Shr(m_Value(V1), m_Value(V2)),
+ m_ConstantInt(CC))) && V2 == Op1 &&
+ cast<BinaryOperator>(Op0BO->getOperand(0))
+ ->getOperand(0)->hasOneUse()) {
+ Instruction *YS = BinaryOperator::CreateShl(
+ Op0BO->getOperand(1), Op1,
+ Op0BO->getName());
+ InsertNewInstBefore(YS, I); // (Y << C)
+ Instruction *XM =
+ BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ InsertNewInstBefore(XM, I); // X & (CC << C)
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+ }
+
+ break;
+ }
+ }
+
+
+      // If the operand is a bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+ bool isValid = true; // Valid only for And, Or, Xor
+ bool highBitSet = false; // Transform if high bit of constant set?
+
+ switch (Op0BO->getOpcode()) {
+ default: isValid = false; break; // Do not perform transform!
+ case Instruction::Add:
+ isValid = isLeftShift;
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ highBitSet = false;
+ break;
+ case Instruction::And:
+ highBitSet = true;
+ break;
+ }
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ //
+ if (isValid && I.getOpcode() == Instruction::AShr)
+ isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
+
+ if (isValid) {
+ Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+
+ Instruction *NewShift =
+ BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ InsertNewInstBefore(NewShift, I);
+ NewShift->takeName(Op0BO);
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
+ NewRHS);
+ }
+ }
+ }
+ }
+
+ // Find out if this is a shift of a shift by a constant.
+ BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+ if (ShiftOp && !ShiftOp->isShift())
+ ShiftOp = 0;
+
+ if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+ ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+ uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+ assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+ if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
+ Value *X = ShiftOp->getOperand(0);
+
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
+
+ const IntegerType *Ty = cast<IntegerType>(I.getType());
+
+ // Check for (X << c1) << c2 and (X >> c1) >> c2
+ if (I.getOpcode() == ShiftOp->getOpcode()) {
+      // If this is an oversized composite shift, then unsigned shifts become
+      // zero and ashr saturates.
+ if (AmtSum >= TypeBits) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
+ }
+
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(Ty, AmtSum));
+ } else if (ShiftOp->getOpcode() == Instruction::LShr &&
+ I.getOpcode() == Instruction::AShr) {
+ if (AmtSum >= TypeBits)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
+ return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
+ } else if (ShiftOp->getOpcode() == Instruction::AShr &&
+ I.getOpcode() == Instruction::LShr) {
+ // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
+ if (AmtSum >= TypeBits)
+ AmtSum = TypeBits-1;
+
+ Instruction *Shift =
+ BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
+ InsertNewInstBefore(Shift, I);
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+ }
+
+ // Okay, if we get here, one shift must be left, and the other shift must be
+ // right. See if the amounts are equal.
+ if (ShiftAmt1 == ShiftAmt2) {
+ // If we have ((X >>? C) << C), turn this into X & (-1 << C).
+ if (I.getOpcode() == Instruction::Shl) {
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
+ }
+ // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+ if (I.getOpcode() == Instruction::LShr) {
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
+ }
+ // We can simplify ((X << C) >>s C) into a trunc + sext.
+ // NOTE: we could do this for any C, but that would make 'unusual' integer
+ // types. For now, just stick to ones well-supported by the code
+ // generators.
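+      // e.g. on i32 with ShiftAmt1 = 24:
+      //   ((X << 24) >>s 24) --> sext (trunc i32 X to i8) to i32.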
+ const Type *SExtType = 0;
+ switch (Ty->getBitWidth() - ShiftAmt1) {
+ case 1 :
+ case 8 :
+ case 16 :
+ case 32 :
+ case 64 :
+ case 128:
+ SExtType = IntegerType::get(Ty->getBitWidth() - ShiftAmt1);
+ break;
+ default: break;
+ }
+ if (SExtType) {
+ Instruction *NewTrunc = new TruncInst(X, SExtType, "sext");
+ InsertNewInstBefore(NewTrunc, I);
+ return new SExtInst(NewTrunc, Ty);
+ }
+ // Otherwise, we can't handle it yet.
+ } else if (ShiftAmt1 < ShiftAmt2) {
+ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+
+ // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ Instruction *Shift =
+ BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+ InsertNewInstBefore(Shift, I);
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+ }
+
+ // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr) {
+ assert(ShiftOp->getOpcode() == Instruction::Shl);
+ Instruction *Shift =
+ BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+ InsertNewInstBefore(Shift, I);
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
+ } else {
+ assert(ShiftAmt2 < ShiftAmt1);
+ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
+
+ // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ Instruction *Shift =
+ BinaryOperator::Create(ShiftOp->getOpcode(), X,
+ ConstantInt::get(Ty, ShiftDiff));
+ InsertNewInstBefore(Shift, I);
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+ }
+
+ // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr) {
+ assert(ShiftOp->getOpcode() == Instruction::Shl);
+ Instruction *Shift =
+ BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+ InsertNewInstBefore(Shift, I);
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+ }
+
+      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
+ }
+ }
+ return 0;
+}
+
+
+/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
+/// expression. If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
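+/// For example, given (add (shl %X, 2), 12) this returns %X with Scale = 4
+/// and Offset = 12.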
+///
+static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+ int &Offset) {
+ assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ Offset = CI->getZExtValue();
+ Scale = 0;
+ return ConstantInt::get(Type::Int32Ty, 0);
+ } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (I->getOpcode() == Instruction::Shl) {
+ // This is a value scaled by '1 << the shift amt'.
+ Scale = 1U << RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ } else if (I->getOpcode() == Instruction::Mul) {
+ // This value is scaled by 'RHS'.
+ Scale = RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ } else if (I->getOpcode() == Instruction::Add) {
+ // We have X+C. Check to see if we really have (X*C2)+C1,
+ // where C1 is divisible by C2.
+ unsigned SubScale;
+ Value *SubVal =
+ DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ Offset += RHS->getZExtValue();
+ Scale = SubScale;
+ return SubVal;
+ }
+ }
+ }
+
+ // Otherwise, we can't look past this.
+ Scale = 1;
+ Offset = 0;
+ return Val;
+}
+
+
+/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
+/// try to eliminate the cast by moving the type information into the alloc.
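+/// For example (a sketch): a bitcast of (alloca i8, i32 (mul i32 %n, 2)) to
+/// i16* can become (alloca i16, i32 %n), eliminating the cast.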
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
+ AllocationInst &AI) {
+ const PointerType *PTy = cast<PointerType>(CI.getType());
+
+ // Remove any uses of AI that are dead.
+ assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
+
+ for (Value::use_iterator UI = AI.use_begin(), E = AI.use_end(); UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ if (isInstructionTriviallyDead(User)) {
+ while (UI != E && *UI == User)
+ ++UI; // If this instruction uses AI more than once, don't break UI.
+
+ ++NumDeadInst;
+ DOUT << "IC: DCE: " << *User;
+ EraseInstFromFunction(*User);
+ }
+ }
+
+ // Get the type really allocated and the type casted to.
+ const Type *AllocElTy = AI.getAllocatedType();
+ const Type *CastElTy = PTy->getElementType();
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+
+ unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
+ if (CastElTyAlign < AllocElTyAlign) return 0;
+
+ // If the allocation has multiple uses, only promote it if we are strictly
+ // increasing the alignment of the resultant allocation. If we keep it the
+ // same, we open the door to infinite loops of various kinds. (A reference
+ // from a dbg.declare doesn't count as a use for this purpose.)
+ if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
+ CastElTyAlign == AllocElTyAlign) return 0;
+
+ uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
+ if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+
+ // See if we can satisfy the modulus by pulling a scale out of the array
+ // size argument.
+ unsigned ArraySizeScale;
+ int ArrayOffset;
+ Value *NumElements = // See if the array size is a decomposable linear expr.
+ DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+  // If we can now satisfy the modulus by using a non-1 scale, we really can
+ // do the xform.
+ if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+
+ unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+ Value *Amt = 0;
+ if (Scale == 1) {
+ Amt = NumElements;
+ } else {
+ // If the allocation size is constant, form a constant mul expression
+ Amt = ConstantInt::get(Type::Int32Ty, Scale);
+ if (isa<ConstantInt>(NumElements))
+ Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
+ // otherwise multiply the amount and the number of elements
+ else {
+ Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
+ Amt = InsertNewInstBefore(Tmp, AI);
+ }
+ }
+
+ if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(Type::Int32Ty, Offset, true);
+ Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp");
+ Amt = InsertNewInstBefore(Tmp, AI);
+ }
+
+ AllocationInst *New;
+ if (isa<MallocInst>(AI))
+ New = new MallocInst(CastElTy, Amt, AI.getAlignment());
+ else
+ New = new AllocaInst(CastElTy, Amt, AI.getAlignment());
+ InsertNewInstBefore(New, AI);
+ New->takeName(&AI);
+
+ // If the allocation has one real use plus a dbg.declare, just remove the
+ // declare.
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
+ EraseInstFromFunction(*DI);
+ }
+ // If the allocation has multiple real uses, insert a cast and change all
+ // things that used it to use the new cast. This will also hack on CI, but it
+ // will die soon.
+ else if (!AI.hasOneUse()) {
+ AddUsesToWorkList(AI);
+ // New is the allocation instruction, pointer typed. AI is the original
+ // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+ CastInst *NewCast = new BitCastInst(New, AI.getType(), "tmpcast");
+ InsertNewInstBefore(NewCast, AI);
+ AI.replaceAllUsesWith(NewCast);
+ }
+ return ReplaceInstUsesWith(CI, New);
+}
+
+/// CanEvaluateInDifferentType - Return true if we can take the specified value
+/// and return it as type Ty without inserting any new casts and without
+/// changing the computed value. This is used by code that tries to decide
+/// whether promoting or shrinking integer operations to wider or smaller types
+/// will allow us to eliminate a truncate or extend.
+///
+/// This is a truncation operation if Ty is smaller than V->getType(), or an
+/// extension operation if Ty is larger.
+///
+/// If CastOpc is a truncation, then Ty will be a type smaller than V. We
+/// should return true if trunc(V) can be computed by computing V in the smaller
+/// type. If V is an instruction, then trunc(inst(x,y)) can be computed as
+/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
+/// efficiently truncated.
+///
+/// If CastOpc is a sext or zext, we are asking if the low bits of the value
+/// can be computed in a larger type, which is then and'd or sext_in_reg'd to
+/// get the final result.
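+///
+/// For example, (trunc (and (zext i8 %a to i32), 15) to i8) can be evaluated
+/// as (and i8 %a, 15), removing both casts.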
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+ unsigned CastOpc,
+ int &NumCastsRemoved){
+ // We can always evaluate constants in another type.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ const IntegerType *OrigTy = cast<IntegerType>(V->getType());
+
+ // If this is an extension or truncate, we can often eliminate it.
+ if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ // If this is a cast from the destination type, we can trivially eliminate
+ // it, and this will remove a cast overall.
+ if (I->getOperand(0)->getType() == Ty) {
+ // If the first operand is itself a cast, and is eliminable, do not count
+ // this as an eliminable cast. We would prefer to eliminate those two
+ // casts first.
+ if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
+ ++NumCastsRemoved;
+ return true;
+ }
+ }
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // These operators can all arbitrarily be extended or truncated.
+ return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+ NumCastsRemoved) &&
+ CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+ NumCastsRemoved);
+
+ case Instruction::Shl:
+ // If we are truncating the result of this SHL, and if it's a shift of a
+ // constant amount, we can always perform a SHL in a smaller type.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t BitWidth = Ty->getBitWidth();
+ if (BitWidth < OrigTy->getBitWidth() &&
+ CI->getLimitedValue(BitWidth) < BitWidth)
+ return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+ NumCastsRemoved);
+ }
+ break;
+ case Instruction::LShr:
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t OrigBitWidth = OrigTy->getBitWidth();
+ uint32_t BitWidth = Ty->getBitWidth();
+ if (BitWidth < OrigBitWidth &&
+ MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+ NumCastsRemoved);
+ }
+ }
+ break;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Trunc:
+ // If this is the same kind of case as our original (e.g. zext+zext), we
+ // can safely replace it. Note that replacing it does not reduce the number
+ // of casts in the input.
+ if (Opc == CastOpc)
+ return true;
+
+ // sext (zext ty1), ty2 -> zext ty2
+ if (CastOpc == Instruction::SExt && Opc == Instruction::ZExt)
+ return true;
+ break;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc,
+ NumCastsRemoved) &&
+ CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc,
+ NumCastsRemoved);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
+ NumCastsRemoved))
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+/// EvaluateInDifferentType - Given an expression that
+/// CanEvaluateInDifferentType returns true for, actually insert the code to
+/// evaluate the expression.
+Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
+ bool isSigned) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+
+ // Otherwise, it must be an instruction.
+ Instruction *I = cast<Instruction>(V);
+ Instruction *Res = 0;
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl: {
+ Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
+ Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the source type of the cast is the type we're trying for then we can
+ // just return the source. There's no need to insert it because it is not
+ // new.
+ if (I->getOperand(0)->getType() == Ty)
+ return I->getOperand(0);
+
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
+ Ty);
+ break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty);
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ assert(0 && "Unreachable!");
+ break;
+ }
+
+ Res->takeName(I);
+ return InsertNewInstBefore(Res, *I);
+}
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
+ // eliminate it now.
+ if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
+ if (Instruction::CastOps opc =
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+ // The first cast (CSrc) is eliminable so we need to fix up or replace
+ // the second cast (CI). CSrc will then have a good chance of being dead.
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+ }
+ }
+
+ // If we are casting a select then fold the cast into the select
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+ if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
+ return NV;
+
+ // If we are casting a PHI then fold the cast into the PHI
+ if (isa<PHINode>(Src))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+
+ return 0;
+}
+
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us at
+/// the specified offset. If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
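+/// For example, for Ty = {i32, [4 x i8]} and Offset = 6, this fills in the
+/// indices [0, 1, 2] and returns i8: byte 6 is element 2 of the array field.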
+static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices,
+ const TargetData *TD) {
+ if (!Ty->isSized()) return 0;
+
+ // Start with the index over the outer type. Note that the type size
+ // might be zero (even if the offset isn't zero) if the indexed type
+ // is something like [0 x {int, int}]
+ const Type *IntPtrTy = TD->getIntPtrType();
+ int64_t FirstIdx = 0;
+ if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+ FirstIdx = Offset/TySize;
+ Offset -= FirstIdx*TySize;
+
+ // Handle hosts where % returns negative instead of values [0..TySize).
+ if (Offset < 0) {
+ --FirstIdx;
+ Offset += TySize;
+ assert(Offset >= 0);
+ }
+ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+ }
+
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+ // Index into the types. If we fail, set OrigBase to null.
+ while (Offset) {
+ // Indexing into tail padding between struct/array elements.
+ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+ return 0;
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ assert(Offset < (int64_t)SL->getSizeInBytes() &&
+ "Offset must stay within the indexed type");
+
+ unsigned Elt = SL->getElementContainingOffset(Offset);
+ NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
+
+ Offset -= SL->getElementOffset(Elt);
+ Ty = STy->getElementType(Elt);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+ assert(EltSize && "Cannot index into a zero-sized array");
+ NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+ Offset %= EltSize;
+ Ty = AT->getElementType();
+ } else {
+ // Otherwise, we can't index into the middle of this atomic type, bail.
+ return 0;
+ }
+ }
+
+ return Ty;
+}
+
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+ // If casting the result of a getelementptr instruction with no offset, turn
+ // this into a cast of the original pointer!
+ if (GEP->hasAllZeroIndices()) {
+ // Changing the cast operand is usually not a good idea but it is safe
+ // here because the pointer operand is being replaced with another
+ // pointer operand so the opcode doesn't need to change.
+ AddToWorkList(GEP);
+ CI.setOperand(0, GEP->getOperand(0));
+ return &CI;
+ }
+
+ // If the GEP has a single use, and the base pointer is a bitcast, and the
+ // GEP computes a constant offset, see if we can convert these three
+ // instructions into fewer. This typically happens with unions and other
+ // non-type-safe code.
+ if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
+ if (GEP->hasAllConstantIndices()) {
+ // We are guaranteed to get a constant from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, CI, *this));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // Get the base pointer input of the bitcast, and the type it points to.
+ Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+ const Type *GEPIdxTy =
+ cast<PointerType>(OrigBase->getType())->getElementType();
+ SmallVector<Value*, 8> NewIndices;
+ if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD)) {
+ // If we were able to index down into an element, create the GEP
+ // and bitcast the result. This eliminates one bitcast, potentially
+ // two.
+ Instruction *NGEP = GetElementPtrInst::Create(OrigBase,
+ NewIndices.begin(),
+ NewIndices.end(), "");
+ InsertNewInstBefore(NGEP, CI);
+ NGEP->takeName(GEP);
+
+ if (isa<BitCastInst>(CI))
+ return new BitCastInst(NGEP, CI.getType());
+ assert(isa<PtrToIntInst>(CI));
+ return new PtrToIntInst(NGEP, CI.getType());
+ }
+ }
+ }
+ }
+
+ return commonCastTransforms(CI);
+}
+
+/// isSafeIntegerType - Return true if this is a basic integer type, not a crazy
+/// type like i42. We don't want to introduce operations on random non-legal
+/// integer types where they don't already exist in the code. In the future,
+/// we should consider making this based on target data, so that 32-bit targets
+/// won't get i64 operations etc.
+static bool isSafeIntegerType(const Type *Ty) {
+ switch (Ty->getPrimitiveSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Only the TRUNC, ZEXT, SEXT, and BITCAST casts can have both their operand
+/// and result as integer types. This function implements the common
+/// transforms for all those
+/// cases.
+/// @brief Implement the transforms common to CastInst with integer operands
+Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType();
+ const Type *DestTy = CI.getType();
+ uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ // See if we can simplify any instructions used by the LHS whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ // If the source isn't an instruction or has more than one use then we
+ // can't do anything more.
+ Instruction *SrcI = dyn_cast<Instruction>(Src);
+ if (!SrcI || !Src->hasOneUse())
+ return 0;
+
+ // Attempt to propagate the cast into the instruction for int->int casts.
+ int NumCastsRemoved = 0;
+ if (!isa<BitCastInst>(CI) &&
+ // Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) &&
+ CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
+ CI.getOpcode(), NumCastsRemoved)) {
+    // If this cast is a truncate, evaluating in a different type always
+ // eliminates the cast, so it is always a win. If this is a zero-extension,
+ // we need to do an AND to maintain the clear top-part of the computation,
+ // so we require that the input have eliminated at least one cast. If this
+ // is a sign extension, we insert two new casts (to do the extension) so we
+ // require that two casts have been eliminated.
+ bool DoXForm = false;
+ bool JustReplace = false;
+ switch (CI.getOpcode()) {
+ default:
+ // All the others use floating point so we shouldn't actually
+ // get here because of the check above.
+ assert(0 && "Unknown cast type");
+ case Instruction::Trunc:
+ DoXForm = true;
+ break;
+ case Instruction::ZExt: {
+ DoXForm = NumCastsRemoved >= 1;
+ if (!DoXForm && 0) {
+ // If it's unnecessary to issue an AND to clear the high bits, it's
+ // always profitable to do this xform.
+ Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false);
+ APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
+ if (MaskedValueIsZero(TryRes, Mask))
+ return ReplaceInstUsesWith(CI, TryRes);
+
+ if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
+ if (TryI->use_empty())
+ EraseInstFromFunction(*TryI);
+ }
+ break;
+ }
+ case Instruction::SExt: {
+ DoXForm = NumCastsRemoved >= 2;
+ if (!DoXForm && !isa<TruncInst>(SrcI) && 0) {
+ // If we do not have to emit the truncate + sext pair, then it's always
+ // profitable to do this xform.
+ //
+ // It's not safe to eliminate the trunc + sext pair if one of the
+ // eliminated cast is a truncate. e.g.
+ // t2 = trunc i32 t1 to i16
+ // t3 = sext i16 t2 to i32
+ // !=
+ // i32 t1
+ Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true);
+ unsigned NumSignBits = ComputeNumSignBits(TryRes);
+ if (NumSignBits > (DestBitSize - SrcBitSize))
+ return ReplaceInstUsesWith(CI, TryRes);
+
+ if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
+ if (TryI->use_empty())
+ EraseInstFromFunction(*TryI);
+ }
+ break;
+ }
+ }
+
+ if (DoXForm) {
+ DOUT << "ICE: EvaluateInDifferentType converting expression type to avoid"
+ << " cast: " << CI;
+ Value *Res = EvaluateInDifferentType(SrcI, DestTy,
+ CI.getOpcode() == Instruction::SExt);
+ if (JustReplace)
+ // Just replace this cast with the result.
+ return ReplaceInstUsesWith(CI, Res);
+
+ assert(Res->getType() == DestTy);
+ switch (CI.getOpcode()) {
+ default: assert(0 && "Unknown cast type!");
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ // Just replace this cast with the result.
+ return ReplaceInstUsesWith(CI, Res);
+ case Instruction::ZExt: {
+ assert(SrcBitSize < DestBitSize && "Not a zext?");
+
+ // If the high bits are already zero, just replace this cast with the
+ // result.
+ APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
+ if (MaskedValueIsZero(Res, Mask))
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit an AND to clear the high bits.
+ Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize,
+ SrcBitSize));
+ return BinaryOperator::CreateAnd(Res, C);
+ }
+ case Instruction::SExt: {
+ // If the high bits are already filled with sign bit, just replace this
+ // cast with the result.
+ unsigned NumSignBits = ComputeNumSignBits(Res);
+ if (NumSignBits > (DestBitSize - SrcBitSize))
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit a cast to truncate, then a cast to sext.
+ return CastInst::Create(Instruction::SExt,
+ InsertCastBefore(Instruction::Trunc, Res, Src->getType(),
+ CI), DestTy);
+ }
+ }
+ }
+ }
+
+ Value *Op0 = SrcI->getNumOperands() > 0 ? SrcI->getOperand(0) : 0;
+ Value *Op1 = SrcI->getNumOperands() > 1 ? SrcI->getOperand(1) : 0;
+
+ switch (SrcI->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // If we are discarding information, rewrite.
+ if (DestBitSize <= SrcBitSize && DestBitSize != 1) {
+ // Don't insert two casts if they cannot be eliminated. We allow
+ // two casts to be inserted if the sizes are the same. This could
+ // only be converting signedness, which is a noop.
+ if (DestBitSize == SrcBitSize ||
+ !ValueRequiresCast(CI.getOpcode(), Op1, DestTy,TD) ||
+ !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
+ Instruction::CastOps opcode = CI.getOpcode();
+ Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI);
+ Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI);
+ return BinaryOperator::Create(
+ cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
+ }
+ }
+
+ // cast (xor bool X, true) to int --> xor (cast bool X to int), 1
+ if (isa<ZExtInst>(CI) && SrcBitSize == 1 &&
+ SrcI->getOpcode() == Instruction::Xor &&
+ Op1 == ConstantInt::getTrue() &&
+ (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
+ Value *New = InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI);
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+ }
+ break;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // If we are just changing the sign, rewrite.
+ if (DestBitSize == SrcBitSize) {
+ // Don't insert two casts if they cannot be eliminated. We allow
+ // two casts to be inserted if the sizes are the same. This could
+ // only be converting signedness, which is a noop.
+ if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||
+ !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
+ Value *Op0c = InsertCastBefore(Instruction::BitCast,
+ Op0, DestTy, *SrcI);
+ Value *Op1c = InsertCastBefore(Instruction::BitCast,
+ Op1, DestTy, *SrcI);
+ return BinaryOperator::Create(
+ cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ // Allow changing the sign of the source operand. Do not allow
+ // changing the size of the shift, UNLESS the shift amount is a
+ // constant. We must not change variable sized shifts to a smaller
+ // size, because it is undefined to shift more bits out than exist
+ // in the value.
+ if (DestBitSize == SrcBitSize ||
+ (DestBitSize < SrcBitSize && isa<Constant>(Op1))) {
+ Instruction::CastOps opcode = (DestBitSize == SrcBitSize ?
+ Instruction::BitCast : Instruction::Trunc);
+ Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI);
+ Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI);
+ return BinaryOperator::CreateShl(Op0c, Op1c);
+ }
+ break;
+ case Instruction::AShr:
+ // If this is a signed shr, and if all bits shifted in are about to be
+ // truncated off, turn it into an unsigned shr to allow greater
+ // simplifications.
+ if (DestBitSize < SrcBitSize &&
+ isa<ConstantInt>(Op1)) {
+ uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize);
+ if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) {
+ // Insert the new logical shift right.
+ return BinaryOperator::CreateLShr(Op0, Op1);
+ }
+ }
+ break;
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+ if (Instruction *Result = commonIntCastTransforms(CI))
+ return Result;
+
+ Value *Src = CI.getOperand(0);
+ const Type *Ty = CI.getType();
+ uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits();
+ uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
+
+ // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
+ if (DestBitWidth == 1) {
+ Constant *One = ConstantInt::get(Src->getType(), 1);
+ Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
+ Value *Zero = Constant::getNullValue(Src->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ }
+
+ // Optimize trunc(lshr(), c) to pull the shift through the truncate.
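+  // e.g. for i32 -> i16 with a shift of 4: if bits 16..19 of %X are known
+  // zero, (trunc (lshr i32 %X, 4) to i16) becomes (lshr (trunc %X to i16), 4).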
+ ConstantInt *ShAmtV = 0;
+ Value *ShiftOp = 0;
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_Value(ShiftOp), m_ConstantInt(ShAmtV)))) {
+ uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth);
+
+ // Get a mask for the bits shifting in.
+ APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth));
+ if (MaskedValueIsZero(ShiftOp, Mask)) {
+ if (ShAmt >= DestBitWidth) // All zeros.
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty));
+
+ // Okay, we can shrink this. Truncate the input, then return a new
+ // shift.
+ Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI);
+ Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);
+ return BinaryOperator::CreateLShr(V1, V2);
+ }
+ }
+
+ return 0;
+}
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform) {
+  // If we are just checking for an icmp eq of a single bit and zext'ing it
+ // to an integer, then shift the bit to the appropriate place and then
+ // cast to integer to avoid the comparison.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ const APInt &Op1CV = Op1C->getValue();
+
+ // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
+ // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+ if (!DoXform) return ICI;
+
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getPrimitiveSizeInBits()-1);
+ In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
+ In->getName()+".lobit"),
+ CI);
+ if (In->getType() != CI.getType())
+ In = CastInst::CreateIntegerCast(In, CI.getType(),
+ false/*ZExt*/, "tmp", &CI);
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One,
+ In->getName()+".not"),
+ CI);
+ }
+
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+ // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ // zext (X == 1) to i32 --> X iff X has only the low bit set.
+ // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 0) to i32 --> X iff X has only the low bit set.
+ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ // This only works for EQ and NE
+ ICI->isEquality()) {
+      // If Op1C is some other power of two, convert:
+ uint32_t BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI;
+
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantInt::get(Type::Int1Ty, isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ Value *In = ICI->getOperand(0);
+ if (ShiftAmt) {
+ // Perform a logical shr by shiftamt.
+ // Insert the shift to put the result in the low bit.
+ In = InsertNewInstBefore(BinaryOperator::CreateLShr(In,
+ ConstantInt::get(In->getType(), ShiftAmt),
+ In->getName()+".lobit"), CI);
+ }
+
+ if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = BinaryOperator::CreateXor(In, One, "tmp");
+ InsertNewInstBefore(cast<Instruction>(In), CI);
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ else
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+ // If one of the common conversions will work...
+ if (Instruction *Result = commonIntCastTransforms(CI))
+ return Result;
+
+ Value *Src = CI.getOperand(0);
+
+ // If this is a TRUNC followed by a ZEXT, we are dealing with integer types;
+ // if the sizes are just right, we can convert this into a logical 'and',
+ // which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than A or C, but don't know the relation between A and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
+ unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
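+ // For example, zext(trunc i32 %a to i8) to i16 has SrcSize 32, MidSize 8,
+ // and DstSize 16, so it becomes (trunc i32 %a to i16) & 0xFF.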
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(AndValue);
+ Instruction *And =
+ BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ InsertNewInstBefore(And, CI);
+ return new ZExtInst(And, CI.getType());
+ } else if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue));
+ } else if (SrcSize > DstSize) {
+ Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
+ InsertNewInstBefore(Trunc, CI);
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue));
+ }
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(ICI, CI);
+
+ BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+ if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+ // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp) will be transformed.
+ ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+ ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ (transformZExtICmp(LHS, CI, false) ||
+ transformZExtICmp(RHS, CI, false))) {
+ Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI);
+ Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI);
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+ if (Instruction *I = commonIntCastTransforms(CI))
+ return I;
+
+ Value *Src = CI.getOperand(0);
+
+ // Canonicalize sign-extend from i1 to a select.
+ if (Src->getType() == Type::Int1Ty)
+ return SelectInst::Create(Src,
+ ConstantInt::getAllOnesValue(CI.getType()),
+ Constant::getNullValue(CI.getType()));
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ if (getOpcode(Src) == Instruction::Trunc) {
+ Value *Op = cast<User>(Src)->getOperand(0);
+ unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth();
+ unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth();
+ unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+ unsigned NumSignBits = ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, the trunc/sext pair does not change the value.
+ if (NumSignBits > DestBits-MidBits)
+ return ReplaceInstUsesWith(CI, Op);
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return new SExtInst(Op, CI.getType(), "tmp");
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return new TruncInst(Op, CI.getType(), "tmp");
+ }
+ }
+
+ // If the input is a shl/ashr pair of the same constant, then this is a sign
+ // extension from a smaller value. If we could trust arbitrary bitwidth
+ // integers, we could turn this into a truncate to the smaller bit and then
+ // use a sext for the whole extension. Since we don't, look deeper and check
+ // for a truncate. If the source and dest are the same type, eliminate the
+ // trunc and extend and just do shifts. For example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = 0;
+ ConstantInt *BA = 0, *CA = 0;
+ if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && isa<TruncInst>(A)) {
+ Value *I = cast<TruncInst>(A)->getOperand(0);
+ if (I->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getPrimitiveSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
+ CI.getName()), CI);
+ return BinaryOperator::CreateAShr(I, ShAmtV);
+ }
+ }
+
+ return 0;
+}
+
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+ bool losesInfo;
+ APFloat F = CFP->getValueAPF();
+ (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(F);
+ return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FPExt)
+ return LookThroughFPExtensions(I->getOperand(0));
+
+ // If this value is a constant, return the constant in the smallest FP type
+ // that can accurately represent it. This allows us to turn
+ // (float)((double)X+2.0) into X+2.0f.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::PPC_FP128Ty)
+ return V; // No constant folding of this.
+ // See if the value can be truncated to float and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ return V;
+ if (CFP->getType() == Type::DoubleTy)
+ return V; // Won't shrink.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ return V;
+ // Don't try to shrink to various long double types.
+ }
+
+ return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // If we have fptrunc(add (fpextend x), (fpextend y)), where x and y are
+ // smaller than the destination type, we can eliminate the truncate by doing
+ // the add as the smaller type. This applies to add/sub/mul/div as well as
+ // many builtins (sqrt, etc).
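+ // For example, (float)((double)x + (double)y), with x and y floats, can be
+ // computed directly as x + y in float.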
+ BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+ if (OpI && OpI->hasOneUse()) {
+ switch (OpI->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ const Type *SrcTy = OpI->getType();
+ Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+ if (LHSTrunc->getType() != SrcTy &&
+ RHSTrunc->getType() != SrcTy) {
+ unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ // If the source types were both smaller than the destination type of
+ // the cast, do this xform.
+ if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) {
+ LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
+ CI.getType(), CI);
+ RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
+ CI.getType(), CI);
+ return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ }
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptoui(uitofp(X)) --> X
+ // fptoui(sitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe in the sitofp case, because any
+ // negative 'X' value would cause an undefined result for the fptoui.
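+ // For example, fptoui(uitofp(i16 X)) through float is safe: float has a
+ // 24-bit mantissa and can represent every i16 value exactly.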
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptosi(sitofp(X)) --> X
+ // fptosi(uitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe in the uitofp case, because an 'X'
+ // value with the sign bit set would make this fptosi undefined anyway.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() <=
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+ // If the destination integer type is smaller than the intptr_t type for
+ // this target, do a ptrtoint to intptr_t then do a trunc. This allows the
+ // trunc to be exposed to other transforms. Don't do this for extending
+ // ptrtoint's, because we don't know if the target sign or zero extends its
+ // pointers.
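+ // For example, with 64-bit pointers, ptrtoint i8* %p to i32 becomes a
+ // ptrtoint of %p to i64 followed by a trunc to i32.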
+ if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) {
+ Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0),
+ TD->getIntPtrType(),
+ "tmp"), CI);
+ return new TruncInst(P, CI.getType());
+ }
+
+ return commonPointerCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+ // If the source integer type is larger than the intptr_t type for
+ // this target, do a trunc to the intptr_t type, then inttoptr of it. This
+ // allows the trunc to be exposed to other transforms. Don't do this for
+ // extending inttoptr's, because we don't know if the target sign or zero
+ // extends integers to pointers.
+ if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() >
+ TD->getPointerSizeInBits()) {
+ Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0),
+ TD->getIntPtrType(),
+ "tmp"), CI);
+ return new IntToPtrInst(P, CI.getType());
+ }
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ const Type *DestPointee = cast<PointerType>(CI.getType())->getElementType();
+ if (!DestPointee->isSized()) return 0;
+
+ // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP.
+ ConstantInt *Cst;
+ Value *X;
+ if (match(CI.getOperand(0), m_Add(m_Cast<PtrToIntInst>(m_Value(X)),
+ m_ConstantInt(Cst)))) {
+ // If the source and destination operands have the same type, see if this
+ // is a single-index GEP.
+ if (X->getType() == CI.getType()) {
+ // Get the size of the pointee type.
+ uint64_t Size = TD->getTypeAllocSize(DestPointee);
+
+ // Convert the constant to intptr type.
+ APInt Offset = Cst->getValue();
+ Offset.sextOrTrunc(TD->getPointerSizeInBits());
+
+ // If Offset is evenly divisible by Size, we can do this xform.
+ if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
+ Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
+ return GetElementPtrInst::Create(X, ConstantInt::get(Offset));
+ }
+ }
+ // TODO: Could handle other cases, e.g. where add is indexing into field of
+ // struct etc.
+ } else if (CI.getOperand(0)->hasOneUse() &&
+ match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) {
+ // Otherwise, if this is inttoptr(add x, cst), try to turn this into an
+ // "inttoptr+GEP" instead of "add+intptr".
+
+ // Get the size of the pointee type.
+ uint64_t Size = TD->getTypeAllocSize(DestPointee);
+
+ // Convert the constant to intptr type.
+ APInt Offset = Cst->getValue();
+ Offset.sextOrTrunc(TD->getPointerSizeInBits());
+
+ // If Offset is evenly divisible by Size, we can do this xform.
+ if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
+ Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
+
+ Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(),
+ "tmp"), CI);
+ return GetElementPtrInst::Create(P, ConstantInt::get(Offset), "tmp");
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+ // If the operands are integer typed then apply the integer transforms,
+ // otherwise just apply the common ones.
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType();
+ const Type *DestTy = CI.getType();
+
+ if (SrcTy->isInteger() && DestTy->isInteger()) {
+ if (Instruction *Result = commonIntCastTransforms(CI))
+ return Result;
+ } else if (isa<PointerType>(SrcTy)) {
+ if (Instruction *I = commonPointerCastTransforms(CI))
+ return I;
+ } else {
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+ }
+
+ // Get rid of casts from one type to the same type. These are useless and can
+ // be replaced by the operand.
+ if (DestTy == Src->getType())
+ return ReplaceInstUsesWith(CI, Src);
+
+ if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+ const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+ const Type *DstElTy = DstPTy->getElementType();
+ const Type *SrcElTy = SrcPTy->getElementType();
+
+ // If the address spaces don't match, don't eliminate the bitcast, which is
+ // required for changing types.
+ if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+ return 0;
+
+ // If we are casting a malloc or alloca to a pointer to a type of the same
+ // size, rewrite the allocation instruction to allocate the "right" type.
+ if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
+ if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+ return V;
+
+ // If the source and destination are pointers, and this cast is equivalent
+ // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
+ // This can enhance SROA and other transforms that want type-safe pointers.
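+ // For example, bitcast [10 x i32]* %p to i32* can become
+ //   getelementptr [10 x i32]* %p, i32 0, i32 0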
+ Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
+ unsigned NumZeros = 0;
+ while (SrcElTy != DstElTy &&
+ isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+ SrcElTy->getNumContainedTypes() /* not "{}" */) {
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ ++NumZeros;
+ }
+
+ // If we found a path from the src to dest, create the getelementptr now.
+ if (SrcElTy == DstElTy) {
+ SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "",
+ ((Instruction*) NULL));
+ }
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+ if (SVI->hasOneUse()) {
+ // Okay, we have (bitconvert (shuffle ..)). Check to see if this is
+ // a bitconvert to a vector with the same # elts.
+ if (isa<VectorType>(DestTy) &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ SVI->getType()->getNumElements() &&
+ SVI->getType()->getNumElements() ==
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ CastInst *Tmp;
+ // If either of the operands is a cast from CI.getType(), then
+ // evaluating the shuffle in the casted destination's type will allow
+ // us to eliminate at least one cast.
+ if (((Tmp = dyn_cast<CastInst>(SVI->getOperand(0))) &&
+ Tmp->getOperand(0)->getType() == DestTy) ||
+ ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) &&
+ Tmp->getOperand(0)->getType() == DestTy)) {
+ Value *LHS = InsertCastBefore(Instruction::BitCast,
+ SVI->getOperand(0), DestTy, CI);
+ Value *RHS = InsertCastBefore(Instruction::BitCast,
+ SVI->getOperand(1), DestTy, CI);
+ // Return a new shuffle vector. Use the same element ID's, as we
+ // know the vector types match #elts.
+ return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/// GetSelectFoldableOperands - We want to turn code that looks like this:
+/// %C = or %A, %B
+/// %D = select %cond, %C, %A
+/// into:
+/// %C = select %cond, %B, 0
+/// %D = or %A, %C
+///
+/// Assuming that the specified instruction is an operand to the select, return
+/// a bitmask indicating which operands of this instruction are foldable if they
+/// equal the other incoming value of the select.
+///
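+/// The low bit of the returned mask marks operand 0 as foldable and the next
+/// bit marks operand 1, so a return value of 3 means either operand may fold.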
+static unsigned GetSelectFoldableOperands(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return 3; // Can fold through either operand.
+ case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::Shl: // Can only fold on the shift amount.
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return 1;
+ default:
+ return 0; // Cannot fold
+ }
+}
+
+/// GetSelectFoldableConstant - For the same transformation as the previous
+/// function, return the identity constant that goes into the select.
+static Constant *GetSelectFoldableConstant(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert(0 && "This cannot happen!"); abort();
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Constant::getNullValue(I->getType());
+ case Instruction::And:
+ return Constant::getAllOnesValue(I->getType());
+ case Instruction::Mul:
+ return ConstantInt::get(I->getType(), 1);
+ }
+}
+
+/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
+/// have the same opcode and only one use each. Try to simplify this.
+Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI) {
+ if (TI->getNumOperands() == 1) {
+ // If these are casts from the same source type, we can merge them into
+ // a cast of a select of the inputs.
+ if (TI->isCast()) {
+ if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ return 0;
+ } else {
+ return 0; // unknown unary op.
+ }
+
+ // Fold this by inserting a select from the input values.
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
+ InsertNewInstBefore(NewSI, SI);
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ TI->getType());
+ }
+
+ // Only handle binary operators here.
+ if (!isa<BinaryOperator>(TI))
+ return 0;
+
+ // Figure out if the operations have any operands in common.
+ Value *MatchOp, *OtherOpT, *OtherOpF;
+ bool MatchIsOpZero;
+ if (TI->getOperand(0) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = false;
+ } else if (!TI->isCommutative()) {
+ return 0;
+ } else if (TI->getOperand(0) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else {
+ return 0;
+ }
+
+ // If we reach here, they do have operations in common.
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
+ InsertNewInstBefore(NewSI, SI);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+ if (MatchIsOpZero)
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
+ else
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
+ }
+ assert(0 && "Shouldn't get here");
+ return 0;
+}
+
+static bool isSelect01(Constant *C1, Constant *C2) {
+ ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
+ if (!C1I)
+ return false;
+ ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
+ if (!C2I)
+ return false;
+ return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
+}
+
+/// FoldSelectIntoOp - Try to fold the select into one of the operands to
+/// facilitate further optimization.
+Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
+ // See the comment above GetSelectFoldableOperands for a description of the
+ // transformation we are doing here.
+ if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
+ if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
+ !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(TVI);
+ Value *OOp = TVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0 and 1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
+ InsertNewInstBefore(NewSel, SI);
+ NewSel->takeName(TVI);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
+ return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
+ assert(0 && "Unknown instruction!!");
+ }
+ }
+ }
+ }
+ }
+
+ if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
+ if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
+ !isa<Constant>(TrueVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(FVI);
+ Value *OOp = FVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0 and 1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
+ InsertNewInstBefore(NewSel, SI);
+ NewSel->takeName(FVI);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
+ return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
+ assert(0 && "Unknown instruction!!");
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// visitSelectInstWithICmp - Visit a SelectInst that has an
+/// ICmpInst as its first operand.
+///
+Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ bool Changed = false;
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // Check cases where the comparison is with a constant that
+ // can be adjusted to fit the min/max idiom. We may edit ICI in
+ // place here, so make sure the select is the only user.
+ if (ICI->hasOneUse())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: {
+ // X < MIN ? T : F --> F
+ if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+ Constant *AdjustedRHS = SubOne(CI);
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+ Changed = true;
+ }
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ // X > MAX ? T : F --> F
+ if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+ Constant *AdjustedRHS = AddOne(CI);
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // (x <s 0) ? -1 : 0 -> ashr x, 31        -> all ones if the sign bit is set
+ // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if the sign bit is clear
+ CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+ if (match(TrueVal, m_ConstantInt<-1>()) &&
+ match(FalseVal, m_ConstantInt<0>()))
+ Pred = ICI->getPredicate();
+ else if (match(TrueVal, m_ConstantInt<0>()) &&
+ match(FalseVal, m_ConstantInt<-1>()))
+ Pred = CmpInst::getInversePredicate(ICI->getPredicate());
+
+ if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
+ // If we are just checking the sign bit and selecting between -1 and 0,
+ // shift the sign bit into place with an ashr and then sign extend,
+ // avoiding the comparison.
+ const APInt &Op1CV = CI->getValue();
+
+ // sext (x <s 0) to i32 --> x>>s31 true if signbit set.
+ // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear.
+ if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getPrimitiveSizeInBits()-1);
+ In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
+ In->getName()+".lobit"),
+ *ICI);
+ if (In->getType() != SI.getType())
+ In = CastInst::CreateIntegerCast(In, SI.getType(),
+ true/*SExt*/, "tmp", ICI);
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
+ In->getName()+".not"), *ICI);
+
+ return ReplaceInstUsesWith(SI, In);
+ }
+ }
+ }
+
+ if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // Transform (X != Y) ? X : Y -> X
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+ } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
+ // Transform (X == Y) ? Y : X -> X
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // Transform (X != Y) ? Y : X -> Y
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+ }
+
+ /// NOTE: if we wanted to, this is where to detect integer ABS
+
+ return Changed ? &SI : 0;
+}
+
+Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // select true, X, Y -> X
+ // select false, X, Y -> Y
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal))
+ return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal);
+
+ // select C, X, X -> X
+ if (TrueVal == FalseVal)
+ return ReplaceInstUsesWith(SI, TrueVal);
+
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return ReplaceInstUsesWith(SI, FalseVal);
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return ReplaceInstUsesWith(SI, TrueVal);
+ if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+ if (isa<Constant>(TrueVal))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ else
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+
+ if (SI.getType() == Type::Int1Ty) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
+ if (C->getZExtValue()) {
+ // Change: A = select B, true, C --> A = or B, C
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ } else {
+ // Change: A = select B, false, C --> A = and !B, C
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
+ }
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+ if (C->getZExtValue() == false) {
+ // Change: A = select B, C, false --> A = and B, C
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ } else {
+ // Change: A = select B, C, true --> A = or !B, C
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
+ }
+ }
+
+ // select a, b, a -> a&b
+ // select a, a, b -> a|b
+ if (CondVal == TrueVal)
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ else if (CondVal == FalseVal)
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+
+ // Selecting between two integer constants?
+ if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+ if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
+ // select C, 1, 0 -> zext C to int
+ if (FalseValC->isZero() && TrueValC->getValue() == 1) {
+ return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
+ } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
+ // select C, 0, 1 -> zext !C to int
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
+ }
+
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
+
+ // (x <s 0) ? -1 : 0 -> ashr x, 31
+ if (TrueValC->isAllOnesValue() && FalseValC->isZero())
+ if (ConstantInt *CmpCst = dyn_cast<ConstantInt>(IC->getOperand(1))) {
+ if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) {
+ // The comparison constant and the result are not necessarily the
+ // same width. Make an all-ones value by inserting an AShr.
+ Value *X = IC->getOperand(0);
+ uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
+ Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
+ Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
+ ShAmt, "ones");
+ InsertNewInstBefore(SRA, SI);
+
+ // Then cast to the appropriate width.
+ return CastInst::CreateIntegerCast(SRA, SI.getType(), true);
+ }
+ }
+
+ // If one of the constants is zero (we know they can't both be) and we
+ // have an icmp instruction with zero, and we have an 'and' with the
+ // non-constant value, eliminate this whole mess. This corresponds to
+ // cases like this: ((X & 4) ? 4 : 0)
+ if (TrueValC->isZero() || FalseValC->isZero())
+ if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
+ cast<Constant>(IC->getOperand(1))->isNullValue())
+ if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
+ if (ICA->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(ICA->getOperand(1)) &&
+ (ICA->getOperand(1) == TrueValC ||
+ ICA->getOperand(1) == FalseValC) &&
+ isOneBitSet(cast<ConstantInt>(ICA->getOperand(1)))) {
+ // Okay, now we know that everything is set up, we just don't
+ // know whether we have an icmp_ne or icmp_eq and whether the
+ // true or false val is the zero.
+ bool ShouldNotVal = !TrueValC->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ Value *V = ICA;
+ if (ShouldNotVal)
+ V = InsertNewInstBefore(BinaryOperator::Create(
+ Instruction::Xor, V, ICA->getOperand(1)), SI);
+ return ReplaceInstUsesWith(SI, V);
+ }
+ }
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
+ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X != Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+
+ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
+ // Transform (X == Y) ? Y : X -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X != Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+ }
+ // NOTE: if we wanted to, this is where to detect ABS
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
+ if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ return Result;
+
+ if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
+ if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
+ if (TI->hasOneUse() && FI->hasOneUse()) {
+ Instruction *AddOp = 0, *SubOp = 0;
+
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
+ if (TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
+
+ // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is
+ // even legal for FP.
+ if (TI->getOpcode() == Instruction::Sub &&
+ FI->getOpcode() == Instruction::Add) {
+ AddOp = FI; SubOp = TI;
+ } else if (FI->getOpcode() == Instruction::Sub &&
+ TI->getOpcode() == Instruction::Add) {
+ AddOp = TI; SubOp = FI;
+ }
+
+ if (AddOp) {
+ Value *OtherAddOp = 0;
+ if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
+ OtherAddOp = AddOp->getOperand(1);
+ } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
+ OtherAddOp = AddOp->getOperand(0);
+ }
+
+ if (OtherAddOp) {
+ // So at this point we know we have (Y -> OtherAddOp):
+ // select C, (add X, Y), (sub X, Z)
+ Value *NegVal; // Compute -Z
+ if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
+ NegVal = ConstantExpr::getNeg(C);
+ } else {
+ NegVal = InsertNewInstBefore(
+ BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI);
+ }
+
+ Value *NewTrueOp = OtherAddOp;
+ Value *NewFalseOp = NegVal;
+ if (AddOp != TI)
+ std::swap(NewTrueOp, NewFalseOp);
+ Instruction *NewSel =
+ SelectInst::Create(CondVal, NewTrueOp,
+ NewFalseOp, SI.getName() + ".p");
+
+ NewSel = InsertNewInstBefore(NewSel, SI);
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ }
+ }
+ }
+
+ // See if we can fold the select into one of our operands.
+ if (SI.getType()->isInteger()) {
+ Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal);
+ if (FoldI)
+ return FoldI;
+ }
+
+ if (BinaryOperator::isNot(CondVal)) {
+ SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ return &SI;
+ }
+
+ return 0;
+}
+
+/// EnforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned EnforceKnownAlignment(Value *V,
+ unsigned Align, unsigned PrefAlign) {
+
+ User *U = dyn_cast<User>(V);
+ if (!U) return Align;
+
+ switch (getOpcode(U)) {
+ default: break;
+ case Instruction::BitCast:
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ case Instruction::GetElementPtr: {
+ // If all indexes are zero, it is just the alignment of the base pointer.
+ bool AllZeroOperands = true;
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
+ AllZeroOperands = false;
+ break;
+ }
+
+ if (AllZeroOperands) {
+ // Treat this like a bitcast.
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ }
+ break;
+ }
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (!GV->isDeclaration()) {
+ if (GV->getAlignment() >= PrefAlign)
+ Align = GV->getAlignment();
+ else {
+ GV->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
+ }
+ } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+ // If there is a requested alignment and if this is an alloca, round up. We
+ // don't do this for malloc, because some systems can't respect the request.
+ if (isa<AllocaInst>(AI)) {
+ if (AI->getAlignment() >= PrefAlign)
+ Align = AI->getAlignment();
+ else {
+ AI->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
+ }
+ }
+
+ return Align;
+}
+
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
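+/// For example, if the low three bits of the pointer are known to be zero,
+/// the computed alignment is at least 8.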
+unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
+ unsigned PrefAlign) {
+ unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
+ sizeof(PrefAlign) * CHAR_BIT;
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ if (PrefAlign > Align)
+ Align = EnforceKnownAlignment(V, Align, PrefAlign);
+
+ // Return whatever alignment we ended up with.
+ return Align;
+}
+
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+ unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
+ unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+ unsigned MinAlign = std::min(DstAlign, SrcAlign);
+ unsigned CopyAlign = MI->getAlignment();
+
+ if (CopyAlign < MinAlign) {
+ MI->setAlignment(MinAlign);
+ return MI;
+ }
+
+ // If the MemCpyInst length is 1/2/4/8 bytes, then replace the memcpy with
+ // a load/store pair.
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
+ if (MemOpLength == 0) return 0;
+
+ // Source and destination pointer types are always "i8*" for the intrinsic.
+ // See if the size is something we can handle with a single primitive
+ // load/store.
+ // A single load+store correctly handles overlapping memory in the memmove
+ // case.
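+ // For example, an 8-byte memcpy can become an i64 load of the source
+ // followed by an i64 store to the destination (unless a better element
+ // type is found below).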
+ unsigned Size = MemOpLength->getZExtValue();
+ if (Size == 0) return MI; // Delete this mem transfer.
+
+ if (Size > 8 || (Size&(Size-1)))
+ return 0; // If not 1/2/4/8 bytes, exit.
+
+ // Use an integer load+store unless we can find something better.
+ const Type *NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3));
+
+ // Memcpy forces the use of i8* for the source and destination. That means
+ // that if you're using memcpy to move one double around, you'll get a cast
+ // from double* to i8*. We'd much rather use a double load+store than an
+ // i64 load+store here, because this improves the odds that the source or
+ // dest address will be promotable. See if we can find a better type than the
+ // integer datatype.
+ if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
+ const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
+ if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+ // down through these levels if so.
+ while (!SrcETy->isSingleValueType()) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+ if (STy->getNumElements() == 1)
+ SrcETy = STy->getElementType(0);
+ else
+ break;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+ if (ATy->getNumElements() == 1)
+ SrcETy = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ if (SrcETy->isSingleValueType())
+ NewPtrTy = PointerType::getUnqual(SrcETy);
+ }
+ }
+
+ // If the memcpy/memmove provides better alignment info than we can
+ // infer, use it.
+ SrcAlign = std::max(SrcAlign, CopyAlign);
+ DstAlign = std::max(DstAlign, CopyAlign);
+
+ Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI);
+ Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI);
+ Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
+ InsertNewInstBefore(L, *MI);
+ InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+ return MI;
+}
+
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+ if (MI->getAlignment() < Alignment) {
+ MI->setAlignment(Alignment);
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || FillC->getType() != Type::Int8Ty)
+ return 0;
+ uint64_t Len = LenC->getZExtValue();
+ Alignment = MI->getAlignment();
+
+ // If the length is zero, this is a no-op
+ if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ const Type *ITy = IntegerType::get(Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ Dest = InsertBitCastBefore(Dest, PointerType::getUnqual(ITy), *MI);
+
+ // Alignment 0 means the same as alignment 1 for memset, but not for store.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
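+ // The multiply replicates the fill byte across the word; e.g. a fill byte
+ // of 0xAB with Len == 4 stores the i32 value 0xABABABAB (the constant is
+ // truncated to ITy).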
+ InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false,
+ Alignment), *MI);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
+ }
+
+ return 0;
+}
+
+
+/// visitCallInst - CallInst simplification. This mostly only handles folding
+/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
+/// the heavy lifting.
+///
+Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ // If the caller function is nounwind, mark the call as nounwind, even if the
+ // callee isn't.
+ if (CI.getParent()->getParent()->doesNotThrow() &&
+ !CI.doesNotThrow()) {
+ CI.setDoesNotThrow();
+ return &CI;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
+ if (!II) return visitCallSite(&CI);
+
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
+ bool Changed = false;
+
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
+ if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
+ if (CI->getZExtValue() == 1) {
+ // Replace the instruction with just byte operations. We would
+ // transform other cases to loads/stores, but we don't know if
+ // alignment is sufficient.
+ }
+ }
+
+ // If we have a memmove and the source operand is a constant global,
+ // then the source and dest pointers can't alias, so we can change this
+ // into a call to memcpy.
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
+ if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+ if (GVSrc->isConstant()) {
+ Module *M = CI.getParent()->getParent()->getParent();
+ Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+ const Type *Tys[1];
+ Tys[0] = CI.getOperand(3)->getType();
+ CI.setOperand(0,
+ Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
+ Changed = true;
+ }
+
+ // memmove(x,x,size) -> noop.
+ if (MMI->getSource() == MMI->getDest())
+ return EraseInstFromFunction(CI);
+ }
+
+ // If we can determine a pointer alignment that is bigger than currently
+ // set, update the alignment.
+ if (isa<MemTransferInst>(MI)) {
+ if (Instruction *I = SimplifyMemTransfer(MI))
+ return I;
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
+ }
+
+ if (Changed) return II;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap)
+ return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ Value *Ptr = InsertBitCastBefore(II->getOperand(1),
+ PointerType::getUnqual(II->getType()),
+ CI);
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(1)->getType());
+ Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(1), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(2)->getType());
+ Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(2), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvttss2si: {
+ // This intrinsic only demands the 0th element of its input vector. If
+ // we can simplify the input based on that, do so now.
+ unsigned VWidth =
+ cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+ APInt DemandedElts(VWidth, 1);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ UndefElts)) {
+ II->setOperand(1, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
+ Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+
+ if (ExtractedElts[Idx] == 0) {
+ Instruction *Elt =
+ new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
+ InsertNewInstBefore(Elt, CI);
+ ExtractedElts[Idx] = Elt;
+ }
+
+ // Insert this value into the result vector.
+ Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
+ i, "tmp");
+ InsertNewInstBefore(cast<Instruction>(Result), CI);
+ }
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+ }
+ }
+ break;
+
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
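+ // For example (illustrative IR):
+ //   %ss = call i8* @llvm.stacksave()
+ //   call void @llvm.stackrestore(i8* %ss)
+ // Here the restore does nothing and can be erased.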
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
+ }
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
+ CannotRemove = true;
+ break;
+ }
+ }
+ }
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
+ }
+
+ return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+ return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const TargetData * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
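+ // For example, an i32* argument bitcast to i8* and passed through the
+ // varargs area can instead be passed as the original i32*.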
+ if (!CS.paramHasAttr(ix, Attribute::ByVal))
+ return true;
+
+ const Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ return false;
+ return true;
+}
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ bool Changed = false;
+
+ // If the callee is a constexpr cast of a function, attempt to move the cast
+ // to the arguments of the call/invoke.
+ if (transformConstExprCastCall(CS)) return 0;
+
+ Value *Callee = CS.getCalledValue();
+
+ if (Function *CalleeF = dyn_cast<Function>(Callee))
+ if (CalleeF->getCallingConv() != CS.getCallingConv()) {
+ Instruction *OldCall = CS.getInstruction();
+ // If the call and callee calling conventions don't match, this call must
+ // be unreachable, as the call is undefined.
+ new StoreInst(ConstantInt::getTrue(),
+ UndefValue::get(PointerType::getUnqual(Type::Int1Ty)),
+ OldCall);
+ if (!OldCall->use_empty())
+ OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
+ if (isa<CallInst>(OldCall)) // Not worth removing an invoke here.
+ return EraseInstFromFunction(*OldCall);
+ return 0;
+ }
+
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(),
+ UndefValue::get(PointerType::getUnqual(Type::Int1Ty)),
+ CS.getInstruction());
+
+ if (!CS.getInstruction()->use_empty())
+ CS.getInstruction()->
+ replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ // Don't break the CFG, insert a dummy cond branch.
+ BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
+ ConstantInt::getTrue(), II);
+ }
+ return EraseInstFromFunction(*CS.getInstruction());
+ }
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
+ if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
+ if (In->getIntrinsicID() == Intrinsic::init_trampoline)
+ return transformCallThroughTrampoline(CS);
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
+ // See if we can optimize any arguments passed through the varargs area of
+ // the call.
+ for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
+ }
+ }
+ }
+
+ if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ // Inline asm calls cannot throw - mark them 'nounwind'.
+ CS.setDoesNotThrow();
+ Changed = true;
+ }
+
+ return Changed ? CS.getInstruction() : 0;
+}
+
+// transformConstExprCastCall - If the callee is a constexpr cast of a function,
+// attempt to move the cast to the arguments of the call/invoke.
+//
+bool InstCombiner::transformConstExprCastCall(CallSite CS) {
+ if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
+ ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
+ if (CE->getOpcode() != Instruction::BitCast ||
+ !isa<Function>(CE->getOperand(0)))
+ return false;
+ Function *Callee = cast<Function>(CE->getOperand(0));
+ Instruction *Caller = CS.getInstruction();
+ const AttrListPtr &CallerPAL = CS.getAttributes();
+
+ // Okay, this is a cast from a function to a different type. Unless doing so
+ // would cause a type conversion of one of our arguments, change this call to
+ // be a direct call with arguments cast to the appropriate types.
+ //
+ const FunctionType *FT = Callee->getFunctionType();
+ const Type *OldRetTy = Caller->getType();
+ const Type *NewRetTy = FT->getReturnType();
+
+ if (isa<StructType>(NewRetTy))
+ return false; // TODO: Handle multiple return values.
+
+ // Check to see if we are changing the return type...
+ if (OldRetTy != NewRetTy) {
+ if (Callee->isDeclaration() &&
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) &&
+ (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType())))
+ return false; // Cannot transform this return value.
+
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy))
+ return false; // Cannot transform this return value.
+
+ if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+ if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+ return false; // Attribute not compatible with transformed value.
+ }
+
+ // If the callsite is an invoke instruction, and the return value is used by
+ // a PHI node in a successor, we cannot change the return type of the call
+ // because there is no place to put the cast instruction (without breaking
+ // the critical edge). Bail out in this case.
+ if (!Caller->use_empty())
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI)
+ if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ if (PN->getParent() == II->getNormalDest() ||
+ PN->getParent() == II->getUnwindDest())
+ return false;
+ }
+
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+
+ CallSite::arg_iterator AI = CS.arg_begin();
+ for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ const Type *ActTy = (*AI)->getType();
+
+ if (!CastInst::isCastable(ActTy, ParamTy))
+ return false; // Cannot transform this parameter value.
+
+ if (CallerPAL.getParamAttributes(i + 1)
+ & Attribute::typeIncompatible(ParamTy))
+ return false; // Attribute not compatible with transformed value.
+
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
+ bool isConvertible = ActTy == ParamTy ||
+ ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) &&
+ (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType()));
+ if (Callee->isDeclaration() && !isConvertible) return false;
+ }
+
+ if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
+ Callee->isDeclaration())
+ return false; // Do not delete arguments unless we have a function body.
+
+ if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+ !CallerPAL.isEmpty())
+ // In this case we have more arguments than the new function type, but we
+ // won't be dropping them. Check that these extra arguments have attributes
+ // that are compatible with being a vararg call argument.
+ for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+ if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+ break;
+ Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+ if (PAttrs & Attribute::VarArgsIncompatible)
+ return false;
+ }
+
+ // Okay, we decided that this is a safe thing to do: go ahead and start
+ // inserting cast instructions as necessary...
+ std::vector<Value*> Args;
+ Args.reserve(NumActualArgs);
+ SmallVector<AttributeWithIndex, 8> attrVec;
+ attrVec.reserve(NumCommonArgs);
+
+ // Get any return attributes.
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+
+ // If the return value is not being used, the type may not be compatible
+ // with the existing attributes. Wipe out any problematic attributes.
+ RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+
+ // Add the new return attributes.
+ if (RAttrs)
+ attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ AI = CS.arg_begin();
+ for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ if ((*AI)->getType() == ParamTy) {
+ Args.push_back(*AI);
+ } else {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
+ false, ParamTy, false);
+ CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp");
+ Args.push_back(InsertNewInstBefore(NewCast, *Caller));
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+
+ // If the function takes more arguments than the call was taking, add them
+ // now...
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+ // If we are removing arguments to the function, emit an obnoxious warning...
+ if (FT->getNumParams() < NumActualArgs) {
+ if (!FT->isVarArg()) {
+ cerr << "WARNING: While resolving call to function '"
+ << Callee->getName() << "' arguments were dropped!\n";
+ } else {
+ // Add all of the arguments in their promoted form to the arg list...
+ for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+ const Type *PTy = getPromotedType((*AI)->getType());
+ if (PTy != (*AI)->getType()) {
+ // Must promote to pass through va_arg area!
+ Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false,
+ PTy, false);
+ Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp");
+ InsertNewInstBefore(Cast, *Caller);
+ Args.push_back(Cast);
+ } else {
+ Args.push_back(*AI);
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+ }
+ }
+
+ if (Attributes FnAttrs = CallerPAL.getFnAttributes())
+ attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ if (NewRetTy == Type::VoidTy)
+ Caller->setName(""); // Void type should not have a name.
+
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),attrVec.end());
+
+ Instruction *NC;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(),
+ Caller->getName(), Caller);
+ cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ } else {
+ NC = CallInst::Create(Callee, Args.begin(), Args.end(),
+ Caller->getName(), Caller);
+ CallInst *CI = cast<CallInst>(Caller);
+ if (CI->isTailCall())
+ cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+ cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ }
+
+ // Insert a cast of the return type as necessary.
+ Value *NV = NC;
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+ if (NV->getType() != Type::VoidTy) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
+ OldRetTy, false);
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+
+      // If this is an invoke instruction, we should insert it after the first
+      // non-phi instruction in the normal successor block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+ InsertNewInstBefore(NC, *I);
+ } else {
+ // Otherwise, it's a call, just insert cast right after the call instr
+ InsertNewInstBefore(NC, *Caller);
+ }
+ AddUsersToWorkList(*Caller);
+ } else {
+ NV = UndefValue::get(Caller->getType());
+ }
+ }
+
+ if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+ Caller->replaceAllUsesWith(NV);
+ Caller->eraseFromParent();
+ RemoveFromWorkList(Caller);
+ return true;
+}
+
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
+//
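+// Sketch of the pattern handled here (illustrative only): given
+//   %p = bitcast i8* %tramp to i32 (i32)*
+//   %r = call i32 %p(i32 %x)
+// where %tramp is the result of llvm.init.trampoline for a function
+//   i32 @f(i8* nest %chain, i32 %x)
+// with nest value %nval, the call becomes
+//   %r = call i32 @f(i8* nest %nval, i32 %x)
+//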
+Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+ Value *Callee = CS.getCalledValue();
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const AttrListPtr &Attrs = CS.getAttributes();
+
+ // If the call already has the 'nest' attribute somewhere then give up -
+ // otherwise 'nest' would occur twice after splicing in the chain.
+ if (Attrs.hasAttrSomewhere(Attribute::Nest))
+ return 0;
+
+ IntrinsicInst *Tramp =
+ cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+
+ Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
+ const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+ const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+ const AttrListPtr &NestAttrs = NestF->getAttributes();
+ if (!NestAttrs.isEmpty()) {
+ unsigned NestIdx = 1;
+ const Type *NestTy = 0;
+ Attributes NestAttr = Attribute::None;
+
+ // Look for a parameter marked with the 'nest' attribute.
+ for (FunctionType::param_iterator I = NestFTy->param_begin(),
+ E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+ if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
+ // Record the parameter type and any other attributes.
+ NestTy = *I;
+ NestAttr = NestAttrs.getParamAttributes(NestIdx);
+ break;
+ }
+
+ if (NestTy) {
+ Instruction *Caller = CS.getInstruction();
+ std::vector<Value*> NewArgs;
+ NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+ SmallVector<AttributeWithIndex, 8> NewAttrs;
+ NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
+ // Insert the nest argument into the call argument list, which may
+ // mean appending it. Likewise for attributes.
+
+ // Add any result attributes.
+ if (Attributes Attr = Attrs.getRetAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+
+ {
+ unsigned Idx = 1;
+ CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ do {
+ if (Idx == NestIdx) {
+ // Add the chain argument and attributes.
+ Value *NestVal = Tramp->getOperand(3);
+ if (NestVal->getType() != NestTy)
+ NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
+ NewArgs.push_back(NestVal);
+ NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+ }
+
+ if (I == E)
+ break;
+
+ // Add the original argument and attributes.
+ NewArgs.push_back(*I);
+ if (Attributes Attr = Attrs.getParamAttributes(Idx))
+ NewAttrs.push_back
+ (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Add any function attributes.
+ if (Attributes Attr = Attrs.getFnAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+
+ // The trampoline may have been bitcast to a bogus type (FTy).
+ // Handle this by synthesizing a new function type, equal to FTy
+ // with the chain parameter inserted.
+
+ std::vector<const Type*> NewTypes;
+ NewTypes.reserve(FTy->getNumParams()+1);
+
+ // Insert the chain's type into the list of parameter types, which may
+ // mean appending it.
+ {
+ unsigned Idx = 1;
+ FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end();
+
+ do {
+ if (Idx == NestIdx)
+ // Add the chain's type.
+ NewTypes.push_back(NestTy);
+
+ if (I == E)
+ break;
+
+ // Add the original type.
+ NewTypes.push_back(*I);
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Replace the trampoline call with a direct call. Let the generic
+ // code sort out any function type mismatches.
+ FunctionType *NewFTy =
+ FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
+ Constant *NewCallee = NestF->getType() == PointerType::getUnqual(NewFTy) ?
+ NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy));
+ const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),NewAttrs.end());
+
+ Instruction *NewCaller;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NewCaller = InvokeInst::Create(NewCallee,
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs.begin(), NewArgs.end(),
+ Caller->getName(), Caller);
+ cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else {
+ NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
+ Caller->getName(), Caller);
+ if (cast<CallInst>(Caller)->isTailCall())
+ cast<CallInst>(NewCaller)->setTailCall();
+ cast<CallInst>(NewCaller)->
+ setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(NewCaller)->setAttributes(NewPAL);
+ }
+ if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+ Caller->replaceAllUsesWith(NewCaller);
+ Caller->eraseFromParent();
+ RemoveFromWorkList(Caller);
+ return 0;
+ }
+ }
+
+ // Replace the trampoline call with a direct call. Since there is no 'nest'
+ // parameter, there is no need to adjust the argument list. Let the generic
+ // code sort out any function type mismatches.
+ Constant *NewCallee =
+ NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy);
+ CS.setCalledFunction(NewCallee);
+ return CS.getInstruction();
+}
+
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)]
+/// and if a/b/c/d and the add's all have a single use, turn this into two phi's
+/// and a single binop.
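+///
+/// For example (illustrative IR), with %a1 = add i32 %l1, %r1 in %bb1 and
+/// %a2 = add i32 %l2, %r2 in %bb2:
+///   %x = phi i32 [ %a1, %bb1 ], [ %a2, %bb2 ]
+/// becomes
+///   %lhs = phi i32 [ %l1, %bb1 ], [ %l2, %bb2 ]
+///   %rhs = phi i32 [ %r1, %bb1 ], [ %r2, %bb2 ]
+///   %x = add i32 %lhs, %rhs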
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
+ unsigned Opc = FirstInst->getOpcode();
+ Value *LHSVal = FirstInst->getOperand(0);
+ Value *RHSVal = FirstInst->getOperand(1);
+
+ const Type *LHSType = LHSVal->getType();
+ const Type *RHSType = RHSVal->getType();
+
+ // Scan to see if all operands are the same opcode, all have one use, and all
+ // kill their operands (i.e. the operands have one use).
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+        // Verify that the operand types match so we don't fold cmps of
+        // different types or GEPs with different index types.
+ I->getOperand(0)->getType() != LHSType ||
+ I->getOperand(1)->getType() != RHSType)
+ return 0;
+
+ // If they are CmpInst instructions, check their predicates
+ if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
+ if (cast<CmpInst>(I)->getPredicate() !=
+ cast<CmpInst>(FirstInst)->getPredicate())
+ return 0;
+
+ // Keep track of which operand needs a phi node.
+ if (I->getOperand(0) != LHSVal) LHSVal = 0;
+ if (I->getOperand(1) != RHSVal) RHSVal = 0;
+ }
+
+ // Otherwise, this is safe to transform!
+
+ Value *InLHS = FirstInst->getOperand(0);
+ Value *InRHS = FirstInst->getOperand(1);
+ PHINode *NewLHS = 0, *NewRHS = 0;
+ if (LHSVal == 0) {
+ NewLHS = PHINode::Create(LHSType,
+ FirstInst->getOperand(0)->getName() + ".pn");
+ NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
+ NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewLHS, PN);
+ LHSVal = NewLHS;
+ }
+
+ if (RHSVal == 0) {
+ NewRHS = PHINode::Create(RHSType,
+ FirstInst->getOperand(1)->getName() + ".pn");
+ NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
+ NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewRHS, PN);
+ RHSVal = NewRHS;
+ }
+
+ // Add all operands to the new PHIs.
+ if (NewLHS || NewRHS) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ if (NewLHS) {
+ Value *NewInLHS = InInst->getOperand(0);
+ NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ }
+ if (NewRHS) {
+ Value *NewInRHS = InInst->getOperand(1);
+ NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ }
+ }
+ }
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+ return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal,
+ RHSVal);
+}
+
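+/// FoldPHIArgGEPIntoPHI - If all incoming values of a PHI node are single-use
+/// getelementptr instructions with matching result types and operand counts,
+/// create PHI nodes for any operands that differ and emit one GEP of the new
+/// PHIs, analogous to FoldPHIArgBinOpIntoPHI above.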
+Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+ GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+ FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
+
+  // Scan to see if all incoming values are single-use getelementptrs with
+  // the same result type and operand count.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+ GEP->getNumOperands() != FirstInst->getNumOperands())
+ return 0;
+
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
+ // Compare the operand lists.
+ for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
+ if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ continue;
+
+      // Don't merge two GEPs when two operands differ (introducing phi nodes)
+      // if either GEP has a constant for the index.  The index may be
+ // substantially cheaper to compute for the constants, so making it a
+ // variable index could pessimize the path. This also handles the case
+ // for struct indices, which must always be constant.
+ if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
+ isa<ConstantInt>(GEP->getOperand(op)))
+ return 0;
+
+ if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ return 0;
+ FixedOperands[op] = 0; // Needs a PHI.
+ }
+ }
+
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return 0;
+
+ // Otherwise, this is safe to transform. Insert PHI nodes for each operand
+ // that is variable.
+ SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
+
+ bool HasAnyPHIs = false;
+ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
+ if (FixedOperands[i]) continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(i);
+ PHINode *NewPN = PHINode::Create(FirstOp->getType(),
+ FirstOp->getName()+".pn");
+ InsertNewInstBefore(NewPN, PN);
+
+ NewPN->reserveOperandSpace(e);
+ NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
+ OperandPhis[i] = NewPN;
+ FixedOperands[i] = NewPN;
+ HasAnyPHIs = true;
+ }
+
+
+ // Add all operands to the new PHIs.
+ if (HasAnyPHIs) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ BasicBlock *InBB = PN.getIncomingBlock(i);
+
+ for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
+ if (PHINode *OpPhi = OperandPhis[op])
+ OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ }
+ }
+
+ Value *Base = FixedOperands[0];
+ return GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
+ FixedOperands.end());
+}
+
+
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
+/// sink the load out of the block that defines it. This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targeting a
+/// non-address-taken alloca.  Doing so would prevent us from promoting the
+/// alloca to a register.
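+///
+/// For example (illustrative): a load such as
+///   %a = alloca i32
+///   ...
+///   %v = load i32* %a
+/// is reported as unprofitable to sink when %a's address never escapes, since
+/// sinking the load would block promoting %a to a register.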
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+ BasicBlock::iterator BBI = L, E = L->getParent()->end();
+
+ for (++BBI; BBI != E; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+  // Check for a non-address-taken alloca.  If the alloca is not already
+  // address-taken, sinking isn't profitable: it would block promoting the
+  // alloca to a register.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+ bool isAddressTaken = false;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI) {
+ if (isa<LoadInst>(UI)) continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ // If storing TO the alloca, then the address isn't taken.
+ if (SI->getOperand(1) == AI) continue;
+ }
+ isAddressTaken = true;
+ break;
+ }
+
+ if (!isAddressTaken && AI->isStaticAlloca())
+ return false;
+ }
+
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack addresses in each predecessor in a register only to
+ // do a shared load from register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
+ return true;
+}
+
+
+// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+// operator and they all are only used by the PHI, PHI together their
+// inputs, and do the operation once, to the result of the PHI.
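+//
+// For example (illustrative IR), with %t1 = sext i16 %v1 to i32 in %bb1 and
+// %t2 = sext i16 %v2 to i32 in %bb2:
+//   %x = phi i32 [ %t1, %bb1 ], [ %t2, %bb2 ]
+// becomes
+//   %p = phi i16 [ %v1, %bb1 ], [ %v2, %bb2 ]
+//   %x = sext i16 %p to i32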
+Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+
+ // Scan the instruction, looking for input operations that can be folded away.
+ // If all input operands to the phi are the same instruction (e.g. a cast from
+ // the same type or "+42") we can pull the operation through the PHI, reducing
+ // code size and simplifying code.
+ Constant *ConstantOp = 0;
+ const Type *CastSrcTy = 0;
+ bool isVolatile = false;
+ if (isa<CastInst>(FirstInst)) {
+ CastSrcTy = FirstInst->getOperand(0)->getType();
+ } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
+ // Can fold binop, compare or shift here if the RHS is a constant,
+ // otherwise call FoldPHIArgBinOpIntoPHI.
+ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
+ if (ConstantOp == 0)
+ return FoldPHIArgBinOpIntoPHI(PN);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(FirstInst)) {
+ isVolatile = LI->isVolatile();
+ // We can't sink the load if the loaded value could be modified between the
+ // load and the PHI.
+ if (LI->getParent() != PN.getIncomingBlock(0) ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ } else if (isa<GetElementPtrInst>(FirstInst)) {
+ return FoldPHIArgGEPIntoPHI(PN);
+ } else {
+ return 0; // Cannot fold this operation.
+ }
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ if (!isa<Instruction>(PN.getIncomingValue(i))) return 0;
+ Instruction *I = cast<Instruction>(PN.getIncomingValue(i));
+ if (!I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return 0;
+ if (CastSrcTy) {
+ if (I->getOperand(0)->getType() != CastSrcTy)
+ return 0; // Cast operation must match.
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // We can't sink the load if the loaded value could be modified between
+ // the load and the PHI.
+ if (LI->isVolatile() != isVolatile ||
+ LI->getParent() != PN.getIncomingBlock(i) ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ } else if (I->getOperand(1) != ConstantOp) {
+ return 0;
+ }
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getName()+".in");
+ NewPN->reserveOperandSpace(PN.getNumOperands()/2);
+
+ Value *InVal = FirstInst->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // Insert and return the new operation.
+ if (CastInst* FirstCI = dyn_cast<CastInst>(FirstInst))
+ return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+ return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ PhiVal, ConstantOp);
+ assert(isa<LoadInst>(FirstInst) && "Unknown operation");
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ return new LoadInst(PhiVal, "", isVolatile);
+}
+
+/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
+/// that is dead.
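+///
+/// For example (illustrative): if %x is only used by %y and %y only by %x in
+///   %x = phi i32 [ %y, %bb1 ], [ 0, %bb0 ]
+///   %y = phi i32 [ %x, %bb2 ]
+/// then the two phis form a dead cycle and can be removed.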
+static bool DeadPHICycle(PHINode *PN,
+ SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+ if (PN->use_empty()) return true;
+ if (!PN->hasOneUse()) return false;
+
+ // Remember this node, and if we find the cycle, return.
+ if (!PotentiallyDeadPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PotentiallyDeadPHIs.size() == 16)
+ return false;
+
+ if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+ return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+ return false;
+}
+
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
+/// z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+ SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+ // See if we already saw this PHI node.
+ if (!ValueEqualPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (ValueEqualPHIs.size() == 16)
+ return false;
+
+ // Scan the operands to see if they are either phi nodes or are equal to
+ // the value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Op = PN->getIncomingValue(i);
+ if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+ if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+ return false;
+ } else if (Op != NonPhiInVal)
+ return false;
+ }
+
+ return true;
+}
+
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+ // If LCSSA is around, don't mess with Phi nodes
+ if (MustPreserveLCSSA) return 0;
+
+ if (Value *V = PN.hasConstantValue())
+ return ReplaceInstUsesWith(PN, V);
+
+ // If all PHI operands are the same operation, pull them through the PHI,
+ // reducing code size.
+ if (isa<Instruction>(PN.getIncomingValue(0)) &&
+ isa<Instruction>(PN.getIncomingValue(1)) &&
+ cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+ cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+      // FIXME: The hasOneUse check will fail for PHIs that use the value more
+      // than once through the PHI itself, e.g. when the same incoming value
+      // appears on multiple edges.
+ PN.getIncomingValue(0)->hasOneUse())
+ if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+ return Result;
+
+ // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
+ // this PHI only has a single use (a PHI), and if that PHI only has one use (a
+ // PHI)... break the cycle.
+ if (PN.hasOneUse()) {
+ Instruction *PHIUser = cast<Instruction>(PN.use_back());
+ if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+ SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+ PotentiallyDeadPHIs.insert(&PN);
+ if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+
+ // If this phi has a single use, and if that use just computes a value for
+ // the next iteration of a loop, delete the phi. This occurs with unused
+ // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
+ // common case here is good because the only other things that catch this
+ // are induction variable analysis (sometimes) and ADCE, which is only run
+ // late.
+ if (PHIUser->hasOneUse() &&
+ (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+ PHIUser->use_back() == &PN) {
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+ }
+
+ // We sometimes end up with phi cycles that non-obviously end up being the
+ // same value, for example:
+ // z = some value; x = phi (y, z); y = phi (x, z)
+ // where the phi nodes don't necessarily need to be in the same block. Do a
+  // quick check to see if the PHI node only contains a single non-phi value;
+  // if so, scan to see if the phi cycle is actually equal to that value.
+ {
+ unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
+ // Scan for the first non-phi operand.
+ while (InValNo != NumOperandVals &&
+ isa<PHINode>(PN.getIncomingValue(InValNo)))
+ ++InValNo;
+
+ if (InValNo != NumOperandVals) {
+ Value *NonPhiInVal = PN.getOperand(InValNo);
+
+      // Scan the rest of the operands to see if there are any conflicts; if so,
+      // there is no need to recursively scan other phis.
+ for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
+ Value *OpVal = PN.getIncomingValue(InValNo);
+ if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+ break;
+ }
+
+ // If we scanned over all operands, then we have one unique value plus
+ // phi values. Scan PHI nodes to see if they all merge in each other or
+ // the value.
+ if (InValNo == NumOperandVals) {
+ SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+ if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+ return ReplaceInstUsesWith(PN, NonPhiInVal);
+ }
+ }
+ }
+ return 0;
+}
+
+static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
+ Instruction *InsertPoint,
+ InstCombiner *IC) {
+ unsigned PtrSize = DTy->getPrimitiveSizeInBits();
+ unsigned VTySize = V->getType()->getPrimitiveSizeInBits();
+  // We must cast correctly to the pointer type. Ensure that we
+  // sign extend the integer value if it is smaller, since it is
+  // used for address computation.
+ Instruction::CastOps opcode =
+ (VTySize < PtrSize ? Instruction::SExt :
+ (VTySize == PtrSize ? Instruction::BitCast : Instruction::Trunc));
+ return IC->InsertCastBefore(opcode, V, DTy, *InsertPoint);
+}
+
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ Value *PtrOp = GEP.getOperand(0);
+  // Is it 'getelementptr %P, i32 0' or 'getelementptr %P'?
+  // If so, eliminate the noop.
+ if (GEP.getNumOperands() == 1)
+ return ReplaceInstUsesWith(GEP, PtrOp);
+
+ if (isa<UndefValue>(GEP.getOperand(0)))
+ return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
+
+ bool HasZeroPointerIndex = false;
+ if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isNullValue();
+
+ if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
+ return ReplaceInstUsesWith(GEP, PtrOp);
+
+ // Eliminate unneeded casts for indices.
+ bool MadeChange = false;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end();
+ i != e; ++i, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ if (CastInst *CI = dyn_cast<CastInst>(*i)) {
+ if (CI->getOpcode() == Instruction::ZExt ||
+ CI->getOpcode() == Instruction::SExt) {
+ const Type *SrcTy = CI->getOperand(0)->getType();
+ // We can eliminate a cast from i32 to i64 iff the target
+ // is a 32-bit pointer target.
+ if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
+ MadeChange = true;
+ *i = CI->getOperand(0);
+ }
+ }
+ }
+ // If we are using a wider index than needed for this platform, shrink it
+ // to what we need. If narrower, sign-extend it to what we need.
+ // If the incoming value needs a cast instruction,
+ // insert it. This explicit cast can make subsequent optimizations more
+ // obvious.
+ Value *Op = *i;
+ if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) {
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ *i = ConstantExpr::getTrunc(C, TD->getIntPtrType());
+ MadeChange = true;
+ } else {
+ Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(),
+ GEP);
+ *i = Op;
+ MadeChange = true;
+ }
+ } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) {
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ *i = ConstantExpr::getSExt(C, TD->getIntPtrType());
+ MadeChange = true;
+ } else {
+ Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(),
+ GEP);
+ *i = Op;
+ MadeChange = true;
+ }
+ }
+ }
+ }
+ if (MadeChange) return &GEP;
+
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
+ SmallVector<Value*, 8> SrcGEPOperands;
+ if (User *Src = dyn_castGetElementPtr(PtrOp))
+ SrcGEPOperands.append(Src->op_begin(), Src->op_end());
+
+ if (!SrcGEPOperands.empty()) {
+    // Note that if our source is a gep chain itself, we wait for that
+    // chain to be resolved before we perform this transformation.  This
+    // avoids creating a TON of code in some cases.
+ //
+ if (isa<GetElementPtrInst>(SrcGEPOperands[0]) &&
+ cast<Instruction>(SrcGEPOperands[0])->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
+
+ SmallVector<Value*, 8> Indices;
+
+ // Find out whether the last index in the source GEP is a sequential idx.
+ bool EndsWithSequential = false;
+ for (gep_type_iterator I = gep_type_begin(*cast<User>(PtrOp)),
+ E = gep_type_end(*cast<User>(PtrOp)); I != E; ++I)
+ EndsWithSequential = !isa<StructType>(*I);
+
+ // Can we combine the two pointer arithmetics offsets?
+ if (EndsWithSequential) {
+ // Replace: gep (gep %P, long B), long A, ...
+ // With: T = long A+B; gep %P, T, ...
+ //
+ Value *Sum, *SO1 = SrcGEPOperands.back(), *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
+ Sum = GO1;
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+ Sum = SO1;
+ } else {
+ // If they aren't the same type, convert both to an integer of the
+ // target's pointer size.
+ if (SO1->getType() != GO1->getType()) {
+ if (Constant *SO1C = dyn_cast<Constant>(SO1)) {
+ SO1 = ConstantExpr::getIntegerCast(SO1C, GO1->getType(), true);
+ } else if (Constant *GO1C = dyn_cast<Constant>(GO1)) {
+ GO1 = ConstantExpr::getIntegerCast(GO1C, SO1->getType(), true);
+ } else {
+ unsigned PS = TD->getPointerSizeInBits();
+ if (TD->getTypeSizeInBits(SO1->getType()) == PS) {
+ // Convert GO1 to SO1's type.
+ GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this);
+
+ } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) {
+ // Convert SO1 to GO1's type.
+ SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this);
+ } else {
+ const Type *PT = TD->getIntPtrType();
+ SO1 = InsertCastToIntPtrTy(SO1, PT, &GEP, this);
+ GO1 = InsertCastToIntPtrTy(GO1, PT, &GEP, this);
+ }
+ }
+ }
+ if (isa<Constant>(SO1) && isa<Constant>(GO1))
+ Sum = ConstantExpr::getAdd(cast<Constant>(SO1), cast<Constant>(GO1));
+ else {
+ Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+ InsertNewInstBefore(cast<Instruction>(Sum), GEP);
+ }
+ }
+
+ // Recycle the GEP we already have if possible.
+ if (SrcGEPOperands.size() == 2) {
+ GEP.setOperand(0, SrcGEPOperands[0]);
+ GEP.setOperand(1, Sum);
+ return &GEP;
+ } else {
+ Indices.insert(Indices.end(), SrcGEPOperands.begin()+1,
+ SrcGEPOperands.end()-1);
+ Indices.push_back(Sum);
+ Indices.insert(Indices.end(), GEP.op_begin()+2, GEP.op_end());
+ }
+ } else if (isa<Constant>(*GEP.idx_begin()) &&
+ cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+ SrcGEPOperands.size() != 1) {
+ // Otherwise we can do the fold if the first index of the GEP is a zero
+ Indices.insert(Indices.end(), SrcGEPOperands.begin()+1,
+ SrcGEPOperands.end());
+ Indices.insert(Indices.end(), GEP.idx_begin()+1, GEP.idx_end());
+ }
+
+ if (!Indices.empty())
+ return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(),
+ Indices.end(), GEP.getName());
+
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) {
+ // GEP of global variable. If all of the indices for this GEP are
+ // constants, we can promote this to a constexpr instead of an instruction.
+
+ // Scan for nonconstants...
+ SmallVector<Constant*, 8> Indices;
+ User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end();
+ for (; I != E && isa<Constant>(*I); ++I)
+ Indices.push_back(cast<Constant>(*I));
+
+ if (I == E) { // If they are all constants...
+ Constant *CE = ConstantExpr::getGetElementPtr(GV,
+ &Indices[0],Indices.size());
+
+ // Replace all uses of the GEP with the new constexpr...
+ return ReplaceInstUsesWith(GEP, CE);
+ }
+ } else if (Value *X = getBitCastOperand(PtrOp)) { // Is the operand a cast?
+ if (!isa<PointerType>(X->getType())) {
+ // Not interesting. Source pointer must be a cast from pointer.
+ } else if (HasZeroPointerIndex) {
+ // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+ const PointerType *XTy = cast<PointerType>(X->getType());
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XTy->getElementType()) {
+ // -> GEP i8* X, ...
+ SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
+ return GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
+ GEP.getName());
+ } else if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(XTy->getElementType())) {
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, X);
+ return &GEP;
+ }
+ }
+ }
+ } else if (GEP.getNumOperands() == 2) {
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
+ const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ if (isa<ArrayType>(SrcElTy) &&
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(ResElTy)) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::Int32Ty);
+ Idx[1] = GEP.getOperand(1);
+ Value *V = InsertNewInstBefore(
+ GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP);
+ // V and GEP are both pointer types --> BitCast
+ return new BitCastInst(V, GEP.getType());
+ }
+
+ // Transform things like:
+ // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+ // (where tmp = 8*tmp2) into:
+ // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+
+ if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) {
+ uint64_t ArrayEltSize =
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+
+ // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
+ // allow either a mul, shift, or constant here.
+ Value *NewIdx = 0;
+ ConstantInt *Scale = 0;
+ if (ArrayEltSize == 1) {
+ NewIdx = GEP.getOperand(1);
+ Scale = ConstantInt::get(NewIdx->getType(), 1);
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
+ NewIdx = ConstantInt::get(CI->getType(), 1);
+ Scale = CI;
+ } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
+ if (Inst->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
+ uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
+ Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal);
+ NewIdx = Inst->getOperand(0);
+ } else if (Inst->getOpcode() == Instruction::Mul &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ Scale = cast<ConstantInt>(Inst->getOperand(1));
+ NewIdx = Inst->getOperand(0);
+ }
+ }
+
+ // If the index will be to exactly the right offset with the scale taken
+ // out, perform the transformation. Note, we don't know whether Scale is
+ // signed or not. We'll use unsigned version of division/modulo
+ // operation after making sure Scale doesn't have the sign bit set.
+ if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
+ Scale->getZExtValue() % ArrayEltSize == 0) {
+ Scale = ConstantInt::get(Scale->getType(),
+ Scale->getZExtValue() / ArrayEltSize);
+ if (Scale->getZExtValue() != 1) {
+ Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
+ false /*ZExt*/);
+ Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale");
+ NewIdx = InsertNewInstBefore(Sc, GEP);
+ }
+
+ // Insert the new GEP instruction.
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::Int32Ty);
+ Idx[1] = NewIdx;
+ Instruction *NewGEP =
+ GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName());
+ NewGEP = InsertNewInstBefore(NewGEP, GEP);
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+ }
+
+ /// See if we can simplify:
+ /// X = bitcast A to B*
+ /// Y = gep X, <...constant indices...>
+ /// into a gep of the original struct. This is important for SROA and alias
+ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ if (!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+ // Determine how much the GEP moves the pointer. We are guaranteed to get
+ // a constant back from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP, GEP, *this));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // If this GEP instruction doesn't move the pointer, just replace the GEP
+ // with a bitcast of the real input to the dest type.
+ if (Offset == 0) {
+ // If the bitcast is of an allocation, and the allocation will be
+ // converted to match the type of the cast, don't touch this.
+ if (isa<AllocationInst>(BCI->getOperand(0))) {
+ // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+ if (Instruction *I = visitBitCast(*BCI)) {
+ if (I != BCI) {
+ I->takeName(BCI);
+ BCI->getParent()->getInstList().insert(BCI, I);
+ ReplaceInstUsesWith(*BCI, I);
+ }
+ return &GEP;
+ }
+ }
+ return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ }
+
+ // Otherwise, if the offset is non-zero, we need to find out if there is a
+ // field at Offset in 'A's type. If so, we can pull the cast through the
+ // GEP.
+ SmallVector<Value*, 8> NewIndices;
+ const Type *InTy =
+ cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+ if (FindElementAtOffset(InTy, Offset, NewIndices, TD)) {
+ Instruction *NGEP =
+ GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end());
+ if (NGEP->getType() == GEP.getType()) return NGEP;
+ InsertNewInstBefore(NGEP, GEP);
+ NGEP->takeName(&GEP);
+ return new BitCastInst(NGEP, GEP.getType());
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
+ // Convert: malloc Ty, C - where C is a constant != 1 into: malloc [C x Ty], 1
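+  // For example (illustrative): "malloc i32, i32 4" becomes
+  //   %t = malloc [4 x i32]
+  //   %v = getelementptr [4 x i32]* %t, i32 0, i32 0
+  // and all uses of the original malloc are replaced with %v.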
+ if (AI.isArrayAllocation()) { // Check C != 1
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ const Type *NewTy =
+ ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocationInst *New = 0;
+
+ // Create and insert the replacement instruction...
+ if (isa<MallocInst>(AI))
+ New = new MallocInst(NewTy, 0, AI.getAlignment(), AI.getName());
+ else {
+ assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+ New = new AllocaInst(NewTy, 0, AI.getAlignment(), AI.getName());
+ }
+
+ InsertNewInstBefore(New, AI);
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocationInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+
+      // Now that It is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Value *NullIdx = Constant::getNullValue(Type::Int32Ty);
+ Value *Idx[2];
+ Idx[0] = NullIdx;
+ Idx[1] = NullIdx;
+ Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2,
+ New->getName()+".sub", It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return ReplaceInstUsesWith(AI, V);
+ } else if (isa<UndefValue>(AI.getArraySize())) {
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+ }
+ }
+
+ if (isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+ // If alloca'ing a zero byte object, replace the alloca with a null pointer.
+ // Note that we only do this for alloca's, because malloc should allocate
+ // and return a unique pointer, even for a zero byte allocation.
+ if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // If the alignment is 0 (unspecified), assign it the preferred alignment.
+ if (AI.getAlignment() == 0)
+ AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
+ Value *Op = FI.getOperand(0);
+
+ // free undef -> unreachable.
+ if (isa<UndefValue>(Op)) {
+ // Insert a new store to null because we cannot modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(),
+ UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), &FI);
+ return EraseInstFromFunction(FI);
+ }
+
+ // If we have 'free null' delete the instruction. This can happen in stl code
+ // when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
+ // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(Op)) {
+ FI.setOperand(0, CI->getOperand(0));
+ return &FI;
+ }
+
+ // Change free (gep X, 0,0,0,0) into free(X)
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ if (GEPI->hasAllZeroIndices()) {
+ AddToWorkList(GEPI);
+ FI.setOperand(0, GEPI->getOperand(0));
+ return &FI;
+ }
+ }
+
+ // Change free(malloc) into nothing, if the malloc has a single use.
+ if (MallocInst *MI = dyn_cast<MallocInst>(Op))
+ if (MI->hasOneUse()) {
+ EraseInstFromFunction(FI);
+ return EraseInstFromFunction(*MI);
+ }
+
+ return 0;
+}
+
+
+/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
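+///
+/// For example (illustrative, pointee types of equal size):
+///   %c = bitcast <2 x i16>* %P to i32*
+///   %v = load i32* %c
+/// becomes
+///   %w = load <2 x i16>* %P
+///   %v = bitcast <2 x i16> %w to i32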
+static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
+ const TargetData *TD) {
+ User *CI = cast<User>(LI.getOperand(0));
+ Value *CastOp = CI->getOperand(0);
+
+ if (TD) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) {
+      // Instead of loading a constant C string, use the corresponding integer
+      // value directly if the string is short enough.
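+      // For example (illustrative, little-endian): loading an i32 through a
+      // cast of a constant global holding "abc\00" yields the integer
+      // 0x00636261 directly.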
+ std::string Str;
+ if (GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) {
+ unsigned len = Str.length();
+ const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+ unsigned numBits = Ty->getPrimitiveSizeInBits();
+ // Replace LI with immediate integer store.
+ if ((numBits >> 3) == len + 1) {
+ APInt StrVal(numBits, 0);
+ APInt SingleChar(numBits, 0);
+ if (TD->isLittleEndian()) {
+ for (signed i = len-1; i >= 0; i--) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ } else {
+ for (unsigned i = 0; i < len; i++) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+          // Append the terminating NUL at the end.
+ SingleChar = 0;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ Value *NL = ConstantInt::get(StrVal);
+ return IC.ReplaceInstUsesWith(LI, NL);
+ }
+ }
+ }
+ }
+
+ const PointerType *DestTy = cast<PointerType>(CI->getType());
+ const Type *DestPTy = DestTy->getElementType();
+ if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+ // If the address spaces don't match, don't eliminate the cast.
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+ return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
+ isa<VectorType>(DestPTy)) {
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
+ if (Constant *CSrc = dyn_cast<Constant>(CastOp))
+ if (ASrcTy->getNumElements() != 0) {
+ Value *Idxs[2];
+ Idxs[0] = Idxs[1] = Constant::getNullValue(Type::Int32Ty);
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
+ SrcTy = cast<PointerType>(CastOp->getType());
+ SrcPTy = SrcTy->getElementType();
+ }
+
+ if ((SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
+ isa<VectorType>(SrcPTy)) &&
+        // Do not allow turning this into a load of an integer, which is then
+        // cast to a pointer; this pessimizes pointer analysis a lot.
+ (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
+ IC.getTargetData().getTypeSizeInBits(SrcPTy) ==
+ IC.getTargetData().getTypeSizeInBits(DestPTy)) {
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before the load, cast
+ // the result of the loaded value.
+ Value *NewLoad = IC.InsertNewInstBefore(new LoadInst(CastOp,
+ CI->getName(),
+ LI.isVolatile()),LI);
+ // Now cast the result of the load.
+ return new BitCastInst(NewLoad, LI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
+ // If it is an alloca it is always safe to load from.
+ if (isa<AllocaInst>(V)) return true;
+
+ // If it is a global variable it is mostly safe to load from.
+ if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
+
+  // Otherwise, be a little bit aggressive: scan the local block to see if the
+  // pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call (which might do a free) the pointer could be
+ // marked invalid.
+ if (isa<FreeInst>(BBI) ||
+ (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->getOperand(0) == V) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->getOperand(1) == V) return true;
+ }
+
+ }
+ return false;
+}
+
+Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+ Value *Op = LI.getOperand(0);
+
+ // Attempt to improve the alignment.
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+ if (KnownAlign >
+ (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
+ LI.getAlignment()))
+ LI.setAlignment(KnownAlign);
+
+ // load (cast X) --> cast (load X) iff safe
+ if (isa<CastInst>(Op))
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ // None of the following transforms are legal for volatile loads.
+ if (LI.isVolatile()) return 0;
+
+ // Do really simple store-to-load forwarding and load CSE, to catch cases
+  // where there are several consecutive memory accesses to the same location,
+ // separated by a few arithmetic operations.
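+  // For example (illustrative):
+  //   store i32 %v, i32* %P
+  //   %x = add i32 %v, 1
+  //   %y = load i32* %P
+  // Here the load can simply be replaced with %v.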
+ BasicBlock::iterator BBI = &LI;
+ if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
+ return ReplaceInstUsesWith(LI, AvailableVal);
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(GEPI0) &&
+ cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) {
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+ }
+
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ // load null/undef -> undef
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(C) || (C->isNullValue() &&
+ cast<PointerType>(Op->getType())->getAddressSpace() == 0)) {
+ // Insert a new store to null instruction before the load to indicate that
+ // this code is not reachable. We do this instead of inserting an
+ // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+
+ // Instcombine load (constant global) into the value loaded.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return ReplaceInstUsesWith(LI, GV->getInitializer());
+
+ // Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Constant *V =
+ ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
+ return ReplaceInstUsesWith(LI, V);
+ if (CE->getOperand(0)->isNullValue()) {
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+
+ } else if (CE->isCast()) {
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+ }
+ }
+ }
+
+ // If this load comes from anywhere in a constant global, and if the global
+ // is all undef or zero, we know what it loads.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+ if (GV->getInitializer()->isNullValue())
+ return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
+ else if (isa<UndefValue>(GV->getInitializer()))
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+ }
+
+ if (Op->hasOneUse()) {
+ // Change select and PHI nodes to select values instead of addresses: this
+    // helps alias analysis out a lot, allows many other simplifications, and
+ // exposes redundancy in the code.
+ //
+ // Note that we cannot do the transformation unless we know that the
+ // introduced loads cannot trap! Something like this is valid as long as
+ // the condition is always false: load (select bool %C, int* null, int* %G),
+ // but it would not be valid if we transformed it to load from null
+ // unconditionally.
+ //
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
+ // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI)) {
+ Value *V1 = InsertNewInstBefore(new LoadInst(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val"), LI);
+ Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val"), LI);
+ return SelectInst::Create(SI->getCondition(), V1, V2);
+ }
+
+ // load (select (cond, null, P)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(2));
+ return &LI;
+ }
+
+ // load (select (cond, P, null)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(1));
+ return &LI;
+ }
+ }
+ }
+ return 0;
+}
+
+/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
+/// when possible. This makes it generally easy to do alias analysis and/or
+/// SROA/mem2reg of the memory object.
+static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
+ User *CI = cast<User>(SI.getOperand(1));
+ Value *CastOp = CI->getOperand(0);
+
+ const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
+ if (SrcTy == 0) return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy))
+ return 0;
+
+ /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
+ /// to its first element. This allows us to handle things like:
+ /// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
+ /// on 32-bit hosts.
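+ /// For example (an illustrative sketch, assumed 2.x IR syntax and names),
+ /// the noop gep turns
+ /// store i32 %x, i32* (bitcast {i32, float}* %P to i32*)
+ /// into
+ /// %P0 = getelementptr {i32, float}* %P, i32 0, i32 0
+ /// store i32 %x, i32* %P0
+ /// so no cast of the pointer %P is needed at all.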
+ SmallVector<Value*, 4> NewGEPIndices;
+
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
+ // Index through pointer.
+ Constant *Zero = Constant::getNullValue(Type::Int32Ty);
+ NewGEPIndices.push_back(Zero);
+
+ while (1) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
+ if (!STy->getNumElements()) /* Struct can be empty {} */
+ break;
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = STy->getElementType(0);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = ATy->getElementType();
+ } else {
+ break;
+ }
+ }
+
+ SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
+ }
+
+ if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
+ return 0;
+
+ // If the pointers point into different address spaces or if they point to
+ // values with different sizes, we can't do the transformation.
+ if (SrcTy->getAddressSpace() !=
+ cast<PointerType>(CI->getType())->getAddressSpace() ||
+ IC.getTargetData().getTypeSizeInBits(SrcPTy) !=
+ IC.getTargetData().getTypeSizeInBits(DestPTy))
+ return 0;
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before
+ // the store, cast the value to be stored.
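+ // For example (illustrative, assuming 32-bit pointers so the sizes match):
+ // store i32 %x, i32* (bitcast float** %P to i32*)
+ // -->
+ // %x.c = inttoptr i32 %x to float*
+ // store float* %x.c, float** %P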
+ Value *NewCast;
+ Value *SIOp0 = SI.getOperand(0);
+ Instruction::CastOps opcode = Instruction::BitCast;
+ const Type* CastSrcTy = SIOp0->getType();
+ const Type* CastDstTy = SrcPTy;
+ if (isa<PointerType>(CastDstTy)) {
+ if (CastSrcTy->isInteger())
+ opcode = Instruction::IntToPtr;
+ } else if (isa<IntegerType>(CastDstTy)) {
+ if (isa<PointerType>(SIOp0->getType()))
+ opcode = Instruction::PtrToInt;
+ }
+
+ // SIOp0 is a pointer to aggregate and this is a store to the first field,
+ // emit a GEP to index into its first field.
+ if (!NewGEPIndices.empty()) {
+ if (Constant *C = dyn_cast<Constant>(CastOp))
+ CastOp = ConstantExpr::getGetElementPtr(C, &NewGEPIndices[0],
+ NewGEPIndices.size());
+ else
+ CastOp = IC.InsertNewInstBefore(
+ GetElementPtrInst::Create(CastOp, NewGEPIndices.begin(),
+ NewGEPIndices.end()), SI);
+ }
+
+ if (Constant *C = dyn_cast<Constant>(SIOp0))
+ NewCast = ConstantExpr::getCast(opcode, C, CastDstTy);
+ else
+ NewCast = IC.InsertNewInstBefore(
+ CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"),
+ SI);
+ return new StoreInst(NewCast, CastOp);
+}
+
+/// equivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool equivalentAddressValues(Value *A, Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ if (isa<BinaryOperator>(A) ||
+ isa<CastInst>(A) ||
+ isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
+ if (Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalTo(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+// If this instruction has two uses, one of which is a llvm.dbg.declare,
+// return the llvm.dbg.declare.
+DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
+ if (!V->hasNUses(2))
+ return 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
+ return DI;
+ if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
+ return DI;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+ Value *Val = SI.getOperand(0);
+ Value *Ptr = SI.getOperand(1);
+
+ if (isa<UndefValue>(Ptr)) { // store X, undef -> noop (even if volatile)
+ EraseInstFromFunction(SI);
+ ++NumCombined;
+ return 0;
+ }
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ // If the RHS is an alloca with two uses, the other one being a
+ // llvm.dbg.declare, zapify the store and the declare, making the
+ // alloca dead. We must do this to prevent declares from affecting
+ // codegen.
+ if (!SI.isVolatile()) {
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr)) {
+ EraseInstFromFunction(SI);
+ ++NumCombined;
+ return 0;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse()) {
+ EraseInstFromFunction(SI);
+ ++NumCombined;
+ return 0;
+ }
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
+ EraseInstFromFunction(*DI);
+ EraseInstFromFunction(SI);
+ ++NumCombined;
+ return 0;
+ }
+ }
+ }
+ }
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
+ EraseInstFromFunction(*DI);
+ EraseInstFromFunction(SI);
+ ++NumCombined;
+ return 0;
+ }
+ }
+
+ // Attempt to improve the alignment.
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+ if (KnownAlign >
+ (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
+ SI.getAlignment()))
+ SI.setAlignment(KnownAlign);
+
+ // Do really simple DSE, to catch cases where there are several consecutive
+ // stores to the same location, separated by a few arithmetic operations. This
+ // situation often occurs with bitfield accesses.
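+ // For example (illustrative), the first store below is dead and is zapped
+ // even though arithmetic separates the two stores:
+ // store i32 %old, i32* %p
+ // %new = or i32 %old, 1
+ // store i32 %new, i32* %p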
+ BasicBlock::iterator BBI = &SI;
+ for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
+ --ScanInsts) {
+ --BBI;
+ // Don't count debug info directives, lest they affect codegen,
+ // and skip pointer-to-pointer bitcasts, which are NOPs.
+ // It is necessary for correctness to skip those that feed into a
+ // llvm.dbg.declare, as these are not present when debugging is off.
+ if (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+ ScanInsts++;
+ continue;
+ }
+
+ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
+ // Prev store isn't volatile, and stores to the same location?
+ if (!PrevSI->isVolatile() && equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
+ ++NumDeadStore;
+ ++BBI;
+ EraseInstFromFunction(*PrevSI);
+ continue;
+ }
+ break;
+ }
+
+ // If this is a load, we have to stop. However, if the loaded value is from
+ // the pointer we're loading and is producing the pointer we're storing,
+ // then *this* store is dead (X = load P; store X -> P).
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
+ !SI.isVolatile()) {
+ EraseInstFromFunction(SI);
+ ++NumCombined;
+ return 0;
+ }
+ // Otherwise, this is a load from some other location. Stores before it
+ // may not be dead.
+ break;
+ }
+
+ // Don't skip over loads or things that can modify memory.
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+ break;
+ }
+
+
+ if (SI.isVolatile()) return 0; // Don't hack volatile stores.
+
+ // store X, null -> turns into 'unreachable' in SimplifyCFG
+ if (isa<ConstantPointerNull>(Ptr)) {
+ if (!isa<UndefValue>(Val)) {
+ SI.setOperand(0, UndefValue::get(Val->getType()));
+ if (Instruction *U = dyn_cast<Instruction>(Val))
+ AddToWorkList(U); // Dropped a use.
+ ++NumCombined;
+ }
+ return 0; // Do not modify these!
+ }
+
+ // store undef, Ptr -> noop
+ if (isa<UndefValue>(Val)) {
+ EraseInstFromFunction(SI);
+ ++NumCombined;
+ return 0;
+ }
+
+ // If the pointer destination is a cast, see if we can fold the cast into the
+ // source instead.
+ if (isa<CastInst>(Ptr))
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+
+
+ // If this store is the last instruction in the basic block (possibly
+ // excepting debug info instructions and the pointer bitcasts that feed
+ // into them), and if the block ends with an unconditional branch, try
+ // to move it to the successor block.
+ BBI = &SI;
+ do {
+ ++BBI;
+ } while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType())));
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
+ if (BI->isUnconditional())
+ if (SimplifyStoreAtEndOfBlock(SI))
+ return 0; // xform done!
+
+ return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+/// *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
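+/// Illustrative result (hypothetical value and block names): both original
+/// stores are removed and the successor block receives
+/// %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
+/// store i32 %storemerge, i32* %P
+///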
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+ BasicBlock *StoreBB = SI.getParent();
+
+ // Check to see if the successor block has exactly two incoming edges. If
+ // so, see if the other predecessor contains a store to the same location.
+ // if so, insert a PHI node (if needed) and move the stores down.
+ BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+ // Determine whether Dest has exactly two predecessors and, if so, compute
+ // the other predecessor.
+ pred_iterator PI = pred_begin(DestBB);
+ BasicBlock *OtherBB = 0;
+ if (*PI != StoreBB)
+ OtherBB = *PI;
+ ++PI;
+ if (PI == pred_end(DestBB))
+ return false;
+
+ if (*PI != StoreBB) {
+ if (OtherBB)
+ return false;
+ OtherBB = *PI;
+ }
+ if (++PI != pred_end(DestBB))
+ return false;
+
+ // Bail out if the relevant blocks aren't all distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
+ // Verify that the other block ends in a branch and is not otherwise empty.
+ BasicBlock::iterator BBI = OtherBB->getTerminator();
+ BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+ if (!OtherBr || BBI == OtherBB->begin())
+ return false;
+
+ // If the other block ends in an unconditional branch, check for the 'if then
+ // else' case. There is an instruction before the branch.
+ StoreInst *OtherStore = 0;
+ if (OtherBr->isUnconditional()) {
+ --BBI;
+ // Skip over debugging info.
+ while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+ if (BBI==OtherBB->begin())
+ return false;
+ --BBI;
+ }
+ // If this isn't a store, or isn't a store to the same location, bail out.
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1))
+ return false;
+ } else {
+ // Otherwise, the other block ended with a conditional branch. If one of the
+ // destinations is StoreBB, then we have the if/then case.
+ if (OtherBr->getSuccessor(0) != StoreBB &&
+ OtherBr->getSuccessor(1) != StoreBB)
+ return false;
+
+ // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+ // if/then triangle. See if there is a store to the same ptr as SI that
+ // lives in OtherBB.
+ for (;; --BBI) {
+ // Check to see if we find the matching store.
+ if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+ if (OtherStore->getOperand(1) != SI.getOperand(1))
+ return false;
+ break;
+ }
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
+ BBI == OtherBB->begin())
+ return false;
+ }
+
+ // In order to eliminate the store in OtherBr, we have to
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
+ for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+ // FIXME: This should really be AA driven.
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
+ return false;
+ }
+ }
+
+ // Insert a PHI node now if we need it.
+ Value *MergedVal = OtherStore->getOperand(0);
+ if (MergedVal != SI.getOperand(0)) {
+ PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
+ PN->reserveOperandSpace(2);
+ PN->addIncoming(SI.getOperand(0), SI.getParent());
+ PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ }
+
+ // Advance to a place where it is safe to insert the new store and
+ // insert it.
+ BBI = DestBB->getFirstNonPHI();
+ InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
+ OtherStore->isVolatile()), *BBI);
+
+ // Nuke the old stores.
+ EraseInstFromFunction(SI);
+ EraseInstFromFunction(*OtherStore);
+ ++NumCombined;
+ return true;
+}
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ // Change br (not X), label True, label False to: br X, label False, True
+ Value *X = 0;
+ BasicBlock *TrueDest;
+ BasicBlock *FalseDest;
+ if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ !isa<Constant>(X)) {
+ // Swap Destinations and condition...
+ BI.setCondition(X);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ return &BI;
+ }
+
+ // Canonicalize fcmp_one -> fcmp_oeq
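+ // e.g. (illustrative): br i1 (fcmp one %X, %Y), label %T, label %F
+ // --> br i1 (fcmp oeq %X, %Y), label %F, label %T (destinations swapped)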
+ FCmpInst::Predicate FPred; Value *Y;
+ if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)))
+ if ((FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) && BI.getCondition()->hasOneUse()) {
+ FCmpInst *I = cast<FCmpInst>(BI.getCondition());
+ FCmpInst::Predicate NewPred = FCmpInst::getInversePredicate(FPred);
+ Instruction *NewSCC = new FCmpInst(NewPred, X, Y, "", I);
+ NewSCC->takeName(I);
+ // Swap Destinations and condition...
+ BI.setCondition(NewSCC);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ RemoveFromWorkList(I);
+ I->eraseFromParent();
+ AddToWorkList(NewSCC);
+ return &BI;
+ }
+
+ // Canonicalize icmp_ne -> icmp_eq
+ ICmpInst::Predicate IPred;
+ if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)))
+ if ((IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) && BI.getCondition()->hasOneUse()) {
+ ICmpInst *I = cast<ICmpInst>(BI.getCondition());
+ ICmpInst::Predicate NewPred = ICmpInst::getInversePredicate(IPred);
+ Instruction *NewSCC = new ICmpInst(NewPred, X, Y, "", I);
+ NewSCC->takeName(I);
+ // Swap Destinations and condition...
+ BI.setCondition(NewSCC);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ RemoveFromWorkList(I);
+ I->eraseFromParent();
+ AddToWorkList(NewSCC);
+ return &BI;
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+ Value *Cond = SI.getCondition();
+ if (Instruction *I = dyn_cast<Instruction>(Cond)) {
+ if (I->getOpcode() == Instruction::Add)
+ if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // change 'switch (X+4) case 1:' into 'switch (X) case -3'
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
+ SI.setOperand(i,ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
+ AddRHS));
+ SI.setOperand(0, I->getOperand(0));
+ AddToWorkList(I);
+ return &SI;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ Value *Agg = EV.getAggregateOperand();
+
+ if (!EV.hasIndices())
+ return ReplaceInstUsesWith(EV, Agg);
+
+ if (Constant *C = dyn_cast<Constant>(Agg)) {
+ if (isa<UndefValue>(C))
+ return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+ if (isa<ConstantAggregateZero>(C))
+ return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ // Extract the element indexed by the first index out of the constant
+ Value *V = C->getOperand(*EV.idx_begin());
+ if (EV.getNumIndices() > 1)
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
+ return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+ else
+ return ReplaceInstUsesWith(EV, V);
+ }
+ return 0; // Can't handle other constants
+ }
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction, compare the indices
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract reference different elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list. i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = InsertNewInstBefore(
+ ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end()),
+ EV);
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ insi, inse);
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E = extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ exti, exte);
+ }
+ // Can't simplify extracts from other values. Note that nested extracts are
+ // already simplified implicitly by the above (extract ( extract (insert) )
+ // will be translated into extract ( insert ( extract ) ) first and then just
+ // the value inserted, if appropriate).
+ return 0;
+}
+
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation.
+static bool CheapToScalarize(Value *V, bool isConstant) {
+ if (isa<ConstantAggregateZero>(V))
+ return true;
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+ if (isConstant) return true;
+ // If all elts are the same, we can extract.
+ Constant *Op0 = C->getOperand(0);
+ for (unsigned i = 1; i < C->getNumOperands(); ++i)
+ if (C->getOperand(i) != Op0)
+ return false;
+ return true;
+ }
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Insertelement gets simplified to the inserted element, or is deleted if
+ // this is a constant-idx extractelement from a constant-idx insertelement.
+ if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+ isa<ConstantInt>(I->getOperand(2)))
+ return true;
+ if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+ return true;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+ if (BO->hasOneUse() &&
+ (CheapToScalarize(BO->getOperand(0), isConstant) ||
+ CheapToScalarize(BO->getOperand(1), isConstant)))
+ return true;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->hasOneUse() &&
+ (CheapToScalarize(CI->getOperand(0), isConstant) ||
+ CheapToScalarize(CI->getOperand(1), isConstant)))
+ return true;
+
+ return false;
+}
+
+/// Read and decode a shufflevector mask.
+///
+/// It turns undef elements into values that are larger than the number of
+/// elements in the input.
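+///
+/// For example (illustrative), a <4 x i32> shuffle with mask
+/// <i32 0, i32 5, i32 undef, i32 2> decodes to {0, 5, 8, 2}, where
+/// 8 == 2*NElts deliberately marks the undef lane as out of range.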
+static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
+ unsigned NElts = SVI->getType()->getNumElements();
+ if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
+ return std::vector<unsigned>(NElts, 0);
+ if (isa<UndefValue>(SVI->getOperand(2)))
+ return std::vector<unsigned>(NElts, 2*NElts);
+
+ std::vector<unsigned> Result;
+ const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
+ for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+ if (isa<UndefValue>(*i))
+ Result.push_back(NElts*2); // undef -> 2*NElts (out of range)
+ else
+ Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
+ return Result;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
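+///
+/// For example (illustrative):
+/// %V = insertelement <4 x float> %A, float %X, i32 2
+/// FindScalarElement(%V, 2) returns %X, while FindScalarElement(%V, 0)
+/// recurses into %A.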
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+ assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
+ const VectorType *PTy = cast<VectorType>(V->getType());
+ unsigned Width = PTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(PTy->getElementType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(PTy->getElementType());
+ else if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(PTy->getElementType());
+ else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
+ return CP->getOperand(EltNo);
+ else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return 0;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return FindScalarElement(III->getOperand(0), EltNo);
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ unsigned InEl = getShuffleMask(SVI)[EltNo];
+ if (InEl < LHSWidth)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ else if (InEl < LHSWidth*2)
+ return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ else
+ return UndefValue::get(PTy->getElementType());
+ }
+
+ // Otherwise, we don't know.
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ // If vector val is undef, replace extract with scalar undef.
+ if (isa<UndefValue>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // If vector val is constant 0, replace extract with scalar 0.
+ if (isa<ConstantAggregateZero>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. When the elements are not identical, we cannot replace yet
+ // (we do that below, but only when the index is constant).
+ Constant *op0 = C->getOperand(0);
+ for (unsigned i = 1; i < C->getNumOperands(); ++i)
+ if (C->getOperand(i) != op0) {
+ op0 = 0;
+ break;
+ }
+ if (op0)
+ return ReplaceInstUsesWith(EI, op0);
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth =
+ cast<VectorType>(EI.getOperand(0)->getType())->getNumElements();
+
+ // If this is extracting an invalid index, turn this into undef, to avoid
+ // crashing the code below.
+ if (IndexVal >= VectorWidth)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // This instruction only demands the single element from the input vector.
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+ APInt UndefElts(VectorWidth, 0);
+ APInt DemandedMask(VectorWidth, 1 << IndexVal);
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
+ DemandedMask, UndefElts)) {
+ EI.setOperand(0, V);
+ return &EI;
+ }
+ }
+
+ if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
+ return ReplaceInstUsesWith(EI, Elt);
+
+ // If this extractelement is directly using a bitcast from a vector of
+ // the same number of elements, see if we can find the source element from
+ // it. In this case, we will end up needing to bitcast the scalars.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+ if (const VectorType *VT =
+ dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VT->getNumElements() == VectorWidth)
+ if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+ return new BitCastInst(Elt, EI.getType());
+ }
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+ if (I->hasOneUse()) {
+ // Push extractelement into predecessor operation if legal and
+ // profitable to do so
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ bool isConstantElt = isa<ConstantInt>(EI.getOperand(1));
+ if (CheapToScalarize(BO, isConstantElt)) {
+ ExtractElementInst *newEI0 =
+ new ExtractElementInst(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ ExtractElementInst *newEI1 =
+ new ExtractElementInst(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ InsertNewInstBefore(newEI0, EI);
+ InsertNewInstBefore(newEI1, EI);
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+ }
+ } else if (isa<LoadInst>(I)) {
+ unsigned AS =
+ cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace();
+ Value *Ptr = InsertBitCastBefore(I->getOperand(0),
+ PointerType::get(EI.getType(), AS),EI);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep");
+ InsertNewInstBefore(GEP, EI);
+ return new LoadInst(GEP);
+ }
+ }
+ if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Extracting the inserted element?
+ if (IE->getOperand(2) == EI.getOperand(1))
+ return ReplaceInstUsesWith(EI, IE->getOperand(1));
+ // If the inserted and extracted elements are constants, they must not
+ // be the same value, extract from the pre-inserted value instead.
+ if (isa<Constant>(IE->getOperand(2)) &&
+ isa<Constant>(EI.getOperand(1))) {
+ AddUsesToWorkList(EI);
+ EI.setOperand(0, IE->getOperand(0));
+ return &EI;
+ }
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ // If this is extracting an element from a shufflevector, figure out where
+ // it came from and extract from the appropriate input element instead.
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ Value *Src;
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+ if (SrcIdx < LHSWidth)
+ Src = SVI->getOperand(0);
+ else if (SrcIdx < LHSWidth*2) {
+ SrcIdx -= LHSWidth;
+ Src = SVI->getOperand(1);
+ } else {
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ }
+ return new ExtractElementInst(Src, SrcIdx);
+ }
+ }
+ }
+ return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
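+/// For example (illustrative), inserting lane 0 of RHS into lane 1 of a
+/// 4-element LHS yields the mask <i32 0, i32 4, i32 2, i32 3>, since RHS
+/// lanes are numbered NumElts through 2*NumElts-1.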
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ std::vector<Constant*> &Mask) {
+ assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::Int32Ty));
+ return true;
+ } else if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::Int32Ty, i));
+ return true;
+ } else if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::Int32Ty, i+NumElts));
+ return true;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // Okay, we can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::Int32Ty);
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1)) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // Okay, we can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::Int32Ty, ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::Int32Ty, ExtractedIdx+NumElts);
+
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
+/// that computes V and the LHS value of the shuffle.
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+ Value *&RHS) {
+ assert(isa<VectorType>(V->getType()) &&
+ (RHS == 0 || V->getType() == RHS->getType()) &&
+ "Invalid shuffle!");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::Int32Ty));
+ return V;
+ } else if (isa<ConstantAggregateZero>(V)) {
+ Mask.assign(NumElts, ConstantInt::get(Type::Int32Ty, 0));
+ return V;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ // Either the extracted from or inserted into vector must be RHSVec,
+ // otherwise we'd end up with a shuffle of three inputs.
+ if (EI->getOperand(0) == RHS || RHS == 0) {
+ RHS = EI->getOperand(0);
+ Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::Int32Ty, NumElts+ExtractedIdx);
+ return V;
+ }
+
+ if (VecOp == RHS) {
+ Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Everything but the extracted element is replaced with the RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i != InsertedIdx)
+ Mask[i] = ConstantInt::get(Type::Int32Ty, NumElts+i);
+ }
+ return V;
+ }
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+ return EI->getOperand(0);
+
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ // Otherwise, can't do anything fancy. Return an identity vector.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::Int32Ty, i));
+ return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+ Value *VecOp = IE.getOperand(0);
+ Value *ScalarOp = IE.getOperand(1);
+ Value *IdxOp = IE.getOperand(2);
+
+ // Inserting an undef or into an undefined place, remove this.
+ if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+ ReplaceInstUsesWith(IE, VecOp);
+
+ // If the inserted element was extracted from some other vector, and if the
+ // indexes are constant, try to turn this into a shufflevector operation.
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == IE.getType()) {
+ unsigned NumVectorElts = IE.getType()->getNumElements();
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumVectorElts) // Out of range insert.
+ return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // We could theoretically do this for ANY input. However, doing so could
+ // turn chains of insertelement instructions into a chain of shufflevector
+ // instructions, and right now we do not merge shufflevectors. As such,
+ // only do this in a situation where it is clear that there is benefit.
+ if (isa<UndefValue>(VecOp) || isa<ConstantAggregateZero>(VecOp)) {
+ // Turn this into shuffle(EIOp0, VecOp, Mask). The result has all of
+ // the values of VecOp, except the one read from EIOp0.
+ // Build a new shuffle mask.
+ std::vector<Constant*> Mask;
+ if (isa<UndefValue>(VecOp))
+ Mask.assign(NumVectorElts, UndefValue::get(Type::Int32Ty));
+ else {
+ assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing");
+ Mask.assign(NumVectorElts, ConstantInt::get(Type::Int32Ty,
+ NumVectorElts));
+ }
+ Mask[InsertedIdx] = ConstantInt::get(Type::Int32Ty, ExtractedIdx);
+ return new ShuffleVectorInst(EI->getOperand(0), VecOp,
+ ConstantVector::get(Mask));
+ }
+
+ // If this insertelement isn't used by some other insertelement, turn it
+ // (and any insertelements it points to), into one big shuffle.
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+ std::vector<Constant*> Mask;
+ Value *RHS = 0;
+ Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+ // We now have a shuffle of LHS, RHS, Mask.
+ return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Value *LHS = SVI.getOperand(0);
+ Value *RHS = SVI.getOperand(1);
+ std::vector<unsigned> Mask = getShuffleMask(&SVI);
+
+ bool MadeChange = false;
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(SVI.getOperand(2)))
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+ if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
+ return 0;
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Canonicalize shuffle(x,x,mask) -> shuffle(x, undef, mask')
+ // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef, mask').
+ if (LHS == RHS || isa<UndefValue>(LHS)) {
+ if (isa<UndefValue>(LHS) && LHS == RHS) {
+ // shuffle(undef,undef,mask) -> undef.
+ return ReplaceInstUsesWith(SVI, LHS);
+ }
+
+ // Remap any references to RHS to use LHS.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] >= 2*e)
+ Elts.push_back(UndefValue::get(Type::Int32Ty));
+ else {
+ if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < e && isa<UndefValue>(LHS))) {
+ Mask[i] = 2*e; // Turn into undef.
+ Elts.push_back(UndefValue::get(Type::Int32Ty));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i]));
+ }
+ }
+ }
+ SVI.setOperand(0, SVI.getOperand(1));
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, ConstantVector::get(Elts));
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Analyze the shuffle: are the LHS or RHS identity shuffles?
+ bool isLHSID = true, isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] >= e*2) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+
+ // If the LHS is a shufflevector itself, see if we can combine it with this
+ // one without producing an unusual shuffle. Here we are really conservative:
+ // we are absolutely afraid of producing a shuffle mask not in the input
+ // program, because the code gen may not be smart enough to turn a merged
+ // shuffle into two specific shuffles: it may produce worse code. As such,
+ // we only merge two shuffles if the result is one of the two input shuffle
+ // masks. In this case, merging the shuffles just removes one instruction,
+ // which we know is safe. This is good for things like turning:
+ // (splat(splat)) -> splat.
+ if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+ if (isa<UndefValue>(RHS)) {
+ std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
+
+ std::vector<unsigned> NewMask;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+ if (Mask[i] >= 2*e)
+ NewMask.push_back(2*e);
+ else
+ NewMask.push_back(LHSMask[Mask[i]]);
+
+ // If the result mask is equal to the src shuffle or this shuffle mask, do
+ // the replacement.
+ if (NewMask == LHSMask || NewMask == Mask) {
+ unsigned LHSInNElts =
+ cast<VectorType>(LHSSVI->getOperand(0)->getType())->getNumElements();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] >= LHSInNElts*2) {
+ Elts.push_back(UndefValue::get(Type::Int32Ty));
+ } else {
+ Elts.push_back(ConstantInt::get(Type::Int32Ty, NewMask[i]));
+ }
+ }
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
+ }
+ }
+ }
+
+ return MadeChange ? &SVI : 0;
+}
+
+
+
+
+/// TryToSinkInstruction - Try to move the specified instruction from its
+/// current block into the beginning of DestBlock, which can only happen if it's
+/// safe to move the instruction past all of the instructions between it and the
+/// end of its block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(I->hasOneUse() && "Invariants didn't hold!");
+
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+ return false;
+
+ // Do not sink alloca instructions out of the entry block.
+ if (isa<AllocaInst>(I) && I->getParent() ==
+ &DestBlock->getParent()->getEntryBlock())
+ return false;
+
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of block that could change the value.
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ Scan != E; ++Scan)
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
+
+ CopyPrecedingStopPoint(I, InsertPos);
+ I->moveBefore(InsertPos);
+ ++NumSunkInst;
+ return true;
+}
+
+
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static void AddReachableCodeToWorklist(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &Visited,
+ InstCombiner &IC,
+ const TargetData *TD) {
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
+
+ while (!Worklist.empty()) {
+ BB = Worklist.back();
+ Worklist.pop_back();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB)) continue;
+
+ DbgInfoIntrinsic *DBI_Prev = NULL;
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst)) {
+ ++NumDeadInst;
+ DOUT << "IC: DCE: " << *Inst;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst;
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // If there are two consecutive llvm.dbg.stoppoint calls then
+ // it is likely that the optimizer deleted code in between these
+ // two intrinsics.
+ DbgInfoIntrinsic *DBI_Next = dyn_cast<DbgInfoIntrinsic>(Inst);
+ if (DBI_Next) {
+ if (DBI_Prev
+ && DBI_Prev->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint
+ && DBI_Next->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint) {
+ IC.RemoveFromWorkList(DBI_Prev);
+ DBI_Prev->eraseFromParent();
+ }
+ DBI_Prev = DBI_Next;
+ } else {
+ DBI_Prev = 0;
+ }
+
+ IC.AddToWorkList(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if (SI->getCaseValue(i) == Cond) {
+ BasicBlock *ReachableBB = SI->getSuccessor(i);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getSuccessor(0));
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ }
+}
+
+bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
+ bool Changed = false;
+ TD = &getAnalysis<TargetData>();
+
+ DEBUG(DOUT << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getNameStr() << "\n");
+
+ {
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock*, 64> Visited;
+ AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (!Visited.count(BB)) {
+ Instruction *Term = BB->getTerminator();
+ while (Term != BB->begin()) { // Remove instrs bottom-up
+ BasicBlock::iterator I = Term; --I;
+
+ DOUT << "IC: DCE: " << *I;
+ // A debug intrinsic shouldn't force another iteration if we weren't
+ // going to do one without it.
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ ++NumDeadInst;
+ Changed = true;
+ }
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ while (!Worklist.empty()) {
+ Instruction *I = RemoveOneFromWorkList();
+ if (I == 0) continue; // skip null values.
+
+ // Check to see if we can DCE the instruction.
+ if (isInstructionTriviallyDead(I)) {
+ // Add operands to the worklist.
+ if (I->getNumOperands() < 4)
+ AddUsesToWorkList(*I);
+ ++NumDeadInst;
+
+ DOUT << "IC: DCE: " << *I;
+
+ I->eraseFromParent();
+ RemoveFromWorkList(I);
+ Changed = true;
+ continue;
+ }
+
+ // Instruction isn't dead, see if we can constant propagate it.
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ DOUT << "IC: ConstFold to: " << *C << " from: " << *I;
+
+ // Add operands to the worklist.
+ AddUsesToWorkList(*I);
+ ReplaceInstUsesWith(*I, C);
+
+ ++NumConstProp;
+ I->eraseFromParent();
+ RemoveFromWorkList(I);
+ Changed = true;
+ continue;
+ }
+
+ if (TD &&
+ (I->getType()->getTypeID() == Type::VoidTyID ||
+ I->isTrapping())) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i))
+ if (Constant *NewC = ConstantFoldConstantExpression(CE, TD))
+ if (NewC != CE) {
+ i->set(NewC);
+ Changed = true;
+ }
+ }
+
+ // See if we can trivially sink this instruction to a successor basic block.
+ if (I->hasOneUse()) {
+ BasicBlock *BB = I->getParent();
+ BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
+ if (UserParent != BB) {
+ bool UserIsSuccessor = false;
+ // See if the user is one of our successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ if (*SI == UserParent) {
+ UserIsSuccessor = true;
+ break;
+ }
+
+ // If the user is one of our immediate successors, and if that successor
+ // only has us as a predecessor (we'd have to split the critical edge
+ // otherwise), we can keep going.
+ if (UserIsSuccessor && !isa<PHINode>(I->use_back()) &&
+ next(pred_begin(UserParent)) == pred_end(UserParent))
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ Changed |= TryToSinkInstruction(I, UserParent);
+ }
+ }
+
+ // Now that we have an instruction, try combining it to simplify it...
+#ifndef NDEBUG
+ std::string OrigI;
+#endif
+ DEBUG(std::ostringstream SS; I->print(SS); OrigI = SS.str(););
+ if (Instruction *Result = visit(*I)) {
+ ++NumCombined;
+ // Should we replace the old instruction with a new one?
+ if (Result != I) {
+ DOUT << "IC: Old = " << *I
+ << " New = " << *Result;
+
+ // Everything uses the new instruction now.
+ I->replaceAllUsesWith(Result);
+
+ // Push the new instruction and any users onto the worklist.
+ AddToWorkList(Result);
+ AddUsersToWorkList(*Result);
+
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
+ // Insert the new instruction into the basic block...
+ BasicBlock *InstParent = I->getParent();
+ BasicBlock::iterator InsertPos = I;
+
+ if (!isa<PHINode>(Result)) // If combining a PHI, don't insert
+ while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
+ ++InsertPos;
+
+ InstParent->getInstList().insert(InsertPos, Result);
+
+ // Make sure that we reprocess all operands now that we reduced their
+ // use counts.
+ AddUsesToWorkList(*I);
+
+ // Instructions can end up on the worklist more than once. Make sure
+ // we do not process an instruction that has been deleted.
+ RemoveFromWorkList(I);
+
+ // Erase the old instruction.
+ InstParent->getInstList().erase(I);
+ } else {
+#ifndef NDEBUG
+ DOUT << "IC: Mod = " << OrigI
+ << " New = " << *I;
+#endif
+
+ // If the instruction was modified, it's possible that it is now dead.
+ // if so, remove it.
+ if (isInstructionTriviallyDead(I)) {
+ // Make sure we process all operands now that we are reducing their
+ // use counts.
+ AddUsesToWorkList(*I);
+
+ // Instructions may end up in the worklist more than once. Erase all
+ // occurrences of this instruction.
+ RemoveFromWorkList(I);
+ I->eraseFromParent();
+ } else {
+ AddToWorkList(I);
+ AddUsersToWorkList(*I);
+ }
+ }
+ Changed = true;
+ }
+ }
+
+ assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+ // Do an explicit clear, this shrinks the map if needed.
+ WorklistMap.clear();
+ return Changed;
+}
+
+
+bool InstCombiner::runOnFunction(Function &F) {
+ MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ bool EverMadeChange = false;
+
+ // Iterate while there is work to do.
+ unsigned Iteration = 0;
+ while (DoOneIteration(F, Iteration++))
+ EverMadeChange = true;
+ return EverMadeChange;
+}
+
+FunctionPass *llvm::createInstructionCombiningPass() {
+ return new InstCombiner();
+}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
new file mode 100644
index 0000000..c0ca2df
--- /dev/null
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -0,0 +1,954 @@
+//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Jump Threading pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jump-threading"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+using namespace llvm;
+
+STATISTIC(NumThreads, "Number of jumps threaded");
+STATISTIC(NumFolds, "Number of terminators folded");
+
+static cl::opt<unsigned>
+Threshold("jump-threading-threshold",
+ cl::desc("Max block size to duplicate for jump threading"),
+ cl::init(6), cl::Hidden);
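+// The threshold can be tuned on the opt command line, e.g. (illustrative):
+// opt -jump-threading -jump-threading-threshold=10 in.bc -o out.bc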
+
+namespace {
+ /// This pass performs 'jump threading', which looks at blocks that have
+ /// multiple predecessors and multiple successors. If one or more of the
+ /// predecessors of the block can be proven to always jump to one of the
+ /// successors, we forward the edge from the predecessor to the successor by
+ /// duplicating the contents of this block.
+ ///
+ /// An example of when this can occur is code like this:
+ ///
+ /// if () { ...
+ /// X = 4;
+ /// }
+ /// if (X < 3) {
+ ///
+ /// In this case, the unconditional branch at the end of the first if can be
+ /// revectored to the false side of the second if.
+ ///
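+ /// After threading (an illustrative sketch of the result), the then-block
+ /// branches straight to the false successor of the 'X < 3' test, so that
+ /// comparison is never evaluated on the threaded path.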
+ class VISIBILITY_HIDDEN JumpThreading : public FunctionPass {
+ TargetData *TD;
+#ifdef NDEBUG
+ SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+#else
+ SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+#endif
+ public:
+ static char ID; // Pass identification
+ JumpThreading() : FunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ }
+
+ bool runOnFunction(Function &F);
+ void FindLoopHeaders(Function &F);
+
+ bool ProcessBlock(BasicBlock *BB);
+ bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
+ unsigned JumpThreadCost);
+ BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+ bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+ bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+
+ bool ProcessJumpOnPHI(PHINode *PN);
+ bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd);
+ bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB);
+
+ bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+ };
+}
+
+char JumpThreading::ID = 0;
+static RegisterPass<JumpThreading>
+X("jump-threading", "Jump Threading");
+
+// Public interface to the Jump Threading pass
+FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
+
+/// runOnFunction - Top level algorithm.
+///
+bool JumpThreading::runOnFunction(Function &F) {
+ DOUT << "Jump threading on function '" << F.getNameStart() << "'\n";
+ TD = &getAnalysis<TargetData>();
+
+ FindLoopHeaders(F);
+
+ bool AnotherIteration = true, EverChanged = false;
+ while (AnotherIteration) {
+ AnotherIteration = false;
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I;
+ while (ProcessBlock(BB))
+ Changed = true;
+
+ ++I;
+
+ // If the block is trivially dead, zap it. This eliminates the successor
+ // edges, which simplifies the CFG.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ DOUT << " JT: Deleting dead block '" << BB->getNameStart()
+ << "' with terminator: " << *BB->getTerminator();
+ LoopHeaders.erase(BB);
+ DeleteDeadBlock(BB);
+ Changed = true;
+ }
+ }
+ AnotherIteration = Changed;
+ EverChanged |= Changed;
+ }
+
+ LoopHeaders.clear();
+ return EverChanged;
+}
+
+/// FindLoopHeaders - We do not want jump threading to turn proper loop
+/// structures into irreducible loops. Doing this breaks up the loop nesting
+/// hierarchy and pessimizes later transformations. To prevent this from
+/// happening, we first have to find the loop headers. Here we approximate this
+/// by finding targets of backedges in the CFG.
+///
+/// Note that there definitely are cases when we want to allow threading of
+/// edges across a loop header. For example, threading a jump from outside the
+/// loop (the preheader) to an exit block of the loop is definitely profitable.
+/// It is also almost always profitable to thread backedges from within the loop
+/// to exit blocks, and is often profitable to thread backedges to other blocks
+/// within the loop (forming a nested loop). This simple analysis is not rich
+/// enough to track all of these properties and keep it up-to-date as the CFG
+/// mutates, so we don't allow any of these transformations.
+///
+void JumpThreading::FindLoopHeaders(Function &F) {
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
+ FindFunctionBackedges(F, Edges);
+
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i)
+ LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+}
+
+
+/// FactorCommonPHIPreds - If there are multiple preds with the same incoming
+/// value for the PHI, factor them together so we get one block to thread for
+/// the whole group.
+/// This is important for things like "phi i1 [true, true, false, true, x]"
+/// where we only need to clone the block for the true blocks once.
+///
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+ SmallVector<BasicBlock*, 16> CommonPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == CstVal)
+ CommonPreds.push_back(PN->getIncomingBlock(i));
+
+ if (CommonPreds.size() == 1)
+ return CommonPreds[0];
+
+ DOUT << " Factoring out " << CommonPreds.size()
+ << " common predecessors.\n";
+ return SplitBlockPredecessors(PN->getParent(),
+ &CommonPreds[0], CommonPreds.size(),
+ ".thr_comm", this);
+}
+
+
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it.
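+///
+/// For example (illustrative arithmetic): a block with two adds and one
+/// non-intrinsic call costs 1 + 1 + 4 = 6 units; if the block ends in a
+/// switch, the bonus brings that back down to 0.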
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+ // Ignore PHI nodes; these will be flattened when duplication happens.
+ BasicBlock::const_iterator I = BB->getFirstNonPHI();
+
+ // Sum up the cost of each instruction until we get to the terminator. Don't
+ // include the terminator because the copy won't include it.
+ unsigned Size = 0;
+ for (; !isa<TerminatorInst>(I); ++I) {
+ // Debugger intrinsics don't incur code size.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this is a pointer->pointer bitcast, it is free.
+ if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
+ continue;
+
+ // All other instructions count for at least one unit.
+ ++Size;
+
+ // Calls are more expensive. If they are non-intrinsic calls, we model them
+ // as having cost of 4. If they are a non-vector intrinsic, we model them
+ // as having cost of 2 total, and if they are a vector intrinsic, we model
+ // them as having cost 1.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(CI))
+ Size += 3;
+ else if (!isa<VectorType>(CI->getType()))
+ Size += 1;
+ }
+ }
+
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(I))
+ Size = Size > 6 ? Size-6 : 0;
+
+ return Size;
+}
+
+/// ProcessBlock - If there are any predecessors whose control can be threaded
+/// through to a successor, transform them now.
+bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+ // If this block has a single predecessor, and if that pred has a single
+ // successor, merge the blocks. This encourages recursive jump threading
+ // because now the condition in this block can be threaded through
+ // predecessors of our predecessor block.
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor())
+ if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+ SinglePred != BB) {
+ // If SinglePred was a loop header, BB becomes one.
+ if (LoopHeaders.erase(SinglePred))
+ LoopHeaders.insert(BB);
+
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(BB);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+ return true;
+ }
+
+ // See if this block ends with a branch or switch. If so, see if the
+ // condition is a phi node. If so, and if an entry of the phi node is a
+ // constant, we can thread the block.
+ Value *Condition;
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // Can't thread an unconditional jump.
+ if (BI->isUnconditional()) return false;
+ Condition = BI->getCondition();
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ Condition = SI->getCondition();
+ else
+ return false; // Must be an invoke.
+
+ // If the terminator of this block is branching on a constant, simplify the
+ // terminator to an unconditional branch. This can occur due to threading in
+ // other blocks.
+ if (isa<ConstantInt>(Condition)) {
+ DOUT << " In block '" << BB->getNameStart()
+ << "' folding terminator: " << *BB->getTerminator();
+ ++NumFolds;
+ ConstantFoldTerminator(BB);
+ return true;
+ }
+
+ // If the terminator is branching on an undef, we can pick any of the
+ // successors to branch to. Since this is arbitrary, we pick the successor
+ // with the fewest predecessors. This should reduce the in-degree of the
+ // others.
+ if (isa<UndefValue>(Condition)) {
+ TerminatorInst *BBTerm = BB->getTerminator();
+ unsigned MinSucc = 0;
+ BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
+ // Compute the successor with the minimum number of predecessors.
+ unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ TestBB = BBTerm->getSuccessor(i);
+ unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ if (NumPreds < MinNumPreds) {
+ MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
+ }
+
+ // Fold the branch/switch.
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ if (i == MinSucc) continue;
+ BBTerm->getSuccessor(i)->removePredecessor(BB);
+ }
+
+ DOUT << " In block '" << BB->getNameStart()
+ << "' folding undef terminator: " << *BBTerm;
+ BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm);
+ BBTerm->eraseFromParent();
+ return true;
+ }
+
+ Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
+ // If the condition is an instruction defined in another block, see if a
+ // predecessor has the same condition:
+ // br COND, BBX, BBY
+ // BBX:
+ // br COND, BBZ, BBW
+ if (!Condition->hasOneUse() && // Multiple uses.
+ (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ if (isa<BranchInst>(BB->getTerminator())) {
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && PBI->getCondition() == Condition &&
+ ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ } else {
+ assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
+ for (; PI != E; ++PI)
+ if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator()))
+ if (PSI->getCondition() == Condition &&
+ ProcessSwitchOnDuplicateCond(*PI, BB))
+ return true;
+ }
+ }
+
+ // If there is only a single predecessor of this block, nothing to fold.
+ if (BB->getSinglePredecessor())
+ return false;
+
+ // All the rest of our checks depend on the condition being an instruction.
+ if (CondInst == 0)
+ return false;
+
+ // See if this is a phi node in the current block.
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB)
+ return ProcessJumpOnPHI(PN);
+
+ // If this is a conditional branch whose condition is and/or of a phi, try to
+ // simplify it.
+ if ((CondInst->getOpcode() == Instruction::And ||
+ CondInst->getOpcode() == Instruction::Or) &&
+ isa<BranchInst>(BB->getTerminator()) &&
+ ProcessBranchOnLogical(CondInst, BB,
+ CondInst->getOpcode() == Instruction::And))
+ return true;
+
+ // If we have "br (phi != 42)" and the phi node has any constant values as
+ // operands, we can thread through this block.
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst))
+ if (isa<PHINode>(CondCmp->getOperand(0)) &&
+ isa<Constant>(CondCmp->getOperand(1)) &&
+ ProcessBranchOnCompare(CondCmp, BB))
+ return true;
+
+ // Check for some cases that are worth simplifying. Right now we want to look
+ // for loads that are used by a switch or by the condition for the branch. If
+ // we see one, check to see if it's partially redundant. If so, insert a PHI
+ // which can then be used to thread the values.
+ //
+ // This is particularly important because reg2mem inserts loads and stores all
+ // over the place, and this blocks jump threading if we don't zap them.
+ Value *SimplifyValue = CondInst;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
+ if (isa<Constant>(CondCmp->getOperand(1)))
+ SimplifyValue = CondCmp->getOperand(0);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
+ if (SimplifyPartiallyRedundantLoad(LI))
+ return true;
+
+ // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+ // "(X == 4)" thread through this block.
+
+ return false;
+}
+
+/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that
+/// block that jump on exactly the same condition. This means that we almost
+/// always know the direction of the edge in the DESTBB:
+/// PREDBB:
+/// br COND, DESTBB, BBY
+/// DESTBB:
+/// br COND, BBZ, BBW
+///
+/// If DESTBB has multiple predecessors, we can't just constant fold the branch
+/// in DESTBB; we have to thread over it.
+bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
+ BasicBlock *BB) {
+ BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator());
+
+ // If both successors of PredBB go to DESTBB, we don't know anything. We can
+ // fold the branch to an unconditional one, which allows other recursive
+ // simplifications.
+ bool BranchDir;
+ if (PredBI->getSuccessor(1) != BB)
+ BranchDir = true;
+ else if (PredBI->getSuccessor(0) != BB)
+ BranchDir = false;
+ else {
+ DOUT << " In block '" << PredBB->getNameStart()
+ << "' folding terminator: " << *PredBB->getTerminator();
+ ++NumFolds;
+ ConstantFoldTerminator(PredBB);
+ return true;
+ }
+
+ BranchInst *DestBI = cast<BranchInst>(BB->getTerminator());
+
+ // If the dest block has one predecessor, just fix the branch condition to a
+ // constant and fold it.
+ if (BB->getSinglePredecessor()) {
+ DOUT << " In block '" << BB->getNameStart()
+ << "' folding condition to '" << BranchDir << "': "
+ << *BB->getTerminator();
+ ++NumFolds;
+ DestBI->setCondition(ConstantInt::get(Type::Int1Ty, BranchDir));
+ ConstantFoldTerminator(BB);
+ return true;
+ }
+
+ // Otherwise we need to thread from PredBB to DestBB's successor which
+ // involves code duplication. Check to see if it is worth it.
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - Cost is too high: " << JumpThreadCost << "\n";
+ return false;
+ }
+
+ // Next, figure out which successor we are threading to.
+ BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+}
+
+/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
+/// block that switch on exactly the same condition. This means that we almost
+/// always know the direction of the edge in the DESTBB:
+/// PREDBB:
+/// switch COND [... DESTBB, BBY ... ]
+/// DESTBB:
+/// switch COND [... BBZ, BBW ]
+///
+/// Optimizing switches like this is very important, because simplifycfg builds
+/// switches out of repeated 'if' conditions.
+bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
+ BasicBlock *DestBB) {
+ // Can't thread edge to self.
+ if (PredBB == DestBB)
+ return false;
+
+
+ SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
+ SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
+
+ // There are a variety of optimizations that we can potentially do on these
+ // blocks: we order them from most to least preferable.
+
+ // If DESTBB *just* contains the switch, then we can forward edges from PREDBB
+ // directly to their destination. This does not introduce *any* code size
+ // growth. Skip debug info first.
+ BasicBlock::iterator BBI = DestBB->begin();
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+
+ // FIXME: Thread if it just contains a PHI.
+ if (isa<SwitchInst>(BBI)) {
+ bool MadeChange = false;
+ // Ignore the default edge for now.
+ for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) {
+ ConstantInt *DestVal = DestSI->getCaseValue(i);
+ BasicBlock *DestSucc = DestSI->getSuccessor(i);
+
+ // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if
+ // PredSI has an explicit case for it. If so, forward. If it is covered
+ // by the default case, we can't update PredSI.
+ unsigned PredCase = PredSI->findCaseValue(DestVal);
+ if (PredCase == 0) continue;
+
+ // If PredSI doesn't go to DestBB on this value, then it won't reach the
+ // case on this condition.
+ if (PredSI->getSuccessor(PredCase) != DestBB &&
+ DestSI->getSuccessor(i) != DestBB)
+ continue;
+
+ // Otherwise, we're safe to make the change. Make sure that the edge from
+ // DestSI to DestSucc is not critical and has no PHI nodes.
+ DOUT << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI;
+ DOUT << "THROUGH: " << *DestSI;
+
+ // If the destination has PHI nodes, just split the edge for updating
+ // simplicity.
+ if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()) {
+ SplitCriticalEdge(DestSI, i, this);
+ DestSucc = DestSI->getSuccessor(i);
+ }
+ FoldSingleEntryPHINodes(DestSucc);
+ PredSI->setSuccessor(PredCase, DestSucc);
+ MadeChange = true;
+ }
+
+ if (MadeChange)
+ return true;
+ }
+
+ return false;
+}
+
+
+/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
+/// load instruction, eliminate it by replacing it with a PHI node. This is an
+/// important optimization that encourages jump threading, and needs to be run
+/// interlaced with other jump threading tasks.
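+///
+/// A minimal sketch (hypothetical IR):
+///   pred1: %v = load i32* %P ... br label %merge
+///   pred2: ... br label %merge ; %P not loaded here
+///   merge: %x = load i32* %P ; partially redundant
+/// A reload %x.pr is inserted at the end of pred2, and the load in merge is
+/// replaced by %x = phi i32 [ %v, %pred1 ], [ %x.pr, %pred2 ].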
+bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
+ // Don't hack volatile loads.
+ if (LI->isVolatile()) return false;
+
+ // If the load is defined in a block with exactly one predecessor, it can't be
+ // partially redundant.
+ BasicBlock *LoadBB = LI->getParent();
+ if (LoadBB->getSinglePredecessor())
+ return false;
+
+ Value *LoadedPtr = LI->getOperand(0);
+
+ // If the loaded operand is defined in the LoadBB, it can't be available.
+ // FIXME: Could do PHI translation, that would be fun :)
+ if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
+ if (PtrOp->getParent() == LoadBB)
+ return false;
+
+ // Scan a few instructions up from the load, to see if it is obviously live at
+ // the entry to its block.
+ BasicBlock::iterator BBIt = LI;
+
+ if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB,
+ BBIt, 6)) {
+ // If the value of the load is locally available within the block, just use
+ // it. This frequently occurs for reg2mem'd allocas.
+ //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
+
+ // If the returned value is the load itself, replace with an undef. This can
+ // only happen in dead loops.
+ if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(AvailableVal);
+ LI->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, if we scanned the whole block and got to the top of the block,
+ // we know the block is locally transparent to the load. If not, something
+ // might clobber its value.
+ if (BBIt != LoadBB->begin())
+ return false;
+
+
+ SmallPtrSet<BasicBlock*, 8> PredsScanned;
+ typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
+ AvailablePredsTy AvailablePreds;
+ BasicBlock *OneUnavailablePred = 0;
+
+ // If we got here, the loaded value is transparent through to the start of the
+ // block. Check to see if it is available in any of the predecessor blocks.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *PredBB = *PI;
+
+ // If we already scanned this predecessor, skip it.
+ if (!PredsScanned.insert(PredBB))
+ continue;
+
+ // Scan the predecessor to see if the value is available in the pred.
+ BBIt = PredBB->end();
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ if (!PredAvailable) {
+ OneUnavailablePred = PredBB;
+ continue;
+ }
+
+ // If so, this load is partially redundant. Remember this info so that we
+ // can create a PHI node.
+ AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
+ }
+
+ // If the loaded value isn't available in any predecessor, it isn't partially
+ // redundant.
+ if (AvailablePreds.empty()) return false;
+
+ // Okay, the loaded value is available in at least one (and maybe all!)
+ // predecessors. If the value is unavailable in more than one unique
+ // predecessor, we want to insert a merge block for those common predecessors.
+ // This ensures that we only have to insert one reload, thus not increasing
+ // code size.
+ BasicBlock *UnavailablePred = 0;
+
+ // If there is exactly one predecessor where the value is unavailable, the
+ // already computed 'OneUnavailablePred' block is it. If it ends in an
+ // unconditional branch, we know that it isn't a critical edge.
+ if (PredsScanned.size() == AvailablePreds.size()+1 &&
+ OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
+ UnavailablePred = OneUnavailablePred;
+ } else if (PredsScanned.size() != AvailablePreds.size()) {
+ // Otherwise, we had multiple unavailable predecessors or we had a critical
+ // edge from the one.
+ SmallVector<BasicBlock*, 8> PredsToSplit;
+ SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
+
+ for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
+ AvailablePredSet.insert(AvailablePreds[i].first);
+
+ // Add all the unavailable predecessors to the PredsToSplit list.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI)
+ if (!AvailablePredSet.count(*PI))
+ PredsToSplit.push_back(*PI);
+
+ // Split them out to their own block.
+ UnavailablePred =
+ SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
+ "thread-split", this);
+ }
+
+ // If the value isn't available in all predecessors, then there will be
+ // exactly one where it isn't available. Insert a load on that edge and add
+ // it to the AvailablePreds list.
+ if (UnavailablePred) {
+ assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
+ "Can't handle critical edge here!");
+ Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr",
+ UnavailablePred->getTerminator());
+ AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
+ }
+
+ // Now we know that each predecessor of this block has a value in
+ // AvailablePreds, sort them for efficient access as we're walking the preds.
+ array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
+
+ // Create a PHI node at the start of the block for the PRE'd load value.
+ PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
+ PN->takeName(LI);
+
+ // Insert new entries into the PHI for each predecessor. A single block may
+ // have multiple entries here.
+ for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
+ ++PI) {
+ AvailablePredsTy::iterator I =
+ std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
+ std::make_pair(*PI, (Value*)0));
+
+ assert(I != AvailablePreds.end() && I->first == *PI &&
+ "Didn't find entry for predecessor!");
+
+ PN->addIncoming(I->second, I->first);
+ }
+
+ //cerr << "PRE: " << *LI << *PN << "\n";
+
+ LI->replaceAllUsesWith(PN);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+
+/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
+/// the current block. See if there are any simplifications we can do based on
+/// inputs to the phi node.
+///
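+/// A small sketch (hypothetical IR): given
+///   %cond = phi i1 [ true, %A ], [ %x, %B ]
+///   br i1 %cond, label %T, label %F
+/// the edge from %A is known to reach %T, so %A can be threaded there
+/// directly, bypassing this block's branch.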
+bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+ // See if the phi node has any constant values. If so, we can determine where
+ // the corresponding predecessor will branch.
+ ConstantInt *PredCst = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i))))
+ break;
+
+ // If no incoming value has a constant, we don't know the destination of any
+ // predecessors.
+ if (PredCst == 0)
+ return false;
+
+ // See if the cost of duplicating this block is low enough.
+ BasicBlock *BB = PN->getParent();
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - Cost is too high: " << JumpThreadCost << "\n";
+ return false;
+ }
+
+ // If so, we can actually do this threading. Merge any common predecessors
+ // that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+
+ // Next, figure out which successor we are threading to.
+ BasicBlock *SuccBB;
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ SuccBB = BI->getSuccessor(PredCst == ConstantInt::getFalse());
+ else {
+ SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
+ SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst));
+ }
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+}
+
+/// ProcessBranchOnLogical - PN's basic block contains a conditional branch
+/// whose condition is an AND/OR where one side is PN. If PN has constant
+/// operands that permit us to evaluate the condition for some operand, thread
+/// through the block. For example with:
+/// br (and X, phi(Y, Z, false))
+/// the predecessor corresponding to the 'false' will always jump to the false
+/// destination of the branch.
+///
+bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
+ bool isAnd) {
+ // If this is a binary operator tree of the same AND/OR opcode, check the
+ // LHS/RHS.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if ((isAnd && BO->getOpcode() == Instruction::And) ||
+ (!isAnd && BO->getOpcode() == Instruction::Or)) {
+ if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd))
+ return true;
+ if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd))
+ return true;
+ }
+
+ // If this isn't a PHI node, we can't handle it.
+ PHINode *PN = dyn_cast<PHINode>(V);
+ if (!PN || PN->getParent() != BB) return false;
+
+ // We can only do the simplification for phi nodes of 'false' with AND or
+ // 'true' with OR. See if we have any entries in the phi for this.
+ unsigned PredNo = ~0U;
+ ConstantInt *PredCst = ConstantInt::get(Type::Int1Ty, !isAnd);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingValue(i) == PredCst) {
+ PredNo = i;
+ break;
+ }
+ }
+
+ // If no match, bail out.
+ if (PredNo == ~0U)
+ return false;
+
+ // See if the cost of duplicating this block is low enough.
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - Cost is too high: " << JumpThreadCost << "\n";
+ return false;
+ }
+
+ // If so, we can actually do this threading. Merge any common predecessors
+ // that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+
+ // Next, figure out which successor we are threading to. If this was an AND,
+ // the constant must be FALSE, and we must be targeting the 'false' block.
+ // If this is an OR, the constant must be TRUE, and we must be targeting the
+ // 'true' block.
+ BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+}
+
+/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
+/// node and a constant. If the PHI node contains any constants as inputs, we
+/// can fold the compare for that edge and thread through it.
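+///
+/// A small sketch (hypothetical IR): given
+///   %p = phi i32 [ 42, %A ], [ %x, %B ]
+///   %c = icmp eq i32 %p, 42
+///   br i1 %c, label %T, label %F
+/// the compare folds to true along the edge from %A, so that edge can be
+/// threaded straight to %T.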
+bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
+ PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
+ Constant *RHS = cast<Constant>(Cmp->getOperand(1));
+
+ // If the phi isn't in the current block, an incoming edge to this block
+ // doesn't control the destination.
+ if (PN->getParent() != BB)
+ return false;
+
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ Constant *PredCst = 0;
+ bool TrueDirection = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ PredCst = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (PredCst == 0) continue;
+
+ Constant *Res;
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp))
+ Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS);
+ else
+ Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(),
+ PredCst, RHS);
+ // If this folded to a constant int, we know the direction of this edge.
+ if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
+ TrueDirection = ResC->getZExtValue();
+ break;
+ }
+ // If this folded to undef, just go the false way.
+ if (isa<UndefValue>(Res)) {
+ TrueDirection = false;
+ break;
+ }
+
+ // Otherwise, we can't fold this input.
+ PredCst = 0;
+ }
+
+ // If no match, bail out.
+ if (PredCst == 0)
+ return false;
+
+ // See if the cost of duplicating this block is low enough.
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - Cost is too high: " << JumpThreadCost << "\n";
+ return false;
+ }
+
+ // If so, we can actually do this threading. Merge any common predecessors
+ // that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+
+ // Next, get our successor.
+ BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+}
+
+
+/// ThreadEdge - We have decided that it is safe and profitable to thread an
+/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this
+/// change.
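+///
+/// Schematically (hypothetical blocks): PredBB -> BB -> {SuccBB, Other}
+/// becomes PredBB -> BB.thread -> SuccBB, where BB.thread is a copy of BB
+/// and BB itself stays in place for its remaining predecessors.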
+bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
+ BasicBlock *SuccBB, unsigned JumpThreadCost) {
+
+ // If threading to the same block as we come from, we would loop forever.
+ if (SuccBB == BB) {
+ DOUT << " Not threading across BB '" << BB->getNameStart()
+ << "' - would thread to self!\n";
+ return false;
+ }
+
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DOUT << " Not threading from '" << PredBB->getNameStart()
+ << "' across loop header BB '" << BB->getNameStart()
+ << "' to dest BB '" << SuccBB->getNameStart()
+ << "' - it might create an irreducible loop!\n";
+ return false;
+ }
+
+ // And finally, do it!
+ DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '"
+ << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost
+ << ", across block:\n "
+ << *BB << "\n";
+
+ // Jump threading cannot update SSA properties correctly if the values
+ // defined in the duplicated block are used outside of the block itself. For
+ // this reason, we spill all values that are used outside of BB to the stack.
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ if (!I->isUsedOutsideOfBlock(BB))
+ continue;
+
+ // We found a use of I outside of BB. Create a new stack slot to
+ // break this inter-block usage pattern.
+ DemoteRegToStack(*I);
+ }
+
+ // We are going to have to map operands from the original block, BB, to the
+ // new copy of the block, NewBB. If there are PHI nodes in BB, evaluate them
+ // to account for entry from PredBB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock *NewBB =
+ BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB);
+ NewBB->moveAfter(PredBB);
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; !isa<TerminatorInst>(BI); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getNameStart());
+ NewBB->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i)))
+ if (Value *Remapped = ValueMapping[Inst])
+ New->setOperand(i, Remapped);
+ }
+
+ // We didn't copy the terminator from BB over to NewBB, because there is now
+ // an unconditional jump to SuccBB. Insert the unconditional jump.
+ BranchInst::Create(SuccBB, NewBB);
+
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
+ // PHI nodes for NewBB now.
+ for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) {
+ PHINode *PN = cast<PHINode>(PNI);
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(BB);
+
+ // Remap the value if necessary.
+ if (Instruction *Inst = dyn_cast<Instruction>(IV))
+ if (Value *MappedIV = ValueMapping[Inst])
+ IV = MappedIV;
+ PN->addIncoming(IV, NewBB);
+ }
+
+ // Ok, NewBB is good to go. Update the terminator of PredBB to jump to
+ // NewBB instead of BB. This eliminates predecessors from BB, which requires
+ // us to simplify any PHI nodes in BB.
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
+ if (PredTerm->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredTerm->setSuccessor(i, NewBB);
+ }
+
+ // At this point, the IR is fully up to date and consistent. Do a quick scan
+ // over the new instructions and zap any that are constants or dead. This
+ // frequently happens because of phi translation.
+ BI = NewBB->begin();
+ for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ Inst->replaceAllUsesWith(C);
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+
+ // Threaded an edge!
+ ++NumThreads;
+ return true;
+}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
new file mode 100644
index 0000000..1021469
--- /dev/null
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -0,0 +1,885 @@
+//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion, attempting to remove as much
+// code from the body of a loop as possible. It does this by either hoisting
+// code into the preheader block, or by sinking code to the exit blocks if it is
+// safe. This pass also promotes must-aliased memory locations in the loop to
+// live in registers, thus hoisting and sinking "invariant" loads and stores.
+//
+// This pass uses alias analysis for two purposes:
+//
+// 1. Moving loop invariant loads and calls out of loops. If we can determine
+// that a load or call inside of a loop never aliases anything stored to,
+// we can hoist it or sink it like any other instruction.
+// 2. Scalar Promotion of Memory - If there is a store instruction inside of
+// the loop, we try to move the store to happen AFTER the loop instead of
+// inside of the loop. This can only happen if a few conditions are true:
+// A. The pointer stored through is loop invariant
+// B. There are no stores or loads in the loop which _may_ alias the
+// pointer. There are no calls in the loop which mod/ref the pointer.
+// If these conditions are true, we can promote the loads and stores in the
+// loop of the pointer to use a temporary alloca'd variable. We then use
+// the mem2reg functionality to construct the appropriate SSA form for the
+// variable.
+//
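+// As an illustrative sketch of scalar promotion (hypothetical C, not part
+// of the pass):
+//
+//   for (...) { *P += A[i]; } // *P must-aliased, P loop invariant
+//
+// is transformed into:
+//
+//   tmp = *P;
+//   for (...) { tmp += A[i]; }
+//   *P = tmp;
+//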
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "licm"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSunk , "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
+STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
+STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
+STATISTIC(NumPromoted , "Number of memory locations promoted to registers");
+
+static cl::opt<bool>
+DisablePromotion("disable-licm-promotion", cl::Hidden,
+ cl::desc("Disable memory promotion in LICM pass"));
+
+// This feature is currently disabled by default because CodeGen is not yet
+// capable of rematerializing these constants in PIC mode, so it can lead to
+// degraded performance. Compile test/CodeGen/X86/remat-constant.ll with
+// -relocation-model=pic to see an example of this.
+static cl::opt<bool>
+EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden,
+ cl::desc("Enable hoisting/sinking of constant "
+ "global variables"));
+
+namespace {
+ struct VISIBILITY_HIDDEN LICM : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LICM() : LoopPass(&ID) {}
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg)
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+
+ bool doFinalization() {
+ // Free the values stored in the map
+ for (std::map<Loop *, AliasSetTracker *>::iterator
+ I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I)
+ delete I->second;
+
+ LoopToAliasMap.clear();
+ return false;
+ }
+
+ private:
+ // Various analyses that we use...
+ AliasAnalysis *AA; // Current AliasAnalysis information
+ LoopInfo *LI; // Current LoopInfo
+ DominatorTree *DT; // Dominator Tree for the current Loop...
+ DominanceFrontier *DF; // Current Dominance Frontier
+
+ // State that is updated as we process loops
+ bool Changed; // Set to true when we change anything.
+ BasicBlock *Preheader; // The preheader block of the current loop...
+ Loop *CurLoop; // The current loop we are working on...
+ AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ std::map<Loop *, AliasSetTracker *> LoopToAliasMap;
+
+ /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
+
+ /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+ /// set.
+ void deleteAnalysisValue(Value *V, Loop *L);
+
+ /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+ /// dominated by the specified block, and that are in the current loop) in
+ /// reverse depth first order w.r.t the DominatorTree. This allows us to
+ /// visit uses before definitions, allowing us to sink a loop body in one
+ /// pass without iteration.
+ ///
+ void SinkRegion(DomTreeNode *N);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(DomTreeNode *N);
+
+ /// inSubLoop - Little predicate that returns true if the specified basic
+ /// block is in a subloop of the current one, not the current one itself.
+ ///
+ bool inSubLoop(BasicBlock *BB) {
+ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
+ for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I)
+ if ((*I)->contains(BB))
+ return true; // A subloop actually contains this block!
+ return false;
+ }
+
+ /// isExitBlockDominatedByBlockInLoop - This method checks to see if the
+ /// specified exit block of the loop is dominated by the specified block
+ /// that is in the body of the loop. We use these constraints to
+ /// dramatically limit the amount of the dominator tree that needs to be
+ /// searched.
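+ ///
+ /// For instance (hypothetical blocks): if the idom chain of exit block E
+ /// is E -> B -> header, the walk from E finds B before the loop header,
+ /// so B dominates E; if the header is reached first, it does not.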
+ bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock,
+ BasicBlock *BlockInLoop) const {
+ // If the block in the loop is the loop header, it must be dominated!
+ BasicBlock *LoopHeader = CurLoop->getHeader();
+ if (BlockInLoop == LoopHeader)
+ return true;
+
+ DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop);
+ DomTreeNode *IDom = DT->getNode(ExitBlock);
+
+ // Because the exit block is not in the loop, we know we have to get _at
+ // least_ its immediate dominator.
+ do {
+ // Get next Immediate Dominator.
+ IDom = IDom->getIDom();
+
+ // If we have reached the header of the loop, then the instruction's block
+ // did not dominate the exit node, so we can't hoist it.
+ if (IDom->getBlock() == LoopHeader)
+ return false;
+
+ } while (IDom != BlockInLoopNode);
+
+ return true;
+ }
+
+ /// sink - When an instruction is found to only be used outside of the loop,
+ /// this function moves it to the exit blocks and patches up SSA form as
+ /// needed.
+ ///
+ void sink(Instruction &I);
+
+ /// hoist - When an instruction is found to use only loop invariant operands
+ /// and it is safe to hoist, this function is called to do the dirty work.
+ ///
+ void hoist(Instruction &I);
+
+ /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it
+ /// is not a trapping instruction or if it is a trapping instruction and is
+ /// guaranteed to execute.
+ ///
+ bool isSafeToExecuteUnconditionally(Instruction &I);
+
+ /// pointerInvalidatedByLoop - Return true if the body of this loop may
+ /// store into the memory location pointed to by V.
+ ///
+ bool pointerInvalidatedByLoop(Value *V, unsigned Size) {
+ // Check to see if any of the basic blocks in CurLoop invalidate *V.
+ return CurAST->getAliasSetForPointer(V, Size).isMod();
+ }
+
+ bool canSinkOrHoistInst(Instruction &I);
+ bool isLoopInvariantInst(Instruction &I);
+ bool isNotUsedInLoop(Instruction &I);
+
+ /// PromoteValuesInLoop - Look at the stores in the loop and promote as many
+ /// to scalars as we can.
+ ///
+ void PromoteValuesInLoop();
+
+ /// FindPromotableValuesInLoop - Check the current loop for stores to
+ /// must-aliased pointers that are never loaded or stored through may-alias
+ /// relationships. For each one found, create an alloca for the value, add
+ /// it to the PromotedValues list, and record the mapping from value to
+ /// alloca.
+ ///
+ void FindPromotableValuesInLoop(
+ std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
+ std::map<Value*, AllocaInst*> &Val2AlMap);
+ };
+}
+
+char LICM::ID = 0;
+static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion");
+
+Pass *llvm::createLICMPass() { return new LICM(); }
+
+/// Hoist expressions out of the specified loop. Note, alias info for inner
+/// loop is not preserved so it is not a good idea to run LICM multiple
+/// times on one loop.
+///
+bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ Changed = false;
+
+ // Get our Loop and Alias Analysis information...
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ DF = &getAnalysis<DominanceFrontier>();
+ DT = &getAnalysis<DominatorTree>();
+
+ CurAST = new AliasSetTracker(*AA);
+ // Collect Alias info from subloops
+ for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
+ LoopItr != LoopItrE; ++LoopItr) {
+ Loop *InnerL = *LoopItr;
+ AliasSetTracker *InnerAST = LoopToAliasMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
+
+ // What if the inner loop was modified by other passes?
+ CurAST->add(*InnerAST);
+ }
+
+ CurLoop = L;
+
+ // Get the preheader block to move instructions into...
+ Preheader = L->getLoopPreheader();
+ assert(Preheader && "Preheader insertion pass guarantees we have a preheader!");
+
+ // Loop over the body of this loop, looking for calls, invokes, and stores.
+ // Because subloops have already been incorporated into AST, we skip blocks in
+ // subloops.
+ //
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops...
+ CurAST->add(*BB); // Incorporate the specified basic block
+ }
+
+ // We want to visit all of the instructions in this loop... that are not part
+ // of our subloops (they have already had their invariants hoisted out of
+ // their loop, into this loop, so there is no need to process the BODIES of
+ // the subloops).
+ //
+ // Traverse the body of the loop in depth first order on the dominator tree so
+ // that we are guaranteed to see definitions before we see uses. This allows
+ // us to sink instructions in one pass, without iteration. After sinking
+ // instructions, we perform another pass to hoist them out of the loop.
+ //
+ SinkRegion(DT->getNode(L->getHeader()));
+ HoistRegion(DT->getNode(L->getHeader()));
+
+ // Now that all loop invariants have been removed from the loop, promote any
+ // memory references to scalars that we can...
+ if (!DisablePromotion)
+ PromoteValuesInLoop();
+
+ // Clear out the loop state information for the next iteration.
+ CurLoop = 0;
+ Preheader = 0;
+
+ LoopToAliasMap[L] = CurAST;
+ return Changed;
+}
+
+/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in
+/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
+/// uses before definitions, allowing us to sink a loop body in one pass without
+/// iteration.
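+/// For example (hypothetical values): given %a = ...; %b = use(%a), where %b
+/// is only used outside the loop, visiting %b first sinks it, which leaves
+/// %a unused inside the loop and sinkable in the same pass.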
+///
+void LICM::SinkRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // We are processing blocks in reverse dfo, so process children first...
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ SinkRegion(Children[i]);
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (inSubLoop(BB)) return;
+
+ for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
+ Instruction &I = *--II;
+
+ // Check to see if we can sink this instruction to the exit blocks
+ // of the loop. We can do this if all of the users of the instruction are
+ // outside of the loop. In this case, it doesn't even matter if the
+ // operands of the instruction are loop invariant.
+ //
+ if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) {
+ ++II;
+ sink(I);
+ }
+ }
+}
+
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void LICM::HoistRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (!inSubLoop(BB))
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
+ Instruction &I = *II++;
+
+ // Try hoisting the instruction out to the preheader. We can only do this
+ // if all of the operands of the instruction are loop invariant and if it
+ // is safe to hoist the instruction.
+ //
+ if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
+ isSafeToExecuteUnconditionally(I))
+ hoist(I);
+ }
+
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ HoistRegion(Children[i]);
+}
+
+/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
+/// instruction.
+///
+bool LICM::canSinkOrHoistInst(Instruction &I) {
+ // Loads have extra constraints we have to verify before we can hoist them.
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isVolatile())
+ return false; // Don't hoist volatile loads!
+
+ // Loads from constant memory are always safe to move, even if they end up
+ // in the same alias set as something that ends up being modified.
+ if (EnableLICMConstantMotion &&
+ AA->pointsToConstantMemory(LI->getOperand(0)))
+ return true;
+
+ // Don't hoist loads which have may-aliased stores in loop.
+ unsigned Size = 0;
+ if (LI->getType()->isSized())
+ Size = AA->getTargetData().getTypeStoreSize(LI->getType());
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
+ } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Handle obvious cases efficiently.
+ AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return true;
+ else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+ // If this call only reads from memory and there are no writes to memory
+ // in the loop, we can hoist or sink the call as appropriate.
+ bool FoundMod = false;
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I) {
+ AliasSet &AS = *I;
+ if (!AS.isForwardingAliasSet() && AS.isMod()) {
+ FoundMod = true;
+ break;
+ }
+ }
+ if (!FoundMod) return true;
+ }
+
+ // FIXME: This should use mod/ref information to see if we can hoist or sink
+ // the call.
+
+ return false;
+ }
+
+ // Otherwise these instructions are hoistable/sinkable
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I);
+}
+
+/// isNotUsedInLoop - Return true if the only users of this instruction are
+/// outside of the loop. If this is true, we can sink the instruction to the
+/// exit blocks of the loop.
+///
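+/// Note the PHI subtlety: a use by a PHI node counts as a use in the
+/// corresponding incoming block. E.g., for a hypothetical
+/// %p = phi [ %I, %latch ] in an exit block, %latch being inside the
+/// loop means I is still considered used in the loop.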
+bool LICM::isNotUsedInLoop(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ // PHI node uses occur in predecessor blocks!
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I)
+ if (CurLoop->contains(PN->getIncomingBlock(i)))
+ return false;
+ } else if (CurLoop->contains(User->getParent())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// isLoopInvariantInst - Return true if all operands of this instruction are
+/// loop invariant. We also filter out non-hoistable instructions here just for
+/// efficiency.
+///
+bool LICM::isLoopInvariantInst(Instruction &I) {
+ // The instruction is loop invariant if all of its operands are loop-invariant
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ if (!CurLoop->isLoopInvariant(I.getOperand(i)))
+ return false;
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+/// sink - When an instruction is found to only be used outside of the loop,
+/// this function moves it to the exit blocks and patches up SSA form as needed.
+/// This method is guaranteed to remove the original instruction from its
+/// position, and may either delete it or move it to outside of the loop.
+///
+void LICM::sink(Instruction &I) {
+ DOUT << "LICM sinking instruction: " << I;
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumSunk;
+ Changed = true;
+
+ // The case where there is only a single exit node of this loop is common
+ // enough that we handle it as a special (more efficient) case. It is more
+ // efficient to handle because there are no PHI nodes that need to be placed.
+ if (ExitBlocks.size() == 1) {
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
+ // Instruction is not used, just delete it.
+ CurAST->deleteValue(&I);
+ if (!I.use_empty()) // If I has users in unreachable blocks, eliminate.
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ } else {
+ // Move the instruction to the start of the exit block, after any PHI
+ // nodes in it.
+ I.removeFromParent();
+
+ BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
+ ExitBlocks[0]->getInstList().insert(InsertPt, &I);
+ }
+ } else if (ExitBlocks.empty()) {
+ // The instruction is actually dead if there ARE NO exit blocks.
+ CurAST->deleteValue(&I);
+ if (!I.use_empty()) // If I has users in unreachable blocks, eliminate.
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ } else {
+ // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to
+ // do all of the hard work of inserting PHI nodes as necessary. We convert
+ // the value into a stack object to get it to do this.
+
+ // Firstly, we create a stack object to hold the value...
+ AllocaInst *AI = 0;
+
+ if (I.getType() != Type::VoidTy) {
+ AI = new AllocaInst(I.getType(), 0, I.getName(),
+ I.getParent()->getParent()->getEntryBlock().begin());
+ CurAST->add(AI);
+ }
+
+ // Secondly, insert load instructions for each use of the instruction
+ // outside of the loop.
+ while (!I.use_empty()) {
+ Instruction *U = cast<Instruction>(I.use_back());
+
+ // If the user is a PHI Node, we actually have to insert load instructions
+ // in all predecessor blocks, not in the PHI block itself!
+ if (PHINode *UPN = dyn_cast<PHINode>(U)) {
+ // Only insert into each predecessor once, so that we don't have
+ // different incoming values from the same block!
+ std::map<BasicBlock*, Value*> InsertedBlocks;
+ for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i)
+ if (UPN->getIncomingValue(i) == &I) {
+ BasicBlock *Pred = UPN->getIncomingBlock(i);
+ Value *&PredVal = InsertedBlocks[Pred];
+ if (!PredVal) {
+ // Insert a new load instruction right before the terminator in
+ // the predecessor block.
+ PredVal = new LoadInst(AI, "", Pred->getTerminator());
+ CurAST->add(cast<LoadInst>(PredVal));
+ }
+
+ UPN->setIncomingValue(i, PredVal);
+ }
+
+ } else {
+ LoadInst *L = new LoadInst(AI, "", U);
+ U->replaceUsesOfWith(&I, L);
+ CurAST->add(L);
+ }
+ }
+
+ // Thirdly, insert a copy of the instruction in each exit block of the loop
+ // that is dominated by the instruction, storing the result into the memory
+ // location. Be careful not to insert the instruction into any particular
+ // basic block more than once.
+ std::set<BasicBlock*> InsertedBlocks;
+ BasicBlock *InstOrigBB = I.getParent();
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) {
+ // If we haven't already processed this exit block, do so now.
+ if (InsertedBlocks.insert(ExitBlock).second) {
+ // Insert the code after the last PHI node...
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (InsertedBlocks.size() == 1) {
+ I.removeFromParent();
+ ExitBlock->getInstList().insert(InsertPt, &I);
+ New = &I;
+ } else {
+ New = I.clone();
+ CurAST->copyValue(&I, New);
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
+ }
+
+ // Now that we have inserted the instruction, store it into the alloca
+ if (AI) new StoreInst(New, AI, InsertPt);
+ }
+ }
+ }
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (InsertedBlocks.empty()) {
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ }
+
+ // Finally, promote the value to SSA form.
+ if (AI) {
+ std::vector<AllocaInst*> Allocas;
+ Allocas.push_back(AI);
+ PromoteMemToReg(Allocas, *DT, *DF, CurAST);
+ }
+ }
+}
+
+/// hoist - When an instruction is found to use only loop invariant operands
+/// and it is safe to hoist, this function is called to do the dirty work.
+///
+void LICM::hoist(Instruction &I) {
+ DOUT << "LICM hoisting to " << Preheader->getName() << ": " << I;
+
+ // Remove the instruction from its current basic block... but don't delete the
+ // instruction.
+ I.removeFromParent();
+
+ // Insert the new node in Preheader, before the terminator.
+ Preheader->getInstList().insert(Preheader->getTerminator(), &I);
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumHoisted;
+ Changed = true;
+}
+
+/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is
+/// not a trapping instruction or if it is a trapping instruction and is
+/// guaranteed to execute.
+///
+bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
+ // If it is not a trapping instruction, it is always safe to hoist.
+ if (!Inst.isTrapping()) return true;
+
+ // Otherwise we have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ return true;
+
+ // It's always safe to load from a global or alloca.
+ if (isa<LoadInst>(Inst))
+ if (isa<AllocationInst>(Inst.getOperand(0)) ||
+ isa<GlobalVariable>(Inst.getOperand(0)))
+ return true;
+
+ // Get the exit blocks for the current loop.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // For each exit block, get the DT node and walk up the DT until the
+ // instruction's basic block is found or we exit the loop.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent()))
+ return false;
+
+ return true;
+}
+
+
+/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking
+/// stores out of the loop and moving loads to before the loop. We do this by
+/// looping over the stores in the loop, looking for stores to Must pointers
+/// which are loop invariant. We promote these memory locations to use allocas
+/// instead. These allocas can easily be raised to register values by the
+/// PromoteMem2Reg functionality.
+///
+void LICM::PromoteValuesInLoop() {
+ // PromotedValues - List of values that are promoted out of the loop. Each
+ // value has an alloca instruction for it, and a canonical version of the
+ // pointer.
+ std::vector<std::pair<AllocaInst*, Value*> > PromotedValues;
+ std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca
+
+ FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap);
+ if (ValueToAllocaMap.empty()) return; // Nothing to promote.
+
+ Changed = true;
+ NumPromoted += PromotedValues.size();
+
+ std::vector<Value*> PointerValueNumbers;
+
+ // Emit a copy from the value into the alloca'd value in the loop preheader
+ TerminatorInst *LoopPredInst = Preheader->getTerminator();
+ for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
+ Value *Ptr = PromotedValues[i].second;
+
+ // If we are promoting a pointer value, update alias information for the
+ // inserted load.
+ Value *LoadValue = 0;
+ if (isa<PointerType>(cast<PointerType>(Ptr->getType())->getElementType())) {
+ // Locate a load or store through the pointer, and assign the same value
+ // to LI as we are loading or storing. Since we know that the value is
+ // stored in this loop, this will always succeed.
+ for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end();
+ UI != E; ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ LoadValue = LI;
+ break;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ if (SI->getOperand(1) == Ptr) {
+ LoadValue = SI->getOperand(0);
+ break;
+ }
+ }
+ assert(LoadValue && "No store through the pointer found!");
+ PointerValueNumbers.push_back(LoadValue); // Remember this for later.
+ }
+
+ // Load from the memory we are promoting.
+ LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst);
+
+ if (LoadValue) CurAST->copyValue(LoadValue, LI);
+
+ // Store into the temporary alloca.
+ new StoreInst(LI, PromotedValues[i].first, LoopPredInst);
+ }
+
+ // Scan the basic blocks in the loop, replacing uses of our pointers with
+ // uses of the allocas in question.
+ //
+ for (Loop::block_iterator I = CurLoop->block_begin(),
+ E = CurLoop->block_end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+    // Rewrite all loads and stores through the promoted pointers in the block...
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ std::map<Value*, AllocaInst*>::iterator
+ I = ValueToAllocaMap.find(L->getOperand(0));
+ if (I != ValueToAllocaMap.end())
+ L->setOperand(0, I->second); // Rewrite load instruction...
+ } else if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+ std::map<Value*, AllocaInst*>::iterator
+ I = ValueToAllocaMap.find(S->getOperand(1));
+ if (I != ValueToAllocaMap.end())
+ S->setOperand(1, I->second); // Rewrite store instruction...
+ }
+ }
+ }
+
+ // Now that the body of the loop uses the allocas instead of the original
+ // memory locations, insert code to copy the alloca value back into the
+ // original memory location on all exits from the loop. Note that we only
+ // want to insert one copy of the code in each exit block, though the loop may
+ // exit to the same block more than once.
+ //
+ SmallPtrSet<BasicBlock*, 16> ProcessedBlocks;
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ if (!ProcessedBlocks.insert(ExitBlocks[i]))
+ continue;
+
+ // Copy all of the allocas into their memory locations.
+ BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI();
+ Instruction *InsertPos = BI;
+ unsigned PVN = 0;
+ for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
+ // Load from the alloca.
+ LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
+
+ // If this is a pointer type, update alias info appropriately.
+ if (isa<PointerType>(LI->getType()))
+ CurAST->copyValue(PointerValueNumbers[PVN++], LI);
+
+ // Store into the memory we promoted.
+ new StoreInst(LI, PromotedValues[i].second, InsertPos);
+ }
+ }
+
+ // Now that we have done the deed, use the mem2reg functionality to promote
+ // all of the new allocas we just created into real SSA registers.
+ //
+ std::vector<AllocaInst*> PromotedAllocas;
+ PromotedAllocas.reserve(PromotedValues.size());
+ for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
+ PromotedAllocas.push_back(PromotedValues[i].first);
+ PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
+}
+
+/// FindPromotableValuesInLoop - Check the current loop for stores to definite
+/// (must-alias) pointers that are not loaded or stored through may-aliases
+/// and are safe for promotion. For each such value, create an alloca, add it
+/// to the PromotedValues list, and keep track of the mapping from value to
+/// alloca.
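+///
+/// For example (a sketch; 'G' is a hypothetical global), if the loop's only
+/// accesses to G are direct loads and stores, G's alias set is a must-alias
+/// set, and at least one access is guaranteed to execute, this creates an
+/// alloca "G.tmp", maps G and every pointer in its alias set to that alloca,
+/// and records the (alloca, G) pair for promotion.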
+void LICM::FindPromotableValuesInLoop(
+ std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
+ std::map<Value*, AllocaInst*> &ValueToAllocaMap) {
+ Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin();
+
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I) {
+ AliasSet &AS = *I;
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ continue;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *V = AS.begin()->getValue();
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ {
+ bool PointerOk = true;
+ for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
+ if (V->getType() != I->getValue()->getType()) {
+ PointerOk = false;
+ break;
+ }
+ if (!PointerOk)
+ continue;
+ }
+
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
+ //
+ // for () { if (c) *P += 1; }
+ //
+ // into:
+ //
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+ bool InvalidInst = false;
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // Ignore instructions not in this loop.
+ Instruction *Use = dyn_cast<Instruction>(*UI);
+ if (!Use || !CurLoop->contains(Use->getParent()))
+ continue;
+
+ if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
+ InvalidInst = true;
+ break;
+ }
+
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+ }
+
+    // If there is a non-load/store instruction in the loop, we can't promote
+ // it. If there isn't a guaranteed-to-execute instruction, we can't
+ // promote.
+ if (InvalidInst || !GuaranteedToExecute)
+ continue;
+
+ const Type *Ty = cast<PointerType>(V->getType())->getElementType();
+ AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
+ PromotedValues.push_back(std::make_pair(AI, V));
+
+ // Update the AST and alias analysis.
+ CurAST->copyValue(V, AI);
+
+ for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
+ ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
+
+ DOUT << "LICM: Promoting value: " << *V << "\n";
+ }
+}
+
+/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasMap[L];
+ if (!AST)
+ return;
+
+ AST->copyValue(From, To);
+}
+
+/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+/// set.
+void LICM::deleteAnalysisValue(Value *V, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasMap[L];
+ if (!AST)
+ return;
+
+ AST->deleteValue(V);
+}
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
new file mode 100644
index 0000000..6512672
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -0,0 +1,280 @@
+//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dead Loop Deletion Pass. This pass is responsible
+// for eliminating loops with non-infinite computable trip counts that have no
+// side effects or volatile instructions, and do not contribute to the
+// computation of the function's return value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-delete"
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace llvm;
+
+STATISTIC(NumDeleted, "Number of loops deleted");
+
+namespace {
+ class VISIBILITY_HIDDEN LoopDeletion : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDeletion() : LoopPass(&ID) {}
+
+ // Possibly eliminate loop L if it is dead.
+ bool runOnLoop(Loop* L, LPPassManager& LPM);
+
+ bool SingleDominatingExit(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks);
+ bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks);
+ bool IsLoopInvariantInst(Instruction *I, Loop* L);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DominanceFrontier>();
+ }
+ };
+}
+
+char LoopDeletion::ID = 0;
+static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops");
+
+Pass* llvm::createLoopDeletionPass() {
+ return new LoopDeletion();
+}
+
+/// SingleDominatingExit - Checks that there is only a single block that
+/// branches out of the loop, and that it also dominates the latch block. Loops
+/// with multiple or non-latch-dominating exiting blocks could be dead, but we'd
+/// have to do more extensive analysis to make sure, for instance, that the
+/// control flow logic involved was or could be made loop-invariant.
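+///
+/// For example (a sketch, not from this pass): in a rotated loop whose latch
+/// ends in "br i1 %done, label %exit, label %header", the latch is the only
+/// exiting block, and a block trivially dominates itself.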
+bool LoopDeletion::SingleDominatingExit(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks) {
+
+ if (exitingBlocks.size() != 1)
+ return false;
+
+ BasicBlock* latch = L->getLoopLatch();
+ if (!latch)
+ return false;
+
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ return DT.dominates(exitingBlocks[0], latch);
+}
+
+/// IsLoopInvariantInst - Checks if an instruction is invariant with respect to
+/// a loop, which is defined as being true if all of its operands are defined
+/// outside of the loop. These instructions can be hoisted out of the loop
+/// if their results are needed. This could be made more aggressive by
+/// recursively checking the operands for invariance, but it's not clear that
+/// it's worth it.
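+///
+/// For example (a sketch): in "for (i = ...) { t = a + b; ... }", where 'a'
+/// and 'b' are both defined before the loop, 't' counts as loop invariant
+/// under this definition even though it is defined inside the loop.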
+bool LoopDeletion::IsLoopInvariantInst(Instruction *I, Loop* L) {
+ // PHI nodes are not loop invariant if defined in the loop.
+ if (isa<PHINode>(I) && L->contains(I->getParent()))
+ return false;
+
+  // The instruction is loop invariant if all of its operands are loop-invariant.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!L->isLoopInvariant(I->getOperand(i)))
+ return false;
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+/// IsLoopDead - Determines if a loop is dead. This assumes that we've already
+/// checked for unique exit and exiting blocks, and that the code is in LCSSA
+/// form.
+bool LoopDeletion::IsLoopDead(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks) {
+ BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock* exitBlock = exitBlocks[0];
+
+ // Make sure that all PHI entries coming from the loop are loop invariant.
+ // Because the code is in LCSSA form, any values used outside of the loop
+ // must pass through a PHI in the exit block, meaning that this check is
+ // sufficient to guarantee that no loop-variant values are used outside
+ // of the loop.
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ Value* incoming = P->getIncomingValueForBlock(exitingBlock);
+ if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (!IsLoopInvariantInst(I, L))
+ return false;
+
+ BI++;
+ }
+
+ // Make sure that no instructions in the block have potential side-effects.
+ // This includes instructions that could write to memory, and loads that are
+ // marked volatile. This could be made more aggressive by using aliasing
+ // information to identify readonly and readnone calls.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ++BI) {
+ if (BI->mayHaveSideEffects())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// runOnLoop - Remove dead loops, by which we mean loops that do not impact
+/// the observable behavior of the program other than through finite running
+/// time. Note that we ensure this never removes a loop that might be
+/// infinite, as doing so could change the halting/non-halting nature of a
+/// program.
+/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
+/// in order to make various safety checks work.
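+///
+/// For example (a sketch): "for (i = 0; i != 16; ++i) { t = i * 2; }" has a
+/// computable trip count, no side effects, and no values used after the
+/// loop, so the preheader can be rewired to branch directly to the exit
+/// block and the loop deleted.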
+bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+ // We can only remove the loop if there is a preheader that we can
+ // branch from after removing it.
+ BasicBlock* preheader = L->getLoopPreheader();
+ if (!preheader)
+ return false;
+
+ // We can't remove loops that contain subloops. If the subloops were dead,
+ // they would already have been removed in earlier executions of this pass.
+ if (L->begin() != L->end())
+ return false;
+
+ SmallVector<BasicBlock*, 4> exitingBlocks;
+ L->getExitingBlocks(exitingBlocks);
+
+ SmallVector<BasicBlock*, 4> exitBlocks;
+ L->getUniqueExitBlocks(exitBlocks);
+
+ // We require that the loop only have a single exit block. Otherwise, we'd
+ // be in the situation of needing to be able to solve statically which exit
+ // block will be branched to, or trying to preserve the branching logic in
+ // a loop invariant manner.
+ if (exitBlocks.size() != 1)
+ return false;
+
+ // Loops with multiple exits or exits that don't dominate the latch
+ // are too complicated to handle correctly.
+ if (!SingleDominatingExit(L, exitingBlocks))
+ return false;
+
+ // Finally, we have to check that the loop really is dead.
+ if (!IsLoopDead(L, exitingBlocks, exitBlocks))
+ return false;
+
+ // Don't remove loops for which we can't solve the trip count.
+ // They could be infinite, in which case we'd be changing program behavior.
+ ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ SCEVHandle S = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(S))
+ return false;
+
+ // Now that we know the removal is safe, remove the loop by changing the
+ // branch from the preheader to go to the single exit block.
+ BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock* exitingBlock = exitingBlocks[0];
+
+ // Because we're deleting a large chunk of code at once, the sequence in which
+ // we remove things is very important to avoid invalidation issues. Don't
+ // mess with this unless you have good reason and know what you're doing.
+
+ // Move simple loop-invariant expressions out of the loop, since they
+ // might be needed by the exit phis.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI)
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ) {
+ Instruction* I = BI++;
+ if (!I->use_empty() && IsLoopInvariantInst(I, L))
+ I->moveBefore(preheader->getTerminator());
+ }
+
+ // Connect the preheader directly to the exit block.
+ TerminatorInst* TI = preheader->getTerminator();
+ TI->replaceUsesOfWith(L->getHeader(), exitBlock);
+
+ // Rewrite phis in the exit block to get their inputs from
+ // the preheader instead of the exiting block.
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ P->replaceUsesOfWith(exitingBlock, preheader);
+ BI++;
+ }
+
+ // Update the dominator tree and remove the instructions and blocks that will
+ // be deleted from the reference counting scheme.
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ DominanceFrontier* DF = getAnalysisIfAvailable<DominanceFrontier>();
+ SmallPtrSet<DomTreeNode*, 8> ChildNodes;
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ // Move all of the block's children to be children of the preheader, which
+ // allows us to remove the domtree entry for the block.
+ ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ DE = ChildNodes.end(); DI != DE; ++DI) {
+ DT.changeImmediateDominator(*DI, DT[preheader]);
+ if (DF) DF->changeImmediateDominator((*DI)->getBlock(), preheader, &DT);
+ }
+
+ ChildNodes.clear();
+ DT.eraseNode(*LI);
+ if (DF) DF->removeBlock(*LI);
+
+ // Remove the block from the reference counting scheme, so that we can
+ // delete it freely later.
+ (*LI)->dropAllReferences();
+ }
+
+ // Tell ScalarEvolution that the loop is deleted. Do this before
+ // deleting the loop so that ScalarEvolution can look at the loop
+ // to determine what it needs to clean up.
+ SE.forgetLoopBackedgeTakenCount(L);
+
+ // Erase the instructions and the blocks without having to worry
+ // about ordering because we already dropped the references.
+ // NOTE: This iteration is safe because erasing the block does not remove its
+ // entry from the loop's block list. We do that in the next section.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI)
+ (*LI)->eraseFromParent();
+
+  // Finally, remove the blocks from loopinfo. This has to happen late because
+ // otherwise our loop iterators won't work.
+ LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ SmallPtrSet<BasicBlock*, 8> blocks;
+ blocks.insert(L->block_begin(), L->block_end());
+ for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
+ E = blocks.end(); I != E; ++I)
+ loopInfo.removeBlock(*I);
+
+ // The last step is to inform the loop pass manager that we've
+ // eliminated this loop.
+ LPM.deleteLoopFromQueue(L);
+
+ NumDeleted++;
+
+ return true;
+}
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
new file mode 100644
index 0000000..9c78596
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -0,0 +1,1237 @@
+//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Loop Index Splitting Pass. This pass handles three
+// kinds of loops.
+//
+// [1] A loop may be eliminated if the body is executed exactly once.
+// For example,
+//
+// for (i = 0; i < N; ++i) {
+// if (i == X) {
+// body;
+// }
+// }
+//
+// is transformed to
+//
+// i = X;
+// body;
+//
+// [2] A loop's iteration space may be shrunk if the loop body is executed
+// for a proper sub-range of the loop's iteration space. For example,
+//
+// for (i = 0; i < N; ++i) {
+// if (i > A && i < B) {
+// ...
+// }
+// }
+//
+// is transformed to iterate from A to B, if A > 0 and B < N.
+//
+// [3] A loop may be split if the loop body is dominated by a branch.
+// For example,
+//
+// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; }
+//
+// is transformed into
+//
+// AEV = BSV = SV
+// for (i = LB; i < min(UB, AEV); ++i)
+// A;
+// for (i = max(LB, BSV); i < UB; ++i)
+// B;
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-index-split"
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(NumIndexSplit, "Number of loops index split");
+STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split");
+STATISTIC(NumRestrictBounds, "Number of loop iteration spaces restricted");
+
+namespace {
+
+ class VISIBILITY_HIDDEN LoopIndexSplit : public LoopPass {
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopIndexSplit() : LoopPass(&ID) {}
+
+ // Index split Loop L. Return true if loop is split.
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+
+ private:
+ /// processOneIterationLoop -- Eliminate loop if loop body is executed
+ /// only once. For example,
+ /// for (i = 0; i < N; ++i) {
+ /// if ( i == X) {
+ /// ...
+ /// }
+ /// }
+ ///
+ bool processOneIterationLoop();
+
+    // --- Routines used by updateLoopIterationSpace() ---
+
+ /// updateLoopIterationSpace -- Update loop's iteration space if loop
+ /// body is executed for certain IV range only. For example,
+ ///
+ /// for (i = 0; i < N; ++i) {
+ /// if ( i > A && i < B) {
+ /// ...
+ /// }
+ /// }
+    /// is transformed to iterate from A to B, if A > 0 and B < N.
+ ///
+ bool updateLoopIterationSpace();
+
+ /// restrictLoopBound - Op dominates loop body. Op compares an IV based value
+ /// with a loop invariant value. Update loop's lower and upper bound based on
+ /// the loop invariant value.
+ bool restrictLoopBound(ICmpInst &Op);
+
+    // --- Routines used by splitLoop() ---
+
+ bool splitLoop();
+
+ /// removeBlocks - Remove basic block DeadBB and all blocks dominated by
+    /// DeadBB. This routine is used to remove the split condition's dead
+    /// branch, which is dominated by DeadBB. LiveBB dominates the split
+    /// condition's other branch.
+ void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB);
+
+ /// moveExitCondition - Move exit condition EC into split condition block.
+ void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
+ BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC,
+ PHINode *IV, Instruction *IVAdd, Loop *LP,
+                           unsigned ExitValueNum);
+
+ /// updatePHINodes - CFG has been changed.
+ /// Before
+ /// - ExitBB's single predecessor was Latch
+ /// - Latch's second successor was Header
+ /// Now
+    /// - ExitBB's single predecessor is Header
+    /// - Latch's one and only successor is Header
+    ///
+    /// Update ExitBB's PHINodes to reflect this change.
+ void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
+ BasicBlock *Header,
+ PHINode *IV, Instruction *IVIncrement, Loop *LP);
+
+    // --- Utility routines ---
+
+    /// cleanBlock - A block is considered clean if all non-terminator
+ /// instructions are either PHINodes or IV based values.
+ bool cleanBlock(BasicBlock *BB);
+
+    /// IVisLT - If Op is comparing an IV based value with a loop invariant and
+    /// the IV based value is less than the loop invariant then return the loop
+ /// invariant. Otherwise return NULL.
+ Value * IVisLT(ICmpInst &Op);
+
+    /// IVisLE - If Op is comparing an IV based value with a loop invariant and
+    /// the IV based value is less than or equal to the loop invariant then
+ /// return the loop invariant. Otherwise return NULL.
+ Value * IVisLE(ICmpInst &Op);
+
+    /// IVisGT - If Op is comparing an IV based value with a loop invariant and
+    /// the IV based value is greater than the loop invariant then return the loop
+ /// invariant. Otherwise return NULL.
+ Value * IVisGT(ICmpInst &Op);
+
+    /// IVisGE - If Op is comparing an IV based value with a loop invariant and
+    /// the IV based value is greater than or equal to the loop invariant then
+ /// return the loop invariant. Otherwise return NULL.
+ Value * IVisGE(ICmpInst &Op);
+
+ private:
+
+ // Current Loop information.
+ Loop *L;
+ LPPassManager *LPM;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ DominanceFrontier *DF;
+
+ PHINode *IndVar;
+ ICmpInst *ExitCondition;
+ ICmpInst *SplitCondition;
+ Value *IVStartValue;
+ Value *IVExitValue;
+ Instruction *IVIncrement;
+ SmallPtrSet<Value *, 4> IVBasedValues;
+ };
+}
+
+char LoopIndexSplit::ID = 0;
+static RegisterPass<LoopIndexSplit>
+X("loop-index-split", "Index Split Loops");
+
+Pass *llvm::createLoopIndexSplitPass() {
+ return new LoopIndexSplit();
+}
+
+// Index split Loop L. Return true if loop is split.
+bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
+ L = IncomingLoop;
+ LPM = &LPM_Ref;
+
+ // FIXME - Nested loops make dominator info updates tricky.
+ if (!L->getSubLoops().empty())
+ return false;
+
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ DF = &getAnalysis<DominanceFrontier>();
+
+ // Initialize loop data.
+ IndVar = L->getCanonicalInductionVariable();
+ if (!IndVar) return false;
+
+ bool P1InLoop = L->contains(IndVar->getIncomingBlock(1));
+ IVStartValue = IndVar->getIncomingValue(!P1InLoop);
+ IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop));
+ if (!IVIncrement) return false;
+
+ IVBasedValues.clear();
+ IVBasedValues.insert(IndVar);
+ IVBasedValues.insert(IVIncrement);
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end();
+ BI != BE; ++BI) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI))
+ if (BO != IVIncrement
+ && (BO->getOpcode() == Instruction::Add
+ || BO->getOpcode() == Instruction::Sub))
+ if (IVBasedValues.count(BO->getOperand(0))
+ && L->isLoopInvariant(BO->getOperand(1)))
+ IVBasedValues.insert(BO);
+ }
+
+ // Reject loop if loop exit condition is not suitable.
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ if (!ExitingBlock)
+ return false;
+ BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!EBR) return false;
+ ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition());
+ if (!ExitCondition) return false;
+ if (ExitingBlock != L->getLoopLatch()) return false;
+ IVExitValue = ExitCondition->getOperand(1);
+ if (!L->isLoopInvariant(IVExitValue))
+ IVExitValue = ExitCondition->getOperand(0);
+ if (!L->isLoopInvariant(IVExitValue))
+ return false;
+
+  // If the start value is greater than the exit value and the induction
+  // variable increments by 1, then we are potentially dealing with an
+  // infinite loop. Do not index split this loop.
+ if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue))
+ if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue))
+ if (SV->getSExtValue() > EV->getSExtValue())
+ return false;
+
+ if (processOneIterationLoop())
+ return true;
+
+ if (updateLoopIterationSpace())
+ return true;
+
+ if (splitLoop())
+ return true;
+
+ return false;
+}
+
+// --- Helper routines ---
+// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
+static bool isUsedOutsideLoop(Value *V, Loop *L) {
+ for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (!L->contains(cast<Instruction>(*UI)->getParent()))
+ return true;
+ return false;
+}
+
+// Return V+1
+static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) {
+ ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
+}
+
+// Return V-1
+static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) {
+ ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
+}
+
+// Return min(V1, V2)
+static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
+
+ Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ V1, V2, "lsp", InsertPt);
+ return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
+}
+
+// Return max(V1, V2)
+static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
+
+ Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ V1, V2, "lsp", InsertPt);
+ return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
+}
+
+/// processOneIterationLoop -- Eliminate loop if loop body is executed
+/// only once. For example,
+/// for (i = 0; i < N; ++i) {
+/// if ( i == X) {
+/// ...
+/// }
+/// }
+///
+bool LoopIndexSplit::processOneIterationLoop() {
+ SplitCondition = NULL;
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
+ if (!BR) return false;
+ if (!isa<BranchInst>(Latch->getTerminator())) return false;
+ if (BR->isUnconditional()) return false;
+ SplitCondition = dyn_cast<ICmpInst>(BR->getCondition());
+ if (!SplitCondition) return false;
+ if (SplitCondition == ExitCondition) return false;
+ if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false;
+ if (BR->getOperand(1) != Latch) return false;
+ if (!IVBasedValues.count(SplitCondition->getOperand(0))
+ && !IVBasedValues.count(SplitCondition->getOperand(1)))
+ return false;
+
+ // If IV is used outside the loop then this loop traversal is required.
+ // FIXME: Calculate and use last IV value.
+ if (isUsedOutsideLoop(IVIncrement, L))
+ return false;
+
+ // If BR operands are not IV or not loop invariants then skip this loop.
+ Value *OPV = SplitCondition->getOperand(0);
+ Value *SplitValue = SplitCondition->getOperand(1);
+ if (!L->isLoopInvariant(SplitValue))
+ std::swap(OPV, SplitValue);
+ if (!L->isLoopInvariant(SplitValue))
+ return false;
+ Instruction *OPI = dyn_cast<Instruction>(OPV);
+ if (!OPI)
+ return false;
+ if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L))
+ return false;
+ Value *StartValue = IVStartValue;
+  Value *ExitValue = IVExitValue;
+
+ if (OPV != IndVar) {
+ // If BR operand is IV based then use this operand to calculate
+ // effective conditions for loop body.
+ BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV);
+ if (!BOPV)
+ return false;
+ if (BOPV->getOpcode() != Instruction::Add)
+ return false;
+ StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR);
+ ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR);
+ }
+
+ if (!cleanBlock(Header))
+ return false;
+
+ if (!cleanBlock(Latch))
+ return false;
+
+ // If the merge point for BR is not loop latch then skip this loop.
+ if (BR->getSuccessor(0) != Latch) {
+ DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
+ assert (DF0 != DF->end() && "Unable to find dominance frontier");
+ if (!DF0->second.count(Latch))
+ return false;
+ }
+
+ if (BR->getSuccessor(1) != Latch) {
+ DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
+ assert (DF1 != DF->end() && "Unable to find dominance frontier");
+ if (!DF1->second.count(Latch))
+ return false;
+ }
+
+  // Now the current loop, L, contains a compare instruction that compares
+  // the induction variable, IndVar, against a loop invariant, and the entire
+  // (i.e. meaningful) loop body is dominated by this compare instruction. In
+  // such a case, eliminate the loop structure surrounding the loop body. For
+  // example,
+ // for (int i = start; i < end; ++i) {
+ // if ( i == somevalue) {
+ // loop_body
+ // }
+ // }
+ // can be transformed into
+ // if (somevalue >= start && somevalue < end) {
+ // i = somevalue;
+ // loop_body
+ // }
+
+ // Replace index variable with split value in loop body. Loop body is executed
+ // only when index variable is equal to split value.
+ IndVar->replaceAllUsesWith(SplitValue);
+
+ // Replace split condition in header.
+ // Transform
+ // SplitCondition : icmp eq i32 IndVar, SplitValue
+ // into
+ // c1 = icmp uge i32 SplitValue, StartValue
+ // c2 = icmp ult i32 SplitValue, ExitValue
+ // and i32 c1, c2
+ Instruction *C1 = new ICmpInst(ExitCondition->isSignedPredicate() ?
+ ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
+ SplitValue, StartValue, "lisplit", BR);
+
+ CmpInst::Predicate C2P = ExitCondition->getPredicate();
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ if (LatchBR->getOperand(0) != Header)
+ C2P = CmpInst::getInversePredicate(C2P);
+ Instruction *C2 = new ICmpInst(C2P, SplitValue, ExitValue, "lisplit", BR);
+ Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
+
+ SplitCondition->replaceAllUsesWith(NSplitCond);
+ SplitCondition->eraseFromParent();
+
+ // Remove Latch to Header edge.
+ BasicBlock *LatchSucc = NULL;
+ Header->removePredecessor(Latch);
+ for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch);
+ SI != E; ++SI) {
+ if (Header != *SI)
+ LatchSucc = *SI;
+ }
+
+ // Clean up latch block.
+ Value *LatchBRCond = LatchBR->getCondition();
+ LatchBR->setUnconditionalDest(LatchSucc);
+ RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond);
+
+ LPM->deleteLoopFromQueue(L);
+
+ // Update Dominator Info.
+  // The only CFG change made is the removal of the Latch to Header edge. This
+  // does not change the dominator tree because Latch did not dominate
+ // Header.
+ if (DF) {
+ DominanceFrontier::iterator HeaderDF = DF->find(Header);
+ if (HeaderDF != DF->end())
+ DF->removeFromFrontier(HeaderDF, Header);
+
+ DominanceFrontier::iterator LatchDF = DF->find(Latch);
+ if (LatchDF != DF->end())
+ DF->removeFromFrontier(LatchDF, Header);
+ }
+
+ ++NumIndexSplitRemoved;
+ return true;
+}
+
+/// restrictLoopBound - Op dominates loop body. Op compares an IV based value
+/// with a loop invariant value. Update loop's lower and upper bound based on
+/// the loop invariant value.
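+///
+/// For example (a sketch): with exit condition "i < UB" and a dominating
+/// guard "i < B", where B is loop invariant, the new upper bound becomes
+/// min(B, UB), computed in the preheader.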
+bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
+ bool Sign = Op.isSignedPredicate();
+ Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
+
+ if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
+ BranchInst *EBR =
+ cast<BranchInst>(ExitCondition->getParent()->getTerminator());
+ ExitCondition->setPredicate(ExitCondition->getInversePredicate());
+ BasicBlock *T = EBR->getSuccessor(0);
+ EBR->setSuccessor(0, EBR->getSuccessor(1));
+ EBR->setSuccessor(1, T);
+ }
+
+ // New upper and lower bounds.
+ Value *NLB = NULL;
+ Value *NUB = NULL;
+ if (Value *V = IVisLT(Op)) {
+ // Restrict upper bound.
+ if (IVisLE(*ExitCondition))
+ V = getMinusOne(V, Sign, PHTerm);
+ NUB = getMin(V, IVExitValue, Sign, PHTerm);
+ } else if (Value *V = IVisLE(Op)) {
+ // Restrict upper bound.
+ if (IVisLT(*ExitCondition))
+ V = getPlusOne(V, Sign, PHTerm);
+ NUB = getMin(V, IVExitValue, Sign, PHTerm);
+ } else if (Value *V = IVisGT(Op)) {
+ // Restrict lower bound.
+ V = getPlusOne(V, Sign, PHTerm);
+ NLB = getMax(V, IVStartValue, Sign, PHTerm);
+ } else if (Value *V = IVisGE(Op))
+ // Restrict lower bound.
+ NLB = getMax(V, IVStartValue, Sign, PHTerm);
+
+ if (!NLB && !NUB)
+ return false;
+
+ if (NLB) {
+ unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader());
+ IndVar->setIncomingValue(i, NLB);
+ }
+
+ if (NUB) {
+ unsigned i = (ExitCondition->getOperand(0) != IVExitValue);
+ ExitCondition->setOperand(i, NUB);
+ }
+ return true;
+}
+
+/// updateLoopIterationSpace -- Update loop's iteration space if loop
+/// body is executed for certain IV range only. For example,
+///
+/// for (i = 0; i < N; ++i) {
+/// if ( i > A && i < B) {
+/// ...
+/// }
+/// }
+/// is transformed to iterate from A to B, if A > 0 and B < N.
+///
+bool LoopIndexSplit::updateLoopIterationSpace() {
+ SplitCondition = NULL;
+ if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
+ || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
+ return false;
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
+ if (!BR) return false;
+ if (!isa<BranchInst>(Latch->getTerminator())) return false;
+ if (BR->isUnconditional()) return false;
+ BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
+ if (!AND) return false;
+ if (AND->getOpcode() != Instruction::And) return false;
+ ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
+ ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
+ if (!Op0 || !Op1)
+ return false;
+ IVBasedValues.insert(AND);
+ IVBasedValues.insert(Op0);
+ IVBasedValues.insert(Op1);
+ if (!cleanBlock(Header)) return false;
+ BasicBlock *ExitingBlock = ExitCondition->getParent();
+ if (!cleanBlock(ExitingBlock)) return false;
+
+ // If the merge point for BR is not loop latch then skip this loop.
+ if (BR->getSuccessor(0) != Latch) {
+ DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
+ assert (DF0 != DF->end() && "Unable to find dominance frontier");
+ if (!DF0->second.count(Latch))
+ return false;
+ }
+
+ if (BR->getSuccessor(1) != Latch) {
+ DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
+ assert (DF1 != DF->end() && "Unable to find dominance frontier");
+ if (!DF1->second.count(Latch))
+ return false;
+ }
+
+  // Verify that the loop exiting block has only two predecessors, where one
+  // pred is the split condition block. The other predecessor will become the
+  // exiting block's dominator after the CFG is updated. TODO: Handle CFGs
+  // where the exiting block has more than two predecessors. This requires
+  // extra work in updating dominator information.
+ BasicBlock *ExitingBBPred = NULL;
+ for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
+ PI != PE; ++PI) {
+ BasicBlock *BB = *PI;
+ if (Header == BB)
+ continue;
+ if (ExitingBBPred)
+ return false;
+ else
+ ExitingBBPred = BB;
+ }
+
+ if (!restrictLoopBound(*Op0))
+ return false;
+
+ if (!restrictLoopBound(*Op1))
+ return false;
+
+ // Update CFG.
+ if (BR->getSuccessor(0) == ExitingBlock)
+ BR->setUnconditionalDest(BR->getSuccessor(1));
+ else
+ BR->setUnconditionalDest(BR->getSuccessor(0));
+
+ AND->eraseFromParent();
+ if (Op0->use_empty())
+ Op0->eraseFromParent();
+ if (Op1->use_empty())
+ Op1->eraseFromParent();
+
+  // Update dominator info. Now, ExitingBlock has only one predecessor,
+  // ExitingBBPred, and it is ExitingBlock's immediate dominator.
+ DT->changeImmediateDominator(ExitingBlock, ExitingBBPred);
+
+ BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1);
+ if (L->contains(ExitBlock))
+ ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0);
+
+  // If ExitingBlock is a member of a loop basic block's dominance frontier,
+  // then replace it in that frontier with the members of ExitingBlock's own
+  // dominance frontier.
+ DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock);
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (BB == Header || BB == ExitingBlock)
+ continue;
+ DominanceFrontier::iterator BBDF = DF->find(BB);
+ DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
+ DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
+ while (DomSetI != DomSetE) {
+ DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
+ ++DomSetI;
+ BasicBlock *DFBB = *CurrentItr;
+ if (DFBB == ExitingBlock) {
+ BBDF->second.erase(DFBB);
+ for (DominanceFrontier::DomSetType::iterator
+ EBI = ExitingBlockDF->second.begin(),
+ EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI)
+ BBDF->second.insert(*EBI);
+ }
+ }
+ }
+ NumRestrictBounds++;
+ return true;
+}
+
+/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB.
+/// This routine is used to remove the split condition's dead branch, which is
+/// dominated by DeadBB. LiveBB dominates the split condition's other branch.
+void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
+ BasicBlock *LiveBB) {
+
+ // First update DeadBB's dominance frontier.
+ SmallVector<BasicBlock *, 8> FrontierBBs;
+ DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB);
+ if (DeadBBDF != DF->end()) {
+ SmallVector<BasicBlock *, 8> PredBlocks;
+
+ DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second;
+ for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(),
+ DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI)
+ {
+ BasicBlock *FrontierBB = *DeadBBSetI;
+ FrontierBBs.push_back(FrontierBB);
+
+      // Remove any PHI incoming edges from blocks dominated by DeadBB.
+ PredBlocks.clear();
+ for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (P == DeadBB || DT->dominates(DeadBB, P))
+ PredBlocks.push_back(P);
+ }
+
+ for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end();
+ FBI != FBE; ++FBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(FBI)) {
+ for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(),
+ PE = PredBlocks.end(); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ PN->removeIncomingValue(P);
+ }
+ }
+ else
+ break;
+ }
+ }
+ }
+
+ // Now remove DeadBB and all nodes dominated by DeadBB in df order.
+ SmallVector<BasicBlock *, 32> WorkList;
+ DomTreeNode *DN = DT->getNode(DeadBB);
+ for (df_iterator<DomTreeNode*> DI = df_begin(DN),
+ E = df_end(DN); DI != E; ++DI) {
+ BasicBlock *BB = DI->getBlock();
+ WorkList.push_back(BB);
+ BB->replaceAllUsesWith(UndefValue::get(Type::LabelTy));
+ }
+
+ while (!WorkList.empty()) {
+ BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ LPM->deleteSimpleAnalysisValue(BB, LP);
+ for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
+ BBI != BBE; ) {
+ Instruction *I = BBI;
+ ++BBI;
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ LPM->deleteSimpleAnalysisValue(I, LP);
+ I->eraseFromParent();
+ }
+ DT->eraseNode(BB);
+ DF->removeBlock(BB);
+ LI->removeBlock(BB);
+ BB->eraseFromParent();
+ }
+
+ // Update Frontier BBs' dominator info.
+ while (!FrontierBBs.empty()) {
+ BasicBlock *FBB = FrontierBBs.back(); FrontierBBs.pop_back();
+ BasicBlock *NewDominator = FBB->getSinglePredecessor();
+ if (!NewDominator) {
+ pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
+ NewDominator = *PI;
+ ++PI;
+ if (NewDominator != LiveBB) {
+ for(; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (P == LiveBB) {
+ NewDominator = LiveBB;
+ break;
+ }
+ NewDominator = DT->findNearestCommonDominator(NewDominator, P);
+ }
+ }
+ }
+ assert (NewDominator && "Unable to fix dominator info.");
+ DT->changeImmediateDominator(FBB, NewDominator);
+ DF->changeImmediateDominator(FBB, NewDominator, DT);
+ }
+
+}
+
+/// moveExitCondition - Move exit condition EC into split condition block CondBB.
+void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
+ BasicBlock *ExitBB, ICmpInst *EC,
+ ICmpInst *SC, PHINode *IV,
+ Instruction *IVAdd, Loop *LP,
+ unsigned ExitValueNum) {
+
+ BasicBlock *ExitingBB = EC->getParent();
+ Instruction *CurrentBR = CondBB->getTerminator();
+
+ // Move exit condition into split condition block.
+ EC->moveBefore(CurrentBR);
+ EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV);
+
+ // Move exiting block's branch into split condition block. Update its branch
+ // destination.
+ BranchInst *ExitingBR = cast<BranchInst>(ExitingBB->getTerminator());
+ ExitingBR->moveBefore(CurrentBR);
+ BasicBlock *OrigDestBB = NULL;
+ if (ExitingBR->getSuccessor(0) == ExitBB) {
+ OrigDestBB = ExitingBR->getSuccessor(1);
+ ExitingBR->setSuccessor(1, ActiveBB);
+ }
+ else {
+ OrigDestBB = ExitingBR->getSuccessor(0);
+ ExitingBR->setSuccessor(0, ActiveBB);
+ }
+
+ // Remove split condition and current split condition branch.
+ SC->eraseFromParent();
+ CurrentBR->eraseFromParent();
+
+ // Connect exiting block to original destination.
+ BranchInst::Create(OrigDestBB, ExitingBB);
+
+ // Update PHINodes
+ updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP);
+
+ // Fix dominator info.
+ // ExitBB is now dominated by CondBB
+ DT->changeImmediateDominator(ExitBB, CondBB);
+ DF->changeImmediateDominator(ExitBB, CondBB, DT);
+
+ // Blocks outside the loop may have been in the dominance frontier of blocks
+ // inside the condition; this is now impossible because the blocks inside the
+  // condition no longer dominate the exit. Remove the relevant blocks from
+ // the dominance frontiers.
+ for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
+ I != E; ++I) {
+ if (*I == CondBB || !DT->dominates(CondBB, *I)) continue;
+ DominanceFrontier::iterator BBDF = DF->find(*I);
+ DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
+ DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
+ while (DomSetI != DomSetE) {
+ DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
+ ++DomSetI;
+ BasicBlock *DFBB = *CurrentItr;
+ if (!LP->contains(DFBB))
+ BBDF->second.erase(DFBB);
+ }
+ }
+}
+
+/// updatePHINodes - CFG has been changed.
+/// Before
+/// - ExitBB's single predecessor was Latch
+/// - Latch's second successor was Header
+/// Now
+/// - ExitBB's single predecessor is Header
+/// - Latch's one and only successor is Header
+///
+/// Update ExitBB's PHINodes to reflect this change.
+void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
+ BasicBlock *Header,
+ PHINode *IV, Instruction *IVIncrement,
+ Loop *LP) {
+
+ for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end();
+ BI != BE; ) {
+ PHINode *PN = dyn_cast<PHINode>(BI);
+ ++BI;
+ if (!PN)
+ break;
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ if (PHINode *PHV = dyn_cast<PHINode>(V)) {
+      // PHV is in Latch. PHV has two uses: one in an ExitBB PHINode (PN)
+      // and one in a Header PHINode, which becomes the new incoming value
+      // for PN.
+ Value *NewV = NULL;
+ for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end();
+ UI != E; ++UI)
+ if (PHINode *U = dyn_cast<PHINode>(*UI))
+ if (LP->contains(U->getParent())) {
+ NewV = U;
+ break;
+ }
+
+ // Add incoming value from header only if PN has any use inside the loop.
+ if (NewV)
+ PN->addIncoming(NewV, Header);
+
+ } else if (Instruction *PHI = dyn_cast<Instruction>(V)) {
+      // If this instruction is IVIncrement then IV is the new incoming value
+      // from the header; otherwise this instruction must itself be the
+      // incoming value from the header because the loop is in LCSSA form.
+ if (PHI == IVIncrement)
+ PN->addIncoming(IV, Header);
+ else
+ PN->addIncoming(V, Header);
+ } else
+ // Otherwise this is an incoming value from header because loop is in
+ // LCSSA form.
+ PN->addIncoming(V, Header);
+
+ // Remove incoming value from Latch.
+ PN->removeIncomingValue(Latch);
+ }
+}
+
+bool LoopIndexSplit::splitLoop() {
+ SplitCondition = NULL;
+ if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
+ || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
+ return false;
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BranchInst *SBR = NULL; // Split Condition Branch
+ BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
+  // If the exiting block includes loop-variant instructions then this
+ // loop may not be split safely.
+ BasicBlock *ExitingBlock = ExitCondition->getParent();
+ if (!cleanBlock(ExitingBlock)) return false;
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
+ if (!BR || BR->isUnconditional()) continue;
+ ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
+ if (!CI || CI == ExitCondition
+ || CI->getPredicate() == ICmpInst::ICMP_NE
+ || CI->getPredicate() == ICmpInst::ICMP_EQ)
+ continue;
+
+ // Unable to handle triangle loops at the moment.
+    // In a triangle loop, the split condition is in the header and one of
+    // the split destinations is the loop latch. If the split condition is
+    // EQ then such loops are already handled in processOneIterationLoop().
+ if (Header == (*I)
+ && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
+ continue;
+
+ // If the block does not dominate the latch then this is not a diamond.
+    // Such a loop may not benefit from index splitting.
+ if (!DT->dominates((*I), Latch))
+ continue;
+
+    // If the split condition branch's successors do not have the split
+    // condition block as their single predecessor, then it is not possible
+    // to remove the inactive branch.
+ if (!BR->getSuccessor(0)->getSinglePredecessor()
+ || !BR->getSuccessor(1)->getSinglePredecessor())
+ return false;
+
+ // If the merge point for BR is not loop latch then skip this condition.
+ if (BR->getSuccessor(0) != Latch) {
+ DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
+ assert (DF0 != DF->end() && "Unable to find dominance frontier");
+ if (!DF0->second.count(Latch))
+ continue;
+ }
+
+ if (BR->getSuccessor(1) != Latch) {
+ DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
+ assert (DF1 != DF->end() && "Unable to find dominance frontier");
+ if (!DF1->second.count(Latch))
+ continue;
+ }
+ SplitCondition = CI;
+ SBR = BR;
+ break;
+ }
+
+ if (!SplitCondition)
+ return false;
+
+ // If the predicate sign does not match then skip.
+ if (ExitCondition->isSignedPredicate() != SplitCondition->isSignedPredicate())
+ return false;
+
+ unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
+ unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
+ Value *SplitValue = SplitCondition->getOperand(SVOpNum);
+ if (!L->isLoopInvariant(SplitValue))
+ return false;
+ if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
+ return false;
+
+ // Normalize loop conditions so that it is easier to calculate new loop
+ // bounds.
+ if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
+ ExitCondition->setPredicate(ExitCondition->getInversePredicate());
+ BasicBlock *T = EBR->getSuccessor(0);
+ EBR->setSuccessor(0, EBR->getSuccessor(1));
+ EBR->setSuccessor(1, T);
+ }
+
+ if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
+ SplitCondition->setPredicate(SplitCondition->getInversePredicate());
+ BasicBlock *T = SBR->getSuccessor(0);
+ SBR->setSuccessor(0, SBR->getSuccessor(1));
+ SBR->setSuccessor(1, T);
+ }
+
+ //[*] Calculate new loop bounds.
+ Value *AEV = SplitValue;
+ Value *BSV = SplitValue;
+ bool Sign = SplitCondition->isSignedPredicate();
+ Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
+
+ if (IVisLT(*ExitCondition)) {
+ if (IVisLT(*SplitCondition)) {
+ /* Do nothing */
+ }
+ else if (IVisLE(*SplitCondition)) {
+ AEV = getPlusOne(SplitValue, Sign, PHTerm);
+ BSV = getPlusOne(SplitValue, Sign, PHTerm);
+ } else {
+ assert (0 && "Unexpected split condition!");
+ }
+ }
+ else if (IVisLE(*ExitCondition)) {
+ if (IVisLT(*SplitCondition)) {
+ AEV = getMinusOne(SplitValue, Sign, PHTerm);
+ }
+ else if (IVisLE(*SplitCondition)) {
+ BSV = getPlusOne(SplitValue, Sign, PHTerm);
+ } else {
+ assert (0 && "Unexpected split condition!");
+ }
+ } else {
+ assert (0 && "Unexpected exit condition!");
+ }
+ AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
+ BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
+
+ // [*] Clone Loop
+ DenseMap<const Value *, Value *> ValueMap;
+ Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
+ Loop *ALoop = L;
+
+ // [*] ALoop's exiting edge enters BLoop's header.
+ // ALoop's original exit block becomes BLoop's exit block.
+ PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
+ BasicBlock *A_ExitingBlock = ExitCondition->getParent();
+ BranchInst *A_ExitInsn =
+ dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
+ assert (A_ExitInsn && "Unable to find suitable loop exit branch");
+ BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
+ BasicBlock *B_Header = BLoop->getHeader();
+ if (ALoop->contains(B_ExitBlock)) {
+ B_ExitBlock = A_ExitInsn->getSuccessor(0);
+ A_ExitInsn->setSuccessor(0, B_Header);
+ } else
+ A_ExitInsn->setSuccessor(1, B_Header);
+
+ // [*] Update ALoop's exit value using new exit value.
+ ExitCondition->setOperand(EVOpNum, AEV);
+
+  // [*] Update BLoop's header phi nodes. Remove incoming PHINode values from
+  // the original loop's preheader. Add incoming PHINode values from
+  // ALoop's exiting block. Update BLoop header's dominator info.
+
+ // Collect inverse map of Header PHINodes.
+ DenseMap<Value *, Value *> InverseMap;
+ for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
+ BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
+ InverseMap[PNClone] = PN;
+ } else
+ break;
+ }
+
+ BasicBlock *A_Preheader = ALoop->getLoopPreheader();
+ for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
+ BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ // Remove incoming value from original preheader.
+ PN->removeIncomingValue(A_Preheader);
+
+ // Add incoming value from A_ExitingBlock.
+ if (PN == B_IndVar)
+ PN->addIncoming(BSV, A_ExitingBlock);
+ else {
+ PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
+ Value *V2 = NULL;
+      // If the loop header is also the loop's exiting block then OrigPN
+      // itself is the incoming value for BLoop's header.
+ if (A_ExitingBlock == ALoop->getHeader())
+ V2 = OrigPN;
+ else
+ V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
+ PN->addIncoming(V2, A_ExitingBlock);
+ }
+ } else
+ break;
+ }
+
+ DT->changeImmediateDominator(B_Header, A_ExitingBlock);
+ DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
+
+  // [*] Update BLoop's exit block. Its new predecessor is BLoop's exiting
+  // block. Remove incoming PHINode values from ALoop's exiting block and
+  // add the corresponding values coming from BLoop's exiting block. Update
+  // BLoop exit block's dominator info.
+ BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
+ for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
+ BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
+ B_ExitingBlock);
+ PN->removeIncomingValue(A_ExitingBlock);
+ } else
+ break;
+ }
+
+ DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
+ DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);
+
+ //[*] Split ALoop's exit edge. This creates a new block which
+  // serves two purposes. The first is to hold PHINode definitions
+  // that keep ALoop in LCSSA form. The second is to act
+ // as a preheader for BLoop.
+ BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);
+
+ //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
+ // in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
+ for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
+ BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
+ PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
+ newPHI->addIncoming(V1, A_ExitingBlock);
+ A_ExitBlock->getInstList().push_front(newPHI);
+ PN->removeIncomingValue(A_ExitBlock);
+ PN->addIncoming(newPHI, A_ExitBlock);
+ } else
+ break;
+ }
+
+ //[*] Eliminate split condition's inactive branch from ALoop.
+ BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
+ BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
+ BasicBlock *A_InactiveBranch = NULL;
+ BasicBlock *A_ActiveBranch = NULL;
+ A_ActiveBranch = A_BR->getSuccessor(0);
+ A_InactiveBranch = A_BR->getSuccessor(1);
+ A_BR->setUnconditionalDest(A_ActiveBranch);
+ removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
+
+  //[*] Eliminate split condition's inactive branch from BLoop.
+ BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
+ BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
+ BasicBlock *B_InactiveBranch = NULL;
+ BasicBlock *B_ActiveBranch = NULL;
+ B_ActiveBranch = B_BR->getSuccessor(1);
+ B_InactiveBranch = B_BR->getSuccessor(0);
+ B_BR->setUnconditionalDest(B_ActiveBranch);
+ removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);
+
+ BasicBlock *A_Header = ALoop->getHeader();
+ if (A_ExitingBlock == A_Header)
+ return true;
+
+ //[*] Move exit condition into split condition block to avoid
+  // executing a dead loop iteration.
+ ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
+ Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
+ ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);
+
+ moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
+ cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
+ ALoop, EVOpNum);
+
+ moveExitCondition(B_SplitCondBlock, B_ActiveBranch,
+ B_ExitBlock, B_ExitCondition,
+ B_SplitCondition, B_IndVar, B_IndVarIncrement,
+ BLoop, EVOpNum);
+
+ NumIndexSplit++;
+ return true;
+}
+
+/// cleanBlock - A block is considered clean if all non-terminator instructions
+/// are either PHINodes or IV based values.
+bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
+ Instruction *Terminator = BB->getTerminator();
+ for(BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ Instruction *I = BI;
+
+ if (isa<PHINode>(I) || I == Terminator || I == ExitCondition
+ || I == SplitCondition || IVBasedValues.count(I)
+ || isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ if (I->mayHaveSideEffects())
+ return false;
+
+    // If I is used only inside this block then it is OK.
+ bool usedOutsideBB = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Instruction *U = cast<Instruction>(UI);
+ if (U->getParent() != BB)
+ usedOutsideBB = true;
+ }
+ if (!usedOutsideBB)
+ continue;
+
+    // Otherwise we have an instruction that may not allow loop splitting.
+ return false;
+ }
+ return true;
+}
+
+/// IVisLT - If Op is comparing an IV based value with a loop invariant and
+/// the IV based value is less than the loop invariant then return the loop
+/// invariant. Otherwise return NULL.
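+///
+/// For example (a sketch): for "icmp slt %iv.next, %n", where %iv.next is IV
+/// based and %n is loop invariant, this returns %n; the swapped form
+/// "icmp sgt %n, %iv.next" also returns %n.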
+Value * LoopIndexSplit::IVisLT(ICmpInst &Op) {
+ ICmpInst::Predicate P = Op.getPredicate();
+ if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
+ && IVBasedValues.count(Op.getOperand(0))
+ && L->isLoopInvariant(Op.getOperand(1)))
+ return Op.getOperand(1);
+
+ if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
+ && IVBasedValues.count(Op.getOperand(1))
+ && L->isLoopInvariant(Op.getOperand(0)))
+ return Op.getOperand(0);
+
+ return NULL;
+}
+
+/// IVisLE - If Op compares an IV-based value with a loop invariant and
+/// the IV-based value is less than or equal to the loop invariant then
+/// return the loop invariant. Otherwise return NULL.
+Value * LoopIndexSplit::IVisLE(ICmpInst &Op) {
+ ICmpInst::Predicate P = Op.getPredicate();
+ if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
+ && IVBasedValues.count(Op.getOperand(0))
+ && L->isLoopInvariant(Op.getOperand(1)))
+ return Op.getOperand(1);
+
+ if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
+ && IVBasedValues.count(Op.getOperand(1))
+ && L->isLoopInvariant(Op.getOperand(0)))
+ return Op.getOperand(0);
+
+ return NULL;
+}
+
+/// IVisGT - If Op compares an IV-based value with a loop invariant and
+/// the IV-based value is greater than the loop invariant then return the
+/// loop invariant. Otherwise return NULL.
+Value * LoopIndexSplit::IVisGT(ICmpInst &Op) {
+ ICmpInst::Predicate P = Op.getPredicate();
+ if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
+ && IVBasedValues.count(Op.getOperand(0))
+ && L->isLoopInvariant(Op.getOperand(1)))
+ return Op.getOperand(1);
+
+ if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
+ && IVBasedValues.count(Op.getOperand(1))
+ && L->isLoopInvariant(Op.getOperand(0)))
+ return Op.getOperand(0);
+
+ return NULL;
+}
+
+/// IVisGE - If Op compares an IV-based value with a loop invariant and
+/// the IV-based value is greater than or equal to the loop invariant then
+/// return the loop invariant. Otherwise return NULL.
+Value * LoopIndexSplit::IVisGE(ICmpInst &Op) {
+ ICmpInst::Predicate P = Op.getPredicate();
+ if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
+ && IVBasedValues.count(Op.getOperand(0))
+ && L->isLoopInvariant(Op.getOperand(1)))
+ return Op.getOperand(1);
+
+ if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
+ && IVBasedValues.count(Op.getOperand(1))
+ && L->isLoopInvariant(Op.getOperand(0)))
+ return Op.getOperand(0);
+
+ return NULL;
+}
+
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
new file mode 100644
index 0000000..a088230
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -0,0 +1,572 @@
+//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Loop Rotation Pass.
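+//
+// A rough sketch of the effect (hypothetical C-level view, not the IR the
+// pass operates on):
+//
+//   while (cond) { body; }          // exit test sits in the loop header
+//
+// becomes, after one rotation,
+//
+//   if (cond)                       // cloned header test guards loop entry
+//     do { body; } while (cond);    // old header becomes the loop latch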
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-rotate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+#define MAX_HEADER_SIZE 16
+
+STATISTIC(NumRotated, "Number of loops rotated");
+namespace {
+
+ class VISIBILITY_HIDDEN RenameData {
+ public:
+ RenameData(Instruction *O, Value *P, Instruction *H)
+ : Original(O), PreHeader(P), Header(H) { }
+ public:
+ Instruction *Original; // Original instruction
+ Value *PreHeader; // Original pre-header replacement
+ Instruction *Header; // New header replacement
+ };
+
+ class VISIBILITY_HIDDEN LoopRotate : public LoopPass {
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopRotate() : LoopPass(&ID) {}
+
+ // Rotate Loop L as many times as possible. Return true if
+ // loop is rotated at least once.
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ // LCSSA form makes instruction renaming easier.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+
+ // Helper functions
+
+ /// Do actual work
+ bool rotateLoop(Loop *L, LPPassManager &LPM);
+
+ /// Initialize local data
+ void initialize();
+
+ /// Make sure all Exit block PHINodes have required incoming values.
+ /// If incoming value is constant or defined outside the loop then
+ /// PHINode may not have an entry for original pre-header.
+ void updateExitBlock();
+
+ /// Return true if this instruction is used outside original header.
+ bool usedOutsideOriginalHeader(Instruction *In);
+
+ /// Find Replacement information for instruction. Return NULL if it is
+ /// not available.
+ const RenameData *findReplacementData(Instruction *I);
+
+    /// After loop rotation, the loop pre-header has multiple successors.
+ /// Insert one forwarding basic block to ensure that loop pre-header
+ /// has only one successor.
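+    /// Sketch (hypothetical CFG): if the old pre-header P branches to both
+    /// NewHeader and Exit, a fresh block with an unconditional branch to
+    /// NewHeader is inserted so that P reaches NewHeader only through it,
+    /// restoring a single-successor pre-header.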
+ void preserveCanonicalLoopForm(LPPassManager &LPM);
+
+ private:
+
+ Loop *L;
+ BasicBlock *OrigHeader;
+ BasicBlock *OrigPreHeader;
+ BasicBlock *OrigLatch;
+ BasicBlock *NewHeader;
+ BasicBlock *Exit;
+ LPPassManager *LPM_Ptr;
+ SmallVector<RenameData, MAX_HEADER_SIZE> LoopHeaderInfo;
+ };
+}
+
+char LoopRotate::ID = 0;
+static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops");
+
+Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+
+/// Rotate Loop L as many times as possible. Return true if
+/// loop is rotated at least once.
+bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
+
+ bool RotatedOneLoop = false;
+ initialize();
+ LPM_Ptr = &LPM;
+
+ // One loop can be rotated multiple times.
+ while (rotateLoop(Lp,LPM)) {
+ RotatedOneLoop = true;
+ initialize();
+ }
+
+ return RotatedOneLoop;
+}
+
+/// Rotate loop LP. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
+ L = Lp;
+
+ OrigHeader = L->getHeader();
+ OrigPreHeader = L->getLoopPreheader();
+ OrigLatch = L->getLoopLatch();
+
+ // If loop has only one block then there is not much to rotate.
+ if (L->getBlocks().size() == 1)
+ return false;
+
+ assert(OrigHeader && OrigLatch && OrigPreHeader &&
+ "Loop is not in canonical form");
+
+  // If the loop header is not one of the loop exit blocks then
+  // either this loop is already rotated or it is not
+  // suitable for loop rotation transformations.
+ if (!L->isLoopExit(OrigHeader))
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (!BI)
+ return false;
+ assert(BI->isConditional() && "Branch Instruction is not conditional");
+
+  // Updating PHI nodes in loops with multiple exits adds complexity.
+  // Keep it simple, and restrict loop rotation to loops with one exit only.
+  // In the future, lift this restriction and add support for multiple exits
+  // if required.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() > 1)
+ return false;
+
+ // Check size of original header and reject
+ // loop if it is very big.
+ unsigned Size = 0;
+
+ // FIXME: Use common api to estimate size.
+ for (BasicBlock::const_iterator OI = OrigHeader->begin(),
+ OE = OrigHeader->end(); OI != OE; ++OI) {
+ if (isa<PHINode>(OI))
+ continue; // PHI nodes don't count.
+ if (isa<DbgInfoIntrinsic>(OI))
+ continue; // Debug intrinsics don't count as size.
+ Size++;
+ }
+
+ if (Size > MAX_HEADER_SIZE)
+ return false;
+
+ // Now, this loop is suitable for rotation.
+
+  // Find the new loop header. NewHeader is the header's one and only
+  // successor that is inside the loop. The header's other successor is
+  // outside the loop. Otherwise the loop is not suitable for rotation.
+ Exit = BI->getSuccessor(0);
+ NewHeader = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ std::swap(Exit, NewHeader);
+ assert(NewHeader && "Unable to determine new loop header");
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ "Unable to determine loop header and exit blocks");
+
+ // This code assumes that new header has exactly one predecessor. Remove any
+ // single entry PHI nodes in it.
+ assert(NewHeader->getSinglePredecessor() &&
+ "New header doesn't have one pred!");
+ FoldSingleEntryPHINodes(NewHeader);
+
+ // Copy PHI nodes and other instructions from original header
+ // into original pre-header. Unlike original header, original pre-header is
+ // not a member of loop.
+ //
+ // New loop header is one and only successor of original header that
+ // is inside the loop. All other original header successors are outside
+ // the loop. Copy PHI Nodes from original header into new loop header.
+ // Add second incoming value, from original loop pre-header into these phi
+  // nodes. If a value defined in the original header is used outside the
+  // original header then the new loop header will need new phi nodes with
+  // two incoming values: one definition from the original header and a
+  // second definition from the original loop pre-header.
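+  //
+  // For instance (hypothetical IR): a header PHI such as
+  //   %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
+  // is not cloned; its pre-header value (0) is propagated directly, and a
+  // new PHI in the new header merges %i.next (incoming via the original
+  // header) with 0 (incoming from the original pre-header).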
+
+ // Remove terminator from Original pre-header. Original pre-header will
+ // receive a clone of original header terminator as a new terminator.
+ OrigPreHeader->getInstList().pop_back();
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ PHINode *PN = 0;
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ // PHI nodes are not copied into original pre-header. Instead their values
+ // are directly propagated.
+ Value *NPV = PN->getIncomingValueForBlock(OrigPreHeader);
+
+ // Create new PHI node with two incoming values for NewHeader.
+ // One incoming value is from OrigLatch (through OrigHeader) and
+ // second incoming value is from original pre-header.
+ PHINode *NH = PHINode::Create(PN->getType(), PN->getName(),
+ NewHeader->begin());
+ NH->addIncoming(PN->getIncomingValueForBlock(OrigLatch), OrigHeader);
+ NH->addIncoming(NPV, OrigPreHeader);
+
+ // "In" can be replaced by NH at various places.
+ LoopHeaderInfo.push_back(RenameData(PN, NPV, NH));
+ }
+
+ // Now, handle non-phi instructions.
+ for (; I != E; ++I) {
+ Instruction *In = I;
+ assert(!isa<PHINode>(In) && "PHINode is not expected here");
+
+ // This is not a PHI instruction. Insert its clone into original pre-header.
+ // If this instruction is using a value from same basic block then
+ // update it to use value from cloned instruction.
+ Instruction *C = In->clone();
+ C->setName(In->getName());
+ OrigPreHeader->getInstList().push_back(C);
+
+ for (unsigned opi = 0, e = In->getNumOperands(); opi != e; ++opi) {
+ Instruction *OpInsn = dyn_cast<Instruction>(In->getOperand(opi));
+ if (!OpInsn) continue; // Ignore non-instruction values.
+ if (const RenameData *D = findReplacementData(OpInsn))
+ C->setOperand(opi, D->PreHeader);
+ }
+
+ // If this instruction is used outside this basic block then
+ // create new PHINode for this instruction.
+ Instruction *NewHeaderReplacement = NULL;
+ if (usedOutsideOriginalHeader(In)) {
+ PHINode *PN = PHINode::Create(In->getType(), In->getName(),
+ NewHeader->begin());
+ PN->addIncoming(In, OrigHeader);
+ PN->addIncoming(C, OrigPreHeader);
+ NewHeaderReplacement = PN;
+ }
+ LoopHeaderInfo.push_back(RenameData(In, C, NewHeaderReplacement));
+ }
+
+  // Rename uses of original header instructions to reflect their new
+  // definitions (either from the original pre-header node or from newly
+  // created new header PHINodes).
+ //
+ // Original header instructions are used in
+ // 1) Original header:
+ //
+  //      If the instruction is used by non-phi instructions then it is using
+  //      a definition from the original header itself. Do not replace this
+  //      use with a definition from the new header or original pre-header.
+  //
+  //      If the instruction is used by a phi node then it is an incoming
+  //      value. Rename its use to reflect the new definition from the new
+  //      pre-header or new header.
+ //
+ // 2) Inside loop but not in original header
+ //
+ // Replace this use to reflect definition from new header.
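+  //
+  // 3) Inside the exit block or beyond it
+  //
+  //      Exit block PHI nodes (LCSSA) receive a second incoming value from
+  //      the original pre-header; uses further outside are routed through a
+  //      new PHI node created in the exit block. Both cases are handled
+  //      below.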
+ for (unsigned LHI = 0, LHI_E = LoopHeaderInfo.size(); LHI != LHI_E; ++LHI) {
+ const RenameData &ILoopHeaderInfo = LoopHeaderInfo[LHI];
+
+ if (!ILoopHeaderInfo.Header)
+ continue;
+
+ Instruction *OldPhi = ILoopHeaderInfo.Original;
+ Instruction *NewPhi = ILoopHeaderInfo.Header;
+
+ // Before replacing uses, collect them first, so that iterator is
+ // not invalidated.
+ SmallVector<Instruction *, 16> AllUses;
+ for (Value::use_iterator UI = OldPhi->use_begin(), UE = OldPhi->use_end();
+ UI != UE; ++UI)
+ AllUses.push_back(cast<Instruction>(UI));
+
+ for (SmallVector<Instruction *, 16>::iterator UI = AllUses.begin(),
+ UE = AllUses.end(); UI != UE; ++UI) {
+ Instruction *U = *UI;
+ BasicBlock *Parent = U->getParent();
+
+ // Used inside original header
+ if (Parent == OrigHeader) {
+ // Do not rename uses inside original header non-phi instructions.
+ PHINode *PU = dyn_cast<PHINode>(U);
+ if (!PU)
+ continue;
+
+ // Do not rename uses inside original header phi nodes, if the
+ // incoming value is for new header.
+ if (PU->getBasicBlockIndex(NewHeader) != -1
+ && PU->getIncomingValueForBlock(NewHeader) == U)
+ continue;
+
+ U->replaceUsesOfWith(OldPhi, NewPhi);
+ continue;
+ }
+
+ // Used inside loop, but not in original header.
+ if (L->contains(U->getParent())) {
+ if (U != NewPhi)
+ U->replaceUsesOfWith(OldPhi, NewPhi);
+ continue;
+ }
+
+ // Used inside Exit Block. Since we are in LCSSA form, U must be PHINode.
+ if (U->getParent() == Exit) {
+ assert(isa<PHINode>(U) && "Use in Exit Block that is not PHINode");
+
+ PHINode *UPhi = cast<PHINode>(U);
+ // UPhi already has one incoming argument from original header.
+ // Add second incoming argument from new Pre header.
+ UPhi->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
+ } else {
+        // Used outside Exit block. Create a new PHI node in the exit block
+        // to receive the value from the new pre-header and the original
+        // header.
+ PHINode *PN = PHINode::Create(U->getType(), U->getName(),
+ Exit->begin());
+ PN->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
+ PN->addIncoming(OldPhi, OrigHeader);
+ U->replaceUsesOfWith(OldPhi, PN);
+ }
+ }
+ }
+
+ /// Make sure all Exit block PHINodes have required incoming values.
+ updateExitBlock();
+
+ // Update CFG
+
+ // Removing incoming branch from loop preheader to original header.
+ // Now original header is inside the loop.
+ for (BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ I != E; ++I)
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ PN->removeIncomingValue(OrigPreHeader);
+
+ // Make NewHeader as the new header for the loop.
+ L->moveToHeader(NewHeader);
+
+ preserveCanonicalLoopForm(LPM);
+
+ NumRotated++;
+ return true;
+}
+
+/// Make sure all Exit block PHINodes have required incoming values.
+/// If incoming value is constant or defined outside the loop then
+/// PHINode may not have an entry for original pre-header.
+void LoopRotate::updateExitBlock() {
+
+ for (BasicBlock::iterator I = Exit->begin(), E = Exit->end();
+ I != E; ++I) {
+
+ PHINode *PN = dyn_cast<PHINode>(I);
+ if (!PN)
+ break;
+
+ // There is already one incoming value from original pre-header block.
+ if (PN->getBasicBlockIndex(OrigPreHeader) != -1)
+ continue;
+
+ const RenameData *ILoopHeaderInfo;
+ Value *V = PN->getIncomingValueForBlock(OrigHeader);
+ if (isa<Instruction>(V) &&
+ (ILoopHeaderInfo = findReplacementData(cast<Instruction>(V)))) {
+ assert(ILoopHeaderInfo->PreHeader && "Missing New Preheader Instruction");
+ PN->addIncoming(ILoopHeaderInfo->PreHeader, OrigPreHeader);
+ } else {
+ PN->addIncoming(V, OrigPreHeader);
+ }
+ }
+}
+
+/// Initialize local data
+void LoopRotate::initialize() {
+ L = NULL;
+ OrigHeader = NULL;
+ OrigPreHeader = NULL;
+ NewHeader = NULL;
+ Exit = NULL;
+
+ LoopHeaderInfo.clear();
+}
+
+/// Return true if this instruction is used by any instructions in the loop that
+/// aren't in original header.
+bool LoopRotate::usedOutsideOriginalHeader(Instruction *In) {
+ for (Value::use_iterator UI = In->use_begin(), UE = In->use_end();
+ UI != UE; ++UI) {
+ BasicBlock *UserBB = cast<Instruction>(UI)->getParent();
+ if (UserBB != OrigHeader && L->contains(UserBB))
+ return true;
+ }
+
+ return false;
+}
+
+/// Find Replacement information for instruction. Return NULL if it is
+/// not available.
+const RenameData *LoopRotate::findReplacementData(Instruction *In) {
+
+ // Since LoopHeaderInfo is small, linear walk is OK.
+ for (unsigned LHI = 0, LHI_E = LoopHeaderInfo.size(); LHI != LHI_E; ++LHI) {
+ const RenameData &ILoopHeaderInfo = LoopHeaderInfo[LHI];
+ if (ILoopHeaderInfo.Original == In)
+ return &ILoopHeaderInfo;
+ }
+ return NULL;
+}
+
+/// After loop rotation, the loop pre-header has multiple successors.
+/// Insert one forwarding basic block to ensure that the loop pre-header
+/// has only one successor.
+void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
+
+ // Right now original pre-header has two successors, new header and
+ // exit block. Insert new block between original pre-header and
+ // new header such that loop's new pre-header has only one successor.
+ BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph",
+ OrigHeader->getParent(),
+ NewHeader);
+ LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
+ if (Loop *PL = LI.getLoopFor(OrigPreHeader))
+ PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
+ BranchInst::Create(NewHeader, NewPreHeader);
+
+ BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator());
+ if (OrigPH_BI->getSuccessor(0) == NewHeader)
+ OrigPH_BI->setSuccessor(0, NewPreHeader);
+ else {
+ assert(OrigPH_BI->getSuccessor(1) == NewHeader &&
+ "Unexpected original pre-header terminator");
+ OrigPH_BI->setSuccessor(1, NewPreHeader);
+ }
+
+ for (BasicBlock::iterator I = NewHeader->begin(), E = NewHeader->end();
+ I != E; ++I) {
+ PHINode *PN = dyn_cast<PHINode>(I);
+ if (!PN)
+ break;
+
+ int index = PN->getBasicBlockIndex(OrigPreHeader);
+ assert(index != -1 && "Expected incoming value from Original PreHeader");
+ PN->setIncomingBlock(index, NewPreHeader);
+ assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 &&
+ "Expected only one incoming value from Original PreHeader");
+ }
+
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ DT->addNewBlock(NewPreHeader, OrigPreHeader);
+ DT->changeImmediateDominator(L->getHeader(), NewPreHeader);
+ DT->changeImmediateDominator(Exit, OrigPreHeader);
+ for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ BasicBlock *B = *BI;
+ if (L->getHeader() != B) {
+ DomTreeNode *Node = DT->getNode(B);
+ if (Node && Node->getBlock() == OrigHeader)
+ DT->changeImmediateDominator(*BI, L->getHeader());
+ }
+ }
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ }
+
+ if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) {
+ // New Preheader's dominance frontier is Exit block.
+ DominanceFrontier::DomSetType NewPHSet;
+ NewPHSet.insert(Exit);
+ DF->addBasicBlock(NewPreHeader, NewPHSet);
+
+ // New Header's dominance frontier now includes itself and Exit block
+ DominanceFrontier::iterator HeadI = DF->find(L->getHeader());
+ if (HeadI != DF->end()) {
+ DominanceFrontier::DomSetType & HeaderSet = HeadI->second;
+ HeaderSet.clear();
+ HeaderSet.insert(L->getHeader());
+ HeaderSet.insert(Exit);
+ } else {
+ DominanceFrontier::DomSetType HeaderSet;
+ HeaderSet.insert(L->getHeader());
+ HeaderSet.insert(Exit);
+ DF->addBasicBlock(L->getHeader(), HeaderSet);
+ }
+
+ // Original header (new Loop Latch)'s dominance frontier is Exit.
+ DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch());
+ if (LatchI != DF->end()) {
+      DominanceFrontier::DomSetType &LatchSet = LatchI->second;
+      LatchSet.clear();
+ LatchSet.insert(Exit);
+ } else {
+ DominanceFrontier::DomSetType LatchSet;
+ LatchSet.insert(Exit);
+      DF->addBasicBlock(L->getLoopLatch(), LatchSet);
+ }
+
+ // If a loop block dominates new loop latch then its frontier is
+ // new header and Exit.
+ BasicBlock *NewLatch = L->getLoopLatch();
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ BasicBlock *B = *BI;
+ if (DT->dominates(B, NewLatch)) {
+ DominanceFrontier::iterator BDFI = DF->find(B);
+ if (BDFI != DF->end()) {
+          DominanceFrontier::DomSetType &BSet = BDFI->second;
+ BSet.clear();
+ BSet.insert(L->getHeader());
+ BSet.insert(Exit);
+ } else {
+ DominanceFrontier::DomSetType BSet;
+ BSet.insert(L->getHeader());
+ BSet.insert(Exit);
+ DF->addBasicBlock(B, BSet);
+ }
+ }
+ }
+ }
+
+ // Preserve canonical loop form, which means Exit block should
+ // have only one predecessor.
+ BasicBlock *NExit = SplitEdge(L->getLoopLatch(), Exit, this);
+
+ // Preserve LCSSA.
+ BasicBlock::iterator I = Exit->begin(), E = Exit->end();
+ PHINode *PN = NULL;
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ unsigned N = PN->getNumIncomingValues();
+ for (unsigned index = 0; index < N; ++index)
+ if (PN->getIncomingBlock(index) == NExit) {
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(),
+ NExit->begin());
+ NewPN->addIncoming(PN->getIncomingValue(index), L->getLoopLatch());
+ PN->setIncomingValue(index, NewPN);
+ PN->setIncomingBlock(index, NExit);
+ break;
+ }
+ }
+
+ assert(NewHeader && L->getHeader() == NewHeader &&
+ "Invalid loop header after loop rotation");
+ assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader &&
+ "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() &&
+ "Invalid loop latch after loop rotation");
+}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
new file mode 100644
index 0000000..92270b5
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -0,0 +1,2605 @@
+//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into forms suitable for efficient execution
+// on the target.
+//
+// This pass performs a strength reduction on array references inside loops
+// that have the loop induction variable as one or more of their components;
+// it rewrites expressions to take advantage of scaled-index addressing modes
+// available on the target, and it performs a variety of other optimizations
+// related to loop induction variables.
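+//
+// A small illustrative example (hypothetical C, not code from this file):
+//
+//   for (i = 0; i < n; ++i)     =>   for (p = a; p != a + n; ++p)
+//     sum += a[i];                     sum += *p;
+//
+// The address computation a + i*sizeof(*a) implicit in a[i] is strength-
+// reduced to a simple pointer increment.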
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reduce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumReduced , "Number of IV uses strength reduced");
+STATISTIC(NumInserted, "Number of PHIs inserted");
+STATISTIC(NumVariable, "Number of PHIs with variable strides");
+STATISTIC(NumEliminated, "Number of strides eliminated");
+STATISTIC(NumShadow, "Number of Shadow IVs optimized");
+STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
+STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
+
+static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
+ cl::init(false),
+ cl::Hidden);
+
+namespace {
+
+ struct BasedUser;
+
+  /// IVExpr - This structure keeps track of one IV expression inserted during
+  /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
+  /// well as the PHI node and increment value created for rewrite.
+ struct VISIBILITY_HIDDEN IVExpr {
+ SCEVHandle Stride;
+ SCEVHandle Base;
+ PHINode *PHI;
+
+ IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi)
+ : Stride(stride), Base(base), PHI(phi) {}
+ };
+
+  /// IVsOfOneStride - This structure keeps track of all IV expressions
+  /// inserted during StrengthReduceStridedIVUsers for a particular stride
+  /// of the IV.
+ struct VISIBILITY_HIDDEN IVsOfOneStride {
+ std::vector<IVExpr> IVs;
+
+ void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI) {
+ IVs.push_back(IVExpr(Stride, Base, PHI));
+ }
+ };
+
+ class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
+ IVUsers *IU;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ bool Changed;
+
+ /// IVsByStride - Keep track of all IVs that have been inserted for a
+ /// particular stride.
+ std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
+
+ /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
+ /// reused (nor should they be rewritten to reuse other strides).
+ SmallSet<SCEVHandle, 4> StrideNoReuse;
+
+ /// DeadInsts - Keep track of instructions we may have made dead, so that
+ /// we can remove them after we are done working.
+ SmallVector<WeakVH, 16> DeadInsts;
+
+    /// TLI - Keep a pointer to a TargetLowering to consult for determining
+    /// transformation profitability.
+ const TargetLowering *TLI;
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+ LoopPass(&ID), TLI(tli) {
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // We split critical edges, so we change the CFG. However, we do update
+ // many analyses if they are around.
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addPreserved<DominatorTree>();
+
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<IVUsers>();
+ }
+
+ private:
+ ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse,
+ const SCEVHandle* &CondStride);
+
+ void OptimizeIndvars(Loop *L);
+ void OptimizeLoopCountIV(Loop *L);
+ void OptimizeLoopTermCond(Loop *L);
+
+    /// OptimizeShadowIV - If IV is used in an int-to-float cast
+    /// inside the loop then try to eliminate the cast operation.
+ void OptimizeShadowIV(Loop *L);
+
+ /// OptimizeSMax - Rewrite the loop's terminating condition
+ /// if it uses an smax computation.
+ ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse);
+
+ bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
+ const SCEVHandle *&CondStride);
+ bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
+ SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
+ IVExpr&, const Type*,
+ const std::vector<BasedUser>& UsersToProcess);
+ bool ValidScale(bool, int64_t,
+ const std::vector<BasedUser>& UsersToProcess);
+ bool ValidOffset(bool, int64_t, int64_t,
+ const std::vector<BasedUser>& UsersToProcess);
+ SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L,
+ bool &AllUsesAreAddresses,
+ bool &AllUsesAreOutsideLoop,
+ std::vector<BasedUser> &UsersToProcess);
+ bool ShouldUseFullStrengthReductionMode(
+ const std::vector<BasedUser> &UsersToProcess,
+ const Loop *L,
+ bool AllUsesAreAddresses,
+ SCEVHandle Stride);
+ void PrepareToStrengthReduceFully(
+ std::vector<BasedUser> &UsersToProcess,
+ SCEVHandle Stride,
+ SCEVHandle CommonExprs,
+ const Loop *L,
+ SCEVExpander &PreheaderRewriter);
+ void PrepareToStrengthReduceFromSmallerStride(
+ std::vector<BasedUser> &UsersToProcess,
+ Value *CommonBaseV,
+ const IVExpr &ReuseIV,
+ Instruction *PreInsertPt);
+ void PrepareToStrengthReduceWithNewPhi(
+ std::vector<BasedUser> &UsersToProcess,
+ SCEVHandle Stride,
+ SCEVHandle CommonExprs,
+ Value *CommonBaseV,
+ Instruction *IVIncInsertPt,
+ const Loop *L,
+ SCEVExpander &PreheaderRewriter);
+ void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L);
+ void DeleteTriviallyDeadInstructions();
+ };
+}
+
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+ return new LoopStrengthReduce(TLI);
+}
+
+/// DeleteTriviallyDeadInstructions - If any of the instructions in the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
+ if (DeadInsts.empty()) return;
+
+ while (!DeadInsts.empty()) {
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
+ DeadInsts.pop_back();
+
+ if (I == 0 || !isInstructionTriviallyDead(I))
+ continue;
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ *OI = 0;
+ if (U->use_empty())
+ DeadInsts.push_back(U);
+ }
+ }
+
+ I->eraseFromParent();
+ Changed = true;
+ }
+}
+
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// of L is OK, but not an inner loop nor a disjoint loop.
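+/// For example, with nested loops L1 { L2 } (hypothetical): when L is L2,
+/// an AddRec over L1 is acceptable since L1 is an outer loop, but when L
+/// is L1, an AddRec over L2 makes this function return true.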
+static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
+ // This is very common, put it first.
+ if (isa<SCEVConstant>(S))
+ return false;
+ if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (unsigned int i=0; i< AE->getNumOperands(); i++)
+ if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
+ return true;
+ return false;
+ }
+ if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (const Loop *newLoop = AE->getLoop()) {
+ if (newLoop == L)
+ return false;
+ // if newLoop is an outer loop of L, this is OK.
+ if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop))
+ return false;
+ }
+ return true;
+ }
+ if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
+ return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
+ containsAddRecFromDifferentLoop(DE->getRHS(), L);
+#if 0
+ // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+ // need this when it is.
+ if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
+ return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
+ containsAddRecFromDifferentLoop(DE->getRHS(), L);
+#endif
+ if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
+ return containsAddRecFromDifferentLoop(CE->getOperand(), L);
+ return false;
+}
+
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
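+/// For example (hypothetical IR), in "store i32 %v, i32* %p" the operand
+/// %p is used as an address, while the stored value %v is not.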
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+ bool isAddress = isa<LoadInst>(Inst);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(1) == OperandVal)
+ isAddress = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::prefetch:
+ case Intrinsic::x86_sse2_loadu_dq:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ if (II->getOperand(1) == OperandVal)
+ isAddress = true;
+ break;
+ }
+ }
+ return isAddress;
+}
+
+/// getAccessType - Return the type of the memory being accessed.
+static const Type *getAccessType(const Instruction *Inst) {
+ const Type *AccessTy = Inst->getType();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ AccessTy = SI->getOperand(0)->getType();
+ else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ AccessTy = II->getOperand(1)->getType();
+ break;
+ }
+ }
+ return AccessTy;
+}
+
+namespace {
+ /// BasedUser - For a particular base value, keep information about how we've
+ /// partitioned the expression so far.
+ struct BasedUser {
+ /// SE - The current ScalarEvolution object.
+ ScalarEvolution *SE;
+
+ /// Base - The Base value for the PHI node that needs to be inserted for
+ /// this use. As the use is processed, information gets moved from this
+ /// field to the Imm field (below). BasedUser values are sorted by this
+ /// field.
+ SCEVHandle Base;
+
+ /// Inst - The instruction using the induction variable.
+ Instruction *Inst;
+
+ /// OperandValToReplace - The operand value of Inst to replace with the
+ /// EmittedBase.
+ Value *OperandValToReplace;
+
+ /// isSigned - The stride (and thus also the Base) of this use may be in
+ /// a narrower type than the use itself (OperandValToReplace->getType()).
+ /// When this is the case, the isSigned field indicates whether the
+ /// IV expression should be signed-extended instead of zero-extended to
+ /// fit the type of the use.
+ bool isSigned;
+
+ /// Imm - The immediate value that should be added to the base immediately
+ /// before Inst, because it will be folded into the imm field of the
+ /// instruction. This is also sometimes used for loop-variant values that
+ /// must be added inside the loop.
+ SCEVHandle Imm;
+
+ /// Phi - The induction variable that performs the striding that
+ /// should be used for this user.
+ PHINode *Phi;
+
+ // isUseOfPostIncrementedValue - True if this should use the
+ // post-incremented version of this IV, not the preincremented version.
+ // This can only be set in special cases, such as the terminating setcc
+ // instruction for a loop and uses outside the loop that are dominated by
+ // the loop.
+ bool isUseOfPostIncrementedValue;
+
+ BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
+ : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
+ OperandValToReplace(IVSU.getOperandValToReplace()),
+ isSigned(IVSU.isSigned()),
+ Imm(SE->getIntegerSCEV(0, Base->getType())),
+ isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
+
+ // Once we rewrite the code to insert the new IVs we want, update the
+ // operands of Inst to use the new expression 'NewBase', with 'Imm' added
+ // to it.
+ void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
+ Instruction *InsertPt,
+ SCEVExpander &Rewriter, Loop *L, Pass *P,
+ LoopInfo &LI,
+ SmallVectorImpl<WeakVH> &DeadInsts);
+
+ Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
+ const Type *Ty,
+ SCEVExpander &Rewriter,
+ Instruction *IP, Loop *L,
+ LoopInfo &LI);
+ void dump() const;
+ };
+}
+
+void BasedUser::dump() const {
+ cerr << " Base=" << *Base;
+ cerr << " Imm=" << *Imm;
+ cerr << " Inst: " << *Inst;
+}
+
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
+ const Type *Ty,
+ SCEVExpander &Rewriter,
+ Instruction *IP, Loop *L,
+ LoopInfo &LI) {
+  // Figure out where we *really* want to insert this code. In particular, if
+  // the user is inside of a loop that is nested inside of L, we really don't
+  // want to insert this expression before the user; we'd rather pull it out
+  // of as many loops as possible.
+ Instruction *BaseInsertPt = IP;
+
+ // Figure out the most-nested loop that IP is in.
+ Loop *InsertLoop = LI.getLoopFor(IP->getParent());
+
+  // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
+  // the preheader of the outer-most loop in which NewBase is loop invariant.
+ if (L->contains(IP->getParent()))
+ while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) {
+ BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
+ InsertLoop = InsertLoop->getParentLoop();
+ }
+
+ Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
+
+ SCEVHandle NewValSCEV = SE->getUnknown(Base);
+
+ // If there is no immediate value, skip the next part.
+ if (!Imm->isZero()) {
+ // If we are inserting the base and imm values in the same block, make sure
+ // to adjust the IP position if insertion reused a result.
+ if (IP == BaseInsertPt)
+ IP = Rewriter.getInsertionPoint();
+
+ // Always emit the immediate (if non-zero) into the same block as the user.
+ NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
+ }
+
+ if (isSigned)
+ NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty);
+ else
+ NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty);
+
+ return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
+}
+
+
+// Once we rewrite the code to insert the new IVs we want, update the
+// operands of Inst to use the new expression 'NewBase', with 'Imm' added
+// to it. NewBasePt is the last instruction which contributes to the
+// value of NewBase in the case that it's a different instruction from
+// the PHI that NewBase is computed from, or null otherwise.
+//
+void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
+ Instruction *NewBasePt,
+ SCEVExpander &Rewriter, Loop *L, Pass *P,
+ LoopInfo &LI,
+ SmallVectorImpl<WeakVH> &DeadInsts) {
+ if (!isa<PHINode>(Inst)) {
+ // By default, insert code at the user instruction.
+ BasicBlock::iterator InsertPt = Inst;
+
+ // However, if the Operand is itself an instruction, the (potentially
+ // complex) inserted code may be shared by many users. Because of this, we
+ // want to emit code for the computation of the operand right before its old
+ // computation. This is usually safe, because we obviously used to use the
+ // computation when it was computed in its current block. However, in some
+ // cases (e.g. use of a post-incremented induction variable) the NewBase
+ // value will be pinned to live somewhere after the original computation.
+ // In this case, we have to back off.
+ //
+ // If this is a use outside the loop (which means after, since it is based
+ // on a loop indvar) we use the post-incremented value, so that we don't
+ // artificially make the preinc value live out the bottom of the loop.
+ if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
+ if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
+ InsertPt = NewBasePt;
+ ++InsertPt;
+ } else if (Instruction *OpInst
+ = dyn_cast<Instruction>(OperandValToReplace)) {
+ InsertPt = OpInst;
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ }
+ }
+ Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
+ OperandValToReplace->getType(),
+ Rewriter, InsertPt, L, LI);
+ // Replace the use of the operand Value with the new Phi we just created.
+ Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
+
+ DOUT << " Replacing with ";
+ DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false));
+ DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
+ return;
+ }
+
+ // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm
+ // expression into each operand block that uses it. Note that PHI nodes can
+ // have multiple entries for the same predecessor. We use a map to make sure
+ // that a PHI node only has a single Value* for each predecessor (which also
+ // prevents us from inserting duplicate code in some blocks).
+ DenseMap<BasicBlock*, Value*> InsertedCode;
+ PHINode *PN = cast<PHINode>(Inst);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingValue(i) == OperandValToReplace) {
+ // If the original expression is outside the loop, put the replacement
+ // code in the same place as the original expression,
+ // which need not be an immediate predecessor of this PHI. This way we
+ // need only one copy of it even if it is referenced multiple times in
+ // the PHI. We don't do this when the original expression is inside the
+ // loop because multiple copies sometimes do useful sinking of code in
+ // that case(?).
+ Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
+ if (L->contains(OldLoc->getParent())) {
+ // If this is a critical edge, split the edge so that we do not insert
+ // the code on all predecessor/successor paths. We do this unless this
+ // is the canonical backedge for this loop, as this can make some
+ // inserted code be in an illegal position.
+ BasicBlock *PHIPred = PN->getIncomingBlock(i);
+ if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
+ (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
+
+ // First step, split the critical edge.
+ SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
+
+ // Next step: move the basic block. In particular, if the PHI node
+        // is outside of the loop, and PHIPred is in the loop, we want to
+        // move the block to be immediately before the PHI block, not
+        // immediately after PHIPred.
+ if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
+ BasicBlock *NewBB = PN->getIncomingBlock(i);
+ NewBB->moveBefore(PN->getParent());
+ }
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ }
+ }
+ Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
+ if (!Code) {
+ // Insert the code into the end of the predecessor block.
+ Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
+ PN->getIncomingBlock(i)->getTerminator() :
+ OldLoc->getParent()->getTerminator();
+ Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
+ Rewriter, InsertPt, L, LI);
+
+ DOUT << " Changing PHI use to ";
+ DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false));
+ DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
+ }
+
+ // Replace the use of the operand Value with the new Phi we just created.
+ PN->setIncomingValue(i, Code);
+ Rewriter.clear();
+ }
+ }
+
+ // PHI node might have become a constant value after SplitCriticalEdge.
+ DeadInsts.push_back(Inst);
+}
+
+
+/// fitsInAddressMode - Return true if V can be subsumed within an addressing
+/// mode, and does not need to be put in a register first.
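+/// For example, a small constant offset such as 16 can typically be folded
+/// into a reg+imm addressing mode; without target info, the fallback below
+/// accepts only PPC-like sign-extended 16-bit immediates.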
+static bool fitsInAddressMode(const SCEVHandle &V, const Type *AccessTy,
+ const TargetLowering *TLI, bool HasBaseReg) {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
+ int64_t VC = SC->getValue()->getSExtValue();
+ if (TLI) {
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = VC;
+ AM.HasBaseReg = HasBaseReg;
+ return TLI->isLegalAddressingMode(AM, AccessTy);
+ } else {
+ // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
+ return (VC > -(1 << 16) && VC < (1 << 16)-1);
+ }
+ }
+
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) {
+ if (TLI) {
+ TargetLowering::AddrMode AM;
+ AM.BaseGV = GV;
+ AM.HasBaseReg = HasBaseReg;
+ return TLI->isLegalAddressingMode(AM, AccessTy);
+ } else {
+ // Default: assume global addresses are not legal.
+ }
+ }
+
+ return false;
+}
+
+/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
+/// loop varying to the Imm operand.
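+/// For example, if Val is (%n + %lv) where %n is loop invariant and %lv is
+/// loop variant (hypothetical values), %lv is added to Imm and Val is
+/// reduced to %n.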
+static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
+ Loop *L, ScalarEvolution *SE) {
+ if (Val->isLoopInvariant(L)) return; // Nothing to do.
+
+ if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
+ std::vector<SCEVHandle> NewOps;
+ NewOps.reserve(SAE->getNumOperands());
+
+ for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
+ if (!SAE->getOperand(i)->isLoopInvariant(L)) {
+ // If this is a loop-variant expression, it must stay in the immediate
+ // field of the expression.
+ Imm = SE->getAddExpr(Imm, SAE->getOperand(i));
+ } else {
+ NewOps.push_back(SAE->getOperand(i));
+ }
+
+ if (NewOps.empty())
+ Val = SE->getIntegerSCEV(0, Val->getType());
+ else
+ Val = SE->getAddExpr(NewOps);
+ } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
+ // Try to pull immediates out of the start value of nested addrec's.
+ SCEVHandle Start = SARE->getStart();
+ MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
+
+ std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ Ops[0] = Start;
+ Val = SE->getAddRecExpr(Ops, SARE->getLoop());
+ } else {
+ // Otherwise, all of Val is variant, move the whole thing over.
+ Imm = SE->getAddExpr(Imm, Val);
+ Val = SE->getIntegerSCEV(0, Val->getType());
+ }
+}
+
+
+/// MoveImmediateValues - Look at Val, and pull out any additions of constants
+/// that can fit into the immediate field of instructions in the target.
+/// Accumulate these immediate values into the Imm value.
+static void MoveImmediateValues(const TargetLowering *TLI,
+ const Type *AccessTy,
+ SCEVHandle &Val, SCEVHandle &Imm,
+ bool isAddress, Loop *L,
+ ScalarEvolution *SE) {
+ if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
+ std::vector<SCEVHandle> NewOps;
+ NewOps.reserve(SAE->getNumOperands());
+
+ for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
+ SCEVHandle NewOp = SAE->getOperand(i);
+ MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
+
+ if (!NewOp->isLoopInvariant(L)) {
+ // If this is a loop-variant expression, it must stay in the immediate
+ // field of the expression.
+ Imm = SE->getAddExpr(Imm, NewOp);
+ } else {
+ NewOps.push_back(NewOp);
+ }
+ }
+
+ if (NewOps.empty())
+ Val = SE->getIntegerSCEV(0, Val->getType());
+ else
+ Val = SE->getAddExpr(NewOps);
+ return;
+ } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
+ // Try to pull immediates out of the start value of nested addrec's.
+ SCEVHandle Start = SARE->getStart();
+ MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
+
+ if (Start != SARE->getStart()) {
+ std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ Ops[0] = Start;
+ Val = SE->getAddRecExpr(Ops, SARE->getLoop());
+ }
+ return;
+ } else if (const SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
+ // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
+ if (isAddress &&
+ fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) &&
+ SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
+
+ SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType());
+ SCEVHandle NewOp = SME->getOperand(1);
+ MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
+
+ // If we extracted something out of the subexpressions, see if we can
+ // simplify this!
+ if (NewOp != SME->getOperand(1)) {
+ // Scale SubImm up by "8". If the result is a target constant, we are
+ // good.
+ SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
+ if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
+ // Accumulate the immediate.
+ Imm = SE->getAddExpr(Imm, SubImm);
+
+ // Update what is left of 'Val'.
+ Val = SE->getMulExpr(SME->getOperand(0), NewOp);
+ return;
+ }
+ }
+ }
+ }
+
+ // Loop-variant expressions must stay in the immediate field of the
+ // expression.
+ if ((isAddress && fitsInAddressMode(Val, AccessTy, TLI, false)) ||
+ !Val->isLoopInvariant(L)) {
+ Imm = SE->getAddExpr(Imm, Val);
+ Val = SE->getIntegerSCEV(0, Val->getType());
+ return;
+ }
+
+ // Otherwise, no immediates to move.
+}
+
+static void MoveImmediateValues(const TargetLowering *TLI,
+ Instruction *User,
+ SCEVHandle &Val, SCEVHandle &Imm,
+ bool isAddress, Loop *L,
+ ScalarEvolution *SE) {
+ const Type *AccessTy = getAccessType(User);
+ MoveImmediateValues(TLI, AccessTy, Val, Imm, isAddress, L, SE);
+}
+
+/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
+/// added together. This is used to reassociate common addition subexprs
+/// together for maximal sharing when rewriting bases.
+static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
+ SCEVHandle Expr,
+ ScalarEvolution *SE) {
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
+ for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
+ SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
+ } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
+ SCEVHandle Zero = SE->getIntegerSCEV(0, Expr->getType());
+ if (SARE->getOperand(0) == Zero) {
+ SubExprs.push_back(Expr);
+ } else {
+ // Compute the addrec with zero as its base.
+ std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ Ops[0] = Zero; // Start with zero base.
+ SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
+
+
+ SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
+ }
+ } else if (!Expr->isZero()) {
+ // Do not add zero.
+ SubExprs.push_back(Expr);
+ }
+}
+
+// This is logically local to the following function, but C++ says we have
+// to make it file scope.
+struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
+
+/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
+/// the Uses, removing any common subexpressions, except that if all such
+/// subexpressions can be folded into an addressing mode for all uses inside
+/// the loop (this case is referred to as "free" in comments herein) we do
+/// not remove anything. This looks for things like (a+b+c) and
+/// (a+c+d) and computes the common (a+c) subexpression. The common expression
+/// is *removed* from the Bases and returned.
+static SCEVHandle
+RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
+ ScalarEvolution *SE, Loop *L,
+ const TargetLowering *TLI) {
+ unsigned NumUses = Uses.size();
+
+ // Only one use? This is a very common case, so we handle it specially and
+ // cheaply.
+ SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
+ SCEVHandle Result = Zero;
+ SCEVHandle FreeResult = Zero;
+ if (NumUses == 1) {
+ // If the use is inside the loop, use its base, regardless of what it is:
+ // it is clearly shared across all the IV's. If the use is outside the loop
+ // (which means after it) we don't want to factor anything *into* the loop,
+ // so just use 0 as the base.
+ if (L->contains(Uses[0].Inst->getParent()))
+ std::swap(Result, Uses[0].Base);
+ return Result;
+ }
+
+ // To find common subexpressions, count how many of Uses use each expression.
+ // If any subexpressions are used Uses.size() times, they are common.
+ // Also track whether all uses of each expression can be moved into an
+  // addressing mode "for free"; such expressions are left within the loop.
+ // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
+ std::map<SCEVHandle, SubExprUseData> SubExpressionUseData;
+
+ // UniqueSubExprs - Keep track of all of the subexpressions we see in the
+ // order we see them.
+ std::vector<SCEVHandle> UniqueSubExprs;
+
+ std::vector<SCEVHandle> SubExprs;
+ unsigned NumUsesInsideLoop = 0;
+ for (unsigned i = 0; i != NumUses; ++i) {
+ // If the user is outside the loop, just ignore it for base computation.
+ // Since the user is outside the loop, it must be *after* the loop (if it
+ // were before, it could not be based on the loop IV). We don't want users
+ // after the loop to affect base computation of values *inside* the loop,
+ // because we can always add their offsets to the result IV after the loop
+ // is done, ensuring we get good code inside the loop.
+ if (!L->contains(Uses[i].Inst->getParent()))
+ continue;
+ NumUsesInsideLoop++;
+
+ // If the base is zero (which is common), return zero now, there are no
+ // CSEs we can find.
+ if (Uses[i].Base == Zero) return Zero;
+
+ // If this use is as an address we may be able to put CSEs in the addressing
+ // mode rather than hoisting them.
+ bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace);
+ // We may need the AccessTy below, but only when isAddrUse, so compute it
+ // only in that case.
+ const Type *AccessTy = 0;
+ if (isAddrUse)
+ AccessTy = getAccessType(Uses[i].Inst);
+
+ // Split the expression into subexprs.
+ SeparateSubExprs(SubExprs, Uses[i].Base, SE);
+ // Add one to SubExpressionUseData.Count for each subexpr present, and
+ // if the subexpr is not a valid immediate within an addressing mode use,
+ // set SubExpressionUseData.notAllUsesAreFree. We definitely want to
+ // hoist these out of the loop (if they are common to all uses).
+ for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
+ if (++SubExpressionUseData[SubExprs[j]].Count == 1)
+ UniqueSubExprs.push_back(SubExprs[j]);
+ if (!isAddrUse || !fitsInAddressMode(SubExprs[j], AccessTy, TLI, false))
+ SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true;
+ }
+ SubExprs.clear();
+ }
+
+ // Now that we know how many times each is used, build Result. Iterate over
+ // UniqueSubexprs so that we have a stable ordering.
+ for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
+ std::map<SCEVHandle, SubExprUseData>::iterator I =
+ SubExpressionUseData.find(UniqueSubExprs[i]);
+ assert(I != SubExpressionUseData.end() && "Entry not found?");
+ if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
+ if (I->second.notAllUsesAreFree)
+ Result = SE->getAddExpr(Result, I->first);
+ else
+ FreeResult = SE->getAddExpr(FreeResult, I->first);
+ } else
+ // Remove non-cse's from SubExpressionUseData.
+ SubExpressionUseData.erase(I);
+ }
+
+ if (FreeResult != Zero) {
+ // We have some subexpressions that can be subsumed into addressing
+ // modes in every use inside the loop. However, it's possible that
+ // there are so many of them that the combined FreeResult cannot
+ // be subsumed, or that the target cannot handle both a FreeResult
+ // and a Result in the same instruction (for example because it would
+ // require too many registers). Check this.
+ for (unsigned i=0; i<NumUses; ++i) {
+ if (!L->contains(Uses[i].Inst->getParent()))
+ continue;
+ // We know this is an addressing mode use; if there are any uses that
+ // are not, FreeResult would be Zero.
+ const Type *AccessTy = getAccessType(Uses[i].Inst);
+ if (!fitsInAddressMode(FreeResult, AccessTy, TLI, Result!=Zero)) {
+ // FIXME: could split up FreeResult into pieces here, some hoisted
+ // and some not. There is no obvious advantage to this.
+ Result = SE->getAddExpr(Result, FreeResult);
+ FreeResult = Zero;
+ break;
+ }
+ }
+ }
+
+ // If we found no CSE's, return now.
+ if (Result == Zero) return Result;
+
+ // If we still have a FreeResult, remove its subexpressions from
+ // SubExpressionUseData. This means they will remain in the use Bases.
+ if (FreeResult != Zero) {
+ SeparateSubExprs(SubExprs, FreeResult, SE);
+ for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
+ std::map<SCEVHandle, SubExprUseData>::iterator I =
+ SubExpressionUseData.find(SubExprs[j]);
+ SubExpressionUseData.erase(I);
+ }
+ SubExprs.clear();
+ }
+
+ // Otherwise, remove all of the CSE's we found from each of the base values.
+ for (unsigned i = 0; i != NumUses; ++i) {
+ // Uses outside the loop don't necessarily include the common base, but
+ // the final IV value coming into those uses does. Instead of trying to
+ // remove the pieces of the common base, which might not be there,
+ // subtract off the base to compensate for this.
+ if (!L->contains(Uses[i].Inst->getParent())) {
+ Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result);
+ continue;
+ }
+
+ // Split the expression into subexprs.
+ SeparateSubExprs(SubExprs, Uses[i].Base, SE);
+
+ // Remove any common subexpressions.
+ for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
+ if (SubExpressionUseData.count(SubExprs[j])) {
+ SubExprs.erase(SubExprs.begin()+j);
+ --j; --e;
+ }
+
+ // Finally, add the non-shared expressions together.
+ if (SubExprs.empty())
+ Uses[i].Base = Zero;
+ else
+ Uses[i].Base = SE->getAddExpr(SubExprs);
+ SubExprs.clear();
+ }
+
+ return Result;
+}
+
+/// ValidScale - Check whether the given Scale is valid for all loads and
+/// stores in UsersToProcess.
+///
+bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
+ const std::vector<BasedUser>& UsersToProcess) {
+ if (!TLI)
+ return true;
+
+ for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) {
+ // If this is a load or other access, pass the type of the access in.
+ const Type *AccessTy = Type::VoidTy;
+ if (isAddressUse(UsersToProcess[i].Inst,
+ UsersToProcess[i].OperandValToReplace))
+ AccessTy = getAccessType(UsersToProcess[i].Inst);
+ else if (isa<PHINode>(UsersToProcess[i].Inst))
+ continue;
+
+ TargetLowering::AddrMode AM;
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
+ AM.BaseOffs = SC->getValue()->getSExtValue();
+ AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
+ AM.Scale = Scale;
+
+ // If load[imm+r*scale] is illegal, bail out.
+ if (!TLI->isLegalAddressingMode(AM, AccessTy))
+ return false;
+ }
+ return true;
+}
+
+/// ValidOffset - Check whether the given Offset is valid for all loads and
+/// stores in UsersToProcess.
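+///
+/// This differs from ValidScale in that the candidate Offset is folded
+/// into the immediate as well, i.e. it conceptually checks
+/// load [base + index*Scale + imm + Offset].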
+///
+bool LoopStrengthReduce::ValidOffset(bool HasBaseReg,
+ int64_t Offset,
+ int64_t Scale,
+ const std::vector<BasedUser>& UsersToProcess) {
+ if (!TLI)
+ return true;
+
+ for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
+ // If this is a load or other access, pass the type of the access in.
+ const Type *AccessTy = Type::VoidTy;
+ if (isAddressUse(UsersToProcess[i].Inst,
+ UsersToProcess[i].OperandValToReplace))
+ AccessTy = getAccessType(UsersToProcess[i].Inst);
+ else if (isa<PHINode>(UsersToProcess[i].Inst))
+ continue;
+
+ TargetLowering::AddrMode AM;
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
+ AM.BaseOffs = SC->getValue()->getSExtValue();
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset;
+ AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
+ AM.Scale = Scale;
+
+ // If load[imm+r*scale] is illegal, bail out.
+ if (!TLI->isLegalAddressingMode(AM, AccessTy))
+ return false;
+ }
+ return true;
+}
+
+/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not
+/// a nop.
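+///
+/// For example, on a target where truncation is free
+/// (TLI->isTruncateFree), converting i64 to i32 is a nop for our
+/// purposes, while converting i32 to i64 is not.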
+bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
+ const Type *Ty2) {
+ if (Ty1 == Ty2)
+ return false;
+ Ty1 = SE->getEffectiveSCEVType(Ty1);
+ Ty2 = SE->getEffectiveSCEVType(Ty2);
+ if (Ty1 == Ty2)
+ return false;
+ if (Ty1->canLosslesslyBitCastTo(Ty2))
+ return false;
+ if (TLI && TLI->isTruncateFree(Ty1, Ty2))
+ return false;
+ return true;
+}
+
+/// CheckForIVReuse - Returns the multiple if the stride is a multiple
+/// of a previous stride and it is a legal value for the target addressing
+/// mode scale component and optional base reg. This allows the users of
+/// this stride to be rewritten as prev iv * factor. It returns 0 if no
+/// reuse is possible. Factors can be negative on some targets, e.g. ARM.
+///
+/// If all uses are outside the loop, we don't require that all multiplies
+/// be folded into the addressing mode, nor even that the factor be constant;
+/// a multiply (executed once) outside the loop is better than another IV
+/// within. Well, usually.
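+///
+/// For example, if an IV with constant stride 4 already exists and the
+/// current stride is 8, the returned factor is 2 and the current stride's
+/// users can be rewritten as prev iv * 2, with the multiply folded into
+/// the scale field of the addressing mode where the target allows it.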
+SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+ bool AllUsesAreAddresses,
+ bool AllUsesAreOutsideLoop,
+ const SCEVHandle &Stride,
+ IVExpr &IV, const Type *Ty,
+ const std::vector<BasedUser>& UsersToProcess) {
+ if (StrideNoReuse.count(Stride))
+ return SE->getIntegerSCEV(0, Stride->getType());
+
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
+ int64_t SInt = SC->getValue()->getSExtValue();
+ for (unsigned NewStride = 0, e = IU->StrideOrder.size();
+ NewStride != e; ++NewStride) {
+ std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
+ IVsByStride.find(IU->StrideOrder[NewStride]);
+ if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
+ StrideNoReuse.count(SI->first))
+ continue;
+ int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+ if (SI->first != Stride &&
+ (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0))
+ continue;
+ int64_t Scale = SInt / SSInt;
+ // Check that this stride is valid for all the types used for loads and
+ // stores; if it can be used for some and not others, we might as well use
+ // the original stride everywhere, since we have to create the IV for it
+ // anyway. If the scale is 1, then we don't need to worry about folding
+ // multiplications.
+ if (Scale == 1 ||
+ (AllUsesAreAddresses &&
+ ValidScale(HasBaseReg, Scale, UsersToProcess))) {
+ // Prefer to reuse an IV with a base of zero.
+ for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+ IE = SI->second.IVs.end(); II != IE; ++II)
+ // Only reuse previous IV if it would not require a type conversion
+ // and if the base difference can be folded.
+ if (II->Base->isZero() &&
+ !RequiresTypeConversion(II->Base->getType(), Ty)) {
+ IV = *II;
+ return SE->getIntegerSCEV(Scale, Stride->getType());
+ }
+ // Otherwise, settle for an IV with a foldable base.
+ if (AllUsesAreAddresses)
+ for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+ IE = SI->second.IVs.end(); II != IE; ++II)
+ // Only reuse previous IV if it would not require a type conversion
+ // and if the base difference can be folded.
+ if (SE->getEffectiveSCEVType(II->Base->getType()) ==
+ SE->getEffectiveSCEVType(Ty) &&
+ isa<SCEVConstant>(II->Base)) {
+ int64_t Base =
+ cast<SCEVConstant>(II->Base)->getValue()->getSExtValue();
+ if (Base > INT32_MIN && Base <= INT32_MAX &&
+ ValidOffset(HasBaseReg, -Base * Scale,
+ Scale, UsersToProcess)) {
+ IV = *II;
+ return SE->getIntegerSCEV(Scale, Stride->getType());
+ }
+ }
+ }
+ }
+ } else if (AllUsesAreOutsideLoop) {
+    // Accept nonconstant strides here; it is almost always right to
+    // substitute an existing IV if we can.
+ for (unsigned NewStride = 0, e = IU->StrideOrder.size();
+ NewStride != e; ++NewStride) {
+ std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
+ IVsByStride.find(IU->StrideOrder[NewStride]);
+ if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
+ continue;
+ int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+ if (SI->first != Stride && SSInt != 1)
+ continue;
+ for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+ IE = SI->second.IVs.end(); II != IE; ++II)
+ // Accept nonzero base here.
+ // Only reuse previous IV if it would not require a type conversion.
+ if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
+ IV = *II;
+ return Stride;
+ }
+ }
+ // Special case, old IV is -1*x and this one is x. Can treat this one as
+ // -1*old.
+ for (unsigned NewStride = 0, e = IU->StrideOrder.size();
+ NewStride != e; ++NewStride) {
+ std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
+ IVsByStride.find(IU->StrideOrder[NewStride]);
+ if (SI == IVsByStride.end())
+ continue;
+ if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
+ if (Stride == ME->getOperand(1) &&
+ SC->getValue()->getSExtValue() == -1LL)
+ for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+ IE = SI->second.IVs.end(); II != IE; ++II)
+ // Accept nonzero base here.
+ // Only reuse previous IV if it would not require type conversion.
+ if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
+ IV = *II;
+ return SE->getIntegerSCEV(-1LL, Stride->getType());
+ }
+ }
+ }
+ return SE->getIntegerSCEV(0, Stride->getType());
+}
+
+/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
+/// returns true if Val's isUseOfPostIncrementedValue is true.
+static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
+ return Val.isUseOfPostIncrementedValue;
+}
+
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+static bool isNonConstantNegative(const SCEVHandle &Expr) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+  // Return true if the value is negative; this matches things like (-42 * V).
+ return SC->getValue()->getValue().isNegative();
+}
+
+// CollectIVUsers - Transform our list of users and offsets to a bit more
+// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
+// of the strided accesses, as well as the old information from Uses. We
+// progressively move information from the Base field to the Imm field, until
+// we eventually have the full access expression to rewrite the use.
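+//
+// For example, a use whose Base is (A + 4) may end up as Base = A with
+// Imm = 4 once we know the +4 can live in the instruction's immediate
+// field.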
+SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L,
+ bool &AllUsesAreAddresses,
+ bool &AllUsesAreOutsideLoop,
+ std::vector<BasedUser> &UsersToProcess) {
+ // FIXME: Generalize to non-affine IV's.
+ if (!Stride->isLoopInvariant(L))
+ return SE->getIntegerSCEV(0, Stride->getType());
+
+ UsersToProcess.reserve(Uses.Users.size());
+ for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(),
+ E = Uses.Users.end(); I != E; ++I) {
+ UsersToProcess.push_back(BasedUser(*I, SE));
+
+ // Move any loop variant operands from the offset field to the immediate
+ // field of the use, so that we don't try to use something before it is
+ // computed.
+ MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
+ UsersToProcess.back().Imm, L, SE);
+ assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
+ "Base value is not loop invariant!");
+ }
+
+ // We now have a whole bunch of uses of like-strided induction variables, but
+ // they might all have different bases. We want to emit one PHI node for this
+ // stride which we fold as many common expressions (between the IVs) into as
+ // possible. Start by identifying the common expressions in the base values
+ // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
+ // "A+B"), emit it to the preheader, then remove the expression from the
+ // UsersToProcess base values.
+ SCEVHandle CommonExprs =
+ RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
+
+ // Next, figure out what we can represent in the immediate fields of
+ // instructions. If we can represent anything there, move it to the imm
+ // fields of the BasedUsers. We do this so that it increases the commonality
+ // of the remaining uses.
+ unsigned NumPHI = 0;
+ bool HasAddress = false;
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
+ // If the user is not in the current loop, this means it is using the exit
+    // value of the IV. Do not put anything in the base; make sure it's all in
+ // the immediate field to allow as much factoring as possible.
+ if (!L->contains(UsersToProcess[i].Inst->getParent())) {
+ UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
+ UsersToProcess[i].Base);
+ UsersToProcess[i].Base =
+ SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
+ } else {
+ // Not all uses are outside the loop.
+ AllUsesAreOutsideLoop = false;
+
+ // Addressing modes can be folded into loads and stores. Be careful that
+      // the store is through the expression, not of the expression, though.
+ bool isPHI = false;
+ bool isAddress = isAddressUse(UsersToProcess[i].Inst,
+ UsersToProcess[i].OperandValToReplace);
+ if (isa<PHINode>(UsersToProcess[i].Inst)) {
+ isPHI = true;
+ ++NumPHI;
+ }
+
+ if (isAddress)
+ HasAddress = true;
+
+ // If this use isn't an address, then not all uses are addresses.
+ if (!isAddress && !isPHI)
+ AllUsesAreAddresses = false;
+
+ MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
+ UsersToProcess[i].Imm, isAddress, L, SE);
+ }
+ }
+
+  // If one of the uses is a PHI node and all other uses are addresses, still
+ // allow iv reuse. Essentially we are trading one constant multiplication
+ // for one fewer iv.
+ if (NumPHI > 1)
+ AllUsesAreAddresses = false;
+
+ // There are no in-loop address uses.
+ if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
+ AllUsesAreAddresses = false;
+
+ return CommonExprs;
+}
+
+/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction
+/// is valid and profitable for the given set of users of a stride. In
+/// full strength-reduction mode, all addresses at the current stride are
+/// strength-reduced all the way down to pointer arithmetic.
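+///
+/// For example, rather than computing A + i*S for each access of A[i],
+/// full mode gives each distinct base its own IV (conceptually a pointer
+/// starting at A) that is simply advanced by S every iteration.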
+///
+bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
+ const std::vector<BasedUser> &UsersToProcess,
+ const Loop *L,
+ bool AllUsesAreAddresses,
+ SCEVHandle Stride) {
+ if (!EnableFullLSRMode)
+ return false;
+
+ // The heuristics below aim to avoid increasing register pressure, but
+ // fully strength-reducing all the addresses increases the number of
+ // add instructions, so don't do this when optimizing for size.
+ // TODO: If the loop is large, the savings due to simpler addresses
+  // may outweigh the costs of the extra increment instructions.
+ if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ // TODO: For now, don't do full strength reduction if there could
+ // potentially be greater-stride multiples of the current stride
+ // which could reuse the current stride IV.
+ if (IU->StrideOrder.back() != Stride)
+ return false;
+
+ // Iterate through the uses to find conditions that automatically rule out
+ // full-lsr mode.
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
+ const SCEV *Base = UsersToProcess[i].Base;
+ const SCEV *Imm = UsersToProcess[i].Imm;
+ // If any users have a loop-variant component, they can't be fully
+ // strength-reduced.
+ if (Imm && !Imm->isLoopInvariant(L))
+ return false;
+    // If there are two users with the same base and the difference between
+ // the two Imm values can't be folded into the address, full
+ // strength reduction would increase register pressure.
+ do {
+ const SCEV *CurImm = UsersToProcess[i].Imm;
+ if ((CurImm || Imm) && CurImm != Imm) {
+ if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType());
+ if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType());
+ const Instruction *Inst = UsersToProcess[i].Inst;
+ const Type *AccessTy = getAccessType(Inst);
+ SCEVHandle Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
+ if (!Diff->isZero() &&
+ (!AllUsesAreAddresses ||
+ !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
+ return false;
+ }
+ } while (++i != e && Base == UsersToProcess[i].Base);
+ }
+
+ // If there's exactly one user in this stride, fully strength-reducing it
+ // won't increase register pressure. If it's starting from a non-zero base,
+ // it'll be simpler this way.
+ if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero())
+ return true;
+
+ // Otherwise, if there are any users in this stride that don't require
+ // a register for their base, full strength-reduction will increase
+ // register pressure.
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
+ if (UsersToProcess[i].Base->isZero())
+ return false;
+
+ // Otherwise, go for it.
+ return true;
+}
+
+/// InsertAffinePhi - Create and insert a PHI node for an induction variable
+/// with the specified start and step values in the specified loop.
+///
+/// If NegateStride is true, the stride should be negated by using a
+/// subtract instead of an add.
+///
+/// Return the created phi node.
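+///
+/// The emitted pattern is roughly:
+///
+///   header:
+///     %lsr.iv = phi [ Start, %preheader ], [ %lsr.iv.next, %latch ]
+///   ...
+///   IVIncInsertPt (usually the latch terminator):
+///     %lsr.iv.next = add %lsr.iv, Step   ; a sub if Step is negative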
+///
+static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step,
+ Instruction *IVIncInsertPt,
+ const Loop *L,
+ SCEVExpander &Rewriter) {
+ assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!");
+ assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!");
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ const Type *Ty = Start->getType();
+ Ty = Rewriter.SE.getEffectiveSCEVType(Ty);
+
+ PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin());
+ PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()),
+ Preheader);
+
+ // If the stride is negative, insert a sub instead of an add for the
+ // increment.
+ bool isNegative = isNonConstantNegative(Step);
+ SCEVHandle IncAmount = Step;
+ if (isNegative)
+ IncAmount = Rewriter.SE.getNegativeSCEV(Step);
+
+ // Insert an add instruction right before the terminator corresponding
+ // to the back-edge or just before the only use. The location is determined
+ // by the caller and passed in as IVIncInsertPt.
+ Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty,
+ Preheader->getTerminator());
+ Instruction *IncV;
+ if (isNegative) {
+ IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next",
+ IVIncInsertPt);
+ } else {
+ IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next",
+ IVIncInsertPt);
+ }
+ if (!isa<ConstantInt>(StepV)) ++NumVariable;
+
+ PN->addIncoming(IncV, LatchBlock);
+
+ ++NumInserted;
+ return PN;
+}
+
+static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
+ // We want to emit code for users inside the loop first. To do this, we
+ // rearrange BasedUser so that the entries at the end have
+ // isUseOfPostIncrementedValue = false, because we pop off the end of the
+ // vector (so we handle them first).
+ std::partition(UsersToProcess.begin(), UsersToProcess.end(),
+ PartitionByIsUseOfPostIncrementedValue);
+
+ // Sort this by base, so that things with the same base are handled
+ // together. By partitioning first and stable-sorting later, we are
+ // guaranteed that within each base we will pop off users from within the
+ // loop before users outside of the loop with a particular base.
+ //
+ // We would like to use stable_sort here, but we can't. The problem is that
+  // SCEVHandles don't have a deterministic ordering w.r.t. each other, so
+ // we don't have anything to do a '<' comparison on. Because we think the
+ // number of uses is small, do a horrible bubble sort which just relies on
+ // ==.
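+  //
+  // For example, bases [A, B, A, C] end up grouped as [A, A, B, C], with
+  // each group kept in order of first appearance.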
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
+ // Get a base value.
+ SCEVHandle Base = UsersToProcess[i].Base;
+
+ // Compact everything with this base to be consecutive with this one.
+ for (unsigned j = i+1; j != e; ++j) {
+ if (UsersToProcess[j].Base == Base) {
+ std::swap(UsersToProcess[i+1], UsersToProcess[j]);
+ ++i;
+ }
+ }
+ }
+}
+
+/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce
+/// UsersToProcess, meaning lowering addresses all the way down to direct
+/// pointer arithmetic.
+///
+void
+LoopStrengthReduce::PrepareToStrengthReduceFully(
+ std::vector<BasedUser> &UsersToProcess,
+ SCEVHandle Stride,
+ SCEVHandle CommonExprs,
+ const Loop *L,
+ SCEVExpander &PreheaderRewriter) {
+ DOUT << " Fully reducing all users\n";
+
+ // Rewrite the UsersToProcess records, creating a separate PHI for each
+ // unique Base value.
+ Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
+ // TODO: The uses are grouped by base, but not sorted. We arbitrarily
+ // pick the first Imm value here to start with, and adjust it for the
+ // other uses.
+ SCEVHandle Imm = UsersToProcess[i].Imm;
+ SCEVHandle Base = UsersToProcess[i].Base;
+ SCEVHandle Start = SE->getAddExpr(CommonExprs, Base, Imm);
+ PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
+ PreheaderRewriter);
+ // Loop over all the users with the same base.
+ do {
+ UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType());
+ UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
+ UsersToProcess[i].Phi = Phi;
+ assert(UsersToProcess[i].Imm->isLoopInvariant(L) &&
+ "ShouldUseFullStrengthReductionMode should reject this!");
+ } while (++i != e && Base == UsersToProcess[i].Base);
+ }
+}
+
+/// FindIVIncInsertPt - Return the location to insert the increment instruction.
+/// If the only use is a use of the postinc value (it must be the loop
+/// termination condition), then insert it just before that use.
+static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
+ const Loop *L) {
+ if (UsersToProcess.size() == 1 &&
+ UsersToProcess[0].isUseOfPostIncrementedValue &&
+ L->contains(UsersToProcess[0].Inst->getParent()))
+ return UsersToProcess[0].Inst;
+ return L->getLoopLatch()->getTerminator();
+}
+
+/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
+/// given users to share.
+///
+void
+LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
+ std::vector<BasedUser> &UsersToProcess,
+ SCEVHandle Stride,
+ SCEVHandle CommonExprs,
+ Value *CommonBaseV,
+ Instruction *IVIncInsertPt,
+ const Loop *L,
+ SCEVExpander &PreheaderRewriter) {
+ DOUT << " Inserting new PHI:\n";
+
+ PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
+ Stride, IVIncInsertPt, L,
+ PreheaderRewriter);
+
+ // Remember this in case a later stride is multiple of this.
+ IVsByStride[Stride].addIV(Stride, CommonExprs, Phi);
+
+ // All the users will share this new IV.
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
+ UsersToProcess[i].Phi = Phi;
+
+ DOUT << " IV=";
+ DEBUG(WriteAsOperand(*DOUT, Phi, /*PrintType=*/false));
+ DOUT << "\n";
+}
+
+/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
+/// reuse an induction variable with a stride that is a factor of the current
+/// induction variable.
+///
+void
+LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
+ std::vector<BasedUser> &UsersToProcess,
+ Value *CommonBaseV,
+ const IVExpr &ReuseIV,
+ Instruction *PreInsertPt) {
+ DOUT << " Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride
+ << " and BASE " << *ReuseIV.Base << "\n";
+
+ // All the users will share the reused IV.
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
+ UsersToProcess[i].Phi = ReuseIV.PHI;
+
+ Constant *C = dyn_cast<Constant>(CommonBaseV);
+ if (C &&
+ (!C->isNullValue() &&
+ !fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(),
+ TLI, false)))
+ // We want the common base emitted into the preheader! This is just
+    // using cast as a copy, so a BitCast (no-op cast) is appropriate.
+ CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
+ "commonbase", PreInsertPt);
+}
+
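+/// IsImmFoldedIntoAddrMode - Return true if the given GlobalValue and/or
+/// constant Offset can be folded into the addressing mode of every
+/// pre-increment use in UsersToProcess, i.e. whether each use can still be
+/// matched as something like load [reg + GV + Offset].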
+static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
+ const Type *AccessTy,
+ std::vector<BasedUser> &UsersToProcess,
+ const TargetLowering *TLI) {
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
+ if (UsersToProcess[i].isUseOfPostIncrementedValue)
+ continue;
+ ExtAddrMode AddrMode =
+ AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
+ AccessTy, UsersToProcess[i].Inst,
+ AddrModeInsts, *TLI);
+ if (GV && GV != AddrMode.BaseGV)
+ return false;
+ if (Offset && !AddrMode.BaseOffs)
+      // FIXME: How to accurately check that the immediate offset is folded?
+ return false;
+ AddrModeInsts.clear();
+ }
+ return true;
+}
+
+/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
+/// stride of IV. All of the users may have different starting values, and this
+/// may not be the only stride.
+void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L) {
+ // If all the users are moved to another stride, then there is nothing to do.
+ if (Uses.Users.empty())
+ return;
+
+ // Keep track if every use in UsersToProcess is an address. If they all are,
+ // we may be able to rewrite the entire collection of them in terms of a
+ // smaller-stride IV.
+ bool AllUsesAreAddresses = true;
+
+ // Keep track if every use of a single stride is outside the loop. If so,
+ // we want to be more aggressive about reusing a smaller-stride IV; a
+ // multiply outside the loop is better than another IV inside. Well, usually.
+ bool AllUsesAreOutsideLoop = true;
+
+ // Transform our list of users and offsets to a bit more complex table. In
+  // this new vector, each 'BasedUser' contains 'Base', the base of the
+  // strided access, as well as the old information from Uses. We progressively
+ // move information from the Base field to the Imm field, until we eventually
+ // have the full access expression to rewrite the use.
+ std::vector<BasedUser> UsersToProcess;
+ SCEVHandle CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+
+ // Sort the UsersToProcess array so that users with common bases are
+ // next to each other.
+ SortUsersToProcess(UsersToProcess);
+
+ // If we managed to find some expressions in common, we'll need to carry
+ // their value in a register and add it in for each use. This will take up
+ // a register operand, which potentially restricts what stride values are
+ // valid.
+ bool HaveCommonExprs = !CommonExprs->isZero();
+ const Type *ReplacedTy = CommonExprs->getType();
+
+ // If all uses are addresses, consider sinking the immediate part of the
+ // common expression back into uses if they can fit in the immediate fields.
+ if (TLI && HaveCommonExprs && AllUsesAreAddresses) {
+ SCEVHandle NewCommon = CommonExprs;
+ SCEVHandle Imm = SE->getIntegerSCEV(0, ReplacedTy);
+ MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE);
+ if (!Imm->isZero()) {
+ bool DoSink = true;
+
+ // If the immediate part of the common expression is a GV, check if it's
+ // possible to fold it into the target addressing mode.
+ GlobalValue *GV = 0;
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm))
+ GV = dyn_cast<GlobalValue>(SU->getValue());
+ int64_t Offset = 0;
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
+ Offset = SC->getValue()->getSExtValue();
+ if (GV || Offset)
+ // Pass VoidTy as the AccessTy to be conservative, because
+ // there could be multiple access types among all the uses.
+ DoSink = IsImmFoldedIntoAddrMode(GV, Offset, Type::VoidTy,
+ UsersToProcess, TLI);
+
+ if (DoSink) {
+ DOUT << " Sinking " << *Imm << " back down into uses\n";
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
+ UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
+ CommonExprs = NewCommon;
+ HaveCommonExprs = !CommonExprs->isZero();
+ ++NumImmSunk;
+ }
+ }
+ }
+
+ // Now that we know what we need to do, insert the PHI node itself.
+ //
+ DOUT << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
+ << *Stride << ":\n"
+ << " Common base: " << *CommonExprs << "\n";
+
+ SCEVExpander Rewriter(*SE);
+ SCEVExpander PreheaderRewriter(*SE);
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ Instruction *PreInsertPt = Preheader->getTerminator();
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ Instruction *IVIncInsertPt = LatchBlock->getTerminator();
+
+ Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
+
+ SCEVHandle RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
+ IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
+ SE->getIntegerSCEV(0, Type::Int32Ty),
+ 0);
+
+ /// Choose a strength-reduction strategy and prepare for it by creating
+ /// the necessary PHIs and adjusting the bookkeeping.
+ if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
+ AllUsesAreAddresses, Stride)) {
+ PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
+ PreheaderRewriter);
+ } else {
+ // Emit the initial base value into the loop preheader.
+ CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy,
+ PreInsertPt);
+
+ // If all uses are addresses, check if it is possible to reuse an IV. The
+ // new IV must have a stride that is a multiple of the old stride; the
+ // multiple must be a number that can be encoded in the scale field of the
+ // target addressing mode; and we must have a valid instruction after this
+ // substitution, including the immediate field, if any.
+ RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ Stride, ReuseIV, ReplacedTy,
+ UsersToProcess);
+ if (!RewriteFactor->isZero())
+ PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV,
+ ReuseIV, PreInsertPt);
+ else {
+ IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L);
+ PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
+ CommonBaseV, IVIncInsertPt,
+ L, PreheaderRewriter);
+ }
+ }
+
+ // Process all the users now, replacing their strided uses with
+ // strength-reduced forms. This outer loop handles all bases, the inner
+ // loop handles all users of a particular base.
+ while (!UsersToProcess.empty()) {
+ SCEVHandle Base = UsersToProcess.back().Base;
+ Instruction *Inst = UsersToProcess.back().Inst;
+
+ // Emit the code for Base into the preheader.
+ Value *BaseV = 0;
+ if (!Base->isZero()) {
+ BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt);
+
+ DOUT << " INSERTING code for BASE = " << *Base << ":";
+ if (BaseV->hasName())
+ DOUT << " Result value name = %" << BaseV->getNameStr();
+ DOUT << "\n";
+
+ // If BaseV is a non-zero constant, make sure that it gets inserted into
+ // the preheader, instead of being forward substituted into the uses. We
+ // do this by forcing a BitCast (noop cast) to be inserted into the
+ // preheader in this case.
+ if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false)) {
+ // We want this constant emitted into the preheader! This is just
+        // using cast as a copy, so a BitCast (no-op cast) is appropriate.
+ BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
+ PreInsertPt);
+ }
+ }
+
+ // Emit the code to add the immediate offset to the Phi value, just before
+ // the instructions that we identified as using this stride and base.
+ do {
+ // FIXME: Use emitted users to emit other users.
+ BasedUser &User = UsersToProcess.back();
+
+ DOUT << " Examining ";
+ if (User.isUseOfPostIncrementedValue)
+ DOUT << "postinc";
+ else
+ DOUT << "preinc";
+ DOUT << " use ";
+ DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace,
+ /*PrintType=*/false));
+ DOUT << " in Inst: " << *(User.Inst);
+
+ // If this instruction wants to use the post-incremented value, move it
+ // after the post-inc and use its value instead of the PHI.
+ Value *RewriteOp = User.Phi;
+ if (User.isUseOfPostIncrementedValue) {
+ RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock);
+ // If this user is in the loop, make sure it is the last thing in the
+ // loop to ensure it is dominated by the increment. In case it's the
+ // only use of the iv, the increment instruction is already before the
+ // use.
+ if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt)
+ User.Inst->moveBefore(IVIncInsertPt);
+ }
+
+ SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
+
+ if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
+ SE->getEffectiveSCEVType(ReplacedTy)) {
+ assert(SE->getTypeSizeInBits(RewriteOp->getType()) >
+ SE->getTypeSizeInBits(ReplacedTy) &&
+ "Unexpected widening cast!");
+ RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy);
+ }
+
+ // If we had to insert new instructions for RewriteOp, we have to
+ // consider that they may not have been able to end up immediately
+ // next to RewriteOp, because non-PHI instructions may never precede
+ // PHI instructions in a block. In this case, remember where the last
+ // instruction was inserted so that if we're replacing a different
+ // PHI node, we can use the later point to expand the final
+ // RewriteExpr.
+ Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
+ if (RewriteOp == User.Phi) NewBasePt = 0;
+
+ // Clear the SCEVExpander's expression map so that we are guaranteed
+ // to have the code emitted where we expect it.
+ Rewriter.clear();
+
+ // If we are reusing the iv, then it must be multiplied by a constant
+ // factor to take advantage of the addressing mode scale component.
+ if (!RewriteFactor->isZero()) {
+ // If we're reusing an IV with a nonzero base (currently this happens
+        // only when all reuses are outside the loop), subtract that base here.
+ // The base has been used to initialize the PHI node but we don't want
+ // it here.
+ if (!ReuseIV.Base->isZero()) {
+ SCEVHandle typedBase = ReuseIV.Base;
+ if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
+ SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
+ // It's possible the original IV is a larger type than the new IV,
+ // in which case we have to truncate the Base. We checked in
+ // RequiresTypeConversion that this is valid.
+ assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
+ SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
+ "Unexpected lengthening conversion!");
+ typedBase = SE->getTruncateExpr(ReuseIV.Base,
+ RewriteExpr->getType());
+ }
+ RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
+ }
+
+ // Multiply old variable, with base removed, by new scale factor.
+ RewriteExpr = SE->getMulExpr(RewriteFactor,
+ RewriteExpr);
+
+ // The common base is emitted in the loop preheader. But since we
+ // are reusing an IV, it has not been used to initialize the PHI node.
+ // Add it to the expression used to rewrite the uses.
+ // When this use is outside the loop, we earlier subtracted the
+ // common base, and are adding it back here. Use the same expression
+ // as before, rather than CommonBaseV, so DAGCombiner will zap it.
+ if (!CommonExprs->isZero()) {
+ if (L->contains(User.Inst->getParent()))
+ RewriteExpr = SE->getAddExpr(RewriteExpr,
+ SE->getUnknown(CommonBaseV));
+ else
+ RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs);
+ }
+ }
+
+ // Now that we know what we need to do, insert code before User for the
+ // immediate and any loop-variant expressions.
+ if (BaseV)
+ // Add BaseV to the PHI value if needed.
+ RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
+
+ User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
+ Rewriter, L, this, *LI,
+ DeadInsts);
+
+ // Mark old value we replaced as possibly dead, so that it is eliminated
+ // if we just replaced the last use of that value.
+ DeadInsts.push_back(User.OperandValToReplace);
+
+ UsersToProcess.pop_back();
+ ++NumReduced;
+
+ // If there are any more users to process with the same base, process them
+ // now. We sorted by base above, so we just have to check the last elt.
+ } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
+ // TODO: Next, find out which base index is the most common, pull it out.
+ }
+
+  // IMPORTANT TODO: Figure out how to partition the IVs with this stride,
+  // but different starting values, into different PHIs.
+}
+
+/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
+/// set the IV user and stride information and return true, otherwise return
+/// false.
+bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
+ const SCEVHandle *&CondStride) {
+ for (unsigned Stride = 0, e = IU->StrideOrder.size();
+ Stride != e && !CondUse; ++Stride) {
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+
+ for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
+ E = SI->second->Users.end(); UI != E; ++UI)
+ if (UI->getUser() == Cond) {
+ // NOTE: we could handle setcc instructions with multiple uses here, but
+        // InstCombine does it as well for simple uses; it's not clear that it
+ // occurs enough in real life to handle.
+ CondUse = UI;
+ CondStride = &SI->first;
+ return true;
+ }
+ }
+ return false;
+}
+
+namespace {
+  // Constant strides come first, and are in turn sorted by their absolute
+  // values. If the absolute values are the same, positive strides come first.
+ // e.g.
+ // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
+ struct StrideCompare {
+ const ScalarEvolution *SE;
+ explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
+
+ bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
+ const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
+ const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
+ if (LHSC && RHSC) {
+ int64_t LV = LHSC->getValue()->getSExtValue();
+ int64_t RV = RHSC->getValue()->getSExtValue();
+ uint64_t ALV = (LV < 0) ? -LV : LV;
+ uint64_t ARV = (RV < 0) ? -RV : RV;
+ if (ALV == ARV) {
+ if (LV != RV)
+ return LV > RV;
+ } else {
+ return ALV < ARV;
+ }
+
+ // If it's the same value but different type, sort by bit width so
+ // that we emit larger induction variables before smaller
+ // ones, letting the smaller be re-written in terms of larger ones.
+ return SE->getTypeSizeInBits(RHS->getType()) <
+ SE->getTypeSizeInBits(LHS->getType());
+ }
+ return LHSC && !RHSC;
+ }
+ };
+}
+
+/// ChangeCompareStride - If a loop termination compare instruction is the
+/// only use of its stride, and the comparison is against a constant value,
+/// try to eliminate the stride by moving the compare instruction to another
+/// stride and change its constant operand accordingly. e.g.
+///
+/// loop:
+/// ...
+/// v1 = v1 + 3
+/// v2 = v2 + 1
+/// if (v2 < 10) goto loop
+/// =>
+/// loop:
+/// ...
+/// v1 = v1 + 3
+/// if (v1 < 30) goto loop
+ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse,
+ const SCEVHandle* &CondStride) {
+ // If there's only one stride in the loop, there's nothing to do here.
+ if (IU->StrideOrder.size() < 2)
+ return Cond;
+ // If there are other users of the condition's stride, don't bother
+ // trying to change the condition because the stride will still
+ // remain.
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator I =
+ IU->IVUsesByStride.find(*CondStride);
+ if (I == IU->IVUsesByStride.end() ||
+ I->second->Users.size() != 1)
+ return Cond;
+ // Only handle constant strides for now.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
+ if (!SC) return Cond;
+
+ ICmpInst::Predicate Predicate = Cond->getPredicate();
+ int64_t CmpSSInt = SC->getValue()->getSExtValue();
+ unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType());
+ uint64_t SignBit = 1ULL << (BitWidth-1);
+ const Type *CmpTy = Cond->getOperand(0)->getType();
+ const Type *NewCmpTy = NULL;
+ unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
+ unsigned NewTyBits = 0;
+ SCEVHandle *NewStride = NULL;
+ Value *NewCmpLHS = NULL;
+ Value *NewCmpRHS = NULL;
+ int64_t Scale = 1;
+ SCEVHandle NewOffset = SE->getIntegerSCEV(0, CmpTy);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
+ int64_t CmpVal = C->getValue().getSExtValue();
+
+    // Check the stride constant and the comparison constant signs to detect
+ // overflow.
+ if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
+ return Cond;
+
+ // Look for a suitable stride / iv as replacement.
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ if (!isa<SCEVConstant>(SI->first))
+ continue;
+ int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+ if (SSInt == CmpSSInt ||
+ abs64(SSInt) < abs64(CmpSSInt) ||
+ (SSInt % CmpSSInt) != 0)
+ continue;
+
+ Scale = SSInt / CmpSSInt;
+ int64_t NewCmpVal = CmpVal * Scale;
+ APInt Mul = APInt(BitWidth*2, CmpVal, true);
+ Mul = Mul * APInt(BitWidth*2, Scale, true);
+ // Check for overflow.
+ if (!Mul.isSignedIntN(BitWidth))
+ continue;
+ // Check for overflow in the stride's type too.
+ if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
+ continue;
+
+ // Watch out for overflow.
+ if (ICmpInst::isSignedPredicate(Predicate) &&
+ (CmpVal & SignBit) != (NewCmpVal & SignBit))
+ continue;
+
+ if (NewCmpVal == CmpVal)
+ continue;
+      // Pick the best iv to use, trying to avoid a cast.
+ NewCmpLHS = NULL;
+ for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
+ E = SI->second->Users.end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+
+ // If the IVStrideUse implies a cast, check for an actual cast which
+ // can be used to find the original IV expression.
+ if (SE->getEffectiveSCEVType(Op->getType()) !=
+ SE->getEffectiveSCEVType(SI->first->getType())) {
+ CastInst *CI = dyn_cast<CastInst>(Op);
+ // If it's not a simple cast, it's complicated.
+ if (!CI)
+ continue;
+ // If it's a cast from a type other than the stride type,
+ // it's complicated.
+ if (CI->getOperand(0)->getType() != SI->first->getType())
+ continue;
+ // Ok, we found the IV expression in the stride's type.
+ Op = CI->getOperand(0);
+ }
+
+ NewCmpLHS = Op;
+ if (NewCmpLHS->getType() == CmpTy)
+ break;
+ }
+ if (!NewCmpLHS)
+ continue;
+
+ NewCmpTy = NewCmpLHS->getType();
+ NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
+ const Type *NewCmpIntTy = IntegerType::get(NewTyBits);
+ if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
+ // Check if it is possible to rewrite it using
+ // an iv / stride of a smaller integer type.
+ unsigned Bits = NewTyBits;
+ if (ICmpInst::isSignedPredicate(Predicate))
+ --Bits;
+ uint64_t Mask = (1ULL << Bits) - 1;
+ if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
+ continue;
+ }
+
+      // Don't rewrite if the use offset is non-constant and the new type
+      // differs from the old one.
+ // FIXME: too conservative?
+ if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
+ continue;
+
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ // Avoid rewriting the compare instruction with an iv of new stride
+ // if it's likely the new stride uses will be rewritten using the
+ // stride of the compare instruction.
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+ continue;
+
+ // Avoid rewriting the compare instruction with an iv which has
+ // implicit extension or truncation built into it.
+ // TODO: This is over-conservative.
+ if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits)
+ continue;
+
+ // If scale is negative, use swapped predicate unless it's testing
+ // for equality.
+ if (Scale < 0 && !Cond->isEquality())
+ Predicate = ICmpInst::getSwappedPredicate(Predicate);
+
+ NewStride = &IU->StrideOrder[i];
+ if (!isa<PointerType>(NewCmpTy))
+ NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
+ else {
+ ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
+ NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
+ }
+ NewOffset = TyBits == NewTyBits
+ ? SE->getMulExpr(CondUse->getOffset(),
+ SE->getConstant(ConstantInt::get(CmpTy, Scale)))
+ : SE->getConstant(ConstantInt::get(NewCmpIntTy,
+ cast<SCEVConstant>(CondUse->getOffset())->getValue()
+ ->getSExtValue()*Scale));
+ break;
+ }
+ }
+
+  // Forgo this transformation if the increment happens to be
+ // unfortunately positioned after the condition, and the condition
+ // has multiple uses which prevent it from being moved immediately
+ // before the branch. See
+ // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
+ // for an example of this situation.
+ if (!Cond->hasOneUse()) {
+ for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
+ I != E; ++I)
+ if (I == NewCmpLHS)
+ return Cond;
+ }
+
+ if (NewCmpRHS) {
+ // Create a new compare instruction using new stride / iv.
+ ICmpInst *OldCond = Cond;
+ // Insert new compare instruction.
+ Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS,
+ L->getHeader()->getName() + ".termcond",
+ OldCond);
+
+ // Remove the old compare instruction. The old indvar is probably dead too.
+ DeadInsts.push_back(CondUse->getOperandValToReplace());
+ OldCond->replaceAllUsesWith(Cond);
+ OldCond->eraseFromParent();
+
+ IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false);
+ CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
+ CondStride = NewStride;
+ ++NumEliminated;
+ Changed = true;
+ }
+
+ return Cond;
+}
+
+/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
+/// an smax computation.
+///
+/// This is a narrow solution to a specific, but acute, problem. For loops
+/// like this:
+///
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+///
+/// where the comparison is signed, the trip count isn't just 'n', because
+/// 'n' could be negative. And unfortunately this can come up even for loops
+/// where the user didn't use a C do-while loop. For example, seemingly
+/// well-behaved top-test loops will commonly be lowered like this:
+///
+/// if (n > 0) {
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+/// }
+///
+/// and then it's possible for subsequent optimization to obscure the if
+/// test in such a way that indvars can't find it.
+///
+/// When indvars can't find the if test in loops like this, it creates a
+/// signed-max expression, which allows it to give the loop a canonical
+/// induction variable:
+///
+/// i = 0;
+/// smax = n < 1 ? 1 : n;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i != smax);
+///
+/// Canonical induction variables are necessary because the loop passes
+/// are designed around them. The most obvious example of this is the
+/// LoopInfo analysis, which doesn't remember trip count values. It
+/// expects to be able to rediscover the trip count each time it is
+/// needed, and it does this using a simple analysis that only succeeds if
+/// the loop has a canonical induction variable.
+///
+/// However, when it comes time to generate code, the maximum operation
+/// can be quite costly, especially if it's inside of an outer loop.
+///
+/// This function solves this problem by detecting such loops and
+/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
+/// the instructions for the maximum computation.
+///
+ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse) {
+ // Check that the loop matches the pattern we're looking for.
+ if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
+ Cond->getPredicate() != CmpInst::ICMP_NE)
+ return Cond;
+
+ SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
+ if (!Sel || !Sel->hasOneUse()) return Cond;
+
+ SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return Cond;
+ SCEVHandle One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+
+ // Add one to the backedge-taken count to get the trip count.
+ SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
+
+ // Check for a max calculation that matches the pattern.
+ const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
+ if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
+
+ SCEVHandle SMaxLHS = SMax->getOperand(0);
+ SCEVHandle SMaxRHS = SMax->getOperand(1);
+ if (!SMaxLHS || SMaxLHS != One) return Cond;
+
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine() ||
+ AR->getStart() != One ||
+ AR->getStepRecurrence(*SE) != One)
+ return Cond;
+
+ assert(AR->getLoop() == L &&
+ "Loop condition operand is an addrec in a different loop!");
+
+ // Check the right operand of the select, and remember it, as it will
+ // be used in the new comparison instruction.
+ Value *NewRHS = 0;
+ if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
+ NewRHS = Sel->getOperand(1);
+ else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
+ NewRHS = Sel->getOperand(2);
+ if (!NewRHS) return Cond;
+
+ // Ok, everything looks ok to change the condition into an SLT or SGE and
+ // delete the max calculation.
+ ICmpInst *NewCond =
+ new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
+ CmpInst::ICMP_SLT :
+ CmpInst::ICMP_SGE,
+ Cond->getOperand(0), NewRHS, "scmp", Cond);
+
+ // Delete the max calculation instructions.
+ Cond->replaceAllUsesWith(NewCond);
+ CondUse->setUser(NewCond);
+ Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
+ Cond->eraseFromParent();
+ Sel->eraseFromParent();
+ if (Cmp->use_empty())
+ Cmp->eraseFromParent();
+ return NewCond;
+}
+
+/// OptimizeShadowIV - If the IV is used in an int-to-float cast
+/// inside the loop, then try to eliminate the cast operation.
+void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
+
+ SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return;
+
+ for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
+ ++Stride) {
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ if (!isa<SCEVConstant>(SI->first))
+ continue;
+
+ for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
+ E = SI->second->Users.end(); UI != E; /* empty */) {
+ ilist<IVStrideUse>::iterator CandidateUI = UI;
+ ++UI;
+ Instruction *ShadowUse = CandidateUI->getUser();
+ const Type *DestTy = NULL;
+
+ /* If shadow use is a int->float cast then insert a second IV
+ to eliminate this cast.
+
+ for (unsigned i = 0; i < n; ++i)
+ foo((double)i);
+
+ is transformed into
+
+ double d = 0.0;
+ for (unsigned i = 0; i < n; ++i, ++d)
+ foo(d);
+ */
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ DestTy = UCast->getDestTy();
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ DestTy = SCast->getDestTy();
+ if (!DestTy) continue;
+
+ if (TLI) {
+ // If target does not support DestTy natively then do not apply
+ // this transformation.
+ MVT DVT = TLI->getValueType(DestTy);
+ if (!TLI->isTypeLegal(DVT)) continue;
+ }
+
+ PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
+ if (!PH) continue;
+ if (PH->getNumIncomingValues() != 2) continue;
+
+ const Type *SrcTy = PH->getType();
+ int Mantissa = DestTy->getFPMantissaWidth();
+ if (Mantissa == -1) continue;
+ if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
+ continue;
+
+ unsigned Entry, Latch;
+ if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
+ Entry = 0;
+ Latch = 1;
+ } else {
+ Entry = 1;
+ Latch = 0;
+ }
+
+ ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
+ if (!Init) continue;
+ ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
+ if (!Incr) continue;
+ if (Incr->getOpcode() != Instruction::Add
+ && Incr->getOpcode() != Instruction::Sub)
+ continue;
+
+ /* Initialize new IV, double d = 0.0 in above example. */
+ ConstantInt *C = NULL;
+ if (Incr->getOperand(0) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(1));
+ else if (Incr->getOperand(1) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(0));
+ else
+ continue;
+
+ if (!C) continue;
+
+ /* Add new PHINode. */
+ PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
+
+ /* create new increment. '++d' in above example. */
+ ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ BinaryOperator *NewIncr =
+ BinaryOperator::Create(Incr->getOpcode(),
+ NewPH, CFP, "IV.S.next.", Incr);
+
+ NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
+ NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
+
+ /* Remove cast operation */
+ ShadowUse->replaceAllUsesWith(NewPH);
+ ShadowUse->eraseFromParent();
+ NumShadow++;
+ break;
+ }
+ }
+}
+
+// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
+// uses in the loop, look to see if we can eliminate some, in favor of using
+// common indvars for the different uses.
+void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
+  // TODO: implement optimizations here.
+
+ OptimizeShadowIV(L);
+}
+
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
+/// postinc iv when possible.
+void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
+ // Finally, get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ BasicBlock *ExitBlock = L->getExitingBlock();
+ if (!ExitBlock)
+ // Multiple exits, just look at the exit in the latch block if there is one.
+ ExitBlock = LatchBlock;
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator());
+ if (!TermBr)
+ return;
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ return;
+
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ const SCEVHandle *CondStride = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse, CondStride))
+ return; // setcc doesn't use the IV.
+
+ if (ExitBlock != LatchBlock) {
+ if (!Cond->hasOneUse())
+ // See below, we don't want the condition to be cloned.
+ return;
+
+    // If the exiting block is the latch block, we know it's safe and
+    // profitable to transform the icmp to use the post-inc iv. Otherwise do
+    // so only if it would not reuse another iv and its iv would be reused by
+    // other uses. We are optimizing for the case where the icmp is the only
+    // use of the iv.
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (I->getUser() == Cond)
+ continue;
+ if (!I->isUseOfPostIncrementedValue())
+ return;
+ }
+
+ // FIXME: This is expensive, and worse still ChangeCompareStride does a
+ // similar check. Can we perform all the icmp related transformations after
+ // StrengthReduceStridedIVUsers?
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
+ int64_t SInt = SC->getValue()->getSExtValue();
+ for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
+ ++NewStride) {
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
+ if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
+ continue;
+ int64_t SSInt =
+ cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+ if (SSInt == SInt)
+ return; // This can definitely be reused.
+ if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
+ continue;
+ int64_t Scale = SSInt / SInt;
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ // Avoid rewriting the compare instruction with an iv of new stride
+ // if it's likely the new stride uses will be rewritten using the
+ // stride of the compare instruction.
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+ return;
+ }
+ }
+
+ StrideNoReuse.insert(*CondStride);
+ }
+
+ // If the trip count is computed in terms of an smax (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT instead of NE.
+ Cond = OptimizeSMax(L, Cond, CondUse);
+
+ // If possible, change stride and operands of the compare instruction to
+ // eliminate one stride.
+ if (ExitBlock == LatchBlock)
+ Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the latch block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
+ if (Cond->hasOneUse()) { // Condition has a single use, just move it.
+ Cond->moveBefore(TermBr);
+ } else {
+ // Otherwise, clone the terminating condition and insert into the loopend.
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ LatchBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
+ CondUse->getOperandValToReplace(),
+ false);
+ CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
+ }
+ }
+
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
+ CondUse->setIsUseOfPostIncrementedValue(true);
+ Changed = true;
+
+ ++NumLoopCond;
+}
+
+// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+// when to exit the loop is used only for that purpose, try to rearrange things
+// so it counts down to a test against zero.
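+//
+// For example (illustratively):
+//   for (i = start; i != end; ++i) ...   // i used only by the exit test
+// becomes
+//   for (n = end - start; n != 0; --n) ...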
+void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+
+ // If the number of times the loop is executed isn't computable, give up.
+ SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return;
+
+ // Get the terminating condition for the loop if possible (this isn't
+ // necessarily in the latch, or a block that's a predecessor of the header).
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1) return;
+
+ // Okay, there is one exit block. Try to find the condition that causes the
+ // loop to be exited.
+ BasicBlock *ExitBlock = ExitBlocks[0];
+
+ BasicBlock *ExitingBlock = 0;
+ for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock);
+ PI != E; ++PI)
+ if (L->contains(*PI)) {
+ if (ExitingBlock == 0)
+ ExitingBlock = *PI;
+ else
+ return; // More than one block exiting!
+ }
+ assert(ExitingBlock && "No exits from loop, something is broken!");
+
+ // Okay, we've computed the exiting block. See what condition causes us to
+ // exit.
+ //
+ // FIXME: we should be able to handle switch instructions (with a single exit)
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (TermBr == 0) return;
+ assert(TermBr->isConditional() && "If unconditional, it can't be in loop!");
+ if (!isa<ICmpInst>(TermBr->getCondition()))
+ return;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+
+ // Handle only tests for equality for the moment, and only stride 1.
+ if (Cond->getPredicate() != CmpInst::ICMP_EQ)
+ return;
+ SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ SCEVHandle One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+ if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
+ return;
+ // If the RHS of the comparison is defined inside the loop, the rewrite
+ // cannot be done.
+ if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1)))
+ if (L->contains(CR->getParent()))
+ return;
+
+ // Make sure the IV is only used for counting. Value may be preinc or
+ // postinc; 2 uses in either case.
+ if (!Cond->getOperand(0)->hasNUses(2))
+ return;
+ PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0));
+ Instruction *incr;
+ if (phi && phi->getParent()==L->getHeader()) {
+    // Value tested is preinc. Find the increment.
+ // A CmpInst is not a BinaryOperator; we depend on this.
+ Instruction::use_iterator UI = phi->use_begin();
+ incr = dyn_cast<BinaryOperator>(UI);
+ if (!incr)
+ incr = dyn_cast<BinaryOperator>(++UI);
+ // 1 use for postinc value, the phi. Unnecessarily conservative?
+ if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add)
+ return;
+ } else {
+ // Value tested is postinc. Find the phi node.
+ incr = dyn_cast<BinaryOperator>(Cond->getOperand(0));
+ if (!incr || incr->getOpcode()!=Instruction::Add)
+ return;
+
+ Instruction::use_iterator UI = Cond->getOperand(0)->use_begin();
+ phi = dyn_cast<PHINode>(UI);
+ if (!phi)
+ phi = dyn_cast<PHINode>(++UI);
+ // 1 use for preinc value, the increment.
+ if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse())
+ return;
+ }
+
+ // Replace the increment with a decrement.
+ BinaryOperator *decr =
+ BinaryOperator::Create(Instruction::Sub, incr->getOperand(0),
+ incr->getOperand(1), "tmp", incr);
+ incr->replaceAllUsesWith(decr);
+ incr->eraseFromParent();
+
+ // Substitute endval-startval for the original startval, and 0 for the
+ // original endval. Since we're only testing for equality this is OK even
+ // if the computation wraps around.
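+  // E.g. with startval 5 and endval 13 the new IV counts down from 8 to 0,
+  // preserving the original 8 iterations (illustrative numbers).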
+ BasicBlock *Preheader = L->getLoopPreheader();
+ Instruction *PreInsertPt = Preheader->getTerminator();
+ int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0;
+ Value *startVal = phi->getIncomingValue(inBlock);
+ Value *endVal = Cond->getOperand(1);
+  // FIXME: check for the case where both are constant.
+ ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
+ BinaryOperator *NewStartVal =
+ BinaryOperator::Create(Instruction::Sub, endVal, startVal,
+ "tmp", PreInsertPt);
+ phi->setIncomingValue(inBlock, NewStartVal);
+ Cond->setOperand(1, Zero);
+
+ Changed = true;
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
+
+ IU = &getAnalysis<IVUsers>();
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = &getAnalysis<ScalarEvolution>();
+ Changed = false;
+
+ if (!IU->IVUsesByStride.empty()) {
+#ifndef NDEBUG
+ DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
+ << "\" ";
+ DEBUG(L->dump());
+#endif
+
+ // Sort the StrideOrder so we process larger strides first.
+ std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
+ StrideCompare(SE));
+
+ // Optimize induction variables. Some indvar uses can be transformed to use
+ // strides that will be needed for other purposes. A common example of this
+ // is the exit test for the loop, which can often be rewritten to use the
+ // computation of some other indvar to decide when to terminate the loop.
+ OptimizeIndvars(L);
+
+ // Change loop terminating condition to use the postinc iv when possible
+ // and optimize loop terminating compare. FIXME: Move this after
+ // StrengthReduceStridedIVUsers?
+ OptimizeLoopTermCond(L);
+
+ // FIXME: We can shrink overlarge IV's here. e.g. if the code has
+ // computation in i64 values and the target doesn't support i64, demote
+ // the computation to 32-bit if safe.
+
+ // FIXME: Attempt to reuse values across multiple IV's. In particular, we
+ // could have something like "for(i) { foo(i*8); bar(i*16) }", which should
+ // be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
+ // Need to be careful that IV's are all the same type. Only works for
+ // intptr_t indvars.
+
+ // IVsByStride keeps IVs for one particular loop.
+ assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
+
+ // Note: this processes each stride/type pair individually. All users
+ // passed into StrengthReduceStridedIVUsers have the same type AND stride.
+ // Also, note that we iterate over IVUsesByStride indirectly by using
+ // StrideOrder. This extra layer of indirection makes the ordering of
+ // strides deterministic - not dependent on map order.
+ for (unsigned Stride = 0, e = IU->StrideOrder.size();
+ Stride != e; ++Stride) {
+ std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
+ }
+ }
+
+ // After all sharing is done, see if we can adjust the loop to test against
+ // zero instead of counting up to a maximum. This is usually faster.
+ OptimizeLoopCountIV(L);
+
+ // We're done analyzing this loop; release all the state we built up for it.
+ IVsByStride.clear();
+ StrideNoReuse.clear();
+
+ // Clean up after ourselves
+ if (!DeadInsts.empty())
+ DeleteTriviallyDeadInstructions();
+
+ // At this point, it is worth checking to see if any recurrence PHIs are also
+ // dead, so that we can remove them as well.
+ DeleteDeadPHIs(L->getHeader());
+
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp
new file mode 100644
index 0000000..23757cd
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopUnroll.cpp
@@ -0,0 +1,183 @@
+//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop unroller. It works best when loops have
+// been canonicalized by the -indvars pass, allowing it to determine the trip
+// counts of loops easily.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <climits>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+ cl::desc("The cut-off point for automatic loop unrolling"));
+
+static cl::opt<unsigned>
+UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+ cl::desc("Use this unroll count for all loops, for testing purposes"));
+
+static cl::opt<bool>
+UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+ cl::desc("Allows loops to be partially unrolled until "
+ "-unroll-threshold loop size is reached."));
+
+namespace {
+ class VISIBILITY_HIDDEN LoopUnroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll() : LoopPass(&ID) {}
+
+    /// A magic value for use with the -unroll-threshold option to indicate
+ /// that the loop unroll should be performed regardless of how much
+ /// code expansion would result.
+ static const unsigned NoThreshold = UINT_MAX;
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<LoopInfo>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+ };
+}
+
+char LoopUnroll::ID = 0;
+static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
+
+Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
+
+/// ApproximateLoopSize - Approximate the size of the loop.
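+/// As an illustrative tally, a block with one plain add, a single-use GEP
+/// feeding a load, and one non-intrinsic call would contribute
+/// 1 + 0 + 1 + 10 = 12 to the size under the rules below.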
+static unsigned ApproximateLoopSize(const Loop *L) {
+ unsigned Size = 0;
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ Instruction *Term = BB->getTerminator();
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (isa<PHINode>(I) && BB == L->getHeader()) {
+ // Ignore PHI nodes in the header.
+ } else if (I->hasOneUse() && I->use_back() == Term) {
+ // Ignore instructions only used by the loop terminator.
+ } else if (isa<DbgInfoIntrinsic>(I)) {
+        // Ignore debug instructions.
+      } else if (isa<GetElementPtrInst>(I) && I->hasOneUse()) {
+        // Ignore GEPs, as they are generally subsumed into a load or store.
+ } else if (isa<CallInst>(I)) {
+ // Estimate size overhead introduced by call instructions which
+ // is higher than other instructions. Here 3 and 10 are magic
+ // numbers that help one isolated test case from PR2067 without
+ // negatively impacting measured benchmarks.
+ if (isa<IntrinsicInst>(I))
+ Size = Size + 3;
+ else
+ Size = Size + 10;
+ } else {
+ ++Size;
+ }
+
+      // TODO: Ignore expressions derived from PHIs and constants if the
+      // incoming value of the phi is a constant, or if the operation is
+      // associative. This will catch induction variables.
+ }
+ }
+
+ return Size;
+}
+
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ assert(L->isLCSSAForm());
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+
+ BasicBlock *Header = L->getHeader();
+ DOUT << "Loop Unroll: F[" << Header->getParent()->getName()
+ << "] Loop %" << Header->getName() << "\n";
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ unsigned Count = UnrollCount;
+
+ // Automatically select an unroll count.
+ if (Count == 0) {
+    // Conservative heuristic: if we know the trip count, see if we can
+    // completely unroll it (subject to the threshold, checked below);
+    // otherwise the partial-unroll path below tries the largest divisor of
+    // the trip count whose unrolled size is still under the threshold.
+ if (TripCount != 0) {
+ Count = TripCount;
+ } else {
+ return false;
+ }
+ }
+
+ // Enforce the threshold.
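+  // Illustrative numbers: with TripCount 12, LoopSize 30, and the default
+  // threshold of 100, a full unroll has size 360 and is rejected; with
+  // -unroll-allow-partial, Count becomes 100/30 == 3, which divides 12,
+  // for an unrolled size of 90.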
+ if (UnrollThreshold != NoThreshold) {
+ unsigned LoopSize = ApproximateLoopSize(L);
+ DOUT << " Loop Size = " << LoopSize << "\n";
+ uint64_t Size = (uint64_t)LoopSize*Count;
+ if (TripCount != 1 && Size > UnrollThreshold) {
+ DOUT << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << UnrollThreshold << "\n";
+ if (UnrollAllowPartial) {
+      // Reduce the unroll count to a divisor of TripCount for partial unrolling.
+ Count = UnrollThreshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0) {
+ Count--;
+ }
+ if (Count < 2) {
+ DOUT << " could not unroll partially\n";
+ return false;
+ } else {
+ DOUT << " partially unrolling with count: " << Count << "\n";
+ }
+ } else {
+ DOUT << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n";
+ return false;
+ }
+ }
+ }
+
+ // Unroll the loop.
+ Function *F = L->getHeader()->getParent();
+ if (!UnrollLoop(L, Count, LI, &LPM))
+ return false;
+
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ if (DT) {
+ DT->runOnFunction(*F);
+ DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
+ if (DF)
+ DF->runOnFunction(*F);
+ }
+ return true;
+}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
new file mode 100644
index 0000000..e3e881f
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -0,0 +1,1098 @@
+//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops that contain branches on loop-invariant conditions
+// to have multiple loops. For example, it turns the left into the right code:
+//
+// for (...) if (lic)
+// A for (...)
+// if (lic) A; B; C
+// B else
+// C for (...)
+// A; C
+//
+// This can increase the size of the code exponentially (doubling it every time
+// a loop is unswitched) so we only unswitch if the resultant code will be
+// smaller than a threshold.
+//
+// This pass expects LICM to be run before it to hoist invariant conditions out
+// of the loop, to make the unswitching opportunity obvious.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unswitch"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumBranches, "Number of branches unswitched");
+STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumSelects , "Number of selects unswitched");
+STATISTIC(NumTrivial , "Number of unswitches that are trivial");
+STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
+
+static cl::opt<unsigned>
+Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
+ cl::init(10), cl::Hidden);
+
+namespace {
+ class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass {
+ LoopInfo *LI; // Loop information
+ LPPassManager *LPM;
+
+    // LoopProcessWorklist - Used to check if the second loop needs processing
+    // after RewriteLoopBodyWithConditionConstant rewrites the first loop.
+ std::vector<Loop*> LoopProcessWorklist;
+ SmallPtrSet<Value *,8> UnswitchedVals;
+
+ bool OptimizeForSize;
+ bool redoLoop;
+
+ Loop *currentLoop;
+ DominanceFrontier *DF;
+ DominatorTree *DT;
+ BasicBlock *loopHeader;
+ BasicBlock *loopPreheader;
+
+ // LoopBlocks contains all of the basic blocks of the loop, including the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
+ // loop, in that order.
+ std::vector<BasicBlock*> LoopBlocks;
+    // NewBlocks contains cloned copies of the basic blocks in LoopBlocks.
+ std::vector<BasicBlock*> NewBlocks;
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopUnswitch(bool Os = false) :
+ LoopPass(&ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
+ loopPreheader(NULL) {}
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool processCurrentLoop();
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+
+ private:
+
+ /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
+ /// remove it.
+ void RemoveLoopFromWorklist(Loop *L) {
+ std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
+ LoopProcessWorklist.end(), L);
+ if (I != LoopProcessWorklist.end())
+ LoopProcessWorklist.erase(I);
+ }
+
+ void initLoopData() {
+ loopHeader = currentLoop->getHeader();
+ loopPreheader = currentLoop->getLoopPreheader();
+ }
+
+ /// Split all of the edges from inside the loop to their exit blocks.
+ /// Update the appropriate Phi nodes as we do so.
+ void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
+
+ bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
+ unsigned getLoopUnswitchCost(Value *LIC);
+ void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
+ BasicBlock *ExitBlock);
+ void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
+
+ void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val, bool isEqual);
+
+ void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt);
+
+ void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
+ void RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist, Loop *l);
+ void RemoveLoopFromHierarchy(Loop *L);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
+ BasicBlock **LoopExit = 0);
+
+ };
+}
+char LoopUnswitch::ID = 0;
+static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops");
+
+Pass *llvm::createLoopUnswitchPass(bool Os) {
+ return new LoopUnswitch(Os);
+}
+
+/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
+/// invariant in the loop, or has an invariant piece, return the invariant.
+/// Otherwise, return null.
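+/// For example, given a condition "%cmp & %inv" where %inv is loop-invariant
+/// and %cmp is not, this returns %inv (an illustrative case).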
+static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+ // Constants should be folded, not unswitched on!
+ if (isa<Constant>(Cond)) return 0;
+
+ // TODO: Handle: br (VARIANT|INVARIANT).
+ // TODO: Hoist simple expressions out of loops.
+ if (L->isLoopInvariant(Cond)) return Cond;
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
+ if (BO->getOpcode() == Instruction::And ||
+ BO->getOpcode() == Instruction::Or) {
+ // If either the left or right side is invariant, we can unswitch on this,
+ // which will cause the branch to go away in one loop and the condition to
+ // simplify in the other one.
+ if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed))
+ return LHS;
+ if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
+ return RHS;
+ }
+
+ return 0;
+}
+
+bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
+ LI = &getAnalysis<LoopInfo>();
+ LPM = &LPM_Ref;
+ DF = getAnalysisIfAvailable<DominanceFrontier>();
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ currentLoop = L;
+ Function *F = currentLoop->getHeader()->getParent();
+ bool Changed = false;
+ do {
+ assert(currentLoop->isLCSSAForm());
+ redoLoop = false;
+ Changed |= processCurrentLoop();
+ } while(redoLoop);
+
+ if (Changed) {
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (DT)
+ DT->runOnFunction(*F);
+ if (DF)
+ DF->runOnFunction(*F);
+ }
+ return Changed;
+}
+
+/// processCurrentLoop - Do the actual work of unswitching the current loop,
+/// if possible and profitable.
+bool LoopUnswitch::processCurrentLoop() {
+ bool Changed = false;
+
+ // Loop over all of the basic blocks in the loop. If we find an interior
+ // block that is branching on a loop-invariant condition, we can unswitch this
+ // loop.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end();
+ I != E; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ // If this isn't branching on an invariant condition, we can't unswitch
+ // it.
+ if (BI->isConditional()) {
+ // See if this, or some part of it, is loop invariant. If so, we can
+ // unswitch on it if we desire.
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue())) {
+ ++NumBranches;
+ return true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && SI->getNumCases() > 1) {
+ // Find a value to unswitch on:
+      // FIXME: this should choose the most expensive case!
+ Constant *UnswitchVal = SI->getCaseValue(1);
+ // Do not process same value again and again.
+ if (!UnswitchedVals.insert(UnswitchVal))
+ continue;
+
+ if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
+ ++NumSwitches;
+ return true;
+ }
+ }
+ }
+
+ // Scan the instructions to check for unswitchable values.
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+ BBI != E; ++BBI)
+ if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue())) {
+ ++NumSelects;
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
+/// 1. Exit the loop with no side effects.
+/// 2. Branch to the latch block with no side-effects.
+///
+/// If these conditions are true, we return true and set ExitBB to the block we
+/// exit through.
+///
+static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
+ BasicBlock *&ExitBB,
+ std::set<BasicBlock*> &Visited) {
+ if (!Visited.insert(BB).second) {
+ // Already visited and Ok, end of recursion.
+ return true;
+ } else if (!L->contains(BB)) {
+    // Otherwise, this is a loop exit, which is fine so long as it is the
+    // first exit we encounter.
+ if (ExitBB != 0) return false;
+ ExitBB = BB;
+ return true;
+ }
+
+ // Otherwise, this is an unvisited intra-loop node. Check all successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
+ // Check to see if the successor is a trivial loop exit.
+ if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
+ return false;
+ }
+
+ // Okay, everything after this looks good, check to make sure that this block
+ // doesn't include any side effects.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+
+ return true;
+}
+
+/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
+/// leads to an exit from the specified loop, and has no side-effects in the
+/// process. If so, return the block that is exited to, otherwise return null.
+static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
+ std::set<BasicBlock*> Visited;
+ Visited.insert(L->getHeader()); // Branches to header are ok.
+ BasicBlock *ExitBB = 0;
+ if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
+ return ExitBB;
+ return 0;
+}
+
+/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
+/// trivial: that is, that the condition controls whether or not the loop does
+/// anything at all. If this is a trivial condition, unswitching produces no
+/// code duplications (equivalently, it produces a simpler loop and a new empty
+/// loop, which gets deleted).
+///
+/// If this is a trivial condition, return true, otherwise return false. When
+/// returning true, this sets Cond and Val to the condition that controls the
+/// trivial condition: when Cond dynamically equals Val, the loop is known to
+/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
+/// Cond == Val.
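+/// For example, a header ending in "br %inv, %exit, %body", where the path
+/// through %exit has no side effects, is trivial with Val == true
+/// (an illustrative case).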
+///
+bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
+ BasicBlock **LoopExit) {
+ BasicBlock *Header = currentLoop->getHeader();
+ TerminatorInst *HeaderTerm = Header->getTerminator();
+
+ BasicBlock *LoopExitBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
+ // If the header block doesn't end with a conditional branch on Cond, we
+ // can't handle it.
+ if (!BI->isConditional() || BI->getCondition() != Cond)
+ return false;
+
+ // Check to see if a successor of the branch is guaranteed to go to the
+  // latch block or exit through a single exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this.
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
+ if (Val) *Val = ConstantInt::getTrue();
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
+ if (Val) *Val = ConstantInt::getFalse();
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
+ // If this isn't a switch on Cond, we can't handle it.
+ if (SI->getCondition() != Cond) return false;
+
+ // Check to see if a successor of the switch is guaranteed to go to the
+  // latch block or exit through a single exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this. Note that we can't trivially unswitch on the default case.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ SI->getSuccessor(i)))) {
+ // Okay, we found a trivial case, remember the value that is trivial.
+ if (Val) *Val = SI->getCaseValue(i);
+ break;
+ }
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit block
+ // contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ if (LoopExit) *LoopExit = LoopExitBB;
+
+ // We already know that nothing uses any scalar values defined inside of this
+ // loop. As such, we just have to check to see if this loop will execute any
+ // side-effecting instructions (e.g. stores, calls, volatile loads) in the
+ // part of the loop that the code *would* execute. We already checked the
+ // tail, check the header now.
+ for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+ return true;
+}
+
+/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if
+/// we choose to unswitch current loop on the specified value.
+///
+unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {
+ // If the condition is trivial, always unswitch. There is no code growth for
+ // this case.
+ if (IsTrivialUnswitchCondition(LIC))
+ return 0;
+
+ // FIXME: This is really overly conservative. However, more liberal
+ // estimations have thus far resulted in excessive unswitching, which is bad
+ // both in compile time and in code size. This should be replaced once
+  // someone figures out how to make a good estimate.
+ return currentLoop->getBlocks().size();
+
+ unsigned Cost = 0;
+ // FIXME: this is brain dead. It should take into consideration code
+ // shrinkage.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+    // Do not include empty blocks in the cost calculation. These happen due
+    // to loop canonicalization and will be removed.
+ if (BB->begin() == BasicBlock::iterator(BB->getTerminator()))
+ continue;
+
+ // Count basic blocks.
+ ++Cost;
+ }
+
+ return Cost;
+}
+
+/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
+/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
+/// unswitch the loop, reprocess the pieces, then return true.
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
+ initLoopData();
+ Function *F = loopHeader->getParent();
+
+ // Check to see if it would be profitable to unswitch current loop.
+ unsigned Cost = getLoopUnswitchCost(LoopCond);
+
+ // Do not do non-trivial unswitch while optimizing for size.
+ if (Cost && OptimizeForSize)
+ return false;
+ if (Cost && !F->isDeclaration() && F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ if (Cost > Threshold) {
+ // FIXME: this should estimate growth by the amount of code shared by the
+ // resultant unswitched loops.
+ //
+ DOUT << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName() << ", cost too high: "
+ << currentLoop->getBlocks().size() << "\n";
+ return false;
+ }
+
+ Constant *CondVal;
+ BasicBlock *ExitBlock;
+ if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
+ } else {
+ UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
+ }
+
+ return true;
+}
+
+// RemapInstruction - Convert the instruction operands from referencing the
+// current values into those specified by ValueMap.
+//
+static inline void RemapInstruction(Instruction *I,
+ DenseMap<const Value *, Value*> &ValueMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
+ if (It != ValueMap.end()) Op = It->second;
+ I->setOperand(op, Op);
+ }
+}
+
+/// CloneLoop - Recursively clone the specified loop and all of its children,
+/// mapping the blocks with the specified map.
+static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
+ LoopInfo *LI, LPPassManager *LPM) {
+ Loop *New = new Loop();
+
+ LPM->insertLoop(New, PL);
+
+ // Add all of the blocks in L to the new loop.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L)
+ New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase());
+
+ // Add all of the subloops to the new loop.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ CloneLoop(*I, New, VM, LI, LPM);
+
+ return New;
+}
+
+/// EmitPreheaderBranchOnCondition - Emit a conditional branch on LIC: if
+/// LIC == Val, branch to TrueDest, otherwise branch to FalseDest. Insert the
+/// code immediately before InsertPt.
+void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt) {
+ // Insert a conditional branch on LIC to the two preheaders. The original
+ // code is the true version and the new code is the false version.
+ Value *BranchVal = LIC;
+ if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty)
+ BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt);
+ else if (Val != ConstantInt::getTrue())
+ // We want to enter the new loop when the condition is true.
+ std::swap(TrueDest, FalseDest);
+
+ // Insert the new branch.
+ BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+}
+
+/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
+/// condition in it (a cond branch from its header block to its latch block,
+/// where the path through the loop that doesn't execute its body has no
+/// side-effects), unswitch it. This doesn't involve any code duplication, just
+/// moving the conditional branch outside of the loop and updating loop info.
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
+ Constant *Val,
+ BasicBlock *ExitBlock) {
+ DOUT << "loop-unswitch: Trivial-Unswitch loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << L->getHeader()->getParent()->getName()
+ << " on cond: " << *Val << " == " << *Cond << "\n";
+
+ // First step, split the preheader, so that we know that there is a safe place
+ // to insert the conditional branch. We will change loopPreheader to have a
+ // conditional branch on Cond.
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
+
+ // Now that we have a place to insert the conditional branch, create a place
+ // to branch to: this is the exit block out of the loop that we should
+ // short-circuit to.
+
+ // Split this block now, so that the loop maintains its exit block, and so
+ // that the jump from the preheader can execute the contents of the exit block
+ // without actually branching to it (the exit block should be dominated by the
+ // loop header, not the preheader).
+ assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
+ BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
+
+ // Okay, now we have a position to branch from and a position to branch to,
+ // insert the new conditional branch.
+ EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
+ loopPreheader->getTerminator());
+ LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
+ loopPreheader->getTerminator()->eraseFromParent();
+
+ // We need to reprocess this loop, it could be unswitched again.
+ redoLoop = true;
+
+ // Now that we know that the loop is never entered when this condition is a
+ // particular value, rewrite the loop with this info. We know that this will
+ // at least eliminate the old branch.
+ RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
+ ++NumTrivial;
+}
+
+/// SplitExitEdges - Split all of the edges from inside the loop to their exit
+/// blocks. Update the appropriate Phi nodes as we do so.
+void LoopUnswitch::SplitExitEdges(Loop *L,
+                               const SmallVector<BasicBlock *, 8> &ExitBlocks) {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock));
+
+ for (unsigned j = 0, e = Preds.size(); j != e; ++j) {
+ BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this);
+ BasicBlock* StartBlock = Preds[j];
+ BasicBlock* EndBlock;
+ if (NewExitBlock->getSinglePredecessor() == ExitBlock) {
+ EndBlock = NewExitBlock;
+ NewExitBlock = EndBlock->getSinglePredecessor();
+ } else {
+ EndBlock = ExitBlock;
+ }
+
+ std::set<PHINode*> InsertedPHIs;
+ PHINode* OldLCSSA = 0;
+ for (BasicBlock::iterator I = EndBlock->begin();
+ (OldLCSSA = dyn_cast<PHINode>(I)); ++I) {
+ Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock);
+ PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(),
+ OldLCSSA->getName() + ".us-lcssa",
+ NewExitBlock->getTerminator());
+ NewLCSSA->addIncoming(OldValue, StartBlock);
+ OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock),
+ NewLCSSA);
+ InsertedPHIs.insert(NewLCSSA);
+ }
+
+ BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI();
+ for (BasicBlock::iterator I = NewExitBlock->begin();
+ (OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0;
+ ++I) {
+ PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(),
+ OldLCSSA->getName() + ".us-lcssa",
+ InsertPt);
+ OldLCSSA->replaceAllUsesWith(NewLCSSA);
+ NewLCSSA->addIncoming(OldLCSSA, NewExitBlock);
+      }
+    }
+  }
+}
+
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equals Val. Split it into loop versions and test the
+/// condition outside of either loop.
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+ Loop *L) {
+ Function *F = loopHeader->getParent();
+ DOUT << "loop-unswitch: Unswitching loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << F->getName()
+ << " when '" << *Val << "' == " << *LIC << "\n";
+
+ LoopBlocks.clear();
+ NewBlocks.clear();
+
+ // First step, split the preheader and exit blocks, and add these blocks to
+ // the LoopBlocks list.
+ BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
+ LoopBlocks.push_back(NewPreheader);
+
+ // We want the loop to come after the preheader, but before the exit blocks.
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Split all of the edges from inside the loop to their exit blocks. Update
+ // the appropriate Phi nodes as we do so.
+ SplitExitEdges(L, ExitBlocks);
+
+ // The exit blocks may have been changed due to edge splitting, recompute.
+ ExitBlocks.clear();
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Add exit blocks to the loop blocks.
+ LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+
+ // Next step, clone all of the basic blocks that make up the loop (including
+ // the loop preheader and exit blocks), keeping track of the mapping between
+ // the instructions and blocks.
+ NewBlocks.reserve(LoopBlocks.size());
+ DenseMap<const Value*, Value*> ValueMap;
+ for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
+ BasicBlock *New = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+ NewBlocks.push_back(New);
+ ValueMap[LoopBlocks[i]] = New; // Keep the BB mapping.
+ LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L);
+ }
+
+ // Splice the newly inserted blocks into the function right before the
+ // original preheader.
+ F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Now we create the new Loop object for the versioned loop.
+ Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM);
+ Loop *ParentLoop = L->getParentLoop();
+ if (ParentLoop) {
+ // Make sure to add the cloned preheader and exit blocks to the parent loop
+ // as well.
+ ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
+ }
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]);
+ // The new exit block should be in the same loop as the old one.
+ if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
+ ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
+
+ assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
+ "Exit block should have been split to have one successor!");
+ BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+
+ // If the successor of the exit block had PHI nodes, add an entry for
+ // NewExit.
+ PHINode *PN;
+ for (BasicBlock::iterator I = ExitSucc->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+ Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
+ DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
+ if (It != ValueMap.end()) V = It->second;
+ PN->addIncoming(V, NewExit);
+ }
+ }
+
+ // Rewrite the code to refer to itself.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ RemapInstruction(I, ValueMap);
+
+ // Rewrite the original preheader to select between versions of the loop.
+ BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
+ "Preheader splitting did not work correctly!");
+
+ // Emit the new branch that selects between the two versions of this loop.
+ EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
+ LPM->deleteSimpleAnalysisValue(OldBR, L);
+ OldBR->eraseFromParent();
+
+ LoopProcessWorklist.push_back(NewLoop);
+ redoLoop = true;
+
+ // Now we rewrite the original code to know that the condition is true and the
+ // new code to know that the condition is false.
+  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
+
+ // It's possible that simplifying one loop could cause the other to be
+ // deleted. If so, don't simplify it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop)
+ RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true);
+}
+
+/// RemoveFromWorklist - Remove all instances of I from the worklist vector
+/// specified.
+static void RemoveFromWorklist(Instruction *I,
+ std::vector<Instruction*> &Worklist) {
+ std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
+ Worklist.end(), I);
+ while (WI != Worklist.end()) {
+ unsigned Offset = WI-Worklist.begin();
+ Worklist.erase(WI);
+ WI = std::find(Worklist.begin()+Offset, Worklist.end(), I);
+ }
+}
+
+/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
+/// program, replacing all uses with V and update the worklist.
+static void ReplaceUsesOfWith(Instruction *I, Value *V,
+ std::vector<Instruction*> &Worklist,
+ Loop *L, LPPassManager *LPM) {
+ DOUT << "Replace with '" << *V << "': " << *I;
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+
+ // Add users to the worklist which may be simplified now.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->replaceAllUsesWith(V);
+ I->eraseFromParent();
+ ++NumSimplify;
+}
+
+/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
+/// information, and remove any dead successors it has.
+///
+void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist,
+ Loop *L) {
+ if (pred_begin(BB) != pred_end(BB)) {
+ // This block isn't dead, since an edge to BB was just removed, see if there
+ // are any easy simplifications we can do now.
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ // If it has one pred, fold phi nodes in BB.
+ while (isa<PHINode>(BB->begin()))
+ ReplaceUsesOfWith(BB->begin(),
+ cast<PHINode>(BB->begin())->getIncomingValue(0),
+ Worklist, L, LPM);
+
+ // If this is the header of a loop and the only pred is the latch, we now
+ // have an unreachable loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ if (loopHeader == BB && L->contains(Pred)) {
+ // Remove the branch from the latch to the header block, this makes
+ // the header dead, which will make the latch dead (because the header
+ // dominates the latch).
+ LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
+ Pred->getTerminator()->eraseFromParent();
+ new UnreachableInst(Pred);
+
+ // The loop is now broken, remove it from LI.
+ RemoveLoopFromHierarchy(L);
+
+ // Reprocess the header, which now IS dead.
+ RemoveBlockIfDead(BB, Worklist, L);
+ return;
+ }
+
+      // If pred ends in an uncond branch, add it to the worklist so that
+ // the two blocks will get merged.
+ if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (BI->isUnconditional())
+ Worklist.push_back(BI);
+ }
+ return;
+ }
+
+ DOUT << "Nuking dead block: " << *BB;
+
+ // Remove the instructions in the basic block from the worklist.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ RemoveFromWorklist(I, Worklist);
+
+ // Anything that uses the instructions in this basic block should have their
+ // uses replaced with undefs.
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // If this is the edge to the header block for a loop, remove the loop and
+ // promote all subloops.
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+ if (BBLoop->getLoopLatch() == BB)
+ RemoveLoopFromHierarchy(BBLoop);
+ }
+
+ // Remove the block from the loop info, which removes it from any loops it
+ // was in.
+ LI->removeBlock(BB);
+
+ // Remove phi node entries in successors for this block.
+ TerminatorInst *TI = BB->getTerminator();
+ SmallVector<BasicBlock*, 4> Succs;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ Succs.push_back(TI->getSuccessor(i));
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+
+  // Unique the successors, removing duplicate entries.
+ array_pod_sort(Succs.begin(), Succs.end());
+ Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
+
+ // Remove the basic block, including all of the instructions contained in it.
+ LPM->deleteSimpleAnalysisValue(BB, L);
+ BB->eraseFromParent();
+ // Remove successor blocks here that are not dead, so that we know we only
+ // have dead blocks in this list. Nondead blocks have a way of becoming dead,
+ // then getting removed before we revisit them, which is badness.
+ //
+ for (unsigned i = 0; i != Succs.size(); ++i)
+ if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
+ // One exception is loop headers. If this block was the preheader for a
+ // loop, then we DO want to visit the loop so the loop gets deleted.
+      // We know that if the successor is a loop header, this block had to be
+      // its preheader: the case where this was the latch block was handled
+      // above, and headers can only have two predecessors.
+ if (!LI->isLoopHeader(Succs[i])) {
+ Succs.erase(Succs.begin()+i);
+ --i;
+ }
+ }
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ RemoveBlockIfDead(Succs[i], Worklist, L);
+}
+
+/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
+/// become unwrapped, either because the backedge was deleted, or because the
+/// edge into the header was removed. If the edge into the header from the
+/// latch block was removed, the loop is unwrapped but its subloops are still
+/// alive, so they are just reparented. If the loops are actually dead, they
+/// will be removed later.
+void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
+ LPM->deleteLoopFromQueue(L);
+ RemoveLoopFromWorklist(L);
+}
+
+// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
+// the value specified by Val in the specified loop, or we know it does NOT have
+// that value. Rewrite any uses of LIC or of properties correlated to it.
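+// For example, the loop version entered when LIC == Val can replace uses of
+// LIC with Val outright, while the other version only learns that LIC is
+// not Val, which still suffices to delete a matching switch case
+// (an illustrative summary of the two paths below).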
+void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val,
+ bool IsEqual) {
+ assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
+
+ // FIXME: Support correlated properties, like:
+ // for (...)
+ // if (li1 < li2)
+ // ...
+ // if (li1 > li2)
+ // ...
+
+ // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
+ // selects, switches.
+ std::vector<User*> Users(LIC->use_begin(), LIC->use_end());
+ std::vector<Instruction*> Worklist;
+
+ // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
+ // in the loop with the appropriate one directly.
+ if (IsEqual || (isa<ConstantInt>(Val) && Val->getType() == Type::Int1Ty)) {
+ Value *Replacement;
+ if (IsEqual)
+ Replacement = Val;
+ else
+ Replacement = ConstantInt::get(Type::Int1Ty,
+ !cast<ConstantInt>(Val)->getZExtValue());
+
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ if (Instruction *U = cast<Instruction>(Users[i])) {
+ if (!L->contains(U->getParent()))
+ continue;
+ U->replaceUsesOfWith(LIC, Replacement);
+ Worklist.push_back(U);
+ }
+ } else {
+ // Otherwise, we don't know the precise value of LIC, but we do know that it
+ // is certainly NOT "Val". As such, simplify any uses in the loop that we
+ // can. This case occurs when we unswitch switch statements.
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ if (Instruction *U = cast<Instruction>(Users[i])) {
+ if (!L->contains(U->getParent()))
+ continue;
+
+ Worklist.push_back(U);
+
+ // If we know that LIC is not Val, use this info to simplify code.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(U)) {
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+ if (SI->getCaseValue(i) == Val) {
+ // Found a dead case value. Don't remove PHI nodes in the
+ // successor if they become single-entry, those PHI nodes may
+ // be in the Users list.
+
+ // FIXME: This is a hack. We need to keep the successor around
+ // and hooked up so as to preserve the loop structure, because
+ // trying to update it is complicated. So instead we preserve the
+            // loop structure and put the block on a dead code path.
+
+ BasicBlock *SISucc = SI->getSuccessor(i);
+ BasicBlock* Old = SI->getParent();
+ BasicBlock* Split = SplitBlock(Old, SI, this);
+
+ Instruction* OldTerm = Old->getTerminator();
+ BranchInst::Create(Split, SISucc,
+ ConstantInt::getTrue(), OldTerm);
+
+ LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L);
+ Old->getTerminator()->eraseFromParent();
+
+ PHINode *PN;
+ for (BasicBlock::iterator II = SISucc->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *InVal = PN->removeIncomingValue(Split, false);
+ PN->addIncoming(InVal, Old);
+ }
+
+ SI->removeCase(i);
+ break;
+ }
+ }
+ }
+
+ // TODO: We could do other simplifications, for example, turning
+ // LIC == Val -> false.
+ }
+ }
+
+ SimplifyCode(Worklist, L);
+}
+
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
+/// loop, walk over it and constant prop, dce, and fold control flow where
+/// possible. Note that this is effectively a very simple loop-structure-aware
+/// optimizer. During processing of this loop, L could very well be deleted, so
+/// it must not be used.
+///
+/// FIXME: When the loop optimizer is more mature, separate this out to a new
+/// pass.
+///
+void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ // Simple constant folding.
+ if (Constant *C = ConstantFoldInstruction(I)) {
+ ReplaceUsesOfWith(I, C, Worklist, L, LPM);
+ continue;
+ }
+
+ // Simple DCE.
+ if (isInstructionTriviallyDead(I)) {
+ DOUT << "Remove dead instruction '" << *I;
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ // Special case hacks that appear commonly in unswitched code.
+ switch (I->getOpcode()) {
+ case Instruction::Select:
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(0))) {
+ ReplaceUsesOfWith(I, I->getOperand(!CB->getZExtValue()+1), Worklist, L,
+ LPM);
+ continue;
+ }
+ break;
+ case Instruction::And:
+ if (isa<ConstantInt>(I->getOperand(0)) &&
+ I->getOperand(0)->getType() == Type::Int1Ty) // constant -> RHS
+ cast<BinaryOperator>(I)->swapOperands();
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (CB->getType() == Type::Int1Ty) {
+ if (CB->isOne()) // X & 1 -> X
+ ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
+ else // X & 0 -> 0
+ ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
+ continue;
+ }
+ break;
+ case Instruction::Or:
+ if (isa<ConstantInt>(I->getOperand(0)) &&
+ I->getOperand(0)->getType() == Type::Int1Ty) // constant -> RHS
+ cast<BinaryOperator>(I)->swapOperands();
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (CB->getType() == Type::Int1Ty) {
+ if (CB->isOne()) // X | 1 -> 1
+ ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
+ else // X | 0 -> X
+ ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
+ continue;
+ }
+ break;
+ case Instruction::Br: {
+ BranchInst *BI = cast<BranchInst>(I);
+ if (BI->isUnconditional()) {
+ // If BI's parent is the only pred of the successor, fold the two blocks
+ // together.
+ BasicBlock *Pred = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
+ BasicBlock *SinglePred = Succ->getSinglePredecessor();
+ if (!SinglePred) continue; // Nothing to do.
+ assert(SinglePred == Pred && "CFG broken");
+
+ DOUT << "Merging blocks: " << Pred->getName() << " <- "
+ << Succ->getName() << "\n";
+
+ // Resolve any single entry PHI nodes in Succ.
+ while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
+ ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+
+ // Move all of the successor contents from Succ to Pred.
+ Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
+ Succ->end());
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+
+ // If Succ has any successors with PHI nodes, update them to have
+ // entries coming from Pred instead of Succ.
+ Succ->replaceAllUsesWith(Pred);
+
+ // Remove Succ from the loop tree.
+ LI->removeBlock(Succ);
+ LPM->deleteSimpleAnalysisValue(Succ, L);
+ Succ->eraseFromParent();
+ ++NumSimplify;
+ } else if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
+ // Conditional branch. Turn it into an unconditional branch, then
+ // remove dead blocks.
+ break; // FIXME: Enable.
+
+ DOUT << "Folded branch: " << *BI;
+ BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
+ BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
+ DeadSucc->removePredecessor(BI->getParent(), true);
+ Worklist.push_back(BranchInst::Create(LiveSucc, BI));
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+ ++NumSimplify;
+
+ RemoveBlockIfDead(DeadSucc, Worklist, L);
+ }
+ break;
+ }
+ }
+ }
+}
diff --git a/lib/Transforms/Scalar/Makefile b/lib/Transforms/Scalar/Makefile
new file mode 100644
index 0000000..cc42fd0
--- /dev/null
+++ b/lib/Transforms/Scalar/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/Scalar/Makefile ----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMScalarOpts
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
new file mode 100644
index 0000000..5cf0518
--- /dev/null
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -0,0 +1,741 @@
+//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs various transformations related to eliminating memcpy
+// calls, or transforming sets of stores into memset's.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memcpyopt"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
+STATISTIC(NumMemSetInfer, "Number of memsets inferred");
+
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with. This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
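+/// For example, i32 0x01010101 splits into equal halves 0x0101/0x0101 and
+/// then 0x01/0x01, so it splats to the byte 0x01, while i16 0x1234 fails at
+/// the first split.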
+static Value *isBytewiseValue(Value *V) {
+ // All byte-wide stores are splatable, even of arbitrary variables.
+ if (V->getType() == Type::Int8Ty) return V;
+
+ // Constant float and double values can be handled as integer values if the
+ // corresponding integer value is "byteable". An important case is 0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::FloatTy)
+ V = ConstantExpr::getBitCast(CFP, Type::Int32Ty);
+ if (CFP->getType() == Type::DoubleTy)
+ V = ConstantExpr::getBitCast(CFP, Type::Int64Ty);
+ // Don't handle long double formats, which have strange constraints.
+ }
+
+ // We can handle constant integers that are power of two in size and a
+ // multiple of 8 bits.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ unsigned Width = CI->getBitWidth();
+ if (isPowerOf2_32(Width) && Width > 8) {
+ // We can handle this value if the recursive binary decomposition is the
+ // same at all levels.
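+      // For illustration (not from the original source): i32 0xF0F0F0F0
+      // halves to 0xF0F0 / 0xF0F0 and then to 0xF0 / 0xF0, so we return the
+      // i8 value 0xF0, while i16 0x1234 halves to 0x12 / 0x34 and is
+      // rejected.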
+ APInt Val = CI->getValue();
+ APInt Val2;
+ while (Val.getBitWidth() != 8) {
+ unsigned NextWidth = Val.getBitWidth()/2;
+ Val2 = Val.lshr(NextWidth);
+ Val2.trunc(Val.getBitWidth()/2);
+ Val.trunc(Val.getBitWidth()/2);
+
+ // If the top/bottom halves aren't the same, reject it.
+ if (Val != Val2)
+ return 0;
+ }
+ return ConstantInt::get(Val);
+ }
+ }
+
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return 0;
+}
+
+static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
+ bool &VariableIdxFound, TargetData &TD) {
+  // Skip over the first Idx-1 indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1; i != Idx; ++i, ++GTI)
+ /*skip along*/;
+
+ // Compute the offset implied by the rest of the indices.
+ int64_t Offset = 0;
+ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (OpC == 0)
+      return VariableIdxFound = true; // Returned offset is meaningless here.
+ if (OpC->isZero()) continue; // No offset.
+
+ // Handle struct indices, which add their field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ continue;
+ }
+
+ // Otherwise, we have a sequential type like an array or vector. Multiply
+ // the index by the ElementSize.
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*OpC->getSExtValue();
+ }
+
+ return Offset;
+}
+
+/// IsPointerOffset - Return true if Ptr2 is provably equal to Ptr1 plus a
+/// constant offset, and return that constant offset. For example, Ptr1 might
+/// be &A[42], and Ptr2 might be &A[40]; with 4-byte elements the offset would
+/// be -8.
+static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
+ TargetData &TD) {
+ // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
+ // base. After that base, they may have some number of common (and
+  // potentially variable) indices. After those, any remaining indices must be
+  // constant, and those constants determine the pointers' offset from each
+  // other. We handle no other cases at this point.
+ GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+ GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+ return false;
+
+ // Skip any common indices and track the GEP types.
+ unsigned Idx = 1;
+ for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+ break;
+
+ bool VariableIdxFound = false;
+ int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
+ int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+ if (VariableIdxFound) return false;
+
+ Offset = Offset2-Offset1;
+ return true;
+}
+
+
+/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// This allows us to analyze stores like:
+/// store 0 -> P+1
+/// store 0 -> P+0
+/// store 0 -> P+3
+/// store 0 -> P+2
+/// which sometimes happens with stores to arrays of structs etc. When we see
+/// the first store, we make a range [1, 2). The second store extends the range
+/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
+/// two ranges into [0, 3) which is memset'able.
+namespace {
+struct MemsetRange {
+  // Start/End - A semi-open interval that describes the span this range
+  // covers.
+ // The range is closed at the start and open at the end: [Start, End).
+ int64_t Start, End;
+
+ /// StartPtr - The getelementptr instruction that points to the start of the
+ /// range.
+ Value *StartPtr;
+
+ /// Alignment - The known alignment of the first store.
+ unsigned Alignment;
+
+ /// TheStores - The actual stores that make up this range.
+ SmallVector<StoreInst*, 16> TheStores;
+
+ bool isProfitableToUseMemset(const TargetData &TD) const;
+
+};
+} // end anon namespace
+
+bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
+  // If we found at least 8 stores to merge, or at least 64 bytes, use memset.
+ if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+ // Assume that the code generator is capable of merging pairs of stores
+ // together if it wants to.
+ if (TheStores.size() <= 2) return false;
+
+ // If we have fewer than 8 stores, it can still be worthwhile to do this.
+ // For example, merging 4 i8 stores into an i32 store is useful almost always.
+ // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
+ // memset will be split into 2 32-bit stores anyway) and doing so can
+ // pessimize the llvm optimizer.
+ //
+ // Since we don't have perfect knowledge here, make some assumptions: assume
+ // the maximum GPR width is the same size as the pointer size and assume that
+ // this width can be stored. If so, check to see whether we will end up
+ // actually reducing the number of stores used.
+ unsigned Bytes = unsigned(End-Start);
+ unsigned NumPointerStores = Bytes/TD.getPointerSize();
+
+  // Assume the remaining bytes, if any, are stored a byte at a time.
+ unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+
+ // If we will reduce the # stores (according to this heuristic), do the
+ // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
+ // etc.
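+  //
+  // For illustration (not from the original source): with 4-byte pointers, a
+  // 16-byte span gives NumPointerStores = 4 and NumByteStores = 0, so under
+  // this heuristic the memset wins only if it replaces more than 4 stores.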
+ return TheStores.size() > NumPointerStores+NumByteStores;
+}
+
+
+namespace {
+class MemsetRanges {
+ /// Ranges - A sorted list of the memset ranges. We use std::list here
+ /// because each element is relatively large and expensive to copy.
+ std::list<MemsetRange> Ranges;
+ typedef std::list<MemsetRange>::iterator range_iterator;
+ TargetData &TD;
+public:
+ MemsetRanges(TargetData &td) : TD(td) {}
+
+ typedef std::list<MemsetRange>::const_iterator const_iterator;
+ const_iterator begin() const { return Ranges.begin(); }
+ const_iterator end() const { return Ranges.end(); }
+ bool empty() const { return Ranges.empty(); }
+
+ void addStore(int64_t OffsetFromFirst, StoreInst *SI);
+};
+
+} // end anon namespace
+
+
+/// addStore - Add a new store to the MemsetRanges data structure. This adds a
+/// new range for the specified store at the specified offset, merging into
+/// existing ranges as appropriate.
+void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
+ int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ // Do a linear search of the ranges to see if this can be joined and/or to
+ // find the insertion point in the list. We keep the ranges sorted for
+  // simplicity here. This is a linear search of a linked list, which is ugly;
+  // however, the number of ranges is limited, so this won't get crazy slow.
+ range_iterator I = Ranges.begin(), E = Ranges.end();
+
+ while (I != E && Start > I->End)
+ ++I;
+
+  // We now know either that I == E, in which case we didn't find anything to
+  // merge with, or that Start <= I->End. If End < I->Start or I == E, then we
+  // need to insert a new range. Handle this now.
+ if (I == E || End < I->Start) {
+ MemsetRange &R = *Ranges.insert(I, MemsetRange());
+ R.Start = Start;
+ R.End = End;
+ R.StartPtr = SI->getPointerOperand();
+ R.Alignment = SI->getAlignment();
+ R.TheStores.push_back(SI);
+ return;
+ }
+
+ // This store overlaps with I, add it.
+ I->TheStores.push_back(SI);
+
+ // At this point, we may have an interval that completely contains our store.
+ // If so, just add it to the interval and return.
+ if (I->Start <= Start && I->End >= End)
+ return;
+
+  // Now we know that Start <= I->End and End >= I->Start, so the new store
+  // overlaps I but is not entirely contained within it.
+
+  // See if the new store extends the start of I. In this case, it couldn't
+  // possibly cause I to join the prior range, because otherwise we would have
+  // stopped on *it*.
+ if (Start < I->Start) {
+ I->Start = Start;
+ I->StartPtr = SI->getPointerOperand();
+ }
+
+ // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
+ // is in or right at the end of I), and that End >= I->Start. Extend I out to
+ // End.
+ if (End > I->End) {
+ I->End = End;
+ range_iterator NextI = I;
+ while (++NextI != E && End >= NextI->Start) {
+ // Merge the range in.
+ I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
+ if (NextI->End > I->End)
+ I->End = NextI->End;
+ Ranges.erase(NextI);
+ NextI = I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MemCpyOpt Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+ class VISIBILITY_HIDDEN MemCpyOpt : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MemCpyOpt() : FunctionPass(&ID) {}
+
+ private:
+    // This transformation requires dominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetData>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ AU.addPreserved<TargetData>();
+ }
+
+    // Helper functions
+ bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
+ bool processMemCpy(MemCpyInst* M);
+ bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
+ bool iterateOnFunction(Function &F);
+ };
+
+ char MemCpyOpt::ID = 0;
+}
+
+// createMemCpyOptPass - The public interface to this file...
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
+
+static RegisterPass<MemCpyOpt> X("memcpyopt",
+ "MemCpy Optimization");
+
+
+
+/// processStore - When scanning forward over instructions, we look for
+/// patterns to fold away. In particular, this looks for stores to
+/// neighboring locations of memory. If it sees enough consecutive ones
+/// (currently 4) it attempts to merge them together into a memcpy/memset.
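+///
+/// For illustration (not from the original source), a run of stores such as
+///   store i8 0, i8* %p
+///   store i8 0, i8* %p1    ; %p1 = getelementptr i8* %p, i32 1
+///   store i8 0, i8* %p2    ; %p2 = getelementptr i8* %p, i32 2
+///   store i8 0, i8* %p3    ; %p3 = getelementptr i8* %p, i32 3
+/// can be collapsed into a single llvm.memset of length 4.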
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
+ if (SI->isVolatile()) return false;
+
+ // There are two cases that are interesting for this code to handle: memcpy
+ // and memset. Right now we only handle memset.
+
+  // Ensure that the value being stored is something that can be memset a
+  // byte at a time, like "0" or "-1" of any width, as well as things like
+  // 0xA0A0A0A0 and 0.0.
+ Value *ByteVal = isBytewiseValue(SI->getOperand(0));
+ if (!ByteVal)
+ return false;
+
+ TargetData &TD = getAnalysis<TargetData>();
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // Okay, so we now have a single store of a splatable value. Scan to find
+  // all subsequent stores of the same value at some offset from the same
+  // pointer.
+ // Join these together into ranges, so we can decide whether contiguous blocks
+ // are stored.
+ MemsetRanges Ranges(TD);
+
+ Value *StartPtr = SI->getPointerOperand();
+
+ BasicBlock::iterator BI = SI;
+ for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+ if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+      // If the call is readnone, ignore it; otherwise bail out. We don't even
+      // allow readonly here because we don't want something like:
+      // A[1] = 2; strlen(A); A[2] = 2; -> memset(A, ...); strlen(A).
+ if (AA.getModRefBehavior(CallSite::get(BI)) ==
+ AliasAnalysis::DoesNotAccessMemory)
+ continue;
+
+ // TODO: If this is a memset, try to join it in.
+
+ break;
+ } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
+ break;
+
+ // If this is a non-store instruction it is fine, ignore it.
+ StoreInst *NextStore = dyn_cast<StoreInst>(BI);
+ if (NextStore == 0) continue;
+
+ // If this is a store, see if we can merge it in.
+ if (NextStore->isVolatile()) break;
+
+ // Check to see if this stored value is of the same byte-splattable value.
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
+ break;
+
+ Ranges.addStore(Offset, NextStore);
+ }
+
+ // If we have no ranges, then we just had a single store with nothing that
+ // could be merged in. This is a very common case of course.
+ if (Ranges.empty())
+ return false;
+
+  // If we had at least one store that could be merged in, add the starting
+  // store as well. As a small compile-time optimization, we avoid doing this
+  // unless there is at least something interesting to merge with.
+ Ranges.addStore(0, SI);
+
+
+ Function *MemSetF = 0;
+
+ // Now that we have full information about ranges, loop over the ranges and
+ // emit memset's for anything big enough to be worthwhile.
+ bool MadeChange = false;
+ for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
+ I != E; ++I) {
+ const MemsetRange &Range = *I;
+
+ if (Range.TheStores.size() == 1) continue;
+
+ // If it is profitable to lower this range to memset, do so now.
+ if (!Range.isProfitableToUseMemset(TD))
+ continue;
+
+ // Otherwise, we do want to transform this! Create a new memset. We put
+ // the memset right before the first instruction that isn't part of this
+    // memset block. This ensures that the memset is dominated by any addressing
+ // instruction needed by the start of the block.
+ BasicBlock::iterator InsertPt = BI;
+
+ if (MemSetF == 0) {
+ const Type *Tys[] = {Type::Int64Ty};
+ MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
+ ->getParent(), Intrinsic::memset,
+ Tys, 1);
+ }
+
+ // Get the starting pointer of the block.
+ StartPtr = Range.StartPtr;
+
+ // Cast the start ptr to be i8* as memset requires.
+ const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
+ if (StartPtr->getType() != i8Ptr)
+ StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+ InsertPt);
+
+ Value *Ops[] = {
+ StartPtr, ByteVal, // Start, value
+ ConstantInt::get(Type::Int64Ty, Range.End-Range.Start), // size
+ ConstantInt::get(Type::Int32Ty, Range.Alignment) // align
+ };
+ Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
+ DEBUG(cerr << "Replace stores:\n";
+ for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
+ cerr << *Range.TheStores[i];
+      cerr << "With: " << *C); C=C;  // Keep C "used" when DEBUG compiles away.
+
+ // Don't invalidate the iterator
+ BBI = BI;
+
+ // Zap all the stores.
+ for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(),
+ SE = Range.TheStores.end(); SI != SE; ++SI)
+ (*SI)->eraseFromParent();
+ ++NumMemSetInfer;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+
+/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a call slot optimization by having
+/// the call write its result directly into the destination of the memcpy.
+bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+ // The general transformation to keep in mind is
+ //
+ // call @func(..., src, ...)
+ // memcpy(dest, src, ...)
+ //
+ // ->
+ //
+ // memcpy(dest, src, ...)
+ // call @func(..., dest, ...)
+ //
+ // Since moving the memcpy is technically awkward, we additionally check that
+ // src only holds uninitialized values at the moment of the call, meaning that
+ // the memcpy can be discarded rather than moved.
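+  //
+  // For illustration (not from the original source), in C terms:
+  //   struct S tmp; f(&tmp); memcpy(&dest, &tmp, sizeof tmp);
+  // becomes f(&dest) with the memcpy dropped, provided tmp is an alloca used
+  // nowhere else and dest is at least sizeof tmp bytes.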
+
+ // Deliberately get the source and destination with bitcasts stripped away,
+ // because we'll need to do type comparisons based on the underlying type.
+ Value* cpyDest = cpy->getDest();
+ Value* cpySrc = cpy->getSource();
+ CallSite CS = CallSite::get(C);
+
+ // We need to be able to reason about the size of the memcpy, so we require
+ // that it be a constant.
+ ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+ if (!cpyLength)
+ return false;
+
+ // Require that src be an alloca. This simplifies the reasoning considerably.
+ AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+ if (!srcAlloca)
+ return false;
+
+ // Check that all of src is copied to dest.
+ TargetData& TD = getAnalysis<TargetData>();
+
+ ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+ if (!srcArraySize)
+ return false;
+
+ uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ srcArraySize->getZExtValue();
+
+ if (cpyLength->getZExtValue() < srcSize)
+ return false;
+
+ // Check that accessing the first srcSize bytes of dest will not cause a
+ // trap. Otherwise the transform is invalid since it might cause a trap
+ // to occur earlier than it otherwise would.
+ if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
+ // The destination is an alloca. Check it is larger than srcSize.
+ ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+ if (!destArraySize)
+ return false;
+
+ uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) *
+ destArraySize->getZExtValue();
+
+ if (destSize < srcSize)
+ return false;
+ } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
+ // If the destination is an sret parameter then only accesses that are
+ // outside of the returned struct type can trap.
+ if (!A->hasStructRetAttr())
+ return false;
+
+ const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
+ uint64_t destSize = TD.getTypeAllocSize(StructTy);
+
+ if (destSize < srcSize)
+ return false;
+ } else {
+ return false;
+ }
+
+ // Check that src is not accessed except via the call and the memcpy. This
+ // guarantees that it holds only undefined values when passed in (so the final
+ // memcpy can be dropped), that it is not read or written between the call and
+ // the memcpy, and that writing beyond the end of it is undefined.
+ SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
+ srcAlloca->use_end());
+ while (!srcUseList.empty()) {
+ User* UI = srcUseList.back();
+ srcUseList.pop_back();
+
+ if (isa<BitCastInst>(UI)) {
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (G->hasAllZeroIndices())
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ else
+ return false;
+ } else if (UI != C && UI != cpy) {
+ return false;
+ }
+ }
+
+ // Since we're changing the parameter to the callsite, we need to make sure
+ // that what would be the new parameter dominates the callsite.
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
+ if (!DT.dominates(cpyDestInst, C))
+ return false;
+
+ // In addition to knowing that the call does not access src in some
+ // unexpected manner, for example via a global, which we deduce from
+ // the use analysis, we also need to know that it does not sneakily
+ // access dest. We rely on AA to figure this out for us.
+ AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
+ AliasAnalysis::NoModRef)
+ return false;
+
+ // All the checks have passed, so do the transformation.
+ bool changedArgument = false;
+ for (unsigned i = 0; i < CS.arg_size(); ++i)
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
+ if (cpySrc->getType() != cpyDest->getType())
+ cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
+ cpyDest->getName(), C);
+ changedArgument = true;
+ if (CS.getArgument(i)->getType() != cpyDest->getType())
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.getArgument(i)->getType(), cpyDest->getName(), C));
+ else
+ CS.setArgument(i, cpyDest);
+ }
+
+ if (!changedArgument)
+ return false;
+
+ // Drop any cached information about the call, because we may have changed
+ // its dependence information by changing its parameter.
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+ MD.removeInstruction(C);
+
+ // Remove the memcpy
+ MD.removeInstruction(cpy);
+ cpy->eraseFromParent();
+ NumMemCpyInstr++;
+
+ return true;
+}
+
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances).
+/// This allows later passes to remove the first memcpy altogether.
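+///
+/// For illustration (not from the original source), in IR-like pseudocode:
+///   call void @llvm.memcpy.i64(i8* %Y, i8* %X, i64 64, i32 8)  ; A: X -> Y
+///   call void @llvm.memcpy.i64(i8* %Z, i8* %Y, i64 64, i32 8)  ; B: Y -> Z
+/// B is rewritten to copy from %X directly, after which A may be dead.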
+bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+  // There are two possible optimizations we can do for memcpy:
+  //   a) memcpy-memcpy xform which exposes redundancy for DSE
+ // b) call-memcpy xform for return slot optimization
+ MemDepResult dep = MD.getDependency(M);
+ if (!dep.isClobber())
+ return false;
+ if (!isa<MemCpyInst>(dep.getInst())) {
+ if (CallInst* C = dyn_cast<CallInst>(dep.getInst()))
+ return performCallSlotOptzn(M, C);
+ return false;
+ }
+
+ MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst());
+
+  // We can only transform memcpy's where the dest of one is the source of the
+  // other.
+ if (M->getSource() != MDep->getDest())
+ return false;
+
+  // Second, the lengths of the memcpy's must be the same, or the preceding one
+ // must be larger than the following one.
+ ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
+ if (!C1 || !C2)
+ return false;
+
+ uint64_t DepSize = C1->getValue().getZExtValue();
+ uint64_t CpySize = C2->getValue().getZExtValue();
+
+ if (DepSize < CpySize)
+ return false;
+
+ // Finally, we have to make sure that the dest of the second does not
+ // alias the source of the first
+ AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
+ AliasAnalysis::NoAlias)
+ return false;
+ else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
+ AliasAnalysis::NoAlias)
+ return false;
+ else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
+ != AliasAnalysis::NoAlias)
+ return false;
+
+ // If all checks passed, then we can transform these memcpy's
+ const Type *Tys[1];
+ Tys[0] = M->getLength()->getType();
+ Function* MemCpyFun = Intrinsic::getDeclaration(
+ M->getParent()->getParent()->getParent(),
+ M->getIntrinsicID(), Tys, 1);
+
+ Value *Args[4] = {
+ M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
+ };
+
+ CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
+
+
+ // If C and M don't interfere, then this is a valid transformation. If they
+ // did, this would mean that the two sources overlap, which would be bad.
+ if (MD.getDependency(C) == dep) {
+ MD.removeInstruction(M);
+ M->eraseFromParent();
+ NumMemCpyInstr++;
+ return true;
+ }
+
+ // Otherwise, there was no point in doing this, so we remove the call we
+ // inserted and act like nothing happened.
+ MD.removeInstruction(C);
+ C->eraseFromParent();
+ return false;
+}
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function& F) {
+
+ bool changed = false;
+ bool shouldContinue = true;
+
+ while (shouldContinue) {
+ shouldContinue = iterateOnFunction(F);
+ changed |= shouldContinue;
+ }
+
+ return changed;
+}
+
+
+// MemCpyOpt::iterateOnFunction - Executes one iteration of MemCpyOpt
+bool MemCpyOpt::iterateOnFunction(Function &F) {
+ bool changed_function = false;
+
+  // Walk all instructions in the function
+ for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE;) {
+ // Avoid invalidating the iterator
+ Instruction* I = BI++;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ changed_function |= processStore(SI, BI);
+ else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+ changed_function |= processMemCpy(M);
+ }
+ }
+ }
+
+ return changed_function;
+}
diff --git a/lib/Transforms/Scalar/PredicateSimplifier.cpp b/lib/Transforms/Scalar/PredicateSimplifier.cpp
new file mode 100644
index 0000000..a7e4d6e
--- /dev/null
+++ b/lib/Transforms/Scalar/PredicateSimplifier.cpp
@@ -0,0 +1,2725 @@
+//===-- PredicateSimplifier.cpp - Path Sensitive Simplifier ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Path-sensitive optimizer. In a branch where x == y, replace uses of
+// x with y. Permits further optimization, such as the elimination of
+// the unreachable call:
+//
+// void test(int *p, int *q)
+// {
+// if (p != q)
+// return;
+//
+// if (*p != *q)
+// foo(); // unreachable
+// }
+//
+//===----------------------------------------------------------------------===//
+//
+// The InequalityGraph focuses on four properties: equals, not equals,
+// less-than and less-than-or-equal-to. The greater-than forms are also held
+// just to allow walking from a lesser node to a greater one. These properties
+// are stored in a lattice; LE can become LT or EQ, NE can become LT or GT.
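+//
+// For illustration (not from the original source): if %x LE %y is known and
+// we later learn %x NE %y, the relation tightens to LT; learning %x GE %y
+// instead would tighten it to EQ, which is represented by merging the nodes.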
+//
+// These relationships define a graph between values of the same type. Each
+// Value is stored in a map table that retrieves the associated Node. This
+// is how EQ relationships are stored; the map contains pointers from equal
+// Values to the same node. The node contains the most canonical Value* form
+// and the list of known relationships with other nodes.
+//
+// If two nodes are known to be unequal, then they will contain pointers to
+// each other with an "NE" relationship. If node getNode(%x) is less than
+// getNode(%y), then the %x node will contain <%y, GT> and %y will contain
+// <%x, LT>. This allows us to tie nodes together into a graph like this:
+//
+// %a < %b < %c < %d
+//
+// with four nodes representing the properties. The InequalityGraph provides
+// querying with "isRelatedBy" and mutators "addEquality" and "addInequality".
+// To find a relationship, we start with one of the nodes and binary search
+// through its list to find where the relationships with the second node start.
+// Then we iterate through those to find the first relationship that dominates
+// our context node.
+//
+// To create these properties, we wait until a branch or switch instruction
+// implies that a particular value is true (or false). The VRPSolver is
+// responsible for analyzing the variable and seeing what new inferences
+// can be made from each property. For example:
+//
+// %P = icmp ne i32* %ptr, null
+// %a = and i1 %P, %Q
+// br i1 %a label %cond_true, label %cond_false
+//
+// For the true branch, the VRPSolver will start with %a EQ true and look at
+// the definition of %a and find that it can infer that %P and %Q are both
+// true. From %P being true, it can infer that %ptr NE null. For the false
+// branch it can't infer anything from the "and" instruction.
+//
+// Besides branches, we can also infer properties from instructions that may
+// have undefined behaviour in certain cases. For example, the divisor of
+// a division may never be zero. After the division instruction, we may assume
+// that the divisor is not equal to zero.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ValueRanges class stores the known integer bounds of a Value. When we
+// encounter i8 %a u< %b, the ValueRanges stores that %a = [0, 254] and
+// %b = [1, 255].
+//
+// It never stores an empty range, because that means that the code is
+// unreachable; unreachability is better represented by UnreachableBlocks. It
+// never stores a single-element range, since that's an equality relationship
+// better stored in the InequalityGraph.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "predsimplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <deque>
+#include <stack>
+using namespace llvm;
+
+STATISTIC(NumVarsReplaced, "Number of argument substitutions");
+STATISTIC(NumInstruction , "Number of instructions removed");
+STATISTIC(NumSimple , "Number of simple replacements");
+STATISTIC(NumBlocks , "Number of blocks marked unreachable");
+STATISTIC(NumSnuggle , "Number of comparisons snuggled");
+
+namespace {
+ class DomTreeDFS {
+ public:
+ class Node {
+ friend class DomTreeDFS;
+ public:
+ typedef std::vector<Node *>::iterator iterator;
+ typedef std::vector<Node *>::const_iterator const_iterator;
+
+ unsigned getDFSNumIn() const { return DFSin; }
+ unsigned getDFSNumOut() const { return DFSout; }
+
+ BasicBlock *getBlock() const { return BB; }
+
+ iterator begin() { return Children.begin(); }
+ iterator end() { return Children.end(); }
+
+ const_iterator begin() const { return Children.begin(); }
+ const_iterator end() const { return Children.end(); }
+
+ bool dominates(const Node *N) const {
+ return DFSin <= N->DFSin && DFSout >= N->DFSout;
+ }
+
+ bool DominatedBy(const Node *N) const {
+ return N->dominates(this);
+ }
+
+ /// Sorts by the number of descendants. With this, you can iterate
+ /// through a sorted list and the first matching entry is the most
+ /// specific match for your basic block. The order provided is stable;
+ /// DomTreeDFS::Nodes with the same number of descendants are sorted by
+ /// DFS in number.
+ bool operator<(const Node &N) const {
+ unsigned spread = DFSout - DFSin;
+ unsigned N_spread = N.DFSout - N.DFSin;
+ if (spread == N_spread) return DFSin < N.DFSin;
+ return spread < N_spread;
+ }
+ bool operator>(const Node &N) const { return N < *this; }
+
+ private:
+ unsigned DFSin, DFSout;
+ BasicBlock *BB;
+
+ std::vector<Node *> Children;
+ };
+
+ // XXX: this may be slow. Instead of using "new" for each node, consider
+ // putting them in a vector to keep them contiguous.
+ explicit DomTreeDFS(DominatorTree *DT) {
+ std::stack<std::pair<Node *, DomTreeNode *> > S;
+
+ Entry = new Node;
+ Entry->BB = DT->getRootNode()->getBlock();
+ S.push(std::make_pair(Entry, DT->getRootNode()));
+
+ NodeMap[Entry->BB] = Entry;
+
+ while (!S.empty()) {
+ std::pair<Node *, DomTreeNode *> &Pair = S.top();
+ Node *N = Pair.first;
+ DomTreeNode *DTNode = Pair.second;
+ S.pop();
+
+ for (DomTreeNode::iterator I = DTNode->begin(), E = DTNode->end();
+ I != E; ++I) {
+ Node *NewNode = new Node;
+ NewNode->BB = (*I)->getBlock();
+ N->Children.push_back(NewNode);
+ S.push(std::make_pair(NewNode, *I));
+
+ NodeMap[NewNode->BB] = NewNode;
+ }
+ }
+
+ renumber();
+
+#ifndef NDEBUG
+ DEBUG(dump());
+#endif
+ }
+
+#ifndef NDEBUG
+ virtual
+#endif
+ ~DomTreeDFS() {
+ std::stack<Node *> S;
+
+ S.push(Entry);
+ while (!S.empty()) {
+ Node *N = S.top(); S.pop();
+
+ for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ S.push(*I);
+
+ delete N;
+ }
+ }
+
+ /// getRootNode - This returns the entry node for the CFG of the function.
+ Node *getRootNode() const { return Entry; }
+
+ /// getNodeForBlock - return the node for the specified basic block.
+ Node *getNodeForBlock(BasicBlock *BB) const {
+ if (!NodeMap.count(BB)) return 0;
+ return const_cast<DomTreeDFS*>(this)->NodeMap[BB];
+ }
+
+    /// dominates - returns true if the basic block of I1 dominates the basic
+    /// block of I2. If the instructions belong to the same basic block, the
+    /// instruction that comes first sequentially in the block is considered
+    /// dominating.
+ bool dominates(Instruction *I1, Instruction *I2) {
+ BasicBlock *BB1 = I1->getParent(),
+ *BB2 = I2->getParent();
+ if (BB1 == BB2) {
+ if (isa<TerminatorInst>(I1)) return false;
+ if (isa<TerminatorInst>(I2)) return true;
+ if ( isa<PHINode>(I1) && !isa<PHINode>(I2)) return true;
+ if (!isa<PHINode>(I1) && isa<PHINode>(I2)) return false;
+
+ for (BasicBlock::const_iterator I = BB2->begin(), E = BB2->end();
+ I != E; ++I) {
+ if (&*I == I1) return true;
+ else if (&*I == I2) return false;
+ }
+ assert(!"Instructions not found in parent BasicBlock?");
+ } else {
+ Node *Node1 = getNodeForBlock(BB1),
+ *Node2 = getNodeForBlock(BB2);
+ return Node1 && Node2 && Node1->dominates(Node2);
+ }
+ return false; // Not reached
+ }
+
+ private:
+ /// renumber - calculates the depth first search numberings and applies
+    /// them to the nodes.
+ void renumber() {
+ std::stack<std::pair<Node *, Node::iterator> > S;
+ unsigned n = 0;
+
+ Entry->DFSin = ++n;
+ S.push(std::make_pair(Entry, Entry->begin()));
+
+ while (!S.empty()) {
+ std::pair<Node *, Node::iterator> &Pair = S.top();
+ Node *N = Pair.first;
+ Node::iterator &I = Pair.second;
+
+ if (I == N->end()) {
+ N->DFSout = ++n;
+ S.pop();
+ } else {
+ Node *Next = *I++;
+ Next->DFSin = ++n;
+ S.push(std::make_pair(Next, Next->begin()));
+ }
+ }
+ }
+
+#ifndef NDEBUG
+ virtual void dump() const {
+ dump(*cerr.stream());
+ }
+
+ void dump(std::ostream &os) const {
+ os << "Predicate simplifier DomTreeDFS: \n";
+ dump(Entry, 0, os);
+ os << "\n\n";
+ }
+
+ void dump(Node *N, int depth, std::ostream &os) const {
+ ++depth;
+ for (int i = 0; i < depth; ++i) { os << " "; }
+ os << "[" << depth << "] ";
+
+ os << N->getBlock()->getName() << " (" << N->getDFSNumIn()
+ << ", " << N->getDFSNumOut() << ")\n";
+
+ for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ dump(*I, depth, os);
+ }
+#endif
+
+ Node *Entry;
+ std::map<BasicBlock *, Node *> NodeMap;
+ };
+
+ // SLT SGT ULT UGT EQ
+ // 0 1 0 1 0 -- GT 10
+ // 0 1 0 1 1 -- GE 11
+ // 0 1 1 0 0 -- SGTULT 12
+ // 0 1 1 0 1 -- SGEULE 13
+ // 0 1 1 1 0 -- SGT 14
+ // 0 1 1 1 1 -- SGE 15
+ // 1 0 0 1 0 -- SLTUGT 18
+ // 1 0 0 1 1 -- SLEUGE 19
+ // 1 0 1 0 0 -- LT 20
+ // 1 0 1 0 1 -- LE 21
+ // 1 0 1 1 0 -- SLT 22
+ // 1 0 1 1 1 -- SLE 23
+ // 1 1 0 1 0 -- UGT 26
+ // 1 1 0 1 1 -- UGE 27
+ // 1 1 1 0 0 -- ULT 28
+ // 1 1 1 0 1 -- ULE 29
+ // 1 1 1 1 0 -- NE 30
+ enum LatticeBits {
+ EQ_BIT = 1, UGT_BIT = 2, ULT_BIT = 4, SGT_BIT = 8, SLT_BIT = 16
+ };
+ enum LatticeVal {
+ GT = SGT_BIT | UGT_BIT,
+ GE = GT | EQ_BIT,
+ LT = SLT_BIT | ULT_BIT,
+ LE = LT | EQ_BIT,
+ NE = SLT_BIT | SGT_BIT | ULT_BIT | UGT_BIT,
+ SGTULT = SGT_BIT | ULT_BIT,
+ SGEULE = SGTULT | EQ_BIT,
+ SLTUGT = SLT_BIT | UGT_BIT,
+ SLEUGE = SLTUGT | EQ_BIT,
+ ULT = SLT_BIT | SGT_BIT | ULT_BIT,
+ UGT = SLT_BIT | SGT_BIT | UGT_BIT,
+ SLT = SLT_BIT | ULT_BIT | UGT_BIT,
+ SGT = SGT_BIT | ULT_BIT | UGT_BIT,
+ SLE = SLT | EQ_BIT,
+ SGE = SGT | EQ_BIT,
+ ULE = ULT | EQ_BIT,
+ UGE = UGT | EQ_BIT
+ };
+
+#ifndef NDEBUG
+ /// validPredicate - determines whether a given value is actually a lattice
+ /// value. Only used in assertions or debugging.
+ static bool validPredicate(LatticeVal LV) {
+ switch (LV) {
+ case GT: case GE: case LT: case LE: case NE:
+ case SGTULT: case SGT: case SGEULE:
+ case SLTUGT: case SLT: case SLEUGE:
+ case ULT: case UGT:
+ case SLE: case SGE: case ULE: case UGE:
+ return true;
+ default:
+ return false;
+ }
+ }
+#endif
+
+ /// reversePredicate - reverse the direction of the inequality
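+  /// For illustration (not from the original source): reversing SGTULT
+  /// yields SLTUGT, and reversing SLT yields SGT (the unconstrained unsigned
+  /// half is restored by the fix-ups below); EQ_BIT is preserved, so LE
+  /// reverses to GE.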
+ static LatticeVal reversePredicate(LatticeVal LV) {
+ unsigned reverse = LV ^ (SLT_BIT|SGT_BIT|ULT_BIT|UGT_BIT); //preserve EQ_BIT
+
+ if ((reverse & (SLT_BIT|SGT_BIT)) == 0)
+ reverse |= (SLT_BIT|SGT_BIT);
+
+ if ((reverse & (ULT_BIT|UGT_BIT)) == 0)
+ reverse |= (ULT_BIT|UGT_BIT);
+
+ LatticeVal Rev = static_cast<LatticeVal>(reverse);
+ assert(validPredicate(Rev) && "Failed reversing predicate.");
+ return Rev;
+ }
+
+ /// ValueNumbering stores the scope-specific value numbers for a given Value.
+ class VISIBILITY_HIDDEN ValueNumbering {
+
+ /// VNPair is a tuple of {Value, index number, DomTreeDFS::Node}. It
+ /// includes the comparison operators necessary to allow you to store it
+ /// in a sorted vector.
+ class VISIBILITY_HIDDEN VNPair {
+ public:
+ Value *V;
+ unsigned index;
+ DomTreeDFS::Node *Subtree;
+
+ VNPair(Value *V, unsigned index, DomTreeDFS::Node *Subtree)
+ : V(V), index(index), Subtree(Subtree) {}
+
+ bool operator==(const VNPair &RHS) const {
+ return V == RHS.V && Subtree == RHS.Subtree;
+ }
+
+ bool operator<(const VNPair &RHS) const {
+ if (V != RHS.V) return V < RHS.V;
+ return *Subtree < *RHS.Subtree;
+ }
+
+ bool operator<(Value *RHS) const {
+ return V < RHS;
+ }
+
+ bool operator>(Value *RHS) const {
+ return V > RHS;
+ }
+
+ friend bool operator<(Value *RHS, const VNPair &pair) {
+ return pair.operator>(RHS);
+ }
+ };
+
+ typedef std::vector<VNPair> VNMapType;
+ VNMapType VNMap;
+
+ /// The canonical choice for value number at index.
+ std::vector<Value *> Values;
+
+ DomTreeDFS *DTDFS;
+
+ public:
+#ifndef NDEBUG
+ virtual ~ValueNumbering() {}
+ virtual void dump() {
+ dump(*cerr.stream());
+ }
+
+ void dump(std::ostream &os) {
+ for (unsigned i = 1; i <= Values.size(); ++i) {
+ os << i << " = ";
+ WriteAsOperand(os, Values[i-1]);
+ os << " {";
+ for (unsigned j = 0; j < VNMap.size(); ++j) {
+ if (VNMap[j].index == i) {
+ WriteAsOperand(os, VNMap[j].V);
+ os << " (" << VNMap[j].Subtree->getDFSNumIn() << ") ";
+ }
+ }
+ os << "}\n";
+ }
+ }
+#endif
+
+ /// compare - returns true if V1 is a better canonical value than V2.
+ bool compare(Value *V1, Value *V2) const {
+ if (isa<Constant>(V1))
+ return !isa<Constant>(V2);
+ else if (isa<Constant>(V2))
+ return false;
+ else if (isa<Argument>(V1))
+ return !isa<Argument>(V2);
+ else if (isa<Argument>(V2))
+ return false;
+
+ Instruction *I1 = dyn_cast<Instruction>(V1);
+ Instruction *I2 = dyn_cast<Instruction>(V2);
+
+ if (!I1 || !I2)
+ return V1->getNumUses() < V2->getNumUses();
+
+ return DTDFS->dominates(I1, I2);
+ }
+
+ ValueNumbering(DomTreeDFS *DTDFS) : DTDFS(DTDFS) {}
+
+ /// valueNumber - finds the value number for V under the Subtree. If
+ /// there is no value number, returns zero.
+ unsigned valueNumber(Value *V, DomTreeDFS::Node *Subtree) {
+ if (!(isa<Constant>(V) || isa<Argument>(V) || isa<Instruction>(V))
+ || V->getType() == Type::VoidTy) return 0;
+
+ VNMapType::iterator E = VNMap.end();
+ VNPair pair(V, 0, Subtree);
+ VNMapType::iterator I = std::lower_bound(VNMap.begin(), E, pair);
+ while (I != E && I->V == V) {
+ if (I->Subtree->dominates(Subtree))
+ return I->index;
+ ++I;
+ }
+ return 0;
+ }
+
+ /// getOrInsertVN - always returns a value number, creating it if necessary.
+ unsigned getOrInsertVN(Value *V, DomTreeDFS::Node *Subtree) {
+ if (unsigned n = valueNumber(V, Subtree))
+ return n;
+ else
+ return newVN(V);
+ }
+
+ /// newVN - creates a new value number. Value V must not already have a
+ /// value number assigned.
+ unsigned newVN(Value *V) {
+ assert((isa<Constant>(V) || isa<Argument>(V) || isa<Instruction>(V)) &&
+ "Bad Value for value numbering.");
+ assert(V->getType() != Type::VoidTy && "Won't value number a void value");
+
+ Values.push_back(V);
+
+ VNPair pair = VNPair(V, Values.size(), DTDFS->getRootNode());
+ VNMapType::iterator I = std::lower_bound(VNMap.begin(), VNMap.end(), pair);
+ assert((I == VNMap.end() || value(I->index) != V) &&
+ "Attempt to create a duplicate value number.");
+ VNMap.insert(I, pair);
+
+ return Values.size();
+ }
+
+ /// value - returns the Value associated with a value number.
+ Value *value(unsigned index) const {
+ assert(index != 0 && "Zero index is reserved for not found.");
+ assert(index <= Values.size() && "Index out of range.");
+ return Values[index-1];
+ }
+
+ /// canonicalize - return a Value that is equal to V under Subtree.
+ Value *canonicalize(Value *V, DomTreeDFS::Node *Subtree) {
+ if (isa<Constant>(V)) return V;
+
+ if (unsigned n = valueNumber(V, Subtree))
+ return value(n);
+ else
+ return V;
+ }
+
+ /// addEquality - adds that value V belongs to the set of equivalent
+ /// values defined by value number n under Subtree.
+ void addEquality(unsigned n, Value *V, DomTreeDFS::Node *Subtree) {
+ assert(canonicalize(value(n), Subtree) == value(n) &&
+ "Node's 'canonical' choice isn't best within this subtree.");
+
+ // Suppose that we are given "%x -> node #1 (%y)". The problem is that
+ // we may already have "%z -> node #2 (%x)" somewhere above us in the
+ // graph. We need to find those edges and add "%z -> node #1 (%y)"
+ // to keep the lookups canonical.
+
+ std::vector<Value *> ToRepoint(1, V);
+
+ if (unsigned Conflict = valueNumber(V, Subtree)) {
+ for (VNMapType::iterator I = VNMap.begin(), E = VNMap.end();
+ I != E; ++I) {
+ if (I->index == Conflict && I->Subtree->dominates(Subtree))
+ ToRepoint.push_back(I->V);
+ }
+ }
+
+ for (std::vector<Value *>::iterator VI = ToRepoint.begin(),
+ VE = ToRepoint.end(); VI != VE; ++VI) {
+ Value *V = *VI;
+
+ VNPair pair(V, n, Subtree);
+ VNMapType::iterator B = VNMap.begin(), E = VNMap.end();
+ VNMapType::iterator I = std::lower_bound(B, E, pair);
+ if (I != E && I->V == V && I->Subtree == Subtree)
+ I->index = n; // Update best choice
+ else
+ VNMap.insert(I, pair); // New Value
+
+ // XXX: we currently don't have to worry about updating values with
+ // more specific Subtrees, but we will need to for PHI node support.
+
+#ifndef NDEBUG
+ Value *V_n = value(n);
+ if (isa<Constant>(V) && isa<Constant>(V_n)) {
+ assert(V == V_n && "Constant equals different constant?");
+ }
+#endif
+ }
+ }
+
+ /// remove - removes all references to value V.
+ void remove(Value *V) {
+ VNMapType::iterator B = VNMap.begin(), E = VNMap.end();
+ VNPair pair(V, 0, DTDFS->getRootNode());
+ VNMapType::iterator J = std::upper_bound(B, E, pair);
+ VNMapType::iterator I = J;
+
+ while (I != B && (I == E || I->V == V)) --I;
+
+ VNMap.erase(I, J);
+ }
+ };
+
+ /// The InequalityGraph stores the relationships between values.
+ /// Each Value in the graph is assigned to a Node. Nodes are pointer
+ /// comparable for equality. The caller is expected to maintain the logical
+ /// consistency of the system.
+ ///
+ /// The InequalityGraph class may invalidate Node*s after any mutator call.
+ /// @brief The InequalityGraph stores the relationships between values.
+ class VISIBILITY_HIDDEN InequalityGraph {
+ ValueNumbering &VN;
+ DomTreeDFS::Node *TreeRoot;
+
+ InequalityGraph(); // DO NOT IMPLEMENT
+ InequalityGraph(InequalityGraph &); // DO NOT IMPLEMENT
+ public:
+ InequalityGraph(ValueNumbering &VN, DomTreeDFS::Node *TreeRoot)
+ : VN(VN), TreeRoot(TreeRoot) {}
+
+ class Node;
+
+    /// An Edge is contained inside a Node, making one end of the edge
+    /// implicit, and contains a pointer to the other end. The edge holds a
+    /// lattice value specifying the relationship and a DomTreeDFS::Node
+    /// specifying the root in the dominator tree to which this edge applies.
+ class VISIBILITY_HIDDEN Edge {
+ public:
+ Edge(unsigned T, LatticeVal V, DomTreeDFS::Node *ST)
+ : To(T), LV(V), Subtree(ST) {}
+
+ unsigned To;
+ LatticeVal LV;
+ DomTreeDFS::Node *Subtree;
+
+ bool operator<(const Edge &edge) const {
+ if (To != edge.To) return To < edge.To;
+ return *Subtree < *edge.Subtree;
+ }
+
+ bool operator<(unsigned to) const {
+ return To < to;
+ }
+
+ bool operator>(unsigned to) const {
+ return To > to;
+ }
+
+ friend bool operator<(unsigned to, const Edge &edge) {
+ return edge.operator>(to);
+ }
+ };
+
+ /// A single node in the InequalityGraph. This stores the canonical Value
+ /// for the node, as well as the relationships with the neighbours.
+ ///
+ /// @brief A single node in the InequalityGraph.
+ class VISIBILITY_HIDDEN Node {
+ friend class InequalityGraph;
+
+ typedef SmallVector<Edge, 4> RelationsType;
+ RelationsType Relations;
+
+ // TODO: can this idea improve performance?
+ //friend class std::vector<Node>;
+ //Node(Node &N) { RelationsType.swap(N.RelationsType); }
+
+ public:
+ typedef RelationsType::iterator iterator;
+ typedef RelationsType::const_iterator const_iterator;
+
+#ifndef NDEBUG
+ virtual ~Node() {}
+ virtual void dump() const {
+ dump(*cerr.stream());
+ }
+ private:
+ void dump(std::ostream &os) const {
+ static const std::string names[32] =
+ { "000000", "000001", "000002", "000003", "000004", "000005",
+ "000006", "000007", "000008", "000009", " >", " >=",
+ " s>u<", "s>=u<=", " s>", " s>=", "000016", "000017",
+ " s<u>", "s<=u>=", " <", " <=", " s<", " s<=",
+ "000024", "000025", " u>", " u>=", " u<", " u<=",
+ " !=", "000031" };
+ for (Node::const_iterator NI = begin(), NE = end(); NI != NE; ++NI) {
+ os << names[NI->LV] << " " << NI->To
+ << " (" << NI->Subtree->getDFSNumIn() << "), ";
+ }
+ }
+ public:
+#endif
+
+ iterator begin() { return Relations.begin(); }
+ iterator end() { return Relations.end(); }
+ const_iterator begin() const { return Relations.begin(); }
+ const_iterator end() const { return Relations.end(); }
+
+ iterator find(unsigned n, DomTreeDFS::Node *Subtree) {
+ iterator E = end();
+ for (iterator I = std::lower_bound(begin(), E, n);
+ I != E && I->To == n; ++I) {
+ if (Subtree->DominatedBy(I->Subtree))
+ return I;
+ }
+ return E;
+ }
+
+ const_iterator find(unsigned n, DomTreeDFS::Node *Subtree) const {
+ const_iterator E = end();
+ for (const_iterator I = std::lower_bound(begin(), E, n);
+ I != E && I->To == n; ++I) {
+ if (Subtree->DominatedBy(I->Subtree))
+ return I;
+ }
+ return E;
+ }
+
+ /// update - updates the lattice value for a given node, creating a new
+ /// entry if one doesn't exist. The new lattice value must not be
+ /// inconsistent with any previously existing value.
+ void update(unsigned n, LatticeVal R, DomTreeDFS::Node *Subtree) {
+ assert(validPredicate(R) && "Invalid predicate.");
+
+ Edge edge(n, R, Subtree);
+ iterator B = begin(), E = end();
+ iterator I = std::lower_bound(B, E, edge);
+
+ iterator J = I;
+ while (J != E && J->To == n) {
+ if (Subtree->DominatedBy(J->Subtree))
+ break;
+ ++J;
+ }
+
+ if (J != E && J->To == n) {
+ edge.LV = static_cast<LatticeVal>(J->LV & R);
+        assert(validPredicate(edge.LV) &&
+               "Invalid intersection of lattice values.");
+
+ if (edge.LV == J->LV)
+ return; // This update adds nothing new.
+ }
+
+ if (I != B) {
+ // We also have to tighten any edge beneath our update.
+ for (iterator K = I - 1; K->To == n; --K) {
+ if (K->Subtree->DominatedBy(Subtree)) {
+ LatticeVal LV = static_cast<LatticeVal>(K->LV & edge.LV);
+            assert(validPredicate(LV) &&
+                   "Invalid intersection of lattice values");
+ K->LV = LV;
+ }
+ if (K == B) break;
+ }
+ }
+
+ // Insert new edge at Subtree if it isn't already there.
+ if (I == E || I->To != n || Subtree != I->Subtree)
+ Relations.insert(I, edge);
+ }
+ };
+
+ private:
+
+ std::vector<Node> Nodes;
+
+ public:
+ /// node - returns the node object at a given value number. The pointer
+ /// returned may be invalidated on the next call to node().
+ Node *node(unsigned index) {
+ assert(VN.value(index)); // This triggers the necessary checks.
+ if (Nodes.size() < index) Nodes.resize(index);
+ return &Nodes[index-1];
+ }
+
+ /// isRelatedBy - true iff n1 op n2
+ bool isRelatedBy(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+ LatticeVal LV) {
+ if (n1 == n2) return LV & EQ_BIT;
+
+ Node *N1 = node(n1);
+ Node::iterator I = N1->find(n2, Subtree), E = N1->end();
+ if (I != E) return (I->LV & LV) == I->LV;
+
+ return false;
+ }
+
+ // The add* methods assume that your input is logically valid and may
+ // assertion-fail or infinitely loop if you attempt a contradiction.
+
+ /// addInequality - Sets n1 op n2.
+ /// It is also an error to call this on an inequality that is already true.
+ void addInequality(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+ LatticeVal LV1) {
+ assert(n1 != n2 && "A node can't be inequal to itself.");
+
+ if (LV1 != NE)
+ assert(!isRelatedBy(n1, n2, Subtree, reversePredicate(LV1)) &&
+ "Contradictory inequality.");
+
+ // Suppose we're adding %n1 < %n2. Find all the %a < %n1 and
+ // add %a < %n2 too. This keeps the graph fully connected.
+ if (LV1 != NE) {
+ // Break up the relationship into signed and unsigned comparison parts.
+ // If the signed parts of %a op1 %n1 match that of %n1 op2 %n2, and
+ // op1 and op2 aren't NE, then add %a op3 %n2. The new relationship
+ // should have the EQ_BIT iff it's set for both op1 and op2.
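+      //
+      // For illustration (not from the original source): if %a SLE %n1 is
+      // already known and we are adding %n1 SLT %n2, the signed halves agree,
+      // so %a SLT %n2 is recorded too (without EQ_BIT, which the new relation
+      // lacks).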
+
+ unsigned LV1_s = LV1 & (SLT_BIT|SGT_BIT);
+ unsigned LV1_u = LV1 & (ULT_BIT|UGT_BIT);
+
+ for (Node::iterator I = node(n1)->begin(), E = node(n1)->end(); I != E; ++I) {
+ if (I->LV != NE && I->To != n2) {
+
+ DomTreeDFS::Node *Local_Subtree = NULL;
+ if (Subtree->DominatedBy(I->Subtree))
+ Local_Subtree = Subtree;
+ else if (I->Subtree->DominatedBy(Subtree))
+ Local_Subtree = I->Subtree;
+
+ if (Local_Subtree) {
+ unsigned new_relationship = 0;
+ LatticeVal ILV = reversePredicate(I->LV);
+ unsigned ILV_s = ILV & (SLT_BIT|SGT_BIT);
+ unsigned ILV_u = ILV & (ULT_BIT|UGT_BIT);
+
+ if (LV1_s != (SLT_BIT|SGT_BIT) && ILV_s == LV1_s)
+ new_relationship |= ILV_s;
+ if (LV1_u != (ULT_BIT|UGT_BIT) && ILV_u == LV1_u)
+ new_relationship |= ILV_u;
+
+ if (new_relationship) {
+ if ((new_relationship & (SLT_BIT|SGT_BIT)) == 0)
+ new_relationship |= (SLT_BIT|SGT_BIT);
+ if ((new_relationship & (ULT_BIT|UGT_BIT)) == 0)
+ new_relationship |= (ULT_BIT|UGT_BIT);
+ if ((LV1 & EQ_BIT) && (ILV & EQ_BIT))
+ new_relationship |= EQ_BIT;
+
+ LatticeVal NewLV = static_cast<LatticeVal>(new_relationship);
+
+ node(I->To)->update(n2, NewLV, Local_Subtree);
+ node(n2)->update(I->To, reversePredicate(NewLV), Local_Subtree);
+ }
+ }
+ }
+ }
+
+ for (Node::iterator I = node(n2)->begin(), E = node(n2)->end(); I != E; ++I) {
+ if (I->LV != NE && I->To != n1) {
+ DomTreeDFS::Node *Local_Subtree = NULL;
+ if (Subtree->DominatedBy(I->Subtree))
+ Local_Subtree = Subtree;
+ else if (I->Subtree->DominatedBy(Subtree))
+ Local_Subtree = I->Subtree;
+
+ if (Local_Subtree) {
+ unsigned new_relationship = 0;
+ unsigned ILV_s = I->LV & (SLT_BIT|SGT_BIT);
+ unsigned ILV_u = I->LV & (ULT_BIT|UGT_BIT);
+
+ if (LV1_s != (SLT_BIT|SGT_BIT) && ILV_s == LV1_s)
+ new_relationship |= ILV_s;
+
+ if (LV1_u != (ULT_BIT|UGT_BIT) && ILV_u == LV1_u)
+ new_relationship |= ILV_u;
+
+ if (new_relationship) {
+ if ((new_relationship & (SLT_BIT|SGT_BIT)) == 0)
+ new_relationship |= (SLT_BIT|SGT_BIT);
+ if ((new_relationship & (ULT_BIT|UGT_BIT)) == 0)
+ new_relationship |= (ULT_BIT|UGT_BIT);
+ if ((LV1 & EQ_BIT) && (I->LV & EQ_BIT))
+ new_relationship |= EQ_BIT;
+
+ LatticeVal NewLV = static_cast<LatticeVal>(new_relationship);
+
+ node(n1)->update(I->To, NewLV, Local_Subtree);
+ node(I->To)->update(n1, reversePredicate(NewLV), Local_Subtree);
+ }
+ }
+ }
+ }
+ }
+
+ node(n1)->update(n2, LV1, Subtree);
+ node(n2)->update(n1, reversePredicate(LV1), Subtree);
+ }
+
+ /// remove - removes a node from the graph by removing all references to
+ /// and from it.
+ void remove(unsigned n) {
+ Node *N = node(n);
+ for (Node::iterator NI = N->begin(), NE = N->end(); NI != NE; ++NI) {
+ Node::iterator Iter = node(NI->To)->find(n, TreeRoot);
+ do {
+ node(NI->To)->Relations.erase(Iter);
+ Iter = node(NI->To)->find(n, TreeRoot);
+ } while (Iter != node(NI->To)->end());
+ }
+ N->Relations.clear();
+ }
+
+#ifndef NDEBUG
+ virtual ~InequalityGraph() {}
+ virtual void dump() {
+ dump(*cerr.stream());
+ }
+
+ void dump(std::ostream &os) {
+ for (unsigned i = 1; i <= Nodes.size(); ++i) {
+ os << i << " = {";
+ node(i)->dump(os);
+ os << "}\n";
+ }
+ }
+#endif
+ };
+
+ class VRPSolver;
+
+ /// ValueRanges tracks the known integer ranges and anti-ranges of the nodes
+ /// in the InequalityGraph.
+ class VISIBILITY_HIDDEN ValueRanges {
+ ValueNumbering &VN;
+ TargetData *TD;
+
+ class VISIBILITY_HIDDEN ScopedRange {
+ typedef std::vector<std::pair<DomTreeDFS::Node *, ConstantRange> >
+ RangeListType;
+ RangeListType RangeList;
+
+ static bool swo(const std::pair<DomTreeDFS::Node *, ConstantRange> &LHS,
+ const std::pair<DomTreeDFS::Node *, ConstantRange> &RHS) {
+ return *LHS.first < *RHS.first;
+ }
+
+ public:
+#ifndef NDEBUG
+ virtual ~ScopedRange() {}
+ virtual void dump() const {
+ dump(*cerr.stream());
+ }
+
+ void dump(std::ostream &os) const {
+ os << "{";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ os << &I->second << " (" << I->first->getDFSNumIn() << "), ";
+ }
+ os << "}";
+ }
+#endif
+
+ typedef RangeListType::iterator iterator;
+ typedef RangeListType::const_iterator const_iterator;
+
+ iterator begin() { return RangeList.begin(); }
+ iterator end() { return RangeList.end(); }
+ const_iterator begin() const { return RangeList.begin(); }
+ const_iterator end() const { return RangeList.end(); }
+
+ iterator find(DomTreeDFS::Node *Subtree) {
+ static ConstantRange empty(1, false);
+ iterator E = end();
+ iterator I = std::lower_bound(begin(), E,
+ std::make_pair(Subtree, empty), swo);
+
+ while (I != E && !I->first->dominates(Subtree)) ++I;
+ return I;
+ }
+
+ const_iterator find(DomTreeDFS::Node *Subtree) const {
+ static const ConstantRange empty(1, false);
+ const_iterator E = end();
+ const_iterator I = std::lower_bound(begin(), E,
+ std::make_pair(Subtree, empty), swo);
+
+ while (I != E && !I->first->dominates(Subtree)) ++I;
+ return I;
+ }
+
+ void update(const ConstantRange &CR, DomTreeDFS::Node *Subtree) {
+ assert(!CR.isEmptySet() && "Empty ConstantRange.");
+ assert(!CR.isSingleElement() && "Refusing to store single element.");
+
+ static ConstantRange empty(1, false);
+ iterator E = end();
+ iterator I =
+ std::lower_bound(begin(), E, std::make_pair(Subtree, empty), swo);
+
+ if (I != end() && I->first == Subtree) {
+ ConstantRange CR2 = I->second.maximalIntersectWith(CR);
+          assert(!CR2.isEmptySet() && !CR2.isSingleElement() &&
+                 "Invalid intersection of ranges.");
+ I->second = CR2;
+ } else
+ RangeList.insert(I, std::make_pair(Subtree, CR));
+ }
+ };
+
+ std::vector<ScopedRange> Ranges;
+
+ void update(unsigned n, const ConstantRange &CR, DomTreeDFS::Node *Subtree){
+ if (CR.isFullSet()) return;
+ if (Ranges.size() < n) Ranges.resize(n);
+ Ranges[n-1].update(CR, Subtree);
+ }
+
+ /// create - Creates a ConstantRange that matches the given LatticeVal
+ /// relation with a given integer.
+ ConstantRange create(LatticeVal LV, const ConstantRange &CR) {
+ assert(!CR.isEmptySet() && "Can't deal with empty set.");
+
+ if (LV == NE)
+ return makeConstantRange(ICmpInst::ICMP_NE, CR);
+
+ unsigned LV_s = LV & (SGT_BIT|SLT_BIT);
+ unsigned LV_u = LV & (UGT_BIT|ULT_BIT);
+ bool hasEQ = LV & EQ_BIT;
+
+ ConstantRange Range(CR.getBitWidth());
+
+ if (LV_s == SGT_BIT) {
+ Range = Range.maximalIntersectWith(makeConstantRange(
+ hasEQ ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_SGT, CR));
+ } else if (LV_s == SLT_BIT) {
+ Range = Range.maximalIntersectWith(makeConstantRange(
+ hasEQ ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_SLT, CR));
+ }
+
+ if (LV_u == UGT_BIT) {
+ Range = Range.maximalIntersectWith(makeConstantRange(
+ hasEQ ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_UGT, CR));
+ } else if (LV_u == ULT_BIT) {
+ Range = Range.maximalIntersectWith(makeConstantRange(
+ hasEQ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT, CR));
+ }
+
+ return Range;
+ }
+
+ /// makeConstantRange - Creates a ConstantRange representing the set of all
+    /// values that match the ICmpInst::Predicate with any of the values in CR.
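+    /// For illustration (not from the original source): for i8 and ICMP_ULT
+    /// with CR = [5, 10), the result is [0, 9), i.e. every value u< 9, the
+    /// largest element of CR.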
+ ConstantRange makeConstantRange(ICmpInst::Predicate ICmpOpcode,
+ const ConstantRange &CR) {
+ uint32_t W = CR.getBitWidth();
+ switch (ICmpOpcode) {
+ default: assert(!"Invalid ICmp opcode to makeConstantRange()");
+ case ICmpInst::ICMP_EQ:
+ return ConstantRange(CR.getLower(), CR.getUpper());
+ case ICmpInst::ICMP_NE:
+ if (CR.isSingleElement())
+ return ConstantRange(CR.getUpper(), CR.getLower());
+ return ConstantRange(W);
+ case ICmpInst::ICMP_ULT:
+ return ConstantRange(APInt::getMinValue(W), CR.getUnsignedMax());
+ case ICmpInst::ICMP_SLT:
+ return ConstantRange(APInt::getSignedMinValue(W), CR.getSignedMax());
+ case ICmpInst::ICMP_ULE: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMaxValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getMinValue(W), UMax + 1);
+ }
+ case ICmpInst::ICMP_SLE: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMaxSignedValue() || (SMax+1).isMaxSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
+ }
+ case ICmpInst::ICMP_UGT:
+ return ConstantRange(CR.getUnsignedMin() + 1, APInt::getNullValue(W));
+ case ICmpInst::ICMP_SGT:
+ return ConstantRange(CR.getSignedMin() + 1,
+ APInt::getSignedMinValue(W));
+ case ICmpInst::ICMP_UGE: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMinValue())
+ return ConstantRange(W);
+ return ConstantRange(UMin, APInt::getNullValue(W));
+ }
+ case ICmpInst::ICMP_SGE: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMinSignedValue())
+ return ConstantRange(W);
+ return ConstantRange(SMin, APInt::getSignedMinValue(W));
+ }
+ }
+ }
+
+#ifndef NDEBUG
+ bool isCanonical(Value *V, DomTreeDFS::Node *Subtree) {
+ return V == VN.canonicalize(V, Subtree);
+ }
+#endif
+
+ public:
+
+ ValueRanges(ValueNumbering &VN, TargetData *TD) : VN(VN), TD(TD) {}
+
+#ifndef NDEBUG
+ virtual ~ValueRanges() {}
+
+ virtual void dump() const {
+ dump(*cerr.stream());
+ }
+
+ void dump(std::ostream &os) const {
+ for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
+ os << (i+1) << " = ";
+ Ranges[i].dump(os);
+ os << "\n";
+ }
+ }
+#endif
+
+ /// range - looks up the ConstantRange associated with a value number.
+ ConstantRange range(unsigned n, DomTreeDFS::Node *Subtree) {
+ assert(VN.value(n)); // performs range checks
+
+ if (n <= Ranges.size()) {
+ ScopedRange::iterator I = Ranges[n-1].find(Subtree);
+ if (I != Ranges[n-1].end()) return I->second;
+ }
+
+      return range(VN.value(n));
+ }
+
+ /// range - determine a range from a Value without performing any lookups.
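+    /// A ConstantInt or a null pointer yields a singleton range; any other
+    /// Value yields the full set for its type's width.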
+ ConstantRange range(Value *V) const {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantRange(C->getValue());
+ else if (isa<ConstantPointerNull>(V))
+ return ConstantRange(APInt::getNullValue(typeToWidth(V->getType())));
+ else
+ return ConstantRange(typeToWidth(V->getType()));
+ }
+
+    /// typeToWidth - returns the number of bits necessary to store a value
+    /// of this type, or zero if unknown.
+ uint32_t typeToWidth(const Type *Ty) const {
+ if (TD)
+ return TD->getTypeSizeInBits(Ty);
+ else
+ return Ty->getPrimitiveSizeInBits();
+ }
+
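+    /// isRelatedBy - true iff every value in CR1 is related to every value
+    /// in CR2 by LV. For example, with CR1 = [0, 5) and CR2 = [10, 20), ULT
+    /// holds because umax(CR1) = 4 is u< 10 = umin(CR2).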
+ static bool isRelatedBy(const ConstantRange &CR1, const ConstantRange &CR2,
+ LatticeVal LV) {
+ switch (LV) {
+ default: assert(!"Impossible lattice value!");
+ case NE:
+ return CR1.maximalIntersectWith(CR2).isEmptySet();
+ case ULT:
+ return CR1.getUnsignedMax().ult(CR2.getUnsignedMin());
+ case ULE:
+ return CR1.getUnsignedMax().ule(CR2.getUnsignedMin());
+ case UGT:
+ return CR1.getUnsignedMin().ugt(CR2.getUnsignedMax());
+ case UGE:
+ return CR1.getUnsignedMin().uge(CR2.getUnsignedMax());
+ case SLT:
+ return CR1.getSignedMax().slt(CR2.getSignedMin());
+ case SLE:
+ return CR1.getSignedMax().sle(CR2.getSignedMin());
+ case SGT:
+ return CR1.getSignedMin().sgt(CR2.getSignedMax());
+ case SGE:
+ return CR1.getSignedMin().sge(CR2.getSignedMax());
+      case LT:
+        return CR1.getUnsignedMax().ult(CR2.getUnsignedMin()) &&
+               CR1.getSignedMax().slt(CR2.getSignedMin());
+      case LE:
+        return CR1.getUnsignedMax().ule(CR2.getUnsignedMin()) &&
+               CR1.getSignedMax().sle(CR2.getSignedMin());
+ case GT:
+ return CR1.getUnsignedMin().ugt(CR2.getUnsignedMax()) &&
+ CR1.getSignedMin().sgt(CR2.getSignedMax());
+ case GE:
+ return CR1.getUnsignedMin().uge(CR2.getUnsignedMax()) &&
+ CR1.getSignedMin().sge(CR2.getSignedMax());
+ case SLTUGT:
+ return CR1.getSignedMax().slt(CR2.getSignedMin()) &&
+ CR1.getUnsignedMin().ugt(CR2.getUnsignedMax());
+ case SLEUGE:
+ return CR1.getSignedMax().sle(CR2.getSignedMin()) &&
+ CR1.getUnsignedMin().uge(CR2.getUnsignedMax());
+ case SGTULT:
+ return CR1.getSignedMin().sgt(CR2.getSignedMax()) &&
+ CR1.getUnsignedMax().ult(CR2.getUnsignedMin());
+ case SGEULE:
+ return CR1.getSignedMin().sge(CR2.getSignedMax()) &&
+ CR1.getUnsignedMax().ule(CR2.getUnsignedMin());
+ }
+ }
+
+ bool isRelatedBy(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+ LatticeVal LV) {
+ ConstantRange CR1 = range(n1, Subtree);
+ ConstantRange CR2 = range(n2, Subtree);
+
+ // True iff all values in CR1 are LV to all values in CR2.
+ return isRelatedBy(CR1, CR2, LV);
+ }
+
+ void addToWorklist(Value *V, Constant *C, ICmpInst::Predicate Pred,
+ VRPSolver *VRP);
+ void markBlock(VRPSolver *VRP);
+
+ void mergeInto(Value **I, unsigned n, unsigned New,
+ DomTreeDFS::Node *Subtree, VRPSolver *VRP) {
+ ConstantRange CR_New = range(New, Subtree);
+ ConstantRange Merged = CR_New;
+
+ for (; n != 0; ++I, --n) {
+ unsigned i = VN.valueNumber(*I, Subtree);
+ ConstantRange CR_Kill = i ? range(i, Subtree) : range(*I);
+ if (CR_Kill.isFullSet()) continue;
+ Merged = Merged.maximalIntersectWith(CR_Kill);
+ }
+
+ if (Merged.isFullSet() || Merged == CR_New) return;
+
+ applyRange(New, Merged, Subtree, VRP);
+ }
+
+ void applyRange(unsigned n, const ConstantRange &CR,
+ DomTreeDFS::Node *Subtree, VRPSolver *VRP) {
+ ConstantRange Merged = CR.maximalIntersectWith(range(n, Subtree));
+ if (Merged.isEmptySet()) {
+ markBlock(VRP);
+ return;
+ }
+
+ if (const APInt *I = Merged.getSingleElement()) {
+ Value *V = VN.value(n); // XXX: redesign worklist.
+ const Type *Ty = V->getType();
+ if (Ty->isInteger()) {
+ addToWorklist(V, ConstantInt::get(*I), ICmpInst::ICMP_EQ, VRP);
+ return;
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ assert(*I == 0 && "Pointer is null but not zero?");
+ addToWorklist(V, ConstantPointerNull::get(PTy),
+ ICmpInst::ICMP_EQ, VRP);
+ return;
+ }
+ }
+
+ update(n, Merged, Subtree);
+ }
+
+ void addNotEquals(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+ VRPSolver *VRP) {
+ ConstantRange CR1 = range(n1, Subtree);
+ ConstantRange CR2 = range(n2, Subtree);
+
+ uint32_t W = CR1.getBitWidth();
+
+ if (const APInt *I = CR1.getSingleElement()) {
+ if (CR2.isFullSet()) {
+ ConstantRange NewCR2(CR1.getUpper(), CR1.getLower());
+ applyRange(n2, NewCR2, Subtree, VRP);
+ } else if (*I == CR2.getLower()) {
+ APInt NewLower(CR2.getLower() + 1),
+ NewUpper(CR2.getUpper());
+ if (NewLower == NewUpper)
+ NewLower = NewUpper = APInt::getMinValue(W);
+
+ ConstantRange NewCR2(NewLower, NewUpper);
+ applyRange(n2, NewCR2, Subtree, VRP);
+ } else if (*I == CR2.getUpper() - 1) {
+ APInt NewLower(CR2.getLower()),
+ NewUpper(CR2.getUpper() - 1);
+ if (NewLower == NewUpper)
+ NewLower = NewUpper = APInt::getMinValue(W);
+
+ ConstantRange NewCR2(NewLower, NewUpper);
+ applyRange(n2, NewCR2, Subtree, VRP);
+ }
+ }
+
+ if (const APInt *I = CR2.getSingleElement()) {
+ if (CR1.isFullSet()) {
+ ConstantRange NewCR1(CR2.getUpper(), CR2.getLower());
+ applyRange(n1, NewCR1, Subtree, VRP);
+ } else if (*I == CR1.getLower()) {
+ APInt NewLower(CR1.getLower() + 1),
+ NewUpper(CR1.getUpper());
+ if (NewLower == NewUpper)
+ NewLower = NewUpper = APInt::getMinValue(W);
+
+ ConstantRange NewCR1(NewLower, NewUpper);
+ applyRange(n1, NewCR1, Subtree, VRP);
+ } else if (*I == CR1.getUpper() - 1) {
+ APInt NewLower(CR1.getLower()),
+ NewUpper(CR1.getUpper() - 1);
+ if (NewLower == NewUpper)
+ NewLower = NewUpper = APInt::getMinValue(W);
+
+ ConstantRange NewCR1(NewLower, NewUpper);
+ applyRange(n1, NewCR1, Subtree, VRP);
+ }
+ }
+ }
+
+ void addInequality(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree,
+ LatticeVal LV, VRPSolver *VRP) {
+ assert(!isRelatedBy(n1, n2, Subtree, LV) && "Asked to do useless work.");
+
+ if (LV == NE) {
+ addNotEquals(n1, n2, Subtree, VRP);
+ return;
+ }
+
+ ConstantRange CR1 = range(n1, Subtree);
+ ConstantRange CR2 = range(n2, Subtree);
+
+ if (!CR1.isSingleElement()) {
+ ConstantRange NewCR1 = CR1.maximalIntersectWith(create(LV, CR2));
+ if (NewCR1 != CR1)
+ applyRange(n1, NewCR1, Subtree, VRP);
+ }
+
+ if (!CR2.isSingleElement()) {
+ ConstantRange NewCR2 = CR2.maximalIntersectWith(
+ create(reversePredicate(LV), CR1));
+ if (NewCR2 != CR2)
+ applyRange(n2, NewCR2, Subtree, VRP);
+ }
+ }
+ };
+
+  /// UnreachableBlocks keeps track of blocks that are for one reason or
+  /// another discovered to be unreachable. This is used to cull the graph
+  /// when analyzing instructions, and to mark blocks with the "unreachable"
+  /// terminator instruction after the pass has finished running.
+ class VISIBILITY_HIDDEN UnreachableBlocks {
+ private:
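+    // Kept sorted by pointer value so that mark() and isDead() can use
+    // std::lower_bound for logarithmic membership tests.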
+ std::vector<BasicBlock *> DeadBlocks;
+
+ public:
+ /// mark - mark a block as dead
+ void mark(BasicBlock *BB) {
+ std::vector<BasicBlock *>::iterator E = DeadBlocks.end();
+ std::vector<BasicBlock *>::iterator I =
+ std::lower_bound(DeadBlocks.begin(), E, BB);
+
+ if (I == E || *I != BB) DeadBlocks.insert(I, BB);
+ }
+
+ /// isDead - returns whether a block is known to be dead already
+ bool isDead(BasicBlock *BB) {
+ std::vector<BasicBlock *>::iterator E = DeadBlocks.end();
+ std::vector<BasicBlock *>::iterator I =
+ std::lower_bound(DeadBlocks.begin(), E, BB);
+
+ return I != E && *I == BB;
+ }
+
+ /// kill - replace the dead blocks' terminator with an UnreachableInst.
+ bool kill() {
+ bool modified = false;
+ for (std::vector<BasicBlock *>::iterator I = DeadBlocks.begin(),
+ E = DeadBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ DOUT << "unreachable block: " << BB->getName() << "\n";
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ BasicBlock *Succ = *SI;
+ Succ->removePredecessor(BB);
+ }
+
+ TerminatorInst *TI = BB->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->eraseFromParent();
+ new UnreachableInst(BB);
+ ++NumBlocks;
+ modified = true;
+ }
+ DeadBlocks.clear();
+ return modified;
+ }
+ };
+
+ /// VRPSolver keeps track of how changes to one variable affect other
+ /// variables, and forwards changes along to the InequalityGraph. It
+ /// also maintains the correct choice for "canonical" in the IG.
+ /// @brief VRPSolver calculates inferences from a new relationship.
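+  /// Typical use: construct a VRPSolver over a context (a basic block or
+  /// instruction), add() one or more relationships, then solve() to
+  /// propagate their consequences.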
+ class VISIBILITY_HIDDEN VRPSolver {
+ private:
+ friend class ValueRanges;
+
+ struct Operation {
+ Value *LHS, *RHS;
+ ICmpInst::Predicate Op;
+
+ BasicBlock *ContextBB; // XXX use a DomTreeDFS::Node instead
+ Instruction *ContextInst;
+ };
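+    // Pending facts, processed FIFO: solve() pops from the front while add()
+    // pushes to the back, so consequences are explored breadth-first.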
+ std::deque<Operation> WorkList;
+
+ ValueNumbering &VN;
+ InequalityGraph &IG;
+ UnreachableBlocks &UB;
+ ValueRanges &VR;
+ DomTreeDFS *DTDFS;
+ DomTreeDFS::Node *Top;
+ BasicBlock *TopBB;
+ Instruction *TopInst;
+ bool &modified;
+
+ typedef InequalityGraph::Node Node;
+
+ // below - true if the Instruction is dominated by the current context
+ // block or instruction
+ bool below(Instruction *I) {
+ BasicBlock *BB = I->getParent();
+ if (TopInst && TopInst->getParent() == BB) {
+ if (isa<TerminatorInst>(TopInst)) return false;
+ if (isa<TerminatorInst>(I)) return true;
+ if ( isa<PHINode>(TopInst) && !isa<PHINode>(I)) return true;
+ if (!isa<PHINode>(TopInst) && isa<PHINode>(I)) return false;
+
+ for (BasicBlock::const_iterator Iter = BB->begin(), E = BB->end();
+ Iter != E; ++Iter) {
+ if (&*Iter == TopInst) return true;
+ else if (&*Iter == I) return false;
+ }
+ assert(!"Instructions not found in parent BasicBlock?");
+ } else {
+ DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB);
+ if (!Node) return false;
+ return Top->dominates(Node);
+ }
+ return false; // Not reached
+ }
+
+ // aboveOrBelow - true if the Instruction either dominates or is dominated
+ // by the current context block or instruction
+ bool aboveOrBelow(Instruction *I) {
+ BasicBlock *BB = I->getParent();
+ DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB);
+ if (!Node) return false;
+
+ return Top == Node || Top->dominates(Node) || Node->dominates(Top);
+ }
+
+ bool makeEqual(Value *V1, Value *V2) {
+ DOUT << "makeEqual(" << *V1 << ", " << *V2 << ")\n";
+ DOUT << "context is ";
+ if (TopInst) DOUT << "I: " << *TopInst << "\n";
+ else DOUT << "BB: " << TopBB->getName()
+ << "(" << Top->getDFSNumIn() << ")\n";
+
+ assert(V1->getType() == V2->getType() &&
+ "Can't make two values with different types equal.");
+
+ if (V1 == V2) return true;
+
+ if (isa<Constant>(V1) && isa<Constant>(V2))
+ return false;
+
+ unsigned n1 = VN.valueNumber(V1, Top), n2 = VN.valueNumber(V2, Top);
+
+ if (n1 && n2) {
+ if (n1 == n2) return true;
+ if (IG.isRelatedBy(n1, n2, Top, NE)) return false;
+ }
+
+ if (n1) assert(V1 == VN.value(n1) && "Value isn't canonical.");
+ if (n2) assert(V2 == VN.value(n2) && "Value isn't canonical.");
+
+ assert(!VN.compare(V2, V1) && "Please order parameters to makeEqual.");
+
+ assert(!isa<Constant>(V2) && "Tried to remove a constant.");
+
+ SetVector<unsigned> Remove;
+ if (n2) Remove.insert(n2);
+
+ if (n1 && n2) {
+ // Suppose we're being told that %x == %y, and %x <= %z and %y >= %z.
+ // We can't just merge %x and %y because the relationship with %z would
+ // be EQ and that's invalid. What we're doing is looking for any nodes
+ // %z such that %x <= %z and %y >= %z, and vice versa.
+
+ Node::iterator end = IG.node(n2)->end();
+
+ // Find the intersection between N1 and N2 which is dominated by
+ // Top. If we find %x where N1 <= %x <= N2 (or >=) then add %x to
+ // Remove.
+ for (Node::iterator I = IG.node(n1)->begin(), E = IG.node(n1)->end();
+ I != E; ++I) {
+ if (!(I->LV & EQ_BIT) || !Top->DominatedBy(I->Subtree)) continue;
+
+ unsigned ILV_s = I->LV & (SLT_BIT|SGT_BIT);
+ unsigned ILV_u = I->LV & (ULT_BIT|UGT_BIT);
+ Node::iterator NI = IG.node(n2)->find(I->To, Top);
+ if (NI != end) {
+ LatticeVal NILV = reversePredicate(NI->LV);
+ unsigned NILV_s = NILV & (SLT_BIT|SGT_BIT);
+ unsigned NILV_u = NILV & (ULT_BIT|UGT_BIT);
+
+ if ((ILV_s != (SLT_BIT|SGT_BIT) && ILV_s == NILV_s) ||
+ (ILV_u != (ULT_BIT|UGT_BIT) && ILV_u == NILV_u))
+ Remove.insert(I->To);
+ }
+ }
+
+ // See if one of the nodes about to be removed is actually a better
+ // canonical choice than n1.
+ unsigned orig_n1 = n1;
+ SetVector<unsigned>::iterator DontRemove = Remove.end();
+ for (SetVector<unsigned>::iterator I = Remove.begin()+1 /* skip n2 */,
+ E = Remove.end(); I != E; ++I) {
+ unsigned n = *I;
+ Value *V = VN.value(n);
+ if (VN.compare(V, V1)) {
+ V1 = V;
+ n1 = n;
+ DontRemove = I;
+ }
+ }
+ if (DontRemove != Remove.end()) {
+ unsigned n = *DontRemove;
+ Remove.remove(n);
+ Remove.insert(orig_n1);
+ }
+ }
+
+ // We'd like to allow makeEqual on two values to perform a simple
+ // substitution without creating nodes in the IG whenever possible.
+ //
+ // The first iteration through this loop operates on V2 before going
+ // through the Remove list and operating on those too. If all of the
+ // iterations performed simple replacements then we exit early.
+ bool mergeIGNode = false;
+ unsigned i = 0;
+ for (Value *R = V2; i == 0 || i < Remove.size(); ++i) {
+ if (i) R = VN.value(Remove[i]); // skip n2.
+
+ // Try to replace the whole instruction. If we can, we're done.
+ Instruction *I2 = dyn_cast<Instruction>(R);
+ if (I2 && below(I2)) {
+ std::vector<Instruction *> ToNotify;
+ for (Value::use_iterator UI = R->use_begin(), UE = R->use_end();
+ UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ if (Instruction *I = dyn_cast<Instruction>(TheUse.getUser()))
+ ToNotify.push_back(I);
+ }
+
+ DOUT << "Simply removing " << *I2
+ << ", replacing with " << *V1 << "\n";
+ I2->replaceAllUsesWith(V1);
+ // leave it dead; it'll get erased later.
+ ++NumInstruction;
+ modified = true;
+
+ for (std::vector<Instruction *>::iterator II = ToNotify.begin(),
+ IE = ToNotify.end(); II != IE; ++II) {
+ opsToDef(*II);
+ }
+
+ continue;
+ }
+
+ // Otherwise, replace all dominated uses.
+ for (Value::use_iterator UI = R->use_begin(), UE = R->use_end();
+ UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ if (Instruction *I = dyn_cast<Instruction>(TheUse.getUser())) {
+ if (below(I)) {
+ TheUse.set(V1);
+ modified = true;
+ ++NumVarsReplaced;
+ opsToDef(I);
+ }
+ }
+ }
+
+ // If that killed the instruction, stop here.
+ if (I2 && isInstructionTriviallyDead(I2)) {
+ DOUT << "Killed all uses of " << *I2
+ << ", replacing with " << *V1 << "\n";
+ continue;
+ }
+
+ // If we make it to here, then we will need to create a node for N1.
+ // Otherwise, we can skip out early!
+ mergeIGNode = true;
+ }
+
+ if (!isa<Constant>(V1)) {
+ if (Remove.empty()) {
+ VR.mergeInto(&V2, 1, VN.getOrInsertVN(V1, Top), Top, this);
+ } else {
+ std::vector<Value*> RemoveVals;
+ RemoveVals.reserve(Remove.size());
+
+ for (SetVector<unsigned>::iterator I = Remove.begin(),
+ E = Remove.end(); I != E; ++I) {
+ Value *V = VN.value(*I);
+ if (!V->use_empty())
+ RemoveVals.push_back(V);
+ }
+ VR.mergeInto(&RemoveVals[0], RemoveVals.size(),
+ VN.getOrInsertVN(V1, Top), Top, this);
+ }
+ }
+
+ if (mergeIGNode) {
+ // Create N1.
+ if (!n1) n1 = VN.getOrInsertVN(V1, Top);
+ IG.node(n1); // Ensure that IG.Nodes won't get resized
+
+ // Migrate relationships from removed nodes to N1.
+ for (SetVector<unsigned>::iterator I = Remove.begin(), E = Remove.end();
+ I != E; ++I) {
+ unsigned n = *I;
+ for (Node::iterator NI = IG.node(n)->begin(), NE = IG.node(n)->end();
+ NI != NE; ++NI) {
+ if (NI->Subtree->DominatedBy(Top)) {
+ if (NI->To == n1) {
+ assert((NI->LV & EQ_BIT) && "Node inequal to itself.");
+ continue;
+ }
+ if (Remove.count(NI->To))
+ continue;
+
+ IG.node(NI->To)->update(n1, reversePredicate(NI->LV), Top);
+ IG.node(n1)->update(NI->To, NI->LV, Top);
+ }
+ }
+ }
+
+ // Point V2 (and all items in Remove) to N1.
+ if (!n2)
+ VN.addEquality(n1, V2, Top);
+ else {
+ for (SetVector<unsigned>::iterator I = Remove.begin(),
+ E = Remove.end(); I != E; ++I) {
+ VN.addEquality(n1, VN.value(*I), Top);
+ }
+ }
+
+        // If !Remove.empty() then V2 == VN.value(Remove[0]).
+        // Even when Remove is empty, we still want to process V2.
+ i = 0;
+ for (Value *R = V2; i == 0 || i < Remove.size(); ++i) {
+ if (i) R = VN.value(Remove[i]); // skip n2.
+
+ if (Instruction *I2 = dyn_cast<Instruction>(R)) {
+ if (aboveOrBelow(I2))
+ defToOps(I2);
+ }
+          for (Value::use_iterator UI = R->use_begin(), UE = R->use_end();
+ UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ if (Instruction *I = dyn_cast<Instruction>(TheUse.getUser())) {
+ if (aboveOrBelow(I))
+ opsToDef(I);
+ }
+ }
+ }
+ }
+
+ // re-opsToDef all dominated users of V1.
+ if (Instruction *I = dyn_cast<Instruction>(V1)) {
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ Value *V = TheUse.getUser();
+ if (!V->use_empty()) {
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (aboveOrBelow(Inst))
+ opsToDef(Inst);
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+
+    /// cmpInstToLattice - converts an ICmpInst::Predicate to a lattice
+    /// value. Requires that the predicate have a matching lattice value;
+    /// does not accept ICMP_EQ.
+ static LatticeVal cmpInstToLattice(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ:
+ assert(!"No matching lattice value.");
+ return static_cast<LatticeVal>(EQ_BIT);
+ default:
+ assert(!"Invalid 'icmp' predicate.");
+ case ICmpInst::ICMP_NE:
+ return NE;
+ case ICmpInst::ICMP_UGT:
+ return UGT;
+ case ICmpInst::ICMP_UGE:
+ return UGE;
+ case ICmpInst::ICMP_ULT:
+ return ULT;
+ case ICmpInst::ICMP_ULE:
+ return ULE;
+ case ICmpInst::ICMP_SGT:
+ return SGT;
+ case ICmpInst::ICMP_SGE:
+ return SGE;
+ case ICmpInst::ICMP_SLT:
+ return SLT;
+ case ICmpInst::ICMP_SLE:
+ return SLE;
+ }
+ }
+
+ public:
+ VRPSolver(ValueNumbering &VN, InequalityGraph &IG, UnreachableBlocks &UB,
+ ValueRanges &VR, DomTreeDFS *DTDFS, bool &modified,
+ BasicBlock *TopBB)
+ : VN(VN),
+ IG(IG),
+ UB(UB),
+ VR(VR),
+ DTDFS(DTDFS),
+ Top(DTDFS->getNodeForBlock(TopBB)),
+ TopBB(TopBB),
+ TopInst(NULL),
+ modified(modified)
+ {
+ assert(Top && "VRPSolver created for unreachable basic block.");
+ }
+
+ VRPSolver(ValueNumbering &VN, InequalityGraph &IG, UnreachableBlocks &UB,
+ ValueRanges &VR, DomTreeDFS *DTDFS, bool &modified,
+ Instruction *TopInst)
+ : VN(VN),
+ IG(IG),
+ UB(UB),
+ VR(VR),
+ DTDFS(DTDFS),
+ Top(DTDFS->getNodeForBlock(TopInst->getParent())),
+ TopBB(TopInst->getParent()),
+ TopInst(TopInst),
+ modified(modified)
+ {
+ assert(Top && "VRPSolver created for unreachable basic block.");
+ assert(Top->getBlock() == TopInst->getParent() && "Context mismatch.");
+ }
+
+ bool isRelatedBy(Value *V1, Value *V2, ICmpInst::Predicate Pred) const {
+ if (Constant *C1 = dyn_cast<Constant>(V1))
+ if (Constant *C2 = dyn_cast<Constant>(V2))
+ return ConstantExpr::getCompare(Pred, C1, C2) ==
+ ConstantInt::getTrue();
+
+ unsigned n1 = VN.valueNumber(V1, Top);
+ unsigned n2 = VN.valueNumber(V2, Top);
+
+ if (n1 && n2) {
+ if (n1 == n2) return Pred == ICmpInst::ICMP_EQ ||
+ Pred == ICmpInst::ICMP_ULE ||
+ Pred == ICmpInst::ICMP_UGE ||
+ Pred == ICmpInst::ICMP_SLE ||
+ Pred == ICmpInst::ICMP_SGE;
+ if (Pred == ICmpInst::ICMP_EQ) return false;
+ if (IG.isRelatedBy(n1, n2, Top, cmpInstToLattice(Pred))) return true;
+ if (VR.isRelatedBy(n1, n2, Top, cmpInstToLattice(Pred))) return true;
+ }
+
+ if ((n1 && !n2 && isa<Constant>(V2)) ||
+ (n2 && !n1 && isa<Constant>(V1))) {
+ ConstantRange CR1 = n1 ? VR.range(n1, Top) : VR.range(V1);
+ ConstantRange CR2 = n2 ? VR.range(n2, Top) : VR.range(V2);
+
+ if (Pred == ICmpInst::ICMP_EQ)
+ return CR1.isSingleElement() &&
+ CR1.getSingleElement() == CR2.getSingleElement();
+
+ return VR.isRelatedBy(CR1, CR2, cmpInstToLattice(Pred));
+ }
+ if (Pred == ICmpInst::ICMP_EQ) return V1 == V2;
+ return false;
+ }
+
+ /// add - adds a new property to the work queue
+ void add(Value *V1, Value *V2, ICmpInst::Predicate Pred,
+ Instruction *I = NULL) {
+ DOUT << "adding " << *V1 << " " << Pred << " " << *V2;
+ if (I) DOUT << " context: " << *I;
+ else DOUT << " default context (" << Top->getDFSNumIn() << ")";
+ DOUT << "\n";
+
+ assert(V1->getType() == V2->getType() &&
+ "Can't relate two values with different types.");
+
+ WorkList.push_back(Operation());
+ Operation &O = WorkList.back();
+ O.LHS = V1, O.RHS = V2, O.Op = Pred, O.ContextInst = I;
+ O.ContextBB = I ? I->getParent() : TopBB;
+ }
+
+ /// defToOps - Given an instruction definition that we've learned something
+ /// new about, find any new relationships between its operands.
+ void defToOps(Instruction *I) {
+ Instruction *NewContext = below(I) ? I : TopInst;
+ Value *Canonical = VN.canonicalize(I, Top);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ const Type *Ty = BO->getType();
+ assert(!Ty->isFPOrFPVector() && "Float in work queue!");
+
+ Value *Op0 = VN.canonicalize(BO->getOperand(0), Top);
+ Value *Op1 = VN.canonicalize(BO->getOperand(1), Top);
+
+ // TODO: "and i32 -1, %x" EQ %y then %x EQ %y.
+
+ switch (BO->getOpcode()) {
+ case Instruction::And: {
+ // "and i32 %a, %b" EQ -1 then %a EQ -1 and %b EQ -1
+ ConstantInt *CI = ConstantInt::getAllOnesValue(Ty);
+ if (Canonical == CI) {
+ add(CI, Op0, ICmpInst::ICMP_EQ, NewContext);
+ add(CI, Op1, ICmpInst::ICMP_EQ, NewContext);
+ }
+ } break;
+ case Instruction::Or: {
+ // "or i32 %a, %b" EQ 0 then %a EQ 0 and %b EQ 0
+ Constant *Zero = Constant::getNullValue(Ty);
+ if (Canonical == Zero) {
+ add(Zero, Op0, ICmpInst::ICMP_EQ, NewContext);
+ add(Zero, Op1, ICmpInst::ICMP_EQ, NewContext);
+ }
+ } break;
+ case Instruction::Xor: {
+ // "xor i32 %c, %a" EQ %b then %a EQ %c ^ %b
+ // "xor i32 %c, %a" EQ %c then %a EQ 0
+ // "xor i32 %c, %a" NE %c then %a NE 0
+ // Repeat the above, with order of operands reversed.
+ Value *LHS = Op0;
+ Value *RHS = Op1;
+ if (!isa<Constant>(LHS)) std::swap(LHS, RHS);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Canonical)) {
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(LHS)) {
+ add(RHS, ConstantInt::get(CI->getValue() ^ Arg->getValue()),
+ ICmpInst::ICMP_EQ, NewContext);
+ }
+ }
+ if (Canonical == LHS) {
+ if (isa<ConstantInt>(Canonical))
+ add(RHS, Constant::getNullValue(Ty), ICmpInst::ICMP_EQ,
+ NewContext);
+ } else if (isRelatedBy(LHS, Canonical, ICmpInst::ICMP_NE)) {
+ add(RHS, Constant::getNullValue(Ty), ICmpInst::ICMP_NE,
+ NewContext);
+ }
+ } break;
+ default:
+ break;
+ }
+ } else if (ICmpInst *IC = dyn_cast<ICmpInst>(I)) {
+ // "icmp ult i32 %a, %y" EQ true then %a u< y
+ // etc.
+
+ if (Canonical == ConstantInt::getTrue()) {
+ add(IC->getOperand(0), IC->getOperand(1), IC->getPredicate(),
+ NewContext);
+ } else if (Canonical == ConstantInt::getFalse()) {
+ add(IC->getOperand(0), IC->getOperand(1),
+ ICmpInst::getInversePredicate(IC->getPredicate()), NewContext);
+ }
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (I->getType()->isFPOrFPVector()) return;
+
+ // Given: "%a = select i1 %x, i32 %b, i32 %c"
+ // %a EQ %b and %b NE %c then %x EQ true
+ // %a EQ %c and %b NE %c then %x EQ false
+
+ Value *True = SI->getTrueValue();
+ Value *False = SI->getFalseValue();
+ if (isRelatedBy(True, False, ICmpInst::ICMP_NE)) {
+ if (Canonical == VN.canonicalize(True, Top) ||
+ isRelatedBy(Canonical, False, ICmpInst::ICMP_NE))
+ add(SI->getCondition(), ConstantInt::getTrue(),
+ ICmpInst::ICMP_EQ, NewContext);
+ else if (Canonical == VN.canonicalize(False, Top) ||
+ isRelatedBy(Canonical, True, ICmpInst::ICMP_NE))
+ add(SI->getCondition(), ConstantInt::getFalse(),
+ ICmpInst::ICMP_EQ, NewContext);
+ }
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ for (GetElementPtrInst::op_iterator OI = GEPI->idx_begin(),
+ OE = GEPI->idx_end(); OI != OE; ++OI) {
+ ConstantInt *Op = dyn_cast<ConstantInt>(VN.canonicalize(*OI, Top));
+ if (!Op || !Op->isZero()) return;
+ }
+ // TODO: The GEPI indices are all zero. Copy from definition to operand,
+ // jumping the type plane as needed.
+ if (isRelatedBy(GEPI, Constant::getNullValue(GEPI->getType()),
+ ICmpInst::ICMP_NE)) {
+ Value *Ptr = GEPI->getPointerOperand();
+ add(Ptr, Constant::getNullValue(Ptr->getType()), ICmpInst::ICMP_NE,
+ NewContext);
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ const Type *SrcTy = CI->getSrcTy();
+
+ unsigned ci = VN.getOrInsertVN(CI, Top);
+ uint32_t W = VR.typeToWidth(SrcTy);
+ if (!W) return;
+ ConstantRange CR = VR.range(ci, Top);
+
+ if (CR.isFullSet()) return;
+
+ switch (CI->getOpcode()) {
+ default: break;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ VR.applyRange(VN.getOrInsertVN(CI->getOperand(0), Top),
+ CR.truncate(W), Top, this);
+ break;
+ case Instruction::BitCast:
+ VR.applyRange(VN.getOrInsertVN(CI->getOperand(0), Top),
+ CR, Top, this);
+ break;
+ }
+ }
+ }
+
+ /// opsToDef - A new relationship was discovered involving one of this
+ /// instruction's operands. Find any new relationship involving the
+ /// definition, or another operand.
+ void opsToDef(Instruction *I) {
+ Instruction *NewContext = below(I) ? I : TopInst;
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ Value *Op0 = VN.canonicalize(BO->getOperand(0), Top);
+ Value *Op1 = VN.canonicalize(BO->getOperand(1), Top);
+
+ if (ConstantInt *CI0 = dyn_cast<ConstantInt>(Op0))
+ if (ConstantInt *CI1 = dyn_cast<ConstantInt>(Op1)) {
+ add(BO, ConstantExpr::get(BO->getOpcode(), CI0, CI1),
+ ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+
+ // "%y = and i1 true, %x" then %x EQ %y
+ // "%y = or i1 false, %x" then %x EQ %y
+ // "%x = add i32 %y, 0" then %x EQ %y
+ // "%x = mul i32 %y, 0" then %x EQ 0
+
+ Instruction::BinaryOps Opcode = BO->getOpcode();
+ const Type *Ty = BO->getType();
+ assert(!Ty->isFPOrFPVector() && "Float in work queue!");
+
+ Constant *Zero = Constant::getNullValue(Ty);
+ Constant *One = ConstantInt::get(Ty, 1);
+ ConstantInt *AllOnes = ConstantInt::getAllOnesValue(Ty);
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ if (Op1 == Zero) {
+ add(BO, Op0, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ break;
+ case Instruction::Sub:
+ if (Op1 == Zero) {
+ add(BO, Op0, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ if (ConstantInt *CI0 = dyn_cast<ConstantInt>(Op0)) {
+ unsigned n_ci0 = VN.getOrInsertVN(Op1, Top);
+ ConstantRange CR = VR.range(n_ci0, Top);
+ if (!CR.isFullSet()) {
+ CR.subtract(CI0->getValue());
+ unsigned n_bo = VN.getOrInsertVN(BO, Top);
+ VR.applyRange(n_bo, CR, Top, this);
+ return;
+ }
+ }
+ if (ConstantInt *CI1 = dyn_cast<ConstantInt>(Op1)) {
+ unsigned n_ci1 = VN.getOrInsertVN(Op0, Top);
+ ConstantRange CR = VR.range(n_ci1, Top);
+ if (!CR.isFullSet()) {
+ CR.subtract(CI1->getValue());
+ unsigned n_bo = VN.getOrInsertVN(BO, Top);
+ VR.applyRange(n_bo, CR, Top, this);
+ return;
+ }
+ }
+ break;
+ case Instruction::Or:
+ if (Op0 == AllOnes || Op1 == AllOnes) {
+ add(BO, AllOnes, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ if (Op0 == Zero) {
+ add(BO, Op1, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ } else if (Op1 == Zero) {
+ add(BO, Op0, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ break;
+ case Instruction::Add:
+ if (ConstantInt *CI0 = dyn_cast<ConstantInt>(Op0)) {
+ unsigned n_ci0 = VN.getOrInsertVN(Op1, Top);
+ ConstantRange CR = VR.range(n_ci0, Top);
+ if (!CR.isFullSet()) {
+ CR.subtract(-CI0->getValue());
+ unsigned n_bo = VN.getOrInsertVN(BO, Top);
+ VR.applyRange(n_bo, CR, Top, this);
+ return;
+ }
+ }
+ if (ConstantInt *CI1 = dyn_cast<ConstantInt>(Op1)) {
+ unsigned n_ci1 = VN.getOrInsertVN(Op0, Top);
+ ConstantRange CR = VR.range(n_ci1, Top);
+ if (!CR.isFullSet()) {
+ CR.subtract(-CI1->getValue());
+ unsigned n_bo = VN.getOrInsertVN(BO, Top);
+ VR.applyRange(n_bo, CR, Top, this);
+ return;
+ }
+ }
+ // fall-through
+ case Instruction::Xor:
+ if (Op0 == Zero) {
+ add(BO, Op1, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ } else if (Op1 == Zero) {
+ add(BO, Op0, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ break;
+ case Instruction::And:
+ if (Op0 == AllOnes) {
+ add(BO, Op1, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ } else if (Op1 == AllOnes) {
+ add(BO, Op0, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ if (Op0 == Zero || Op1 == Zero) {
+ add(BO, Zero, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ break;
+ case Instruction::Mul:
+ if (Op0 == Zero || Op1 == Zero) {
+ add(BO, Zero, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ if (Op0 == One) {
+ add(BO, Op1, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ } else if (Op1 == One) {
+ add(BO, Op0, ICmpInst::ICMP_EQ, NewContext);
+ return;
+ }
+ break;
+ }
+
+ // "%x = add i32 %y, %z" and %x EQ %y then %z EQ 0
+ // "%x = add i32 %y, %z" and %x EQ %z then %y EQ 0
+ // "%x = shl i32 %y, %z" and %x EQ %y and %y NE 0 then %z EQ 0
+ // "%x = udiv i32 %y, %z" and %x EQ %y and %y NE 0 then %z EQ 1
+
+ Value *Known = Op0, *Unknown = Op1,
+ *TheBO = VN.canonicalize(BO, Top);
+ if (Known != TheBO) std::swap(Known, Unknown);
+ if (Known == TheBO) {
+ switch (Opcode) {
+ default: break;
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ if (!isRelatedBy(Known, Zero, ICmpInst::ICMP_NE)) break;
+ // otherwise, fall-through.
+ case Instruction::Sub:
+ if (Unknown == Op0) break;
+ // otherwise, fall-through.
+ case Instruction::Xor:
+ case Instruction::Add:
+ add(Unknown, Zero, ICmpInst::ICMP_EQ, NewContext);
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ if (Unknown == Op1) break;
+ if (isRelatedBy(Known, Zero, ICmpInst::ICMP_NE))
+ add(Unknown, One, ICmpInst::ICMP_EQ, NewContext);
+ break;
+ }
+ }
+
+ // TODO: "%a = add i32 %b, 1" and %b > %z then %a >= %z.
+
+ } else if (ICmpInst *IC = dyn_cast<ICmpInst>(I)) {
+ // "%a = icmp ult i32 %b, %c" and %b u< %c then %a EQ true
+ // "%a = icmp ult i32 %b, %c" and %b u>= %c then %a EQ false
+ // etc.
+
+ Value *Op0 = VN.canonicalize(IC->getOperand(0), Top);
+ Value *Op1 = VN.canonicalize(IC->getOperand(1), Top);
+
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ if (isRelatedBy(Op0, Op1, Pred))
+ add(IC, ConstantInt::getTrue(), ICmpInst::ICMP_EQ, NewContext);
+ else if (isRelatedBy(Op0, Op1, ICmpInst::getInversePredicate(Pred)))
+ add(IC, ConstantInt::getFalse(), ICmpInst::ICMP_EQ, NewContext);
+
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (I->getType()->isFPOrFPVector()) return;
+
+ // Given: "%a = select i1 %x, i32 %b, i32 %c"
+ // %x EQ true then %a EQ %b
+ // %x EQ false then %a EQ %c
+ // %b EQ %c then %a EQ %b
+
+ Value *Canonical = VN.canonicalize(SI->getCondition(), Top);
+ if (Canonical == ConstantInt::getTrue()) {
+ add(SI, SI->getTrueValue(), ICmpInst::ICMP_EQ, NewContext);
+ } else if (Canonical == ConstantInt::getFalse()) {
+ add(SI, SI->getFalseValue(), ICmpInst::ICMP_EQ, NewContext);
+ } else if (VN.canonicalize(SI->getTrueValue(), Top) ==
+ VN.canonicalize(SI->getFalseValue(), Top)) {
+ add(SI, SI->getTrueValue(), ICmpInst::ICMP_EQ, NewContext);
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ const Type *DestTy = CI->getDestTy();
+ if (DestTy->isFPOrFPVector()) return;
+
+ Value *Op = VN.canonicalize(CI->getOperand(0), Top);
+ Instruction::CastOps Opcode = CI->getOpcode();
+
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ add(CI, ConstantExpr::getCast(Opcode, C, DestTy),
+ ICmpInst::ICMP_EQ, NewContext);
+ }
+
+ uint32_t W = VR.typeToWidth(DestTy);
+ unsigned ci = VN.getOrInsertVN(CI, Top);
+ ConstantRange CR = VR.range(VN.getOrInsertVN(Op, Top), Top);
+
+ if (!CR.isFullSet()) {
+ switch (Opcode) {
+ default: break;
+ case Instruction::ZExt:
+ VR.applyRange(ci, CR.zeroExtend(W), Top, this);
+ break;
+ case Instruction::SExt:
+ VR.applyRange(ci, CR.signExtend(W), Top, this);
+ break;
+ case Instruction::Trunc: {
+ ConstantRange Result = CR.truncate(W);
+ if (!Result.isFullSet())
+ VR.applyRange(ci, Result, Top, this);
+ } break;
+ case Instruction::BitCast:
+ VR.applyRange(ci, CR, Top, this);
+ break;
+ // TODO: other casts?
+ }
+ }
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ for (GetElementPtrInst::op_iterator OI = GEPI->idx_begin(),
+ OE = GEPI->idx_end(); OI != OE; ++OI) {
+ ConstantInt *Op = dyn_cast<ConstantInt>(VN.canonicalize(*OI, Top));
+ if (!Op || !Op->isZero()) return;
+ }
+ // TODO: The GEPI indices are all zero. Copy from operand to definition,
+ // jumping the type plane as needed.
+ Value *Ptr = GEPI->getPointerOperand();
+ if (isRelatedBy(Ptr, Constant::getNullValue(Ptr->getType()),
+ ICmpInst::ICMP_NE)) {
+ add(GEPI, Constant::getNullValue(GEPI->getType()), ICmpInst::ICMP_NE,
+ NewContext);
+ }
+ }
+ }
+
+ /// solve - process the work queue
+ void solve() {
+ //DOUT << "WorkList entry, size: " << WorkList.size() << "\n";
+ while (!WorkList.empty()) {
+ //DOUT << "WorkList size: " << WorkList.size() << "\n";
+
+ Operation &O = WorkList.front();
+ TopInst = O.ContextInst;
+ TopBB = O.ContextBB;
+ Top = DTDFS->getNodeForBlock(TopBB); // XXX move this into Context
+
+ O.LHS = VN.canonicalize(O.LHS, Top);
+ O.RHS = VN.canonicalize(O.RHS, Top);
+
+ assert(O.LHS == VN.canonicalize(O.LHS, Top) && "Canonicalize isn't.");
+ assert(O.RHS == VN.canonicalize(O.RHS, Top) && "Canonicalize isn't.");
+
+ DOUT << "solving " << *O.LHS << " " << O.Op << " " << *O.RHS;
+ if (O.ContextInst) DOUT << " context inst: " << *O.ContextInst;
+ else DOUT << " context block: " << O.ContextBB->getName();
+ DOUT << "\n";
+
+ DEBUG(VN.dump());
+ DEBUG(IG.dump());
+ DEBUG(VR.dump());
+
+ // If they're both Constant, skip it. Check for contradiction and mark
+ // the BB as unreachable if so.
+ if (Constant *CI_L = dyn_cast<Constant>(O.LHS)) {
+ if (Constant *CI_R = dyn_cast<Constant>(O.RHS)) {
+ if (ConstantExpr::getCompare(O.Op, CI_L, CI_R) ==
+ ConstantInt::getFalse())
+ UB.mark(TopBB);
+
+ WorkList.pop_front();
+ continue;
+ }
+ }
+
+ if (VN.compare(O.LHS, O.RHS)) {
+ std::swap(O.LHS, O.RHS);
+ O.Op = ICmpInst::getSwappedPredicate(O.Op);
+ }
+
+ if (O.Op == ICmpInst::ICMP_EQ) {
+ if (!makeEqual(O.RHS, O.LHS))
+ UB.mark(TopBB);
+ } else {
+ LatticeVal LV = cmpInstToLattice(O.Op);
+
+ if ((LV & EQ_BIT) &&
+ isRelatedBy(O.LHS, O.RHS, ICmpInst::getSwappedPredicate(O.Op))) {
+ if (!makeEqual(O.RHS, O.LHS))
+ UB.mark(TopBB);
+ } else {
+ if (isRelatedBy(O.LHS, O.RHS, ICmpInst::getInversePredicate(O.Op))){
+ UB.mark(TopBB);
+ WorkList.pop_front();
+ continue;
+ }
+
+ unsigned n1 = VN.getOrInsertVN(O.LHS, Top);
+ unsigned n2 = VN.getOrInsertVN(O.RHS, Top);
+
+ if (n1 == n2) {
+ if (O.Op != ICmpInst::ICMP_UGE && O.Op != ICmpInst::ICMP_ULE &&
+ O.Op != ICmpInst::ICMP_SGE && O.Op != ICmpInst::ICMP_SLE)
+ UB.mark(TopBB);
+
+ WorkList.pop_front();
+ continue;
+ }
+
+ if (VR.isRelatedBy(n1, n2, Top, LV) ||
+ IG.isRelatedBy(n1, n2, Top, LV)) {
+ WorkList.pop_front();
+ continue;
+ }
+
+ VR.addInequality(n1, n2, Top, LV, this);
+ if ((!isa<ConstantInt>(O.RHS) && !isa<ConstantInt>(O.LHS)) ||
+ LV == NE)
+ IG.addInequality(n1, n2, Top, LV);
+
+ if (Instruction *I1 = dyn_cast<Instruction>(O.LHS)) {
+ if (aboveOrBelow(I1))
+ defToOps(I1);
+ }
+ if (isa<Instruction>(O.LHS) || isa<Argument>(O.LHS)) {
+ for (Value::use_iterator UI = O.LHS->use_begin(),
+ UE = O.LHS->use_end(); UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ if (Instruction *I = dyn_cast<Instruction>(TheUse.getUser())) {
+ if (aboveOrBelow(I))
+ opsToDef(I);
+ }
+ }
+ }
+ if (Instruction *I2 = dyn_cast<Instruction>(O.RHS)) {
+ if (aboveOrBelow(I2))
+ defToOps(I2);
+ }
+ if (isa<Instruction>(O.RHS) || isa<Argument>(O.RHS)) {
+ for (Value::use_iterator UI = O.RHS->use_begin(),
+ UE = O.RHS->use_end(); UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ if (Instruction *I = dyn_cast<Instruction>(TheUse.getUser())) {
+ if (aboveOrBelow(I))
+ opsToDef(I);
+ }
+ }
+ }
+ }
+ }
+ WorkList.pop_front();
+ }
+ }
+ };
+
+ void ValueRanges::addToWorklist(Value *V, Constant *C,
+ ICmpInst::Predicate Pred, VRPSolver *VRP) {
+ VRP->add(V, C, Pred, VRP->TopInst);
+ }
+
+ void ValueRanges::markBlock(VRPSolver *VRP) {
+ VRP->UB.mark(VRP->TopBB);
+ }
+
+ /// PredicateSimplifier - This class is a simplifier that replaces
+ /// one equivalent variable with another. It also tracks what
+  /// can't be equal and will solve icmp instructions when possible.
+ /// @brief Root of the predicate simplifier optimization.
+ class VISIBILITY_HIDDEN PredicateSimplifier : public FunctionPass {
+ DomTreeDFS *DTDFS;
+ bool modified;
+ ValueNumbering *VN;
+ InequalityGraph *IG;
+ UnreachableBlocks UB;
+ ValueRanges *VR;
+
+ std::vector<DomTreeDFS::Node *> WorkList;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PredicateSimplifier() : FunctionPass(&ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetData>();
+ AU.addPreserved<TargetData>();
+ }
+
+ private:
+ /// Forwards - Adds new properties to VRPSolver and uses them to
+ /// simplify instructions. Because new properties sometimes apply to
+ /// a transition from one BasicBlock to another, this will use the
+ /// PredicateSimplifier::proceedToSuccessor(s) interface to enter the
+ /// basic block.
+ /// @brief Performs abstract execution of the program.
+ class VISIBILITY_HIDDEN Forwards : public InstVisitor<Forwards> {
+ friend class InstVisitor<Forwards>;
+ PredicateSimplifier *PS;
+ DomTreeDFS::Node *DTNode;
+
+ public:
+ ValueNumbering &VN;
+ InequalityGraph &IG;
+ UnreachableBlocks &UB;
+ ValueRanges &VR;
+
+ Forwards(PredicateSimplifier *PS, DomTreeDFS::Node *DTNode)
+ : PS(PS), DTNode(DTNode), VN(*PS->VN), IG(*PS->IG), UB(PS->UB),
+ VR(*PS->VR) {}
+
+ void visitTerminatorInst(TerminatorInst &TI);
+ void visitBranchInst(BranchInst &BI);
+ void visitSwitchInst(SwitchInst &SI);
+
+ void visitAllocaInst(AllocaInst &AI);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+
+ void visitSExtInst(SExtInst &SI);
+ void visitZExtInst(ZExtInst &ZI);
+
+ void visitBinaryOperator(BinaryOperator &BO);
+ void visitICmpInst(ICmpInst &IC);
+ };
+
+    // Used by terminator instructions to proceed from the current basic
+    // block to the next. Queues each dominator-tree child of "Current" on
+    // the WorkList; runOnFunction later calls visitBasicBlock on each.
+ void proceedToSuccessors(DomTreeDFS::Node *Current) {
+ for (DomTreeDFS::Node::iterator I = Current->begin(),
+ E = Current->end(); I != E; ++I) {
+ WorkList.push_back(*I);
+ }
+ }
+
+ void proceedToSuccessor(DomTreeDFS::Node *Next) {
+ WorkList.push_back(Next);
+ }
+
+ // Visits each instruction in the basic block.
+ void visitBasicBlock(DomTreeDFS::Node *Node) {
+ BasicBlock *BB = Node->getBlock();
+ DOUT << "Entering Basic Block: " << BB->getName()
+ << " (" << Node->getDFSNumIn() << ")\n";
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ visitInstruction(I++, Node);
+ }
+ }
+
+ // Tries to simplify each Instruction and add new properties.
+ void visitInstruction(Instruction *I, DomTreeDFS::Node *DT) {
+ DOUT << "Considering instruction " << *I << "\n";
+ DEBUG(VN->dump());
+ DEBUG(IG->dump());
+ DEBUG(VR->dump());
+
+ // Sometimes instructions are killed in earlier analysis.
+ if (isInstructionTriviallyDead(I)) {
+ ++NumSimple;
+ modified = true;
+ if (unsigned n = VN->valueNumber(I, DTDFS->getRootNode()))
+ if (VN->value(n) == I) IG->remove(n);
+ VN->remove(I);
+ I->eraseFromParent();
+ return;
+ }
+
+#ifndef NDEBUG
+ // Try to replace the whole instruction.
+ Value *V = VN->canonicalize(I, DT);
+ assert(V == I && "Late instruction canonicalization.");
+ if (V != I) {
+ modified = true;
+ ++NumInstruction;
+ DOUT << "Removing " << *I << ", replacing with " << *V << "\n";
+ if (unsigned n = VN->valueNumber(I, DTDFS->getRootNode()))
+ if (VN->value(n) == I) IG->remove(n);
+ VN->remove(I);
+ I->replaceAllUsesWith(V);
+ I->eraseFromParent();
+ return;
+ }
+
+ // Try to substitute operands.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *Oper = I->getOperand(i);
+ Value *V = VN->canonicalize(Oper, DT);
+ assert(V == Oper && "Late operand canonicalization.");
+ if (V != Oper) {
+ modified = true;
+ ++NumVarsReplaced;
+ DOUT << "Resolving " << *I;
+ I->setOperand(i, V);
+ DOUT << " into " << *I;
+ }
+ }
+#endif
+
+ std::string name = I->getParent()->getName();
+ DOUT << "push (%" << name << ")\n";
+ Forwards visit(this, DT);
+ visit.visit(*I);
+ DOUT << "pop (%" << name << ")\n";
+ }
+ };
+
+ bool PredicateSimplifier::runOnFunction(Function &F) {
+ DominatorTree *DT = &getAnalysis<DominatorTree>();
+ DTDFS = new DomTreeDFS(DT);
+ TargetData *TD = &getAnalysis<TargetData>();
+
+ DOUT << "Entering Function: " << F.getName() << "\n";
+
+ modified = false;
+ DomTreeDFS::Node *Root = DTDFS->getRootNode();
+ VN = new ValueNumbering(DTDFS);
+ IG = new InequalityGraph(*VN, Root);
+ VR = new ValueRanges(*VN, TD);
+ WorkList.push_back(Root);
+
+ do {
+ DomTreeDFS::Node *DTNode = WorkList.back();
+ WorkList.pop_back();
+ if (!UB.isDead(DTNode->getBlock())) visitBasicBlock(DTNode);
+ } while (!WorkList.empty());
+
+ delete DTDFS;
+ delete VR;
+ delete IG;
+ delete VN;
+
+ modified |= UB.kill();
+
+ return modified;
+ }
+
+ void PredicateSimplifier::Forwards::visitTerminatorInst(TerminatorInst &TI) {
+ PS->proceedToSuccessors(DTNode);
+ }
+
+ void PredicateSimplifier::Forwards::visitBranchInst(BranchInst &BI) {
+ if (BI.isUnconditional()) {
+ PS->proceedToSuccessors(DTNode);
+ return;
+ }
+
+ Value *Condition = BI.getCondition();
+ BasicBlock *TrueDest = BI.getSuccessor(0);
+ BasicBlock *FalseDest = BI.getSuccessor(1);
+
+ if (isa<Constant>(Condition) || TrueDest == FalseDest) {
+ PS->proceedToSuccessors(DTNode);
+ return;
+ }
+
+ for (DomTreeDFS::Node::iterator I = DTNode->begin(), E = DTNode->end();
+ I != E; ++I) {
+ BasicBlock *Dest = (*I)->getBlock();
+ DOUT << "Branch thinking about %" << Dest->getName()
+ << "(" << PS->DTDFS->getNodeForBlock(Dest)->getDFSNumIn() << ")\n";
+
+ if (Dest == TrueDest) {
+ DOUT << "(" << DTNode->getBlock()->getName() << ") true set:\n";
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, Dest);
+ VRP.add(ConstantInt::getTrue(), Condition, ICmpInst::ICMP_EQ);
+ VRP.solve();
+ DEBUG(VN.dump());
+ DEBUG(IG.dump());
+ DEBUG(VR.dump());
+ } else if (Dest == FalseDest) {
+ DOUT << "(" << DTNode->getBlock()->getName() << ") false set:\n";
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, Dest);
+ VRP.add(ConstantInt::getFalse(), Condition, ICmpInst::ICMP_EQ);
+ VRP.solve();
+ DEBUG(VN.dump());
+ DEBUG(IG.dump());
+ DEBUG(VR.dump());
+ }
+
+ PS->proceedToSuccessor(*I);
+ }
+ }
+
+ void PredicateSimplifier::Forwards::visitSwitchInst(SwitchInst &SI) {
+ Value *Condition = SI.getCondition();
+
+    // Set the EQ property in each of the case BBs, and the NE properties
+    // in the default BB.
+
+ for (DomTreeDFS::Node::iterator I = DTNode->begin(), E = DTNode->end();
+ I != E; ++I) {
+ BasicBlock *BB = (*I)->getBlock();
+ DOUT << "Switch thinking about BB %" << BB->getName()
+ << "(" << PS->DTDFS->getNodeForBlock(BB)->getDFSNumIn() << ")\n";
+
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, BB);
+ if (BB == SI.getDefaultDest()) {
+ for (unsigned i = 1, e = SI.getNumCases(); i < e; ++i)
+ if (SI.getSuccessor(i) != BB)
+ VRP.add(Condition, SI.getCaseValue(i), ICmpInst::ICMP_NE);
+ VRP.solve();
+ } else if (ConstantInt *CI = SI.findCaseDest(BB)) {
+ VRP.add(Condition, CI, ICmpInst::ICMP_EQ);
+ VRP.solve();
+ }
+ PS->proceedToSuccessor(*I);
+ }
+ }
+
+ void PredicateSimplifier::Forwards::visitAllocaInst(AllocaInst &AI) {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &AI);
+ VRP.add(Constant::getNullValue(AI.getType()), &AI, ICmpInst::ICMP_NE);
+ VRP.solve();
+ }
+
+ void PredicateSimplifier::Forwards::visitLoadInst(LoadInst &LI) {
+ Value *Ptr = LI.getPointerOperand();
+ // avoid "load i8* null" -> null NE null.
+ if (isa<Constant>(Ptr)) return;
+
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &LI);
+ VRP.add(Constant::getNullValue(Ptr->getType()), Ptr, ICmpInst::ICMP_NE);
+ VRP.solve();
+ }
+
+ void PredicateSimplifier::Forwards::visitStoreInst(StoreInst &SI) {
+ Value *Ptr = SI.getPointerOperand();
+ if (isa<Constant>(Ptr)) return;
+
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &SI);
+ VRP.add(Constant::getNullValue(Ptr->getType()), Ptr, ICmpInst::ICMP_NE);
+ VRP.solve();
+ }
+
+ void PredicateSimplifier::Forwards::visitSExtInst(SExtInst &SI) {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &SI);
+ uint32_t SrcBitWidth = cast<IntegerType>(SI.getSrcTy())->getBitWidth();
+ uint32_t DstBitWidth = cast<IntegerType>(SI.getDestTy())->getBitWidth();
+ APInt Min(APInt::getHighBitsSet(DstBitWidth, DstBitWidth-SrcBitWidth+1));
+ APInt Max(APInt::getLowBitsSet(DstBitWidth, SrcBitWidth-1));
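+    // E.g. for "sext i8 %x to i32", Min = 0xFFFFFF80 (-128) and Max = 0x7F
+    // (127), pinning the result to the signed range of the source type.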
+ VRP.add(ConstantInt::get(Min), &SI, ICmpInst::ICMP_SLE);
+ VRP.add(ConstantInt::get(Max), &SI, ICmpInst::ICMP_SGE);
+ VRP.solve();
+ }
+
+ void PredicateSimplifier::Forwards::visitZExtInst(ZExtInst &ZI) {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &ZI);
+ uint32_t SrcBitWidth = cast<IntegerType>(ZI.getSrcTy())->getBitWidth();
+ uint32_t DstBitWidth = cast<IntegerType>(ZI.getDestTy())->getBitWidth();
+ APInt Max(APInt::getLowBitsSet(DstBitWidth, SrcBitWidth));
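+    // E.g. for "zext i8 %x to i32", Max = 0xFF, so the result is u<= 255.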
+ VRP.add(ConstantInt::get(Max), &ZI, ICmpInst::ICMP_UGE);
+ VRP.solve();
+ }
+
+ void PredicateSimplifier::Forwards::visitBinaryOperator(BinaryOperator &BO) {
+ Instruction::BinaryOps ops = BO.getOpcode();
+
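+    // A division or remainder that executes implies its divisor was
+    // non-zero; record that fact first.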
+ switch (ops) {
+ default: break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::UDiv:
+ case Instruction::SDiv: {
+ Value *Divisor = BO.getOperand(1);
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO);
+ VRP.add(Constant::getNullValue(Divisor->getType()), Divisor,
+ ICmpInst::ICMP_NE);
+ VRP.solve();
+ break;
+ }
+ }
+
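+    // Then record simple relations between the result and its operands,
+    // e.g. the result of "lshr i32 %x, %y" is always u<= %x.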
+ switch (ops) {
+ default: break;
+ case Instruction::Shl: {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO);
+ VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_UGE);
+ VRP.solve();
+ } break;
+ case Instruction::AShr: {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO);
+ VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_SLE);
+ VRP.solve();
+ } break;
+ case Instruction::LShr:
+ case Instruction::UDiv: {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO);
+ VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_ULE);
+ VRP.solve();
+ } break;
+ case Instruction::URem: {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO);
+ VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_ULE);
+ VRP.solve();
+ } break;
+ case Instruction::And: {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO);
+ VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_ULE);
+ VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_ULE);
+ VRP.solve();
+ } break;
+ case Instruction::Or: {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &BO);
+ VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_UGE);
+ VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_UGE);
+ VRP.solve();
+ } break;
+ }
+ }
+
+ void PredicateSimplifier::Forwards::visitICmpInst(ICmpInst &IC) {
+ // If possible, squeeze the ICmp predicate into something simpler.
+    // E.g., if %x = [0, 4) and we're being asked "icmp uge %x, 3" then
+    // change the predicate to eq.
+
+ // XXX: once we do full PHI handling, modifying the instruction in the
+ // Forwards visitor will cause missed optimizations.
+
+ ICmpInst::Predicate Pred = IC.getPredicate();
+
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULE: Pred = ICmpInst::ICMP_ULT; break;
+ case ICmpInst::ICMP_UGE: Pred = ICmpInst::ICMP_UGT; break;
+ case ICmpInst::ICMP_SLE: Pred = ICmpInst::ICMP_SLT; break;
+ case ICmpInst::ICMP_SGE: Pred = ICmpInst::ICMP_SGT; break;
+ }
+ if (Pred != IC.getPredicate()) {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &IC);
+ if (VRP.isRelatedBy(IC.getOperand(1), IC.getOperand(0),
+ ICmpInst::ICMP_NE)) {
+ ++NumSnuggle;
+ PS->modified = true;
+ IC.setPredicate(Pred);
+ }
+ }
+
+ Pred = IC.getPredicate();
+
+ if (ConstantInt *Op1 = dyn_cast<ConstantInt>(IC.getOperand(1))) {
+ ConstantInt *NextVal = 0;
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT:
+ if (Op1->getValue() != 0)
+ NextVal = ConstantInt::get(Op1->getValue()-1);
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT:
+ if (!Op1->getValue().isAllOnesValue())
+ NextVal = ConstantInt::get(Op1->getValue()+1);
+ break;
+ }
+
+ if (NextVal) {
+ VRPSolver VRP(VN, IG, UB, VR, PS->DTDFS, PS->modified, &IC);
+ if (VRP.isRelatedBy(IC.getOperand(0), NextVal,
+ ICmpInst::getInversePredicate(Pred))) {
+ ICmpInst *NewIC = new ICmpInst(ICmpInst::ICMP_EQ, IC.getOperand(0),
+ NextVal, "", &IC);
+ NewIC->takeName(&IC);
+ IC.replaceAllUsesWith(NewIC);
+
+ // XXX: prove this isn't necessary
+ if (unsigned n = VN.valueNumber(&IC, PS->DTDFS->getRootNode()))
+ if (VN.value(n) == &IC) IG.remove(n);
+ VN.remove(&IC);
+
+ IC.eraseFromParent();
+ ++NumSnuggle;
+ PS->modified = true;
+ }
+ }
+ }
+ }
+}
+
+char PredicateSimplifier::ID = 0;
+static RegisterPass<PredicateSimplifier>
+X("predsimplify", "Predicate Simplifier");
+
+FunctionPass *llvm::createPredicateSimplifierPass() {
+ return new PredicateSimplifier();
+}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
new file mode 100644
index 0000000..293cf92
--- /dev/null
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -0,0 +1,896 @@
+//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates commutative expressions in an order that is designed
+// to promote better constant propagation, GCSE, LICM, PRE...
+//
+// For example: 4 + (x + 5) -> x + (4 + 5)
+//
+// In the implementation of this algorithm, constants are assigned rank = 0,
+// function arguments are rank = 1, and other values are assigned ranks
+// corresponding to the reverse post order traversal of the current function
+// (starting at 2), which effectively gives values in deep loops higher rank
+// than values not in loops.
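+//
+// Operands are then sorted by decreasing rank, so low-rank values such as
+// constants and arguments sink toward the end of the expression, where they
+// can pair up and fold.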
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reassociate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumLinear , "Number of insts linearized");
+STATISTIC(NumChanged, "Number of insts reassociated");
+STATISTIC(NumAnnihil, "Number of expr trees annihilated");
+STATISTIC(NumFactor , "Number of multiplies factored");
+
+namespace {
+ struct VISIBILITY_HIDDEN ValueEntry {
+ unsigned Rank;
+ Value *Op;
+ ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
+ };
+ inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
+ return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start.
+ }
+}
+
+#ifndef NDEBUG
+/// PrintOps - Print out the expression identified in the Ops list.
+///
+static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) {
+ Module *M = I->getParent()->getParent()->getParent();
+ cerr << Instruction::getOpcodeName(I->getOpcode()) << " "
+ << *Ops[0].Op->getType();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ WriteAsOperand(*cerr.stream() << " ", Ops[i].Op, false, M);
+ cerr << "," << Ops[i].Rank;
+ }
+}
+#endif
+
+namespace {
+ class VISIBILITY_HIDDEN Reassociate : public FunctionPass {
+ std::map<BasicBlock*, unsigned> RankMap;
+ std::map<AssertingVH<>, unsigned> ValueRankMap;
+ bool MadeChange;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ Reassociate() : FunctionPass(&ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ private:
+ void BuildRankMap(Function &F);
+ unsigned getRank(Value *V);
+ void ReassociateExpression(BinaryOperator *I);
+ void RewriteExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops,
+ unsigned Idx = 0);
+ Value *OptimizeExpression(BinaryOperator *I, std::vector<ValueEntry> &Ops);
+ void LinearizeExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops);
+ void LinearizeExpr(BinaryOperator *I);
+ Value *RemoveFactorFromExpression(Value *V, Value *Factor);
+ void ReassociateBB(BasicBlock *BB);
+
+ void RemoveDeadBinaryOp(Value *V);
+ };
+}
+
+char Reassociate::ID = 0;
+static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions");
+
+// Public interface to the Reassociate pass
+FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
+
+void Reassociate::RemoveDeadBinaryOp(Value *V) {
+ Instruction *Op = dyn_cast<Instruction>(V);
+  if (!Op || (!isa<BinaryOperator>(Op) && !isa<CmpInst>(Op)) ||
+      !Op->use_empty())
+ return;
+
+ Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
+ RemoveDeadBinaryOp(LHS);
+ RemoveDeadBinaryOp(RHS);
+}
+
+
+static bool isUnmovableInstruction(Instruction *I) {
+ if (I->getOpcode() == Instruction::PHI ||
+ I->getOpcode() == Instruction::Alloca ||
+ I->getOpcode() == Instruction::Load ||
+ I->getOpcode() == Instruction::Malloc ||
+ I->getOpcode() == Instruction::Invoke ||
+ (I->getOpcode() == Instruction::Call &&
+ !isa<DbgInfoIntrinsic>(I)) ||
+ I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::SDiv ||
+ I->getOpcode() == Instruction::FDiv ||
+ I->getOpcode() == Instruction::URem ||
+ I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::FRem)
+ return true;
+ return false;
+}
+
+void Reassociate::BuildRankMap(Function &F) {
+ unsigned i = 2;
+
+ // Assign distinct ranks to function arguments
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ ValueRankMap[&*I] = ++i;
+
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
+ E = RPOT.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ unsigned BBRank = RankMap[BB] = ++i << 16;
+
+ // Walk the basic block, adding precomputed ranks for any instructions that
+ // we cannot move. This ensures that the ranks for these instructions are
+ // all different in the block.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isUnmovableInstruction(I))
+ ValueRankMap[&*I] = ++BBRank;
+ }
+}
+
+unsigned Reassociate::getRank(Value *V) {
+ if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument...
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0; // Otherwise it's a global or constant, rank 0.
+
+ unsigned &CachedRank = ValueRankMap[I];
+ if (CachedRank) return CachedRank; // Rank already known?
+
+  // If this is an expression, return 1+MAX(rank(LHS), rank(RHS)) so that
+ // we can reassociate expressions for code motion! Since we do not recurse
+ // for PHI nodes, we cannot have infinite recursion here, because there
+ // cannot be loops in the value graph that do not go through PHI nodes.
+ unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
+ for (unsigned i = 0, e = I->getNumOperands();
+ i != e && Rank != MaxRank; ++i)
+ Rank = std::max(Rank, getRank(I->getOperand(i)));
+
+ // If this is a not or neg instruction, do not count it for rank. This
+ // assures us that X and ~X will have the same rank.
+ if (!I->getType()->isInteger() ||
+ (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
+ ++Rank;
+
+ //DOUT << "Calculated Rank[" << V->getName() << "] = "
+ // << Rank << "\n";
+
+ return CachedRank = Rank;
+}
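+
+// A worked example (hypothetical values): if rank(%a) == 3 and rank(%b) == 4,
+// then %t = add i32 %a, %b gets rank max(3,4)+1 == 5, while the 'not'
+// %n = xor i32 %t, -1 skips the increment and also gets rank 5, so %t and ~%t
+// rank the same, as intended.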
+
+/// isReassociableOp - Return V as a BinaryOperator if it is an instruction of
+/// the specified opcode with at most one use, otherwise return null.
+static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
+ if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) &&
+ cast<Instruction>(V)->getOpcode() == Opcode)
+ return cast<BinaryOperator>(V);
+ return 0;
+}
+
+/// LowerNegateToMultiply - Replace 0-X with X*-1.
+///
+static Instruction *LowerNegateToMultiply(Instruction *Neg,
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
+ Constant *Cst = ConstantInt::getAllOnesValue(Neg->getType());
+
+ Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
+ ValueRankMap.erase(Neg);
+ Res->takeName(Neg);
+ Neg->replaceAllUsesWith(Res);
+ Neg->eraseFromParent();
+ return Res;
+}
+
+// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'.
+// Note that if D is also part of the expression tree, we recurse to linearize
+// it as well. Apart from that case, this does not recurse into A, B, or C.
+void Reassociate::LinearizeExpr(BinaryOperator *I) {
+ BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+ BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1));
+ assert(isReassociableOp(LHS, I->getOpcode()) &&
+ isReassociableOp(RHS, I->getOpcode()) &&
+ "Not an expression that needs linearization?");
+
+ DOUT << "Linear" << *LHS << *RHS << *I;
+
+  // Move the RHS instruction to live immediately before I, to avoid breaking
+  // dominator properties.
+ RHS->moveBefore(I);
+
+ // Move operands around to do the linearization.
+ I->setOperand(1, RHS->getOperand(0));
+ RHS->setOperand(0, LHS);
+ I->setOperand(0, RHS);
+
+ ++NumLinear;
+ MadeChange = true;
+ DOUT << "Linearized: " << *I;
+
+ // If D is part of this expression tree, tail recurse.
+ if (isReassociableOp(I->getOperand(1), I->getOpcode()))
+ LinearizeExpr(I);
+}
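+
+// For illustration (hypothetical IR), one rotation step turns
+//   %t1 = add i32 %a, %b
+//   %t2 = add i32 %d, %c
+//   %i  = add i32 %t1, %t2
+// into
+//   %t2 = add i32 %t1, %c
+//   %i  = add i32 %t2, %d
+// after which %d is linearized further only if it is itself part of the tree.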
+
+
+/// LinearizeExprTree - Given an associative binary expression tree, traverse
+/// all of the uses putting it into canonical form. This forces a left-linear
+/// form of the expression (((a+b)+c)+d), and collects information about the
+/// rank of the non-tree operands.
+///
+/// NOTE: This intentionally destroys the expression tree operands (turning
+/// them into undef values) to reduce the #uses of the values. This means that
+/// the caller MUST use something like RewriteExprTree to put the values back
+/// in.
+///
+void Reassociate::LinearizeExprTree(BinaryOperator *I,
+ std::vector<ValueEntry> &Ops) {
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ unsigned Opcode = I->getOpcode();
+
+ // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
+ BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
+ BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);
+
+ // If this is a multiply expression tree and it contains internal negations,
+ // transform them into multiplies by -1 so they can be reassociated.
+ if (I->getOpcode() == Instruction::Mul) {
+ if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
+ LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
+ LHSBO = isReassociableOp(LHS, Opcode);
+ }
+ if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
+ RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
+ RHSBO = isReassociableOp(RHS, Opcode);
+ }
+ }
+
+ if (!LHSBO) {
+ if (!RHSBO) {
+      // Neither the LHS nor the RHS is part of the tree, so this is a leaf.
+      // As such, just remember these operands and their rank.
+ Ops.push_back(ValueEntry(getRank(LHS), LHS));
+ Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+ // Clear the leaves out.
+ I->setOperand(0, UndefValue::get(I->getType()));
+ I->setOperand(1, UndefValue::get(I->getType()));
+ return;
+ } else {
+ // Turn X+(Y+Z) -> (Y+Z)+X
+ std::swap(LHSBO, RHSBO);
+ std::swap(LHS, RHS);
+      bool Success = !I->swapOperands();
+      assert(Success && "swapOperands failed");
+      Success = false; // Quiet an unused-variable warning in NDEBUG builds.
+ MadeChange = true;
+ }
+ } else if (RHSBO) {
+    // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees that the RHS is not
+    // part of the expression tree.
+ LinearizeExpr(I);
+ LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
+ RHS = I->getOperand(1);
+ RHSBO = 0;
+ }
+
+ // Okay, now we know that the LHS is a nested expression and that the RHS is
+ // not. Perform reassociation.
+ assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");
+
+ // Move LHS right before I to make sure that the tree expression dominates all
+ // values.
+ LHSBO->moveBefore(I);
+
+ // Linearize the expression tree on the LHS.
+ LinearizeExprTree(LHSBO, Ops);
+
+ // Remember the RHS operand and its rank.
+ Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+ // Clear the RHS leaf out.
+ I->setOperand(1, UndefValue::get(I->getType()));
+}
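+
+// For illustration (hypothetical IR): linearizing %i in
+//   %t = add i32 %a, %b
+//   %i = add i32 %t, %c
+// leaves Ops = [%a, %b, %c] (each with its rank) and clears the leaves, so
+// %t becomes 'add i32 undef, undef' and %i becomes 'add i32 %t, undef';
+// RewriteExprTree must run before this IR is usable again.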
+
+// RewriteExprTree - Now that the operands for this expression tree are
+// linearized and optimized, emit them in-order. This function is written to be
+// tail recursive.
+void Reassociate::RewriteExprTree(BinaryOperator *I,
+ std::vector<ValueEntry> &Ops,
+ unsigned i) {
+ if (i+2 == Ops.size()) {
+ if (I->getOperand(0) != Ops[i].Op ||
+ I->getOperand(1) != Ops[i+1].Op) {
+ Value *OldLHS = I->getOperand(0);
+ DOUT << "RA: " << *I;
+ I->setOperand(0, Ops[i].Op);
+ I->setOperand(1, Ops[i+1].Op);
+ DOUT << "TO: " << *I;
+ MadeChange = true;
+ ++NumChanged;
+
+      // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3)),
+      // delete the extra, now dead, nodes.
+ RemoveDeadBinaryOp(OldLHS);
+ }
+ return;
+ }
+ assert(i+2 < Ops.size() && "Ops index out of range!");
+
+ if (I->getOperand(1) != Ops[i].Op) {
+ DOUT << "RA: " << *I;
+ I->setOperand(1, Ops[i].Op);
+ DOUT << "TO: " << *I;
+ MadeChange = true;
+ ++NumChanged;
+ }
+
+ BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+ assert(LHS->getOpcode() == I->getOpcode() &&
+ "Improper expression tree!");
+
+  // Compact the tree instructions together to guarantee that the expression
+  // tree is dominated by all of Ops.
+ LHS->moveBefore(I);
+ RewriteExprTree(LHS, Ops, i+1);
+}
+
+
+
+// NegateValue - Insert instructions before the instruction pointed to by BI
+// that compute the negated version of the specified value. The negated value
+// is returned, and BI is left pointing at the instruction that should be
+// processed next by the reassociation pass.
+//
+static Value *NegateValue(Value *V, Instruction *BI) {
+  // We are trying to expose opportunities for reassociation. One of the things
+ // that we want to do to achieve this is to push a negation as deep into an
+ // expression chain as possible, to expose the add instructions. In practice,
+ // this means that we turn this:
+ // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
+  // so that later, e.g., Y = 12+X can be reassociated with the -12 to
+  // eliminate the constants. We assume that instcombine will clean up the
+  // mess later if we introduce tons of unnecessary negation instructions...
+ //
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
+ // Push the negates through the add.
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
+
+ // We must move the add instruction here, because the neg instructions do
+ // not dominate the old add instruction in general. By moving it, we are
+ // assured that the neg instructions we just inserted dominate the
+ // instruction we are about to insert after them.
+ //
+ I->moveBefore(BI);
+ I->setName(I->getName()+".neg");
+ return I;
+ }
+
+ // Insert a 'neg' instruction that subtracts the value from zero to get the
+ // negation.
+ //
+ return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
+}
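+
+// For illustration (hypothetical IR): negating the one-use add %s in
+//   %s = add i32 %a, %b
+// pushes the negation down and produces
+//   %a.neg = sub i32 0, %a
+//   %b.neg = sub i32 0, %b
+//   %s.neg = add i32 %a.neg, %b.neg   ; the original add, moved and renamed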
+
+/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
+/// X-Y into (X + -Y).
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
+ // If this is a negation, we can't split it up!
+ if (BinaryOperator::isNeg(Sub))
+ return false;
+
+  // Don't bother to break this up unless either operand is a reassociable add
+  // or subtract, or the subtract's only user is a reassociable add or subtract.
+ if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(0), Instruction::Sub))
+ return true;
+ if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(1), Instruction::Sub))
+ return true;
+ if (Sub->hasOneUse() &&
+ (isReassociableOp(Sub->use_back(), Instruction::Add) ||
+ isReassociableOp(Sub->use_back(), Instruction::Sub)))
+ return true;
+
+ return false;
+}
+
+/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
+/// only used by an add, transform this into (X+(0-Y)) to promote better
+/// reassociation.
+static Instruction *BreakUpSubtract(Instruction *Sub,
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
+ // Convert a subtract into an add and a neg instruction... so that sub
+ // instructions can be commuted with other add instructions...
+ //
+ // Calculate the negative value of Operand 1 of the sub instruction...
+ // and set it as the RHS of the add instruction we just made...
+ //
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Instruction *New =
+ BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
+ New->takeName(Sub);
+
+ // Everyone now refers to the add instruction.
+ ValueRankMap.erase(Sub);
+ Sub->replaceAllUsesWith(New);
+ Sub->eraseFromParent();
+
+ DOUT << "Negated: " << *New;
+ return New;
+}
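+
+// For illustration (hypothetical IR):
+//   %x = sub i32 %a, %b
+// becomes
+//   %b.neg = sub i32 0, %b
+//   %x     = add i32 %a, %b.neg
+// so the resulting add can commute with neighboring adds during reassociation.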
+
+/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
+/// by one, change this into a multiply by a constant to assist with further
+/// reassociation.
+static Instruction *ConvertShiftToMul(Instruction *Shl,
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
+ // If an operand of this shift is a reassociable multiply, or if the shift
+ // is used by a reassociable multiply or add, turn into a multiply.
+ if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
+ (Shl->hasOneUse() &&
+ (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
+ isReassociableOp(Shl->use_back(), Instruction::Add)))) {
+ Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
+ MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+
+ Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
+ "", Shl);
+ ValueRankMap.erase(Shl);
+ Mul->takeName(Shl);
+ Shl->replaceAllUsesWith(Mul);
+ Shl->eraseFromParent();
+ return Mul;
+ }
+ return 0;
+}
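+
+// For illustration (hypothetical IR): given a reassociable multiply user,
+//   %t = shl i32 %x, 3
+// is rewritten to
+//   %t = mul i32 %x, 8
+// since (1 << 3) == 8.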
+
+// Scan backwards and forwards among values with the same rank as element i to
+// see if X exists. If X does not exist, return i.
+static unsigned FindInOperandList(std::vector<ValueEntry> &Ops, unsigned i,
+ Value *X) {
+ unsigned XRank = Ops[i].Rank;
+ unsigned e = Ops.size();
+ for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
+ if (Ops[j].Op == X)
+ return j;
+ // Scan backwards
+ for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
+ if (Ops[j].Op == X)
+ return j;
+ return i;
+}
+
+/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
+/// and returning the result. Insert the tree before I.
+static Value *EmitAddTreeOfValues(Instruction *I, std::vector<Value*> &Ops) {
+ if (Ops.size() == 1) return Ops.back();
+
+ Value *V1 = Ops.back();
+ Ops.pop_back();
+ Value *V2 = EmitAddTreeOfValues(I, Ops);
+ return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
+}
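+
+// For illustration (hypothetical values; names are auto-uniqued):
+// Ops = [%a, %b, %c] emits roughly
+//   %tmp  = add i32 %a, %b
+//   %tmp1 = add i32 %tmp, %c
+// i.e. the left-linear chain ((%a + %b) + %c), inserted before I.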
+
+/// RemoveFactorFromExpression - If V is an expression tree that is a
+/// multiplication sequence, and if this sequence contains a multiply by Factor,
+/// remove Factor from the tree and return the new tree.
+Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+ if (!BO) return 0;
+
+ std::vector<ValueEntry> Factors;
+ LinearizeExprTree(BO, Factors);
+
+ bool FoundFactor = false;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i)
+ if (Factors[i].Op == Factor) {
+ FoundFactor = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+ if (!FoundFactor) {
+ // Make sure to restore the operands to the expression tree.
+ RewriteExprTree(BO, Factors);
+ return 0;
+ }
+
+ if (Factors.size() == 1) return Factors[0].Op;
+
+ RewriteExprTree(BO, Factors);
+ return BO;
+}
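+
+// For illustration (hypothetical values): with V == %a*%b*%c as a reassociable
+// multiply tree and Factor == %b, the factor list becomes [%a, %c] and the
+// rewritten tree computing %a*%c is returned. If Factor does not occur, the
+// tree is restored unchanged and null is returned.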
+
+/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
+/// add its operands as factors, otherwise add V to the list of factors.
+static void FindSingleUseMultiplyFactors(Value *V,
+ std::vector<Value*> &Factors) {
+ BinaryOperator *BO;
+ if ((!V->hasOneUse() && !V->use_empty()) ||
+ !(BO = dyn_cast<BinaryOperator>(V)) ||
+ BO->getOpcode() != Instruction::Mul) {
+ Factors.push_back(V);
+ return;
+ }
+
+ // Otherwise, add the LHS and RHS to the list of factors.
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
+}
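+
+// For illustration (hypothetical values): for the single-use tree
+// (%a * %b) * %c this produces Factors == [%c, %b, %a], since each call
+// pushes the right operand before recursing into the left.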
+
+
+
+Value *Reassociate::OptimizeExpression(BinaryOperator *I,
+ std::vector<ValueEntry> &Ops) {
+ // Now that we have the linearized expression tree, try to optimize it.
+ // Start by folding any constants that we found.
+ bool IterateOptimization = false;
+ if (Ops.size() == 1) return Ops[0].Op;
+
+ unsigned Opcode = I->getOpcode();
+
+ if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
+ if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
+ Ops.pop_back();
+ Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
+ return OptimizeExpression(I, Ops);
+ }
+
+ // Check for destructive annihilation due to a constant being used.
+ if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op))
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ if (CstVal->isZero()) { // ... & 0 -> 0
+ ++NumAnnihil;
+ return CstVal;
+ } else if (CstVal->isAllOnesValue()) { // ... & -1 -> ...
+ Ops.pop_back();
+ }
+ break;
+ case Instruction::Mul:
+ if (CstVal->isZero()) { // ... * 0 -> 0
+ ++NumAnnihil;
+ return CstVal;
+      } else if (CstVal->isOne()) {
+ Ops.pop_back(); // ... * 1 -> ...
+ }
+ break;
+ case Instruction::Or:
+ if (CstVal->isAllOnesValue()) { // ... | -1 -> -1
+ ++NumAnnihil;
+ return CstVal;
+ }
+ // FALLTHROUGH!
+ case Instruction::Add:
+ case Instruction::Xor:
+ if (CstVal->isZero()) // ... [|^+] 0 -> ...
+ Ops.pop_back();
+ break;
+ }
+ if (Ops.size() == 1) return Ops[0].Op;
+
+  // Handle destructive annihilation due to identities between elements in the
+  // argument list here.
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
+ // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // First, check for X and ~X in the operand list.
+ assert(i < Ops.size());
+ if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
+ Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX != i) {
+ if (Opcode == Instruction::And) { // ...&X&~X = 0
+ ++NumAnnihil;
+ return Constant::getNullValue(X->getType());
+ } else if (Opcode == Instruction::Or) { // ...|X|~X = -1
+ ++NumAnnihil;
+ return ConstantInt::getAllOnesValue(X->getType());
+ }
+ }
+ }
+
+ // Next, check for duplicate pairs of values, which we assume are next to
+ // each other, due to our sorting criteria.
+ assert(i < Ops.size());
+ if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
+ if (Opcode == Instruction::And || Opcode == Instruction::Or) {
+ // Drop duplicate values.
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ IterateOptimization = true;
+ ++NumAnnihil;
+ } else {
+ assert(Opcode == Instruction::Xor);
+ if (e == 2) {
+ ++NumAnnihil;
+ return Constant::getNullValue(Ops[0].Op->getType());
+ }
+ // ... X^X -> ...
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
+ i -= 1; e -= 2;
+ IterateOptimization = true;
+ ++NumAnnihil;
+ }
+ }
+ }
+ break;
+
+ case Instruction::Add:
+ // Scan the operand lists looking for X and -X pairs. If we find any, we
+ // can simplify the expression. X+-X == 0.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ assert(i < Ops.size());
+ // Check for X and -X in the operand list.
+ if (BinaryOperator::isNeg(Ops[i].Op)) {
+ Value *X = BinaryOperator::getNegArgument(Ops[i].Op);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX != i) {
+ // Remove X and -X from the operand list.
+ if (Ops.size() == 2) {
+ ++NumAnnihil;
+ return Constant::getNullValue(X->getType());
+ } else {
+ Ops.erase(Ops.begin()+i);
+ if (i < FoundX)
+ --FoundX;
+ else
+ --i; // Need to back up an extra one.
+ Ops.erase(Ops.begin()+FoundX);
+ IterateOptimization = true;
+ ++NumAnnihil;
+ --i; // Revisit element.
+ e -= 2; // Removed two elements.
+ }
+ }
+ }
+ }
+
+
+ // Scan the operand list, checking to see if there are any common factors
+ // between operands. Consider something like A*A+A*B*C+D. We would like to
+ // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
+ // To efficiently find this, we count the number of times a factor occurs
+ // for any ADD operands that are MULs.
+ std::map<Value*, unsigned> FactorOccurrences;
+ unsigned MaxOcc = 0;
+ Value *MaxOccVal = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op)) {
+ if (BOp->getOpcode() == Instruction::Mul && BOp->use_empty()) {
+ // Compute all of the factors of this added value.
+ std::vector<Value*> Factors;
+ FindSingleUseMultiplyFactors(BOp, Factors);
+ assert(Factors.size() > 1 && "Bad linearize!");
+
+ // Add one to FactorOccurrences for each unique factor in this op.
+ if (Factors.size() == 2) {
+ unsigned Occ = ++FactorOccurrences[Factors[0]];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[0]; }
+ if (Factors[0] != Factors[1]) { // Don't double count A*A.
+ Occ = ++FactorOccurrences[Factors[1]];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[1]; }
+ }
+ } else {
+ std::set<Value*> Duplicates;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ if (Duplicates.insert(Factors[i]).second) {
+ unsigned Occ = ++FactorOccurrences[Factors[i]];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[i]; }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // If any factor occurred more than one time, we can pull it out.
+ if (MaxOcc > 1) {
+ DOUT << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n";
+
+    // Create a new instruction that uses the MaxOccVal twice. Without this,
+    // removing a factor from an expression could drop a use of MaxOccVal,
+    // which can cause RemoveFactorFromExpression on successive values to
+    // behave differently.
+ Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
+ std::vector<Value*> NewMulOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+ }
+
+ // No need for extra uses anymore.
+ delete DummyInst;
+
+ unsigned NumAddedValues = NewMulOps.size();
+ Value *V = EmitAddTreeOfValues(I, NewMulOps);
+ Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+
+ // Now that we have inserted V and its sole use, optimize it. This allows
+ // us to handle cases that require multiple factoring steps, such as this:
+ // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
+ if (NumAddedValues > 1)
+ ReassociateExpression(cast<BinaryOperator>(V));
+
+ ++NumFactor;
+
+ if (Ops.empty())
+ return V2;
+
+ // Add the new value to the list of things being added.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
+
+ // Rewrite the tree so that there is now a use of V.
+ RewriteExprTree(I, Ops);
+ return OptimizeExpression(I, Ops);
+ }
+ break;
+ //case Instruction::Mul:
+ }
+
+ if (IterateOptimization)
+ return OptimizeExpression(I, Ops);
+ return 0;
+}
+
+
+/// ReassociateBB - Inspect all of the instructions in this basic block,
+/// reassociating them as we go.
+void Reassociate::ReassociateBB(BasicBlock *BB) {
+ for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
+ Instruction *BI = BBI++;
+ if (BI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BI->getOperand(1)))
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+ MadeChange = true;
+ BI = NI;
+ }
+
+ // Reject cases where it is pointless to do this.
+ if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPoint() ||
+ isa<VectorType>(BI->getType()))
+ continue; // Floating point ops are not associative.
+
+ // If this is a subtract instruction which is not already in negate form,
+ // see if we can convert it to X+-Y.
+ if (BI->getOpcode() == Instruction::Sub) {
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
+ MadeChange = true;
+ } else if (BinaryOperator::isNeg(BI)) {
+ // Otherwise, this is a negation. See if the operand is a multiply tree
+ // and if this is not an inner node of a multiply tree.
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
+ (!BI->hasOneUse() ||
+ !isReassociableOp(BI->use_back(), Instruction::Mul))) {
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
+ MadeChange = true;
+ }
+ }
+ }
+
+ // If this instruction is a commutative binary operator, process it.
+ if (!BI->isAssociative()) continue;
+ BinaryOperator *I = cast<BinaryOperator>(BI);
+
+ // If this is an interior node of a reassociable tree, ignore it until we
+ // get to the root of the tree, to avoid N^2 analysis.
+ if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+ continue;
+
+ // If this is an add tree that is used by a sub instruction, ignore it
+ // until we process the subtract.
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+ continue;
+
+ ReassociateExpression(I);
+ }
+}
+
+void Reassociate::ReassociateExpression(BinaryOperator *I) {
+
+  // First, walk the expression tree, linearizing the tree and collecting all
+  // of the operand information.
+ std::vector<ValueEntry> Ops;
+ LinearizeExprTree(I, Ops);
+
+ DOUT << "RAIn:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+
+ // Now that we have linearized the tree to a list and have gathered all of
+ // the operands and their ranks, sort the operands by their rank. Use a
+ // stable_sort so that values with equal ranks will have their relative
+ // positions maintained (and so the compiler is deterministic). Note that
+ // this sorts so that the highest ranking values end up at the beginning of
+ // the vector.
+ std::stable_sort(Ops.begin(), Ops.end());
+
+ // OptimizeExpression - Now that we have the expression tree in a convenient
+ // sorted form, optimize it globally if possible.
+ if (Value *V = OptimizeExpression(I, Ops)) {
+    // This expression tree simplified to something that isn't a tree;
+    // eliminate it.
+ DOUT << "Reassoc to scalar: " << *V << "\n";
+ I->replaceAllUsesWith(V);
+ RemoveDeadBinaryOp(I);
+ return;
+ }
+
+ // We want to sink immediates as deeply as possible except in the case where
+ // this is a multiply tree used only by an add, and the immediate is a -1.
+ // In this case we reassociate to put the negation on the outside so that we
+ // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
+ if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Ops.back().Op) &&
+ cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+ Ops.insert(Ops.begin(), Ops.back());
+ Ops.pop_back();
+ }
+
+ DOUT << "RAOut:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+
+ if (Ops.size() == 1) {
+    // This expression tree simplified to something that isn't a tree;
+    // eliminate it.
+ I->replaceAllUsesWith(Ops[0].Op);
+ RemoveDeadBinaryOp(I);
+ } else {
+ // Now that we ordered and optimized the expressions, splat them back into
+ // the expression tree, removing any unneeded nodes.
+ RewriteExprTree(I, Ops);
+ }
+}
+
+
+bool Reassociate::runOnFunction(Function &F) {
+ // Recalculate the rank map for F
+ BuildRankMap(F);
+
+ MadeChange = false;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ ReassociateBB(FI);
+
+ // We are done with the rank map...
+ RankMap.clear();
+ ValueRankMap.clear();
+ return MadeChange;
+}
+
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
new file mode 100644
index 0000000..46b2952
--- /dev/null
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -0,0 +1,125 @@
+//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file demotes all registers to memory references. It is intended to be
+// the inverse of PromoteMemoryToRegister. By converting to loads, the only
+// values live across basic blocks are allocas and loads before phi nodes.
+// It is intended that this should make CFG hacking much easier.
+// To make later hacking easier, the entry block is split into two, such that
+// all introduced allocas and nothing else are in the entry block.
+//
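+// For illustration (hypothetical IR; the names are made up), a value %x
+// defined in the entry block and used in a later block:
+//   %x = add i32 %a, %b            ; entry
+//   %y = add i32 %x, 1             ; next
+// is demoted to roughly:
+//   %x.slot = alloca i32           ; entry
+//   %x = add i32 %a, %b
+//   store i32 %x, i32* %x.slot
+//   %x.reload = load i32* %x.slot  ; next
+//   %y = add i32 %x.reload, 1
+//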
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg2mem"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumRegsDemoted, "Number of registers demoted");
+STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
+
+namespace {
+ struct VISIBILITY_HIDDEN RegToMem : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RegToMem() : FunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addPreservedID(BreakCriticalEdgesID);
+ }
+
+ bool valueEscapes(Instruction* i) {
+ BasicBlock* bb = i->getParent();
+ for (Value::use_iterator ii = i->use_begin(), ie = i->use_end();
+ ii != ie; ++ii)
+ if (cast<Instruction>(*ii)->getParent() != bb ||
+ isa<PHINode>(*ii))
+ return true;
+ return false;
+ }
+
+ virtual bool runOnFunction(Function &F) {
+ if (!F.isDeclaration()) {
+ // Insert all new allocas into entry block.
+ BasicBlock* BBEntry = &F.getEntryBlock();
+ assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+ "Entry block to function must not have predecessors!");
+
+      // Find the first non-alloca instruction and create an insertion point.
+      // This is safe if the block is well-formed: it always has a terminator;
+      // otherwise we'll trip an assertion.
+ BasicBlock::iterator I = BBEntry->begin();
+ while (isa<AllocaInst>(I)) ++I;
+
+ CastInst *AllocaInsertionPoint =
+ CastInst::Create(Instruction::BitCast,
+ Constant::getNullValue(Type::Int32Ty), Type::Int32Ty,
+ "reg2mem alloca point", I);
+
+      // Find the escaping instructions, but don't create stack slots for
+      // allocas already in the entry block.
+ std::list<Instruction*> worklist;
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib) {
+ if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+ valueEscapes(iib)) {
+ worklist.push_front(&*iib);
+ }
+ }
+
+ // Demote escaped instructions
+ NumRegsDemoted += worklist.size();
+ for (std::list<Instruction*>::iterator ilb = worklist.begin(),
+ ile = worklist.end(); ilb != ile; ++ilb)
+ DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+
+ worklist.clear();
+
+      // Find all phi nodes.
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib)
+ if (isa<PHINode>(iib))
+ worklist.push_front(&*iib);
+
+ // Demote phi nodes
+ NumPhisDemoted += worklist.size();
+ for (std::list<Instruction*>::iterator ilb = worklist.begin(),
+ ile = worklist.end(); ilb != ile; ++ilb)
+ DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+
+ return true;
+ }
+ return false;
+ }
+ };
+}
+
+char RegToMem::ID = 0;
+static RegisterPass<RegToMem>
+X("reg2mem", "Demote all values to stack slots");
+
+// createDemoteRegisterToMemory - Provide an entry point to create this pass.
+//
+const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
+FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
+ return new RegToMem();
+}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
new file mode 100644
index 0000000..d73519c
--- /dev/null
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -0,0 +1,1855 @@
+//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements sparse conditional constant propagation and merging:
+//
+// Specifically, this:
+// * Assumes values are constant unless proven otherwise
+// * Assumes BasicBlocks are dead unless proven otherwise
+// * Proves values to be constant, and replaces them with constants
+// * Proves conditional branches to be unconditional
+//
+// Notice that:
+// * This pass has a habit of making definitions be dead. It is a good idea
+// to run a DCE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sccp"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumInstRemoved, "Number of instructions removed");
+STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
+
+STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP");
+STATISTIC(IPNumDeadBlocks , "Number of basic blocks unreachable by IPSCCP");
+STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP");
+STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP");
+
+namespace {
+/// LatticeVal class - This class represents the different lattice values that
+/// an LLVM value may occupy. It is a simple class with value semantics.
+///
+class VISIBILITY_HIDDEN LatticeVal {
+ enum {
+ /// undefined - This LLVM Value has no known value yet.
+ undefined,
+
+ /// constant - This LLVM Value has a specific constant value.
+ constant,
+
+ /// forcedconstant - This LLVM Value was thought to be undef until
+ /// ResolvedUndefsIn. This is treated just like 'constant', but if merged
+ /// with another (different) constant, it goes to overdefined, instead of
+ /// asserting.
+ forcedconstant,
+
+ /// overdefined - This instruction is not known to be constant, and we know
+ /// it has a value.
+ overdefined
+ } LatticeValue; // The current lattice position
+
+ Constant *ConstantVal; // If Constant value, the current value
+public:
+ inline LatticeVal() : LatticeValue(undefined), ConstantVal(0) {}
+
+ // markOverdefined - Return true if this is a new status to be in...
+ inline bool markOverdefined() {
+ if (LatticeValue != overdefined) {
+ LatticeValue = overdefined;
+ return true;
+ }
+ return false;
+ }
+
+ // markConstant - Return true if this is a new status for us.
+ inline bool markConstant(Constant *V) {
+ if (LatticeValue != constant) {
+ if (LatticeValue == undefined) {
+ LatticeValue = constant;
+ assert(V && "Marking constant with NULL");
+ ConstantVal = V;
+ } else {
+ assert(LatticeValue == forcedconstant &&
+ "Cannot move from overdefined to constant!");
+ // Stay at forcedconstant if the constant is the same.
+ if (V == ConstantVal) return false;
+
+ // Otherwise, we go to overdefined. Assumptions made based on the
+ // forced value are possibly wrong. Assuming this is another constant
+ // could expose a contradiction.
+ LatticeValue = overdefined;
+ }
+ return true;
+ } else {
+ assert(ConstantVal == V && "Marking constant with different value");
+ }
+ return false;
+ }
+
+ inline void markForcedConstant(Constant *V) {
+ assert(LatticeValue == undefined && "Can't force a defined value!");
+ LatticeValue = forcedconstant;
+ ConstantVal = V;
+ }
+
+ inline bool isUndefined() const { return LatticeValue == undefined; }
+ inline bool isConstant() const {
+ return LatticeValue == constant || LatticeValue == forcedconstant;
+ }
+ inline bool isOverdefined() const { return LatticeValue == overdefined; }
+
+ inline Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return ConstantVal;
+ }
+};
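+
+// Example transitions (illustrative): a LatticeVal starts 'undefined'; a call
+// to markConstant(C) moves it to 'constant' C; a later markConstant(D) with
+// D != C asserts unless the value was 'forcedconstant', in which case it
+// falls to 'overdefined'. markOverdefined() is a legal downward move from any
+// state.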
+
+//===----------------------------------------------------------------------===//
+//
+/// SCCPSolver - This class is a general purpose solver for Sparse Conditional
+/// Constant Propagation.
+///
+class SCCPSolver : public InstVisitor<SCCPSolver> {
+ DenseSet<BasicBlock*> BBExecutable;// The basic blocks that are executable
+ std::map<Value*, LatticeVal> ValueState; // The state each value is in.
+
+  /// TrackedGlobals - If we are tracking any values for the contents of a
+  /// global variable, we keep a mapping from the constant accessor to the
+  /// element of the global, to the currently known value. If the value
+  /// becomes overdefined, its entry is simply removed from this map.
+ DenseMap<GlobalVariable*, LatticeVal> TrackedGlobals;
+
+ /// TrackedRetVals - If we are tracking arguments into and the return
+ /// value out of a function, it will have an entry in this map, indicating
+ /// what the known return value for the function is.
+ DenseMap<Function*, LatticeVal> TrackedRetVals;
+
+ /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
+ /// that return multiple values.
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
+
+ // The reason for two worklists is that overdefined is the lowest state
+ // on the lattice, and moving things to overdefined as fast as possible
+ // makes SCCP converge much faster.
+ // By having a separate worklist, we accomplish this because everything
+ // possibly overdefined will become overdefined at the soonest possible
+ // point.
+ SmallVector<Value*, 64> OverdefinedInstWorkList;
+ SmallVector<Value*, 64> InstWorkList;
+
+
+ SmallVector<BasicBlock*, 64> BBWorkList; // The BasicBlock work list
+
+ /// UsersOfOverdefinedPHIs - Keep track of any users of PHI nodes that are not
+ /// overdefined, despite the fact that the PHI node is overdefined.
+ std::multimap<PHINode*, Instruction*> UsersOfOverdefinedPHIs;
+
+ /// KnownFeasibleEdges - Entries in this set are edges which have already had
+ /// PHI nodes retriggered.
+ typedef std::pair<BasicBlock*, BasicBlock*> Edge;
+ DenseSet<Edge> KnownFeasibleEdges;
+public:
+
+ /// MarkBlockExecutable - This method can be used by clients to mark all of
+ /// the blocks that are known to be intrinsically live in the processed unit.
+ void MarkBlockExecutable(BasicBlock *BB) {
+ DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
+ BBExecutable.insert(BB); // Basic block is executable!
+ BBWorkList.push_back(BB); // Add the block to the work list!
+ }
+
+ /// TrackValueOfGlobalVariable - Clients can use this method to
+ /// inform the SCCPSolver that it should track loads and stores to the
+ /// specified global variable if it can. This is only legal to call if
+ /// performing Interprocedural SCCP.
+ void TrackValueOfGlobalVariable(GlobalVariable *GV) {
+ const Type *ElTy = GV->getType()->getElementType();
+ if (ElTy->isFirstClassType()) {
+ LatticeVal &IV = TrackedGlobals[GV];
+ if (!isa<UndefValue>(GV->getInitializer()))
+ IV.markConstant(GV->getInitializer());
+ }
+ }
+
+ /// AddTrackedFunction - If the SCCP solver is supposed to track calls into
+ /// and out of the specified function (which cannot have its address taken),
+ /// this method must be called.
+ void AddTrackedFunction(Function *F) {
+ assert(F->hasLocalLinkage() && "Can only track internal functions!");
+ // Add an entry, F -> undef.
+ if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
+ LatticeVal()));
+ } else
+ TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
+ }
+
+ /// Solve - Solve for constants and executable blocks.
+ ///
+ void Solve();
+
+ /// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+ /// that branches on undef values cannot reach any of their successors.
+ /// However, this is not a safe assumption. After we solve dataflow, this
+  /// method should be used to handle this. If it returns true, the solver
+  /// should be rerun.
+ bool ResolvedUndefsIn(Function &F);
+
+ bool isBlockExecutable(BasicBlock *BB) const {
+ return BBExecutable.count(BB);
+ }
+
+ /// getValueMapping - Once we have solved for constants, return the mapping of
+ /// LLVM values to LatticeVals.
+ std::map<Value*, LatticeVal> &getValueMapping() {
+ return ValueState;
+ }
+
+ /// getTrackedRetVals - Get the inferred return value map.
+ ///
+ const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+ return TrackedRetVals;
+ }
+
+ /// getTrackedGlobals - Get and return the set of inferred initializers for
+ /// global variables.
+ const DenseMap<GlobalVariable*, LatticeVal> &getTrackedGlobals() {
+ return TrackedGlobals;
+ }
+
+ inline void markOverdefined(Value *V) {
+ markOverdefined(ValueState[V], V);
+ }
+
+private:
+ // markConstant - Make a value be marked as "constant". If the value
+ // is not already a constant, add it to the instruction work list so that
+ // the users of the instruction are updated later.
+ //
+ inline void markConstant(LatticeVal &IV, Value *V, Constant *C) {
+ if (IV.markConstant(C)) {
+ DOUT << "markConstant: " << *C << ": " << *V;
+ InstWorkList.push_back(V);
+ }
+ }
+
+ inline void markForcedConstant(LatticeVal &IV, Value *V, Constant *C) {
+ IV.markForcedConstant(C);
+ DOUT << "markForcedConstant: " << *C << ": " << *V;
+ InstWorkList.push_back(V);
+ }
+
+ inline void markConstant(Value *V, Constant *C) {
+ markConstant(ValueState[V], V, C);
+ }
+
+ // markOverdefined - Make a value be marked as "overdefined". If the
+ // value is not already overdefined, add it to the overdefined instruction
+ // work list so that the users of the instruction are updated later.
+ inline void markOverdefined(LatticeVal &IV, Value *V) {
+ if (IV.markOverdefined()) {
+ DEBUG(DOUT << "markOverdefined: ";
+ if (Function *F = dyn_cast<Function>(V))
+ DOUT << "Function '" << F->getName() << "'\n";
+ else
+ DOUT << *V);
+ // Only instructions go on the work list
+ OverdefinedInstWorkList.push_back(V);
+ }
+ }
+
+ inline void mergeInValue(LatticeVal &IV, Value *V, LatticeVal &MergeWithV) {
+ if (IV.isOverdefined() || MergeWithV.isUndefined())
+ return; // Noop.
+ if (MergeWithV.isOverdefined())
+ markOverdefined(IV, V);
+ else if (IV.isUndefined())
+ markConstant(IV, V, MergeWithV.getConstant());
+ else if (IV.getConstant() != MergeWithV.getConstant())
+ markOverdefined(IV, V);
+ }
+
+ inline void mergeInValue(Value *V, LatticeVal &MergeWithV) {
+ return mergeInValue(ValueState[V], V, MergeWithV);
+ }
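+
+  // For illustration (hypothetical values): merging constant 4 into an
+  // undefined value yields constant 4; merging constant 5 into constant 4
+  // yields overdefined; merging an undefined value into anything is a noop.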
+
+
+  // getValueState - Return the LatticeVal object that corresponds to the value.
+  // This function is necessary because not all values should start out in the
+  // undefined state... Arguments should be overdefined, and
+  // constants should be marked as constants. If a value is not known to be an
+  // Instruction object, then use this accessor to get its value from the map.
+ //
+ inline LatticeVal &getValueState(Value *V) {
+ std::map<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ if (I != ValueState.end()) return I->second; // Common case, in the map
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<UndefValue>(V)) {
+ // Nothing to do, remain undefined.
+ } else {
+ LatticeVal &LV = ValueState[C];
+ LV.markConstant(C); // Constants are constant
+ return LV;
+ }
+ }
+    // All others are undefined by default...
+ return ValueState[V];
+ }
+
+ // markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+ // work list if it is not already executable...
+ //
+ void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ if (BBExecutable.count(Dest)) {
+ DOUT << "Marking Edge Executable: " << Source->getNameStart()
+ << " -> " << Dest->getNameStart() << "\n";
+
+ // The destination is already executable, but we just made an edge
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+ visitPHINode(*cast<PHINode>(I));
+
+ } else {
+ MarkBlockExecutable(Dest);
+ }
+ }
+
+ // getFeasibleSuccessors - Return a vector of booleans to indicate which
+ // successors are reachable from a given terminator instruction.
+ //
+ void getFeasibleSuccessors(TerminatorInst &TI, SmallVector<bool, 16> &Succs);
+
+ // isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+ // block to the 'To' basic block is currently feasible...
+ //
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
+
+  // OperandChangedState - This method is invoked on all of the users of an
+  // instruction whose state has just changed in some way. Based on this
+  // information, we need to update the specified user of this instruction.
+ //
+ void OperandChangedState(User *U) {
+ // Only instructions use other variable values!
+ Instruction &I = cast<Instruction>(*U);
+ if (BBExecutable.count(I.getParent())) // Inst is executable?
+ visit(I);
+ }
+
+private:
+ friend class InstVisitor<SCCPSolver>;
+
+ // visit implementations - Something changed in this instruction... Either an
+ // operand made a transition, or the instruction is newly executable. Change
+ // the value type of I to reflect these changes if appropriate.
+ //
+ void visitPHINode(PHINode &I);
+
+ // Terminators
+ void visitReturnInst(ReturnInst &I);
+ void visitTerminatorInst(TerminatorInst &TI);
+
+ void visitCastInst(CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitCmpInst(CmpInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ // Instructions that cannot be folded away...
+ void visitStoreInst (Instruction &I);
+ void visitLoadInst (LoadInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitCallInst (CallInst &I) { visitCallSite(CallSite::get(&I)); }
+ void visitInvokeInst (InvokeInst &II) {
+ visitCallSite(CallSite::get(&II));
+ visitTerminatorInst(II);
+ }
+ void visitCallSite (CallSite CS);
+ void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
+ void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitAllocationInst(Instruction &I) { markOverdefined(&I); }
+ void visitVANextInst (Instruction &I) { markOverdefined(&I); }
+ void visitVAArgInst (Instruction &I) { markOverdefined(&I); }
+ void visitFreeInst (Instruction &I) { /*returns void*/ }
+
+ void visitInstruction(Instruction &I) {
+ // If a new instruction is added to LLVM that we don't handle...
+ cerr << "SCCP: Don't know how to handle: " << I;
+ markOverdefined(&I); // Just in case
+ }
+};
+
+} // end anonymous namespace
+
+
+// getFeasibleSuccessors - Return a vector of booleans to indicate which
+// successors are reachable from a given terminator instruction.
+//
+void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVector<bool, 16> &Succs) {
+ Succs.resize(TI.getNumSuccessors());
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ } else {
+ LatticeVal &BCValue = getValueState(BI->getCondition());
+ if (BCValue.isOverdefined() ||
+ (BCValue.isConstant() && !isa<ConstantInt>(BCValue.getConstant()))) {
+ // Overdefined condition variables, and branches on unfoldable constant
+ // conditions, mean the branch could go either way.
+ Succs[0] = Succs[1] = true;
+ } else if (BCValue.isConstant()) {
+ // Constant condition variables mean the branch can only go a single way
+ Succs[BCValue.getConstant() == ConstantInt::getFalse()] = true;
+ }
+ }
+ } else if (isa<InvokeInst>(&TI)) {
+    // An invoke instruction's successors are always executable.
+ Succs[0] = Succs[1] = true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ LatticeVal &SCValue = getValueState(SI->getCondition());
+ if (SCValue.isOverdefined() || // Overdefined condition?
+ (SCValue.isConstant() && !isa<ConstantInt>(SCValue.getConstant()))) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ } else if (SCValue.isConstant())
+ Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true;
+ } else {
+ assert(0 && "SCCP: Don't know how to handle this terminator!");
+ }
+}
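+
+// For illustration (hypothetical values): for 'br i1 %c, label %t, label %f',
+// a constant-true %c marks only %t feasible, a constant-false %c only %f, and
+// an overdefined %c marks both successors feasible.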
+
+
+// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+// block to the 'To' basic block is currently feasible...
+//
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
+ assert(BBExecutable.count(To) && "Dest should always be alive!");
+
+ // Make sure the source basic block is executable!!
+ if (!BBExecutable.count(From)) return false;
+
+ // Check to make sure this edge itself is actually feasible now...
+ TerminatorInst *TI = From->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional())
+ return true;
+ else {
+ LatticeVal &BCValue = getValueState(BI->getCondition());
+ if (BCValue.isOverdefined()) {
+ // Overdefined condition variables mean the branch could go either way.
+ return true;
+ } else if (BCValue.isConstant()) {
+ // Not branching on an evaluatable constant?
+ if (!isa<ConstantInt>(BCValue.getConstant())) return true;
+
+ // Constant condition variables mean the branch can only go a single way
+ return BI->getSuccessor(BCValue.getConstant() ==
+ ConstantInt::getFalse()) == To;
+ }
+ return false;
+ }
+ } else if (isa<InvokeInst>(TI)) {
+    // An invoke instruction's successors are always executable.
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ LatticeVal &SCValue = getValueState(SI->getCondition());
+ if (SCValue.isOverdefined()) { // Overdefined condition?
+ // All destinations are executable!
+ return true;
+ } else if (SCValue.isConstant()) {
+ Constant *CPV = SCValue.getConstant();
+ if (!isa<ConstantInt>(CPV))
+ return true; // not a foldable constant?
+
+      // Make sure to skip the "default value", which isn't a case value.
+ for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i)
+ if (SI->getSuccessorValue(i) == CPV) // Found the taken branch...
+ return SI->getSuccessor(i) == To;
+
+ // Constant value not equal to any of the branches... must execute
+ // default branch then...
+ return SI->getDefaultDest() == To;
+ }
+ return false;
+ } else {
+ cerr << "Unknown terminator instruction: " << *TI;
+ abort();
+ }
+}
+
+// visit Implementations - Something changed in this instruction... Either an
+// operand made a transition, or the instruction is newly executable. Change
+// the value type of I to reflect these changes if appropriate. This method
+// makes sure to do the following actions:
+//
+// 1. If a phi node merges two constants in, and has conflicting values coming
+// from different branches, or if the PHI node merges in an overdefined
+// value, then the PHI node becomes overdefined.
+// 2. If a phi node merges only constants in, and they all agree on value, the
+// PHI node becomes a constant value equal to that.
+// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant
+// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined
+// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined
+// 6. If a conditional branch has a value that is constant, make the selected
+// destination executable
+// 7. If a conditional branch has a value that is overdefined, make all
+// successors executable.
+//
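+// For illustration (hypothetical values): a PHI merging constant 4 along two
+// feasible edges stays constant 4; merging constants 4 and 5 goes to
+// overdefined; incoming values along infeasible edges are ignored entirely.
+//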
+void SCCPSolver::visitPHINode(PHINode &PN) {
+ LatticeVal &PNIV = getValueState(&PN);
+ if (PNIV.isOverdefined()) {
+ // There may be instructions using this PHI node that are not overdefined
+ // themselves. If so, make sure that they know that the PHI node operand
+ // changed.
+ std::multimap<PHINode*, Instruction*>::iterator I, E;
+ tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN);
+ if (I != E) {
+ SmallVector<Instruction*, 16> Users;
+ for (; I != E; ++I) Users.push_back(I->second);
+ while (!Users.empty()) {
+ visit(Users.back());
+ Users.pop_back();
+ }
+ }
+ return; // Quick exit
+ }
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64) {
+ markOverdefined(PNIV, &PN);
+ return;
+ }
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. If they are all
+ // constant, and they agree with each other, the PHI becomes the identical
+ // constant. If they are constant and don't agree, the PHI is overdefined.
+ // If there are no executable operands, the PHI remains undefined.
+ //
+ Constant *OperandVal = 0;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LatticeVal &IV = getValueState(PN.getIncomingValue(i));
+ if (IV.isUndefined()) continue; // Doesn't influence PHI node.
+
+ if (isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) {
+ if (IV.isOverdefined()) { // PHI node becomes overdefined!
+ markOverdefined(&PN);
+ return;
+ }
+
+ if (OperandVal == 0) { // Grab the first value...
+ OperandVal = IV.getConstant();
+ } else { // Another value is being merged in!
+ // There is already a reachable operand. If we conflict with it,
+ // then the PHI node becomes overdefined. If we agree with it, we
+ // can continue on.
+
+ // Check to see if there are two different constants merging...
+ if (IV.getConstant() != OperandVal) {
+ // Yes there is. This means the PHI node is not constant.
+ // You must be overdefined poor PHI.
+ //
+ markOverdefined(&PN); // The PHI node now becomes overdefined
+ return; // I'm done analyzing you
+ }
+ }
+ }
+ }
+
+  // If we exited the loop, this means that the PHI node only has constant
+  // arguments that agree with each other (and OperandVal is the constant) or
+ // OperandVal is null because there are no defined incoming arguments. If
+ // this is the case, the PHI remains undefined.
+ //
+ if (OperandVal)
+ markConstant(&PN, OperandVal); // Acquire operand value
+}
+
+void SCCPSolver::visitReturnInst(ReturnInst &I) {
+ if (I.getNumOperands() == 0) return; // Ret void
+
+ Function *F = I.getParent()->getParent();
+ // If we are tracking the return value of this function, merge it in.
+ if (!F->hasLocalLinkage())
+ return;
+
+ if (!TrackedRetVals.empty() && I.getNumOperands() == 1) {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI =
+ TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end() &&
+ !TFRVI->second.isOverdefined()) {
+ LatticeVal &IV = getValueState(I.getOperand(0));
+ mergeInValue(TFRVI->second, F, IV);
+ return;
+ }
+ }
+
+ // Handle functions that return multiple values.
+ if (!TrackedMultipleRetVals.empty() && I.getNumOperands() > 1) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ It = TrackedMultipleRetVals.find(std::make_pair(F, i));
+ if (It == TrackedMultipleRetVals.end()) break;
+ mergeInValue(It->second, F, getValueState(I.getOperand(i)));
+ }
+ } else if (!TrackedMultipleRetVals.empty() &&
+ I.getNumOperands() == 1 &&
+ isa<StructType>(I.getOperand(0)->getType())) {
+ for (unsigned i = 0, e = I.getOperand(0)->getType()->getNumContainedTypes();
+ i != e; ++i) {
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ It = TrackedMultipleRetVals.find(std::make_pair(F, i));
+ if (It == TrackedMultipleRetVals.end()) break;
+ Value *Val = FindInsertedValue(I.getOperand(0), i);
+ mergeInValue(It->second, F, getValueState(Val));
+ }
+ }
+}
+
+void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable...
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SCCPSolver::visitCastInst(CastInst &I) {
+ Value *V = I.getOperand(0);
+ LatticeVal &VState = getValueState(V);
+ if (VState.isOverdefined()) // Inherit overdefinedness of operand
+ markOverdefined(&I);
+ else if (VState.isConstant()) // Propagate constant value
+ markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
+ VState.getConstant(), I.getType()));
+}
+
+void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
+ Value *Aggr = EVI.getAggregateOperand();
+
+ // If the operand to the extractvalue is an undef, the result is undef.
+ if (isa<UndefValue>(Aggr))
+ return;
+
+ // Currently only handle single-index extractvalues.
+ if (EVI.getNumIndices() != 1) {
+ markOverdefined(&EVI);
+ return;
+ }
+
+ Function *F = 0;
+ if (CallInst *CI = dyn_cast<CallInst>(Aggr))
+ F = CI->getCalledFunction();
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(Aggr))
+ F = II->getCalledFunction();
+
+ // TODO: If IPSCCP resolves the callee of this function, we could propagate a
+ // result back!
+ if (F == 0 || TrackedMultipleRetVals.empty()) {
+ markOverdefined(&EVI);
+ return;
+ }
+
+ // See if we are tracking the result of the callee. If not tracking this
+ // function (for example, it is a declaration) just move to overdefined.
+ if (!TrackedMultipleRetVals.count(std::make_pair(F, *EVI.idx_begin()))) {
+ markOverdefined(&EVI);
+ return;
+ }
+
+ // Otherwise, the value will be merged in here as a result of CallSite
+ // handling.
+}
+
+void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
+ Value *Aggr = IVI.getAggregateOperand();
+ Value *Val = IVI.getInsertedValueOperand();
+
+ // If the operands to the insertvalue are undef, the result is undef.
+ if (isa<UndefValue>(Aggr) && isa<UndefValue>(Val))
+ return;
+
+ // Currently only handle single-index insertvalues.
+ if (IVI.getNumIndices() != 1) {
+ markOverdefined(&IVI);
+ return;
+ }
+
+ // Currently only handle insertvalue instructions that are in a single-use
+ // chain that builds up a return value.
+ for (const InsertValueInst *TmpIVI = &IVI; ; ) {
+ if (!TmpIVI->hasOneUse()) {
+ markOverdefined(&IVI);
+ return;
+ }
+ const Value *V = *TmpIVI->use_begin();
+ if (isa<ReturnInst>(V))
+ break;
+ TmpIVI = dyn_cast<InsertValueInst>(V);
+ if (!TmpIVI) {
+ markOverdefined(&IVI);
+ return;
+ }
+ }
+
+ // See if we are tracking the result of the callee.
+ Function *F = IVI.getParent()->getParent();
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ It = TrackedMultipleRetVals.find(std::make_pair(F, *IVI.idx_begin()));
+
+ // Merge in the inserted member value.
+ if (It != TrackedMultipleRetVals.end())
+ mergeInValue(It->second, F, getValueState(Val));
+
+ // Mark the aggregate result of the IVI overdefined; any tracking that we do
+ // will be done on the individual member values.
+ markOverdefined(&IVI);
+}
+
+void SCCPSolver::visitSelectInst(SelectInst &I) {
+ LatticeVal &CondValue = getValueState(I.getCondition());
+ if (CondValue.isUndefined())
+ return;
+ if (CondValue.isConstant()) {
+ if (ConstantInt *CondCB = dyn_cast<ConstantInt>(CondValue.getConstant())){
+ mergeInValue(&I, getValueState(CondCB->getZExtValue() ? I.getTrueValue()
+ : I.getFalseValue()));
+ return;
+ }
+ }
+
+ // Otherwise, the condition is overdefined or a constant we can't evaluate.
+ // See if we can produce something better than overdefined based on the T/F
+ // value.
+ LatticeVal &TVal = getValueState(I.getTrueValue());
+ LatticeVal &FVal = getValueState(I.getFalseValue());
+
+ // select ?, C, C -> C.
+ if (TVal.isConstant() && FVal.isConstant() &&
+ TVal.getConstant() == FVal.getConstant()) {
+ markConstant(&I, FVal.getConstant());
+ return;
+ }
+
+ if (TVal.isUndefined()) { // select ?, undef, X -> X.
+ mergeInValue(&I, FVal);
+ } else if (FVal.isUndefined()) { // select ?, X, undef -> X.
+ mergeInValue(&I, TVal);
+ } else {
+ markOverdefined(&I);
+ }
+}
+
+// Handle BinaryOperators and Shift Instructions...
+void SCCPSolver::visitBinaryOperator(Instruction &I) {
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ LatticeVal &V1State = getValueState(I.getOperand(0));
+ LatticeVal &V2State = getValueState(I.getOperand(1));
+
+ if (V1State.isOverdefined() || V2State.isOverdefined()) {
+ // If this is an AND or OR with 0 or -1, it doesn't matter that the other
+ // operand is overdefined.
+ if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
+ LatticeVal *NonOverdefVal = 0;
+ if (!V1State.isOverdefined()) {
+ NonOverdefVal = &V1State;
+ } else if (!V2State.isOverdefined()) {
+ NonOverdefVal = &V2State;
+ }
+
+ if (NonOverdefVal) {
+ if (NonOverdefVal->isUndefined()) {
+ // Could annihilate value.
+ if (I.getOpcode() == Instruction::And)
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
+ else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
+ markConstant(IV, &I, ConstantVector::getAllOnesValue(PT));
+ else
+ markConstant(IV, &I, ConstantInt::getAllOnesValue(I.getType()));
+ return;
+ } else {
+ if (I.getOpcode() == Instruction::And) {
+ if (NonOverdefVal->getConstant()->isNullValue()) {
+ markConstant(IV, &I, NonOverdefVal->getConstant());
+ return; // X and 0 = 0
+ }
+ } else {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(NonOverdefVal->getConstant()))
+ if (CI->isAllOnesValue()) {
+ markConstant(IV, &I, NonOverdefVal->getConstant());
+ return; // X or -1 = -1
+ }
+ }
+ }
+ }
+ }
+
+
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
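+          // Hypothetical example of the case being handled:
+          //   %a = phi i32 [ 1, %bb1 ], [ 2, %bb2 ]
+          //   %b = phi i32 [ 3, %bb1 ], [ 2, %bb2 ]
+          //   %s = add i32 %a, %b
+          // Evaluating per incoming edge gives 1+3 = 4 and 2+2 = 4, so %s is
+          // the constant 4 even though %a and %b are both overdefined.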
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal &In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+ LatticeVal &In2 =
+ getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ } else if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
+ Result.markOverdefined();
+ break;
+ }
+ }
+ }
+
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(IV, &I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ return;
+ } else if (Result.isUndefined()) {
+ return;
+ }
+
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ std::multimap<PHINode*, Instruction*>::iterator It, E;
+ tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN1);
+ while (It != E) {
+ if (It->second == &I) {
+ UsersOfOverdefinedPHIs.erase(It++);
+ } else
+ ++It;
+ }
+ tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN2);
+ while (It != E) {
+ if (It->second == &I) {
+ UsersOfOverdefinedPHIs.erase(It++);
+ } else
+ ++It;
+ }
+ }
+
+ markOverdefined(IV, &I);
+ } else if (V1State.isConstant() && V2State.isConstant()) {
+ markConstant(IV, &I, ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant()));
+ }
+}
+
+// Handle ICmpInst instruction...
+void SCCPSolver::visitCmpInst(CmpInst &I) {
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ LatticeVal &V1State = getValueState(I.getOperand(0));
+ LatticeVal &V2State = getValueState(I.getOperand(1));
+
+ if (V1State.isOverdefined() || V2State.isOverdefined()) {
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal &In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+ LatticeVal &In2 =
+ getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ } else if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::getCompare(I.getPredicate(),
+ In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
+ Result.markOverdefined();
+ break;
+ }
+ }
+ }
+
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(IV, &I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ return;
+ } else if (Result.isUndefined()) {
+ return;
+ }
+
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ std::multimap<PHINode*, Instruction*>::iterator It, E;
+ tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN1);
+ while (It != E) {
+ if (It->second == &I) {
+ UsersOfOverdefinedPHIs.erase(It++);
+ } else
+ ++It;
+ }
+ tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN2);
+ while (It != E) {
+ if (It->second == &I) {
+ UsersOfOverdefinedPHIs.erase(It++);
+ } else
+ ++It;
+ }
+ }
+
+ markOverdefined(IV, &I);
+ } else if (V1State.isConstant() && V2State.isConstant()) {
+ markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
+ V1State.getConstant(),
+ V2State.getConstant()));
+ }
+}
+
+void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
+  // FIXME: SCCP does not handle vectors properly.
+ markOverdefined(&I);
+ return;
+
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &IdxState = getValueState(I.getOperand(1));
+
+ if (ValState.isOverdefined() || IdxState.isOverdefined())
+ markOverdefined(&I);
+  else if (ValState.isConstant() && IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getExtractElement(ValState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
+  // FIXME: SCCP does not handle vectors properly.
+ markOverdefined(&I);
+ return;
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &EltState = getValueState(I.getOperand(1));
+ LatticeVal &IdxState = getValueState(I.getOperand(2));
+
+ if (ValState.isOverdefined() || EltState.isOverdefined() ||
+ IdxState.isOverdefined())
+ markOverdefined(&I);
+  else if (ValState.isConstant() && EltState.isConstant() &&
+           IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getInsertElement(ValState.getConstant(),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+ else if (ValState.isUndefined() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
+  // FIXME: SCCP does not handle vectors properly.
+ markOverdefined(&I);
+ return;
+#if 0
+ LatticeVal &V1State = getValueState(I.getOperand(0));
+ LatticeVal &V2State = getValueState(I.getOperand(1));
+ LatticeVal &MaskState = getValueState(I.getOperand(2));
+
+ if (MaskState.isUndefined() ||
+ (V1State.isUndefined() && V2State.isUndefined()))
+ return; // Undefined output if mask or both inputs undefined.
+
+ if (V1State.isOverdefined() || V2State.isOverdefined() ||
+ MaskState.isOverdefined()) {
+ markOverdefined(&I);
+ } else {
+ // A mix of constant/undef inputs.
+ Constant *V1 = V1State.isConstant() ?
+ V1State.getConstant() : UndefValue::get(I.getType());
+ Constant *V2 = V2State.isConstant() ?
+ V2State.getConstant() : UndefValue::get(I.getType());
+ Constant *Mask = MaskState.isConstant() ?
+ MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType());
+ markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask));
+ }
+#endif
+}
+
+// Handle getelementptr instructions... if all operands are constants then we
+// can turn this into a getelementptr ConstantExpr.
+//
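+// Illustrative example (not from the original source; '@arr' hypothetical):
+//   %p = getelementptr [4 x i32]* @arr, i32 0, i32 2
+// with all operands constant folds into the equivalent ConstantExpr GEP.
+//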
+void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ SmallVector<Constant*, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ LatticeVal &State = getValueState(I.getOperand(i));
+ if (State.isUndefined())
+ return; // Operands are not resolved yet...
+ else if (State.isOverdefined()) {
+ markOverdefined(IV, &I);
+ return;
+ }
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ Constant *Ptr = Operands[0];
+ Operands.erase(Operands.begin()); // Erase the pointer from idx list...
+
+ markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0],
+ Operands.size()));
+}
+
+void SCCPSolver::visitStoreInst(Instruction &SI) {
+ if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
+ return;
+ GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
+ DenseMap<GlobalVariable*, LatticeVal>::iterator I = TrackedGlobals.find(GV);
+ if (I == TrackedGlobals.end() || I->second.isOverdefined()) return;
+
+ // Get the value we are storing into the global.
+ LatticeVal &PtrVal = getValueState(SI.getOperand(0));
+
+ mergeInValue(I->second, GV, PtrVal);
+ if (I->second.isOverdefined())
+ TrackedGlobals.erase(I); // No need to keep tracking this!
+}
+
+
+// Handle load instructions. If the operand is a constant pointer to a constant
+// global, we can replace the load with the loaded constant value!
+void SCCPSolver::visitLoadInst(LoadInst &I) {
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ LatticeVal &PtrVal = getValueState(I.getOperand(0));
+ if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
+ if (PtrVal.isConstant() && !I.isVolatile()) {
+ Value *Ptr = PtrVal.getConstant();
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(Ptr) &&
+ cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
+ // load null -> null
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
+ return;
+ }
+
+ // Transform load (constant global) into the value loaded.
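+    // For example (hypothetical IR), given '@g = internal constant i32 7',
+    // 'load i32* @g' folds to the constant 7.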
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (GV->isConstant()) {
+ if (GV->hasDefinitiveInitializer()) {
+ markConstant(IV, &I, GV->getInitializer());
+ return;
+ }
+ } else if (!TrackedGlobals.empty()) {
+ // If we are tracking this global, merge in the known value for it.
+ DenseMap<GlobalVariable*, LatticeVal>::iterator It =
+ TrackedGlobals.find(GV);
+ if (It != TrackedGlobals.end()) {
+ mergeInValue(IV, &I, It->second);
+ return;
+ }
+ }
+ }
+
+ // Transform load (constantexpr_GEP global, 0, ...) into the value loaded.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Constant *V =
+ ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) {
+ markConstant(IV, &I, V);
+ return;
+ }
+ }
+
+ // Otherwise we cannot say for certain what value this load will produce.
+ // Bail out.
+ markOverdefined(IV, &I);
+}
+
+void SCCPSolver::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ Instruction *I = CS.getInstruction();
+
+  // The common case is that we aren't tracking the callee, either because we
+  // are not doing interprocedural analysis, or because the callee is
+  // indirect or external. Handle these cases first.
+ if (F == 0 || !F->hasLocalLinkage()) {
+CallOverdefined:
+ // Void return and not tracking callee, just bail.
+ if (I->getType() == Type::VoidTy) return;
+
+ // Otherwise, if we have a single return value case, and if the function is
+ // a declaration, maybe we can constant fold it.
+ if (!isa<StructType>(I->getType()) && F && F->isDeclaration() &&
+ canConstantFoldCallTo(F)) {
+
+ SmallVector<Constant*, 8> Operands;
+ for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
+ AI != E; ++AI) {
+ LatticeVal &State = getValueState(*AI);
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+ else if (State.isOverdefined()) {
+ markOverdefined(I);
+ return;
+ }
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ // If we can constant fold this, mark the result of the call as a
+ // constant.
+ if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size())) {
+ markConstant(I, C);
+ return;
+ }
+ }
+
+ // Otherwise, we don't know anything about this call, mark it overdefined.
+ markOverdefined(I);
+ return;
+ }
+
+ // If this is a single/zero retval case, see if we're tracking the function.
+ DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end()) {
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(I, TFRVI->second);
+ } else if (isa<StructType>(I->getType())) {
+    // Check to see if we're tracking this callee; if not, handle it in the
+    // common path above.
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ TMRVI = TrackedMultipleRetVals.find(std::make_pair(F, 0));
+ if (TMRVI == TrackedMultipleRetVals.end())
+ goto CallOverdefined;
+
+ // If we are tracking this callee, propagate the return values of the call
+ // into this call site. We do this by walking all the uses. Single-index
+ // ExtractValueInst uses can be tracked; anything more complicated is
+ // currently handled conservatively.
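+    // Illustrative sketch (names hypothetical):
+    //   %ret = call { i32, i32 } @callee()
+    //   %x = extractvalue { i32, i32 } %ret, 0  ; merged from (callee, 0)
+    //   %y = extractvalue { i32, i32 } %ret, 1  ; merged from (callee, 1)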
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(*UI)) {
+ if (EVI->getNumIndices() == 1) {
+ mergeInValue(EVI,
+ TrackedMultipleRetVals[std::make_pair(F, *EVI->idx_begin())]);
+ continue;
+ }
+ }
+ // The aggregate value is used in a way not handled here. Assume nothing.
+ markOverdefined(*UI);
+ }
+ } else {
+ // Otherwise we're not tracking this callee, so handle it in the
+ // common path above.
+ goto CallOverdefined;
+ }
+
+ // Finally, if this is the first call to the function hit, mark its entry
+ // block executable.
+ if (!BBExecutable.count(F->begin()))
+ MarkBlockExecutable(F->begin());
+
+ // Propagate information from this call site into the callee.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++CAI) {
+ LatticeVal &IV = ValueState[AI];
+ if (!IV.isOverdefined())
+ mergeInValue(IV, AI, getValueState(*CAI));
+ }
+}
+
+
+void SCCPSolver::Solve() {
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty() ||
+ !OverdefinedInstWorkList.empty()) {
+ // Process the instruction work list...
+ while (!OverdefinedInstWorkList.empty()) {
+ Value *I = OverdefinedInstWorkList.back();
+ OverdefinedInstWorkList.pop_back();
+
+ DOUT << "\nPopped off OI-WL: " << *I;
+
+ // "I" got into the work list because it either made the transition from
+ // bottom to constant
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined
+ // Update all of the users of this instruction's value...
+ //
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ OperandChangedState(*UI);
+ }
+ // Process the instruction work list...
+ while (!InstWorkList.empty()) {
+ Value *I = InstWorkList.back();
+ InstWorkList.pop_back();
+
+ DOUT << "\nPopped off I-WL: " << *I;
+
+ // "I" got into the work list because it either made the transition from
+ // bottom to constant
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value...
+ //
+ if (!getValueState(I).isOverdefined())
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ OperandChangedState(*UI);
+ }
+
+ // Process the basic block work list...
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DOUT << "\nPopped off BBWL: " << *BB;
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ visit(BB);
+ }
+ }
+}
+
+/// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+/// that branches on undef values cannot reach any of their successors.
+/// However, this is not a safe assumption. After we solve dataflow, this
+/// method should be used to handle this. If this returns true, the solver
+/// should be rerun.
+///
+/// This method handles this by finding an unresolved branch and marking one of
+/// the edges from its block as feasible, even though the condition does not
+/// prove that it is. This allows SCCP to find the rest of the CFG and only
+/// slightly pessimizes the analysis results (by marking one, potentially
+/// infeasible, edge feasible). This cannot usefully modify the constraints on
+/// the condition of the branch, as that would impact other users of the value.
+///
+/// This scan also checks for values that use undefs, whose results are actually
+/// defined. For example, 'zext i8 undef to i32' should produce all zeros
+/// conservatively, as "(zext i8 X -> i32) & 0xFF00" must always return zero,
+/// even if X isn't defined.
+bool SCCPSolver::ResolvedUndefsIn(Function &F) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Look for instructions which produce undef values.
+ if (I->getType() == Type::VoidTy) continue;
+
+ LatticeVal &LV = getValueState(I);
+ if (!LV.isUndefined()) continue;
+
+ // Get the lattice values of the first two operands for use below.
+ LatticeVal &Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op1LV;
+ if (I->getNumOperands() == 2) {
+ // If this is a two-operand instruction, and if both operands are
+ // undefs, the result stays undef.
+ Op1LV = getValueState(I->getOperand(1));
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ continue;
+ }
+
+      // If this is an instruction whose result is defined even if the input
+      // is not fully defined, propagate the information.
+ const Type *ITy = I->getType();
+ switch (I->getOpcode()) {
+ default: break; // Leave the instruction as an undef.
+ case Instruction::ZExt:
+ // After a zero extend, we know the top part is zero. SExt doesn't have
+ // to be handled here, because we don't know whether the top part is 1's
+ // or 0's.
+ assert(Op0LV.isUndefined());
+ markForcedConstant(LV, I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Mul:
+ case Instruction::And:
+ // undef * X -> 0. X could be zero.
+ // undef & X -> 0. X could be zero.
+ markForcedConstant(LV, I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::Or:
+ // undef | X -> -1. X could be -1.
+ if (const VectorType *PTy = dyn_cast<VectorType>(ITy))
+ markForcedConstant(LV, I, ConstantVector::getAllOnesValue(PTy));
+ else
+ markForcedConstant(LV, I, ConstantInt::getAllOnesValue(ITy));
+ return true;
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // X / undef -> undef. No change.
+ // X % undef -> undef. No change.
+ if (Op1LV.isUndefined()) break;
+
+ // undef / X -> 0. X could be maxint.
+ // undef % X -> 0. X could be 1.
+ markForcedConstant(LV, I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::AShr:
+ // undef >>s X -> undef. No change.
+ if (Op0LV.isUndefined()) break;
+
+ // X >>s undef -> X. X could be 0, X could have the high-bit known set.
+ if (Op0LV.isConstant())
+ markForcedConstant(LV, I, Op0LV.getConstant());
+ else
+ markOverdefined(LV, I);
+ return true;
+ case Instruction::LShr:
+ case Instruction::Shl:
+ // undef >> X -> undef. No change.
+ // undef << X -> undef. No change.
+ if (Op0LV.isUndefined()) break;
+
+ // X >> undef -> 0. X could be 0.
+ // X << undef -> 0. X could be 0.
+ markForcedConstant(LV, I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Select:
+ // undef ? X : Y -> X or Y. There could be commonality between X/Y.
+ if (Op0LV.isUndefined()) {
+ if (!Op1LV.isConstant()) // Pick the constant one if there is any.
+ Op1LV = getValueState(I->getOperand(2));
+ } else if (Op1LV.isUndefined()) {
+ // c ? undef : undef -> undef. No change.
+ Op1LV = getValueState(I->getOperand(2));
+ if (Op1LV.isUndefined())
+ break;
+ // Otherwise, c ? undef : x -> x.
+ } else {
+ // Leave Op1LV as Operand(1)'s LatticeValue.
+ }
+
+ if (Op1LV.isConstant())
+ markForcedConstant(LV, I, Op1LV.getConstant());
+ else
+ markOverdefined(LV, I);
+ return true;
+ case Instruction::Call:
+ // If a call has an undef result, it is because it is constant foldable
+ // but one of the inputs was undef. Just force the result to
+ // overdefined.
+ markOverdefined(LV, I);
+ return true;
+ }
+ }
+
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional()) continue;
+ if (!getValueState(BI->getCondition()).isUndefined())
+ continue;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      if (SI->getNumSuccessors() < 2) // No cases.
+        continue;
+ if (!getValueState(SI->getCondition()).isUndefined())
+ continue;
+ } else {
+ continue;
+ }
+
+ // If the edge to the second successor isn't thought to be feasible yet,
+ // mark it so now. We pick the second one so that this goes to some
+ // enumerated value in a switch instead of going to the default destination.
+ if (KnownFeasibleEdges.count(Edge(BB, TI->getSuccessor(1))))
+ continue;
+
+ // Otherwise, it isn't already thought to be feasible. Mark it as such now
+ // and return. This will make other blocks reachable, which will allow new
+ // values to be discovered and existing ones to be moved in the lattice.
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+
+    // This must be a conditional branch or switch on undef. At this point,
+ // force the old terminator to branch to the first successor. This is
+ // required because we are now influencing the dataflow of the function with
+ // the assumption that this edge is taken. If we leave the branch condition
+ // as undef, then further analysis could think the undef went another way
+ // leading to an inconsistent set of conclusions.
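+    // Illustrative example (not from the original source):
+    //   br i1 undef, label %T, label %F
+    // has its %F edge (successor 1) marked executable above, and is rewritten
+    // below to branch on 'false', so later analysis agrees with that choice.
+    // (For a switch, successor 1 is the first enumerated case; successor 0
+    // is the default destination, which setting the case value also avoids.)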
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ BI->setCondition(ConstantInt::getFalse());
+ } else {
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ SI->setCondition(SI->getCaseValue(1));
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// SCCP Class - This class uses the SCCPSolver to implement a per-function
+ /// Sparse Conditional Constant Propagator.
+ ///
+ struct VISIBILITY_HIDDEN SCCP : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ SCCP() : FunctionPass(&ID) {}
+
+ // runOnFunction - Run the Sparse Conditional Constant Propagation
+ // algorithm, and return true if the function was modified.
+ //
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+} // end anonymous namespace
+
+char SCCP::ID = 0;
+static RegisterPass<SCCP>
+X("sccp", "Sparse Conditional Constant Propagation");
+
+// createSCCPPass - This is the public interface to this file...
+FunctionPass *llvm::createSCCPPass() {
+ return new SCCP();
+}
+
+
+// runOnFunction() - Run the Sparse Conditional Constant Propagation algorithm,
+// and return true if the function was modified.
+//
+bool SCCP::runOnFunction(Function &F) {
+ DOUT << "SCCP on function '" << F.getNameStart() << "'\n";
+ SCCPSolver Solver;
+
+ // Mark the first block of the function as being executable.
+ Solver.MarkBlockExecutable(F.begin());
+
+ // Mark all arguments to the function as being overdefined.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
+ Solver.markOverdefined(AI);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+ DOUT << "RESOLVING UNDEFs\n";
+ ResolvedUndefs = Solver.ResolvedUndefsIn(F);
+ }
+
+ bool MadeChanges = false;
+
+ // If we decided that there are basic blocks that are dead in this function,
+ // delete their contents now. Note that we cannot actually delete the blocks,
+ // as we cannot modify the CFG of the function.
+ //
+ SmallVector<Instruction*, 512> Insts;
+ std::map<Value*, LatticeVal> &Values = Solver.getValueMapping();
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (!Solver.isBlockExecutable(BB)) {
+ DOUT << " BasicBlock Dead:" << *BB;
+ ++NumDeadBlocks;
+
+      // Delete the instructions backwards; this reduces the number of
+      // def-use and use-def chain updates that have to be performed.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->getTerminator();
+ I != E; ++I)
+ Insts.push_back(I);
+ while (!Insts.empty()) {
+ Instruction *I = Insts.back();
+ Insts.pop_back();
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ BB->getInstList().erase(I);
+ MadeChanges = true;
+ ++NumInstRemoved;
+ }
+ } else {
+      // Iterate over all of the instructions in the function, replacing them
+      // with constants where we have found them to have constant values.
+ //
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType() == Type::VoidTy ||
+ isa<TerminatorInst>(Inst))
+ continue;
+
+ LatticeVal &IV = Values[Inst];
+ if (!IV.isConstant() && !IV.isUndefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DOUT << " Constant: " << *Const << " = " << *Inst;
+
+      // Replace all of the uses of the instruction with uses of the constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++NumInstRemoved;
+ }
+ }
+
+ return MadeChanges;
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// IPSCCP Class - This class implements interprocedural Sparse Conditional
+ /// Constant Propagation.
+ ///
+ struct VISIBILITY_HIDDEN IPSCCP : public ModulePass {
+ static char ID;
+ IPSCCP() : ModulePass(&ID) {}
+ bool runOnModule(Module &M);
+ };
+} // end anonymous namespace
+
+char IPSCCP::ID = 0;
+static RegisterPass<IPSCCP>
+Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation");
+
+// createIPSCCPPass - This is the public interface to this file...
+ModulePass *llvm::createIPSCCPPass() {
+ return new IPSCCP();
+}
+
+
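+// AddressIsTaken - Conservatively determine whether the address of GV
+// escapes. Illustrative examples (not from the original source):
+//   store i32* @g, i32** %p   ; stores the address       -> taken
+//   call void @f(i32* @g)     ; passed as an argument    -> taken
+//   %v = load i32* @g         ; a plain non-volatile load does not, by
+//                             ; itself, take the address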
+static bool AddressIsTaken(GlobalValue *GV) {
+ // Delete any dead constantexpr klingons.
+ GV->removeDeadConstantUsers();
+
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ if (SI->getOperand(0) == GV || SI->isVolatile())
+ return true; // Storing addr of GV.
+ } else if (isa<InvokeInst>(*UI) || isa<CallInst>(*UI)) {
+ // Make sure we are calling the function, not passing the address.
+ CallSite CS = CallSite::get(cast<Instruction>(*UI));
+ if (CS.hasArgument(GV))
+ return true;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (LI->isVolatile())
+ return true;
+ } else {
+ return true;
+ }
+ return false;
+}
+
+bool IPSCCP::runOnModule(Module &M) {
+ SCCPSolver Solver;
+
+  // Loop over all functions, marking the arguments of any function that has
+  // its address taken or that is external as overdefined.
+  //
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ if (!F->hasLocalLinkage() || AddressIsTaken(F)) {
+ if (!F->isDeclaration())
+ Solver.MarkBlockExecutable(F->begin());
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI)
+ Solver.markOverdefined(AI);
+ } else {
+ Solver.AddTrackedFunction(F);
+ }
+
+  // Loop over global variables. We inform the solver about any internal
+  // global variable that does not have its address taken; through such
+  // variables we can propagate constants.
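+  // For example (hypothetical), '@flag = internal global i1 false' whose
+  // address never escapes can be tracked; if every store to it stores a
+  // constant, loads of @flag fold and the dead global is deleted at the
+  // bottom of this function.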
+ for (Module::global_iterator G = M.global_begin(), E = M.global_end();
+ G != E; ++G)
+ if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G))
+ Solver.TrackValueOfGlobalVariable(G);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+
+ DOUT << "RESOLVING UNDEFS\n";
+ ResolvedUndefs = false;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
+ }
+
+ bool MadeChanges = false;
+
+  // Iterate over all of the instructions in the module, replacing them with
+  // constants where we have found them to have constant values.
+ //
+ SmallVector<Instruction*, 512> Insts;
+ SmallVector<BasicBlock*, 512> BlocksToErase;
+ std::map<Value*, LatticeVal> &Values = Solver.getValueMapping();
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI)
+ if (!AI->use_empty()) {
+ LatticeVal &IV = Values[AI];
+ if (IV.isConstant() || IV.isUndefined()) {
+ Constant *CST = IV.isConstant() ?
+ IV.getConstant() : UndefValue::get(AI->getType());
+ DOUT << "*** Arg " << *AI << " = " << *CST <<"\n";
+
+          // Replace all of the uses of the argument with uses of the
+          // constant.
+ AI->replaceAllUsesWith(CST);
+ ++IPNumArgsElimed;
+ }
+ }
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (!Solver.isBlockExecutable(BB)) {
+ DOUT << " BasicBlock Dead:" << *BB;
+ ++IPNumDeadBlocks;
+
+        // Delete the instructions backwards; this reduces the number of
+        // def-use and use-def chain updates that have to be performed.
+ TerminatorInst *TI = BB->getTerminator();
+ for (BasicBlock::iterator I = BB->begin(), E = TI; I != E; ++I)
+ Insts.push_back(I);
+
+ while (!Insts.empty()) {
+ Instruction *I = Insts.back();
+ Insts.pop_back();
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ BB->getInstList().erase(I);
+ MadeChanges = true;
+ ++IPNumInstRemoved;
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ if (!Succ->empty() && isa<PHINode>(Succ->begin()))
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+ if (!TI->use_empty())
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ BB->getInstList().erase(TI);
+
+ if (&*BB != &F->front())
+ BlocksToErase.push_back(BB);
+ else
+ new UnreachableInst(BB);
+
+ } else {
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType() == Type::VoidTy)
+ continue;
+
+ LatticeVal &IV = Values[Inst];
+ if (!IV.isConstant() && !IV.isUndefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DOUT << " Constant: " << *Const << " = " << *Inst;
+
+          // Replace all of the uses of the instruction with uses of the
+          // constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++IPNumInstRemoved;
+ }
+ }
+
+ // Now that all instructions in the function are constant folded, erase dead
+ // blocks, because we can now use ConstantFoldTerminator to get rid of
+ // in-edges.
+ for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
+ // If there are any PHI nodes in this successor, drop entries for BB now.
+ BasicBlock *DeadBB = BlocksToErase[i];
+ while (!DeadBB->use_empty()) {
+ Instruction *I = cast<Instruction>(DeadBB->use_back());
+ bool Folded = ConstantFoldTerminator(I->getParent());
+ if (!Folded) {
+ // The constant folder may not have been able to fold the terminator
+ // if this is a branch or switch on undef. Fold it manually as a
+ // branch to the first successor.
+#ifndef NDEBUG
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+ "Branch should be foldable!");
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+ } else {
+ assert(0 && "Didn't fold away reference to block!");
+ }
+#endif
+
+ // Make this an uncond branch to the first successor.
+ TerminatorInst *TI = I->getParent()->getTerminator();
+ BranchInst::Create(TI->getSuccessor(0), TI);
+
+ // Remove entries in successor phi nodes to remove edges.
+ for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+ TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+ // Remove the old terminator.
+ TI->eraseFromParent();
+ }
+ }
+
+ // Finally, delete the basic block.
+ F->getBasicBlockList().erase(DeadBB);
+ }
+ BlocksToErase.clear();
+ }
+
+ // If we inferred constant or undef return values for a function, we replaced
+ // all call uses with the inferred value. This means we don't need to bother
+ // actually returning anything from the function. Replace all return
+ // instructions with return undef.
+ // TODO: Process multiple value ret instructions also.
+ const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+ for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
+ E = RV.end(); I != E; ++I)
+ if (!I->second.isOverdefined() &&
+ I->first->getReturnType() != Type::VoidTy) {
+ Function *F = I->first;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (!isa<UndefValue>(RI->getOperand(0)))
+ RI->setOperand(0, UndefValue::get(F->getReturnType()));
+ }
+
+  // If we inferred constant or undef values for global variables, we can
+  // delete the globals and any stores that remain to them.
+ const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
+ for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
+ E = TG.end(); I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ assert(!I->second.isOverdefined() &&
+ "Overdefined values should have been taken out of the map!");
+ DOUT << "Found that GV '" << GV->getNameStart() << "' is constant!\n";
+ while (!GV->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(GV->use_back());
+ SI->eraseFromParent();
+ }
+ M.getGlobalList().erase(GV);
+ ++IPNumGlobalConst;
+ }
+
+ return MadeChanges;
+}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
new file mode 100644
index 0000000..5669da0
--- /dev/null
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -0,0 +1,111 @@
+//===-- Scalar.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMScalarOpts.a, which implements
+// several scalar transformations over the LLVM intermediate representation.
+//
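+// Illustrative use from C (a minimal sketch; PM setup uses the llvm-c Core
+// API, and 'Mod' stands for an existing LLVMModuleRef):
+//
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddSCCPPass(PM);
+//   LLVMRunPassManager(PM, Mod);
+//   LLVMDisposePassManager(PM);
+//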
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAggressiveDCEPass());
+}
+
+void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCFGSimplificationPass());
+}
+
+void LLVMAddCondPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCondPropagationPass());
+}
+
+void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadStoreEliminationPass());
+}
+
+void LLVMAddGVNPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGVNPass());
+}
+
+void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIndVarSimplifyPass());
+}
+
+void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createInstructionCombiningPass());
+}
+
+void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createJumpThreadingPass());
+}
+
+void LLVMAddLICMPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLICMPass());
+}
+
+void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopDeletionPass());
+}
+
+void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopIndexSplitPass());
+}
+
+void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopRotatePass());
+}
+
+void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnrollPass());
+}
+
+void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnswitchPass());
+}
+
+void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMemCpyOptPass());
+}
+
+void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPromoteMemoryToRegisterPass());
+}
+
+void LLVMAddReassociatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createReassociatePass());
+}
+
+void LLVMAddSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSCCPPass());
+}
+
+void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass());
+}
+
+void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSimplifyLibCallsPass());
+}
+
+void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTailCallEliminationPass());
+}
+
+void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantPropagationPass());
+}
+
+void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDemoteRegisterToMemoryPass());
+}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
new file mode 100644
index 0000000..9935f12
--- /dev/null
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -0,0 +1,1820 @@
+//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation implements the well-known scalar replacement of
+// aggregates transformation. It breaks up alloca instructions of aggregate
+// type (structure or array) into individual alloca instructions for each
+// member (if possible). Then, if possible, it transforms the individual
+// alloca instructions into nice clean scalar SSA form.
+//
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
+// the two often interact, especially for C++ programs. As such, iterating
+// between SRoA and Mem2Reg until we run out of things to promote works well.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalarrepl"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+STATISTIC(NumReplaced, "Number of allocas broken up");
+STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumConverted, "Number of aggregates converted to scalar");
+STATISTIC(NumGlobals, "Number of allocas copied from constant global");
+
+namespace {
+ struct VISIBILITY_HIDDEN SROA : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ explicit SROA(signed T = -1) : FunctionPass(&ID) {
+ if (T == -1)
+ SRThreshold = 128;
+ else
+ SRThreshold = T;
+ }
+
+ bool runOnFunction(Function &F);
+
+ bool performScalarRepl(Function &F);
+ bool performPromotion(Function &F);
+
+    // getAnalysisUsage - This pass requires dominance information for
+    // promotion, but we know it will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+ AU.addRequired<TargetData>();
+ AU.setPreservesCFG();
+ }
+
+ private:
+ TargetData *TD;
+
+ /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
+ /// information about the uses. All these fields are initialized to false
+ /// and set to true when something is learned.
+ struct AllocaInfo {
+ /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
+ bool isUnsafe : 1;
+
+ /// needsCleanup - This is set to true if there is some use of the alloca
+ /// that requires cleanup.
+ bool needsCleanup : 1;
+
+ /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
+ bool isMemCpySrc : 1;
+
+ /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
+ bool isMemCpyDst : 1;
+
+ AllocaInfo()
+ : isUnsafe(false), needsCleanup(false),
+ isMemCpySrc(false), isMemCpyDst(false) {}
+ };
+
+ unsigned SRThreshold;
+
+ void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
+
+ int isSafeAllocaToScalarRepl(AllocationInst *AI);
+
+ void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
+ AllocaInfo &Info);
+ void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
+ AllocaInfo &Info);
+ void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
+ unsigned OpNo, AllocaInfo &Info);
+ void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI,
+ AllocaInfo &Info);
+
+ void DoScalarReplacement(AllocationInst *AI,
+ std::vector<AllocationInst*> &WorkList);
+ void CleanupGEP(GetElementPtrInst *GEP);
+ void CleanupAllocaUsers(AllocationInst *AI);
+ AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base);
+
+ void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+
+ void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
+ AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+
+ bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
+ bool &SawVec, uint64_t Offset, unsigned AllocaSize);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+ Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder);
+ Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
+ uint64_t Offset, IRBuilder<> &Builder);
+ static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
+ };
+}
+
+char SROA::ID = 0;
+static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
+
+// Public interface to the ScalarReplAggregates pass
+FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
+ return new SROA(Threshold);
+}
+
+
+bool SROA::runOnFunction(Function &F) {
+ TD = &getAnalysis<TargetData>();
+
+ bool Changed = performPromotion(F);
+ while (1) {
+ bool LocalChange = performScalarRepl(F);
+ if (!LocalChange) break; // No need to repromote if no scalarrepl
+ Changed = true;
+ LocalChange = performPromotion(F);
+ if (!LocalChange) break; // No need to re-scalarrepl if no promotion
+ }
+
+ return Changed;
+}
+
+
+bool SROA::performPromotion(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+
+ bool Changed = false;
+
+ while (1) {
+ Allocas.clear();
+
+    // Find allocas that are safe to promote by looking at all instructions
+    // in the entry node.
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ PromoteMemToReg(Allocas, DT, DF);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// getNumSAElements - Return the number of elements in the specified struct
+/// or array type.
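+/// For example (illustrative), both '{ i32, float }' and '[2 x i32]' have
+/// two elements.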
+static uint64_t getNumSAElements(const Type *T) {
+ if (const StructType *ST = dyn_cast<StructType>(T))
+ return ST->getNumElements();
+ return cast<ArrayType>(T)->getNumElements();
+}
+
+// performScalarRepl - This algorithm is a simple worklist driven algorithm,
+// which runs on all of the malloc/alloca instructions in the function, removing
+// them if they are only used by getelementptr instructions.
+//
+bool SROA::performScalarRepl(Function &F) {
+ std::vector<AllocationInst*> WorkList;
+
+  // Scan the entry basic block, adding any allocas and mallocs to the
+  // worklist.
+ BasicBlock &BB = F.getEntryBlock();
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ if (AllocationInst *A = dyn_cast<AllocationInst>(I))
+ WorkList.push_back(A);
+
+ // Process the worklist
+ bool Changed = false;
+ while (!WorkList.empty()) {
+ AllocationInst *AI = WorkList.back();
+ WorkList.pop_back();
+
+ // Handle dead allocas trivially. These can be formed by SROA'ing arrays
+ // with unused elements.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ // If this alloca is impossible for us to promote, reject it early.
+ if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
+ continue;
+
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
+ DOUT << "Found alloca equal to global: " << *AI;
+ DOUT << " memcpy = " << *TheCopy;
+ Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
+ AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
+ TheCopy->eraseFromParent(); // Don't mutate the global.
+ AI->eraseFromParent();
+ ++NumGlobals;
+ Changed = true;
+ continue;
+ }
+
+ // Check to see if we can perform the core SROA transformation. We cannot
+ // transform the allocation instruction if it is an array allocation
+ // (allocations OF arrays are ok though), and an allocation of a scalar
+ // value cannot be decomposed at all.
+ uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+
+ // Do not promote any struct whose size is too big.
+ if (AllocaSize > SRThreshold) continue;
+
+ if ((isa<StructType>(AI->getAllocatedType()) ||
+ isa<ArrayType>(AI->getAllocatedType())) &&
+        // Do not promote any struct into more than SRThreshold/4 (by default
+        // 32) separate vars.
+ getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) {
+ // Check that all of the users of the allocation are capable of being
+ // transformed.
+ switch (isSafeAllocaToScalarRepl(AI)) {
+ default: assert(0 && "Unexpected value!");
+ case 0: // Not safe to scalar replace.
+ break;
+ case 1: // Safe, but requires cleanup/canonicalizations first
+ CleanupAllocaUsers(AI);
+ // FALL THROUGH.
+ case 3: // Safe to scalar replace.
+ DoScalarReplacement(AI, WorkList);
+ Changed = true;
+ continue;
+ }
+ }
+
+    // See if we can turn this aggregate value (potentially with casts) into
+    // a simple scalar value that can be mem2reg'd into a register.
+ // IsNotTrivial tracks whether this is something that mem2reg could have
+ // promoted itself. If so, we don't want to transform it needlessly. Note
+ // that we can't just check based on the type: the alloca may be of an i32
+ // but that has pointer arithmetic to set byte 3 of it or something.
+ bool IsNotTrivial = false;
+ const Type *VectorTy = 0;
+ bool HadAVector = false;
+ if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
+ 0, unsigned(AllocaSize)) && IsNotTrivial) {
+ AllocaInst *NewAI;
+ // If we were able to find a vector type that can handle this with
+ // insert/extract elements, and if there was at least one use that had
+ // a vector type, promote this to a vector. We don't want to promote
+ // random stuff that doesn't use vectors (e.g. <9 x double>) because then
+ // we just get a lot of insert/extracts. If at least one vector is
+ // involved, then we probably really do have a union of vector/array.
+ if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
+ DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n";
+
+ // Create and insert the vector alloca.
+ NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0);
+ } else {
+ DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n";
+
+ // Create and insert the integer alloca.
+ const Type *NewTy = IntegerType::get(AllocaSize*8);
+ NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0);
+ }
+ NewAI->takeName(AI);
+ AI->eraseFromParent();
+ ++NumConverted;
+ Changed = true;
+ continue;
+ }
+
+ // Otherwise, couldn't process this alloca.
+ }
+
+ return Changed;
+}
+
+/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
+/// predicate, do SROA now.
+void SROA::DoScalarReplacement(AllocationInst *AI,
+ std::vector<AllocationInst*> &WorkList) {
+ DOUT << "Found inst to SROA: " << *AI;
+ SmallVector<AllocaInst*, 32> ElementAllocas;
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ ElementAllocas.reserve(ST->getNumContainedTypes());
+ for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AI->getAlignment(),
+ AI->getName() + "." + utostr(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ } else {
+ const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
+ ElementAllocas.reserve(AT->getNumElements());
+ const Type *ElTy = AT->getElementType();
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AI->getName() + "." + utostr(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ }
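+
+  // Illustrative result (names hypothetical): SROA of 'alloca { i32, float }'
+  // named %a creates 'alloca i32' (%a.0) and 'alloca float' (%a.1), each
+  // added to the worklist for recursive processing.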
+
+ // Now that we have created the alloca instructions that we want to use,
+ // expand the getelementptr instructions to use them.
+ //
+ while (!AI->use_empty()) {
+ Instruction *User = cast<Instruction>(AI->use_back());
+ if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
+ RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
+ BCInst->eraseFromParent();
+ continue;
+ }
+
+ // Replace:
+ // %res = load { i32, i32 }* %alloc
+ // with:
+ // %load.0 = load i32* %alloc.0
+    //   %insert.0 = insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
+ // %load.1 = load i32* %alloc.1
+ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+ // (Also works for arrays instead of structs)
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ Value *Insert = UndefValue::get(LI->getType());
+ for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
+ Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
+ Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
+ }
+ LI->replaceAllUsesWith(Insert);
+ LI->eraseFromParent();
+ continue;
+ }
+
+ // Replace:
+ // store { i32, i32 } %val, { i32, i32 }* %alloc
+ // with:
+ // %val.0 = extractvalue { i32, i32 } %val, 0
+ // store i32 %val.0, i32* %alloc.0
+ // %val.1 = extractvalue { i32, i32 } %val, 1
+ // store i32 %val.1, i32* %alloc.1
+ // (Also works for arrays instead of structs)
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ Value *Val = SI->getOperand(0);
+ for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
+ Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
+ new StoreInst(Extract, ElementAllocas[i], SI);
+ }
+ SI->eraseFromParent();
+ continue;
+ }
+
+ GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
+ // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
+ unsigned Idx =
+ (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
+
+ assert(Idx < ElementAllocas.size() && "Index out of range?");
+ AllocaInst *AllocaToUse = ElementAllocas[Idx];
+
+ Value *RepValue;
+ if (GEPI->getNumOperands() == 3) {
+ // Do not insert a new getelementptr instruction with zero indices, only
+ // to have it optimized out later.
+ RepValue = AllocaToUse;
+ } else {
+      // We are indexing deeply into the structure, so we still need a
+      // getelementptr instruction to finish the indexing. This may be
+      // expanded itself once the worklist is rerun.
+ //
+ SmallVector<Value*, 8> NewArgs;
+ NewArgs.push_back(Constant::getNullValue(Type::Int32Ty));
+ NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
+ RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
+ NewArgs.end(), "", GEPI);
+ RepValue->takeName(GEPI);
+ }
+
+ // If this GEP is to the start of the aggregate, check for memcpys.
+ if (Idx == 0 && GEPI->hasAllZeroIndices())
+ RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
+
+ // Move all of the users over to the new GEP.
+ GEPI->replaceAllUsesWith(RepValue);
+ // Delete the old GEP
+ GEPI->eraseFromParent();
+ }
+
+ // Finally, delete the Alloca instruction
+ AI->eraseFromParent();
+ NumReplaced++;
+}
+
+
+/// isSafeElementUse - Check to see if this use is an allowed use for a
+/// getelementptr instruction of an array aggregate allocation. isFirstElt
+/// indicates whether Ptr is known to point to the start of the aggregate.
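+/// For illustration (example IR, not from the original source): given
+///   %elt = getelementptr [4 x i32]* %A, i32 0, i32 1
+/// a use such as "%v = load i32* %elt" is safe, while
+/// "store i32* %elt, i32** %P" is not, since it stores the pointer itself.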
+///
+void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
+ AllocaInfo &Info) {
+ for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
+ I != E; ++I) {
+ Instruction *User = cast<Instruction>(*I);
+ switch (User->getOpcode()) {
+ case Instruction::Load: break;
+ case Instruction::Store:
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
+ break;
+ case Instruction::GetElementPtr: {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
+ bool AreAllZeroIndices = isFirstElt;
+ if (GEP->getNumOperands() > 1) {
+ if (!isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero())
+ // Using pointer arithmetic to navigate the array.
+ return MarkUnsafe(Info);
+
+ if (AreAllZeroIndices)
+ AreAllZeroIndices = GEP->hasAllZeroIndices();
+ }
+ isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
+ if (Info.isUnsafe) return;
+ break;
+ }
+ case Instruction::BitCast:
+ if (isFirstElt) {
+ isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
+ if (Info.isUnsafe) return;
+ break;
+ }
+ DOUT << " Transformation preventing inst: " << *User;
+ return MarkUnsafe(Info);
+ case Instruction::Call:
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ if (isFirstElt) {
+ isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
+ if (Info.isUnsafe) return;
+ break;
+ }
+ }
+ DOUT << " Transformation preventing inst: " << *User;
+ return MarkUnsafe(Info);
+ default:
+ DOUT << " Transformation preventing inst: " << *User;
+ return MarkUnsafe(Info);
+ }
+ }
+ return; // All users look ok :)
+}
+
+/// AllUsersAreLoads - Return true if all users of this value are loads.
+static bool AllUsersAreLoads(Value *Ptr) {
+ for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
+ I != E; ++I)
+ if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
+ return false;
+ return true;
+}
+
+/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
+/// aggregate allocation.
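+/// For example (illustrative), the only getelementptr form accepted below is
+///   getelementptr { i32, i32 }* %A, i32 0, i32 <cst>
+/// i.e. a zero first index followed by constant element indices.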
+///
+void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
+ AllocaInfo &Info) {
+ if (BitCastInst *C = dyn_cast<BitCastInst>(User))
+ return isSafeUseOfBitCastedAllocation(C, AI, Info);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User))
+ if (!LI->isVolatile())
+ return; // Loads (returning a first-class aggregate) are always rewritable
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User))
+ if (!SI->isVolatile() && SI->getOperand(0) != AI)
+ return; // Store is ok if storing INTO the pointer, not storing the pointer
+
+ GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
+ if (GEPI == 0)
+ return MarkUnsafe(Info);
+
+ gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+
+ // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
+ if (I == E ||
+ I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
+ return MarkUnsafe(Info);
+ }
+
+ ++I;
+ if (I == E) return MarkUnsafe(Info); // ran out of GEP indices??
+
+ bool IsAllZeroIndices = true;
+
+ // If the first index is a non-constant index into an array, see if we can
+ // handle it as a special case.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
+ if (!isa<ConstantInt>(I.getOperand())) {
+ IsAllZeroIndices = false;
+ uint64_t NumElements = AT->getNumElements();
+
+ // If this is an array index and the index is not constant, we cannot
+ // promote... that is unless the array has exactly one or two elements in
+ // it, in which case we CAN promote it, but we have to canonicalize this
+ // out if this is the only problem.
+ if ((NumElements == 1 || NumElements == 2) &&
+ AllUsersAreLoads(GEPI)) {
+ Info.needsCleanup = true;
+ return; // Canonicalization required!
+ }
+ return MarkUnsafe(Info);
+ }
+ }
+
+ // Walk through the GEP type indices, checking the types that this indexes
+ // into.
+ for (; I != E; ++I) {
+ // Ignore struct elements, no extra checking needed for these.
+ if (isa<StructType>(*I))
+ continue;
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
+ if (!IdxVal) return MarkUnsafe(Info);
+
+ // Are all indices still zero?
+ IsAllZeroIndices &= IdxVal->isZero();
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
+ // This GEP indexes an array. Verify that this is an in-range constant
+ // integer. Specifically, consider A[0][i]. We cannot know that the user
+ // isn't doing invalid things like allowing i to index an out-of-range
+ // subscript that accesses A[1]. Because of this, we have to reject SROA
+ // of any accesses into structs where any of the components are variables.
+ if (IdxVal->getZExtValue() >= AT->getNumElements())
+ return MarkUnsafe(Info);
+ } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
+ if (IdxVal->getZExtValue() >= VT->getNumElements())
+ return MarkUnsafe(Info);
+ }
+ }
+
+ // If there are any non-simple uses of this getelementptr, make sure to reject
+ // them.
+ return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
+}
+
+/// isSafeMemIntrinsicOnAllocation - Check whether the specified memory
+/// intrinsic can be promoted by SROA, marking Info unsafe if not. At this
+/// point, we know that the operand of the memintrinsic is a pointer to the
+/// beginning of the allocation.
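+///
+/// For example (illustrative): for "%A = alloca { i32, i32 }", a memcpy is
+/// promotable only if its length is the constant 8, covering the whole
+/// aggregate; a variable or partial length marks the alloca unsafe.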
+void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
+ unsigned OpNo, AllocaInfo &Info) {
+ // If not constant length, give up.
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ if (!Length) return MarkUnsafe(Info);
+
+ // If not the whole aggregate, give up.
+ if (Length->getZExtValue() !=
+ TD->getTypeAllocSize(AI->getType()->getElementType()))
+ return MarkUnsafe(Info);
+
+ // We only know about memcpy/memset/memmove.
+ if (!isa<MemIntrinsic>(MI))
+ return MarkUnsafe(Info);
+
+ // Otherwise, we can transform it. Determine whether this is a memcpy/set
+ // into or out of the aggregate.
+ if (OpNo == 1)
+ Info.isMemCpyDst = true;
+ else {
+ assert(OpNo == 2);
+ Info.isMemCpySrc = true;
+ }
+}
+
+/// isSafeUseOfBitCastedAllocation - Check whether all users of this bitcast
+/// are safe uses of the allocation, marking Info unsafe if any is not.
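+///
+/// For example (illustrative), a whole-alloca integer store through a cast:
+///   %c = bitcast { i32, i32 }* %A to i64*
+///   store i64 %v, i64* %c
+/// is allowed, and is treated like the memcpy-destination case below.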
+void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
+ AllocaInfo &Info) {
+ for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
+ UI != E; ++UI) {
+ if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
+ isSafeUseOfBitCastedAllocation(BCU, AI, Info);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+ isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ if (SI->isVolatile())
+ return MarkUnsafe(Info);
+
+ // If storing the entire alloca in one chunk through a bitcasted pointer
+ // to integer, we can transform it. This happens (for example) when you
+ // cast a {i32,i32}* to i64* and store through it. This is similar to the
+ // memcpy case and occurs in various "byval" cases and emulated memcpys.
+ if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
+ TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
+ TD->getTypeAllocSize(AI->getType()->getElementType())) {
+ Info.isMemCpyDst = true;
+ continue;
+ }
+ return MarkUnsafe(Info);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info);
+
+ // If loading the entire alloca in one chunk through a bitcasted pointer
+ // to integer, we can transform it. This happens (for example) when you
+ // cast a {i32,i32}* to i64* and load through it. This is similar to the
+ // memcpy case and occurs in various "byval" cases and emulated memcpys.
+ if (isa<IntegerType>(LI->getType()) &&
+ TD->getTypeAllocSize(LI->getType()) ==
+ TD->getTypeAllocSize(AI->getType()->getElementType())) {
+ Info.isMemCpySrc = true;
+ continue;
+ }
+ return MarkUnsafe(Info);
+ } else if (isa<DbgInfoIntrinsic>(UI)) {
+ // If one user is DbgInfoIntrinsic then check if all users are
+ // DbgInfoIntrinsics.
+ if (OnlyUsedByDbgInfoIntrinsics(BC)) {
+ Info.needsCleanup = true;
+ return;
+ } else
+ MarkUnsafe(Info);
+ } else {
+ return MarkUnsafe(Info);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
+/// to its first element. Transform users of the cast to use the new values
+/// instead.
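+///
+/// Illustrative example: if %A has been scalarized into %A.0 and %A.1, a user
+/// "%c = bitcast { i32, i32 }* %A to i64*" is rewritten so that the loads,
+/// stores, and mem intrinsics through %c operate on %A.0 and %A.1 directly,
+/// via the helpers dispatched below.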
+void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
+ while (UI != UE) {
+ Instruction *User = cast<Instruction>(*UI++);
+ if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
+ RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
+ if (BCU->use_empty()) BCU->eraseFromParent();
+ continue;
+ }
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ // This must be memcpy/memmove/memset of the entire aggregate.
+ // Split into one per element.
+ RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // If this is a store of the entire alloca from an integer, rewrite it.
+ RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
+ continue;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // If this is a load of the entire alloca to an integer, rewrite it.
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
+ continue;
+ }
+
+ // Otherwise it must be some other user of a gep of the first pointer. Just
+ // leave these alone.
+ continue;
+ }
+}
+
+/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
+/// Rewrite it to copy or set the elements of the scalarized memory.
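+///
+/// For example (illustrative), a memcpy into "%A = alloca { i32, i32 }" from
+/// %src becomes, per element, roughly:
+///   %src.0 = getelementptr { i32, i32 }* %src, i32 0, i32 0
+///   %v.0 = load i32* %src.0
+///   store i32 %v.0, i32* %A.0
+/// since each element here is a single value type.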
+void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
+ AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+
+ // If this is a memcpy/memmove, construct the other pointer as the
+ // appropriate type. The "Other" pointer is the pointer that goes to memory
+ // that doesn't have anything to do with the alloca that we are promoting. For
+ // memset, this Value* stays null.
+ Value *OtherPtr = 0;
+ unsigned MemAlignment = MI->getAlignment();
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
+ if (BCInst == MTI->getRawDest())
+ OtherPtr = MTI->getRawSource();
+ else {
+ assert(BCInst == MTI->getRawSource());
+ OtherPtr = MTI->getRawDest();
+ }
+ }
+
+ // If there is an "other" pointer, we want to convert it to the same pointer
+ // type as AI has, so we can GEP through it safely.
+ if (OtherPtr) {
+ // It is likely that OtherPtr is a bitcast, if so, remove it.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
+ OtherPtr = BC->getOperand(0);
+ // All zero GEPs are effectively bitcasts.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
+ if (GEP->hasAllZeroIndices())
+ OtherPtr = GEP->getOperand(0);
+
+ if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
+ if (BCE->getOpcode() == Instruction::BitCast)
+ OtherPtr = BCE->getOperand(0);
+
+ // If the pointer is not the right type, insert a bitcast to the right
+ // type.
+ if (OtherPtr->getType() != AI->getType())
+ OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
+ MI);
+ }
+
+ // Process each element of the aggregate.
+ Value *TheFn = MI->getOperand(0);
+ const Type *BytePtrTy = MI->getRawDest()->getType();
+ bool SROADest = MI->getRawDest() == BCInst;
+
+ Constant *Zero = Constant::getNullValue(Type::Int32Ty);
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // If this is a memcpy/memmove, emit a GEP of the other element address.
+ Value *OtherElt = 0;
+ unsigned OtherEltAlign = MemAlignment;
+
+ if (OtherPtr) {
+ Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) };
+ OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
+ OtherPtr->getNameStr()+"."+utostr(i),
+ MI);
+ uint64_t EltOffset;
+ const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+ if (const StructType *ST =
+ dyn_cast<StructType>(OtherPtrTy->getElementType())) {
+ EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
+ } else {
+ const Type *EltTy =
+ cast<SequentialType>(OtherPtr->getType())->getElementType();
+ EltOffset = TD->getTypeAllocSize(EltTy)*i;
+ }
+
+ // The alignment of the other pointer is the guaranteed alignment of the
+ // element, which is affected by both the known alignment of the whole
+ // mem intrinsic and the alignment of the element. If the alignment of
+ // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
+ // known alignment is just 4 bytes.
+ OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
+ }
+
+ Value *EltPtr = NewElts[i];
+ const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
+
+ // If we got down to a scalar, insert a load or store as appropriate.
+ if (EltTy->isSingleValueType()) {
+ if (isa<MemTransferInst>(MI)) {
+ if (SROADest) {
+ // From Other to Alloca.
+ Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
+ new StoreInst(Elt, EltPtr, MI);
+ } else {
+ // From Alloca to Other.
+ Value *Elt = new LoadInst(EltPtr, "tmp", MI);
+ new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
+ }
+ continue;
+ }
+ assert(isa<MemSetInst>(MI));
+
+ // If the stored element is zero (common case), just store a null
+ // constant.
+ Constant *StoreVal;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
+ if (CI->isZero()) {
+ StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
+ } else {
+ // If EltTy is a vector type, get the element type.
+ const Type *ValTy = EltTy;
+ if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
+ ValTy = VTy->getElementType();
+
+ // Construct an integer with the right value.
+ unsigned EltSize = TD->getTypeSizeInBits(ValTy);
+ APInt OneVal(EltSize, CI->getZExtValue());
+ APInt TotalVal(OneVal);
+ // Set each byte.
+ for (unsigned i = 0; 8*i < EltSize; ++i) {
+ TotalVal = TotalVal.shl(8);
+ TotalVal |= OneVal;
+ }
+
+ // Convert the integer value to the appropriate type.
+ StoreVal = ConstantInt::get(TotalVal);
+ if (isa<PointerType>(ValTy))
+ StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
+ else if (ValTy->isFloatingPoint())
+ StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
+ assert(StoreVal->getType() == ValTy && "Type mismatch!");
+
+ // If the requested value was a vector constant, create it.
+ if (EltTy != ValTy) {
+ unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
+ SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
+ StoreVal = ConstantVector::get(&Elts[0], NumElts);
+ }
+ }
+ new StoreInst(StoreVal, EltPtr, MI);
+ continue;
+ }
+ // Otherwise, if we're storing a byte variable, use a memset call for
+ // this element.
+ }
+
+ // Cast the element pointer to BytePtrTy.
+ if (EltPtr->getType() != BytePtrTy)
+ EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI);
+
+ // Cast the other pointer (if we have one) to BytePtrTy.
+ if (OtherElt && OtherElt->getType() != BytePtrTy)
+ OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(),
+ MI);
+
+ unsigned EltSize = TD->getTypeAllocSize(EltTy);
+
+ // Finally, insert the meminst for this element.
+ if (isa<MemTransferInst>(MI)) {
+ Value *Ops[] = {
+ SROADest ? EltPtr : OtherElt, // Dest ptr
+ SROADest ? OtherElt : EltPtr, // Src ptr
+ ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ ConstantInt::get(Type::Int32Ty, OtherEltAlign) // Align
+ };
+ CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+ } else {
+ assert(isa<MemSetInst>(MI));
+ Value *Ops[] = {
+ EltPtr, MI->getOperand(2), // Dest, Value,
+ ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ Zero // Align
+ };
+ CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+ }
+ }
+ MI->eraseFromParent();
+}
+
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
+/// overwrites the entire allocation. Extract out the pieces of the stored
+/// integer and store them individually.
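+///
+/// Illustrative sketch (little-endian): "store i64 %v, i64* %c" over an
+/// { i32, i32 } alloca becomes roughly:
+///   %lo = trunc i64 %v to i32
+///   store i32 %lo, i32* %A.0
+///   %sh = lshr i64 %v, 32
+///   %hi = trunc i64 %sh to i32
+///   store i32 %hi, i32* %A.1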
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
+ AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts){
+ // Extract each element out of the integer according to its structure offset
+ // and store the element value to the individual alloca.
+ Value *SrcVal = SI->getOperand(0);
+ const Type *AllocaEltTy = AI->getType()->getElementType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ // If this isn't a store of an integer to the whole alloca, it may be a store
+ // to the first element. Just ignore the store in this case and normal SROA
+ // will handle it.
+ if (!isa<IntegerType>(SrcVal->getType()) ||
+ TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ return;
+ // Handle tail padding by extending the operand
+ if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ SrcVal = new ZExtInst(SrcVal, IntegerType::get(AllocaSizeBits), "", SI);
+
+ DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ const StructLayout *Layout = TD->getStructLayout(EltSTy);
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Get the number of bits to shift SrcVal to get the value.
+ const Type *FieldTy = EltSTy->getElementType(i);
+ uint64_t Shift = Layout->getElementOffsetInBits(i);
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
+ "sroa.store.elt", SI);
+ }
+
+ // Truncate down to an integer of the right size.
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ if (FieldSizeBits != AllocaSizeBits)
+ EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI);
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == FieldTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = new BitCastInst(DestField,
+ PointerType::getUnqual(EltVal->getType()),
+ "", SI);
+ }
+ new StoreInst(EltVal, DestField, SI);
+ }
+
+ } else {
+ const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ const Type *ArrayEltTy = ATy->getElementType();
+ uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+
+ uint64_t Shift;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-ElementOffset;
+ else
+ Shift = 0;
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (ElementSizeBits == 0) continue;
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
+ "sroa.store.elt", SI);
+ }
+
+ // Truncate down to an integer of the right size.
+ if (ElementSizeBits != AllocaSizeBits)
+ EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI);
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == ArrayEltTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = new BitCastInst(DestField,
+ PointerType::getUnqual(EltVal->getType()),
+ "", SI);
+ }
+ new StoreInst(EltVal, DestField, SI);
+
+ if (TD->isBigEndian())
+ Shift -= ElementOffset;
+ else
+ Shift += ElementOffset;
+ }
+ }
+
+ SI->eraseFromParent();
+}
+
+/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
+/// an integer. Load the individual pieces to form the aggregate value.
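+///
+/// Illustrative sketch (little-endian): "%v = load i64* %c" over an
+/// { i32, i32 } alloca becomes roughly:
+///   %lo = load i32* %A.0
+///   %lo.z = zext i32 %lo to i64
+///   %hi = load i32* %A.1
+///   %hi.z = zext i32 %hi to i64
+///   %hi.s = shl i64 %hi.z, 32
+///   %v = or i64 %hi.s, %lo.z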
+void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // Extract each element out of the NewElts according to its structure offset
+ // and form the result value.
+ const Type *AllocaEltTy = AI->getType()->getElementType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ // If this isn't a load of the whole alloca to an integer, it may be a load
+ // of the first element. Just ignore the load in this case and normal SROA
+ // will handle it.
+ if (!isa<IntegerType>(LI->getType()) ||
+ TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
+ return;
+
+ DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI;
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ const StructLayout *Layout = 0;
+ uint64_t ArrayEltBitOffset = 0;
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ Layout = TD->getStructLayout(EltSTy);
+ } else {
+ const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
+ ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ }
+
+ Value *ResultVal = Constant::getNullValue(IntegerType::get(AllocaSizeBits));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Load the value from the alloca. If the NewElt is an aggregate, cast
+ // the pointer to an integer of the same size before doing the load.
+ Value *SrcField = NewElts[i];
+ const Type *FieldTy =
+ cast<PointerType>(SrcField->getType())->getElementType();
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ const IntegerType *FieldIntTy = IntegerType::get(FieldSizeBits);
+ if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
+ !isa<VectorType>(FieldTy))
+ SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy),
+ "", LI);
+ SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
+
+ // If SrcField is a fp or vector of the right size but that isn't an
+ // integer type, bitcast to an integer so we can shift it.
+ if (SrcField->getType() != FieldIntTy)
+ SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
+
+ // Zero extend the field to be the same size as the final alloca so that
+ // we can shift and insert it.
+ if (SrcField->getType() != ResultVal->getType())
+ SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
+
+ // Determine the number of bits to shift SrcField.
+ uint64_t Shift;
+ if (Layout) // Struct case.
+ Shift = Layout->getElementOffsetInBits(i);
+ else // Array case.
+ Shift = i*ArrayEltBitOffset;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
+
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
+ SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
+ }
+
+ ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ }
+
+ // Handle tail padding by truncating the result
+ if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
+
+ LI->replaceAllUsesWith(ResultVal);
+ LI->eraseFromParent();
+}
+
+
+/// HasPadding - Return true if the specified type has any structure or
+/// alignment padding, false otherwise.
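+/// For example (illustrative, assuming a typical 32-bit layout): { i8, i32 }
+/// has three bytes of padding between its fields and { i32, i8 } has tail
+/// padding, so both return true, while { i32, i32 } has none.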
+static bool HasPadding(const Type *Ty, const TargetData &TD) {
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned PrevFieldBitOffset = 0;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+ // Padding in sub-elements?
+ if (HasPadding(STy->getElementType(i), TD))
+ return true;
+
+ // Check to see if there is any padding between this element and the
+ // previous one.
+ if (i) {
+ unsigned PrevFieldEnd =
+ PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
+ if (PrevFieldEnd < FieldBitOffset)
+ return true;
+ }
+
+ PrevFieldBitOffset = FieldBitOffset;
+ }
+
+ // Check for tail padding.
+ if (unsigned EltCount = STy->getNumElements()) {
+ unsigned PrevFieldEnd = PrevFieldBitOffset +
+ TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ if (PrevFieldEnd < SL->getSizeInBits())
+ return true;
+ }
+
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ return HasPadding(ATy->getElementType(), TD);
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ return HasPadding(VTy->getElementType(), TD);
+ }
+ return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+}
+
+/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
+/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe,
+/// or 1 if safe after canonicalization has been performed.
+///
+int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
+ // Loop over the use list of the alloca. We can only transform it if all of
+ // the users are safe to transform.
+ AllocaInfo Info;
+
+ for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
+ I != E; ++I) {
+ isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
+ if (Info.isUnsafe) {
+ DOUT << "Cannot transform: " << *AI << " due to user: " << **I;
+ return 0;
+ }
+ }
+
+ // Okay, we know all the users are promotable. If the aggregate is a memcpy
+ // source and destination, we have to be careful. In particular, the memcpy
+ // could be moving around elements that live in structure padding of the LLVM
+ // types, but may actually be used. In these cases, we refuse to promote the
+ // struct.
+ if (Info.isMemCpySrc && Info.isMemCpyDst &&
+ HasPadding(AI->getType()->getElementType(), *TD))
+ return 0;
+
+ // If we require cleanup, return 1, otherwise return 3.
+ return Info.needsCleanup ? 1 : 3;
+}
+
+/// CleanupGEP - GEP is a use of an Alloca which can be promoted after the GEP
+/// is canonicalized here.
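+///
+/// For example (illustrative), given
+///   %p = getelementptr [2 x i32]* %A, i32 0, i32 %i
+/// every "load i32* %p" is rewritten below into two constant-index loads and
+/// a select on whether %i is nonzero.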
+void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
+ gep_type_iterator I = gep_type_begin(GEPI);
+ ++I;
+
+ const ArrayType *AT = dyn_cast<ArrayType>(*I);
+ if (!AT)
+ return;
+
+ uint64_t NumElements = AT->getNumElements();
+
+ if (isa<ConstantInt>(I.getOperand()))
+ return;
+
+ if (NumElements == 1) {
+ GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty));
+ return;
+ }
+
+ assert(NumElements == 2 && "Unhandled case!");
+ // All users of the GEP must be loads. At each use of the GEP, insert
+ // two loads of the appropriate indexed GEP and select between them.
+ Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(),
+ Constant::getNullValue(I.getOperand()->getType()),
+ "isone", GEPI);
+ // Insert the new GEP instructions, which are properly indexed.
+ SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
+ Indices[1] = Constant::getNullValue(Type::Int32Ty);
+ Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
+ Indices.begin(),
+ Indices.end(),
+ GEPI->getName()+".0", GEPI);
+ Indices[1] = ConstantInt::get(Type::Int32Ty, 1);
+ Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
+ Indices.begin(),
+ Indices.end(),
+ GEPI->getName()+".1", GEPI);
+ // Replace all loads of the variable index GEP with loads from both
+ // indexes and a select.
+ while (!GEPI->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(GEPI->use_back());
+ Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI);
+ Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI);
+ Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI);
+ LI->replaceAllUsesWith(R);
+ LI->eraseFromParent();
+ }
+ GEPI->eraseFromParent();
+}
+
+
+/// CleanupAllocaUsers - If SROA reported that it can promote the specified
+/// allocation, but only if cleaned up, perform the cleanups required.
+void SROA::CleanupAllocaUsers(AllocationInst *AI) {
+ // At this point, we know that the end result will be SROA'd and promoted, so
+ // we can insert ugly code if required so long as sroa+mem2reg will clean it
+ // up.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ) {
+ User *U = *UI++;
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
+ CleanupGEP(GEPI);
+ else if (Instruction *I = dyn_cast<Instruction>(U)) {
+ SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
+ if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
+ // Safe to remove debug info uses.
+ while (!DbgInUses.empty()) {
+ DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
+ DI->eraseFromParent();
+ }
+ I->eraseFromParent();
+ }
+ }
+ }
+}
+
+/// MergeInType - Add the 'In' type to the accumulated vector type (VecTy) so
+/// far at the offset specified by Offset (which is specified in bytes).
+///
+/// There are two cases we handle here:
+/// 1) A union of vector types of the same size and potentially its elements.
+/// Here we turn element accesses into insert/extract element operations.
+/// This promotes a <4 x float> with a store of float to the third element
+/// into a <4 x float> that uses insert element.
+/// 2) A fully general blob of memory, which we turn into some (potentially
+/// large) integer type with extract and insert operations where the loads
+/// and stores would mutate the memory.
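+///
+/// For example (illustrative): a 16-byte alloca that sees a store of float at
+/// Offset 8 is compatible with VecTy = <4 x float>; the access maps to
+/// element 2 of the vector.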
+static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
+ unsigned AllocaSize, const TargetData &TD) {
+ // If this could be contributing to a vector, analyze it.
+ if (VecTy != Type::VoidTy) { // either null or a vector type.
+
+ // If the In type is a vector that is the same size as the alloca, see if it
+ // matches the existing VecTy.
+ if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
+ // If we're storing/loading a vector of the right size, allow it as a
+ // vector. If this the first vector we see, remember the type so that
+ // we know the element size.
+ if (VecTy == 0)
+ VecTy = VInTy;
+ return;
+ }
+ } else if (In == Type::FloatTy || In == Type::DoubleTy ||
+ (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
+ isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // If we're accessing something that could be an element of a vector, see
+ // if the implied vector agrees with what we already have and if Offset is
+ // compatible with it.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (Offset % EltSize == 0 &&
+ AllocaSize % EltSize == 0 &&
+ (VecTy == 0 ||
+ cast<VectorType>(VecTy)->getElementType()
+ ->getPrimitiveSizeInBits()/8 == EltSize)) {
+ if (VecTy == 0)
+ VecTy = VectorType::get(In, AllocaSize/EltSize);
+ return;
+ }
+ }
+ }
+
+ // Otherwise, we have a case that we can't handle with an optimized vector
+ // form. We can still turn this into a large integer.
+ VecTy = Type::VoidTy;
+}
+
+/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
+/// its accesses to use a single vector type, return true, and set VecTy to
+/// the new type. If we could convert the alloca into a single promotable
+/// integer, return true but set VecTy to VoidTy. Further, if the use is not a
+/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset
+/// is the current offset from the base of the alloca being analyzed.
+///
+/// If we see at least one access to the value as a vector type, set the
+/// SawVec flag.
+///
+bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
+ bool &SawVec, uint64_t Offset,
+ unsigned AllocaSize) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // Don't break volatile loads.
+ if (LI->isVolatile())
+ return false;
+ MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD);
+ SawVec |= isa<VectorType>(LI->getType());
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Storing the pointer, not into the value?
+ if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD);
+ SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
+ AllocaSize))
+ return false;
+ IsNotTrivial = true;
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // If this is a GEP with variable indices, we can't handle it.
+ if (!GEP->hasAllConstantIndices())
+ return false;
+
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
+ &Indices[0], Indices.size());
+ // See if all uses can be converted.
+ if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
+ AllocaSize))
+ return false;
+ IsNotTrivial = true;
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // handle it.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ // Store of constant value and constant size.
+ if (isa<ConstantInt>(MSI->getValue()) &&
+ isa<ConstantInt>(MSI->getLength())) {
+ IsNotTrivial = true;
+ continue;
+ }
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
+ if (Len->getZExtValue() == AllocaSize && Offset == 0) {
+ IsNotTrivial = true;
+ continue;
+ }
+ }
+
+ // Ignore dbg intrinsic.
+ if (isa<DbgInfoIntrinsic>(User))
+ continue;
+
+ // Otherwise, we cannot handle this!
+ return false;
+ }
+
+ return true;
+}
+
+
+/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
+/// directly. This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right. By the end of this, there should be no uses of Ptr.
+void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
+ while (!Ptr->use_empty()) {
+ Instruction *User = cast<Instruction>(Ptr->use_back());
+
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
+ ConvertUsesToScalar(CI, NewAI, Offset);
+ CI->eraseFromParent();
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
+ &Indices[0], Indices.size());
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+ GEP->eraseFromParent();
+ continue;
+ }
+
+ IRBuilder<> Builder(User->getParent(), User);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // The load is a bit extract from NewAI shifted right by Offset bits.
+ Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *NewLoadVal
+ = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
+ LI->replaceAllUsesWith(NewLoadVal);
+ LI->eraseFromParent();
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ assert(SI->getOperand(0) != Ptr && "Consistency error!");
+ Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str());
+ Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
+ Builder);
+ Builder.CreateStore(New, NewAI);
+ SI->eraseFromParent();
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // transform it into a store of the expanded constant value.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ assert(MSI->getRawDest() == Ptr && "Consistency error!");
+ unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if (NumBytes != 0) {
+ unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
+
+ // Compute the value replicated the right number of times.
+ APInt APVal(NumBytes*8, Val);
+
+ // Splat the value if non-zero.
+ if (Val)
+ for (unsigned i = 1; i != NumBytes; ++i)
+ APVal |= APVal << 8;
+
+ Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str());
+ Value *New = ConvertScalar_InsertValue(ConstantInt::get(APVal), Old,
+ Offset, Builder);
+ Builder.CreateStore(New, NewAI);
+ }
+ MSI->eraseFromParent();
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ assert(Offset == 0 && "must be store to start of alloca");
+
+ // If the source and destination both refer to the same alloca, then this
+ // is a noop copy-to-self; just delete it. Otherwise, emit a load and store
+ // as appropriate.
+ AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());
+
+ if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
+ // Dest must be OrigAI, change this to be a load from the original
+ // pointer (bitcasted), then a store to our new alloca.
+ assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
+ Value *SrcPtr = MTI->getSource();
+ SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
+
+ LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
+ SrcVal->setAlignment(MTI->getAlignment());
+ Builder.CreateStore(SrcVal, NewAI);
+ } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
+ // Src must be OrigAI, change this to be a load from NewAI then a store
+ // through the original dest pointer (bitcasted).
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
+ LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
+
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+ StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
+ NewStore->setAlignment(MTI->getAlignment());
+ } else {
+ // Noop transfer. Src == Dst
+ }
+
+ MTI->eraseFromParent();
+ continue;
+ }
+
+ // If user is a dbg info intrinsic then it is safe to remove it.
+ if (isa<DbgInfoIntrinsic>(User)) {
+ User->eraseFromParent();
+ continue;
+ }
+
+ assert(0 && "Unsupported operation!");
+ abort();
+ }
+}
+
+/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
+/// or vector value FromVal, extracting the bits from the offset specified by
+/// Offset. This returns the value, which is of type ToType.
+///
+/// This happens when we are converting an "integer union" to a single
+/// integer scalar, or when we are converting a "vector union" to a vector with
+/// insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
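+///
+/// Illustrative sketch (little-endian): extracting an i16 at Offset 16 from
+/// an i64 value %val becomes roughly:
+///   %s = lshr i64 %val, 16
+///   %t = trunc i64 %s to i16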
+Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // If the load is of the whole new alloca, no conversion is needed.
+ if (FromVal->getType() == ToType && Offset == 0)
+ return FromVal;
+
+ // If the result alloca is a vector type, this is either an element
+ // access or a bitcast to another vector type of the same size.
+ if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
+ if (isa<VectorType>(ToType))
+ return Builder.CreateBitCast(FromVal, ToType, "tmp");
+
+ // Otherwise it must be an element access.
+ unsigned Elt = 0;
+ if (Offset) {
+ unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
+ Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ }
+ // Return the element extracted out of it.
+ Value *V = Builder.CreateExtractElement(FromVal,
+ ConstantInt::get(Type::Int32Ty,Elt),
+ "tmp");
+ if (V->getType() != ToType)
+ V = Builder.CreateBitCast(V, ToType, "tmp");
+ return V;
+ }
+
+ // If ToType is a first class aggregate, extract out each of the pieces and
+ // use insertvalue's to form the FCA.
+ if (const StructType *ST = dyn_cast<StructType>(ToType)) {
+ const StructLayout &Layout = *TD->getStructLayout(ST);
+ Value *Res = UndefValue::get(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
+ Value *Res = UndefValue::get(AT);
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
+ Offset+i*EltSize, Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ // Otherwise, this must be a union that was converted to an integer value.
+ const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+
+ // If this is a big-endian system and the load is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD->isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = TD->getTypeStoreSizeInBits(NTy) -
+ TD->getTypeStoreSizeInBits(ToType) - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative shift amounts (handled with shl), which are
+ // not defined for lshr. We do this to support (f.e.) loads off the end
+ // of a structure where only some bits are used.
+ if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateLShr(FromVal, ConstantInt::get(FromVal->getType(),
+ ShAmt), "tmp");
+ else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateShl(FromVal, ConstantInt::get(FromVal->getType(),
+ -ShAmt), "tmp");
+
+ // Finally, unconditionally truncate the integer to the right width.
+ unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
+ if (LIBitWidth < NTy->getBitWidth())
+ FromVal = Builder.CreateTrunc(FromVal, IntegerType::get(LIBitWidth), "tmp");
+ else if (LIBitWidth > NTy->getBitWidth())
+ FromVal = Builder.CreateZExt(FromVal, IntegerType::get(LIBitWidth), "tmp");
+
+ // If the result is an integer, this is a trunc or bitcast.
+ if (isa<IntegerType>(ToType)) {
+ // Should be done.
+ } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
+ // Just do a bitcast, we know the sizes match up.
+ FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ } else {
+ // Otherwise must be a pointer.
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ }
+ assert(FromVal->getType() == ToType && "Didn't convert right?");
+ return FromVal;
+}
+
+
+/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
+/// or vector value "Old" at the offset specified by Offset.
+///
+/// This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
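+///
+/// Illustrative sketch (little-endian): inserting an i16 %sv at Offset 16
+/// into an i64 %old becomes roughly:
+///   %z = zext i16 %sv to i64
+///   %s = shl i64 %z, 16
+///   %m = and i64 %old, -4294901761   ; clear bits 16..31
+///   %r = or i64 %m, %s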
+Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
+ uint64_t Offset, IRBuilder<> &Builder) {
+
+ // Convert the stored type to the actual type, shift it left to insert, and
+ // then 'or' it into place.
+ const Type *AllocaType = Old->getType();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
+ uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());
+
+ // Changing the whole vector with memset or with an access of a different
+ // vector type?
+ if (ValSize == VecSize)
+ return Builder.CreateBitCast(SV, AllocaType, "tmp");
+
+ uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
+
+ // Must be an element insertion.
+ unsigned Elt = Offset/EltSize;
+
+ if (SV->getType() != VTy->getElementType())
+ SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+
+ SV = Builder.CreateInsertElement(Old, SV,
+ ConstantInt::get(Type::Int32Ty, Elt),
+ "tmp");
+ return SV;
+ }
+
+ // If SV is a first-class aggregate value, insert each value recursively.
+ if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ const StructLayout &Layout = *TD->getStructLayout(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old,
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ }
+ return Old;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+ }
+ return Old;
+ }
+
+ // If SV is a float, convert it to the appropriate integer type.
+ // If it is a pointer, do the same.
+ unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
+ unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
+ unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
+ unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
+ if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
+ SV = Builder.CreateBitCast(SV, IntegerType::get(SrcWidth), "tmp");
+ else if (isa<PointerType>(SV->getType()))
+ SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp");
+
+ // Zero extend or truncate the value if needed.
+ if (SV->getType() != AllocaType) {
+ if (SV->getType()->getPrimitiveSizeInBits() <
+ AllocaType->getPrimitiveSizeInBits())
+ SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ else {
+ // Truncation may be needed if storing more than the alloca can hold
+ // (undefined behavior).
+ SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SrcWidth = DestWidth;
+ SrcStoreWidth = DestStoreWidth;
+ }
+ }
+
+ // If this is a big-endian system and the store is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD->isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative shift amounts (handled with lshr), which are
+ // not defined for shl. We do this to support (f.e.) stores off the end
+ // of a structure where only some bits in the structure are set.
+ APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
+ if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt), "tmp");
+ Mask <<= ShAmt;
+ } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt), "tmp");
+ Mask = Mask.lshr(-ShAmt);
+ }
+
+ // Mask out the bits we are about to insert from the old value, and or
+ // in the new bits.
+ if (SrcWidth != DestWidth) {
+ assert(DestWidth > SrcWidth);
+ Old = Builder.CreateAnd(Old, ConstantInt::get(~Mask), "mask");
+ SV = Builder.CreateOr(Old, SV, "ins");
+ }
+ return SV;
+}
+
+
+
+/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool PointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return PointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer; return false if we
+/// see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with isOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
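+///
+/// For example (illustrative): an alloca whose only write is a memcpy whose
+/// source is a bitcast or all-zero GEP of a constant global @G qualifies, and
+/// TheCopy is set to that memcpy call.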
+static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
+ bool isOffset) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ // Ignore non-volatile loads, they are always ok.
+ if (!LI->isVolatile())
+ continue;
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
+ isOffset || !GEP->hasAllZeroIndices()))
+ return false;
+ continue;
+ }
+
+ // If this isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ if (!isa<MemTransferInst>(*UI))
+ return false;
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (isOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 1) return false;
+
+ MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!PointsToConstantGlobal(MI->getOperand(2)))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return the copying instruction if the
+/// specified alloca is only modified by a copy from a constant global, or
+/// null otherwise. If we can prove this, we can replace any uses of the
+/// alloca with uses of the global directly.
+Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
+ Instruction *TheCopy = 0;
+ if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+ return TheCopy;
+ return 0;
+}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
new file mode 100644
index 0000000..b499279
--- /dev/null
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -0,0 +1,238 @@
+//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead code elimination and basic block merging, along
+// with a collection of other peephole control flow optimizations. For example:
+//
+// * Removes basic blocks with no predecessors.
+// * Merges a basic block into its predecessor if there is only one and the
+// predecessor only has one successor.
+// * Eliminates PHI nodes for basic blocks with a single predecessor.
+// * Eliminates a basic block that only contains an unconditional branch.
+// * Changes invoke instructions to nounwind functions to be calls.
+// * Changes things like "if (x) if (y)" into "if (x&y)".
+// * etc..
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Attributes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+namespace {
+ struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGSimplifyPass() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char CFGSimplifyPass::ID = 0;
+static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG");
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass() {
+ return new CFGSimplifyPass();
+}
+
+/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void ChangeToUnreachable(Instruction *I) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ new UnreachableInst(I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// ChangeToCall - Convert the specified invoke into a normal call.
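+/// For example (illustrative):
+///   invoke void @f() to label %ok unwind label %uw   ; @f is nounwind
+/// becomes
+///   call void @f()
+///   br label %ok
+/// and %uw loses this block as a predecessor.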
+static void ChangeToCall(InvokeInst *II) {
+ BasicBlock *BB = II->getParent();
+ SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
+ Args.end(), "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(BB);
+ BB->getInstList().erase(II);
+}
+
+static bool MarkAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(BB);
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ BB = Worklist.back();
+ Worklist.pop_back();
+
+ if (!Reachable.insert(BB))
+ continue;
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ ChangeToUnreachable(BBI);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ if (isa<ConstantPointerNull>(SI->getOperand(1)) ||
+ isa<UndefValue>(SI->getOperand(1))) {
+ ChangeToUnreachable(SI);
+ Changed = true;
+ break;
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ ChangeToCall(II);
+ Changed = true;
+ }
+
+ Changed |= ConstantFoldTerminator(BB);
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ Worklist.push_back(*SI);
+ }
+ return Changed;
+}
+
+/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+static bool RemoveUnreachableBlocksFromFn(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+
+  // If every block was found to be reachable, there is nothing to remove.
+  if (Reachable.size() == F.size())
+ return Changed;
+
+ assert(Reachable.size() < F.size());
+ NumSimpl += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
+ continue;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.count(*SI))
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ for (Function::iterator I = ++F.begin(); I != F.end();)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
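+
+// The two-phase teardown above matters for dead cycles: if blocks %a and %b
+// branch only to each other and neither is reachable from the entry block,
+// each still references the other, so we drop references from both blocks
+// first and only then erase them.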
+
+/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool IterativeSimplifyCFG(Function &F) {
+ bool Changed = false;
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Loop over all of the basic blocks (except the first one) and remove them
+ // if they are unneeded...
+ //
+ for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
+ if (SimplifyCFG(BBIt++)) {
+ LocalChange = true;
+ ++NumSimpl;
+ }
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+// It is possible that we may require multiple passes over the code to fully
+// simplify the CFG.
+//
+bool CFGSimplifyPass::runOnFunction(Function &F) {
+ bool EverChanged = RemoveUnreachableBlocksFromFn(F);
+ EverChanged |= IterativeSimplifyCFG(F);
+
+ // If neither pass changed anything, we're done.
+ if (!EverChanged) return false;
+
+ // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
+ // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // iterate between the two optimizations. We structure the code like this to
+  // avoid rerunning IterativeSimplifyCFG if the second pass of
+ // RemoveUnreachableBlocksFromFn doesn't do anything.
+ if (!RemoveUnreachableBlocksFromFn(F))
+ return true;
+
+ do {
+ EverChanged = IterativeSimplifyCFG(F);
+ EverChanged |= RemoveUnreachableBlocksFromFn(F);
+ } while (EverChanged);
+
+ return true;
+}
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
new file mode 100644
index 0000000..4aad17d
--- /dev/null
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -0,0 +1,159 @@
+//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies an experimental
+// transformation on calls to specific functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls-halfpowr"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+namespace {
+  /// This pass optimizes well-known half_powr function calls.
+ ///
+ class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass {
+ const TargetData *TD;
+ public:
+ static char ID; // Pass identification
+ SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ }
+
+ Instruction *
+ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+ Instruction *InsertPt);
+ };
+ char SimplifyHalfPowrLibCalls::ID = 0;
+} // end anonymous namespace.
+
+static RegisterPass<SimplifyHalfPowrLibCalls>
+X("simplify-libcalls-halfpowr", "Simplify half_powr library calls");
+
+// Public interface to the Simplify HalfPowr LibCalls pass.
+FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
+ return new SimplifyHalfPowrLibCalls();
+}
+
+/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
+/// their control flow to better facilitate subsequent optimization.
+Instruction *
+SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+ Instruction *InsertPt) {
+ std::vector<BasicBlock *> Bodies;
+ BasicBlock *NewBlock = 0;
+
+ for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
+ CallInst *Call = cast<CallInst>(HalfPowrs[i]);
+ Function *Callee = Call->getCalledFunction();
+
+    // Minimally sanity-check the CFG of half_powr to ensure that it contains
+    // the kind of code we expect.  If we're running this pass, we have
+    // reason to believe it will be what we expect.
+ Function::iterator I = Callee->begin();
+ BasicBlock *Prologue = I++;
+ if (I == Callee->end()) break;
+ BasicBlock *SubnormalHandling = I++;
+ if (I == Callee->end()) break;
+ BasicBlock *Body = I++;
+ if (I != Callee->end()) break;
+ if (SubnormalHandling->getSinglePredecessor() != Prologue)
+ break;
+ BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
+ if (!PBI || !PBI->isConditional())
+ break;
+ BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
+ if (!SNBI || SNBI->isConditional())
+ break;
+ if (!isa<ReturnInst>(Body->getTerminator()))
+ break;
+
+ Instruction *NextInst = next(BasicBlock::iterator(Call));
+
+ // Inline the call, taking care of what code ends up where.
+ NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
+
+    bool B = InlineFunction(Call, 0, TD);
+    assert(B && "half_powr didn't inline?");
+    B=B; // Reference B so Release builds don't warn about an unused variable.
+
+ BasicBlock *NewBody = NewBlock->getSinglePredecessor();
+ assert(NewBody);
+ Bodies.push_back(NewBody);
+ }
+
+ if (!NewBlock)
+ return InsertPt;
+
+ // Put the code for all the bodies into one block, to facilitate
+ // subsequent optimization.
+ (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
+ for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
+ BasicBlock *Body = Bodies[i];
+ Instruction *FNP = Body->getFirstNonPHI();
+ // Splice the insts from body into NewBlock.
+ NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
+ FNP, Body->getTerminator());
+ }
+
+ return NewBlock->begin();
+}
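+
+// The CFG shape checked above, sketched:
+//
+//   Prologue --(cond br)--> SubnormalHandling
+//      |                         |
+//      +--------> Body <--(br)---+      Body ends in 'ret'.
+//
+// Any deviation from this three-block form makes the loop bail out early.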
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
+ TD = &getAnalysis<TargetData>();
+
+ bool Changed = false;
+ std::vector<Instruction *> HalfPowrs;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Look for calls.
+ bool IsHalfPowr = false;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+        // Look for direct calls to functions with external linkage.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee && Callee->hasExternalLinkage()) {
+ // Look for calls with well-known names.
+ const char *CalleeName = Callee->getNameStart();
+ if (strcmp(CalleeName, "__half_powrf4") == 0)
+ IsHalfPowr = true;
+ }
+ }
+ if (IsHalfPowr)
+ HalfPowrs.push_back(I);
+ // We're looking for sequences of up to three such calls, which we'll
+ // simplify as a group.
+ if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) {
+ I = InlineHalfPowrs(HalfPowrs, I);
+ E = I->getParent()->end();
+ HalfPowrs.clear();
+ Changed = true;
+ }
+ }
+ assert(HalfPowrs.empty() && "Block had no terminator!");
+ }
+
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
new file mode 100644
index 0000000..4b00640
--- /dev/null
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -0,0 +1,2429 @@
+//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies a variety of small
+// optimizations for calls to specific well-known function calls (e.g. runtime
+// library functions). For example, a call to the function "exit(3)" that
+// occurs within the main() function can be transformed into a simple "return 3"
+// instruction. Any optimization that takes this form (replace call to library
+// function with simpler code that provides the same result) belongs in this
+// file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+
+//===----------------------------------------------------------------------===//
+// Optimizer Base Class
+//===----------------------------------------------------------------------===//
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class VISIBILITY_HIDDEN LibCallOptimization {
+protected:
+ Function *Caller;
+ const TargetData *TD;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+  /// CallOptimizer - This pure virtual method is implemented by subclasses to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = &TD;
+ return CallOptimizer(CI->getCalledFunction(), CI, B);
+ }
+
+ /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+ Value *CastToCStr(Value *V, IRBuilder<> &B);
+
+ /// EmitStrLen - Emit a call to the strlen function to the builder, for the
+ /// specified pointer. Ptr is required to be some pointer type, and the
+ /// return value has 'intptr_t' type.
+ Value *EmitStrLen(Value *Ptr, IRBuilder<> &B);
+
+ /// EmitMemCpy - Emit a call to the memcpy function to the builder. This
+ /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+ Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+ unsigned Align, IRBuilder<> &B);
+
+ /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+ /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+ Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B);
+
+ /// EmitMemCmp - Emit a call to the memcmp function.
+ Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B);
+
+ /// EmitMemSet - Emit a call to the memset function
+ Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B);
+
+  /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
+  /// (e.g. 'floor'). This function is known to take a single argument of type
+  /// matching 'Op' and to return one value of the same type. If 'Op' is a
+  /// long double, an 'l' suffix is appended to the name; if 'Op' is a float,
+  /// an 'f' suffix is appended.
+ Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B);
+
+ /// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+ /// is an integer.
+ void EmitPutChar(Value *Char, IRBuilder<> &B);
+
+ /// EmitPutS - Emit a call to the puts function. This assumes that Str is
+ /// some pointer.
+ void EmitPutS(Value *Str, IRBuilder<> &B);
+
+ /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+ /// an i32, and File is a pointer to FILE.
+ void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B);
+
+  /// EmitFPutS - Emit a call to the fputs function.  Str is required to be a
+ /// pointer and File is a pointer to FILE.
+ void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B);
+
+ /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+ /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+ void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B);
+
+};
+} // End anonymous namespace.
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) {
+ return B.CreateBitCast(V, PointerType::getUnqual(Type::Int8Ty), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
+ TD->getIntPtrType(),
+ PointerType::getUnqual(Type::Int8Ty),
+ NULL);
+ return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+}
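+
+// As a sketch of what this emits (hypothetical names, i64 intptr_t target):
+//   declare i64 @strlen(i8* nocapture) nounwind readonly
+//   %strlen = call i64 @strlen(i8* %cstr)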
+
+/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
+/// expects that the size has type 'intptr_t' and Dst/Src are pointers.
+Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+ unsigned Align, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ Intrinsic::ID IID = Intrinsic::memcpy;
+ const Type *Tys[1];
+ Tys[0] = Len->getType();
+ Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1);
+ return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len,
+ ConstantInt::get(Type::Int32Ty, Align));
+}
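+
+// Sketch of the emitted call (hypothetical operands, i64 intptr_t target):
+//   call void @llvm.memcpy.i64(i8* %dst, i8* %src, i64 %len, i32 1)
+// The intrinsic is selected by the type of Len, so a 32-bit length would
+// yield @llvm.memcpy.i32 instead.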
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
+ Value *Len, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+
+ Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
+ PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ Type::Int32Ty, TD->getIntPtrType(),
+ NULL);
+ return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+}
+
+/// EmitMemCmp - Emit a call to the memcmp function.
+Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+ Value *Len, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
+ Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ TD->getIntPtrType(), NULL);
+ return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+}
+
+/// EmitMemSet - Emit a call to the memset function
+Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,
+ Value *Len, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ Intrinsic::ID IID = Intrinsic::memset;
+ const Type *Tys[1];
+ Tys[0] = Len->getType();
+ Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
+ Value *Align = ConstantInt::get(Type::Int32Ty, 1);
+ return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
+/// (e.g. 'floor'). This function is known to take a single argument of type
+/// matching 'Op' and to return one value of the same type. If 'Op' is a long
+/// double, an 'l' suffix is appended to the name; if 'Op' is a float, an 'f'
+/// suffix is appended.
+Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+ IRBuilder<> &B) {
+ char NameBuffer[20];
+ if (Op->getType() != Type::DoubleTy) {
+ // If we need to add a suffix, copy into NameBuffer.
+ unsigned NameLen = strlen(Name);
+ assert(NameLen < sizeof(NameBuffer)-2);
+ memcpy(NameBuffer, Name, NameLen);
+ if (Op->getType() == Type::FloatTy)
+ NameBuffer[NameLen] = 'f'; // floorf
+ else
+ NameBuffer[NameLen] = 'l'; // floorl
+ NameBuffer[NameLen+1] = 0;
+ Name = NameBuffer;
+ }
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), NULL);
+ return B.CreateCall(Callee, Op, Name);
+}
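+
+// E.g. with Name == "floor": a double operand calls floor, a float operand
+// calls floorf, and any other FP type (treated as long double) calls floorl.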
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty,
+ Type::Int32Ty, NULL);
+ B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar");
+}
+
+/// EmitPutS - Emit a call to the puts function. This assumes that Str is
+/// some pointer.
+void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+
+ Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty), NULL);
+ B.CreateCall(F, CastToCStr(Str, B), "puts");
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (isa<PointerType>(File->getType()))
+ F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty,
+ Type::Int32Ty, File->getType(), NULL);
+
+ else
+ F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
+ File->getType(), NULL);
+ Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
+ B.CreateCall2(F, Char, File, "fputc");
+}
+
+/// EmitFPutS - Emit a call to the fputs function.  Str is required to be a
+/// pointer and File is a pointer to FILE.
+void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (isa<PointerType>(File->getType()))
+ F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fputs", Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty),
+ File->getType(), NULL);
+ B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+}
+
+/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (isa<PointerType>(File->getType()))
+ F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
+ TD->getIntPtrType(),
+ PointerType::getUnqual(Type::Int8Ty),
+ TD->getIntPtrType(), TD->getIntPtrType(),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(),
+ PointerType::getUnqual(Type::Int8Ty),
+ TD->getIntPtrType(), TD->getIntPtrType(),
+ File->getType(), NULL);
+ B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(), 1), File);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+ // Look through noop bitcast instructions.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+ // If this is a PHI node, there are two cases: either we have already seen it
+ // or we haven't.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (!PHIs.insert(PN))
+ return ~0ULL; // already in the set.
+
+ // If it was new, see if all the input strings are the same length.
+ uint64_t LenSoFar = ~0ULL;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ if (Len == 0) return 0; // Unknown length -> unknown.
+
+ if (Len == ~0ULL) continue;
+
+ if (Len != LenSoFar && LenSoFar != ~0ULL)
+ return 0; // Disagree -> unknown.
+ LenSoFar = Len;
+ }
+
+ // Success, all agree.
+ return LenSoFar;
+ }
+
+  // strlen(select(c,x,y)) is known when strlen(x) and strlen(y) agree; a
+  // dead (~0ULL) side defers to the other.
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ if (Len1 == 0) return 0;
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ if (Len2 == 0) return 0;
+ if (Len1 == ~0ULL) return Len2;
+ if (Len2 == ~0ULL) return Len1;
+ if (Len1 != Len2) return 0;
+ return Len1;
+ }
+
+ // If the value is not a GEP instruction nor a constant expression with a
+ // GEP instruction, then return unknown.
+ User *GEP = 0;
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return 0;
+ GEP = CE;
+ } else {
+ return 0;
+ }
+
+  // Make sure the GEP has exactly two indices (three operands in total).
+  if (GEP->getNumOperands() != 3)
+ return 0;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+ if (!Idx->isZero())
+ return 0;
+ } else
+ return 0;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return 0;
+
+  // The GEP, whether an instruction or a constant expression, must reference
+  // a global variable that is a constant with an initializer.  The referenced
+  // constant initializer is the array that we'll use for optimization.
+ GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasInitializer())
+ return 0;
+ Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case, which is a degenerate case. The
+ // initializer is constant zero so the length of the string must be zero.
+ if (isa<ConstantAggregateZero>(GlobalInit))
+ return 1; // Len = 0 offset by 1.
+
+  // Must be a ConstantArray of i8.
+  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (!Array || Array->getType()->getElementType() != Type::Int8Ty)
+    return 0; // Not an i8 string -> unknown.
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ // Traverse the constant array from StartIdx (derived above) which is
+ // the place the GEP refers to in the array.
+ for (unsigned i = StartIdx; i != NumElts; ++i) {
+ Constant *Elt = Array->getOperand(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return 0;
+ if (CI->isZero())
+ return i-StartIdx+1; // We found end of string, success!
+ }
+
+ return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLength(Value *V) {
+ if (!isa<PointerType>(V->getType())) return 0;
+
+ SmallPtrSet<PHINode*, 32> PHIs;
+ uint64_t Len = GetStringLengthH(V, PHIs);
+  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so
+  // treat it as an empty string and return 1 (just the nul terminator).
+  return Len == ~0ULL ? 1 : Len;
+}
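+
+// Example of the 'len+1' convention: for a GEP to element 0 of the constant
+// global [4 x i8] c"abc\00", this returns 4; for a pointer that isn't a
+// constant, nul-terminated array it returns 0 ("unknown").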
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
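+
+// E.g. this returns true when every use looks like 'memcmp(a,b,n) == 0' or
+// '!= 0', and false as soon as the result is stored, returned, or compared
+// against a nonzero value.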
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous LibCall Optimizations
+//===----------------------------------------------------------------------===//
+
+namespace {
+//===---------------------------------------===//
+// 'exit' Optimizations
+
+/// ExitOpt - int main() { exit(4); } --> int main() { return 4; }
+struct VISIBILITY_HIDDEN ExitOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify we have a reasonable prototype for exit.
+ if (Callee->arg_size() == 0 || !CI->use_empty())
+ return 0;
+
+ // Verify the caller is main, and that the result type of main matches the
+ // argument type of exit.
+ if (!Caller->isName("main") || !Caller->hasExternalLinkage() ||
+ Caller->getReturnType() != CI->getOperand(1)->getType())
+ return 0;
+
+ TerminatorInst *OldTI = CI->getParent()->getTerminator();
+
+ // Create the return after the call.
+ ReturnInst *RI = B.CreateRet(CI->getOperand(1));
+
+ // Drop all successor phi node entries.
+ for (unsigned i = 0, e = OldTI->getNumSuccessors(); i != e; ++i)
+ OldTI->getSuccessor(i)->removePredecessor(CI->getParent());
+
+ // Erase all instructions from after our return instruction until the end of
+ // the block.
+ BasicBlock::iterator FirstDead = RI; ++FirstDead;
+ CI->getParent()->getInstList().erase(FirstDead, CI->getParent()->end());
+ return CI;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// String and Memory LibCall Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'strcat' Optimizations
+
+struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getOperand(1);
+ Value *Src = CI->getOperand(2);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ EmitStrLenMemCpy(Src, Dst, Len, B);
+ return Dst;
+ }
+
+ void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B);
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string .. we're concatenating).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ EmitMemCpy(CpyDst, Src, ConstantInt::get(TD->getIntPtrType(), Len+1), 1, B);
+ }
+};
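+
+// Sketch of the lowering above (hypothetical IR, i64 intptr_t target):
+//   strcat(dst, "ab")
+// becomes
+//   %len = call i64 @strlen(i8* %dst)
+//   %endptr = getelementptr i8* %dst, i64 %len
+//   call void @llvm.memcpy.i64(i8* %endptr, i8* @str, i64 3, i32 1)
+// copying three bytes so the nul terminator comes along.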
+
+//===---------------------------------------===//
+// 'strncat' Optimizations
+
+struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !isa<IntegerType>(FT->getParamType(2)))
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getOperand(1);
+ Value *Src = CI->getOperand(2);
+ uint64_t Len;
+
+ // We don't do anything if length is not constant
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0) return Dst;
+
+ // We don't optimize this case
+ if (Len < SrcLen) return 0;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ EmitStrLenMemCpy(Src, Dst, SrcLen, B);
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strchr' Optimizations
+
+struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ Value *SrcStr = CI->getOperand(1);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
+ if (CharC == 0) {
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || FT->getParamType(1) != Type::Int32Ty) // memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
+ ConstantInt::get(TD->getIntPtrType(), Len), B);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // strchr can find the nul character.
+ Str += '\0';
+ char CharValue = CharC->getSExtValue();
+
+ // Compute the offset.
+ uint64_t i = 0;
+ while (1) {
+ if (i == Str.size()) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+ // Did we find our match?
+ if (Str[i] == CharValue)
+ break;
+ ++i;
+ }
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ Value *Idx = ConstantInt::get(Type::Int64Ty, i);
+ return B.CreateGEP(SrcStr, Idx, "strchr");
+ }
+};
+
+//===---------------------------------------===//
+// 'strcmp' Optimizations
+
+struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty))
+ return 0;
+
+ Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(), strcmp(Str1.c_str(),Str2.c_str()));
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 || Len2) {
+      // Choose the smaller Len, excluding 0, which means 'unknown'.
+ if (!Len1 || (Len2 && Len2 < Len1))
+ Len1 = Len2;
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(TD->getIntPtrType(), Len1), B);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strncmp' Optimizations
+
+struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) ||
+ !isa<IntegerType>(FT->getParamType(2)))
+ return 0;
+
+ Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+    // strncmp(x, y, n) -> cnst  (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ strncmp(Str1.c_str(), Str2.c_str(), Length));
+ return 0;
+ }
+};
+
+
+//===---------------------------------------===//
+// 'strcpy' Optimizations
+
+struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcpy" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty))
+ return 0;
+
+ Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(), Len), 1, B);
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strncpy' Optimizations
+
+struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) ||
+ !isa<IntegerType>(FT->getParamType(2)))
+ return 0;
+
+ Value *Dst = CI->getOperand(1);
+ Value *Src = CI->getOperand(2);
+ Value *LenOp = CI->getOperand(3);
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen;
+
+ if (SrcLen == 0) {
+ // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ EmitMemSet(Dst, ConstantInt::get(Type::Int8Ty, '\0'), LenOp, B);
+ return Dst;
+ }
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+
+ // Let strncpy handle the zero padding
+ if (Len > SrcLen+1) return 0;
+
+ // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+ EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(), Len), 1, B);
+
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strlen' Optimizations
+
+struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) ||
+ !isa<IntegerType>(FT->getReturnType()))
+ return 0;
+
+ Value *Src = CI->getOperand(1);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src))
+ return ConstantInt::get(CI->getType(), Len-1);
+
+ // Handle strlen(p) != 0.
+ if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0;
+
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'strto*' Optimizations
+
+struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)))
+ return 0;
+
+ Value *EndPtr = CI->getOperand(2);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ CI->setOnlyReadsMemory();
+ CI->addAttribute(1, Attribute::NoCapture);
+ }
+
+ return 0;
+ }
+};
+
+
+//===---------------------------------------===//
+// 'memcmp' Optimizations
+
+struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ FT->getReturnType() != Type::Int32Ty)
+ return 0;
+
+ Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!LenC) return 0;
+ uint64_t Len = LenC->getZExtValue();
+
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS
+ Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv");
+ Value *RHSV = B.CreateLoad(CastToCStr(RHS, B), "rhsv");
+ return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType());
+ }
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0
+ if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
+ const Type *PTy = PointerType::getUnqual(Len == 2 ?
+ Type::Int16Ty : Type::Int32Ty);
+ LHS = B.CreateBitCast(LHS, PTy, "tmp");
+ RHS = B.CreateBitCast(RHS, PTy, "tmp");
+ LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
+ LoadInst *RHSV = B.CreateLoad(RHS, "rhsv");
+ LHSV->setAlignment(1); RHSV->setAlignment(1); // Unaligned loads.
+ return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
+ }
+
+ return 0;
+ }
+};
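+
+// The Len == 2/4 case relies on equality-only uses: the XOR of the two
+// unaligned loads is zero exactly when all bytes match, e.g. (sketch)
+//   memcmp(p, q, 4) == 0   -->   (*(i32*)p ^ *(i32*)q) == 0
+// which is why IsOnlyUsedInZeroEqualityComparison gates the transform.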
+
+//===---------------------------------------===//
+// 'memcpy' Optimizations
+
+struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ FT->getParamType(2) != TD->getIntPtrType())
+ return 0;
+
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+ return CI->getOperand(1);
+ }
+};
+
+//===---------------------------------------===//
+// 'memmove' Optimizations
+
+struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ FT->getParamType(2) != TD->getIntPtrType())
+ return 0;
+
+ // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+ Module *M = Caller->getParent();
+ Intrinsic::ID IID = Intrinsic::memmove;
+ const Type *Tys[1];
+ Tys[0] = TD->getIntPtrType();
+ Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
+ Value *Dst = CastToCStr(CI->getOperand(1), B);
+ Value *Src = CastToCStr(CI->getOperand(2), B);
+ Value *Size = CI->getOperand(3);
+ Value *Align = ConstantInt::get(Type::Int32Ty, 1);
+ B.CreateCall4(MemMove, Dst, Src, Size, Align);
+ return CI->getOperand(1);
+ }
+};
+
+//===---------------------------------------===//
+// 'memset' Optimizations
+
+struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ FT->getParamType(1) != TD->getIntPtrType() ||
+ FT->getParamType(2) != TD->getIntPtrType())
+ return 0;
+
+ // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ Value *Val = B.CreateTrunc(CI->getOperand(2), Type::Int8Ty);
+ EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B);
+ return CI->getOperand(1);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'pow*' Optimizations
+
+struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPoint())
+ return 0;
+
+ Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ return Op1C;
+ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ return EmitUnaryFloatFnCall(Op2, "exp2", B);
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (Op2C == 0) return 0;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5)) {
+ // FIXME: This is not safe for -0.0 and -inf. This can only be done when
+ // 'unsafe' math optimizations are allowed.
+ // x pow(x, 0.5) sqrt(x)
+ // ---------------------------------------------
+ // -0.0 +0.0 -0.0
+ // -inf +inf NaN
+#if 0
+ // pow(x, 0.5) -> sqrt(x)
+ return B.CreateCall(get_sqrt(), Op1, "sqrt");
+#endif
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'exp2' Optimizations
+
+struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPoint())
+ return 0;
+
+ Value *Op = CI->getOperand(1);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+ }
+
+ if (LdExpArg) {
+ const char *Name;
+ if (Op->getType() == Type::FloatTy)
+ Name = "ldexpf";
+ else if (Op->getType() == Type::DoubleTy)
+ Name = "ldexp";
+ else
+ Name = "ldexpl";
+
+ Constant *One = ConstantFP::get(APFloat(1.0f));
+ if (Op->getType() != Type::FloatTy)
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), Type::Int32Ty,NULL);
+ return B.CreateCall2(Callee, One, LdExpArg);
+ }
+ return 0;
+ }
+};
+
+
+//===---------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
+ FT->getParamType(0) != Type::DoubleTy)
+ return 0;
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
+ if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getNameStart(), B);
+ return B.CreateFPExt(V, Type::DoubleTy);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Integer Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'ffs*' Optimizations
+
+struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+    // Just make sure this has one integer argument and an i32 result.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != Type::Int32Ty ||
+ !isa<IntegerType>(FT->getParamType(0)))
+ return 0;
+
+ Value *Op = CI->getOperand(1);
+
+    // Constant fold.  Note: don't shadow the CallInst parameter 'CI' here;
+    // the replacement value must have the call's return type (i32).
+    if (ConstantInt *CInt = dyn_cast<ConstantInt>(Op)) {
+      if (CInt->getValue() == 0) // ffs(0) -> 0.
+        return Constant::getNullValue(CI->getType());
+      return ConstantInt::get(Type::Int32Ty, // ffs(c) -> cttz(c)+1
+                              CInt->getValue().countTrailingZeros()+1);
+    }
+
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ const Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+ Intrinsic::cttz, &ArgType, 1);
+ Value *V = B.CreateCall(F, Op, "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
+ V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp");
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ return B.CreateSelect(Cond, V, ConstantInt::get(Type::Int32Ty, 0));
+ }
+};
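+
+// Worked example: ffs(8) sees 8 = 0b1000, cttz gives 3, and the expansion
+// yields select(8 != 0, 3+1, 0) = 4, matching libc; ffs(0) takes the other
+// select arm and yields 0.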
+
+//===---------------------------------------===//
+// 'isdigit' Optimizations
+
+struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ FT->getParamType(0) != Type::Int32Ty)
+ return 0;
+
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getOperand(1);
+ Op = B.CreateSub(Op, ConstantInt::get(Type::Int32Ty, '0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, ConstantInt::get(Type::Int32Ty, 10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'isascii' Optimizations
+
+struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ FT->getParamType(0) != Type::Int32Ty)
+ return 0;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getOperand(1);
+ Op = B.CreateICmpULT(Op, ConstantInt::get(Type::Int32Ty, 128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'abs', 'labs', 'llabs' Optimizations
+
+struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getOperand(1);
+ Value *Pos = B.CreateICmpSGT(Op,ConstantInt::getAllOnesValue(Op->getType()),
+ "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+ }
+};
+
+
+//===---------------------------------------===//
+// 'toascii' Optimizations
+
+struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != Type::Int32Ty)
+ return 0;
+
+    // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getOperand(1), ConstantInt::get(CI->getType(),0x7F));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'printf' Optimizations
+
+struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||
+ !(isa<IntegerType>(FT->getReturnType()) ||
+ FT->getReturnType() == Type::VoidTy))
+ return 0;
+
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
+ return 0;
+
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 0);
+
+ // printf("x") -> putchar('x'), even for '%'.
+ if (FormatStr.size() == 1) {
+ EmitPutChar(ConstantInt::get(Type::Int32Ty, FormatStr[0]), B);
+ return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size()-1] == '\n' &&
+ FormatStr.find('%') == std::string::npos) { // no format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr.erase(FormatStr.end()-1);
+ Constant *C = ConstantArray::get(FormatStr, true);
+ C = new GlobalVariable(C->getType(), true,GlobalVariable::InternalLinkage,
+ C, "str", Callee->getParent());
+ EmitPutS(C, B);
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(*(i8*)dst)
+ if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
+ isa<IntegerType>(CI->getOperand(2)->getType())) {
+ EmitPutChar(CI->getOperand(2), B);
+ return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1);
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
+ isa<PointerType>(CI->getOperand(2)->getType()) &&
+ CI->use_empty()) {
+ EmitPutS(CI->getOperand(2), B);
+ return CI;
+ }
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'sprintf' Optimizations
+
+struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getReturnType()))
+ return 0;
+
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ return 0;
+
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->getNumOperands() == 3) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return 0; // we found a format specifier, bail out.
+
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
+ ConstantInt::get(TD->getIntPtrType(), FormatStr.size()+1),1,B);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+ Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char");
+ Value *Ptr = CastToCStr(CI->getOperand(1), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::Int32Ty, 1), "nul");
+ B.CreateStore(Constant::getNullValue(Type::Int8Ty), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;
+
+ Value *Len = EmitStrLen(CI->getOperand(3), B);
+ Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1),
+ "leninc");
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return 0;
+ }
+};
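+
+// For illustration, the sprintf rewrites sketched in C (memcpy stands in for
+// the llvm.memcpy intrinsic actually emitted):
+//
+//   sprintf(dst, "abc");    // --> memcpy(dst, "abc", 4);   result 3
+//   sprintf(dst, "%c", c);  // --> dst[0] = c; dst[1] = 0;  result 1
+//   sprintf(dst, "%s", s);  // --> memcpy(dst, s, strlen(s)+1);
+//                           //     result strlen(s)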
+
+//===---------------------------------------===//
+// 'fwrite' Optimizations
+
+struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require four fixed arguments (pointer, integer, integer, pointer) and
+    // an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<IntegerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getParamType(2)) ||
+ !isa<PointerType>(FT->getParamType(3)) ||
+ !isa<IntegerType>(FT->getReturnType()))
+ return 0;
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!SizeC || !CountC) return 0;
+ uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
+ EmitFPutC(Char, CI->getOperand(4), B);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ return 0;
+ }
+};
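+
+// For illustration, in C terms (the size and count must be compile-time
+// constants for these to fire):
+//
+//   fwrite(p, 0, n, f);  // --> removed;  result 0
+//   fwrite(p, 1, 1, f);  // --> fputc(*(const char *)p, f);  result 1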
+
+//===---------------------------------------===//
+// 'fputs' Optimizations
+
+struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two pointers. Also, we can't optimize if the return value is
+    // used.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !CI->use_empty())
+ return 0;
+
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getOperand(1));
+ if (!Len) return 0;
+ EmitFWrite(CI->getOperand(1), ConstantInt::get(TD->getIntPtrType(), Len-1),
+ CI->getOperand(2), B);
+ return CI; // Known to have no uses (see above).
+ }
+};
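+
+// For illustration: when the argument is a constant string of known length
+// (and the fputs result is unused), the call becomes, per the comment above,
+// roughly
+//
+//   fputs("hello", f);  // --> fwrite("hello", 1, 5, f);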
+
+//===---------------------------------------===//
+// 'fprintf' Optimizations
+
+struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer parameters and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getReturnType()))
+ return 0;
+
+ // All the optimizations depend on the format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ return 0;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumOperands() == 3) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return 0; // We found a format specifier.
+
+ EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(),
+ FormatStr.size()),
+ CI->getOperand(1), B);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+        CI->getNumOperands() < 4)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> *(i8*)dst = chr
+ if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+ EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) -> fputs(str, F)
+ if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty())
+ return 0;
+ EmitFPutS(CI->getOperand(3), CI->getOperand(1), B);
+ return CI;
+ }
+ return 0;
+ }
+};
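+
+// For illustration, the fprintf rewrites sketched in C:
+//
+//   fprintf(f, "foo");    // --> fwrite("foo", 3, 1, f);  result 3
+//   fprintf(f, "%c", c);  // --> fputc(c, f);             result 1
+//   fprintf(f, "%s", s);  // --> fputs(s, f);  (only if result is unused)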
+
+} // end anonymous namespace.
+
+//===----------------------------------------------------------------------===//
+// SimplifyLibCalls Pass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// This pass optimizes well-known library functions from libc and libm.
+ ///
+ class VISIBILITY_HIDDEN SimplifyLibCalls : public FunctionPass {
+ StringMap<LibCallOptimization*> Optimizations;
+ // Miscellaneous LibCall Optimizations
+ ExitOpt Exit;
+ // String and Memory LibCall Optimizations
+ StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
+ StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen;
+ StrToOpt StrTo; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove;
+ MemSetOpt MemSet;
+ // Math Library Optimizations
+ PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ // Integer Optimizations
+ FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
+ ToAsciiOpt ToAscii;
+ // Formatting and IO Optimizations
+ SPrintFOpt SPrintF; PrintFOpt PrintF;
+ FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+
+ bool Modified; // This is only used by doInitialization.
+ public:
+ static char ID; // Pass identification
+ SimplifyLibCalls() : FunctionPass(&ID) {}
+
+ void InitOptimizations();
+ bool runOnFunction(Function &F);
+
+ void setDoesNotAccessMemory(Function &F);
+ void setOnlyReadsMemory(Function &F);
+ void setDoesNotThrow(Function &F);
+ void setDoesNotCapture(Function &F, unsigned n);
+ void setDoesNotAlias(Function &F, unsigned n);
+ bool doInitialization(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ }
+ };
+ char SimplifyLibCalls::ID = 0;
+} // end anonymous namespace.
+
+static RegisterPass<SimplifyLibCalls>
+X("simplify-libcalls", "Simplify well-known library calls");
+
+// Public interface to the Simplify LibCalls pass.
+FunctionPass *llvm::createSimplifyLibCallsPass() {
+ return new SimplifyLibCalls();
+}
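+
+// A minimal usage sketch (hypothetical driver code, not part of this file):
+//
+//   PassManager PM;
+//   PM.add(new TargetData(&M));  // required by the pass's getAnalysisUsage
+//   PM.add(createSimplifyLibCallsPass());
+//   PM.run(M);
+//
+// or, from the command line, via `opt -simplify-libcalls`.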
+
+/// InitOptimizations - Populate the Optimizations map with all the
+/// optimizations we know.
+void SimplifyLibCalls::InitOptimizations() {
+ // Miscellaneous LibCall Optimizations
+ Optimizations["exit"] = &Exit;
+
+ // String and Memory LibCall Optimizations
+ Optimizations["strcat"] = &StrCat;
+ Optimizations["strncat"] = &StrNCat;
+ Optimizations["strchr"] = &StrChr;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+ Optimizations["strcpy"] = &StrCpy;
+ Optimizations["strncpy"] = &StrNCpy;
+ Optimizations["strlen"] = &StrLen;
+ Optimizations["strtol"] = &StrTo;
+ Optimizations["strtod"] = &StrTo;
+ Optimizations["strtof"] = &StrTo;
+ Optimizations["strtoul"] = &StrTo;
+ Optimizations["strtoll"] = &StrTo;
+ Optimizations["strtold"] = &StrTo;
+ Optimizations["strtoull"] = &StrTo;
+ Optimizations["memcmp"] = &MemCmp;
+ Optimizations["memcpy"] = &MemCpy;
+ Optimizations["memmove"] = &MemMove;
+ Optimizations["memset"] = &MemSet;
+
+ // Math Library Optimizations
+ Optimizations["powf"] = &Pow;
+ Optimizations["pow"] = &Pow;
+ Optimizations["powl"] = &Pow;
+ Optimizations["llvm.pow.f32"] = &Pow;
+ Optimizations["llvm.pow.f64"] = &Pow;
+ Optimizations["llvm.pow.f80"] = &Pow;
+ Optimizations["llvm.pow.f128"] = &Pow;
+ Optimizations["llvm.pow.ppcf128"] = &Pow;
+ Optimizations["exp2l"] = &Exp2;
+ Optimizations["exp2"] = &Exp2;
+ Optimizations["exp2f"] = &Exp2;
+ Optimizations["llvm.exp2.ppcf128"] = &Exp2;
+ Optimizations["llvm.exp2.f128"] = &Exp2;
+ Optimizations["llvm.exp2.f80"] = &Exp2;
+ Optimizations["llvm.exp2.f64"] = &Exp2;
+ Optimizations["llvm.exp2.f32"] = &Exp2;
+
+#ifdef HAVE_FLOORF
+ Optimizations["floor"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_CEILF
+ Optimizations["ceil"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_ROUNDF
+ Optimizations["round"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_RINTF
+ Optimizations["rint"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_NEARBYINTF
+ Optimizations["nearbyint"] = &UnaryDoubleFP;
+#endif
+
+ // Integer Optimizations
+ Optimizations["ffs"] = &FFS;
+ Optimizations["ffsl"] = &FFS;
+ Optimizations["ffsll"] = &FFS;
+ Optimizations["abs"] = &Abs;
+ Optimizations["labs"] = &Abs;
+ Optimizations["llabs"] = &Abs;
+ Optimizations["isdigit"] = &IsDigit;
+ Optimizations["isascii"] = &IsAscii;
+ Optimizations["toascii"] = &ToAscii;
+
+ // Formatting and IO Optimizations
+ Optimizations["sprintf"] = &SPrintF;
+ Optimizations["printf"] = &PrintF;
+ Optimizations["fwrite"] = &FWrite;
+ Optimizations["fputs"] = &FPuts;
+ Optimizations["fprintf"] = &FPrintF;
+}
+
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyLibCalls::runOnFunction(Function &F) {
+ if (Optimizations.empty())
+ InitOptimizations();
+
+ const TargetData &TD = getAnalysis<TargetData>();
+
+ IRBuilder<> Builder;
+
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Ignore non-calls.
+ CallInst *CI = dyn_cast<CallInst>(I++);
+ if (!CI) continue;
+
+ // Ignore indirect calls and calls to non-external functions.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() ||
+ !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
+ continue;
+
+ // Ignore unknown calls.
+ const char *CalleeName = Callee->getNameStart();
+ StringMap<LibCallOptimization*>::iterator OMI =
+ Optimizations.find(CalleeName, CalleeName+Callee->getNameLen());
+ if (OMI == Optimizations.end()) continue;
+
+ // Set the builder to the instruction after the call.
+ Builder.SetInsertPoint(BB, I);
+
+ // Try to optimize this call.
+ Value *Result = OMI->second->OptimizeCall(CI, TD, Builder);
+ if (Result == 0) continue;
+
+ DEBUG(DOUT << "SimplifyLibCalls simplified: " << *CI;
+ DOUT << " into: " << *Result << "\n");
+
+ // Something changed!
+ Changed = true;
+ ++NumSimplified;
+
+ // Inspect the instruction after the call (which was potentially just
+ // added) next.
+ I = CI; ++I;
+
+ if (CI != Result && !CI->use_empty()) {
+ CI->replaceAllUsesWith(Result);
+ if (!Result->hasName())
+ Result->takeName(CI);
+ }
+ CI->eraseFromParent();
+ }
+ }
+ return Changed;
+}
+
+// Utility methods for doInitialization.
+
+void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+
+/// doInitialization - Add attributes to well-known functions.
+///
+bool SimplifyLibCalls::doInitialization(Module &M) {
+ Modified = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+
+ unsigned NameLen = F.getNameLen();
+ if (!NameLen)
+ continue;
+
+ const FunctionType *FTy = F.getFunctionType();
+
+ const char *NameStr = F.getNameStart();
+ switch (NameStr[0]) {
+ case 's':
+ if (NameLen == 6 && !strcmp(NameStr, "strlen")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 6 && !strcmp(NameStr, "strcpy")) ||
+ (NameLen == 6 && !strcmp(NameStr, "stpcpy")) ||
+ (NameLen == 6 && !strcmp(NameStr, "strcat")) ||
+ (NameLen == 6 && !strcmp(NameStr, "strtol")) ||
+ (NameLen == 6 && !strcmp(NameStr, "strtod")) ||
+ (NameLen == 6 && !strcmp(NameStr, "strtof")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strtoul")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strtoll")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strtold")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strncat")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strncpy")) ||
+ (NameLen == 8 && !strcmp(NameStr, "strtoull"))) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 7 && !strcmp(NameStr, "strxfrm")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 6 && !strcmp(NameStr, "strcmp")) ||
+ (NameLen == 6 && !strcmp(NameStr, "strspn")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strncmp")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strcspn")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strcoll")) ||
+ (NameLen == 10 && !strcmp(NameStr, "strcasecmp")) ||
+ (NameLen == 11 && !strcmp(NameStr, "strncasecmp"))) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 6 && !strcmp(NameStr, "strstr")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strpbrk"))) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 6 && !strcmp(NameStr, "strtok")) ||
+ (NameLen == 8 && !strcmp(NameStr, "strtok_r"))) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "scanf")) ||
+ (NameLen == 6 && !strcmp(NameStr, "setbuf")) ||
+ (NameLen == 7 && !strcmp(NameStr, "setvbuf"))) {
+ if (FTy->getNumParams() < 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 6 && !strcmp(NameStr, "strdup")) ||
+ (NameLen == 7 && !strcmp(NameStr, "strndup"))) {
+ if (FTy->getNumParams() < 1 ||
+ !isa<PointerType>(FTy->getReturnType()) ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 4 && !strcmp(NameStr, "stat")) ||
+ (NameLen == 6 && !strcmp(NameStr, "sscanf")) ||
+ (NameLen == 7 && !strcmp(NameStr, "sprintf")) ||
+ (NameLen == 7 && !strcmp(NameStr, "statvfs"))) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 8 && !strcmp(NameStr, "snprintf")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(2)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ } else if (NameLen == 9 && !strcmp(NameStr, "setitimer")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(1)) ||
+ !isa<PointerType>(FTy->getParamType(2)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ } else if (NameLen == 6 && !strcmp(NameStr, "system")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'm':
+ if (NameLen == 6 && !strcmp(NameStr, "memcmp")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 6 && !strcmp(NameStr, "memchr")) ||
+ (NameLen == 7 && !strcmp(NameStr, "memrchr"))) {
+ if (FTy->getNumParams() != 3)
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if ((NameLen == 4 && !strcmp(NameStr, "modf")) ||
+ (NameLen == 5 && !strcmp(NameStr, "modff")) ||
+ (NameLen == 5 && !strcmp(NameStr, "modfl")) ||
+ (NameLen == 6 && !strcmp(NameStr, "memcpy")) ||
+ (NameLen == 7 && !strcmp(NameStr, "memccpy")) ||
+ (NameLen == 7 && !strcmp(NameStr, "memmove"))) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 8 && !strcmp(NameStr, "memalign")) {
+ if (!isa<PointerType>(FTy->getReturnType()))
+ continue;
+ setDoesNotAlias(F, 0);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "mkdir")) ||
+ (NameLen == 6 && !strcmp(NameStr, "mktime"))) {
+ if (FTy->getNumParams() == 0 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'r':
+ if (NameLen == 7 && !strcmp(NameStr, "realloc")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getReturnType()))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (NameLen == 4 && !strcmp(NameStr, "read")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "rmdir")) ||
+ (NameLen == 6 && !strcmp(NameStr, "rewind")) ||
+ (NameLen == 6 && !strcmp(NameStr, "remove")) ||
+ (NameLen == 8 && !strcmp(NameStr, "realpath"))) {
+ if (FTy->getNumParams() < 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 6 && !strcmp(NameStr, "rename")) ||
+ (NameLen == 8 && !strcmp(NameStr, "readlink"))) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'w':
+ if (NameLen == 5 && !strcmp(NameStr, "write")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'b':
+ if (NameLen == 5 && !strcmp(NameStr, "bcopy")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 4 && !strcmp(NameStr, "bcmp")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 5 && !strcmp(NameStr, "bzero")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'c':
+ if (NameLen == 6 && !strcmp(NameStr, "calloc")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getReturnType()))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "chmod")) ||
+ (NameLen == 5 && !strcmp(NameStr, "chown")) ||
+ (NameLen == 7 && !strcmp(NameStr, "ctermid")) ||
+ (NameLen == 8 && !strcmp(NameStr, "clearerr")) ||
+ (NameLen == 8 && !strcmp(NameStr, "closedir"))) {
+ if (FTy->getNumParams() == 0 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'a':
+ if ((NameLen == 4 && !strcmp(NameStr, "atoi")) ||
+ (NameLen == 4 && !strcmp(NameStr, "atol")) ||
+ (NameLen == 4 && !strcmp(NameStr, "atof")) ||
+ (NameLen == 5 && !strcmp(NameStr, "atoll"))) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (NameLen == 6 && !strcmp(NameStr, "access")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'f':
+ if (NameLen == 5 && !strcmp(NameStr, "fopen")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getReturnType()) ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 6 && !strcmp(NameStr, "fdopen")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getReturnType()) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 4 && !strcmp(NameStr, "feof")) ||
+ (NameLen == 4 && !strcmp(NameStr, "free")) ||
+ (NameLen == 5 && !strcmp(NameStr, "fseek")) ||
+ (NameLen == 5 && !strcmp(NameStr, "ftell")) ||
+ (NameLen == 5 && !strcmp(NameStr, "fgetc")) ||
+ (NameLen == 6 && !strcmp(NameStr, "fseeko")) ||
+ (NameLen == 6 && !strcmp(NameStr, "ftello")) ||
+ (NameLen == 6 && !strcmp(NameStr, "fileno")) ||
+ (NameLen == 6 && !strcmp(NameStr, "fflush")) ||
+ (NameLen == 6 && !strcmp(NameStr, "fclose")) ||
+ (NameLen == 7 && !strcmp(NameStr, "fsetpos")) ||
+ (NameLen == 9 && !strcmp(NameStr, "flockfile")) ||
+ (NameLen == 11 && !strcmp(NameStr, "funlockfile")) ||
+ (NameLen == 12 && !strcmp(NameStr, "ftrylockfile"))) {
+ if (FTy->getNumParams() == 0 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (NameLen == 6 && !strcmp(NameStr, "ferror")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "fputc")) ||
+ (NameLen == 5 && !strcmp(NameStr, "fstat")) ||
+ (NameLen == 5 && !strcmp(NameStr, "frexp")) ||
+ (NameLen == 6 && !strcmp(NameStr, "frexpf")) ||
+ (NameLen == 6 && !strcmp(NameStr, "frexpl")) ||
+ (NameLen == 8 && !strcmp(NameStr, "fstatvfs"))) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 5 && !strcmp(NameStr, "fgets")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(2)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 3);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "fread")) ||
+ (NameLen == 6 && !strcmp(NameStr, "fwrite"))) {
+ if (FTy->getNumParams() != 4 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(3)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "fputs")) ||
+ (NameLen == 6 && !strcmp(NameStr, "fscanf")) ||
+ (NameLen == 7 && !strcmp(NameStr, "fprintf")) ||
+ (NameLen == 7 && !strcmp(NameStr, "fgetpos"))) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'g':
+ if ((NameLen == 4 && !strcmp(NameStr, "getc")) ||
+ (NameLen == 10 && !strcmp(NameStr, "getlogin_r")) ||
+ (NameLen == 13 && !strcmp(NameStr, "getc_unlocked"))) {
+ if (FTy->getNumParams() == 0 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (NameLen == 6 && !strcmp(NameStr, "getenv")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 4 && !strcmp(NameStr, "gets")) ||
+ (NameLen == 7 && !strcmp(NameStr, "getchar"))) {
+ setDoesNotThrow(F);
+ } else if (NameLen == 9 && !strcmp(NameStr, "getitimer")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 8 && !strcmp(NameStr, "getpwnam")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'u':
+ if (NameLen == 6 && !strcmp(NameStr, "ungetc")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "uname")) ||
+ (NameLen == 6 && !strcmp(NameStr, "unlink")) ||
+ (NameLen == 8 && !strcmp(NameStr, "unsetenv"))) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "utime")) ||
+ (NameLen == 6 && !strcmp(NameStr, "utimes"))) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'p':
+ if (NameLen == 4 && !strcmp(NameStr, "putc")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 4 && !strcmp(NameStr, "puts")) ||
+ (NameLen == 6 && !strcmp(NameStr, "printf")) ||
+ (NameLen == 6 && !strcmp(NameStr, "perror"))) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 5 && !strcmp(NameStr, "pread")) ||
+ (NameLen == 6 && !strcmp(NameStr, "pwrite"))) {
+ if (FTy->getNumParams() != 4 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 7 && !strcmp(NameStr, "putchar")) {
+ setDoesNotThrow(F);
+ } else if (NameLen == 5 && !strcmp(NameStr, "popen")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getReturnType()) ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 6 && !strcmp(NameStr, "pclose")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'v':
+ if (NameLen == 6 && !strcmp(NameStr, "vscanf")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 7 && !strcmp(NameStr, "vsscanf")) ||
+ (NameLen == 7 && !strcmp(NameStr, "vfscanf"))) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(1)) ||
+ !isa<PointerType>(FTy->getParamType(2)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 6 && !strcmp(NameStr, "valloc")) {
+ if (!isa<PointerType>(FTy->getReturnType()))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (NameLen == 7 && !strcmp(NameStr, "vprintf")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 8 && !strcmp(NameStr, "vfprintf")) ||
+ (NameLen == 8 && !strcmp(NameStr, "vsprintf"))) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 9 && !strcmp(NameStr, "vsnprintf")) {
+ if (FTy->getNumParams() != 4 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(2)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ }
+ break;
+ case 'o':
+ if (NameLen == 4 && !strcmp(NameStr, "open")) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ } else if (NameLen == 7 && !strcmp(NameStr, "opendir")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getReturnType()) ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 't':
+ if (NameLen == 7 && !strcmp(NameStr, "tmpfile")) {
+ if (!isa<PointerType>(FTy->getReturnType()))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (NameLen == 5 && !strcmp(NameStr, "times")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'h':
+ if ((NameLen == 5 && !strcmp(NameStr, "htonl")) ||
+ (NameLen == 5 && !strcmp(NameStr, "htons"))) {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'n':
+ if ((NameLen == 5 && !strcmp(NameStr, "ntohl")) ||
+ (NameLen == 5 && !strcmp(NameStr, "ntohs"))) {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'l':
+ if (NameLen == 5 && !strcmp(NameStr, "lstat")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 6 && !strcmp(NameStr, "lchown")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'q':
+ if (NameLen == 5 && !strcmp(NameStr, "qsort")) {
+ if (FTy->getNumParams() != 4 ||
+ !isa<PointerType>(FTy->getParamType(3)))
+ continue;
+ // May throw; places call through function pointer.
+ setDoesNotCapture(F, 4);
+ }
+ break;
+ case '_':
+ if ((NameLen == 8 && !strcmp(NameStr, "__strdup")) ||
+ (NameLen == 9 && !strcmp(NameStr, "__strndup"))) {
+ if (FTy->getNumParams() < 1 ||
+ !isa<PointerType>(FTy->getReturnType()) ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (NameLen == 10 && !strcmp(NameStr, "__strtok_r")) {
+ if (FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 8 && !strcmp(NameStr, "_IO_getc")) {
+ if (FTy->getNumParams() != 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (NameLen == 8 && !strcmp(NameStr, "_IO_putc")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 1:
+ if (NameLen == 15 && !strcmp(NameStr, "\1__isoc99_scanf")) {
+ if (FTy->getNumParams() < 1 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if ((NameLen == 7 && !strcmp(NameStr, "\1stat64")) ||
+ (NameLen == 8 && !strcmp(NameStr, "\1lstat64")) ||
+ (NameLen == 10 && !strcmp(NameStr, "\1statvfs64")) ||
+ (NameLen == 16 && !strcmp(NameStr, "\1__isoc99_sscanf"))) {
+        if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 8 && !strcmp(NameStr, "\1fopen64")) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getReturnType()) ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if ((NameLen == 9 && !strcmp(NameStr, "\1fseeko64")) ||
+ (NameLen == 9 && !strcmp(NameStr, "\1ftello64"))) {
+ if (FTy->getNumParams() == 0 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (NameLen == 10 && !strcmp(NameStr, "\1tmpfile64")) {
+ if (!isa<PointerType>(FTy->getReturnType()))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if ((NameLen == 8 && !strcmp(NameStr, "\1fstat64")) ||
+ (NameLen == 11 && !strcmp(NameStr, "\1fstatvfs64"))) {
+ if (FTy->getNumParams() != 2 ||
+ !isa<PointerType>(FTy->getParamType(1)))
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (NameLen == 7 && !strcmp(NameStr, "\1open64")) {
+ if (FTy->getNumParams() < 2 ||
+ !isa<PointerType>(FTy->getParamType(0)))
+ continue;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ }
+ }
+ return Modified;
+}
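+
+// As an illustration of doInitialization's effect (a sketch; the exact
+// attribute spelling and integer widths are assumptions): a declaration such
+// as
+//
+//   declare i64 @strlen(i8*)
+//
+// becomes, roughly,
+//
+//   declare i64 @strlen(i8* nocapture) nounwind readonly
+//
+// reflecting the setOnlyReadsMemory/setDoesNotThrow/setDoesNotCapture calls
+// made for "strlen" above.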
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(X)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(sqrt(x)) -> pow(x,1/9)
+//
+// cos, cosf, cosl:
+// * cos(-x) -> cos(x)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// memcmp:
+// * memcmp(x,y,l) -> cnst
+// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// puts:
+// * puts("") -> putchar("\n")
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// stpcpy:
+// * stpcpy(str, "literal") ->
+// llvm.memcpy(str,"literal",strlen("literal")+1,1)
+// strrchr:
+// * strrchr(s,c) -> reverse_offset_of_in(c,s)
+// (if c is a constant integer and s is a constant string)
+// * strrchr(s1,0) -> strchr(s1,0)
+//
+// strpbrk:
+// * strpbrk(s,a) -> offset_in_for(s,a)
+// (if s and a are both constant strings)
+// * strpbrk(s,"") -> 0
+// * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
+//
+// strspn, strcspn:
+// * strspn(s,a) -> const_int (if both args are constant)
+// * strspn("",a) -> 0
+// * strspn(s,"") -> 0
+// * strcspn(s,a) -> const_int (if both args are constant)
+// * strcspn("",a) -> 0
+// * strcspn(s,"") -> strlen(a)
+//
+// strstr:
+// * strstr(x,x) -> x
+// * strstr(s1,s2) -> offset_of_s2_in(s1)
+// (if s1 and s2 are constant strings)
+//
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
+//
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
new file mode 100644
index 0000000..99a7dee
--- /dev/null
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -0,0 +1,365 @@
+//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a limited form of tail duplication, intended to simplify
+// CFGs by removing some unconditional branches. This pass is necessary to
+// straighten out loops created by the C front-end, but also is capable of
+// making other code nicer. After this pass is run, the CFG simplify pass
+// should be run to clean up the mess.
+//
+// This pass could be enhanced in the future to use profile information to be
+// more aggressive.
+//
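+// For illustration, a sketch of the basic transformation on IR-like
+// pseudocode (hypothetical block names):
+//
+//   entry:  br label %tail        becomes      entry:  %x = add i32 %a, %b
+//   tail:   %x = add i32 %a, %b                        ret i32 %x
+//           ret i32 %x
+//
+// (%tail itself survives only while it still has other predecessors.)
+//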
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplicate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constant.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of unconditional branches eliminated");
+
+static cl::opt<unsigned>
+TailDupThreshold("taildup-threshold",
+ cl::desc("Max block size to tail duplicate"),
+ cl::init(1), cl::Hidden);
+
+namespace {
+ class VISIBILITY_HIDDEN TailDup : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TailDup() : FunctionPass(&ID) {}
+
+ private:
+ inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
+ inline void eliminateUnconditionalBranch(BranchInst *BI);
+ SmallPtrSet<BasicBlock*, 4> CycleDetector;
+ };
+}
+
+char TailDup::ID = 0;
+static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication");
+
+// Public interface to the Tail Duplication pass
+FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
+
+/// runOnFunction - Top level algorithm - Loop over each unconditional branch in
+/// the function, eliminating it if it looks attractive enough. CycleDetector
+/// prevents infinite loops by checking that we aren't redirecting a branch to
+/// a place it already pointed to earlier; see PR 2323.
+bool TailDup::runOnFunction(Function &F) {
+ bool Changed = false;
+ CycleDetector.clear();
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ if (shouldEliminateUnconditionalBranch(I->getTerminator(),
+ TailDupThreshold)) {
+ eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
+ Changed = true;
+ } else {
+ ++I;
+ CycleDetector.clear();
+ }
+ }
+ return Changed;
+}
+
+/// shouldEliminateUnconditionalBranch - Return true if this branch looks
+/// attractive to eliminate.  We eliminate the branch if the destination basic
+/// block has at most Threshold instructions in it, not counting PHI nodes or
+/// debug intrinsics.  Since one of the counted instructions is the
+/// terminator, this bounds how many instructions we add to the new block.
+///
+/// We don't count PHI nodes in the count since they will be removed when the
+/// contents of the block are copied over.
+///
+bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
+ unsigned Threshold) {
+ BranchInst *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch!
+
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (Dest == BI->getParent()) return false; // Do not loop infinitely!
+
+ // Do not inline a block if we will just get another branch to the same block!
+ TerminatorInst *DTI = Dest->getTerminator();
+ if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
+ if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
+ return false; // Do not loop infinitely!
+
+ // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
+ // because doing so would require breaking critical edges. This should be
+ // fixed eventually.
+ if (!DTI->use_empty())
+ return false;
+
+  // Do not bother with blocks with only a single predecessor: the CFG
+  // simplification pass will fold these two blocks together!
+ pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
+ ++PI;
+ if (PI == PE) return false; // Exactly one predecessor!
+
+ BasicBlock::iterator I = Dest->getFirstNonPHI();
+
+ for (unsigned Size = 0; I != Dest->end(); ++I) {
+ if (Size == Threshold) return false; // The block is too large.
+
+ // Don't tail duplicate call instructions. They are very large compared to
+ // other instructions.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
+
+    // Also don't tail duplicate allocation instructions (alloca and malloc).
+ if (isa<AllocationInst>(I)) return false;
+
+ // Some vector instructions can expand into a number of instructions.
+ if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<InsertElementInst>(I)) return false;
+
+ // Only count instructions that are not debugger intrinsics.
+ if (!isa<DbgInfoIntrinsic>(I)) ++Size;
+ }
+
+  // Do not tail duplicate a block with a large number of successors into
+  // predecessors when the block also has many other predecessors.  This can
+  // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
+  // cases that have a large number of indirect gotos.
+ unsigned NumSuccs = DTI->getNumSuccessors();
+ if (NumSuccs > 8) {
+ unsigned TooMany = 128;
+ if (NumSuccs >= TooMany) return false;
+ TooMany = TooMany/NumSuccs;
+ for (; PI != PE; ++PI)
+ if (TooMany-- == 0) return false;
+ }
+
+ // If this unconditional branch is a fall-through, be careful about
+ // tail duplicating it. In particular, we don't want to taildup it if the
+ // original block will still be there after taildup is completed: doing so
+ // would eliminate the fall-through, requiring unconditional branches.
+ Function::iterator DestI = Dest;
+ if (&*--DestI == BI->getParent()) {
+    // The uncond branch is a fall-through.  Tail duplication of the block
+    // will eliminate the fall-through-ness and end up cloning the terminator
+ // at the end of the Dest block. Since the original Dest block will
+ // continue to exist, this means that one or the other will not be able to
+ // fall through. One typical example that this helps with is code like:
+ // if (a)
+ // foo();
+ // if (b)
+ // foo();
+ // Cloning the 'if b' block into the end of the first foo block is messy.
+
+ // The messy case is when the fall-through block falls through to other
+ // blocks. This is what we would be preventing if we cloned the block.
+ DestI = Dest;
+ if (++DestI != Dest->getParent()->end()) {
+ BasicBlock *DestSucc = DestI;
+ // If any of Dest's successors are fall-throughs, don't do this xform.
+ for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
+ SI != SE; ++SI)
+ if (*SI == DestSucc)
+ return false;
+ }
+ }
+
+ // Finally, check that we haven't redirected to this target block earlier;
+ // there are cases where we loop forever if we don't check this (PR 2323).
+ if (!CycleDetector.insert(Dest))
+ return false;
+
+ return true;
+}
+
+/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
+/// DstBlock, and that SrcBlock is not the only predecessor of DstBlock. If we
+/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
+/// DstBlock, return it.
+static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
+ BasicBlock *DstBlock) {
+ // SrcBlock must have a single predecessor.
+ pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
+ if (PI == PE || ++PI != PE) return 0;
+
+ BasicBlock *SrcPred = *pred_begin(SrcBlock);
+
+ // Look at the predecessors of DstBlock. One of them will be SrcBlock. If
+ // there is only one other pred, get it, otherwise we can't handle it.
+ PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
+ BasicBlock *DstOtherPred = 0;
+ if (*PI == SrcBlock) {
+ if (++PI == PE) return 0;
+ DstOtherPred = *PI;
+ if (++PI != PE) return 0;
+ } else {
+ DstOtherPred = *PI;
+ if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
+ }
+
+ // We can handle two situations here: "if then" and "if then else" blocks. An
+ // 'if then' situation is just where DstOtherPred == SrcPred.
+ if (DstOtherPred == SrcPred)
+ return SrcPred;
+
+ // Check to see if we have an "if then else" situation, which means that
+ // DstOtherPred will have a single predecessor and it will be SrcPred.
+ PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
+ if (PI != PE && *PI == SrcPred) {
+ if (++PI != PE) return 0; // Not a single pred.
+    return SrcPred; // Otherwise it's an "if then else"; return the 'if'.
+ }
+
+ // Otherwise, this is something we can't handle.
+ return 0;
+}
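+
+// For illustration, the two CFG shapes FindObviousSharedDomOf recognizes
+// (it returns SrcPred in both cases):
+//
+//        SrcPred                  SrcPred
+//        /     \                  /     \
+//   SrcBlock    |            SrcBlock  DstOtherPred
+//        \     /                  \     /
+//        DstBlock                 DstBlock
+//
+//       "if then"              "if then else"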
+
+
+/// eliminateUnconditionalBranch - Clone the instructions from the destination
+/// block into the source block, eliminating the specified unconditional branch.
+/// If the destination block defines values used by successors of the dest
+/// block, we may need to insert PHI nodes.
+///
+void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
+ BasicBlock *SourceBlock = Branch->getParent();
+ BasicBlock *DestBlock = Branch->getSuccessor(0);
+ assert(SourceBlock != DestBlock && "Our predicate is broken!");
+
+ DOUT << "TailDuplication[" << SourceBlock->getParent()->getName()
+ << "]: Eliminating branch: " << *Branch;
+
+ // See if we can avoid duplicating code by moving it up to a dominator of both
+ // blocks.
+ if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
+ DOUT << "Found shared dominator: " << DomBlock->getName() << "\n";
+
+ // If there are non-phi instructions in DestBlock that have no operands
+ // defined in DestBlock, and if the instruction has no side effects, we can
+ // move the instruction to DomBlock instead of duplicating it.
+ BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
+ while (!isa<TerminatorInst>(BBI)) {
+ Instruction *I = BBI++;
+
+ bool CanHoist = !I->isTrapping() && !I->mayHaveSideEffects();
+ if (CanHoist) {
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
+ if (OpI->getParent() == DestBlock ||
+ (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
+ CanHoist = false;
+ break;
+ }
+ if (CanHoist) {
+ // Remove from DestBlock, move right before the term in DomBlock.
+ DestBlock->getInstList().remove(I);
+ DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
+ DOUT << "Hoisted: " << *I;
+ }
+ }
+ }
+ }
+
+  // Tail duplication cannot update SSA properties correctly if the values
+ // defined in the duplicated tail are used outside of the tail itself. For
+ // this reason, we spill all values that are used outside of the tail to the
+ // stack.
+ for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
+ if (I->isUsedOutsideOfBlock(DestBlock)) {
+ // We found a use outside of the tail. Create a new stack slot to
+ // break this inter-block usage pattern.
+ DemoteRegToStack(*I);
+ }
+
+ // We are going to have to map operands from the original block B to the new
+ // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
+ // nodes also define part of this mapping. Loop over these PHI nodes, adding
+ // them to our mapping.
+ //
+ std::map<Value*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = DestBlock->begin();
+ bool HadPHINodes = isa<PHINode>(BI);
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
+
+ // Clone the non-phi instructions of the dest block into the source block,
+ // keeping track of the mapping...
+ //
+ for (; BI != DestBlock->end(); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ SourceBlock->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+ }
+
+ // Now that we have built the mapping information and cloned all of the
+ // instructions (giving us a new terminator, among other things), walk the new
+ // instructions, rewriting references of old instructions to use new
+ // instructions.
+ //
+ BI = Branch; ++BI; // Get an iterator to the first new instruction
+ for (; BI != SourceBlock->end(); ++BI)
+ for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i)
+ if (Value *Remapped = ValueMapping[BI->getOperand(i)])
+ BI->setOperand(i, Remapped);
+
+ // Next we check to see if any of the successors of DestBlock had PHI nodes.
+ // If so, we need to add entries to the PHI nodes for SourceBlock now.
+ for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
+ SI != SE; ++SI) {
+ BasicBlock *Succ = *SI;
+ for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
+ PHINode *PN = cast<PHINode>(PNI);
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(DestBlock);
+
+ // Remap the value if necessary...
+ if (Value *MappedIV = ValueMapping[IV])
+ IV = MappedIV;
+ PN->addIncoming(IV, SourceBlock);
+ }
+ }
+
+ // Next, remove the old branch instruction, and any PHI node entries that we
+ // had.
+ BI = Branch; ++BI; // Get an iterator to the first new instruction
+ DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
+ SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
+
+ // Final step: now that we have finished everything up, walk the cloned
+ // instructions one last time, constant propagating and DCE'ing them, because
+ // they may not be needed anymore.
+ //
+ if (HadPHINodes) {
+ while (BI != SourceBlock->end()) {
+ Instruction *Inst = BI++;
+ if (isInstructionTriviallyDead(Inst))
+ Inst->eraseFromParent();
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
+ Inst->replaceAllUsesWith(C);
+ Inst->eraseFromParent();
+ }
+ }
+ }
+
+ ++NumEliminated; // We just killed a branch!
+}
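+
+// For illustration of the PHI handling above (a sketch with hypothetical
+// names): if DestBlock defined %v and a successor merged it via
+//
+//   %p = phi i32 [ %v, %DestBlock ], ...
+//
+// then after cloning, the successor's PHI gains an entry for the clone:
+//
+//   %p = phi i32 [ %v, %DestBlock ], [ %v.clone, %SourceBlock ], ...
+//
+// Values of DestBlock used outside the block were demoted to stack slots
+// beforehand, so the result remains valid SSA.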
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
new file mode 100644
index 0000000..682d069
--- /dev/null
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -0,0 +1,479 @@
+//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file transforms calls of the current function (self recursion) followed
+// by a return instruction with a branch to the entry of the function, creating
+// a loop. This pass also implements the following extensions to the basic
+// algorithm:
+//
+// 1. Trivial instructions between the call and return do not prevent the
+// transformation from taking place, though currently the analysis cannot
+// support moving any really useful instructions (only dead ones).
+// 2. This pass transforms functions that are prevented from being tail
+// recursive by an associative expression to use an accumulator variable,
+// thus compiling the typical naive factorial or 'fib' implementation into
+// efficient code.
+// 3. TRE is performed if the function returns void, if the return
+// returns the result returned by the call, or if the function returns a
+// run-time constant on all exits from the function. It is possible, though
+// unlikely, that the return returns something else (like constant 0), and
+// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
+// the function return the exact same value.
+// 4. If it can prove that callees do not access their caller's stack frame,
+// they are marked as eligible for tail call elimination (by the code
+// generator).
+//
+// There are several improvements that could be made:
+//
+// 1. If the function has any alloca instructions, these instructions will be
+// moved out of the entry block of the function, causing them to be
+// evaluated each time through the tail recursion. Safely keeping allocas
+//    in the entry block requires analysis to prove that the tail-called
+// function does not read or write the stack object.
+// 2. Tail recursion is only performed if the call immediately precedes the
+// return instruction. It's possible that there could be a jump between
+// the call and the return.
+// 3. There can be intervening operations between the call and the return that
+//    prevent the TRE from occurring. For example, there could be GEPs and
+// stores to memory that will not be read or written by the call. This
+// requires some substantial analysis (such as with DSA) to prove safe to
+// move ahead of the call, but doing so could allow many more TREs to be
+// performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
+// 4. The algorithm we use to detect if callees access their caller stack
+// frames is very primitive.
+//
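+// For illustration, extension #2 above applied to the classic factorial
+// (sketched in C; the pass performs the equivalent rewrite on the IR):
+//
+//   int fact(int n) {                    int fact(int n) {
+//     if (n <= 1) return 1;                int acc = 1;
+//     return n * fact(n - 1);              while (n > 1) { acc *= n; --n; }
+//   }                                      return acc;
+//                                        }
+//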
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailcallelim"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumAccumAdded, "Number of accumulators introduced");
+
+namespace {
+ struct VISIBILITY_HIDDEN TailCallElim : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ TailCallElim() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ private:
+ bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ std::vector<PHINode*> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool CanMoveAboveCall(Instruction *I, CallInst *CI);
+ Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
+ };
+}
+
+char TailCallElim::ID = 0;
+static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination");
+
+// Public interface to the TailCallElimination pass
+FunctionPass *llvm::createTailCallEliminationPass() {
+ return new TailCallElim();
+}
+
+
+/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
+/// callees of this function. We only do very simple analysis right now, this
+/// could be expanded in the future to use mod/ref information for particular
+/// call sites if desired.
+static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
+ // FIXME: do simple 'address taken' analysis.
+ return true;
+}
+
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
+/// instructions. If it contains any that might be accessed by calls, return
+/// true.
+static bool CheckForEscapingAllocas(BasicBlock *BB,
+ bool &CannotTCETailMarkedCall) {
+ bool RetVal = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ RetVal |= AllocaMightEscapeToCalls(AI);
+
+ // If this alloca is in the body of the function, or if it is a variable
+ // sized allocation, we cannot tail call eliminate calls marked 'tail'
+ // with this mechanism.
+ if (BB != &BB->getParent()->getEntryBlock() ||
+ !isa<ConstantInt>(AI->getArraySize()))
+ CannotTCETailMarkedCall = true;
+ }
+ return RetVal;
+}
+
+bool TailCallElim::runOnFunction(Function &F) {
+ // If this function is a varargs function, we won't be able to PHI the args
+ // right, so don't even try to convert it...
+ if (F.getFunctionType()->isVarArg()) return false;
+
+ BasicBlock *OldEntry = 0;
+ bool TailCallsAreMarkedTail = false;
+ std::vector<PHINode*> ArgumentPHIs;
+ bool MadeChange = false;
+
+ bool FunctionContainsEscapingAllocas = false;
+
+  // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
+  // marked with the 'tail' attribute, because doing so would cause the stack
+  // size to increase (a true tail call would deallocate variable sized
+  // allocas; this transformation doesn't).
+ bool CannotTCETailMarkedCall = false;
+
+ // Loop over the function, looking for any returning blocks, and keeping track
+ // of whether this function has any non-trivially used allocas.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
+ break;
+
+ FunctionContainsEscapingAllocas |=
+ CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
+ }
+
+ /// FIXME: The code generator produces really bad code when an 'escaping
+ /// alloca' is changed from being a static alloca to being a dynamic alloca.
+ /// Until this is resolved, disable this transformation if that would ever
+ /// happen. This bug is PR962.
+ if (FunctionContainsEscapingAllocas)
+ return false;
+
+
+ // Second pass, change any tail calls to loops.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator()))
+ MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,CannotTCETailMarkedCall);
+
+ // If we eliminated any tail recursions, it's possible that we inserted some
+ // silly PHI nodes which just merge an initial value (the incoming operand)
+ // with themselves. Check to see if we did and clean up our mess if so. This
+ // occurs when a function passes an argument straight through to its tail
+ // call.
+ if (!ArgumentPHIs.empty()) {
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
+
+      // If the PHI node resolves to a single incoming value, replace it with
+      // that value.
+ if (Value *PNV = PN->hasConstantValue()) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // Finally, if this function contains no non-escaping allocas, mark all calls
+ // in the function as eligible for tail calls (there is no stack memory for
+ // them to access).
+ if (!FunctionContainsEscapingAllocas)
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ CI->setTailCall();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+
+/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// instruction from after the call to before the call, assuming that all
+/// instructions between the call and this instruction are movable.
+///
+bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
+ // FIXME: We can move load/store/call/free instructions above the call if the
+ // call does not mod/ref the memory location being processed.
+ if (I->mayHaveSideEffects() || isa<LoadInst>(I))
+ return false;
+
+ // Otherwise, if this is a side-effect free instruction, check to make sure
+ // that it does not use the return value of the call. If it doesn't use the
+ // return value of the call, it must only use things that are defined before
+ // the call, or movable instructions between the call and the instruction
+ // itself.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i) == CI)
+ return false;
+ return true;
+}
+
+// isDynamicConstant - Return true if the specified value is the same when the
+// return would exit as it was when the initial iteration of the recursive
+// function was executed.
+//
+// We currently handle static constants and arguments that are not modified as
+// part of the recursion.
+//
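+// For example (sketch): in
+//   int f(int n, int step) { return n ? step + f(n - 1, step) : 0; }
+// the argument 'step' is passed through to the recursive call unchanged, so
+// it is dynamically constant; 'n' is not.
+//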
+static bool isDynamicConstant(Value *V, CallInst *CI) {
+ if (isa<Constant>(V)) return true; // Static constants are always dyn consts
+
+ // Check to see if this is an immutable argument, if so, the value
+ // will be available to initialize the accumulator.
+ if (Argument *Arg = dyn_cast<Argument>(V)) {
+ // Figure out which argument number this is...
+ unsigned ArgNo = 0;
+ Function *F = CI->getParent()->getParent();
+ for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI)
+ ++ArgNo;
+
+    // If we are passing this argument into the call as the corresponding
+    // argument operand, then the argument is dynamically constant.
+    // Otherwise, we cannot transform this function safely.
+ if (CI->getOperand(ArgNo+1) == Arg)
+ return true;
+ }
+ // Not a constant or immutable argument, we can't safely transform.
+ return false;
+}
+
+// getCommonReturnValue - Check to see if the function containing the specified
+// return instruction and tail call consistently returns the same
+// runtime-constant value at all exit points. If so, return the returned value.
+//
+static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
+ Function *F = TheRI->getParent()->getParent();
+ Value *ReturnedValue = 0;
+
+  // TODO: Handle multiple-value ret instructions.
+ if (isa<StructType>(F->getReturnType()))
+ return 0;
+
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+ if (RI != TheRI) {
+ Value *RetOp = RI->getOperand(0);
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ if (!isDynamicConstant(RetOp, CI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
+ return ReturnedValue;
+}
+
+/// CanTransformAccumulatorRecursion - If the specified instruction can be
+/// transformed using accumulator recursion elimination, return the constant
+/// which is the start of the accumulator value. Otherwise return null.
+///
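+/// For instance (editor's sketch): in 'fact(n) = n * fact(n - 1)' the
+/// multiply follows the recursive call; it is associative, its only user is
+/// the return, and every other exit returns the constant 1, so 1 becomes the
+/// accumulator's initial value.
+///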
+Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
+ CallInst *CI) {
+ if (!I->isAssociative()) return 0;
+ assert(I->getNumOperands() == 2 &&
+ "Associative operations should have 2 args!");
+
+ // Exactly one operand should be the result of the call instruction...
+ if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
+ (I->getOperand(0) != CI && I->getOperand(1) != CI))
+ return 0;
+
+ // The only user of this instruction we allow is a single return instruction.
+ if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back()))
+ return 0;
+
+ // Ok, now we have to check all of the other return instructions in this
+ // function. If they return non-constants or differing values, then we cannot
+ // transform the function safely.
+ return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
+}
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ std::vector<PHINode*> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ BasicBlock *BB = Ret->getParent();
+ Function *F = BB->getParent();
+
+ if (&BB->front() == Ret) // Make sure there is something before the ret...
+ return false;
+
+  // If the return is in the entry block, then making this transformation would
+  // turn infinite recursion into an infinite loop.  This transformation is ok
+  // in theory, but breaks some code like:
+  //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
+  // We disable this xform in this case, because the code generator will lower
+  // the call to fabs into inline code.
+ if (BB == &F->getEntryBlock())
+ return false;
+
+ // Scan backwards from the return, checking to see if there is a tail call in
+ // this block. If so, set CI to it.
+ CallInst *CI;
+ BasicBlock::iterator BBI = Ret;
+ while (1) {
+ CI = dyn_cast<CallInst>(BBI);
+ if (CI && CI->getCalledFunction() == F)
+ break;
+
+ if (BBI == BB->begin())
+ return false; // Didn't find a potential tail call.
+ --BBI;
+ }
+
+ // If this call is marked as a tail call, and if there are dynamic allocas in
+ // the function, we cannot perform this optimization.
+ if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
+ return false;
+
+ // If we are introducing accumulator recursion to eliminate associative
+ // operations after the call instruction, this variable contains the initial
+ // value for the accumulator. If this value is set, we actually perform
+ // accumulator recursion elimination instead of simple tail recursion
+ // elimination.
+ Value *AccumulatorRecursionEliminationInitVal = 0;
+ Instruction *AccumulatorRecursionInstr = 0;
+
+ // Ok, we found a potential tail call. We can currently only transform the
+ // tail call if all of the instructions between the call and the return are
+ // movable to above the call itself, leaving the call next to the return.
+ // Check that this is the case now.
+ for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
+ if (!CanMoveAboveCall(BBI, CI)) {
+ // If we can't move the instruction above the call, it might be because it
+      // is an associative operation that could be transformed using accumulator
+ // recursion elimination. Check to see if this is the case, and if so,
+ // remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
+ }
+ }
+
+  // We can only transform call/return pairs that either ignore the return
+  // value of the call and return void, ignore the value of the call and
+  // return a constant, return the value returned by the tail call, or are
+  // being transformed by accumulator recursion elimination.
+ if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
+ !isa<UndefValue>(Ret->getReturnValue()) &&
+ AccumulatorRecursionEliminationInitVal == 0 &&
+ !getCommonReturnValue(Ret, CI))
+ return false;
+
+ // OK! We can transform this tail call. If this is the first one found,
+ // create the new entry block, allowing us to branch back to the old entry.
+ if (OldEntry == 0) {
+ OldEntry = &F->getEntryBlock();
+ BasicBlock *NewEntry = BasicBlock::Create("", F, OldEntry);
+ NewEntry->takeName(OldEntry);
+ OldEntry->setName("tailrecurse");
+ BranchInst::Create(OldEntry, NewEntry);
+
+ // If this tail call is marked 'tail' and if there are any allocas in the
+ // entry block, move them up to the new entry block.
+ TailCallsAreMarkedTail = CI->isTailCall();
+ if (TailCallsAreMarkedTail)
+ // Move all fixed sized allocas from OldEntry to NewEntry.
+ for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
+ NEBI = NewEntry->begin(); OEBI != E; )
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ AI->moveBefore(NEBI);
+
+ // Now that we have created a new block, which jumps to the entry
+ // block, insert a PHI node for each argument of the function.
+ // For now, we initialize each PHI to only have the real arguments
+ // which are passed in.
+ Instruction *InsertPos = OldEntry->begin();
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ PHINode *PN = PHINode::Create(I->getType(),
+ I->getName() + ".tr", InsertPos);
+ I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
+ PN->addIncoming(I, NewEntry);
+ ArgumentPHIs.push_back(PN);
+ }
+ }
+
+ // If this function has self recursive calls in the tail position where some
+ // are marked tail and some are not, only transform one flavor or another. We
+ // have to choose whether we move allocas in the entry block to the new entry
+ // block or not, so we can't make a good choice for both. NOTE: We could do
+ // slightly better here in the case that the function has no entry block
+ // allocas.
+ if (TailCallsAreMarkedTail && !CI->isTailCall())
+ return false;
+
+ // Ok, now that we know we have a pseudo-entry block WITH all of the
+ // required PHI nodes, add entries into the PHI node for the actual
+ // parameters passed into the tail-recursive call.
+ for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
+
+ // If we are introducing an accumulator variable to eliminate the recursion,
+ // do so now. Note that we _know_ that no subsequent tail recursion
+ // eliminations will happen on this function because of the way the
+ // accumulator recursion predicate is set up.
+ //
+ if (AccumulatorRecursionEliminationInitVal) {
+ Instruction *AccRecInstr = AccumulatorRecursionInstr;
+ // Start by inserting a new PHI node for the accumulator.
+ PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr",
+ OldEntry->begin());
+
+ // Loop over all of the predecessors of the tail recursion block. For the
+ // real entry into the function we seed the PHI with the initial value,
+ // computed earlier. For any other existing branches to this block (due to
+ // other tail recursions eliminated) the accumulator is not modified.
+ // Because we haven't added the branch in the current block to OldEntry yet,
+ // it will not show up as a predecessor.
+ for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
+ PI != PE; ++PI) {
+ if (*PI == &F->getEntryBlock())
+ AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI);
+ else
+ AccPN->addIncoming(AccPN, *PI);
+ }
+
+ // Add an incoming argument for the current block, which is computed by our
+ // associative accumulator instruction.
+ AccPN->addIncoming(AccRecInstr, BB);
+
+ // Next, rewrite the accumulator recursion instruction so that it does not
+ // use the result of the call anymore, instead, use the PHI node we just
+ // inserted.
+ AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+
+    // Finally, rewrite any return instructions in the function to return the
+    // PHI node instead of the "initval" that they do currently. This loop
+    // will actually rewrite the return value we are destroying, but that's
+    // ok.
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+ RI->setOperand(0, AccPN);
+ ++NumAccumAdded;
+ }
+
+ // Now that all of the PHI nodes are in place, remove the call and
+ // ret instructions, replacing them with an unconditional branch.
+ BranchInst::Create(OldEntry, Ret);
+ BB->getInstList().erase(Ret); // Remove return.
+ BB->getInstList().erase(CI); // Remove call.
+ ++NumEliminated;
+ return true;
+}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
new file mode 100644
index 0000000..71049fa
--- /dev/null
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -0,0 +1,594 @@
+//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target addressing mode matcher class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void ExtAddrMode::print(OStream &OS) const {
+ bool NeedPlus = false;
+ OS << "[";
+ if (BaseGV) {
+ OS << (NeedPlus ? " + " : "")
+ << "GV:";
+ WriteAsOperand(*OS.stream(), BaseGV, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ if (BaseOffs)
+ OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+
+ if (BaseReg) {
+ OS << (NeedPlus ? " + " : "")
+ << "Base:";
+ WriteAsOperand(*OS.stream(), BaseReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+ if (Scale) {
+ OS << (NeedPlus ? " + " : "")
+ << Scale << "*";
+ WriteAsOperand(*OS.stream(), ScaledReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ OS << ']';
+}
+
+void ExtAddrMode::dump() const {
+ print(cerr);
+ cerr << '\n';
+}
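+
+// Editor's sketch of the format print() emits: an ExtAddrMode with BaseGV
+// @g, BaseOffs 8, Scale 2 and ScaledReg %idx renders roughly as
+//   [GV:@g + 8 + 2*%idx]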
+
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
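+///
+/// For example (sketch): starting from an empty addressing mode, matching
+/// ScaleReg '%x = add %a, 4' at Scale 2 can commit [%a*2 + 8], folding the
+/// add's constant through the scale as the code below does.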
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return MatchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+ // X*Scale + C*Scale to addr mode.
+ ConstantInt *CI = 0; Value *AddLHS = 0;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ }
+
+ // Otherwise, not (x+c)*scale, just return what we have.
+ return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType());
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode. If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+ unsigned Depth) {
+ // Avoid exponential behavior on extremely deep expression trees.
+ if (Depth >= 5) return false;
+
+ switch (Opcode) {
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr:
+ // This inttoptr is a no-op if the integer type is pointer sized.
+ if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+ TLI.getPointerTy())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::BitCast:
+ // BitCast is always a noop, and we can handle it as long as it is
+ // int->int or pointer->pointer (we don't want int<->fp or something).
+ if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) ||
+ isa<IntegerType>(AddrInst->getOperand(0)->getType())) &&
+ // Don't touch identity bitcasts. These were probably put here by LSR,
+ // and we don't want to mess around with them. Assume it knows what it
+ // is doing.
+ AddrInst->getOperand(0)->getType() != AddrInst->getType())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::Add: {
+ // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+ if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+
+ // Restore the old addr mode info.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+
+ // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(1), Depth+1))
+ return true;
+
+ // Otherwise we definitely can't merge the ADD in.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ break;
+ }
+ //case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ //break;
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ // Can only handle X*C and X << C.
+ ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+ if (!RHS) return false;
+ int64_t Scale = RHS->getSExtValue();
+ if (Opcode == Instruction::Shl)
+ Scale = 1 << Scale;
+
+ return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ }
+ case Instruction::GetElementPtr: {
+    // Scan the GEP. We check whether it contains only constant offsets and
+    // at most one variable offset.
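+    // Sketch (hypothetical layout): for 'gep %S* %p, 0, 1, %i' where field 1
+    // starts at byte offset 4 and the indexed element type is 4 bytes wide,
+    // this scan yields ConstantOffset = 4 and records %i as the single
+    // variable index with scale 4.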
+ int VariableOperand = -1;
+ unsigned VariableScale = 0;
+
+ int64_t ConstantOffset = 0;
+ const TargetData *TD = TLI.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(AddrInst);
+ for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx =
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ ConstantOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ ConstantOffset += CI->getSExtValue()*TypeSize;
+ } else if (TypeSize) { // Scales of zero don't do anything.
+ // We only allow one variable index at the moment.
+ if (VariableOperand != -1)
+ return false;
+
+ // Remember the variable index.
+ VariableOperand = i;
+ VariableScale = TypeSize;
+ }
+ }
+ }
+
+ // A common case is for the GEP to only do a constant offset. In this case,
+ // just add it to the disp field and check validity.
+ if (VariableOperand == -1) {
+ AddrMode.BaseOffs += ConstantOffset;
+ if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ // Check to see if we can fold the base pointer in too.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+ }
+ AddrMode.BaseOffs -= ConstantOffset;
+ return false;
+ }
+
+ // Save the valid addressing mode in case we can't match.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // See if the scale and offset amount is valid for this target.
+ AddrMode.BaseOffs += ConstantOffset;
+
+ // Match the base operand of the GEP.
+ if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ // If it couldn't be matched, just stuff the value in a register.
+ if (AddrMode.HasBaseReg) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ }
+
+ // Match the remaining variable portion of the GEP.
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ Depth)) {
+ // If it couldn't be matched, try stuffing the base into a register
+ // instead of matching it, and retrying the match of the scale.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ if (AddrMode.HasBaseReg)
+ return false;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ VariableScale, Depth)) {
+ // If even that didn't work, bail.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (AddrMode.BaseGV == 0) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseGV = 0;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+  // Worst case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = 0;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = 0;
+ }
+ // Couldn't match.
+ return false;
+}
+
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands. If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetLowering &TLI) {
+ std::vector<InlineAsm::ConstraintInfo>
+ Constraints = IA->ParseConstraints();
+
+ unsigned ArgNo = 1; // ArgNo - The operand of the CallInst.
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.isIndirect)
+ OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue(),
+ OpInfo.ConstraintType == TargetLowering::C_Memory);
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it!
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+ SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+ SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ const TargetLowering &TLI) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I))
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ // Loop over all the uses, recursively processing them.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ if (UI.getOperandNo() == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo()));
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ if (IA == 0) return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts,
+ TLI))
+ return true;
+ }
+
+ return false;
+}
+
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into. If so, there is no cost to
+/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+ // If Val is a constant sized alloca in the entry block, it is live, this is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ BasicBlock *MemBB = MemoryInst->getParent();
+ for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
+ UI != E; ++UI)
+ // We know that uses of arguments and instructions have to be instructions.
+ if (cast<Instruction>(*UI)->getParent() == MemBB)
+ return true;
+
+ return false;
+}
+
+
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = 0;
+ if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = 0;
+
+  // If folding this instruction (and its subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (BaseReg == 0 && ScaledReg == 0)
+ return true;
+
+ // If all uses of this instruction are ultimately load/store/inlineasm's,
+ // check to see if their addressing modes will include this instruction. If
+ // so, we can fold it into all uses, so it doesn't matter if it has multiple
+ // uses.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these uses and see if they could
+ // *actually* fold the instruction.
+ SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+ Instruction *User = MemoryUses[i].first;
+ unsigned OpNo = MemoryUses[i].second;
+
+ // Get the access type of this use. If the use isn't a pointer, we don't
+ // know what it accesses.
+ Value *Address = User->getOperand(OpNo);
+ if (!isa<PointerType>(Address->getType()))
+ return false;
+ const Type *AddressAccessTy =
+ cast<PointerType>(Address->getType())->getElementType();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ MemoryInst, Result);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.MatchAddr(Address, 0);
+ Success = Success; assert(Success && "Couldn't select *anything*?");
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+ I) == MatchedAddrModeInsts.end())
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 0000000..6d1180d
--- /dev/null
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,622 @@
+//===-- BasicBlockUtils.cpp - BasicBlock Utilities ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs manipulations on basic blocks and the
+// instructions contained within them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Constant.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/ValueHandle.h"
+#include <algorithm>
+using namespace llvm;
+
+/// DeleteDeadBlock - Delete the specified block, which must have no
+/// predecessors.
+void llvm::DeleteDeadBlock(BasicBlock *BB) {
+ assert((pred_begin(BB) == pred_end(BB) ||
+ // Can delete self loop.
+ BB->getSinglePredecessor() == BB) && "Block is not dead!");
+ TerminatorInst *BBTerm = BB->getTerminator();
+
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
+ BBTerm->getSuccessor(i)->removePredecessor(BB);
+
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+    // If this instruction is used, replace uses with an arbitrary value.
+    // Because control flow can't get here, we don't care what we replace the
+    // value with. Note that since this block is unreachable and all values
+    // contained within it must dominate their uses, all uses will eventually
+    // be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+
+ // Zap the block!
+ BB->eraseFromParent();
+}
+
+/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are
+/// any single-entry PHI nodes in it, fold them away. This handles the case
+/// when all entries to the PHI nodes in a block are guaranteed equal, such as
+/// when the block has exactly one predecessor.
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+}
+
+
+/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
+/// is dead. Also recursively delete any operands that become dead as
+/// a result. This includes tracing the def-use list from the PHI to see if
+/// it is ultimately unused or if it reaches an unused cycle.
+void llvm::DeleteDeadPHIs(BasicBlock *BB) {
+ // Recursively deleting a PHI may cause multiple PHIs to be deleted
+ // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+ RecursivelyDeleteDeadPHINode(PN);
+}
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible. The return value indicates success or failure.
+bool llvm::MergeBlockIntoPredecessor(BasicBlock* BB, Pass* P) {
+ pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
+ // Can't merge the entry block.
+ if (pred_begin(BB) == pred_end(BB)) return false;
+
+ BasicBlock *PredBB = *PI++;
+  for (; PI != PE; ++PI) // Search all predecessors, see if they are all the same.
+ if (*PI != PredBB) {
+ PredBB = 0; // There are multiple different predecessors...
+ break;
+ }
+
+ // Can't merge if there are multiple predecessors.
+ if (!PredBB) return false;
+ // Don't break self-loops.
+ if (PredBB == BB) return false;
+ // Don't break invokes.
+ if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+
+ succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+ BasicBlock* OnlySucc = BB;
+ for (; SI != SE; ++SI)
+ if (*SI != OnlySucc) {
+ OnlySucc = 0; // There are multiple distinct successors!
+ break;
+ }
+
+ // Can't merge if there are multiple successors.
+ if (!OnlySucc) return false;
+
+ // Can't merge if there is PHI loop.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN)
+ return false;
+ } else
+ break;
+ }
+
+ // Begin by getting rid of unneeded PHIs.
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ BB->getInstList().pop_front(); // Delete the phi node...
+ }
+
+ // Delete the unconditional branch from the predecessor...
+ PredBB->getInstList().pop_back();
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(PredBB);
+
+  // If the predecessor lacks a name, inherit the merged block's name.
+ if (!PredBB->hasName())
+ PredBB->takeName(BB);
+
+ // Finally, erase the old block and update dominator info.
+ if (P) {
+ if (DominatorTree* DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ DomTreeNode* DTN = DT->getNode(BB);
+ DomTreeNode* PredDTN = DT->getNode(PredBB);
+
+ if (DTN) {
+ SmallPtrSet<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+ for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ DE = Children.end(); DI != DE; ++DI)
+ DT->changeImmediateDominator(*DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
+ }
+ }
+
+ BB->eraseFromParent();
+
+ return true;
+}
+
+/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
+/// with a value, then remove and delete the original instruction.
+///
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Value *V) {
+ Instruction &I = *BI;
+ // Replaces all of the uses of the instruction with uses of the value
+ I.replaceAllUsesWith(V);
+
+ // Make sure to propagate a name if there is one already.
+ if (I.hasName() && !V->hasName())
+ V->takeName(&I);
+
+ // Delete the unnecessary instruction now...
+ BI = BIL.erase(BI);
+}
+
+
+/// ReplaceInstWithInst - Replace the instruction specified by BI with the
+/// instruction specified by I. The original instruction is deleted and BI is
+/// updated to point to the new instruction.
+///
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Instruction *I) {
+ assert(I->getParent() == 0 &&
+ "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+ // Insert the new instruction into the basic block...
+ BasicBlock::iterator New = BIL.insert(BI, I);
+
+ // Replace all uses of the old instruction, and delete it.
+ ReplaceInstWithValue(BIL, BI, I);
+
+ // Move BI back to point to the newly inserted instruction
+ BI = New;
+}
+
+/// ReplaceInstWithInst - Replace the instruction specified by From with the
+/// instruction specified by To.
+///
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+ BasicBlock::iterator BI(From);
+ ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
+}
+
+/// RemoveSuccessor - Change the specified terminator instruction such that its
+/// successor SuccNum no longer exists. Because this reduces the outgoing
+/// degree of the current basic block, the actual terminator instruction itself
+/// may have to be changed. In the case where the last successor of the block
+/// is deleted, a return instruction is inserted in its place, which can cause
+/// a surprising change in program behavior.
+///
+void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
+ assert(SuccNum < TI->getNumSuccessors() &&
+ "Trying to remove a nonexistant successor!");
+
+ // If our old successor block contains any PHI nodes, remove the entry in the
+ // PHI nodes that comes from this branch...
+ //
+ BasicBlock *BB = TI->getParent();
+ TI->getSuccessor(SuccNum)->removePredecessor(BB);
+
+ TerminatorInst *NewTI = 0;
+ switch (TI->getOpcode()) {
+ case Instruction::Br:
+ // If this is a conditional branch... convert to unconditional branch.
+ if (TI->getNumSuccessors() == 2) {
+ cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum));
+ } else { // Otherwise convert to a return instruction...
+ Value *RetVal = 0;
+
+      // Create a value to return... if the function doesn't return void...
+ if (BB->getParent()->getReturnType() != Type::VoidTy)
+ RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
+
+ // Create the return...
+ NewTI = ReturnInst::Create(RetVal);
+ }
+ break;
+
+ case Instruction::Invoke: // Should convert to call
+ case Instruction::Switch: // Should remove entry
+ default:
+ case Instruction::Ret: // Cannot happen, has no successors!
+ assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!");
+ abort();
+ }
+
+ if (NewTI) // If it's a different instruction, replace.
+ ReplaceInstWithInst(TI, NewTI);
+}
+
+/// SplitEdge - Split the edge connecting the specified blocks. Pass P must
+/// not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+ TerminatorInst *LatchTerm = BB->getTerminator();
+ unsigned SuccNum = 0;
+#ifndef NDEBUG
+ unsigned e = LatchTerm->getNumSuccessors();
+#endif
+ for (unsigned i = 0; ; ++i) {
+ assert(i != e && "Didn't find edge?");
+ if (LatchTerm->getSuccessor(i) == Succ) {
+ SuccNum = i;
+ break;
+ }
+ }
+
+ // If this is a critical edge, let SplitCriticalEdge do it.
+ if (SplitCriticalEdge(BB->getTerminator(), SuccNum, P))
+ return LatchTerm->getSuccessor(SuccNum);
+
+ // If the edge isn't critical, then BB has a single successor or Succ has a
+ // single pred. Split the block.
+ BasicBlock::iterator SplitPoint;
+ if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+ // If the successor only has a single pred, split the top of the successor
+ // block.
+ assert(SP == BB && "CFG broken");
+ SP = NULL;
+ return SplitBlock(Succ, Succ->begin(), P);
+ } else {
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), P);
+ }
+}
+
+/// SplitBlock - Split the specified block at the specified instruction -
+/// everything before SplitPt stays in Old and everything starting with
+/// SplitPt moves to a new block. The two blocks are joined by an
+/// unconditional branch and the loop info is updated.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+ BasicBlock::iterator SplitIt = SplitPt;
+ while (isa<PHINode>(SplitIt))
+ ++SplitIt;
+ BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+ // The new block lives in whichever loop the old one did.
+ if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+  if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+    // Old dominates New. The new node dominates all other nodes dominated by
+    // Old.
+ DomTreeNode *OldNode = DT->getNode(Old);
+ std::vector<DomTreeNode *> Children;
+ for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+ I != E; ++I)
+ Children.push_back(*I);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New,Old);
+
+ for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+
+ if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>())
+ DF->splitBlock(Old);
+
+ return New;
+}
+
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block. The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it. The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and
+/// DominanceFrontier, but no other analyses.
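+///
+/// Typical invocation (editor's sketch; names are hypothetical):
+///   BasicBlock *Preds[] = { LoopPred1, LoopPred2 };
+///   BasicBlock *PreHdr = SplitBlockPredecessors(Header, Preds, 2, ".ph", P);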
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds, const char *Suffix,
+ Pass *P) {
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB =
+ BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ for (unsigned i = 0; i != NumPreds; ++i)
+ Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+
+ // Update dominator tree and dominator frontier if available.
+ DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
+ if (DT)
+ DT->splitBlock(NewBB);
+ if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
+ DF->splitBlock(NewBB);
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (NumPreds == 0) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ return NewBB;
+ }
+
+ // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node.
+ Value *InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 1; i != NumPreds; ++i)
+ if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+ InVal = 0;
+ break;
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+ for (unsigned i = 0; i != NumPreds; ++i)
+ PN->removeIncomingValue(Preds[i], false);
+ } else {
+ // If the values coming into the block are not the same, we need a PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), PN->getName()+".ph", BI);
+ if (AA) AA->copyValue(PN, NewPHI);
+
+ // Move all of the PHI values for 'Preds' to the new PHI.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ Value *V = PN->removeIncomingValue(Preds[i], false);
+ NewPHI->addIncoming(V, Preds[i]);
+ }
+ InVal = NewPHI;
+ }
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+
+ // Check to see if we can eliminate this phi node.
+ if (Value *V = PN->hasConstantValue(DT != 0)) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || DT == 0 || DT->dominates(I, PN)) {
+ PN->replaceAllUsesWith(V);
+ if (AA) AA->deleteValue(PN);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ return NewBB;
+}
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them. This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
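+///
+/// Usage sketch:
+///   SmallVector<std::pair<const BasicBlock*, const BasicBlock*>, 32> Edges;
+///   FindFunctionBackedges(F, Edges);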
+void llvm::FindFunctionBackedges(const Function &F,
+ SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
+ const BasicBlock *BB = &F.getEntryBlock();
+ if (succ_begin(BB) == succ_end(BB))
+ return;
+
+ SmallPtrSet<const BasicBlock*, 8> Visited;
+ SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
+ SmallPtrSet<const BasicBlock*, 8> InStack;
+
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
+ const BasicBlock *ParentBB = Top.first;
+ succ_const_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB)) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is in VisitStack, it's a back edge.
+ if (InStack.count(BB))
+ Result.push_back(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+      // Go down one level if there is an unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ InStack.erase(VisitStack.pop_back_val().first);
+ }
+ } while (!VisitStack.empty());
+}
+
+
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+  // Test if the values come from identical arithmetic instructions.
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+ isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+ if (const Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalTo(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
+/// instruction before ScanFrom) checking to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+/// If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't
+/// find something that invalidates *Ptr or provides it, ScanFrom is left at
+/// begin() and this returns null. ScanFrom could also be left pointing just
+/// past an instruction that may clobber *Ptr, in which case null is returned
+/// as well.
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
+/// it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
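+///
+/// For example (sketch), scanning backwards from the load in
+///   store i32 7, i32* %p
+///   %v = load i32* %p
+/// returns the stored value 'i32 7' without touching memory.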
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA) {
+ if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+ // If we're using alias analysis to disambiguate get the size of *Ptr.
+ unsigned AccessSize = 0;
+ if (AA) {
+ const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+ AccessSize = AA->getTargetData().getTypeStoreSizeInBits(AccessTy);
+ }
+
+ while (ScanFrom != ScanBB->begin()) {
+ // We must ignore debug info directives when counting (otherwise they
+ // would affect codegen).
+ Instruction *Inst = --ScanFrom;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ // We skip pointer-to-pointer bitcasts, which are NOPs.
+ // It is necessary for correctness to skip those that feed into a
+ // llvm.dbg.declare, as these are not present when debugging is off.
+ if (isa<BitCastInst>(Inst) && isa<PointerType>(Inst->getType()))
+ continue;
+
+ // Restore ScanFrom to expected value in case next test succeeds
+ ScanFrom++;
+
+ // Don't scan huge blocks.
+ if (MaxInstsToScan-- == 0) return 0;
+
+ --ScanFrom;
+ // If this is a load of Ptr, the loaded value is available.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+ return LI;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If this is a store through Ptr, the value is available!
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+ return SI->getOperand(0);
+
+ // If Ptr is an alloca and this is a store to a different alloca, ignore
+ // the store. This is a trivial form of alias analysis that is important
+ // for reg2mem'd code.
+ if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+ (isa<AllocaInst>(SI->getOperand(1)) ||
+ isa<GlobalVariable>(SI->getOperand(1))))
+ continue;
+
+ // If we have alias analysis and it says the store won't modify the loaded
+ // value, ignore the store.
+ if (AA &&
+ (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+      // Otherwise the store may or may not alias the pointer; bail out.
+ ++ScanFrom;
+ return 0;
+ }
+
+ // If this is some other instruction that may clobber Ptr, bail out.
+ if (Inst->mayWriteToMemory()) {
+ // If alias analysis claims that it really won't modify the load,
+ // ignore it.
+ if (AA &&
+ (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // May modify the pointer, bail out.
+ ++ScanFrom;
+ return 0;
+ }
+ }
+
+  // We got to the start of the block without finding the value; we're done
+  // with this block.
+ return 0;
+}
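+
+// A minimal usage sketch (hypothetical): forwarding an available value to a
+// redundant load. `LI` is an assumed LoadInst the caller is visiting.
+//
+//   BasicBlock::iterator ScanFrom = LI;
+//   if (Value *Avail = FindAvailableLoadedValue(LI->getOperand(0),
+//                                               LI->getParent(), ScanFrom,
+//                                               6 /*MaxInstsToScan*/,
+//                                               0 /*no AliasAnalysis*/)) {
+//     LI->replaceAllUsesWith(Avail);
+//     LI->eraseFromParent();
+//   }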
+
+/// CopyPrecedingStopPoint - If I is immediately preceded by a StopPoint,
+/// make a copy of the stoppoint before InsertPos (presumably before copying
+/// or moving I).
+void llvm::CopyPrecedingStopPoint(Instruction *I,
+ BasicBlock::iterator InsertPos) {
+ if (I != I->getParent()->begin()) {
+ BasicBlock::iterator BBI = I; --BBI;
+ if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BBI)) {
+ CallInst *newDSPI = DSPI->clone();
+ newDSPI->insertBefore(InsertPos);
+ }
+ }
+}
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
new file mode 100644
index 0000000..1650cfa
--- /dev/null
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -0,0 +1,181 @@
+//===- BasicInliner.cpp - Basic function level inliner --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a simple function based inliner that does not use
+// call graph information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basicinliner"
+
+#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/Transforms/Utils/BasicInliner.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
+ cl::desc("Control the amount of basic inlining to perform (default = 200)"));
+
+namespace llvm {
+
+  /// BasicInlinerImpl - BasicInliner implementation class. This hides the
+  /// containers used by the basic inliner from the public interface.
+ struct VISIBILITY_HIDDEN BasicInlinerImpl {
+
+ BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+    void operator=(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+ public:
+ BasicInlinerImpl(TargetData *T) : TD(T) {}
+
+ /// addFunction - Add function into the list of functions to process.
+ /// All functions must be inserted using this interface before invoking
+ /// inlineFunctions().
+ void addFunction(Function *F) {
+ Functions.push_back(F);
+ }
+
+    /// neverInlineFunction - Sometimes a function is never to be inlined,
+    /// for one reason or another.
+ void neverInlineFunction(Function *F) {
+ NeverInline.insert(F);
+ }
+
+    /// inlineFunctions - Walk all call sites in all functions supplied by
+ /// client. Inline as many call sites as possible. Delete completely
+ /// inlined functions.
+ void inlineFunctions();
+
+ private:
+ TargetData *TD;
+ std::vector<Function *> Functions;
+ SmallPtrSet<const Function *, 16> NeverInline;
+ SmallPtrSet<Function *, 8> DeadFunctions;
+ InlineCostAnalyzer CA;
+ };
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInlinerImpl::inlineFunctions() {
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ std::vector<CallSite> CallSites;
+
+ for (std::vector<Function *>::iterator FI = Functions.begin(),
+ FE = Functions.end(); FI != FE; ++FI) {
+ Function *F = *FI;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ CallSite CS = CallSite::get(I);
+ if (CS.getInstruction() && CS.getCalledFunction()
+ && !CS.getCalledFunction()->isDeclaration())
+ CallSites.push_back(CS);
+ }
+ }
+
+  DOUT << "Found: " << CallSites.size() << " call sites.\n";
+
+ // Inline call sites.
+ bool Changed = false;
+ do {
+ Changed = false;
+ for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
+ ++index) {
+ CallSite CS = CallSites[index];
+ if (Function *Callee = CS.getCalledFunction()) {
+
+ // Eliminate calls that are never inlinable.
+ if (Callee->isDeclaration() ||
+ CS.getInstruction()->getParent()->getParent() == Callee) {
+ CallSites.erase(CallSites.begin() + index);
+ --index;
+ continue;
+ }
+ InlineCost IC = CA.getInlineCost(CS, NeverInline);
+ if (IC.isAlways()) {
+          DOUT << " Inlining: cost=always"
+               << ", call: " << *CS.getInstruction();
+        } else if (IC.isNever()) {
+          DOUT << " NOT Inlining: cost=never"
+               << ", call: " << *CS.getInstruction();
+ continue;
+ } else {
+ int Cost = IC.getValue();
+
+ if (Cost >= (int) BasicInlineThreshold) {
+ DOUT << " NOT Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction();
+ continue;
+ } else {
+ DOUT << " Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction();
+ }
+ }
+
+ // Inline
+ if (InlineFunction(CS, NULL, TD)) {
+ if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
+ Callee->hasAvailableExternallyLinkage()))
+ DeadFunctions.insert(Callee);
+ Changed = true;
+ CallSites.erase(CallSites.begin() + index);
+ --index;
+ }
+ }
+ }
+ } while (Changed);
+
+ // Remove completely inlined functions from module.
+ for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
+ E = DeadFunctions.end(); I != E; ++I) {
+ Function *D = *I;
+ Module *M = D->getParent();
+ M->getFunctionList().remove(D);
+ }
+}
+
+BasicInliner::BasicInliner(TargetData *TD) {
+ Impl = new BasicInlinerImpl(TD);
+}
+
+BasicInliner::~BasicInliner() {
+ delete Impl;
+}
+
+/// addFunction - Add function into the list of functions to process.
+/// All functions must be inserted using this interface before invoking
+/// inlineFunctions().
+void BasicInliner::addFunction(Function *F) {
+ Impl->addFunction(F);
+}
+
+/// neverInlineFunction - Sometimes a function is never to be inlined, for one
+/// reason or another.
+void BasicInliner::neverInlineFunction(Function *F) {
+ Impl->neverInlineFunction(F);
+}
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInliner::inlineFunctions() {
+ Impl->inlineFunctions();
+}
+
+}
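+
+// A minimal usage sketch (hypothetical; `M` is an assumed Module* and `TD` an
+// assumed TargetData*): hand every defined function to the inliner, then let
+// it inline call sites and drop fully inlined callees.
+//
+//   BasicInliner BI(TD);
+//   for (Module::iterator F = M->begin(), E = M->end(); F != E; ++F)
+//     if (!F->isDeclaration())
+//       BI.addFunction(F);
+//   BI.inlineFunctions();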
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 0000000..c4fd1ea
--- /dev/null
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,282 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// forward dominator (set, immediate dominators, tree, and frontier)
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "break-crit-edges"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+namespace {
+ struct VISIBILITY_HIDDEN BreakCriticalEdges : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BreakCriticalEdges() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addPreserved<LoopInfo>();
+
+ // No loop canonicalization guarantees are broken by this pass.
+ AU.addPreservedID(LoopSimplifyID);
+ }
+ };
+}
+
+char BreakCriticalEdges::ID = 0;
+static RegisterPass<BreakCriticalEdges>
+X("break-crit-edges", "Break critical edges in CFG");
+
+// Publicly exposed interface to this pass.
+const PassInfo *const llvm::BreakCriticalEdgesID = &X;
+FunctionPass *llvm::createBreakCriticalEdgesPass() {
+ return new BreakCriticalEdges();
+}
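+
+// A sketch of how a client pass typically "requires" this pass, per the file
+// header above (the enclosing pass class here is hypothetical):
+//
+//   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.addRequiredID(BreakCriticalEdgesID);  // break critical edges first
+//   }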
+
+// runOnFunction - Loop over all of the edges in the CFG, breaking critical
+// edges as they are found.
+//
+bool BreakCriticalEdges::runOnFunction(Function &F) {
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (TI->getNumSuccessors() > 1)
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (SplitCriticalEdge(TI, i, this)) {
+ ++NumBroken;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+// isCriticalEdge - Return true if the specified edge is a critical edge.
+// Critical edges are edges from a block with multiple successors to a block
+// with multiple predecessors.
+//
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+ bool AllowIdenticalEdges) {
+ assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ if (TI->getNumSuccessors() == 1) return false;
+
+ const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ pred_const_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+ // If there is more than one predecessor, this is a critical edge...
+ assert(I != E && "No preds, but we have an edge to the block?");
+ const BasicBlock *FirstPred = *I;
+ ++I; // Skip one edge due to the incoming arc from TI.
+ if (!AllowIdenticalEdges)
+ return I != E;
+
+ // If AllowIdenticalEdges is true, then we allow this edge to be considered
+ // non-critical iff all preds come from TI's block.
+ while (I != E) {
+ if (*I != FirstPred)
+ return true;
+    // Note: leave this as-is until no one still compiles with either gcc 4.0.1
+    // or Xcode 2. It seems to work around the pred_iterator assert in PR 2207.
+ E = pred_end(*I);
+ ++I;
+ }
+ return false;
+}
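+
+// For illustration (hypothetical IR): the edge %a -> %merge below is critical,
+// because %a has two successors and %merge has two predecessors:
+//
+//   a:      br i1 %c, label %merge, label %b
+//   b:      br label %merge
+//   merge:  ; preds = %a, %b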
+
+/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
+/// split the critical edge. This will update DominatorTree and
+/// DominanceFrontier information if they are available, so calling this
+/// function will not invalidate either of them. This returns true if the edge
+/// was split, false otherwise. If MergeIdenticalEdges is set, all other edges
+/// from TIBB to DestBB are redirected through the new block as well, so they
+/// all reach DestBB via a single block.
+//
+bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
+ bool MergeIdenticalEdges) {
+ if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false;
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+ // Create a new basic block, linking it into the CFG.
+ BasicBlock *NewBB = BasicBlock::Create(TIBB->getName() + "." +
+ DestBB->getName() + "_crit_edge");
+ // Create our unconditional branch...
+ BranchInst::Create(DestBB, NewBB);
+
+ // Branch to the new block, breaking the edge.
+ TI->setSuccessor(SuccNum, NewBB);
+
+ // Insert the block into the function... right after the block TI lives in.
+ Function &F = *TIBB->getParent();
+ Function::iterator FBBI = TIBB;
+ F.getBasicBlockList().insert(++FBBI, NewBB);
+
+ // If there are any PHI nodes in DestBB, we need to update them so that they
+ // merge incoming values from NewBB instead of from TIBB.
+ //
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // We no longer enter through TIBB, now we come in through NewBB. Revector
+ // exactly one entry in the PHI node that used to come from TIBB to come
+ // from NewBB.
+ int BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
+
+ // If there are any other edges from TIBB to DestBB, update those to go
+ // through the split block, making those edges non-critical as well (and
+ // reducing the number of phi entries in the DestBB if relevant).
+ if (MergeIdenticalEdges) {
+ for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (TI->getSuccessor(i) != DestBB) continue;
+
+ // Remove an entry for TIBB from DestBB phi nodes.
+ DestBB->removePredecessor(TIBB);
+
+ // We found another edge to DestBB, go to NewBB instead.
+ TI->setSuccessor(i, NewBB);
+ }
+ }
+
+ // If we don't have a pass object, we can't update anything...
+ if (P == 0) return true;
+
+ // Now update analysis information. Since the only predecessor of NewBB is
+ // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
+ // anything, as there are other successors of DestBB. However, if all other
+ // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
+ // loop header) then NewBB dominates DestBB.
+ SmallVector<BasicBlock*, 8> OtherPreds;
+
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I)
+ if (*I != NewBB)
+ OtherPreds.push_back(*I);
+
+ bool NewBBDominatesDestBB = true;
+
+ // Should we update DominatorTree information?
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ DomTreeNode *TINode = DT->getNode(TIBB);
+
+ // The new block is not the immediate dominator for any other nodes, but
+ // TINode is the immediate dominator for the new node.
+ //
+ if (TINode) { // Don't break unreachable code!
+ DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
+ DomTreeNode *DestBBNode = 0;
+
+ // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
+ if (!OtherPreds.empty()) {
+ DestBBNode = DT->getNode(DestBB);
+ while (!OtherPreds.empty() && NewBBDominatesDestBB) {
+ if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
+ NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
+ OtherPreds.pop_back();
+ }
+ OtherPreds.clear();
+ }
+
+ // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
+ // doesn't dominate anything.
+ if (NewBBDominatesDestBB) {
+ if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
+ DT->changeImmediateDominator(DestBBNode, NewBBNode);
+ }
+ }
+ }
+
+ // Should we update DominanceFrontier information?
+ if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>()) {
+ // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
+ if (!OtherPreds.empty()) {
+ // FIXME: IMPLEMENT THIS!
+ assert(0 && "Requiring domfrontiers but not idom/domtree/domset."
+ " not implemented yet!");
+ }
+
+ // Since the new block is dominated by its only predecessor TIBB,
+ // it cannot be in any block's dominance frontier. If NewBB dominates
+ // DestBB, its dominance frontier is the same as DestBB's, otherwise it is
+ // just {DestBB}.
+ if (NewBBDominatesDestBB) {
+ DominanceFrontier::iterator I = DF->find(DestBB);
+ if (I != DF->end()) {
+ DF->addBasicBlock(NewBB, I->second);
+
+ if (I->second.count(DestBB)) {
+ // However NewBB's frontier does not include DestBB.
+ DominanceFrontier::iterator NF = DF->find(NewBB);
+ DF->removeFromFrontier(NF, DestBB);
+ }
+ }
+ else
+ DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType());
+ } else {
+ DominanceFrontier::DomSetType NewDFSet;
+ NewDFSet.insert(DestBB);
+ DF->addBasicBlock(NewBB, NewDFSet);
+ }
+ }
+
+ // Update LoopInfo if it is around.
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) {
+    // If one block or the other is not in a loop, the new block is not in a
+    // loop either, and thus LI doesn't need to be updated.
+ if (Loop *TIL = LI->getLoopFor(TIBB))
+ if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+ if (TIL == DestLoop) {
+          // Both blocks are in the same loop, so NewBB joins that loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (TIL->contains(DestLoop->getHeader())) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (DestLoop->contains(TIL->getHeader())) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == DestBB &&
+ "Should not create irreducible loops!");
+ if (Loop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+ }
+ }
+ return true;
+}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
new file mode 100644
index 0000000..6628b4b
--- /dev/null
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -0,0 +1,27 @@
+add_llvm_library(LLVMTransformUtils
+ AddrModeMatcher.cpp
+ BasicBlockUtils.cpp
+ BasicInliner.cpp
+ BreakCriticalEdges.cpp
+ CloneFunction.cpp
+ CloneLoop.cpp
+ CloneModule.cpp
+ CloneTrace.cpp
+ CodeExtractor.cpp
+ DemoteRegToStack.cpp
+ InlineCost.cpp
+ InlineFunction.cpp
+ LCSSA.cpp
+ Local.cpp
+ LoopSimplify.cpp
+ LowerAllocations.cpp
+ LowerInvoke.cpp
+ LowerSwitch.cpp
+ Mem2Reg.cpp
+ PromoteMemoryToRegister.cpp
+ SimplifyCFG.cpp
+ UnifyFunctionExitNodes.cpp
+ UnrollLoop.cpp
+ ValueMapper.cpp
+ InstructionNamer.cpp
+ )
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 0000000..d0fdefa
--- /dev/null
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,533 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner. This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+using namespace llvm;
+
+// CloneBasicBlock - See comments in Cloning.h
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
+ DenseMap<const Value*, Value*> &ValueMap,
+ const char *NameSuffix, Function *F,
+ ClonedCodeInfo *CodeInfo) {
+ BasicBlock *NewBB = BasicBlock::Create("", F);
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ ValueMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator());
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->getEntryBlock();
+ }
+ return NewBB;
+}
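+
+// A usage sketch (hypothetical): clone one block and remap its operands. This
+// assumes every non-constant operand of the block's instructions is defined
+// in the block itself or already present in ValueMap, since RemapInstruction
+// asserts on unmapped values.
+//
+//   DenseMap<const Value*, Value*> ValueMap;
+//   BasicBlock *Copy = CloneBasicBlock(BB, ValueMap, ".copy", BB->getParent());
+//   for (BasicBlock::iterator I = Copy->begin(), E = Copy->end(); I != E; ++I)
+//     RemapInstruction(I, ValueMap);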
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references
+// to ValueMap values.
+//
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ DenseMap<const Value*, Value*> &ValueMap,
+ std::vector<ReturnInst*> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ assert(ValueMap.count(I) && "No mapping from source argument specified!");
+#endif
+
+ // Clone any attributes.
+ if (NewFunc->arg_size() == OldFunc->arg_size())
+ NewFunc->copyAttributesFrom(OldFunc);
+ else {
+    // Some arguments were deleted via the ValueMap, so copy the surviving
+    // arguments' attributes over one by one.
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ if (Argument* Anew = dyn_cast<Argument>(ValueMap[I]))
+ Anew->addAttr( OldFunc->getAttributes()
+ .getParamAttributes(I->getArgNo() + 1));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttr(0, OldFunc->getAttributes()
+ .getRetAttributes()));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttr(~0, OldFunc->getAttributes()
+ .getFnAttributes()));
+
+ }
+
+ // Loop over all of the basic blocks in the function, cloning them as
+ // appropriate. Note that we save BE this way in order to handle cloning of
+ // recursive functions into themselves.
+ //
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ const BasicBlock &BB = *BI;
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc,
+ CodeInfo);
+ ValueMap[&BB] = CBB; // Add basic block mapping.
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+ Returns.push_back(RI);
+ }
+
+ // Loop over all of the instructions in the function, fixing up operand
+ // references as we go. This uses ValueMap to do all the hard work.
+ //
+ for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]),
+ BE = NewFunc->end(); BB != BE; ++BB)
+ // Loop over all instructions, fixing each one as we find it...
+ for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+ RemapInstruction(II, ValueMap);
+}
+
+/// CloneFunction - Return a copy of the specified function, but without
+/// embedding the function into another module. Also, any references specified
+/// in the ValueMap are changed to refer to their mapped value instead of the
+/// original one. If any of the arguments to the function are in the ValueMap,
+/// the arguments are deleted from the resultant function. The ValueMap is
+/// updated to include mappings from all of the instructions and basic blocks
+/// the function from their old to new values.
+///
+Function *llvm::CloneFunction(const Function *F,
+ DenseMap<const Value*, Value*> &ValueMap,
+ ClonedCodeInfo *CodeInfo) {
+ std::vector<const Type*> ArgTypes;
+
+  // The user might be deleting arguments to the function by specifying them in
+  // the ValueMap. If so, we must not add those arguments to the argument type
+  // vector.
+  //
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (ValueMap.count(I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I->getType());
+
+ // Create a new function type...
+ FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
+ ArgTypes, F->getFunctionType()->isVarArg());
+
+ // Create the new function...
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName());
+
+ // Loop over the arguments, copying the names of the mapped arguments over...
+ Function::arg_iterator DestI = NewF->arg_begin();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (ValueMap.count(I) == 0) { // Is this argument preserved?
+ DestI->setName(I->getName()); // Copy the name over...
+ ValueMap[I] = DestI++; // Add mapping to ValueMap
+ }
+
+ std::vector<ReturnInst*> Returns; // Ignore returns cloned...
+ CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);
+ return NewF;
+}
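+
+// A usage sketch (hypothetical; `F` is an assumed Function*): clone F and
+// insert the copy into the same module under a new name.
+//
+//   DenseMap<const Value*, Value*> ValueMap;
+//   Function *NewF = CloneFunction(F, ValueMap);
+//   NewF->setName(F->getName() + ".clone");
+//   F->getParent()->getFunctionList().push_back(NewF);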
+
+namespace {
+ /// PruningFunctionCloner - This class is a private class used to implement
+ /// the CloneAndPruneFunctionInto method.
+ struct VISIBILITY_HIDDEN PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ DenseMap<const Value*, Value*> &ValueMap;
+ std::vector<ReturnInst*> &Returns;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+ const TargetData *TD;
+ Value *DbgFnStart;
+ public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ DenseMap<const Value*, Value*> &valueMap,
+ std::vector<ReturnInst*> &returns,
+ const char *nameSuffix,
+ ClonedCodeInfo *codeInfo,
+ const TargetData *td)
+ : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns),
+ NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td), DbgFnStart(NULL) {
+ }
+
+ /// CloneBlock - The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone);
+
+ public:
+ /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+ /// mapping its operands through ValueMap if they are available.
+ Constant *ConstantFoldMappedInstruction(const Instruction *I);
+ };
+}
+
+/// CloneBlock - The specified block is found to be reachable, clone it and
+/// anything that it can reach.
+void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone){
+ Value *&BBEntry = ValueMap[BB];
+
+ // Have we already cloned this block?
+ if (BBEntry) return;
+
+ // Nope, clone it now.
+ BasicBlock *NewBB;
+ BBEntry = NewBB = BasicBlock::Create();
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over, DCE'ing as we go. This
+ // loop doesn't include the terminator.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ II != IE; ++II) {
+    // If this instruction constant folds, don't bother cloning it; instead,
+    // just add the folded constant to the value map.
+ if (Constant *C = ConstantFoldMappedInstruction(II)) {
+ ValueMap[II] = C;
+ continue;
+ }
+
+ // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner.
+ if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) {
+ if (DbgFnStart == NULL) {
+ DISubprogram SP(cast<GlobalVariable>(DFSI->getSubprogram()));
+ if (SP.describes(BB->getParent()))
+ DbgFnStart = DFSI->getSubprogram();
+ }
+ }
+ if (const DbgRegionEndInst *DREIS = dyn_cast<DbgRegionEndInst>(II)) {
+ if (DREIS->getContext() == DbgFnStart)
+ continue;
+ }
+
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ ValueMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ // Finally, clone over the terminator.
+ const TerminatorInst *OldTI = BB->getTerminator();
+ bool TerminatorDone = false;
+ if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+ if (BI->isConditional()) {
+ // If the condition was a known constant in the callee...
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ // Or is a known constant in the caller...
+ if (Cond == 0)
+ Cond = dyn_cast_or_null<ConstantInt>(ValueMap[BI->getCondition()]);
+
+ // Constant fold to uncond branch!
+ if (Cond) {
+ BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+ ValueMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+ } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+ // If switching on a value known constant in the caller.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (Cond == 0) // Or known constant after constant prop in the callee...
+ Cond = dyn_cast_or_null<ConstantInt>(ValueMap[SI->getCondition()]);
+ if (Cond) { // Constant fold to uncond branch!
+ BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
+ ValueMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+
+ if (!TerminatorDone) {
+ Instruction *NewInst = OldTI->clone();
+ if (OldTI->hasName())
+ NewInst->setName(OldTI->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ ValueMap[OldTI] = NewInst; // Add instruction map to value.
+
+ // Recursively clone any reachable successor blocks.
+ const TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI);
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->front();
+ }
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator()))
+ Returns.push_back(RI);
+}
+
+/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+/// mapping its operands through ValueMap if they are available.
+Constant *PruningFunctionCloner::
+ConstantFoldMappedInstruction(const Instruction *I) {
+ SmallVector<Constant*, 8> Ops;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
+ ValueMap)))
+ Ops.push_back(Op);
+ else
+      return 0;  // Not all operands are constant!
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(),
+ &Ops[0], Ops.size(), TD);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
+ if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr)
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(),
+ CE);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
+ Ops.size(), TD);
+}
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ DenseMap<const Value*, Value*> &ValueMap,
+ std::vector<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ const TargetData *TD) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(ValueMap.count(II) && "No mapping from source argument specified!");
+#endif
+
+ PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns,
+ NameSuffix, CodeInfo, TD);
+
+ // Clone the entry block, and anything recursively reachable from it.
+ std::vector<const BasicBlock*> CloneWorklist;
+ CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ while (!CloneWorklist.empty()) {
+ const BasicBlock *BB = CloneWorklist.back();
+ CloneWorklist.pop_back();
+ PFC.CloneBlock(BB, CloneWorklist);
+ }
+
+ // Loop over all of the basic blocks in the old function. If the block was
+ // reachable, we have cloned it and the old block is now in the value map:
+ // insert it into the new function in the right order. If not, ignore it.
+ //
+ // Defer PHI resolution until rest of function is resolved.
+ std::vector<const PHINode*> PHIToResolve;
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]);
+ if (NewBB == 0) continue; // Dead block.
+
+ // Add the new block to the new function.
+ NewFunc->getBasicBlockList().push_back(NewBB);
+
+ // Loop over all of the instructions in the block, fixing up operand
+ // references as we go. This uses ValueMap to do all the hard work.
+ //
+ BasicBlock::iterator I = NewBB->begin();
+
+ // Handle PHI nodes specially, as we have to remove references to dead
+ // blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ // Skip over all PHI nodes, remembering them for later.
+ BasicBlock::const_iterator OldI = BI->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
+ PHIToResolve.push_back(cast<PHINode>(OldI));
+ }
+
+ // Otherwise, remap the rest of the instructions normally.
+ for (; I != NewBB->end(); ++I)
+ RemapInstruction(I, ValueMap);
+ }
+
+ // Defer PHI resolution until rest of function is resolved, PHI resolution
+ // requires the CFG to be up-to-date.
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+ const PHINode *OPN = PHIToResolve[phino];
+ unsigned NumPreds = OPN->getNumIncomingValues();
+ const BasicBlock *OldBB = OPN->getParent();
+ BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]);
+
+ // Map operands for blocks that are live and remove operands for blocks
+ // that are dead.
+ for (; phino != PHIToResolve.size() &&
+ PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+ OPN = PHIToResolve[phino];
+ PHINode *PN = cast<PHINode>(ValueMap[OPN]);
+ for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ if (BasicBlock *MappedBlock =
+ cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
+ Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap);
+ assert(InVal && "Unknown input value?");
+ PN->setIncomingValue(pred, InVal);
+ PN->setIncomingBlock(pred, MappedBlock);
+ } else {
+ PN->removeIncomingValue(pred, false);
+          --pred, --e;  // Revisit this index with the shifted-down entry.
+ }
+ }
+ }
+
+ // The loop above has removed PHI entries for those blocks that are dead
+ // and has updated others. However, if a block is live (i.e. copied over)
+ // but its terminator has been changed to not go to this block, then our
+ // phi nodes will have invalid entries. Update the PHI nodes in this
+ // case.
+ PHINode *PN = cast<PHINode>(NewBB->begin());
+ NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+ if (NumPreds != PN->getNumIncomingValues()) {
+ assert(NumPreds < PN->getNumIncomingValues());
+ // Count how many times each predecessor comes to this block.
+ std::map<BasicBlock*, unsigned> PredCount;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+ PI != E; ++PI)
+ --PredCount[*PI];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
+ E = PredCount.end(); PCI != E; ++PCI) {
+ BasicBlock *Pred = PCI->first;
+ for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with undef or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(ValueMap[OldI] == PN && "ValueMap mismatch");
+ ValueMap[OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ // NOTE: We cannot eliminate single entry phi nodes here, because of
+ // ValueMap. Single entry phi nodes can have multiple ValueMap entries
+ // pointing at them. Thus, deleting one would require scanning the ValueMap
+ // to update any entries in it that would require that. This would be
+ // really slow.
+ }
+
+  // Now that the inlined function body has been fully constructed, go through
+  // and zap unconditional fall-through branches. This happens all the time
+  // when specializing code: code specialization turns conditional branches
+  // into unconditional branches, and this code folds them.
+ Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]);
+ while (I != NewFunc->end()) {
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) { ++I; continue; }
+
+ // Note that we can't eliminate uncond branches if the destination has
+ // single-entry PHI nodes. Eliminating the single-entry phi nodes would
+ // require scanning the ValueMap to update any entries that point to the phi
+ // node.
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+ ++I; continue;
+ }
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(I);
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I, iteratively merge all things this block branches to.
+ }
+}
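+
+// A hedged sketch of the typical inliner-style call (hypothetical names:
+// `Callee`, `NewF`, `TD`; Callee is assumed to take a single i32 parameter,
+// pre-mapped to a constant so the cloner can fold branches on it):
+//
+//   DenseMap<const Value*, Value*> ValueMap;
+//   ValueMap[Callee->arg_begin()] = ConstantInt::get(Type::Int32Ty, 42);
+//   std::vector<ReturnInst*> Returns;
+//   CloneAndPruneFunctionInto(NewF, Callee, ValueMap, Returns, ".i", 0, TD);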
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
new file mode 100644
index 0000000..7e000a1
--- /dev/null
+++ b/lib/Transforms/Utils/CloneLoop.cpp
@@ -0,0 +1,152 @@
+//===- CloneLoop.cpp - Clone loop nest ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneLoop interface which makes a copy of a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/DenseMap.h"
+
+
+using namespace llvm;
+
+/// CloneDominatorInfo - Clone the basic block's dominator tree node and, if
+/// available, its dominance frontier info. The basic block is expected to
+/// have been cloned already.
+static void CloneDominatorInfo(BasicBlock *BB,
+ DenseMap<const Value *, Value *> &ValueMap,
+ DominatorTree *DT,
+ DominanceFrontier *DF) {
+
+ assert (DT && "DominatorTree is not available");
+ DenseMap<const Value *, Value*>::iterator BI = ValueMap.find(BB);
+ assert (BI != ValueMap.end() && "BasicBlock clone is missing");
+ BasicBlock *NewBB = cast<BasicBlock>(BI->second);
+
+ // NewBB already got dominator info.
+ if (DT->getNode(NewBB))
+ return;
+
+ assert (DT->getNode(BB) && "BasicBlock does not have dominator info");
+  // The entry block is not expected here. Infinite loops are not to be cloned.
+ assert (DT->getNode(BB)->getIDom() && "BasicBlock does not have immediate dominator");
+ BasicBlock *BBDom = DT->getNode(BB)->getIDom()->getBlock();
+
+ // NewBB's dominator is either BB's dominator or BB's dominator's clone.
+ BasicBlock *NewBBDom = BBDom;
+ DenseMap<const Value *, Value*>::iterator BBDomI = ValueMap.find(BBDom);
+ if (BBDomI != ValueMap.end()) {
+ NewBBDom = cast<BasicBlock>(BBDomI->second);
+ if (!DT->getNode(NewBBDom))
+ CloneDominatorInfo(BBDom, ValueMap, DT, DF);
+ }
+ DT->addNewBlock(NewBB, NewBBDom);
+
+  // Copy the cloned dominance frontier set.
+ if (DF) {
+ DominanceFrontier::DomSetType NewDFSet;
+ DominanceFrontier::iterator DFI = DF->find(BB);
+ if ( DFI != DF->end()) {
+ DominanceFrontier::DomSetType S = DFI->second;
+ for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
+ I != E; ++I) {
+ BasicBlock *DB = *I;
+ DenseMap<const Value*, Value*>::iterator IDM = ValueMap.find(DB);
+ if (IDM != ValueMap.end())
+ NewDFSet.insert(cast<BasicBlock>(IDM->second));
+ else
+ NewDFSet.insert(DB);
+ }
+ }
+ DF->addBasicBlock(NewBB, NewDFSet);
+ }
+}
+
+/// CloneLoop - Clone the loop nest rooted at OrigL, cloning dominator info and
+/// populating ValueMap with the old-block to new-block mapping.
+Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
+ DenseMap<const Value *, Value *> &ValueMap, Pass *P) {
+
+ DominatorTree *DT = NULL;
+ DominanceFrontier *DF = NULL;
+ if (P) {
+ DT = P->getAnalysisIfAvailable<DominatorTree>();
+ DF = P->getAnalysisIfAvailable<DominanceFrontier>();
+ }
+
+ SmallVector<BasicBlock *, 16> NewBlocks;
+
+ // Populate loop nest.
+ SmallVector<Loop *, 8> LoopNest;
+ LoopNest.push_back(OrigL);
+
+ Loop *NewParentLoop = NULL;
+ while (!LoopNest.empty()) {
+ Loop *L = LoopNest.pop_back_val();
+ Loop *NewLoop = new Loop();
+
+ if (!NewParentLoop)
+ NewParentLoop = NewLoop;
+
+ LPM->insertLoop(NewLoop, L->getParentLoop());
+
+ // Clone Basic Blocks.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ BasicBlock *NewBB = CloneBasicBlock(BB, ValueMap, ".clone");
+ ValueMap[BB] = NewBB;
+ if (P)
+ LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
+ NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ NewBlocks.push_back(NewBB);
+ }
+
+ // Clone dominator info.
+ if (DT)
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ CloneDominatorInfo(BB, ValueMap, DT, DF);
+ }
+
+ // Process sub loops
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ LoopNest.push_back(*I);
+ }
+
+ // Remap instructions to reference operands from ValueMap.
+ for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
+ NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) {
+ BasicBlock *NB = *NBItr;
+ for(BasicBlock::iterator BI = NB->begin(), BE = NB->end();
+ BI != BE; ++BI) {
+ Instruction *Insn = BI;
+ for (unsigned index = 0, num_ops = Insn->getNumOperands();
+ index != num_ops; ++index) {
+ Value *Op = Insn->getOperand(index);
+ DenseMap<const Value *, Value *>::iterator OpItr = ValueMap.find(Op);
+ if (OpItr != ValueMap.end())
+ Insn->setOperand(index, OpItr->second);
+ }
+ }
+ }
+
+ BasicBlock *Latch = OrigL->getLoopLatch();
+ Function *F = Latch->getParent();
+ F->getBasicBlockList().insert(OrigL->getHeader(),
+ NewBlocks.begin(), NewBlocks.end());
+
+ return NewParentLoop;
+}
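+
+// A usage sketch (hypothetical): from inside a LoopPass::runOnLoop(Loop *L,
+// LPPassManager &LPM), clone L together with its dominator info.
+//
+//   DenseMap<const Value*, Value*> ValueMap;
+//   LoopInfo *LI = &getAnalysis<LoopInfo>();
+//   Loop *NewL = CloneLoop(L, &LPM, LI, ValueMap, this);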
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 0000000..337fa8a
--- /dev/null
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,126 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Constant.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+/// CloneModule - Return an exact copy of the specified module. This is not as
+/// easy as it might seem because we have to worry about making copies of global
+/// variables and functions, and making their (initializers and references,
+/// respectively) refer to the right globals.
+///
+Module *llvm::CloneModule(const Module *M) {
+ // Create the value map that maps things from the old module over to the new
+ // module.
+ DenseMap<const Value*, Value*> ValueMap;
+ return CloneModule(M, ValueMap);
+}
+
+Module *llvm::CloneModule(const Module *M,
+ DenseMap<const Value*, Value*> &ValueMap) {
+ // First off, we need to create the new module...
+ Module *New = new Module(M->getModuleIdentifier());
+ New->setDataLayout(M->getDataLayout());
+ New->setTargetTriple(M->getTargetTriple());
+ New->setModuleInlineAsm(M->getModuleInlineAsm());
+
+ // Copy all of the type symbol table entries over.
+ const TypeSymbolTable &TST = M->getTypeSymbolTable();
+ for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+ TI != TE; ++TI)
+ New->addTypeName(TI->first, TI->second);
+
+ // Copy all of the dependent libraries over.
+ for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+ New->addLibrary(*I);
+
+ // Loop over all of the global variables, making corresponding globals in the
+ // new module. Here we add them to the ValueMap and to the new Module. We
+ // don't worry about attributes or initializers, they will come later.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = new GlobalVariable(I->getType()->getElementType(),
+ false,
+ GlobalValue::ExternalLinkage, 0,
+ I->getName(), New);
+ GV->setAlignment(I->getAlignment());
+ ValueMap[I] = GV;
+ }
+
+ // Loop over the functions in the module, making external functions as before
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *NF =
+ Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+ GlobalValue::ExternalLinkage, I->getName(), New);
+ NF->copyAttributesFrom(I);
+ ValueMap[I] = NF;
+ }
+
+ // Loop over the aliases in the module
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+ I->getName(), NULL, New);
+
+  // Now that all of the things that a global variable initializer can refer
+  // to have been created, loop through and copy the global variable
+  // initializers over. We also set the attributes on the globals now.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
+ if (I->hasInitializer())
+ GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
+ ValueMap)));
+ GV->setLinkage(I->getLinkage());
+ GV->setThreadLocal(I->isThreadLocal());
+ GV->setConstant(I->isConstant());
+ }
+
+ // Similarly, copy over function bodies now...
+ //
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *F = cast<Function>(ValueMap[I]);
+ if (!I->isDeclaration()) {
+ Function::arg_iterator DestI = F->arg_begin();
+ for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
+ ++J) {
+ DestI->setName(J->getName());
+ ValueMap[J] = DestI++;
+ }
+
+ std::vector<ReturnInst*> Returns; // Ignore returns cloned...
+ CloneFunctionInto(F, I, ValueMap, Returns);
+ }
+
+ F->setLinkage(I->getLinkage());
+ }
+
+ // And aliases
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I) {
+ GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
+ GA->setLinkage(I->getLinkage());
+ if (const Constant* C = I->getAliasee())
+ GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+ }
+
+ return New;
+}
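+
+// A usage sketch (hypothetical): take a private copy of module `M`, experiment
+// on the copy, and throw it away; M itself is left untouched.
+//
+//   Module *Copy = CloneModule(M);
+//   // ... run speculative transformations on Copy ...
+//   delete Copy;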
diff --git a/lib/Transforms/Utils/CloneTrace.cpp b/lib/Transforms/Utils/CloneTrace.cpp
new file mode 100644
index 0000000..0711139
--- /dev/null
+++ b/lib/Transforms/Utils/CloneTrace.cpp
@@ -0,0 +1,119 @@
+//===- CloneTrace.cpp - Clone a trace -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneTrace interface, which is used when writing
+// runtime optimizations. It takes a vector of basic blocks clones the basic
+// blocks, removes internal phi nodes, adds it to the same function as the
+// original (although there is no jump to it) and returns the new vector of
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+//Clones the trace (a vector of basic blocks)
+std::vector<BasicBlock *>
+llvm::CloneTrace(const std::vector<BasicBlock*> &origTrace) {
+ std::vector<BasicBlock *> clonedTrace;
+ DenseMap<const Value*, Value*> ValueMap;
+
+ //First, loop over all the Basic Blocks in the trace and copy
+ //them using CloneBasicBlock. Also fix the phi nodes during
+ //this loop. To fix the phi nodes, we delete incoming branches
+ //that are not in the trace.
+ for (std::vector<BasicBlock *>::const_iterator T = origTrace.begin(),
+ End = origTrace.end(); T != End; ++T) {
+
+ //Clone Basic Block
+ BasicBlock *clonedBlock =
+ CloneBasicBlock(*T, ValueMap, ".tr", (*T)->getParent());
+
+ //Add it to our new trace
+ clonedTrace.push_back(clonedBlock);
+
+ //Add this new mapping to our Value Map
+ ValueMap[*T] = clonedBlock;
+
+ //Loop over the phi instructions and delete operands
+ //that are from blocks not in the trace
+ //only do this if we are NOT the first block
+ if (T != origTrace.begin()) {
+      for (BasicBlock::iterator I = clonedBlock->begin(); isa<PHINode>(I); ) {
+        //advance the iterator before erasing, so it stays valid
+        PHINode *PN = cast<PHINode>(I++);
+        //get incoming value for the previous BB
+        Value *V = PN->getIncomingValueForBlock(*(T-1));
+        assert(V && "No incoming value from a BasicBlock in our trace!");
+
+        //remap our phi node to point to incoming value
+        ValueMap[PN] = V;
+
+        //remove phi node
+        clonedBlock->getInstList().erase(PN);
+ }
+ }
+ }
+
+ //Second loop to do the remapping
+ for (std::vector<BasicBlock *>::const_iterator BB = clonedTrace.begin(),
+ BE = clonedTrace.end(); BB != BE; ++BB) {
+ for (BasicBlock::iterator I = (*BB)->begin(); I != (*BB)->end(); ++I) {
+ //Loop over all the operands of the instruction
+ for (unsigned op=0, E = I->getNumOperands(); op != E; ++op) {
+ const Value *Op = I->getOperand(op);
+
+ //Get it out of the value map
+ Value *V = ValueMap[Op];
+
+      //If it's not in the value map, then it's outside our trace, so ignore it
+ if (V != 0)
+ I->setOperand(op,V);
+ }
+ }
+ }
+
+ //return new vector of basic blocks
+ return clonedTrace;
+}
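+
+//Usage sketch (hypothetical; Header and Body are BasicBlock* along one path
+//through a function):
+//
+//  std::vector<BasicBlock*> Blocks;
+//  Blocks.push_back(Header);
+//  Blocks.push_back(Body);
+//  std::vector<BasicBlock*> Cloned = CloneTrace(Blocks);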
+
+/// CloneTraceInto - Clone T into NewFunc. Original<->clone mapping is
+/// saved in ValueMap.
+///
+void llvm::CloneTraceInto(Function *NewFunc, Trace &T,
+ DenseMap<const Value*, Value*> &ValueMap,
+ const char *NameSuffix) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+ // Loop over all of the basic blocks in the trace, cloning them as
+ // appropriate.
+ //
+ for (Trace::const_iterator BI = T.begin(), BE = T.end(); BI != BE; ++BI) {
+ const BasicBlock *BB = *BI;
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB = CloneBasicBlock(BB, ValueMap, NameSuffix, NewFunc);
+ ValueMap[BB] = CBB; // Add basic block mapping.
+ }
+
+ // Loop over all of the instructions in the new function, fixing up operand
+ // references as we go. This uses ValueMap to do all the hard work.
+ //
+ for (Function::iterator BB =
+ cast<BasicBlock>(ValueMap[T.getEntryBasicBlock()]),
+ BE = NewFunc->end(); BB != BE; ++BB)
+ // Loop over all instructions, fixing each one as we find it...
+ for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+ RemapInstruction(II, ValueMap);
+}
+
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 0000000..6d5904e
--- /dev/null
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,746 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+ cl::desc("Aggregate arguments to code-extracted functions"));
+
+namespace {
+ class VISIBILITY_HIDDEN CodeExtractor {
+ typedef std::vector<Value*> Values;
+ std::set<BasicBlock*> BlocksToExtract;
+ DominatorTree* DT;
+ bool AggregateArgs;
+ unsigned NumExitBlocks;
+ const Type *RetTy;
+ public:
+ CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false)
+ : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
+
+ Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+
+ bool isEligible(const std::vector<BasicBlock*> &code);
+
+ private:
+ /// definedInRegion - Return true if the specified value is defined in the
+ /// extracted region.
+ bool definedInRegion(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (BlocksToExtract.count(I->getParent()))
+ return true;
+ return false;
+ }
+
+ /// definedInCaller - Return true if the specified value is defined in the
+ /// function being code extracted, but not in the region being extracted.
+ /// These values must be passed in as live-ins to the function.
+ bool definedInCaller(Value *V) const {
+ if (isa<Argument>(V)) return true;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!BlocksToExtract.count(I->getParent()))
+ return true;
+ return false;
+ }
+
+ void severSplitPHINodes(BasicBlock *&Header);
+ void splitReturnBlocks();
+ void findInputsOutputs(Values &inputs, Values &outputs);
+
+ Function *constructFunction(const Values &inputs,
+ const Values &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode, BasicBlock *newHeader,
+ Function *oldFunction, Module *M);
+
+ void moveCodeToFunction(Function *newFunction);
+
+ void emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *newHeader,
+ Values &inputs,
+ Values &outputs);
+
+ };
+}
+
+/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
+/// region, we need to split the entry block of the region so that the PHI node
+/// is easier to deal with.
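+///
+/// For example (illustrative): given a header PHI
+///   %p = phi i32 [ %a, %out1 ], [ %b, %out2 ], [ %c, %in ]
+/// where %out1 and %out2 are outside the region and %in is inside, the header
+/// is split after its PHIs; the second half becomes the new region header, and
+/// a new PHI there merges %p (from the first half) with the internal value %c.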
+void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+ bool HasPredsFromRegion = false;
+ unsigned NumPredsOutsideRegion = 0;
+
+ if (Header != &Header->getParent()->getEntryBlock()) {
+ PHINode *PN = dyn_cast<PHINode>(Header->begin());
+ if (!PN) return; // No PHI nodes.
+
+ // If the header node contains any PHI nodes, check to see if there is more
+ // than one entry from outside the region. If so, we need to sever the
+ // header block into two.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i)))
+ HasPredsFromRegion = true;
+ else
+ ++NumPredsOutsideRegion;
+
+ // If there is one (or fewer) predecessor from outside the region, we don't
+ // need to do anything special.
+ if (NumPredsOutsideRegion <= 1) return;
+ }
+
+ // Otherwise, we need to split the header block into two pieces: one
+ // containing PHI nodes merging values from outside of the region, and a
+ // second that contains all of the code for the block and merges back any
+ // incoming values from inside of the region.
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+ BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
+ Header->getName()+".ce");
+
+ // We only want to code extract the second block now, and it becomes the new
+ // header of the region.
+ BasicBlock *OldPred = Header;
+ BlocksToExtract.erase(OldPred);
+ BlocksToExtract.insert(NewBB);
+ Header = NewBB;
+
+  // Okay, update dominator sets. The blocks that dominate the new one are the
+  // blocks that dominate the old header plus the new block itself.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // Okay, now we need to adjust the PHI nodes and any branches from within the
+ // region to go to the new header block instead of the old header block.
+ if (HasPredsFromRegion) {
+ PHINode *PN = cast<PHINode>(OldPred->begin());
+ // Loop over all of the predecessors of OldPred that are in the region,
+ // changing them to branch to NewBB instead.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ TI->replaceUsesOfWith(OldPred, NewBB);
+ }
+
+    // Okay, everything within the region is now branching to the right block;
+    // we just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+ for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+ PHINode *PN = cast<PHINode>(AfterPHIs);
+ // Create a new PHI node in the new region, which has an incoming value
+ // from OldPred of PN.
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".ce",
+ NewBB->begin());
+ NewPN->addIncoming(PN, OldPred);
+
+      // Loop over all of the incoming values in PN, moving them to NewPN if they
+ // are from the extracted region.
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+ PN->removeIncomingValue(i);
+ --i;
+ }
+ }
+ }
+ }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+ for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(),
+ E = BlocksToExtract.end(); I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator()))
+ (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+}
+
+// findInputsOutputs - Find inputs to, outputs from the code region.
+//
+void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
+ std::set<BasicBlock*> ExitBlocks;
+ for (std::set<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
+ ce = BlocksToExtract.end(); ci != ce; ++ci) {
+ BasicBlock *BB = *ci;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // If a used value is defined outside the region, it's an input. If an
+ // instruction is used outside the region, it's an output.
+ for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O)
+ if (definedInCaller(*O))
+ inputs.push_back(*O);
+
+ // Consider uses of this instruction (outputs).
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (!definedInRegion(*UI)) {
+ outputs.push_back(I);
+ break;
+ }
+ } // for: insts
+
+ // Keep track of the exit blocks from the region.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!BlocksToExtract.count(TI->getSuccessor(i)))
+ ExitBlocks.insert(TI->getSuccessor(i));
+ } // for: basic blocks
+
+ NumExitBlocks = ExitBlocks.size();
+
+ // Eliminate duplicates.
+ std::sort(inputs.begin(), inputs.end());
+ inputs.erase(std::unique(inputs.begin(), inputs.end()), inputs.end());
+ std::sort(outputs.begin(), outputs.end());
+ outputs.erase(std::unique(outputs.begin(), outputs.end()), outputs.end());
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
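+/// (With argument aggregation this becomes, roughly,
+///  f(struct {in0, ..., inN, out0, ..., outN} *).)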
+Function *CodeExtractor::constructFunction(const Values &inputs,
+ const Values &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode,
+ BasicBlock *newHeader,
+ Function *oldFunction,
+ Module *M) {
+ DOUT << "inputs: " << inputs.size() << "\n";
+ DOUT << "outputs: " << outputs.size() << "\n";
+
+  // The new function returns an unsigned value; outputs are passed back by
+  // reference.
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: RetTy = Type::VoidTy; break;
+ case 2: RetTy = Type::Int1Ty; break;
+ default: RetTy = Type::Int16Ty; break;
+ }
+
+ std::vector<const Type*> paramTy;
+
+ // Add the types of the input values to the function's argument list
+ for (Values::const_iterator i = inputs.begin(),
+ e = inputs.end(); i != e; ++i) {
+ const Value *value = *i;
+ DOUT << "value used in func: " << *value << "\n";
+ paramTy.push_back(value->getType());
+ }
+
+ // Add the types of the output values to the function's argument list.
+ for (Values::const_iterator I = outputs.begin(), E = outputs.end();
+ I != E; ++I) {
+ DOUT << "instr used in func: " << **I << "\n";
+ if (AggregateArgs)
+ paramTy.push_back((*I)->getType());
+ else
+ paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+ }
+
+ DOUT << "Function type: " << *RetTy << " f(";
+ for (std::vector<const Type*>::iterator i = paramTy.begin(),
+ e = paramTy.end(); i != e; ++i)
+ DOUT << **i << ", ";
+ DOUT << ")\n";
+
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ PointerType *StructPtr = PointerType::getUnqual(StructType::get(paramTy));
+ paramTy.clear();
+ paramTy.push_back(StructPtr);
+ }
+ const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false);
+
+ // Create the new function
+ Function *newFunction = Function::Create(funcType,
+ GlobalValue::InternalLinkage,
+ oldFunction->getName() + "_" +
+ header->getName(), M);
+ // If the old function is no-throw, so is the new one.
+ if (oldFunction->doesNotThrow())
+ newFunction->setDoesNotThrow(true);
+
+ newFunction->getBasicBlockList().push_back(newRootNode);
+
+ // Create an iterator to name all of the arguments we inserted.
+ Function::arg_iterator AI = newFunction->arg_begin();
+
+ // Rewrite all users of the inputs in the extracted region to use the
+ // arguments (or appropriate addressing into struct) instead.
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *RewriteVal;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::Int32Ty);
+ Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+ std::string GEPname = "gep_" + inputs[i]->getName();
+ TerminatorInst *TI = newFunction->begin()->getTerminator();
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(AI, Idx, Idx+2,
+ GEPname, TI);
+ RewriteVal = new LoadInst(GEP, "load" + GEPname, TI);
+ } else
+ RewriteVal = AI++;
+
+ std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+ for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+ use != useE; ++use)
+ if (Instruction* inst = dyn_cast<Instruction>(*use))
+ if (BlocksToExtract.count(inst->getParent()))
+ inst->replaceUsesOfWith(inputs[i], RewriteVal);
+ }
+
+ // Set names for input and output arguments.
+ if (!AggregateArgs) {
+ AI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+ AI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+ AI->setName(outputs[i]->getName()+".out");
+ }
+
+ // Rewrite branches to basic blocks outside of the loop to new dummy blocks
+ // within the new function. This must be done before we lose track of which
+ // blocks were originally in the code region.
+ std::vector<User*> Users(header->use_begin(), header->use_end());
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ // The BasicBlock which contains the branch is not in the region
+ // modify the branch target to a new block
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+ if (!BlocksToExtract.count(TI->getParent()) &&
+ TI->getParent()->getParent() == oldFunction)
+ TI->replaceUsesOfWith(header, newHeader);
+
+ return newFunction;
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side by adding
+/// the call instruction, splitting any PHI nodes in the header block as
+/// necessary.
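+///
+/// Sketch of the caller-side result (illustrative, for three or more exits):
+///   %t = call i16 @extracted(...)
+///   switch i16 %t, label %exit.0 [ i16 1, label %exit.1, ... ]
+/// where each destination is one of the region's original exit blocks.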
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+ Values &inputs, Values &outputs) {
+  // Emit a call to the new function, passing in either a pointer to the
+  // argument struct (if aggregating parameters) or the plain inputs plus
+  // allocated memory for the outputs.
+ std::vector<Value*> params, StructValues, ReloadOutputs;
+
+ // Add inputs as params, or to be filled into the struct
+ for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+ if (AggregateArgs)
+ StructValues.push_back(*i);
+ else
+ params.push_back(*i);
+
+ // Create allocas for the outputs
+ for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+ if (AggregateArgs) {
+ StructValues.push_back(*i);
+ } else {
+ AllocaInst *alloca =
+ new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+ codeReplacer->getParent()->begin()->begin());
+ ReloadOutputs.push_back(alloca);
+ params.push_back(alloca);
+ }
+ }
+
+ AllocaInst *Struct = 0;
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ std::vector<const Type*> ArgTypes;
+ for (Values::iterator v = StructValues.begin(),
+ ve = StructValues.end(); v != ve; ++v)
+ ArgTypes.push_back((*v)->getType());
+
+ // Allocate a struct at the beginning of this function
+ Type *StructArgTy = StructType::get(ArgTypes);
+ Struct =
+ new AllocaInst(StructArgTy, 0, "structArg",
+ codeReplacer->getParent()->begin()->begin());
+ params.push_back(Struct);
+
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::Int32Ty);
+ Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ "gep_" + StructValues[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ StoreInst *SI = new StoreInst(StructValues[i], GEP);
+ codeReplacer->getInstList().push_back(SI);
+ }
+ }
+
+ // Emit the call to the function
+ CallInst *call = CallInst::Create(newFunction, params.begin(), params.end(),
+ NumExitBlocks > 1 ? "targetBlock" : "");
+ codeReplacer->getInstList().push_back(call);
+
+ Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+ unsigned FirstOut = inputs.size();
+ if (!AggregateArgs)
+ std::advance(OutputArgBegin, inputs.size());
+
+ // Reload the outputs passed in by reference
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ Value *Output = 0;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::Int32Ty);
+ Idx[1] = ConstantInt::get(Type::Int32Ty, FirstOut + i);
+ GetElementPtrInst *GEP
+ = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ "gep_reload_" + outputs[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ Output = GEP;
+ } else {
+ Output = ReloadOutputs[i];
+ }
+ LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ codeReplacer->getInstList().push_back(load);
+ std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+ for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+ Instruction *inst = cast<Instruction>(Users[u]);
+ if (!BlocksToExtract.count(inst->getParent()))
+ inst->replaceUsesOfWith(outputs[i], load);
+ }
+ }
+
+ // Now we can emit a switch statement using the call as a value.
+ SwitchInst *TheSwitch =
+ SwitchInst::Create(ConstantInt::getNullValue(Type::Int16Ty),
+ codeReplacer, 0, codeReplacer);
+
+ // Since there may be multiple exits from the original region, make the new
+  // function return an unsigned exit number and switch on it. This loop iterates
+ // over all of the blocks in the extracted region, updating any terminator
+ // instructions in the to-be-extracted region that branch to blocks that are
+ // not in the region to be extracted.
+ std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+ unsigned switchVal = 0;
+ for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ e = BlocksToExtract.end(); i != e; ++i) {
+ TerminatorInst *TI = (*i)->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!BlocksToExtract.count(TI->getSuccessor(i))) {
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // add a new basic block which returns the appropriate value
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (!NewTarget) {
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = 0;
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::Int1Ty, !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::Int16Ty, SuccNum);
+ break;
+ }
+
+ ReturnInst *NTRet = ReturnInst::Create(brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum),
+ OldTarget);
+
+ // Restore values just before we exit
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+ // For an invoke, the normal destination is the only one that is
+ // dominated by the result of the invocation
+ BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+ bool DominatesDef = true;
+
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+ DefBlock = Invoke->getNormalDest();
+
+ // Make sure we are looking at the original successor block, not
+ // at a newly inserted exit block, which won't be in the dominator
+ // info.
+ for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+ ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+ if (DefBlock == I->second) {
+ DefBlock = I->first;
+ break;
+ }
+
+ // In the extract block case, if the block we are extracting ends
+ // with an invoke instruction, make sure that we don't emit a
+ // store of the invoke value for the unwind block.
+ if (!DT && DefBlock != OldTarget)
+ DominatesDef = false;
+ }
+
+ if (DT)
+ DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+ if (DominatesDef) {
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::Int32Ty);
+ Idx[1] = ConstantInt::get(Type::Int32Ty,FirstOut+out);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(OAI, Idx, Idx + 2,
+ "gep_" + outputs[out]->getName(),
+ NTRet);
+ new StoreInst(outputs[out], GEP, NTRet);
+ } else {
+ new StoreInst(outputs[out], OAI, NTRet);
+ }
+ }
+ // Advance output iterator even if we don't emit a store
+ if (!AggregateArgs) ++OAI;
+ }
+ }
+
+ // rewrite the original branch instruction with this new target
+ TI->setSuccessor(i, NewTarget);
+ }
+ }
+
+ // Now that we've done the deed, simplify the switch instruction.
+ const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ switch (NumExitBlocks) {
+ case 0:
+    // There are no successors (other than the block containing the switch
+    // itself), which means that previously this was the last part of the
+    // function, and hence this should be rewritten as a `ret'.
+
+ // Check if the function should return a value
+ if (OldFnRetTy == Type::VoidTy) {
+ ReturnInst::Create(0, TheSwitch); // Return void
+ } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+ // return what we have
+ ReturnInst::Create(TheSwitch->getCondition(), TheSwitch);
+ } else {
+      // Otherwise we must have extracted code containing an unwind or
+      // something; just return a null value of the function's return type.
+ ReturnInst::Create(Constant::getNullValue(OldFnRetTy), TheSwitch);
+ }
+
+ TheSwitch->eraseFromParent();
+ break;
+ case 1:
+ // Only a single destination, change the switch into an unconditional
+ // branch.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ case 2:
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+ call, TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ default:
+ // Otherwise, make the default destination of the switch instruction be one
+ // of the other successors.
+ TheSwitch->setOperand(0, call);
+ TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
+ TheSwitch->removeCase(NumExitBlocks); // Remove redundant case
+ break;
+ }
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+ Function *oldFunc = (*BlocksToExtract.begin())->getParent();
+ Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+ Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+ for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ e = BlocksToExtract.end(); i != e; ++i) {
+ // Delete the basic block from the old function, and the list of blocks
+ oldBlocks.remove(*i);
+
+ // Insert this basic block into the new function
+ newBlocks.push_back(*i);
+ }
+}
+
+/// ExtractCodeRegion - Removes a code region (such as a loop) from a function,
+/// replacing it with a call to a new function. Returns a pointer to the new
+/// function.
+///
+/// algorithm:
+///
+/// find inputs and outputs for the region
+///
+/// for inputs: add to function as args, map input instr* to arg#
+/// for outputs: add allocas for scalars,
+/// add to func as args, map output instr* to arg#
+///
+/// rewrite func to use argument #s instead of instr*
+///
+/// for each scalar output in the function: at every exit, store intermediate
+/// computed result back into memory.
+///
+Function *CodeExtractor::
+ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
+ if (!isEligible(code))
+ return 0;
+
+ // 1) Find inputs, outputs
+ // 2) Construct new function
+ // * Add allocas for defs, pass as args by reference
+ // * Pass in uses as args
+ // 3) Move code region, add call instr to func
+ //
+ BlocksToExtract.insert(code.begin(), code.end());
+
+ Values inputs, outputs;
+
+ // Assumption: this is a single-entry code region, and the header is the first
+ // block in the region.
+ BasicBlock *header = code[0];
+
+ for (unsigned i = 1, e = code.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]);
+ PI != E; ++PI)
+ assert(BlocksToExtract.count(*PI) &&
+ "No blocks in this region may have entries from outside the region"
+ " except for the first block!");
+
+ // If we have to split PHI nodes or the entry block, do so now.
+ severSplitPHINodes(header);
+
+ // If we have any return instructions in the region, split those blocks so
+ // that the return is not in the region.
+ splitReturnBlocks();
+
+ Function *oldFunction = header->getParent();
+
+  // This block takes the place of the original region
+ BasicBlock *codeReplacer = BasicBlock::Create("codeRepl", oldFunction,
+ header);
+
+ // The new function needs a root node because other nodes can branch to the
+ // head of the region, but the entry node of a function cannot have preds.
+ BasicBlock *newFuncRoot = BasicBlock::Create("newFuncRoot");
+ newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+ // Find inputs to, outputs from the code region.
+ findInputsOutputs(inputs, outputs);
+
+ // Construct new function based on inputs/outputs & add allocas for all defs.
+ Function *newFunction = constructFunction(inputs, outputs, header,
+ newFuncRoot,
+ codeReplacer, oldFunction,
+ oldFunction->getParent());
+
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+ moveCodeToFunction(newFunction);
+
+ // Loop over all of the PHI nodes in the header block, and change any
+ // references to the old incoming edge to be the new incoming edge.
+ for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!BlocksToExtract.count(PN->getIncomingBlock(i)))
+ PN->setIncomingBlock(i, newFuncRoot);
+ }
+
+ // Look at all successors of the codeReplacer block. If any of these blocks
+ // had PHI nodes in them, we need to update the "from" block to be the code
+ // replacer, not the original block in the extracted region.
+ std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+ succ_end(codeReplacer));
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ std::set<BasicBlock*> ProcessedPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+ PN->setIncomingBlock(i, codeReplacer);
+ else {
+ // There were multiple entries in the PHI for this block, now there
+ // is only one, so remove the duplicated entries.
+ PN->removeIncomingValue(i, false);
+ --i; --e;
+ }
+ }
+ }
+
+ //cerr << "NEW FUNCTION: " << *newFunction;
+ // verifyFunction(*newFunction);
+
+ // cerr << "OLD FUNCTION: " << *oldFunction;
+ // verifyFunction(*oldFunction);
+
+ DEBUG(if (verifyFunction(*newFunction)) abort());
+ return newFunction;
+}
+
+bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
+ // Deny code region if it contains allocas or vastarts.
+ for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end();
+ BB != e; ++BB)
+ for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end();
+ I != Ie; ++I)
+ if (isa<AllocaInst>(*I))
+ return false;
+ else if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ return true;
+}
+
+
+/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new
+/// function
+///
+Function* llvm::ExtractCodeRegion(DominatorTree &DT,
+ const std::vector<BasicBlock*> &code,
+ bool AggregateArgs) {
+ return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code);
+}
+
+/// ExtractLoop - slurp a natural loop into a brand new function
+///
+Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
+ return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
+}
+
+/// ExtractBasicBlock - slurp a basic block into a brand new function
+///
+Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) {
+ std::vector<BasicBlock*> Blocks;
+ Blocks.push_back(BB);
+ return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks);
+}
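+
+// Usage sketch (illustrative): a pass holding a DominatorTree DT can pull a
+// natural loop L into its own function via
+//   if (Function *NewF = llvm::ExtractLoop(DT, L, false))
+//     ...;  // L's blocks now live in NewF; the old code is a call to it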
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 0000000..b8dd754
--- /dev/null
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,144 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the function DemoteRegToStack(). This function takes a
+// virtual register computed by an Instruction and replaces it with a slot in
+// the stack frame, allocated via alloca. It returns the pointer to the
+// AllocaInst inserted. After this function is called on an instruction, we are
+// guaranteed that the only user of the instruction is a store that is
+// immediately after it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include <map>
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+///
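+/// Roughly (illustrative):  %v = add i32 %a, %b  becomes
+///   %v.reg2mem = alloca i32              ; in the entry block
+///   %v = add i32 %a, %b
+///   store i32 %v, i32* %v.reg2mem        ; immediately after the def
+/// and each former use of %v loads from %v.reg2mem instead.
+///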
+AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+ Instruction *AllocaPoint) {
+ if (I.use_empty()) {
+ I.eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = I.getParent()->getParent();
+ Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Change all of the users of the instruction to read from the stack slot
+ // instead.
+ while (!I.use_empty()) {
+ Instruction *U = cast<Instruction>(I.use_back());
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If this is a PHI node, we can't insert a load of the value before the
+ // use. Instead, insert the load in the predecessor block corresponding
+ // to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this PHI
+      // node, we cannot insert multiple loads. The problem is that the resultant
+ // PHI node will have multiple values (from each load) coming in from the
+ // same block, which is illegal SSA form. For this reason, we keep track
+ // and reuse loads we insert.
+ std::map<BasicBlock*, Value*> Loads;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I) {
+ Value *&V = Loads[PN->getIncomingBlock(i)];
+ if (V == 0) {
+ // Insert the load into the predecessor block
+ V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ PN->getIncomingBlock(i)->getTerminator());
+ }
+ PN->setIncomingValue(i, V);
+ }
+
+ } else {
+ // If this is a normal instruction, just insert a load.
+ Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ U->replaceUsesOfWith(&I, V);
+ }
+ }
+
+
+ // Insert stores of the computed value into the stack slot. We have to be
+  // careful if I is an invoke instruction though, because we can't insert the
+ // store AFTER the terminator instruction.
+ BasicBlock::iterator InsertPt;
+ if (!isa<TerminatorInst>(I)) {
+ InsertPt = &I;
+ ++InsertPt;
+ } else {
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical.
+ InvokeInst &II = cast<InvokeInst>(I);
+ assert(II.getNormalDest()->getSinglePredecessor() &&
+ "Cannot demote invoke with a critical successor!");
+ InsertPt = II.getNormalDest()->begin();
+ }
+
+ for (; isa<PHINode>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before any PHI nodes.
+ new StoreInst(&I, Slot, InsertPt);
+
+ return Slot;
+}
+
+
+/// DemotePHIToStack - This function takes a virtual register computed by a phi
+/// node and replaces it with a slot in the stack frame, allocated via alloca.
+/// The phi node is deleted and it returns the pointer to the alloca inserted.
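+/// (Sketch: a store of each incoming value is inserted at the end of its
+/// predecessor block, and the PHI itself is replaced by a load of the slot.)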
+AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+ if (P->use_empty()) {
+ P->eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = P->getParent()->getParent();
+ Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Iterate over each operand, insert store in each predecessor.
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+ assert(II->getParent() != P->getIncomingBlock(i) &&
+ "Invoke edge not supported yet"); II=II;
+ }
+ new StoreInst(P->getIncomingValue(i), Slot,
+ P->getIncomingBlock(i)->getTerminator());
+ }
+
+ // Insert load in place of the phi and replace all uses.
+ Value *V = new LoadInst(Slot, P->getName()+".reload", P);
+ P->replaceAllUsesWith(V);
+
+ // Delete phi.
+ P->eraseFromParent();
+
+ return Slot;
+}
diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp
new file mode 100644
index 0000000..87aff01
--- /dev/null
+++ b/lib/Transforms/Utils/InlineCost.cpp
@@ -0,0 +1,315 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/Transforms/Utils/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/CallingConv.h"
+#include "llvm/IntrinsicInst.h"
+
+using namespace llvm;
+
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
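+// For example (illustrative): a value feeding one conditional branch and one
+// three-successor switch yields an estimate of 40 + (3-1)*40 = 120 units.
+//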
+unsigned InlineCostAnalyzer::FunctionInfo::
+ CountCodeReductionForConstant(Value *V) {
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (isa<BranchInst>(*UI))
+ Reduction += 40; // Eliminating a conditional branch is a big win
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI))
+ // Eliminating a switch is a big win, proportional to the number of edges
+ // deleted.
+ Reduction += (SI->getNumSuccessors()-1) * 40;
+ else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ // Turning an indirect call into a direct call is a BIG win
+ Reduction += CI->getCalledValue() == V ? 500 : 0;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+ // Turning an indirect call into a direct call is a BIG win
+ Reduction += II->getCalledValue() == V ? 500 : 0;
+ } else {
+ // Figure out if this instruction will be removed due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(**UI);
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant) {
+ // We will get to remove this instruction...
+ Reduction += 7;
+
+ // And any other instructions that use it which become constants
+ // themselves.
+ Reduction += CountCodeReductionForConstant(&Inst);
+ }
+ }
+
+ return Reduction;
+}
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
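+// For example (illustrative): a pointer argument used by two loads and one
+// store, with no other users, is credited 3 * 10 = 30 units.
+//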
+unsigned InlineCostAnalyzer::FunctionInfo::
+ CountCodeReductionForAlloca(Value *V) {
+ if (!isa<PointerType>(V->getType())) return 0; // Not a pointer
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *I = cast<Instruction>(*UI);
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ Reduction += 10;
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (!GEP->hasAllConstantIndices())
+ Reduction += CountCodeReductionForAlloca(GEP)+15;
+ } else {
+ // If there is some other strange instruction, we're not going to be able
+ // to do much if we inline this.
+ return 0;
+ }
+ }
+
+ return Reduction;
+}
+
+/// analyzeFunction - Fill in the current structure with information gleaned
+/// from the specified function.
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
+ unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0;
+
+ // Look at the size of the callee. Each basic block counts as 20 units, and
+ // each instruction counts as 5.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ if (isa<DbgInfoIntrinsic>(II))
+ continue; // Debug intrinsics don't count as size.
+
+ CallSite CS = CallSite::get(const_cast<Instruction*>(&*II));
+
+ // If this function contains a call to setjmp or _setjmp, never inline
+ // it. This is a hack because we depend on the user marking their local
+ // variables as volatile if they are live across a setjmp call, and they
+ // probably won't do this in callers.
+ if (Function *F = CS.getCalledFunction())
+ if (F->isDeclaration() &&
+ (F->isName("setjmp") || F->isName("_setjmp"))) {
+ NeverInline = true;
+ return;
+ }
+
+ // Calls often compile into many machine instructions. Bump up their
+ // cost to reflect this.
+ if (!isa<IntrinsicInst>(II))
+ NumInsts += 5;
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
+ ++NumVectorInsts;
+
+ // Noop casts, including ptr <-> int, don't count.
+ if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+ isa<PtrToIntInst>(CI))
+ continue;
+ } else if (const GetElementPtrInst *GEPI =
+ dyn_cast<GetElementPtrInst>(II)) {
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (GEPI->hasAllConstantIndices())
+ continue;
+ }
+
+ ++NumInsts;
+ }
+
+ ++NumBlocks;
+ }
+
+ this->NumBlocks = NumBlocks;
+ this->NumInsts = NumInsts;
+ this->NumVectorInsts = NumVectorInsts;
+
+ // Check out all of the arguments to the function, figuring out how much
+ // code can be eliminated if one of the arguments is a constant.
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+ ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
+ CountCodeReductionForAlloca(I)));
+}
+
+
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
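+// Illustrative arithmetic (not normative): an internal callee with a single
+// use starts at -15000 (nearly always inlined), while a coldcc callee starts
+// at +2000; each argument then subtracts at least 20 from the cost.
+//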
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ SmallPtrSet<const Function *, 16> &NeverInline) {
+ Instruction *TheCall = CS.getInstruction();
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = TheCall->getParent()->getParent();
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else.
+ if (Callee->mayBeOverridden() ||
+ // Don't inline functions marked noinline.
+ Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee))
+ return llvm::InlineCost::getNever();
+
+ // InlineCost - This value measures how good of an inline candidate this call
+  // site is to inline. A lower inline cost makes it more likely for the call to
+ // be inlined. This value may go negative.
+ //
+ int InlineCost = 0;
+
+ // If there is only one call of the function, and it has internal linkage,
+ // make it almost guaranteed to be inlined.
+ //
+ if ((Callee->hasLocalLinkage() || Callee->hasAvailableExternallyLinkage()) &&
+ Callee->hasOneUse())
+ InlineCost -= 15000;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ InlineCost += 2000;
+
+  // If the instruction after the call is unreachable, or if the normal
+  // destination of the invoke is an unreachable instruction, the function is
+  // effectively noreturn. As such, there is little point in inlining this.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ InlineCost += 10000;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
+ InlineCost += 10000;
+
+ // Get information about the callee...
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ // If we should never inline this, return a huge cost.
+ if (CalleeFI.NeverInline)
+ return InlineCost::getNever();
+
+ // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we
+ // could move this up and avoid computing the FunctionInfo for
+ // things we are going to just return always inline for. This
+ // requires handling setjmp somewhere else, however.
+ if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getAlways();
+
+ if (CalleeFI.usesDynamicAlloca) {
+    // Get information about the caller...
+ FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CallerFI.NumBlocks == 0)
+ CallerFI.analyzeFunction(Caller);
+
+ // Don't inline a callee with dynamic alloca into a caller without them.
+ // Functions containing dynamic alloca's are inefficient in various ways;
+ // don't create more inefficiency.
+ if (!CallerFI.usesDynamicAlloca)
+ return InlineCost::getNever();
+ }
+
+ // Add to the inline quality for properties that make the call valuable to
+ // inline. This includes factors that indicate that the result of inlining
+ // the function will be optimizable. Currently this just looks at arguments
+ // passed into the function.
+ //
+ unsigned ArgNo = 0;
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I, ++ArgNo) {
+ // Each argument passed in has a cost at both the caller and the callee
+ // sides. This favors functions that take many arguments over functions
+ // that take few arguments.
+ InlineCost -= 20;
+
+ // If this is a function being passed in, it is very likely that we will be
+ // able to turn an indirect function call into a direct function call.
+ if (isa<Function>(I))
+ InlineCost -= 100;
+
+ // If an alloca is passed in, inlining this function is likely to allow
+ // significant future optimization possibilities (like scalar promotion, and
+ // scalarization), so encourage the inlining of the function.
+ //
+ else if (isa<AllocaInst>(I)) {
+ if (ArgNo < CalleeFI.ArgumentWeights.size())
+ InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight;
+
+ // If this is a constant being passed into the function, use the argument
+ // weights calculated for the callee to determine how much will be folded
+ // away with this information.
+ } else if (isa<Constant>(I)) {
+ if (ArgNo < CalleeFI.ArgumentWeights.size())
+ InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight;
+ }
+ }
+
+ // Now that we have considered all of the factors that make the call site more
+ // likely to be inlined, look at factors that make us not want to inline it.
+
+ // Don't inline into something too big, which would make it bigger.
+ //
+ InlineCost += Caller->size()/15;
+
+ // Look at the size of the callee. Each instruction counts as 5.
+ InlineCost += CalleeFI.NumInsts*5;
+
+ return llvm::InlineCost::get(InlineCost);
+}
+
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
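+//
+// For example (illustrative): a single-block callee in which vector
+// instructions exceed half of all instructions gets 1.0 + 0.5 + 2.0 = 3.5.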
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Get information about the callee...
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ float Factor = 1.0f;
+ // Single BB functions are often written to be inlined.
+ if (CalleeFI.NumBlocks == 1)
+ Factor += 0.5f;
+
+  // Be more aggressive if the function contains a good chunk of vector
+  // instructions (at least 10% of all instructions).
+ if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/2)
+ Factor += 2.0f;
+ else if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10)
+ Factor += 1.5f;
+ return Factor;
+}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 0000000..4989c00
--- /dev/null
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,656 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Attributes.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) {
+ return InlineFunction(CallSite(CI), CG, TD);
+}
+bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
+ return InlineFunction(CallSite(II), CG, TD);
+}
+
+/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes and turn unwind
+/// instructions into branches to the invoke unwind dest.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
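+///
+/// Sketch (illustrative): after inlining  invoke @f ... unwind label %lpad,
+/// every cloned call that may throw becomes  invoke ... unwind label %lpad,
+/// and every cloned 'unwind' terminator becomes  br label %lpad.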
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo,
+ CallGraph *CG) {
+ BasicBlock *InvokeDest = II->getUnwindDest();
+ std::vector<Value*> InvokeDestPHIValues;
+
+ // If there are PHI nodes in the unwind destination block, we need to
+ // keep track of which values came into them from this invoke, then remove
+ // the entry for this block.
+ BasicBlock *InvokeBlock = II->getParent();
+ for (BasicBlock::iterator I = InvokeDest->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Save the value to use for this edge.
+ InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(InvokeBlock));
+ }
+
+ Function *Caller = FirstNewBlock->getParent();
+
+ // The inlined code is currently at the end of the function, scan from the
+ // start of the inlined code to its end, checking for stuff we need to
+ // rewrite.
+ if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB) {
+ if (InlinedCodeInfo.ContainsCalls) {
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){
+ Instruction *I = BBI++;
+
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ if (!isa<CallInst>(I)) continue;
+ CallInst *CI = cast<CallInst>(I);
+
+ // If this call cannot unwind, don't convert it to an invoke.
+ if (CI->doesNotThrow())
+ continue;
+
+ // Convert this function call into an invoke instruction.
+ // First, split the basic block.
+ BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+
+ // Next, create the new invoke instruction, inserting it at the end
+ // of the old basic block.
+ SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
+ InvokeArgs.begin(), InvokeArgs.end(),
+ CI->getName(), BB->getTerminator());
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke!
+ CI->replaceAllUsesWith(II);
+
+ // Update the callgraph.
+ if (CG) {
+ // We should be able to do this:
+ // (*CG)[Caller]->replaceCallSite(CI, II);
+ // but that fails if the old call site isn't in the call graph,
+ // which, because of LLVM bug 3601, it sometimes isn't.
+ CallGraphNode *CGN = (*CG)[Caller];
+ for (CallGraphNode::iterator NI = CGN->begin(), NE = CGN->end();
+ NI != NE; ++NI) {
+ if (NI->first == CI) {
+ NI->first = II;
+ break;
+ }
+ }
+ }
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+ Split->getInstList().pop_front(); // Delete the original call
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ unsigned i = 0;
+ for (BasicBlock::iterator I = InvokeDest->begin();
+ isa<PHINode>(I); ++I, ++i) {
+ PHINode *PN = cast<PHINode>(I);
+ PN->addIncoming(InvokeDestPHIValues[i], BB);
+ }
+
+ // This basic block is now complete, start scanning the next one.
+ break;
+ }
+ }
+
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // An UnwindInst requires special handling when it gets inlined into an
+ // invoke site. Once this happens, we know that the unwind would cause
+ // a control transfer to the invoke exception destination, so we can
+ // transform it into a direct branch to the exception destination.
+ BranchInst::Create(InvokeDest, UI);
+
+ // Delete the unwind instruction!
+ UI->eraseFromParent();
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ unsigned i = 0;
+ for (BasicBlock::iterator I = InvokeDest->begin();
+ isa<PHINode>(I); ++I, ++i) {
+ PHINode *PN = cast<PHINode>(I);
+ PN->addIncoming(InvokeDestPHIValues[i], BB);
+ }
+ }
+ }
+ }
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+}
+
+/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
+/// into the caller, update the specified callgraph to reflect the changes we
+/// made. Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph may remain.
+static void UpdateCallGraphAfterInlining(CallSite CS,
+ Function::iterator FirstNewBlock,
+ DenseMap<const Value*, Value*> &ValueMap,
+ CallGraph &CG) {
+ const Function *Caller = CS.getInstruction()->getParent()->getParent();
+ const Function *Callee = CS.getCalledFunction();
+ CallGraphNode *CalleeNode = CG[Callee];
+ CallGraphNode *CallerNode = CG[Caller];
+
+ // Since we inlined some uninlined call sites in the callee into the caller,
+ // add edges from the caller to all of the callees of the callee.
+ CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
+
+ // Consider the case where CalleeNode == CallerNode.
+ CallGraphNode::CalledFunctionsVector CallCache;
+ if (CalleeNode == CallerNode) {
+ CallCache.assign(I, E);
+ I = CallCache.begin();
+ E = CallCache.end();
+ }
+
+ for (; I != E; ++I) {
+ const Instruction *OrigCall = I->first.getInstruction();
+
+ DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
+ // Only copy the edge if the call was inlined!
+ if (VMI != ValueMap.end() && VMI->second) {
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
+ CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
+ }
+ }
+ // Update the call graph by deleting the edge from Callee to Caller. We must
+ // do this after the loop above in case Caller and Callee are the same.
+ CallerNode->removeCallEdgeFor(CS);
+}
+
+/// findFnRegionEndMarker - This is a utility routine that is used by
+/// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds
+/// to the llvm.dbg.func.start of the function F. Otherwise return NULL.
+static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
+
+ GlobalVariable *FnStart = NULL;
+ const DbgRegionEndInst *FnEnd = NULL;
+ for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI) {
+ if (FnStart == NULL) {
+ if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) {
+ DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram()));
+ assert (SP.isNull() == false && "Invalid llvm.dbg.func.start");
+ if (SP.describes(F))
+ FnStart = SP.getGV();
+ }
+ } else {
+ if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
+ if (REI->getContext() == FnStart)
+ FnEnd = REI;
+ }
+ }
+ return FnEnd;
+}
+
+// InlineFunction - This function inlines the called function into the basic
+// block of the caller. This returns false if it is not possible to inline this
+// call. The program is still in a well defined state if this occurs though.
+//
+// Note that this only does one level of inlining. For example, if the
+// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+// exists in the instruction stream. Similarly, this will inline a recursive
+// function by one level.
+//
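+// Typical use (illustrative):
+//   if (InlineFunction(CS, CG, TD))
+//     ...;  // the callee's body has been cloned into CS's caller
+//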
+bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
+ Instruction *TheCall = CS.getInstruction();
+ assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
+ "Instruction not in function!");
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ if (CalledFunc == 0 || // Can't inline external function or indirect
+ CalledFunc->isDeclaration() || // call, or call to a vararg function!
+ CalledFunc->getFunctionType()->isVarArg()) return false;
+
+
+ // If the call to the callee is not a tail call, we must clear the 'tail'
+ // flags on any calls that we inline.
+ bool MustClearTailCallFlags =
+ !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CS.doesNotThrow();
+
+ BasicBlock *OrigBB = TheCall->getParent();
+ Function *Caller = OrigBB->getParent();
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC());
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return false;
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ //
+ Function::iterator LastBlock = &Caller->back();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ std::vector<ReturnInst*> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy ValueMap after cloning.
+ DenseMap<const Value*, Value*> ValueMap;
+
+ assert(CalledFunc->arg_size() == CS.arg_size() &&
+ "No varargs calls can be inlined!");
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+      // When byval arguments are actually inlined, we need to make the copy implied
+ // by them explicit. However, we don't do this if the callee is readonly
+ // or readnone, because the copy would be unneeded: the callee doesn't
+ // modify the struct.
+ if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
+ !CalledFunc->onlyReadsMemory()) {
+ const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
+ const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+
+ // Create the alloca. If we have TargetData, use nice alignment.
+ unsigned Align = 1;
+ if (TD) Align = TD->getPrefTypeAlignment(AggTy);
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
+ Caller->begin()->begin());
+ // Emit a memcpy.
+ const Type *Tys[] = { Type::Int64Ty };
+ Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+ Intrinsic::memcpy,
+ Tys, 1);
+ Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+ Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
+
+ Value *Size;
+ if (TD == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
+ };
+ CallInst *TheMemCpy =
+ CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);
+
+ // If we have a call graph, update it.
+ if (CG) {
+ CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+ CallGraphNode *CallerNode = (*CG)[Caller];
+ CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+ }
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ ActualArg = NewAlloca;
+ }
+
+ ValueMap[I] = ActualArg;
+ }
+
+    // Adjust llvm.dbg.region.end. If CalledFunc has a region end marker, clone
+    // that marker after the next stop point at the call site. The function body
+    // cloner does not clone the original region end marker from CalledFunc.
+    // This ensures that the inlined function's scope ends at the right place.
+ const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc);
+ if (DREI) {
+ for (BasicBlock::iterator BI = TheCall,
+ BE = TheCall->getParent()->end(); BI != BE; ++BI) {
+ if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) {
+ if (DbgRegionEndInst *NewDREI =
+ dyn_cast<DbgRegionEndInst>(DREI->clone()))
+ NewDREI->insertAfter(DSPI);
+ break;
+ }
+ }
+ }
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
+ &InlinedFunctionInfo, TD);
+
+ // Remember the first block that is newly cloned over.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+ // Update the callgraph if requested.
+ if (CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG);
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ //
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; )
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (isa<Constant>(AI->getArraySize())) {
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
+ ++I;
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(
+ InsertPoint,
+ FirstNewBlock->getInstList(),
+ AI, I);
+ }
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Constant *StackSave, *StackRestore;
+ StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+ StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore);
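+
+    // The transformed region ends up shaped like this (an illustrative
+    // sketch, not emitted verbatim):
+    //   %savedstack = call i8* @llvm.stacksave()
+    //   ...inlined body containing the dynamic allocas...
+    //   call void @llvm.stackrestore(i8* %savedstack)  ; before each return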
+
+ // If we are preserving the callgraph, add edges to the stacksave/restore
+ // functions for the calls we insert.
+ CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
+ if (CG) {
+ // We know that StackSave/StackRestore are Function*'s, because they are
+ // intrinsics which must have the right types.
+ StackSaveCGN = CG->getOrInsertFunction(cast<Function>(StackSave));
+ StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
+ CallerNode = (*CG)[Caller];
+ }
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
+ FirstNewBlock->begin());
+ if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
+ if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
+ }
+
+ // Count the number of StackRestore calls we insert.
+ unsigned NumStackRestores = Returns.size();
+
+ // If we are inlining an invoke instruction, insert restores before each
+ // unwind. These unwinds will be rewritten into branches later.
+ if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ CallInst::Create(StackRestore, SavedPtr, "", UI);
+ ++NumStackRestores;
+ }
+ }
+ }
+
+  // If we are inlining a tail call instruction through a call site that isn't
+  // marked 'tail', we must remove the tail marker from any calls in the
+  // inlined code. Also, calls inlined through a 'nounwind' call site should
+  // be marked 'nounwind'.
+ if (InlinedFunctionInfo.ContainsCalls &&
+ (MustClearTailCallFlags || MarkNoUnwind)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (MustClearTailCallFlags)
+ CI->setTailCall(false);
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+
+ // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
+ // instructions are unreachable.
+ if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (isa<UnwindInst>(Term)) {
+ new UnreachableInst(Term);
+ BB->getInstList().erase(Term);
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any inlined 'unwind' instructions into branches to the invoke exception
+ // destination, and call instructions into invoke instructions.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo, CG);
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ FirstNewBlock->begin(), FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->getBasicBlockList().pop_back();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!TheCall->use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (TheCall == R->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ // We are now done with the inlining.
+ return true;
+ }
+
+  // Otherwise, we have the normal case of more than one block to inline or
+  // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+
+    // Split the basic block. This guarantees that no PHI nodes will have to
+    // be updated due to new incoming edges, and makes the invoke case more
+    // symmetric to the call case.
+ AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ CalledFunc->getName()+".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+ CalledFunc->getName()+".exit");
+ }
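+
+  // At this point the CFG is shaped as sketched below (names illustrative);
+  // the code that follows retargets the branch and splices the blocks in:
+  //   OrigBB:            ...  br label %AfterCallBB
+  //   AfterCallBB:       the code that followed the call
+  //   <inlined blocks>   currently sitting at the end of the Caller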
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ TerminatorInst *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, FirstNewBlock);
+
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+ FirstNewBlock, Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ const Type *RTy = CalledFunc->getReturnType();
+
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ PHINode *PHI = 0;
+ if (!TheCall->use_empty()) {
+ PHI = PHINode::Create(RTy, TheCall->getName(),
+ AfterCallBB->begin());
+ // Anything that used the result of the function call should now use the
+ // PHI node as their operand.
+ TheCall->replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+ }
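+
+    // With two return sites returning %rv1 and %rv2 (illustrative names),
+    // the merge block now begins with a sketch like:
+    //   %res = phi i32 [ %rv1, %ret.bb1 ], [ %rv2, %ret.bb2 ]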
+
+ // Add a branch to the merge points and remove return instructions.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst::Create(AfterCallBB, RI);
+ RI->eraseFromParent();
+ }
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!TheCall->use_empty()) {
+ if (TheCall == Returns[0]->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+ ReturnBB->getInstList());
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+    // Delete the return instruction, then erase the now-empty ReturnBB.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!TheCall->use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the code entry block into calling block, right before the
+ // unconditional branch.
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+
+ // Remove the unconditional branch.
+ OrigBB->getInstList().erase(Br);
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ Caller->getBasicBlockList().erase(CalleeEntry);
+
+ return true;
+}
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 0000000..4f8a160
--- /dev/null
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,63 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names; this is mostly
+// useful when diffing the effect of an optimization, because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
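+// Typical invocation (a sketch): opt -instnamer input.bc -o output.bc
+//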
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+namespace {
+ struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(&ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+ AI != AE; ++AI)
+ if (!AI->hasName() && AI->getType() != Type::VoidTy)
+ AI->setName("tmp");
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ BB->setName("BB");
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->hasName() && I->getType() != Type::VoidTy)
+ I->setName("tmp");
+ }
+ return true;
+ }
+ };
+
+ char InstNamer::ID = 0;
+ static RegisterPass<InstNamer> X("instnamer",
+ "Assign names to anonymous instructions");
+}
+
+
+const PassInfo *const llvm::InstructionNamerID = &X;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 0000000..7d4f3a3
--- /dev/null
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,276 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the left into the right code:
+//
+//   for (...)                for (...)
+//     if (c)                   if (c)
+//       X1 = ...                 X1 = ...
+//     else                     else
+//       X2 = ...                 X2 = ...
+//     X3 = phi(X1, X2)         X3 = phi(X1, X2)
+//     ... = X3 + 4             X4 = phi(X3)
+//                              ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lcssa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumLCSSA, "Number of variables live out of a loop");
+
+namespace {
+ struct VISIBILITY_HIDDEN LCSSA : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : LoopPass(&ID) {}
+
+ // Cached analysis information for the current function.
+ LoopInfo *LI;
+ DominatorTree *DT;
+ std::vector<BasicBlock*> LoopBlocks;
+ PredIteratorCache PredCache;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ void ProcessInstruction(Instruction* Instr,
+ const SmallVector<BasicBlock*, 8>& exitBlocks);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+
+ // Request DominanceFrontier now, even though LCSSA does
+ // not use it. This allows Pass Manager to schedule Dominance
+ // Frontier early enough such that one LPPassManager can handle
+ // multiple loop transformation passes.
+ AU.addRequired<DominanceFrontier>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+ private:
+ void getLoopValuesUsedOutsideLoop(Loop *L,
+ SetVector<Instruction*> &AffectedValues,
+ const SmallVector<BasicBlock*, 8>& exitBlocks);
+
+ Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
+ DenseMap<DomTreeNode*, Value*> &Phis);
+
+ /// inLoop - returns true if the given block is within the current loop
+ bool inLoop(BasicBlock* B) {
+ return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
+ }
+ };
+}
+
+char LCSSA::ID = 0;
+static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
+
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+const PassInfo *const llvm::LCSSAID = &X;
+
+/// runOnLoop - Process the given loop, inserting LCSSA PHI nodes as needed.
+bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) {
+ PredCache.clear();
+
+ LI = &LPM.getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
+
+ // Speed up queries by creating a sorted list of blocks
+ LoopBlocks.clear();
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+ std::sort(LoopBlocks.begin(), LoopBlocks.end());
+
+ SmallVector<BasicBlock*, 8> exitBlocks;
+ L->getExitBlocks(exitBlocks);
+
+ SetVector<Instruction*> AffectedValues;
+ getLoopValuesUsedOutsideLoop(L, AffectedValues, exitBlocks);
+
+ // If no values are affected, we can save a lot of work, since we know that
+ // nothing will be changed.
+ if (AffectedValues.empty())
+ return false;
+
+ // Iterate over all affected values for this loop and insert Phi nodes
+ // for them in the appropriate exit blocks
+
+ for (SetVector<Instruction*>::iterator I = AffectedValues.begin(),
+ E = AffectedValues.end(); I != E; ++I)
+ ProcessInstruction(*I, exitBlocks);
+
+ assert(L->isLCSSAForm());
+
+ return true;
+}
+
+/// ProcessInstruction - Given a live-out instruction, insert LCSSA PHI nodes
+/// and rewrite all of its out-of-loop uses.
+void LCSSA::ProcessInstruction(Instruction *Instr,
+ const SmallVector<BasicBlock*, 8>& exitBlocks) {
+ ++NumLCSSA; // We are applying the transformation
+
+ // Keep track of the blocks that have the value available already.
+ DenseMap<DomTreeNode*, Value*> Phis;
+
+ DomTreeNode *InstrNode = DT->getNode(Instr->getParent());
+
+  // Insert the LCSSA phis into the exit blocks (dominated by the value), and
+  // add them to the Phis map.
+ for (SmallVector<BasicBlock*, 8>::const_iterator BBI = exitBlocks.begin(),
+ BBE = exitBlocks.end(); BBI != BBE; ++BBI) {
+ BasicBlock *BB = *BBI;
+ DomTreeNode *ExitBBNode = DT->getNode(BB);
+ Value *&Phi = Phis[ExitBBNode];
+ if (!Phi && DT->dominates(InstrNode, ExitBBNode)) {
+ PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa",
+ BB->begin());
+ PN->reserveOperandSpace(PredCache.GetNumPreds(BB));
+
+ // Remember that this phi makes the value alive in this block.
+ Phi = PN;
+
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock** PI = PredCache.GetPreds(BB); *PI; ++PI)
+ PN->addIncoming(Instr, *PI);
+ }
+ }
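+
+  // For a value %x (illustrative name) live out through a dominated exit
+  // block, the exit now begins with a sketch like:
+  //   exit:
+  //     %x.lcssa = phi i32 [ %x, %pred1 ], [ %x, %pred2 ]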
+
+
+ // Record all uses of Instr outside the loop. We need to rewrite these. The
+ // LCSSA phis won't be included because they use the value in the loop.
+ for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end();
+ UI != E;) {
+ BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(*UI)) {
+ UserBB = P->getIncomingBlock(UI);
+ }
+
+ // If the user is in the loop, don't rewrite it!
+ if (UserBB == Instr->getParent() || inLoop(UserBB)) {
+ ++UI;
+ continue;
+ }
+
+ // Otherwise, patch up uses of the value with the appropriate LCSSA Phi,
+ // inserting PHI nodes into join points where needed.
+ Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis);
+
+ // Preincrement the iterator to avoid invalidating it when we change the
+ // value.
+ Use &U = UI.getUse();
+ ++UI;
+ U.set(Val);
+ }
+}
+
+/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that
+/// are used by instructions outside of it.
+void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L,
+ SetVector<Instruction*> &AffectedValues,
+ const SmallVector<BasicBlock*, 8>& exitBlocks) {
+ // FIXME: For large loops, we may be able to avoid a lot of use-scanning
+ // by using dominance information. In particular, if a block does not
+ // dominate any of the loop exits, then none of the values defined in the
+ // block could be used outside the loop.
+ for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
+ BB != BE; ++BB) {
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I)
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI) {
+ BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+ if (PHINode* p = dyn_cast<PHINode>(*UI)) {
+ UserBB = p->getIncomingBlock(UI);
+ }
+
+ if (*BB != UserBB && !inLoop(UserBB)) {
+ AffectedValues.insert(I);
+ break;
+ }
+ }
+ }
+}
+
+/// GetValueForBlock - Get the value to use within the specified basic block.
+/// Available values are cached in Phis.
+Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
+ DenseMap<DomTreeNode*, Value*> &Phis) {
+ // If there is no dominator info for this BB, it is unreachable.
+ if (BB == 0)
+ return UndefValue::get(OrigInst->getType());
+
+ // If we have already computed this value, return the previously computed val.
+ if (Phis.count(BB)) return Phis[BB];
+
+ DomTreeNode *IDom = BB->getIDom();
+
+ // Otherwise, there are two cases: we either have to insert a PHI node or we
+ // don't. We need to insert a PHI node if this block is not dominated by one
+ // of the exit nodes from the loop (the loop could have multiple exits, and
+ // though the value defined *inside* the loop dominated all its uses, each
+ // exit by itself may not dominate all the uses).
+ //
+ // The simplest way to check for this condition is by checking to see if the
+ // idom is in the loop. If so, we *know* that none of the exit blocks
+ // dominate this block. Note that we *know* that the block defining the
+ // original instruction is in the idom chain, because if it weren't, then the
+ // original value didn't dominate this use.
+ if (!inLoop(IDom->getBlock())) {
+    // The idom is not in the loop, so we must still be "below" the exit block
+    // and must be fully dominated by the value live in the idom.
+ Value* val = GetValueForBlock(IDom, OrigInst, Phis);
+ Phis.insert(std::make_pair(BB, val));
+ return val;
+ }
+
+ BasicBlock *BBN = BB->getBlock();
+
+ // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so
+ // now, then get values to fill in the incoming values for the PHI.
+ PHINode *PN = PHINode::Create(OrigInst->getType(),
+ OrigInst->getName() + ".lcssa", BBN->begin());
+ PN->reserveOperandSpace(PredCache.GetNumPreds(BBN));
+ Phis.insert(std::make_pair(BB, PN));
+
+ // Fill in the incoming values for the block.
+ for (BasicBlock** PI = PredCache.GetPreds(BBN); *PI; ++PI)
+ PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI);
+ return PN;
+}
+
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 0000000..94483b8
--- /dev/null
+++ b/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,338 @@
+//===-- Local.cpp - Functions to perform local transformations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Local constant propagation.
+//
+
+// ConstantFoldTerminator - If a terminator instruction is predicated on a
+// constant value, convert it into an unconditional branch to the constant
+// destination.
+//
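+// For example (an illustrative sketch of two of the cases handled below):
+//   br bool true, label %T, label %F            -->  br label %T
+//   switch i32 1, label %D [ i32 1, label %C ]  -->  br label %C
+//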
+bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
+ TerminatorInst *T = BB->getTerminator();
+
+ // Branch - See if we are conditional jumping on constant
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ //cerr << "Function: " << T->getParent()->getParent()
+ // << "\nRemoving branch from " << T->getParent()
+ // << "\n\nTo: " << OldDest << endl;
+
+      // Let the basic block know that we are letting go of it. Based on this,
+      // it will adjust its PHI nodes.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ OldDest->removePredecessor(BI->getParent());
+
+ // Set the unconditional destination, and change the insn to be an
+ // unconditional branch.
+ BI->setUnconditionalDest(Destination);
+ return true;
+ } else if (Dest2 == Dest1) { // Conditional branch to same location?
+      // This branch matches something like this:
+      //     br bool %cond, label %Dest, label %Dest
+      // and changes it into:  br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Change a conditional branch to unconditional.
+ BI->setUnconditionalDest(Dest1);
+ return true;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch into a
+ // single branch instruction!
+ ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest
+ BasicBlock *DefaultDest = TheOnlyDest;
+ assert(TheOnlyDest == SI->getDefaultDest() &&
+ "Default destination is not successor #0?");
+
+ // Figure out which case it goes to...
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ if (SI->getSuccessorValue(i) == CI) {
+ TheOnlyDest = SI->getSuccessor(i);
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (SI->getSuccessor(i) == DefaultDest) {
+ // Remove this entry...
+ DefaultDest->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e; // Don't skip an entry...
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+      // We do this by resetting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
+ if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+ }
+
+ if (CI && !TheOnlyDest) {
+ // Branching on a constant, but not any of the cases, go to the default
+ // successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+      // Insert the new branch...
+ BranchInst::Create(TheOnlyDest, SI);
+ BasicBlock *BB = SI->getParent();
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == TheOnlyDest)
+ TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ else
+ Succ->removePredecessor(BB);
+ }
+
+ // Delete the old switch...
+ BB->getInstList().erase(SI);
+ return true;
+ } else if (SI->getNumSuccessors() == 2) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
+ Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(),
+ SI->getSuccessorValue(1), "cond", SI);
+ // Insert the new branch...
+ BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
+
+ // Delete the old switch...
+ SI->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination...
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
+bool llvm::isInstructionTriviallyDead(Instruction *I) {
+ if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+
+ // We don't want debug info removed by anything this general.
+ if (isa<DbgInfoIntrinsic>(I)) return false;
+
+ if (!I->mayHaveSideEffects()) return true;
+
+ // Special case intrinsics that "may have side effects" but can be deleted
+ // when dead.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // Safe to delete llvm.stacksave if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave)
+ return true;
+ return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively.
+void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
+ return;
+
+ SmallVector<Instruction*, 16> DeadInsts;
+ DeadInsts.push_back(I);
+
+ while (!DeadInsts.empty()) {
+ I = DeadInsts.pop_back_val();
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, 0);
+
+ if (!OpV->use_empty()) continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI))
+ DeadInsts.push_back(OpI);
+ }
+
+ I->eraseFromParent();
+ }
+}
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively.
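+///
+/// For instance (an illustrative sketch), a two-node cycle of single-use
+/// PHIs:
+///   %a = phi i32 [ %b, %bb1 ]   ; only user is %b
+///   %b = phi i32 [ %a, %bb2 ]   ; only user is %a
+/// is effectively dead, so both nodes get removed.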
+void
+llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+
+ // We can remove a PHI if it is on a cycle in the def-use graph
+ // where each node in the cycle has degree one, i.e. only one use,
+ // and is an instruction with no side effects.
+ if (!PN->hasOneUse())
+ return;
+
+ SmallPtrSet<PHINode *, 4> PHIs;
+ PHIs.insert(PN);
+ for (Instruction *J = cast<Instruction>(*PN->use_begin());
+ J->hasOneUse() && !J->mayHaveSideEffects();
+ J = cast<Instruction>(*J->use_begin()))
+ // If we find a PHI more than once, we're on a cycle that
+ // won't prove fruitful.
+ if (PHINode *JP = dyn_cast<PHINode>(J))
+ if (!PHIs.insert(cast<PHINode>(JP))) {
+ // Break the cycle and delete the PHI and its operands.
+ JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(JP);
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring...
+//
+
+/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into DestBB and
+/// deleting the predecessor block.
+///
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
+  // If DestBB has single-entry PHI nodes, fold them.
+ while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ Value *NewVal = PN->getIncomingValue(0);
+ // Replace self referencing PHI with undef, it must be dead.
+ if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NewVal);
+ PN->eraseFromParent();
+ }
+
+ BasicBlock *PredBB = DestBB->getSinglePredecessor();
+ assert(PredBB && "Block doesn't have a single predecessor!");
+
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+ // Anything that branched to PredBB now branches to DestBB.
+ PredBB->replaceAllUsesWith(DestBB);
+
+  // Nuke PredBB.
+ PredBB->eraseFromParent();
+}
+
+/// OnlyUsedByDbgInfoIntrinsics - Return true if the instruction I is only used
+/// by DbgInfoIntrinsics. If DbgInUses is specified then the vector is filled
+/// with the DbgInfoIntrinsics that use the instruction I.
+bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I,
+ SmallVectorImpl<DbgInfoIntrinsic *> *DbgInUses) {
+ if (DbgInUses)
+ DbgInUses->clear();
+
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI) {
+ if (DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(*UI)) {
+ if (DbgInUses)
+ DbgInUses->push_back(DI);
+ } else {
+ if (DbgInUses)
+ DbgInUses->clear();
+ return false;
+ }
+ }
+ return true;
+}
+
+/// UserIsDebugInfo - Return true if U is a constant expr used by
+/// llvm.dbg.variable or llvm.dbg.global_variable.
+bool llvm::UserIsDebugInfo(User *U) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(U);
+
+ if (!CE || CE->getNumUses() != 1)
+ return false;
+
+ Constant *Init = dyn_cast<Constant>(CE->use_back());
+ if (!Init || Init->getNumUses() != 1)
+ return false;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Init->use_back());
+ if (!GV || !GV->hasInitializer() || GV->getInitializer() != Init)
+ return false;
+
+ DIVariable DV(GV);
+ if (!DV.isNull())
+ return true; // User is llvm.dbg.variable
+
+ DIGlobalVariable DGV(GV);
+ if (!DGV.isNull())
+ return true; // User is llvm.dbg.global_variable
+
+ return false;
+}
+
+/// RemoveDbgInfoUser - Remove a User that represents debug info.
+void llvm::RemoveDbgInfoUser(User *U) {
+ assert (UserIsDebugInfo(U) && "Unexpected User!");
+ ConstantExpr *CE = cast<ConstantExpr>(U);
+ while (!CE->use_empty()) {
+ Constant *C = cast<Constant>(CE->use_back());
+ while (!C->use_empty()) {
+ GlobalVariable *GV = cast<GlobalVariable>(C->use_back());
+ GV->eraseFromParent();
+ }
+ C->destroyConstant();
+ }
+ CE->destroyConstant();
+}
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 0000000..03d273d
--- /dev/null
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,600 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header. This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header). This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsimplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
+STATISTIC(NumNested , "Number of nested loops split out");
+
+namespace {
+ struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : FunctionPass(&ID) {}
+
+ // AA - If we have an alias analysis object to update, this is it, otherwise
+ // this is null.
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // We need loop information to identify the loops...
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ }
+
+ /// verifyAnalysis() - Verify loop nest.
+ void verifyAnalysis() const {
+#ifndef NDEBUG
+ LoopInfo *NLI = &getAnalysis<LoopInfo>();
+ for (LoopInfo::iterator I = NLI->begin(), E = NLI->end(); I != E; ++I)
+ (*I)->verifyLoop();
+#endif
+ }
+
+ private:
+ bool ProcessLoop(Loop *L);
+ BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
+ void InsertPreheaderForLoop(Loop *L);
+ Loop *SeparateNestedLoop(Loop *L);
+ void InsertUniqueBackedgeBlock(Loop *L);
+ void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+ };
+}
+
+char LoopSimplify::ID = 0;
+static RegisterPass<LoopSimplify>
+X("loopsimplify", "Canonicalize natural loops", true);
+
+// Publicly exposed interface to pass...
+const PassInfo *const llvm::LoopSimplifyID = &X;
+FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnFunction(Function &F) {
+ bool Changed = false;
+ LI = &getAnalysis<LoopInfo>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ // Check to see that no blocks (other than the header) in loops have
+ // predecessors that are not in loops. This is not valid for natural loops,
+ // but can occur if the blocks are unreachable. Since they are unreachable we
+ // can just shamelessly destroy their terminators to make them not branch into
+ // the loop!
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ // This case can only occur for unreachable blocks. Blocks that are
+ // unreachable can't be in loops, so filter those blocks out.
+ if (LI->getLoopFor(BB)) continue;
+
+ bool BlockUnreachable = false;
+ TerminatorInst *TI = BB->getTerminator();
+
+    // Check to see if this block branches into any loop other than through a
+    // top-level loop's header.
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ // If this successor is not in a loop, BB is clearly ok.
+ Loop *L = LI->getLoopFor(TI->getSuccessor(i));
+ if (!L) continue;
+
+ // If the succ is the loop header, and if L is a top-level loop, then this
+ // is an entrance into a loop through the header, which is also ok.
+ if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0)
+ continue;
+
+ // Otherwise, this is an entrance into a loop from some place invalid.
+ // Either the loop structure is invalid and this is not a natural loop (in
+ // which case the compiler is buggy somewhere else) or BB is unreachable.
+ BlockUnreachable = true;
+ break;
+ }
+
+ // If this block is ok, check the next one.
+ if (!BlockUnreachable) continue;
+
+ // Otherwise, this block is dead. To clean up the CFG and to allow later
+ // loop transformations to ignore this case, we delete the edges into the
+ // loop by replacing the terminator.
+
+ // Remove PHI entries from the successors.
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ TI->getSuccessor(i)->removePredecessor(BB);
+
+ // Add a new unreachable instruction before the old terminator.
+ new UnreachableInst(TI);
+
+ // Delete the dead terminator.
+ if (AA) AA->deleteValue(TI);
+ if (!TI->use_empty())
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->eraseFromParent();
+ Changed |= true;
+ }
+
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ Changed |= ProcessLoop(*I);
+
+ return Changed;
+}
+
+/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
+/// all loops have preheaders.
+///
+bool LoopSimplify::ProcessLoop(Loop *L) {
+ bool Changed = false;
+ReprocessLoop:
+
+ // Canonicalize inner loops before outer loops. Inner loop canonicalization
+ // can provide work for the outer loop to canonicalize.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= ProcessLoop(*I);
+
+ assert(L->getBlocks()[0] == L->getHeader() &&
+ "Header isn't first block in loop?");
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ if (L->getLoopPreheader() == 0) {
+ InsertPreheaderForLoop(L);
+ NumInserted++;
+ Changed = true;
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ SetVector<BasicBlock*> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
+ for (SetVector<BasicBlock*>::iterator I = ExitBlockSet.begin(),
+ E = ExitBlockSet.end(); I != E; ++I) {
+ BasicBlock *ExitBlock = *I;
+ for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+ PI != PE; ++PI)
+ // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+ // allowed.
+ if (!L->contains(*PI)) {
+ RewriteLoopExitBlock(L, ExitBlock);
+ NumInserted++;
+ Changed = true;
+ break;
+ }
+ }
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ unsigned NumBackedges = L->getNumBackEdges();
+ if (NumBackedges != 1) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (NumBackedges < 8) {
+ if (Loop *NL = SeparateNestedLoop(L)) {
+ ++NumNested;
+ // This is a big restructuring change, reprocess the whole loop.
+ ProcessLoop(NL);
+ Changed = true;
+        // GCC doesn't eliminate this tail recursion, hence the goto.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ InsertUniqueBackedgeBlock(L);
+ NumInserted++;
+ Changed = true;
+ }
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = PN->hasConstantValue()) {
+ if (AA) AA->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one. This method has two phases:
+/// preheader insertion and analysis updating.
+///
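+/// For a header reached from outside blocks %e1 and %e2 (illustrative names),
+/// both entry edges get redirected through one new block, a sketch like:
+///   header.preheader:                            ; preds = %e1, %e2
+///     br label %header
+///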
+void LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+
+ // Compute the set of predecessors of the loop that are not in the loop.
+ SmallVector<BasicBlock*, 8> OutsideBlocks;
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI)
+ if (!L->contains(*PI)) // Coming in from outside the loop?
+ OutsideBlocks.push_back(*PI); // Keep track of it...
+
+ // Split out the loop pre-header.
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
+ ".preheader", this);
+
+
+ //===--------------------------------------------------------------------===//
+ // Update analysis results now that we have performed the transformation
+ //
+
+ // We know that we have loop information to update... update it now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+}
+
+/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
+/// blocks. This method is used to split exit blocks that have predecessors
+/// outside of the loop.
+BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+ SmallVector<BasicBlock*, 8> LoopBlocks;
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I)
+ if (L->contains(*I))
+ LoopBlocks.push_back(*I);
+
+ assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
+ BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
+ LoopBlocks.size(), ".loopexit",
+ this);
+
+ // Update Loop Information - we know that the new block will be in whichever
+ // loop the Exit block is in. Note that it may not be in that immediate loop,
+ // if the successor is some other loop header. In that case, we continue
+ // walking up the loop tree to find a loop that contains both the successor
+ // block and the predecessor block.
+ Loop *SuccLoop = LI->getLoopFor(Exit);
+ while (SuccLoop && !SuccLoop->contains(L->getHeader()))
+ SuccLoop = SuccLoop->getParentLoop();
+ if (SuccLoop)
+ SuccLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ return NewBB;
+}
+
+/// AddBlockAndPredsToSet - Add the specified block, and all of its
+/// predecessors, to the specified set, if it's not already in there. Stop
+/// predecessor traversal when we reach StopBlock.
+static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+ std::set<BasicBlock*> &Blocks) {
+ std::vector<BasicBlock *> WorkList;
+ WorkList.push_back(InputBB);
+ do {
+ BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ if (Blocks.insert(BB).second && BB != StopBlock)
+      // If BB has not already been processed and it is not the stop block,
+      // insert its predecessors into the work list.
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *WBB = *I;
+ WorkList.push_back(WBB);
+ }
+ } while(!WorkList.empty());
+}
+
+/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
+/// PHI node that tells us how to partition the loops.
+static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+ AliasAnalysis *AA) {
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I);
+ ++I;
+ if (Value *V = PN->hasConstantValue())
+ if (!isa<Instruction>(V) || DT->dominates(cast<Instruction>(V), PN)) {
+ // This is a degenerate PHI already, don't modify it!
+ PN->replaceAllUsesWith(V);
+ if (AA) AA->deleteValue(PN);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Scan this PHI node looking for a use of the PHI node by itself.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN &&
+ L->contains(PN->getIncomingBlock(i)))
+ // We found something tasty to remove.
+ return PN;
+ }
+ return 0;
+}
+
+// PlaceSplitBlockCarefully - If it is not already positioned there, move the
+// new block to right after one of the 'outside' blocks. This prevents the
+// preheader from being placed inside the loop body, e.g. when the loop hasn't
+// been rotated.
+void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = NewBB; --BBI;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
+ }
+
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
+
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = 0;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i];
+ if (++BBI != NewBB->getParent()->end() &&
+ L->contains(BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
+ }
+ }
+
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
+}
+
+
+/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
+/// them out into a nested loop. This is important for code that looks like
+/// this:
+///
+///  Loop:
+///    ...
+///    br cond, Loop, Next
+///    ...
+///    br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop. PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
+ PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
+ if (PN == 0) return 0; // No known way to partition.
+
+ // Pull out all predecessors that have varying values in the loop. This
+ // handles the case when a PHI node has multiple instances of itself as
+ // arguments.
+ SmallVector<BasicBlock*, 8> OuterLoopPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) != PN ||
+ !L->contains(PN->getIncomingBlock(i)))
+ OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
+ OuterLoopPreds.size(),
+ ".outer", this);
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+ // Create the new outer loop.
+ Loop *NewOuter = new Loop();
+
+ // Change the parent loop to use the outer loop as its child now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->replaceChildLoopWith(L, NewOuter);
+ else
+ LI->changeTopLevelLoop(L, NewOuter);
+
+ // This block is going to be our new header block: add it to this loop and all
+ // parent loops.
+ NewOuter->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ // L is now a subloop of our outer loop.
+ NewOuter->addChildLoop(L);
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ NewOuter->addBlockEntry(*I);
+
+ // Determine which blocks should stay in L and which should be moved out to
+ // the Outer loop now.
+ std::set<BasicBlock*> BlocksInL;
+ for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI!=E; ++PI)
+ if (DT->dominates(Header, *PI))
+ AddBlockAndPredsToSet(*PI, Header, BlocksInL);
+
+
+ // Scan all of the loop children of L, moving them to OuterLoop if they are
+ // not part of the inner loop.
+ const std::vector<Loop*> &SubLoops = L->getSubLoops();
+ for (size_t I = 0; I != SubLoops.size(); )
+ if (BlocksInL.count(SubLoops[I]->getHeader()))
+ ++I; // Loop remains in L
+ else
+ NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+ // Now that we know which blocks are in L and which need to be moved to
+ // OuterLoop, move any blocks that need it.
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ if (!BlocksInL.count(BB)) {
+ // Move this block to the parent, updating the exit blocks sets
+ L->removeBlockFromLoop(BB);
+ if ((*LI)[BB] == L)
+ LI->changeLoopFor(BB, NewOuter);
+ --i;
+ }
+ }
+
+ return NewOuter;
+}
+
+
+
+/// InsertUniqueBackedgeBlock - This method is called when the specified loop
+/// has more than one backedge in it. If this occurs, revector all of these
+/// backedges to target a new basic block and have that block branch to the loop
+/// header. This ensures that loops have exactly one backedge.
+///
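+/// With latch blocks %l1 and %l2 (illustrative names), the result is a sketch
+/// like:
+///   header.backedge:                             ; preds = %l1, %l2
+///     br label %header
+///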
+void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {
+ assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+ // Get information about the loop
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ Function *F = Header->getParent();
+
+ // Figure out which basic blocks contain back-edges to the loop header.
+ std::vector<BasicBlock*> BackedgeBlocks;
+ for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I)
+ if (*I != Preheader) BackedgeBlocks.push_back(*I);
+
+ // Create and insert the new backedge block...
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getName()+".backedge", F);
+ BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+
+ // Move the new backedge block to right after the last backedge block.
+ Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+ F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+ // Now that the block has been inserted into the function, create PHI nodes in
+ // the backedge block which correspond to any PHI nodes in the header block.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be",
+ BETerminator);
+ NewPN->reserveOperandSpace(BackedgeBlocks.size());
+ if (AA) AA->copyValue(PN, NewPN);
+
+ // Loop over the PHI node, moving all entries except the one for the
+ // preheader over to the new PHI node.
+ unsigned PreheaderIdx = ~0U;
+ bool HasUniqueIncomingValue = true;
+ Value *UniqueValue = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *IBB = PN->getIncomingBlock(i);
+ Value *IV = PN->getIncomingValue(i);
+ if (IBB == Preheader) {
+ PreheaderIdx = i;
+ } else {
+ NewPN->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (UniqueValue == 0)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Delete all of the incoming values from the old PN except the preheader's
+ assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+ if (PreheaderIdx != 0) {
+ PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+ PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+ }
+    // Nuke all entries except the zeroth.
+ for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+ PN->removeIncomingValue(e-i, false);
+
+ // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+ PN->addIncoming(NewPN, BEBlock);
+
+ // As an optimization, if all incoming values in the new PhiNode (which is a
+ // subset of the incoming values of the old PHI node) have the same value,
+ // eliminate the PHI Node.
+ if (HasUniqueIncomingValue) {
+ NewPN->replaceAllUsesWith(UniqueValue);
+ if (AA) AA->deleteValue(NewPN);
+ BEBlock->getInstList().erase(NewPN);
+ }
+ }
+
+  // Now that all of the PHI nodes have been inserted and adjusted, modify the
+  // backedge blocks to jump to the BEBlock instead of the header.
+ for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+ TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
+ if (TI->getSuccessor(Op) == Header)
+ TI->setSuccessor(Op, BEBlock);
+ }
+
+ //===--- Update all analyses which we must preserve now -----------------===//
+
+ // Update Loop Information - we know that this block is now in the current
+ // loop and all parent loops.
+ L->addBasicBlockToLoop(BEBlock, LI->getBase());
+
+ // Update dominator information
+ DT->splitBlock(BEBlock);
+ if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
+ DF->splitBlock(BEBlock);
+}
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
new file mode 100644
index 0000000..3249895
--- /dev/null
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -0,0 +1,177 @@
+//===- LowerAllocations.cpp - Reduce malloc & free insts to calls ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerAllocations transformation is a target-dependent transformation
+// because it depends on the size of data types and alignment constraints.
+//
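+// For illustration, a rough sketch of the rewrite (assuming a target whose
+// intptr_t is i32 and a 4-byte element type):
+//
+//   %p = malloc i32, i32 %n
+//
+// becomes approximately:
+//
+//   %sz = mul i32 %n, 4
+//   %m = tail call i8* @malloc(i32 %sz)
+//   %p = bitcast i8* %m to i32*
+//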
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerallocs"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumLowered, "Number of allocations lowered");
+
+namespace {
+ /// LowerAllocations - Turn malloc and free instructions into %malloc and
+ /// %free calls.
+ ///
+ class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
+ Constant *MallocFunc; // Functions in the module we are processing
+ Constant *FreeFunc; // Initialized by doInitialization
+ bool LowerMallocArgToInteger;
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LowerAllocations(bool LowerToInt = false)
+ : BasicBlockPass(&ID), MallocFunc(0), FreeFunc(0),
+ LowerMallocArgToInteger(LowerToInt) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ AU.setPreservesCFG();
+
+ // This is a cluster of orthogonal Transforms:
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreservedID(LowerInvokePassID);
+ }
+
+    /// doInitialization - For the lower allocations pass, this ensures that
+ /// a module contains a declaration for a malloc and a free function.
+ ///
+ bool doInitialization(Module &M);
+
+ virtual bool doInitialization(Function &F) {
+ return doInitialization(*F.getParent());
+ }
+
+ /// runOnBasicBlock - This method does the actual work of converting
+ /// instructions over, assuming that the pass has already been initialized.
+ ///
+ bool runOnBasicBlock(BasicBlock &BB);
+ };
+}
+
+char LowerAllocations::ID = 0;
+static RegisterPass<LowerAllocations>
+X("lowerallocs", "Lower allocations from instructions to calls");
+
+// Publicly exposed interface to pass...
+const PassInfo *const llvm::LowerAllocationsID = &X;
+// createLowerAllocationsPass - Interface to this file...
+Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) {
+ return new LowerAllocations(LowerMallocArgToInteger);
+}
+
+
+// doInitialization - For the lower allocations pass, this ensures that a
+// module contains a declaration for a malloc and a free function.
+//
+// This function is always successful.
+//
+bool LowerAllocations::doInitialization(Module &M) {
+ const Type *BPTy = PointerType::getUnqual(Type::Int8Ty);
+ // Prototype malloc as "char* malloc(...)", because we don't know in
+ // doInitialization whether size_t is int or long.
+ FunctionType *FT = FunctionType::get(BPTy, std::vector<const Type*>(), true);
+ MallocFunc = M.getOrInsertFunction("malloc", FT);
+ FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, BPTy, (Type *)0);
+ return true;
+}
+
+// runOnBasicBlock - This method does the actual work of converting
+// instructions over, assuming that the pass has already been initialized.
+//
+bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ assert(MallocFunc && FreeFunc && "Pass not initialized!");
+
+ BasicBlock::InstListType &BBIL = BB.getInstList();
+
+ const TargetData &TD = getAnalysis<TargetData>();
+ const Type *IntPtrTy = TD.getIntPtrType();
+
+ // Loop over all of the instructions, looking for malloc or free instructions
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+ if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
+ const Type *AllocTy = MI->getType()->getElementType();
+
+      // malloc(type) becomes i8* malloc(size)
+ Value *MallocArg;
+ if (LowerMallocArgToInteger)
+ MallocArg = ConstantInt::get(Type::Int64Ty,
+ TD.getTypeAllocSize(AllocTy));
+ else
+ MallocArg = ConstantExpr::getSizeOf(AllocTy);
+ MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg),
+ IntPtrTy);
+
+ if (MI->isArrayAllocation()) {
+ if (isa<ConstantInt>(MallocArg) &&
+ cast<ConstantInt>(MallocArg)->isOne()) {
+ MallocArg = MI->getOperand(0); // Operand * 1 = Operand
+ } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) {
+ CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/);
+ MallocArg = ConstantExpr::getMul(CO, cast<Constant>(MallocArg));
+ } else {
+ Value *Scale = MI->getOperand(0);
+ if (Scale->getType() != IntPtrTy)
+ Scale = CastInst::CreateIntegerCast(Scale, IntPtrTy, false /*ZExt*/,
+ "", I);
+
+ // Multiply it by the array size if necessary...
+ MallocArg = BinaryOperator::Create(Instruction::Mul, Scale,
+ MallocArg, "", I);
+ }
+ }
+
+ // Create the call to Malloc.
+ CallInst *MCall = CallInst::Create(MallocFunc, MallocArg, "", I);
+ MCall->setTailCall();
+
+ // Create a cast instruction to convert to the right type...
+ Value *MCast;
+ if (MCall->getType() != Type::VoidTy)
+ MCast = new BitCastInst(MCall, MI->getType(), "", I);
+ else
+ MCast = Constant::getNullValue(MI->getType());
+
+ // Replace all uses of the old malloc inst with the cast inst
+ MI->replaceAllUsesWith(MCast);
+ I = --BBIL.erase(I); // remove and delete the malloc instr...
+ Changed = true;
+ ++NumLowered;
+ } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
+ Value *PtrCast =
+ new BitCastInst(FI->getOperand(0),
+ PointerType::getUnqual(Type::Int8Ty), "", I);
+
+ // Insert a call to the free function...
+ CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall();
+
+ // Delete the old free instruction
+ I = --BBIL.erase(I);
+ Changed = true;
+ ++NumLowered;
+ }
+ }
+
+ return Changed;
+}
+
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 0000000..1f6b1a2
--- /dev/null
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,614 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort(). If the program does dynamically use the unwind instruction, the
+// program will print a message then abort.
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive. It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs the CFG is not entirely accurate (exceptional
+// control flow edges are not correct anymore) so only very simple things should
+// be done after the lowerinvoke pass has run (like generation of native code).
+// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
+// support the invoke instruction yet" lowering pass.
+//
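+// For illustration, under the 'cheap' model an invoke such as:
+//
+//   invoke void @foo() to label %normal unwind label %handler
+//
+// becomes, roughly:
+//
+//   call void @foo()
+//   br label %normal
+//
+// and any 'unwind' instruction becomes a call to abort() followed by an
+// (unreachable) return.
+//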
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetLowering.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+namespace {
+ class VISIBILITY_HIDDEN LowerInvoke : public FunctionPass {
+ // Used for both models.
+ Constant *WriteFn;
+ Constant *AbortFn;
+ Value *AbortMessage;
+ unsigned AbortMessageLength;
+
+ // Used for expensive EH support.
+ const Type *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Constant *SetJmpFn, *LongJmpFn;
+
+ // We peek in TLI to grab the target's jmp_buf size and alignment
+ const TargetLowering *TLI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvoke(const TargetLowering *tli = NULL)
+ : FunctionPass(&ID), TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreservedID(LowerAllocationsID);
+ }
+
+ private:
+ void createAbortMessage(Module *M);
+ void writeAbortMessage(Instruction *IB);
+ bool insertCheapEHSupport(Function &F);
+ void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes);
+ void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum, SwitchInst *CatchSwitch);
+ bool insertExpensiveEHSupport(Function &F);
+ };
+}
+
+char LowerInvoke::ID = 0;
+static RegisterPass<LowerInvoke>
+X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators");
+
+const PassInfo *const llvm::LowerInvokePassID = &X;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI);
+}
+
+// doInitialization - Make sure that there is a prototype for abort in the
+// current module.
+bool LowerInvoke::doInitialization(Module &M) {
+ const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+ AbortMessage = 0;
+ if (ExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers.
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ JBSize = JBSize ? JBSize : 200;
+ const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+ { // The type is recursive, so use a type holder.
+ std::vector<const Type*> Elements;
+ Elements.push_back(JmpBufTy);
+ OpaqueType *OT = OpaqueType::get();
+ Elements.push_back(PointerType::getUnqual(OT));
+ PATypeHolder JBLType(StructType::get(Elements));
+ OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle.
+ JBLinkTy = JBLType.get();
+ M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
+ }
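+    // Sketched in IR terms, the resulting recursive type is roughly:
+    //   %llvm.sjljeh.jmpbufty = type { [JBSize x i8*], %llvm.sjljeh.jmpbufty* }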
+
+ const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+ JBListHead = new GlobalVariable(PtrJBList, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist", &M);
+ }
+
+// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
+// so it looks like Intrinsic::_setjmp
+#if defined(_MSC_VER) && defined(setjmp)
+#define setjmp_undefined_for_visual_studio
+#undef setjmp
+#endif
+
+ SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)
+// let's return it to _setjmp state in case anyone ever needs it after this
+// point under VisualStudio
+#define setjmp _setjmp
+#endif
+
+ LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+ }
+
+ // We need the 'write' and 'abort' functions for both models.
+ AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0);
+#if 0 // "write" is Unix-specific.. code is going away soon anyway.
+ WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty,
+ VoidPtrTy, Type::Int32Ty, (Type *)0);
+#else
+ WriteFn = 0;
+#endif
+ return true;
+}
+
+void LowerInvoke::createAbortMessage(Module *M) {
+ if (ExpensiveEHSupport) {
+ // The abort message for expensive EH support tells the user that the
+ // program 'unwound' without an 'invoke' instruction.
+ Constant *Msg =
+ ConstantArray::get("ERROR: Exception thrown, but not caught!\n");
+ AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
+
+ GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+ GlobalValue::InternalLinkage,
+ Msg, "abortmsg", M);
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+ AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
+ } else {
+ // The abort message for cheap EH support tells the user that EH is not
+ // enabled.
+ Constant *Msg =
+ ConstantArray::get("Exception handler needed, but not enabled. Recompile"
+ " program with -enable-correct-eh-support.\n");
+ AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
+
+ GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+ GlobalValue::InternalLinkage,
+ Msg, "abortmsg", M);
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+ AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
+ }
+}
+
+
+void LowerInvoke::writeAbortMessage(Instruction *IB) {
+#if 0
+ if (AbortMessage == 0)
+ createAbortMessage(IB->getParent()->getParent()->getParent());
+
+ // These are the arguments we WANT...
+ Value* Args[3];
+ Args[0] = ConstantInt::get(Type::Int32Ty, 2);
+ Args[1] = AbortMessage;
+ Args[2] = ConstantInt::get(Type::Int32Ty, AbortMessageLength);
+ (new CallInst(WriteFn, Args, 3, "", IB))->setTailCall();
+#endif
+}
+
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end());
+ // Insert a normal call instruction...
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs.begin(), CallArgs.end(), "",II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ II->replaceAllUsesWith(NewCall);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Remove any PHI node entries from the exception destination.
+ II->getUnwindDest()->removePredecessor(BB);
+
+ // Remove the invoke instruction now.
+ BB->getInstList().erase(II);
+
+ ++NumInvokes; Changed = true;
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // Insert a new call to write(2, AbortMessage, AbortMessageLength);
+ writeAbortMessage(UI);
+
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "", UI)->setTailCall();
+
+ // Insert a return instruction. This really should be a "barrier", as it
+ // is unreachable.
+ ReturnInst::Create(F.getReturnType() == Type::VoidTy ? 0 :
+ Constant::getNullValue(F.getReturnType()), UI);
+
+ // Remove the unwind instruction now.
+ BB->getInstList().erase(UI);
+
+ ++NumUnwinds; Changed = true;
+ }
+ return Changed;
+}
+
+/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
+/// specified invoke instruction with a call.
+void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum,
+ SwitchInst *CatchSwitch) {
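+  // Net effect, sketched: InvokeNo is stored (volatile) into InvokeNum so the
+  // catch block's switch can route an unwind back to this invoke's unwind
+  // destination (and is reset to zero on the normal path); the invoke itself
+  // becomes a plain call followed by an unconditional branch to its normal
+  // destination.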
+ ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert a store of the invoke num before the invoke and store zero into the
+ // location afterward.
+ new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
+
+ BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
+ // nonvolatile.
+ new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+
+ // Insert a normal call instruction.
+ std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end());
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs.begin(), CallArgs.end(), "",
+ II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ II->replaceAllUsesWith(NewCall);
+
+ // Replace the invoke with an uncond branch.
+ BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+ II->eraseFromParent();
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges. Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge. This process also splits all critical edges
+// coming out of invokes.
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ // This is always a no-op cast because we're casting AI to AI->getType() so
+ // src and destination types are identical. BitCast is the only possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+    // Normally it is forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However, we're
+ // replacing it here with the same value it was constructed with to simply
+ // make NC its user.
+ NC->setOperand(0, AI);
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ std::vector<Instruction*> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Scan all of the uses and see if the live range is live across an unwind
+ // edge. If we find a use live across an invoke edge, create an alloca
+ // and spill the value.
+ std::set<InvokeInst*> InvokesWithStoreInserted;
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
+
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+ std::vector<ReturnInst*> Returns;
+ std::vector<UnwindInst*> Unwinds;
+ std::vector<InvokeInst*> Invokes;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+
+ if (Unwinds.empty() && Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // TODO: This is not an optimal way to do this. In particular, this always
+ // inserts setjmp calls into the entries of functions with invoke instructions
+ // even though there are possibly paths through the function that do not
+ // execute any invokes. In particular, for functions with early exits, e.g.
+ // the 'addMove' method in hexxagon, it would be nice to not have to do the
+ // setjmp stuff on the early exit path. This requires a bit of dataflow, but
+ // would not be too hard to do.
+
+ // If we have an invoke instruction, insert a setjmp that dominates all
+ // invokes. After the setjmp, use a cond branch that goes to the original
+  // code path on zero, and to a designated 'catch' block on nonzero.
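+  //
+  // The rewritten entry block then ends, roughly, like this:
+  //
+  //   %sjret = call i32 @llvm.setjmp(i8* %buf)
+  //   %notunwind = icmp eq i32 %sjret, 0
+  //   br i1 %notunwind, label %setjmp.cont, label %setjmp.catch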
+ Value *OldJmpBufPtr = 0;
+ if (!Invokes.empty()) {
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+    // coming out of invokes.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be live across invokes.
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ AllocaInst *JmpBuf =
+ new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin());
+
+ std::vector<Value*> Idx;
+ Idx.push_back(Constant::getNullValue(Type::Int32Ty));
+ Idx.push_back(ConstantInt::get(Type::Int32Ty, 1));
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
+ "OldBuf", EntryBB->getTerminator());
+
+ // Copy the JBListHead to the alloca.
+ Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
+ EntryBB->getTerminator());
+ new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
+
+ // Add the new jumpbuf to the list.
+ new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
+
+ // Create the catch block. The catch block is basically a big switch
+ // statement that goes to all of the invoke catch blocks.
+ BasicBlock *CatchBB = BasicBlock::Create("setjmp.catch", &F);
+
+ // Create an alloca which keeps track of which invoke is currently
+ // executing. For normal calls it contains zero.
+ AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum",
+ EntryBB->begin());
+ new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true,
+ EntryBB->getTerminator());
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call).
+ BasicBlock *UnwindBB = BasicBlock::Create("unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(UnwindBB));
+
+ Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
+ SwitchInst *CatchSwitch =
+ SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
+
+ // Now that things are set up, insert the setjmp call itself.
+
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "setjmp.cont");
+
+ Idx[1] = ConstantInt::get(Type::Int32Ty, 0);
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
+ "TheJmpBuf",
+ EntryBB->getTerminator());
+ JmpBufPtr = new BitCastInst(JmpBufPtr, PointerType::getUnqual(Type::Int8Ty),
+ "tmp", EntryBB->getTerminator());
+ Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
+ EntryBB->getTerminator());
+
+ // Compare the return value to zero.
+ Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet,
+ Constant::getNullValue(SJRet->getType()),
+ "notunwind", EntryBB->getTerminator());
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB);
+
+ // At this point, we are all set up, rewrite each invoke instruction.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, CatchSwitch);
+ }
+
+ // We know that there is at least one unwind.
+
+  // Create three new blocks: one to load the jmpbuf ptr and compare it
+  // against null, one to do the longjmp, and an error block for the case
+  // where it is null. Add them at the end of the function because they are
+  // not hot.
+ BasicBlock *UnwindHandler = BasicBlock::Create("dounwind", &F);
+ BasicBlock *UnwindBlock = BasicBlock::Create("unwind", &F);
+ BasicBlock *TermBlock = BasicBlock::Create("unwinderror", &F);
+
+ // If this function contains an invoke, restore the old jumpbuf ptr.
+ Value *BufPtr;
+ if (OldJmpBufPtr) {
+ // Before the return, insert a copy from the saved value to the new value.
+ BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
+ new StoreInst(BufPtr, JBListHead, UnwindHandler);
+ } else {
+ BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
+ }
+
+  // Load the JBList; if it's null, then there was no catch!
+ Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr,
+ Constant::getNullValue(BufPtr->getType()),
+ "notnull", UnwindHandler);
+ BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
+
+ // Create the block to do the longjmp.
+ // Get a pointer to the jmpbuf and longjmp.
+ std::vector<Value*> Idx;
+ Idx.push_back(Constant::getNullValue(Type::Int32Ty));
+ Idx.push_back(ConstantInt::get(Type::Int32Ty, 0));
+ Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf",
+ UnwindBlock);
+ Idx[0] = new BitCastInst(Idx[0], PointerType::getUnqual(Type::Int8Ty),
+ "tmp", UnwindBlock);
+ Idx[1] = ConstantInt::get(Type::Int32Ty, 1);
+ CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock);
+ new UnreachableInst(UnwindBlock);
+
+ // Set up the term block ("throw without a catch").
+ new UnreachableInst(TermBlock);
+
+ // Insert a new call to write(2, AbortMessage, AbortMessageLength);
+ writeAbortMessage(TermBlock->getTerminator());
+
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "",
+ TermBlock->getTerminator())->setTailCall();
+
+
+ // Replace all unwinds with a branch to the unwind handler.
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(UnwindHandler, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, restore the old jmpbuf pointer to its input value.
+ if (OldJmpBufPtr) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *R = Returns[i];
+
+ // Before the return, insert a copy from the saved value to the new value.
+ Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
+ new StoreInst(OldBuf, JBListHead, true, R);
+ }
+ }
+
+ return true;
+}
+
+bool LowerInvoke::runOnFunction(Function &F) {
+ if (ExpensiveEHSupport)
+ return insertExpensiveEHSupport(F);
+ else
+ return insertCheapEHSupport(F);
+}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 0000000..1da5936
--- /dev/null
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,323 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
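+// For example, a switch such as:
+//
+//   switch i32 %x, label %default [ i32 0, label %a
+//                                   i32 1, label %b
+//                                   i32 9, label %c ]
+//
+// becomes, roughly, a balanced tree of comparisons:
+//
+//   NodeBlock:
+//     %Pivot = icmp slt i32 %x, 1
+//     br i1 %Pivot, label %LeafBlock, label %NodeBlock1
+//
+// where each leaf block tests one case value (or range) and otherwise
+// branches to the default destination.
+//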
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
+ /// instructions. Note that this cannot be a BasicBlock pass because it
+ /// modifies the CFG!
+ class VISIBILITY_HIDDEN LowerSwitch : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSwitch() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreservedID(LowerInvokePassID);
+ AU.addPreservedID(LowerAllocationsID);
+ }
+
+ struct CaseRange {
+ Constant* Low;
+ Constant* High;
+ BasicBlock* BB;
+
+ CaseRange() : Low(0), High(0), BB(0) { }
+ CaseRange(Constant* low, Constant* high, BasicBlock* bb) :
+ Low(low), High(high), BB(bb) { }
+ };
+
+ typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange>::iterator CaseItr;
+ private:
+ void processSwitchInst(SwitchInst *SI);
+
+ BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+ };
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
+
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+}
+
+char LowerSwitch::ID = 0;
+static RegisterPass<LowerSwitch>
+X("lowerswitch", "Lower SwitchInst's to branches");
+
+// Publicly exposed interface to pass...
+const PassInfo *const llvm::LowerSwitchID = &X;
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitch();
+}
+
+bool LowerSwitch::runOnFunction(Function &F) {
+ bool Changed = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ Changed = true;
+ processSwitchInst(SI);
+ }
+ }
+
+ return Changed;
+}
+
+// operator<< - Used for debugging purposes.
+//
+static std::ostream& operator<<(std::ostream &O,
+ const LowerSwitch::CaseVector &C) {
+ O << "[";
+
+ for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
+ E = C.end(); B != E; ) {
+ O << *B->Low << " -" << *B->High;
+ if (++B != E) O << ", ";
+ }
+
+ return O << "]";
+}
+
+static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) {
+ if (O.stream()) *O.stream() << C;
+ return O;
+}
+
+// switchConvert - Convert the switch statement into a binary lookup of
+// the case values. The function recursively builds this tree.
+//
+BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
+ Value* Val, BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ unsigned Size = End - Begin;
+
+ if (Size == 1)
+ return newLeafBlock(*Begin, Val, OrigBlock, Default);
+
+ unsigned Mid = Size / 2;
+ std::vector<CaseRange> LHS(Begin, Begin + Mid);
+ DOUT << "LHS: " << LHS << "\n";
+ std::vector<CaseRange> RHS(Begin + Mid, End);
+ DOUT << "RHS: " << RHS << "\n";
+
+ CaseRange& Pivot = *(Begin + Mid);
+ DEBUG(errs() << "Pivot ==> "
+ << cast<ConstantInt>(Pivot.Low)->getValue() << " -"
+ << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+
+ BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
+ OrigBlock, Default);
+ BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
+ OrigBlock, Default);
+
+  // Create a new node that checks if the value is < pivot. Go to the
+  // left branch if so, and to the right branch if not.
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewNode = BasicBlock::Create("NodeBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewNode);
+
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot");
+ NewNode->getInstList().push_back(Comp);
+ BranchInst::Create(LBranch, RBranch, Comp, NewNode);
+ return NewNode;
+}
+
+// newLeafBlock - Create a new leaf block for the binary lookup tree. It
+// checks if the switch's value == the case's value. If not, then it
+// jumps to the default branch. At this point in the tree, the value
+// can't be another valid case value, so the jump to the "default" branch
+// is warranted.
+//
+BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewLeaf = BasicBlock::Create("LeafBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewLeaf);
+
+ // Emit comparison
+ ICmpInst* Comp = NULL;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low,
+ "SwitchLeaf", NewLeaf);
+ } else {
+ // Make range comparison
+ if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf", NewLeaf);
+ } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf", NewLeaf);
+ } else {
+ // Emit V-Lo <=u Hi-Lo
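+      // For example, the range [5, 9] becomes (Val - 5) <=u 4, which holds
+      // exactly when 5 <= Val <= 9.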
+ Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
+ Val->getName()+".off",
+ NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf", NewLeaf);
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock* Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode* PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
+ cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
+// Clusterify - Transform the simple list of cases into a list of CaseRange's
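+// For example, the cases {0 -> %a, 1 -> %a, 2 -> %a, 5 -> %b} cluster into
+// the ranges [0, 2] -> %a and [5, 5] -> %b, needing three compares (a range
+// costs two) instead of four.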
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+ unsigned numCmps = 0;
+
+ // Start with "simple" cases
+ for (unsigned i = 1; i < SI->getNumSuccessors(); ++i)
+ Cases.push_back(CaseRange(SI->getSuccessorValue(i),
+ SI->getSuccessorValue(i),
+ SI->getSuccessor(i)));
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters
+ if (Cases.size()>=2)
+ for (CaseItr I=Cases.begin(), J=next(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+// processSwitchInst - Replace the specified switch instruction with a sequence
+// of chained if-then insts in a balanced binary search.
+//
+void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+ BasicBlock *CurBlock = SI->getParent();
+ BasicBlock *OrigBlock = CurBlock;
+ Function *F = CurBlock->getParent();
+ Value *Val = SI->getOperand(0); // The value we are switching on...
+ BasicBlock* Default = SI->getDefaultDest();
+
+ // If there is only the default destination, don't bother with the code below.
+ if (SI->getNumOperands() == 2) {
+ BranchInst::Create(SI->getDefaultDest(), CurBlock);
+ CurBlock->getInstList().erase(SI);
+ return;
+ }
+
+  // Create a new, empty default block so that the new hierarchy of
+  // if-then statements goes to it and the PHI nodes are happy.
+ BasicBlock* NewDefault = BasicBlock::Create("NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+
+ BranchInst::Create(Default, NewDefault);
+
+ // If there is an entry in any PHI nodes for the default edge, make sure
+ // to update them as well.
+ for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+ }
+
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+
+ DOUT << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n";
+ DOUT << "Cases: " << Cases << "\n";
+
+ BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
+ OrigBlock, NewDefault);
+
+ // Branch to our shiny new if-then stuff...
+ BranchInst::Create(SwitchBlock, OrigBlock);
+
+ // We are now done with the switch instruction, delete it.
+ CurBlock->getInstList().erase(SI);
+}
diff --git a/lib/Transforms/Utils/Makefile b/lib/Transforms/Utils/Makefile
new file mode 100644
index 0000000..d1e9336
--- /dev/null
+++ b/lib/Transforms/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/Utils/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMTransformUtils
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 0000000..2b06d77
--- /dev/null
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,92 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
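+// A minimal usage sketch (assuming an llvm::Module &M is in scope; the pass
+// manager schedules the DominatorTree/DominanceFrontier analyses this pass
+// requires):
+//
+//   PassManager PM;
+//   PM.add(createPromoteMemoryToRegisterPass());
+//   PM.run(M);
+//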
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+namespace {
+ struct VISIBILITY_HIDDEN PromotePass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PromotePass() : FunctionPass(&ID) {}
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ //
+ virtual bool runOnFunction(Function &F);
+
+ // getAnalysisUsage - We need dominance frontiers
+ //
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+ AU.setPreservesCFG();
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreservedID(LowerInvokePassID);
+ AU.addPreservedID(LowerAllocationsID);
+ }
+ };
+} // end of anonymous namespace
+
+char PromotePass::ID = 0;
+static RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register");
+
+bool PromotePass::runOnFunction(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+
+ bool Changed = false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ PromoteMemToReg(Allocas, DT, DF);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+// Publicly exposed interface to pass...
+const PassInfo *const llvm::PromoteMemoryToRegisterID = &X;
+// createPromoteMemoryToRegister - Provide an entry point to create this pass.
+//
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+ return new PromotePass();
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 0000000..b717699
--- /dev/null
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,1003 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promotes memory references to be register references. It promotes
+// alloca instructions which only have loads and stores as uses. An alloca is
+// transformed by using dominator frontiers to place PHI nodes, then traversing
+// the function in depth-first order to rewrite loads and stores as appropriate.
+// This is just the standard SSA construction algorithm to construct "pruned"
+// SSA form.
+//
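+// For example, an alloca whose stores and loads straddle a CFG diamond:
+//
+//   %x = alloca i32
+//   store i32 1, i32* %x    ; in block %then
+//   store i32 2, i32* %x    ; in block %else
+//   %v = load i32* %x       ; in block %merge
+//
+// is rewritten, roughly, so that the load becomes a PHI node at the join
+// point and the alloca and stores disappear:
+//
+//   %v = phi i32 [ 1, %then ], [ 2, %else ]
+//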
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
+STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
+STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
+STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
+
+// Provide DenseMapInfo for all pointers.
+namespace llvm {
+template<>
+struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
+ typedef std::pair<BasicBlock*, unsigned> EltTy;
+ static inline EltTy getEmptyKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
+ }
+ static inline EltTy getTombstoneKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
+ }
+ static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
+ return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
+ }
+ static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return true; }
+};
+}
+
+/// isAllocaPromotable - Return true if this alloca is legal for promotion.
+/// This is true if there are only loads and stores to the alloca.
+///
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // FIXME: If the memory unit is of pointer or integer type, we can permit
+ // assignments to subsections of the memory unit.
+
+ // Only allow direct and non-volatile loads and stores...
+ for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) // Loop over all of the uses of the alloca
+ if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (LI->isVolatile())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ if (SI->getOperand(0) == AI)
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ if (SI->isVolatile())
+ return false;
+ } else if (const BitCastInst *BC = dyn_cast<BitCastInst>(*UI)) {
+ // A bitcast that does not feed into debug info inhibits promotion.
+ if (!BC->hasOneUse() || !isa<DbgInfoIntrinsic>(*BC->use_begin()))
+ return false;
+ // If the only use is by debug info, this alloca will not exist in
+ // non-debug code, so don't try to promote; this ensures the same
+ // codegen with debug info. Otherwise, debug info should not
+ // inhibit promotion (but we must examine other uses).
+ if (AI->hasOneUse())
+ return false;
+ } else {
+ return false;
+ }
+
+ return true;
+}
+
+namespace {
+ struct AllocaInfo;
+
+ // Data package used by RenamePass()
+ class VISIBILITY_HIDDEN RenamePassData {
+ public:
+ typedef std::vector<Value *> ValVector;
+
+ RenamePassData() {}
+ RenamePassData(BasicBlock *B, BasicBlock *P,
+ const ValVector &V) : BB(B), Pred(P), Values(V) {}
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+
+ void swap(RenamePassData &RHS) {
+ std::swap(BB, RHS.BB);
+ std::swap(Pred, RHS.Pred);
+ Values.swap(RHS.Values);
+ }
+ };
+
+ /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
+ /// load/store instructions in the block that directly load or store an alloca.
+ ///
+ /// This functionality is important because it avoids scanning large basic
+ /// blocks multiple times when promoting many allocas in the same block.
+ class VISIBILITY_HIDDEN LargeBlockInfo {
+ /// InstNumbers - For each instruction that we track, keep the index of the
+ /// instruction. The index starts out as the number of the instruction from
+ /// the start of the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+ public:
+
+ /// isInterestingInstruction - This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// getInstructionIndex - Get or calculate the index of the specified
+ /// instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end()) return It->second;
+
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+      // avoid gratuitous rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
+ BBI != E; ++BBI)
+ if (isInterestingInstruction(BBI))
+ InstNumbers[BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
+
+ void deleteValue(const Instruction *I) {
+ InstNumbers.erase(I);
+ }
+
+ void clear() {
+ InstNumbers.clear();
+ }
+ };
+
+ struct VISIBILITY_HIDDEN PromoteMem2Reg {
+ /// Allocas - The alloca instructions being promoted.
+ ///
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree &DT;
+ DominanceFrontier &DF;
+
+ /// AST - An AliasSetTracker object to update. If null, don't update it.
+ ///
+ AliasSetTracker *AST;
+
+ /// AllocaLookup - Reverse mapping of Allocas.
+ ///
+ std::map<AllocaInst*, unsigned> AllocaLookup;
+
+ /// NewPhiNodes - The PhiNodes we're adding.
+ ///
+ DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+
+ /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
+ /// it corresponds to.
+ DenseMap<PHINode*, unsigned> PhiToAllocaMap;
+
+ /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
+ /// each alloca that is of pointer type, we keep track of what to copyValue
+ /// to the inserted PHI nodes here.
+ ///
+ std::vector<Value*> PointerAllocaValues;
+
+ /// Visited - The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock*, 16> Visited;
+
+ /// BBNumbers - Contains a stable numbering of basic blocks to avoid
+    /// non-deterministic behavior.
+ DenseMap<BasicBlock*, unsigned> BBNumbers;
+
+ /// BBNumPreds - Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock*, unsigned> BBNumPreds;
+ public:
+ PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
+ DominanceFrontier &df, AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DF(df), AST(ast) {}
+
+ void run();
+
+ /// properlyDominates - Return true if I1 properly dominates I2.
+ ///
+ bool properlyDominates(Instruction *I1, Instruction *I2) const {
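+      // An invoke defines its result only along its normal edge, so for
+      // dominance purposes treat the definition as occurring at the start
+      // of the invoke's normal destination block.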
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
+ I1 = II->getNormalDest()->begin();
+ return DT.properlyDominates(I1->getParent(), I2->getParent());
+ }
+
+ /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
+ ///
+ bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
+ return DT.dominates(BB1, BB2);
+ }
+
+ private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ unsigned getNumPreds(const BasicBlock *BB) {
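+      // DenseMap value-initializes missing entries to zero, so the count is
+      // stored biased by one to tell "not yet computed" apart from a block
+      // with no predecessors.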
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB))+1;
+ return NP-1;
+ }
+
+ void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info);
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
+
+ void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+ void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+
+
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
+ SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
+ };
+
+ struct AllocaInfo {
+ std::vector<BasicBlock*> DefiningBlocks;
+ std::vector<BasicBlock*> UsingBlocks;
+
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ Value *AllocaPointerVal;
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ OnlyStore = 0;
+ OnlyBlock = 0;
+ OnlyUsedInOneBlock = true;
+ AllocaPointerVal = 0;
+ }
+
+ /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
+ /// ivars.
+ void AnalyzeAlloca(AllocaInst *AI) {
+ clear();
+
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (Value::use_iterator U = AI->use_begin(), E = AI->use_end();
+ U != E;) {
+ Instruction *User = cast<Instruction>(*U);
+ ++U;
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+        // Remove any uses of this alloca in DbgInfoIntrinsics.
+ assert(BC->hasOneUse() && "Unexpected alloca uses!");
+ DbgInfoIntrinsic *DI = cast<DbgInfoIntrinsic>(*BC->use_begin());
+ DI->eraseFromParent();
+ BC->eraseFromParent();
+ continue;
+ }
+ else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
+
+ if (OnlyUsedInOneBlock) {
+ if (OnlyBlock == 0)
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
+ OnlyUsedInOneBlock = false;
+ }
+ }
+ }
+ };
+} // end of anonymous namespace
+
+
+void PromoteMem2Reg::run() {
+ Function &F = *DF.getRoot()->getParent();
+
+ if (AST) PointerAllocaValues.resize(Allocas.size());
+
+ AllocaInfo Info;
+ LargeBlockInfo LBI;
+
+ for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+ AllocaInst *AI = Allocas[AllocaNum];
+
+ assert(isAllocaPromotable(AI) &&
+ "Cannot promote non-promotable alloca!");
+ assert(AI->getParent()->getParent() == &F &&
+ "All allocas should be in the same function, which is the same as DF!");
+
+ if (AI->use_empty()) {
+ // If there are no uses of the alloca, just delete it now.
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+
+ // Remove the alloca from the Allocas list, since it has been processed
+ RemoveFromAllocasList(AllocaNum);
+ ++NumDeadAlloca;
+ continue;
+ }
+
+ // Calculate the set of read and write locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+ RewriteSingleStoreAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (Info.UsingBlocks.empty()) {
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ ++NumSingleStore;
+ continue;
+ }
+ }
+
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock) {
+ PromoteSingleBlockAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the stores are all that is
+ // left.
+ if (Info.UsingBlocks.empty()) {
+
+ // Remove the (now dead) stores and alloca.
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->use_back());
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
+ }
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ ++NumLocalPromoted;
+ continue;
+ }
+ }
+
+ // If we haven't computed a numbering for the BB's in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ BBNumbers[I] = ID++;
+ }
+
+ // If we have an AST to keep updated, remember some pointer value that is
+ // stored into the alloca.
+ if (AST)
+ PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need PHI
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ DetermineInsertionPoint(AI, AllocaNum, Info);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+
+ // Set the incoming values for the basic block to be undef values for all of
+ // the allocas. We do this in case there is a load of a value that has not
+ // been stored yet; such a load will simply see this undef value.
+ //
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+ // Walk all basic blocks in the function, performing the SSA rename algorithm
+ // and inserting the phi nodes we marked as necessary.
+ //
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ while (!RenamePassWorkList.empty()) {
+ RenamePassData RPD;
+ RPD.swap(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ }
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ Instruction *A = Allocas[i];
+
+ // If there are any uses of the alloca instructions left, they must be in
+ // sections of dead code that were not processed on the dominance frontier.
+ // Just delete the users now.
+ //
+ if (!A->use_empty())
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ if (AST) AST->deleteValue(A);
+ A->eraseFromParent();
+ }
+
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges one value and/or undefs, get the value.
+ if (Value *V = PN->hasConstantValue(true)) {
+ if (!isa<Instruction>(V) ||
+ properlyDominates(cast<Instruction>(V), PN)) {
+ if (AST && isa<PointerType>(PN->getType()))
+ AST->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ //
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+ // Only do work here if the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ std::sort(Preds.begin(), Preds.end());
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVector<BasicBlock*, 16>::iterator EntIt =
+ std::lower_bound(Preds.begin(), Preds.end(),
+ SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+ // At this point, the blocks left in the preds list must have dummy
+ // entries inserted into every PHI node in the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+ SomePHI->addIncoming(UndefVal, Preds[pred]);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+
+/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
+/// are blocks which lead to uses. Knowing this allows us to avoid inserting
+/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
+/// would be dead).
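+///
+/// For example (hypothetical CFG): if the alloca is stored in block A and
+/// only loaded in block C, with edges A->B->C and a side path A->D that never
+/// reaches the load, then B and C are live-in but D is not, so no PHI node is
+/// needed for frontiers that only cover D.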
+void PromoteMem2Reg::
+ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
+
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
+ LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
+ Info.UsingBlocks.begin(), Info.UsingBlocks.end());
+
+ // If any of the using blocks is also a definition block, check to see if the
+ // definition occurs before or after the use. If it happens before the use,
+ // the value isn't really live-in.
+ for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
+ BasicBlock *BB = LiveInBlockWorklist[i];
+ if (!DefBlocks.count(BB)) continue;
+
+ // Okay, this is a block that both uses and defines the value. If the first
+ // reference to the alloca is a def (store), then we know it isn't live-in.
+ for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) != AI) continue;
+
+ // We found a store to the alloca before a load. The alloca is not
+ // actually live-in here.
+ LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
+ LiveInBlockWorklist.pop_back();
+ --i, --e;
+ break;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->getOperand(0) != AI) continue;
+
+ // Okay, we found a load before a store to the alloca. It is actually
+ // live into this block.
+ break;
+ }
+ }
+ }
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region where the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB))
+ continue;
+
+ // Since the value is live into BB, it is either defined in a predecessor or
+ // live into it too. Add the preds to the worklist unless they are a
+ // defining block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
+
+/// DetermineInsertionPoint - At this point, we're committed to promoting the
+/// alloca using IDF's, and the standard SSA construction algorithm. Determine
+/// which blocks need phi nodes and see if we can optimize out some work by
+/// avoiding insertion of dead phi nodes.
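+///
+/// Sketch of the iterated step (hypothetical CFG): if the alloca is stored in
+/// blocks B1 and B2 whose dominance frontier contains J, a PHI node is queued
+/// in J; since J now defines the value as well, J's own frontier is processed
+/// in turn, until no new PHI blocks are discovered.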
+void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info) {
+
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock*, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // Compute the locations where PhiNodes need to be inserted. Look at the
+ // dominance frontier of EACH basic-block we have a write in.
+ unsigned CurrentVersion = 0;
+ SmallPtrSet<PHINode*, 16> InsertedPHINodes;
+ std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
+ while (!Info.DefiningBlocks.empty()) {
+ BasicBlock *BB = Info.DefiningBlocks.back();
+ Info.DefiningBlocks.pop_back();
+
+ // Look up the DF for this write, add it to defining blocks.
+ DominanceFrontier::const_iterator it = DF.find(BB);
+ if (it == DF.end()) continue;
+
+ const DominanceFrontier::DomSetType &S = it->second;
+
+ // In theory we don't need the indirection through the DFBlocks vector.
+ // In practice, the order of calling QueuePhiNode would depend on the
+ // (unspecified) ordering of basic blocks in the dominance frontier,
+ // which would give PHI nodes non-deterministic subscripts. Fix this by
+ // processing blocks in order of their occurrence in the function.
+ for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
+ PE = S.end(); P != PE; ++P) {
+ // If the frontier block is not in the live-in set for the alloca, don't
+ // bother processing it.
+ if (!LiveInBlocks.count(*P))
+ continue;
+
+ DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
+ }
+
+ // Sort by the block ordering in the function.
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
+ BasicBlock *BB = DFBlocks[i].second;
+ if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
+ Info.DefiningBlocks.push_back(BB);
+ }
+ DFBlocks.clear();
+ }
+}
+
+/// RewriteSingleStoreAlloca - If there is only a single store to this value,
+/// replace any loads of it that are directly dominated by the definition with
+/// the value stored.
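+///
+/// For instance (hypothetical IR): given the single "store i32 %v, i32* %a",
+/// every "load i32* %a" dominated by the store is replaced by %v outright;
+/// loads not dominated by it are recorded in Info.UsingBlocks so the general
+/// PHI-insertion machinery can handle them.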
+void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
+ AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
+ Instruction *UserInst = cast<Instruction>(*UI++);
+ if (!isa<LoadInst>(UserInst)) {
+ assert(UserInst == OnlyStore && "Should only have load/stores");
+ continue;
+ }
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
+ }
+
+ } else if (LI->getParent() != StoreBB &&
+ !dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+ }
+
+ // Otherwise, we *can* safely rewrite this load.
+ LI->replaceAllUsesWith(OnlyStore->getOperand(0));
+ if (AST && isa<PointerType>(LI->getType()))
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+
+/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
+/// first element of a pair.
+struct StoreIndexSearchPredicate {
+ bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
+ const std::pair<unsigned, StoreInst*> &RHS) {
+ return LHS.first < RHS.first;
+ }
+};
+
+/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
+/// block. If this is the case, avoid traversing the CFG and inserting a lot of
+/// potentially useless PHI nodes by just performing a single linear pass over
+/// the basic block using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// the blocks that still use it are left in Info.UsingBlocks so the general
+/// algorithm can handle them. This is necessary in cases where, due to
+/// control flow, the alloca is potentially undefined on some control flow
+/// paths. e.g. code like this is potentially correct:
+///
+/// for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+///
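+/// Within one block the rewrite is simple forwarding, e.g. (hypothetical IR):
+///
+///   store i32 1, i32* %a
+///   %v1 = load i32* %a     ; forwarded from the store above: 1
+///   store i32 2, i32* %a
+///   %v2 = load i32* %a     ; forwarded from the nearest earlier store: 2
+///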
+void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ // Walk the use-def list of the alloca, getting the locations of all stores.
+ typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
+ StoresByIndexTy StoresByIndex;
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // If there are no stores to the alloca, just replace any loads with undef.
+ if (StoresByIndex.empty()) {
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ if (AST && isa<PointerType>(LI->getType()))
+ AST->deleteValue(LI);
+ LBI.deleteValue(LI);
+ LI->eraseFromParent();
+ }
+ return;
+ }
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ std::sort(StoresByIndex.begin(), StoresByIndex.end());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ if (!LI) continue;
+
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower index than this load.
+ StoresByIndexTy::iterator I =
+ std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+ std::pair<unsigned, StoreInst*>(LoadIdx, 0),
+ StoreIndexSearchPredicate());
+
+ // If there is no store before this load, then we can't promote this load.
+ if (I == StoresByIndex.begin()) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+
+ // Otherwise, there was a store before this load, the load takes its value.
+ --I;
+ LI->replaceAllUsesWith(I->second->getOperand(0));
+ if (AST && isa<PointerType>(LI->getType()))
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+
+// QueuePhiNode - Queues a phi-node to be added to a basic block for a specific
+// alloca. Returns true if there wasn't already a phi-node for that variable.
+//
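+// Each inserted PHI is named "<alloca name>.<version>"; e.g. a hypothetical
+// alloca %x yields PHI nodes %x.0, %x.1, ... as versions are handed out.
+//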
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+ unsigned &Version,
+ SmallPtrSet<PHINode*, 16> &InsertedPHINodes) {
+ // Look up the basic-block in question.
+ PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
+
+ // If the BB already has a phi node added for the i'th alloca then we're done!
+ if (PN) return false;
+
+ // Create a PhiNode using the dereferenced type... and add the phi-node to the
+ // BasicBlock.
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
+ Allocas[AllocaNo]->getName() + "." +
+ utostr(Version++), BB->begin());
+ ++NumPHIInsert;
+ PhiToAllocaMap[PN] = AllocaNo;
+ PN->reserveOperandSpace(getNumPreds(BB));
+
+ InsertedPHINodes.insert(PN);
+
+ if (AST && isa<PointerType>(PN->getType()))
+ AST->copyValue(PointerAllocaValues[AllocaNo], PN);
+
+ return true;
+}
+
+// RenamePass - Recursively traverse the CFG of the function, renaming loads and
+// stores to the allocas which we are promoting. IncomingVals indicates what
+// value each Alloca contains on exit from the predecessor block Pred.
+//
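+// For example (hypothetical): if alloca #0 holds %v on exit from Pred, a load
+// of that alloca in BB is replaced by %v, while a subsequent store of %w just
+// updates IncomingVals[0] to %w for the remainder of BB and its successors.
+//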
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncomingVals,
+ std::vector<RenamePassData> &Worklist) {
+NextIteration:
+ // If we are inserting any phi nodes into this BB, they will already be in the
+ // block.
+ if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+ // If we have PHI nodes to update, compute the number of edges from Pred to
+ // BB.
+ if (PhiToAllocaMap.count(APN)) {
+ // We want to be able to distinguish between PHI nodes being inserted by
+ // this invocation of mem2reg from those phi nodes that already existed in
+ // the IR before mem2reg was run. We determine that APN is being inserted
+ // because it is missing incoming edges. All other PHI nodes being
+ // inserted by this pass of mem2reg will have the same number of incoming
+ // operands so far. Remember this count.
+ unsigned NewPHINumOperands = APN->getNumOperands();
+
+ unsigned NumEdges = 0;
+ for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
+ if (*I == BB)
+ ++NumEdges;
+ assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+ // Add entries for all the phis.
+ BasicBlock::iterator PNI = BB->begin();
+ do {
+ unsigned AllocaNo = PhiToAllocaMap[APN];
+
+ // Add N incoming values to the PHI node.
+ for (unsigned i = 0; i != NumEdges; ++i)
+ APN->addIncoming(IncomingVals[AllocaNo], Pred);
+
+ // The currently active variable for this block is now the PHI.
+ IncomingVals[AllocaNo] = APN;
+
+ // Get the next phi node.
+ ++PNI;
+ APN = dyn_cast<PHINode>(PNI);
+ if (APN == 0) break;
+
+ // Verify that it is missing entries. If not, it is not being inserted
+ // by this mem2reg invocation so we want to ignore it.
+ } while (APN->getNumOperands() == NewPHINumOperands);
+ }
+ }
+
+ // Don't revisit blocks.
+ if (!Visited.insert(BB)) return;
+
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
+ Instruction *I = II++; // get the instruction, increment iterator
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
+ if (!Src) continue;
+
+ std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end()) continue;
+
+ Value *V = IncomingVals[AI->second];
+
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ if (AST && isa<PointerType>(LI->getType()))
+ AST->deleteValue(LI);
+ BB->getInstList().erase(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Delete this instruction and mark the name as the current holder of the
+ // value
+ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
+ if (!Dest) continue;
+
+ std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ if (ai == AllocaLookup.end())
+ continue;
+
+ // what value were we writing?
+ IncomingVals[ai->second] = SI->getOperand(0);
+ BB->getInstList().erase(SI);
+ }
+ }
+
+ // 'Recurse' to our successors.
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E) return;
+
+ // Keep track of the successors so we don't visit the same successor twice
+ SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
+
+ // Handle the first successor without using the worklist.
+ VisitedSuccs.insert(*I);
+ Pred = BB;
+ BB = *I;
+ ++I;
+
+ for (; I != E; ++I)
+ if (VisitedSuccs.insert(*I))
+ Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
+
+ goto NextIteration;
+}
+
+/// PromoteMemToReg - Promote the specified list of alloca instructions into
+/// scalar registers, inserting PHI nodes as appropriate. It makes use of
+/// DominanceFrontier information and does not modify the CFG of the function
+/// at all. All allocas must be from the same function.
+///
+/// If AST is specified, the specified tracker is updated to reflect changes
+/// made to the IR.
+///
+void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
+ DominatorTree &DT, DominanceFrontier &DF,
+ AliasSetTracker *AST) {
+ // If there is nothing to do, bail out...
+ if (Allocas.empty()) return;
+
+ PromoteMem2Reg(Allocas, DT, DF, AST).run();
+}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 0000000..2cde765
--- /dev/null
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,2213 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <functional>
+#include <set>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+///
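+/// For instance (hypothetical CFG): if both terminators branch to a block J
+/// containing "phi [ %a, %SI1BB ], [ %b, %SI2BB ]" with %a != %b, merging the
+/// terminators would lose that distinction, so we refuse.
+///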
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+ if (SI1 == SI2) return false; // Can't merge with self!
+
+ // It is not safe to merge these two terminator instructions if they have a common
+ // successor, and if that successor has a PHI node, and if *that* PHI node has
+ // conflicting incoming values from the two switch blocks.
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+
+ for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+ if (SI1Succs.count(*I))
+ for (BasicBlock::iterator BBI = (*I)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) !=
+ PN->getIncomingValueForBlock(SI2BB))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
+/// now be entries in it from the 'NewPred' block. The values that will be
+/// flowing into the PHI nodes will be the same as those coming in from
+/// ExistPred, an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+ BasicBlock *ExistPred) {
+ assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) !=
+ succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!");
+ if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Succ->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
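+/// A conflict arises, for example (hypothetical), when block P is a
+/// predecessor of both BB and Succ and a phi node in Succ expects %a directly
+/// from P but %b along the path through BB; folding BB away would require P
+/// to supply two different values for a single entry.
+///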
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DOUT << "Looking to fold " << BB->getNameStart() << " into "
+ << Succ->getNameStart() << "\n";
+ // Shortcut, if there is only a single predecessor it must be BB and merging
+ // is always safe
+ if (Succ->getSinglePredecessor()) return true;
+
+ typedef SmallPtrSet<Instruction*, 16> InstrSet;
+ InstrSet BBPHIs;
+
+ // Make a list of all phi nodes in BB
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) BBPHIs.insert(BBI++);
+
+ // Make a list of the predecessors of BB
+ typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
+ BlockSet BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Use that list to make another list of common predecessors of BB and Succ
+ BlockSet CommonPreds;
+ for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ PI != PE; ++PI)
+ if (BBPreds.count(*PI))
+ CommonPreds.insert(*PI);
+
+ // Shortcut, if there are no common predecessors, merging is always safe
+ if (CommonPreds.empty())
+ return true;
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ if (BBPN->getIncomingValueForBlock(*PI)
+ != PN->getIncomingValueForBlock(*PI)) {
+ DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "
+ << Succ->getNameStart() << " is conflicting with "
+ << BBPN->getNameStart() << " with regard to common predecessor "
+ << (*PI)->getNameStart() << "\n";
+ return false;
+ }
+ }
+ // Remove this phinode from the list of phis in BB, since it has been
+ // handled.
+ BBPHIs.erase(BBPN);
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ if (Val != PN->getIncomingValueForBlock(*PI)) {
+ DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "
+ << Succ->getNameStart() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getNameStart() << "\n";
+ return false;
+ }
+ }
+ }
+ }
+
+ // If there are any other phi nodes in BB that don't have a phi node in Succ
+ // to merge with, they must be moved to Succ completely. However, for any
+ // predecessors of Succ, such a phi node will gain entries that simply
+ // point to the phi node itself. So, for any common predecessors, this
+ // must not cause conflicts.
+ for (InstrSet::iterator I = BBPHIs.begin(), E = BBPHIs.end();
+ I != E; I++) {
+ PHINode *PN = cast<PHINode>(*I);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++)
+ if (PN->getIncomingValueForBlock(*PI) != PN) {
+ DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "
+ << BB->getNameStart() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getNameStart() << "\n";
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB contains an unconditional
+/// branch to Succ, and contains no instructions other than PHI nodes and the
+/// branch. If possible, eliminate BB.
+static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
+ BasicBlock *Succ) {
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ DOUT << "Killing Trivial BB: \n" << *BB;
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ PN->addIncoming(OldValPN->getIncomingValue(i),
+ OldValPN->getIncomingBlock(i));
+ } else {
+ // Add an incoming value for each of the new incoming values.
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
+ PN->addIncoming(OldVal, BBPreds[i]);
+ }
+ }
+ }
+
+ if (isa<PHINode>(&BB->front())) {
+ SmallVector<BasicBlock*, 16>
+ OldSuccPreds(pred_begin(Succ), pred_end(Succ));
+
+ // Move all PHI nodes in BB to Succ if they are alive, otherwise
+ // delete them.
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ if (PN->use_empty()) {
+ // Just remove the dead phi. This happens if Succ's PHIs were the only
+ // users of the PHI nodes.
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // The instruction is alive, so this means that BB must dominate all
+ // predecessors of Succ (since all uses of the PN come after its
+ // definition, they lie in Succ or in a block dominated by Succ; if a
+ // predecessor of Succ were not dominated by BB, PN would violate SSA's
+ // def-before-use requirement). Therefore, we can simply move the phi
+ // node to the next block.
+ Succ->getInstList().splice(Succ->begin(),
+ BB->getInstList(), BB->begin());
+
+ // We need to add new entries for the PHI node to account for
+ // predecessors of Succ that the PHI node does not take into
+ // account. At this point, since we know that BB dominates Succ and all
+ // of its predecessors, any newly added incoming edges should use the
+ // PHI node itself as their value, because they are loop back edges.
+ for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i)
+ if (OldSuccPreds[i] != BB)
+ PN->addIncoming(PN, OldSuccPreds[i]);
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and
+/// presumably PHI nodes in it), check to see if the merge at this block is due
+/// to an "if condition". If so, return the boolean condition that determines
+/// which entry into BB will be taken. Also, return by reference the block
+/// that will be entered if the condition is true, and the block that will
+/// be entered if the condition is false.
+///
+///
+static Value *GetIfCondition(BasicBlock *BB,
+ BasicBlock *&IfTrue, BasicBlock *&IfFalse) {
+ assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 &&
+ "Function can only handle blocks with 2 predecessors!");
+ BasicBlock *Pred1 = *pred_begin(BB);
+ BasicBlock *Pred2 = *++pred_begin(BB);
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ if (!isa<BranchInst>(Pred1->getTerminator()) ||
+ !isa<BranchInst>(Pred2->getTerminator()))
+ return 0;
+ BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator());
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+ // either are.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return 0;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) {
+ // If we found a conditional branch predecessor, make sure that it branches
+ // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return 0;
+ }
+
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (++pred_begin(Pred2) != pred_end(Pred2))
+ return 0;
+
+ return Pred1Br->getCondition();
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ if (pred_begin(Pred1) == pred_end(Pred1) ||
+ ++pred_begin(Pred1) != pred_end(Pred1) ||
+ pred_begin(Pred2) == pred_end(Pred2) ||
+ ++pred_begin(Pred2) != pred_end(Pred2) ||
+ *pred_begin(Pred1) != *pred_begin(Pred2))
+ return 0;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BasicBlock *CommonPred = *pred_begin(Pred1);
+ if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) {
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI->getCondition();
+ }
+ return 0;
+}
+
+/// DominatesMergePoint - If we have a merge point of an "if condition" as
+/// accepted above, return true if the specified value dominates the block. We
+/// don't handle the true generality of domination here, just a special case
+/// which works well enough for us.
+///
+/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
+/// see if V (which must be an instruction) is cheap to compute and is
+/// non-trapping. If both are true, the instruction is inserted into the set
+/// and true is returned.
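+///
+/// E.g. (hypothetical): when merging "if (c) x = a + b;", the add is cheap
+/// and non-trapping, so it can be placed in AggressiveInsts and later hoisted
+/// to execute unconditionally, whereas an integer divide (which can trap)
+/// falls into the default case and is rejected.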
+static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+ std::set<Instruction*> *AggressiveInsts) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ // Non-instructions all dominate instructions, but not all constantexprs
+ // can be executed unconditionally.
+ if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
+ if (C->canTrap())
+ return false;
+ return true;
+ }
+ BasicBlock *PBB = I->getParent();
+
+ // We don't want to allow weird loops that might have the "if condition" in
+ // the bottom of this block.
+ if (PBB == BB) return false;
+
+ // If this instruction is defined in a block that contains an unconditional
+ // branch to BB, then it must be in the 'conditional' part of the "if
+ // statement".
+ if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()))
+ if (BI->isUnconditional() && BI->getSuccessor(0) == BB) {
+ if (!AggressiveInsts) return false;
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ switch (I->getOpcode()) {
+ default: return false; // Cannot hoist this out safely.
+ case Instruction::Load: {
+ // We can hoist loads that are non-volatile and obviously cannot trap.
+ if (cast<LoadInst>(I)->isVolatile())
+ return false;
+ // FIXME: A computation of a constant can trap!
+ if (!isa<AllocaInst>(I->getOperand(0)) &&
+ !isa<Constant>(I->getOperand(0)))
+ return false;
+ // External weak globals may have address 0, so we can't load them.
+ Value *V2 = I->getOperand(0)->getUnderlyingObject();
+ if (V2) {
+ GlobalVariable* GV = dyn_cast<GlobalVariable>(V2);
+ if (GV && GV->hasExternalWeakLinkage())
+ return false;
+ }
+ // Finally, we have to check to make sure there are no instructions
+ // before the load in its basic block, as we are going to hoist the load
+ // out to its predecessor.
+ BasicBlock::iterator IP = PBB->begin();
+ while (isa<DbgInfoIntrinsic>(IP))
+ IP++;
+ if (IP != BasicBlock::iterator(I))
+ return false;
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ if (I->getOperand(0)->getType()->isFPOrFPVector())
+ return false; // FP arithmetic might trap.
+ break; // These are all cheap and non-trapping instructions.
+ }
+
+ // Okay, we can only really hoist these out if their operands are not
+ // defined in the conditional region.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (!DominatesMergePoint(*i, BB, 0))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts->insert(I);
+ }
+
+ return true;
+}
+
+/// GatherConstantSetEQs - Given a potentially 'or'd together collection of
+/// icmp_eq instructions that compare a value against a constant, return the
+/// value being compared, and stick the constant into the Values vector.
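+/// For example (hypothetical IR): for "or (icmp eq %X, 1), (icmp eq %X, 7)"
+/// this returns %X and pushes the constants 1 and 7 into Values.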
+static Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values){
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Inst->getOpcode() == Instruction::ICmp &&
+ cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(1))) {
+ Values.push_back(C);
+ return Inst->getOperand(0);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
+ Values.push_back(C);
+ return Inst->getOperand(1);
+ }
+ } else if (Inst->getOpcode() == Instruction::Or) {
+ if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values))
+ if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values))
+ if (LHS == RHS)
+ return LHS;
+ }
+ }
+ return 0;
+}
+
+/// GatherConstantSetNEs - Given a potentially 'and'd together collection of
+/// setne instructions that compare a value against a constant, return the value
+/// being compared, and stick the constant into the Values vector.
+static Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values){
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Inst->getOpcode() == Instruction::ICmp &&
+ cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(1))) {
+ Values.push_back(C);
+ return Inst->getOperand(0);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
+ Values.push_back(C);
+ return Inst->getOperand(1);
+ }
+ } else if (Inst->getOpcode() == Instruction::And) {
+ if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values))
+ if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values))
+ if (LHS == RHS)
+ return LHS;
+ }
+ }
+ return 0;
+}
+
+/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a
+/// bunch of comparisons of one value against constants, return the value and
+/// the constants being compared.
+static bool GatherValueComparisons(Instruction *Cond, Value *&CompVal,
+ std::vector<ConstantInt*> &Values) {
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantSetEQs(Cond, Values);
+
+ // Return true to indicate that the condition is true if the CompVal is
+ // equal to one of the constants.
+ return true;
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantSetNEs(Cond, Values);
+
+ // Return false to indicate that the condition is false if the CompVal is
+ // equal to one of the constants.
+ return false;
+ }
+ return false;
+}
+
+static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+ Instruction* Cond = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cond = dyn_cast<Instruction>(SI->getCondition());
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ }
+
+ TI->eraseFromParent();
+ if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond);
+}
+
+/// isValueEqualityComparison - If the specified terminator checks whether a
+/// value is equal to a constant integer, return that value; otherwise return
+/// null.
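+///
+/// This handles both "switch i32 %x, ..." and a conditional branch on
+/// "icmp eq/ne i32 %x, C" (hypothetical IR), returning %x in either case.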
+static Value *isValueEqualityComparison(TerminatorInst *TI) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ // Do not permit merging of large switch instructions into their
+ // predecessors unless there is only one predecessor.
+ if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()),
+ pred_end(SI->getParent())) > 128)
+ return 0;
+
+ return SI->getCondition();
+ }
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional() && BI->getCondition()->hasOneUse())
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
+ ICI->getPredicate() == ICmpInst::ICMP_NE) &&
+ isa<ConstantInt>(ICI->getOperand(1)))
+ return ICI->getOperand(0);
+ return 0;
+}
+
+/// GetValueEqualityComparisonCases - Given a value comparison instruction,
+/// decode all of the 'cases' that it represents and return the 'default' block.
+static BasicBlock *
+GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<std::pair<ConstantInt*,
+ BasicBlock*> > &Cases) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cases.reserve(SI->getNumCases());
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i)));
+ return SI->getDefaultDest();
+ }
+
+ BranchInst *BI = cast<BranchInst>(TI);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ Cases.push_back(std::make_pair(cast<ConstantInt>(ICI->getOperand(1)),
+ BI->getSuccessor(ICI->getPredicate() ==
+ ICmpInst::ICMP_NE)));
+ return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
+}
+
+
+/// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries
+/// in the list that match the specified block.
+static void EliminateBlockCases(BasicBlock *BB,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) {
+ for (unsigned i = 0, e = Cases.size(); i != e; ++i)
+ if (Cases[i].second == BB) {
+ Cases.erase(Cases.begin()+i);
+ --i; --e;
+ }
+}
+
+/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
+/// well.
+static bool
+ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &C2) {
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > *V1 = &C1, *V2 = &C2;
+
+ // Make V1 be smaller than V2.
+ if (V1->size() > V2->size())
+ std::swap(V1, V2);
+
+ if (V1->size() == 0) return false;
+ if (V1->size() == 1) {
+ // Just scan V2.
+ ConstantInt *TheVal = (*V1)[0].first;
+ for (unsigned i = 0, e = V2->size(); i != e; ++i)
+ if (TheVal == (*V2)[i].first)
+ return true;
+ }
+
+ // Otherwise, just sort both lists and compare element by element.
+ std::sort(V1->begin(), V1->end());
+ std::sort(V2->begin(), V2->end());
+ unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
+ while (i1 != e1 && i2 != e2) {
+ if ((*V1)[i1].first == (*V2)[i2].first)
+ return true;
+ if ((*V1)[i1].first < (*V2)[i2].first)
+ ++i1;
+ else
+ ++i2;
+ }
+ return false;
+}
+
+/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a
+/// terminator instruction and its block is known to only have a single
+/// predecessor block, check to see if that predecessor is also a value
+/// comparison with the same value, and if that comparison determines the
+/// outcome of this comparison. If so, simplify TI. This does a very limited
+/// form of jump threading.
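+///
+/// Sketch (hypothetical): if Pred does "switch %x" and routes the case
+/// %x == 4 to TI's block, then %x is known to be 4 there, so TI's own
+/// comparison against %x folds to an unconditional branch to the matching
+/// successor.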
+static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred) {
+ Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+ if (!PredVal) return false; // Not a value comparison in predecessor.
+
+ Value *ThisVal = isValueEqualityComparison(TI);
+ assert(ThisVal && "This isn't a value comparison!!");
+ if (ThisVal != PredVal) return false; // Different values being compared.
+
+ // Find out information about when control will move from Pred to TI's block.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
+ PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+
+ // Find information about how control leaves this block.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > ThisCases;
+ BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+
+ // If TI's block is the default block from Pred's comparison, potentially
+ // simplify TI based on this knowledge.
+ if (PredDef == TI->getParent()) {
+ // If we are here, we know that the value is none of those cases listed in
+ // PredCases. If there are any cases in ThisCases that are in PredCases, we
+ // can simplify TI.
+ if (ValuesOverlap(PredCases, ThisCases)) {
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to an
+ // uncond br.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = BranchInst::Create(ThisDef, TI);
+
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].second->removePredecessor(TI->getParent());
+
+ DOUT << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+
+ } else {
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant*, 16> DeadCases;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ DeadCases.insert(PredCases[i].first);
+
+ DOUT << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI;
+
+ for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+ if (DeadCases.count(SI->getCaseValue(i))) {
+ SI->getSuccessor(i)->removePredecessor(TI->getParent());
+ SI->removeCase(i);
+ }
+
+ DOUT << "Leaving: " << *TI << "\n";
+ return true;
+ }
+ }
+
+ } else {
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = 0;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == TIBB) {
+ if (TIV == 0)
+ TIV = PredCases[i].first;
+ else
+ return false; // Cannot handle multiple values coming to this block.
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = 0;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].first == TIV) {
+ TheRealDest = ThisCases[i].second;
+ break;
+ }
+
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (TheRealDest == 0) TheRealDest = ThisDef;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0;
+
+ // Insert the new branch.
+ Instruction *NI = BranchInst::Create(TheRealDest, TI);
+
+ DOUT << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+ }
+ return false;
+}
+
+namespace {
+ /// ConstantIntOrdering - This class implements a stable ordering of constant
+ /// integers that does not depend on their address. This is important for
+ /// applications that sort ConstantInt's to ensure uniqueness.
+ struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+ };
+}
+
+/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+/// equality comparison instruction (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
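+///
+/// For instance (hypothetical): a predecessor "switch %x" whose default edge
+/// falls into a block ending in "br (icmp eq %x, 10)" can absorb that compare
+/// as one more case of the switch, so control need not pass through the
+/// intermediate block at all.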
+static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+ bool Changed = false;
+
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+
+ // See if the predecessor is a comparison with the same value.
+ TerminatorInst *PTI = Pred->getTerminator();
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+
+ if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+ // Figure out which 'cases' to copy from TI to PTI.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallVector<BasicBlock*, 8> NewSuccessors;
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second != BB)
+ PTIHandled.insert(PredCases[i].first);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ PredDefault = BBDefault;
+ NewSuccessors.push_back(BBDefault);
+ }
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].first) &&
+ BBCases[i].second != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ }
+
+ } else {
+ // If this is not the default destination from PTI, only the edges
+ // in TI that occur in PTI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == BB) {
+ PTIHandled.insert(PredCases[i].first);
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].first)) {
+ // If this is one we are capable of getting...
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ PTIHandled.erase(BBCases[i].first);// This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ PTIHandled.begin(),
+ E = PTIHandled.end(); I != E; ++I) {
+ PredCases.push_back(std::make_pair(*I, BBDefault));
+ NewSuccessors.push_back(BBDefault);
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+ AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = SwitchInst::Create(CV, PredDefault,
+ PredCases.size(), PTI);
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ NewSI->addCase(PredCases[i].first, PredCases[i].second);
+
+ EraseTerminatorInstAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of NewSI, then we must
+ // have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = 0;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (InfLoopBlock == 0) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock = BasicBlock::Create("infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
+/// BB2, hoist any common code in the two blocks up into the branch block. The
+/// caller of this function guarantees that BI's block dominates BB1 and BB2.
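+///
+/// Rough sketch on hypothetical IR (names invented): if both successors begin
+/// with an identical "%x = add i32 %a, 1", the add is hoisted into the branch
+/// block ahead of the conditional branch, and both block-local copies are
+/// replaced with the hoisted value.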
+static bool HoistThenElseCodeToIf(BranchInst *BI) {
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // such, we currently just scan for obviously identical instructions in an
+ // identical order.
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination.
+
+ BasicBlock::iterator BB1_Itr = BB1->begin();
+ BasicBlock::iterator BB2_Itr = BB2->begin();
+
+ Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
+ isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2))
+ return false;
+
+ // If we get here, we can hoist at least one instruction.
+ BasicBlock *BIParent = BI->getParent();
+
+ do {
+ // If we are hoisting the terminator instruction, don't move it (that would
+ // make a broken BB); instead, clone it and remove BI.
+ if (isa<TerminatorInst>(I1))
+ goto HoistTerminator;
+
+ // For a normal instruction, we just move one to right before the branch,
+ // then replace all uses of the other with the first. Finally, we remove
+ // the now redundant second instruction.
+ BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ BB2->getInstList().erase(I2);
+
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ I2 = BB2_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2));
+
+ return true;
+
+HoistTerminator:
+ // Okay, it is safe to hoist the terminator.
+ Instruction *NT = I1->clone();
+ BIParent->getInstList().insert(BI, NT);
+ if (NT->getType() != Type::VoidTy) {
+ I1->replaceAllUsesWith(NT);
+ I2->replaceAllUsesWith(NT);
+ NT->takeName(I1);
+ }
+
+ // Hoisting one of the terminators from our successor is a great thing.
+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+ // them. If they do, the PHI entries for BB1 and BB2 must agree for every
+ // PHI node; where they disagree, we insert select instructions to compute
+ // the final result.
+ std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V) {
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (SI == 0)
+ SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName(), NT);
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
+ }
+ }
+ }
+
+ // Update any PHI nodes in our new successors.
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
+ AddPredecessorToBlock(*SI, BIParent, BB1);
+
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+}
+
+/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 and
+/// BB2, where BB2 is the only successor of BB1, hoist simple code (for now,
+/// restricted to a single side-effect-free instruction) from BB1 into the
+/// branch block so that it is speculatively executed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ Instruction *HInst = NULL;
+ Instruction *Term = BB1->getTerminator();
+ for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end();
+ BBI != BBE; ++BBI) {
+ Instruction *I = BBI;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+ if (I == Term) break;
+
+ if (!HInst)
+ HInst = I;
+ else
+ return false;
+ }
+ if (!HInst)
+ return false;
+
+ // Be conservative for now. FP select instruction can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<Instruction>(BrCond) &&
+ cast<Instruction>(BrCond)->getOpcode() == Instruction::FCmp)
+ return false;
+
+ // If BB1 is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (BB1 != BI->getSuccessor(0)) {
+ assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+
+ // Turn
+ // BB:
+ // %t1 = icmp
+ // br i1 %t1, label %BB1, label %BB2
+ // BB1:
+ // %t3 = add %t2, c
+ // br label %BB2
+ // BB2:
+ // =>
+ // BB:
+ // %t1 = icmp
+ // %t4 = add %t2, c
+ // %t3 = select i1 %t1, %t4, %t2
+ switch (HInst->getOpcode()) {
+ default: return false; // Not safe / profitable to hoist.
+ case Instruction::Add:
+ case Instruction::Sub:
+ // FP arithmetic might trap. Not worth doing for vector ops.
+ if (HInst->getType()->isFloatingPoint()
+ || isa<VectorType>(HInst->getType()))
+ return false;
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Don't mess with vector operations.
+ if (isa<VectorType>(HInst->getType()))
+ return false;
+ break; // These are all cheap and non-trapping instructions.
+ }
+
+ // If the instruction is obviously dead, don't try to predicate it.
+ if (HInst->use_empty()) {
+ HInst->eraseFromParent();
+ return true;
+ }
+
+ // Can we speculatively execute the instruction? And what is the value
+ // if the condition is false? Consider the PHI uses: if the incoming values
+ // from the "if" block are all the same V, then V is the value of the
+ // select if the condition is false.
+ BasicBlock *BIParent = BI->getParent();
+ SmallVector<PHINode*, 4> PHIUses;
+ Value *FalseV = NULL;
+
+ BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
+ for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end();
+ UI != E; ++UI) {
+ // Reject any user that is not a PHI node in BB2. Such users could only
+ // occur in unreachable blocks, because they would not be dominated by the
+ // instr.
+ PHINode *PN = dyn_cast<PHINode>(UI);
+ if (!PN || PN->getParent() != BB2)
+ return false;
+ PHIUses.push_back(PN);
+
+ Value *PHIV = PN->getIncomingValueForBlock(BIParent);
+ if (!FalseV)
+ FalseV = PHIV;
+ else if (FalseV != PHIV)
+ return false; // Inconsistent value when condition is false.
+ }
+
+ assert(FalseV && "Must have at least one user, and it must be a PHI");
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in this BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (OpI && OpI->getParent() == BIParent &&
+ !OpI->isUsedInBasicBlock(BIParent))
+ return false;
+ }
+
+ // If we get here, we can hoist the instruction. Try to place it
+ // before the icmp instruction preceding the conditional branch.
+ BasicBlock::iterator InsertPos = BI;
+ if (InsertPos != BIParent->begin())
+ --InsertPos;
+ // Skip debug info between condition and branch.
+ while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos))
+ --InsertPos;
+ if (InsertPos == BrCond && !isa<PHINode>(BrCond)) {
+ SmallPtrSet<Instruction *, 4> BB1Insns;
+ for (BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end();
+ BB1I != BB1E; ++BB1I)
+ BB1Insns.insert(BB1I);
+ for (Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
+ UI != UE; ++UI) {
+ Instruction *Use = cast<Instruction>(*UI);
+ if (BB1Insns.count(Use)) {
+ // BrCond is used by an instruction in BB1, so place the hoisted
+ // instruction just before the branch instruction instead.
+ InsertPos = BI;
+ break;
+ }
+ }
+ } else
+ InsertPos = BI;
+ BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst);
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the previously determined FalseV.
+ SelectInst *SI;
+ if (Invert)
+ SI = SelectInst::Create(BrCond, FalseV, HInst,
+ FalseV->getName() + "." + HInst->getName(), BI);
+ else
+ SI = SelectInst::Create(BrCond, HInst, FalseV,
+ HInst->getName() + "." + FalseV->getName(), BI);
+
+ // Make the PHI node use the select for all incoming values for "then" and
+ // "if" blocks.
+ for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
+ PHINode *PN = PHIUses[i];
+ for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
+ if (PN->getIncomingBlock(j) == BB1 ||
+ PN->getIncomingBlock(j) == BIParent)
+ PN->setIncomingValue(j, SI);
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
+/// across this block.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ unsigned Size = 0;
+
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (Size > 10) return false; // Don't clone large BB's.
+ ++Size;
+
+ // We can only support instructions that do not define values that are
+ // live outside of the current basic block.
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (U->getParent() != BB || isa<PHINode>(U)) return false;
+ }
+
+ // Looks ok, continue checking.
+ }
+
+ return true;
+}
+
+/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value
+/// that is defined in the same block as the branch and if any PHI entries are
+/// constants, thread edges corresponding to that entry to be branches to their
+/// ultimate destination.
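+///
+/// Hypothetical example: given "%c = phi i1 [ true, %P ], [ %v, %Q ]" feeding
+/// "br i1 %c, label %T, label %F" in the same block, the edge from %P is
+/// redirected (via a new split block) straight to %T, since the branch
+/// outcome along that edge is known.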
+static bool FoldCondBranchOnPHI(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+ PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+ // NOTE: we currently cannot transform this case if the PHI node is used
+ // outside of the block.
+ if (!PN || PN->getParent() != BB || !PN->hasOneUse())
+ return false;
+
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
+
+ // Now we know that this block has multiple preds and two succs.
+ if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+
+ // Okay, this is a simple enough basic block. See if any phi values are
+ // constants.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantInt *CB;
+ if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
+ CB->getType() == Type::Int1Ty) {
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB) continue; // Skip self loops.
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB = BasicBlock::Create(RealDest->getName()+".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = RealDest->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *V = PN->getIncomingValueForBlock(BB);
+ PN->addIncoming(V, EdgeBB);
+ }
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ std::map<Value*, Value*> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ } else {
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName()) N->setName(BBI->getName()+".c");
+
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ std::map<Value*, Value*>::iterator PI =
+ TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Constant *C = ConstantFoldInstruction(N)) {
+ TranslateMap[BBI] = C;
+ delete N; // Constant folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
+ }
+ }
+ }
+
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
+ }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI) | true;
+ }
+ }
+
+ return false;
+}
+
+/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
+/// PHI node, see if we can eliminate it.
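+///
+/// Sketch on hypothetical IR: a diamond that merges with
+///   %r = phi i32 [ %a, %then ], [ %b, %else ]
+/// can, when both arms are cheap and side-effect free, be rewritten as
+///   %r = select i1 %cond, i32 %a, i32 %b
+/// after which the conditional control flow becomes removable.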
+static bool FoldTwoEntryPHINode(PHINode *PN) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ //
+ BasicBlock *BB = PN->getParent();
+ BasicBlock *IfTrue, *IfFalse;
+ Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!IfCond) return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ DOUT << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n";
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ std::set<Instruction*> AggressiveInsts;
+
+ BasicBlock::iterator AfterPHIIt = BB->begin();
+ while (isa<PHINode>(AfterPHIIt)) {
+ PHINode *PN = cast<PHINode>(AfterPHIIt++);
+ if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB,
+ &AggressiveInsts) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB,
+ &AggressiveInsts)) {
+ return false;
+ }
+ }
+
+ // If all PHI nodes are promotable, check to make sure that all
+ // instructions in the predecessor blocks can be promoted as well. If
+ // not, we won't be able to get rid of the control flow, so it's not
+ // worth promoting to select instructions.
+ BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0;
+ PN = cast<PHINode>(BB->begin());
+ BasicBlock *Pred = PN->getIncomingBlock(0);
+ if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
+ IfBlock1 = Pred;
+ DomBlock = *pred_begin(Pred);
+ for (BasicBlock::iterator I = Pred->begin();
+ !isa<TerminatorInst>(I); ++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ Pred = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
+ IfBlock2 = Pred;
+ DomBlock = *pred_begin(Pred);
+ for (BasicBlock::iterator I = Pred->begin();
+ !isa<TerminatorInst>(I); ++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's up to the dominating block.
+ if (IfBlock1) {
+ DomBlock->getInstList().splice(DomBlock->getTerminator(),
+ IfBlock1->getInstList(),
+ IfBlock1->begin(),
+ IfBlock1->getTerminator());
+ }
+ if (IfBlock2) {
+ DomBlock->getInstList().splice(DomBlock->getTerminator(),
+ IfBlock2->getInstList(),
+ IfBlock2->begin(),
+ IfBlock2->getTerminator());
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ // Change the PHI node into a select instruction.
+ Value *TrueVal =
+ PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal =
+ PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+ Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", AfterPHIIt);
+ PN->replaceAllUsesWith(NV);
+ NV->takeName(PN);
+
+ BB->getInstList().erase(PN);
+ }
+ return true;
+}
+
+/// isTerminatorFirstRelevantInsn - Return true if Term is the very first
+/// instruction in its block, ignoring PHI nodes and dbg intrinsics.
+static bool isTerminatorFirstRelevantInsn(BasicBlock *BB, Instruction *Term) {
+ BasicBlock::iterator BBI = Term;
+ while (BBI != BB->begin()) {
+ --BBI;
+ if (!isa<DbgInfoIntrinsic>(BBI))
+ break;
+ }
+
+ if (isa<PHINode>(BBI) || &*BBI == Term || isa<DbgInfoIntrinsic>(BBI))
+ return true;
+ return false;
+}
+
+/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
+/// to two returning blocks, try to merge them together into one return,
+/// introducing a select if the return values disagree.
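+///
+/// Sketch on hypothetical IR:
+///   br i1 %c, label %T, label %F
+/// where T is "ret i32 %a" and F is "ret i32 %b", becomes
+///   %retval = select i1 %c, i32 %a, i32 %b
+///   ret i32 %retval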
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
+ assert(BI->isConditional() && "Must be a conditional branch");
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
+ ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
+
+ // Check to ensure both blocks are empty (contain just a return), optionally
+ // with PHI nodes. If there are other instructions, merging would cause extra
+ // computation on one path or the other.
+ if (!isTerminatorFirstRelevantInsn(TrueSucc, TrueRet))
+ return false;
+ if (!isTerminatorFirstRelevantInsn(FalseSucc, FalseRet))
+ return false;
+
+ // Okay, we found a branch that is going to two return nodes. If
+ // there is no return value for this function, just change the
+ // branch into a return.
+ if (FalseRet->getNumOperands() == 0) {
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+ ReturnInst::Create(0, BI);
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+
+ // Otherwise, figure out what the true and false return values are
+ // so we can insert a new select instruction.
+ Value *TrueValue = TrueRet->getReturnValue();
+ Value *FalseValue = FalseRet->getReturnValue();
+
+ // Unwrap any PHI nodes in the return blocks.
+ if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
+ if (TVPN->getParent() == TrueSucc)
+ TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
+ if (FVPN->getParent() == FalseSucc)
+ FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+
+ // In order for this transformation to be safe, we must be able to
+ // unconditionally execute both operands to the return. This is
+ // normally the case, but we could have a potentially-trapping
+ // constant expression that prevents this transformation from being
+ // safe.
+ if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
+ if (TCV->canTrap())
+ return false;
+ if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
+ if (FCV->canTrap())
+ return false;
+
+ // Okay, we collected all the mapped values and checked them for sanity, and
+ // decided to really do this transformation. First, update the CFG.
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+
+ // Insert select instructions where needed.
+ Value *BrCond = BI->getCondition();
+ if (TrueValue) {
+ // Insert a select if the results differ.
+ if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
+ } else if (isa<UndefValue>(TrueValue)) {
+ TrueValue = FalseValue;
+ } else {
+ TrueValue = SelectInst::Create(BrCond, TrueValue,
+ FalseValue, "retval", BI);
+ }
+ }
+
+ Value *RI = !TrueValue ?
+ ReturnInst::Create(BI) :
+ ReturnInst::Create(TrueValue, BI);
+
+ DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc;
+
+ EraseTerminatorInstAndDCECond(BI);
+
+ return true;
+}
+
+/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
+/// and if a predecessor branches to us and one of our successors, fold the
+/// setcc into the predecessor and use logical operations to pick the right
+/// destination.
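+///
+/// Hypothetical example: if a predecessor ends in "br i1 %p, label %BB,
+/// label %F" and BB contains only "%c = icmp ...; br i1 %c, label %T,
+/// label %F", the icmp is cloned into the predecessor and the branches merge
+/// into "%or.cond = and i1 %p, %c; br i1 %or.cond, label %T, label %F".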
+static bool FoldBranchToCommonDest(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0) return false;
+
+
+ // Only allow this if the condition is a simple instruction that can be
+ // executed unconditionally. It must be in the same block as the branch, and
+ // must be at the front of the block.
+ BasicBlock::iterator FrontIt = BB->front();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt))
+ ++FrontIt;
+ if ((!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || &*FrontIt != Cond || !Cond->hasOneUse()) {
+ return false;
+ }
+
+ // Make sure the instruction after the condition is the cond branch.
+ BasicBlock::iterator CondIt = Cond; ++CondIt;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(CondIt))
+ ++CondIt;
+ if (&*CondIt != BI) {
+ assert(!isa<DbgInfoIntrinsic>(CondIt) && "Hey do not forget debug info!");
+ return false;
+ }
+
+ // Cond is known to be a compare or binary operator. Check to make sure that
+ // neither operand is a potentially-trapping constant expression.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
+ if (CE->canTrap())
+ return false;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
+ if (CE->canTrap())
+ return false;
+
+
+ // Finally, don't infinitely unroll conditional loops.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ if (TrueDest == BB || FalseDest == BB)
+ return false;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBlock = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks.
+ if (PBI == 0 || PBI->isUnconditional() ||
+ !SafeToMergeTerminators(BI, PBI))
+ continue;
+
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond = false;
+
+ if (PBI->getSuccessor(0) == TrueDest)
+ Opc = Instruction::Or;
+ else if (PBI->getSuccessor(1) == FalseDest)
+ Opc = Instruction::And;
+ else if (PBI->getSuccessor(0) == FalseDest)
+ Opc = Instruction::And, InvertPredCond = true;
+ else if (PBI->getSuccessor(1) == TrueDest)
+ Opc = Instruction::Or, InvertPredCond = true;
+ else
+ continue;
+
+ DOUT << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB;
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond =
+ BinaryOperator::CreateNot(PBI->getCondition(),
+ PBI->getCondition()->getName()+".not", PBI);
+ PBI->setCondition(NewCond);
+ BasicBlock *OldTrue = PBI->getSuccessor(0);
+ BasicBlock *OldFalse = PBI->getSuccessor(1);
+ PBI->setSuccessor(0, OldFalse);
+ PBI->setSuccessor(1, OldTrue);
+ }
+
+ // Clone Cond into the predecessor basic block, and or/and the
+ // two conditions together.
+ Instruction *New = Cond->clone();
+ PredBlock->getInstList().insert(PBI, New);
+ New->takeName(Cond);
+ Cond->setName(New->getName()+".old");
+
+ Value *NewCond = BinaryOperator::Create(Opc, PBI->getCondition(),
+ New, "or.cond", PBI);
+ PBI->setCondition(NewCond);
+ if (PBI->getSuccessor(0) == BB) {
+ AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ PBI->setSuccessor(0, TrueDest);
+ }
+ if (PBI->getSuccessor(1) == BB) {
+ AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ PBI->setSuccessor(1, FalseDest);
+ }
+ return true;
+ }
+ return false;
+}
+
+/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a
+/// predecessor of another block, this function tries to simplify it. We know
+/// that PBI and BI are both conditional branches, and BI is in one of the
+/// successor blocks of PBI - PBI branches to BI.
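+///
+/// Hypothetical example: if PBI is "br i1 %x, label %BB, label %D" and BB
+/// contains only "br i1 %y, label %C, label %D", both branches can reach %D,
+/// so PBI is rewritten to "br i1 %brmerge, label %D, label %C" with
+/// "%brmerge = or i1 %x.not, %y.not", leaving BB (probably) dead.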
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends on a branch of the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+
+ // Otherwise, if there are multiple predecessors, insert a PHI that merges
+ // in the constant and simplify the block result. Subsequent passes of
+ // simplifycfg will thread the block.
+ if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ PHINode *NewPN = PHINode::Create(Type::Int1Ty,
+ BI->getCondition()->getName() + ".pr",
+ BB->begin());
+ // Okay, we're going to insert the PHI node. Since PBI is not the only
+ // predecessor, compute the PHI'd conditional value for all of the preds.
+ // Any predecessor where the condition is not computable we keep symbolic.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if ((PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) &&
+ PBI != BI && PBI->isConditional() &&
+ PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ NewPN->addIncoming(ConstantInt::get(Type::Int1Ty,
+ CondIsTrue), *PI);
+ } else {
+ NewPN->addIncoming(BI->getCondition(), *PI);
+ }
+
+ BI->setCondition(NewPN);
+ return true;
+ }
+ }
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessor is a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (&*BBI != BI)
+ return false;
+
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ int PBIOp, BIOp;
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ PBIOp = BIOp = 0;
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ PBIOp = 0, BIOp = 1;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ PBIOp = 1, BIOp = 0;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ PBIOp = BIOp = 1;
+ else
+ return false;
+
+ // Check to make sure that the other destination of this branch
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ isa<PHINode>(II); ++II, ++NumPhis)
+ if (NumPhis > 2) // Disable this xform.
+ return false;
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ DOUT << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent();
+
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or it won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock = BasicBlock::Create("infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ OtherDest = InfLoopBlock;
+ }
+
+ DOUT << *PBI->getParent()->getParent();
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ if (PBIOp)
+ PBICond = BinaryOperator::CreateNot(PBICond,
+ PBICond->getName()+".not",
+ PBI);
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = BinaryOperator::CreateNot(BICond,
+ BICond->getName()+".not",
+ PBI);
+ // Merge the conditions.
+ Value *Cond = BinaryOperator::CreateOr(PBICond, BICond, "brmerge", PBI);
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ // OtherDest may have phi nodes. If so, add entries from PBI's
+ // block that are identical to the entries for BI's block.
+ PHINode *PN;
+ for (BasicBlock::iterator II = OtherDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *V = PN->getIncomingValueForBlock(BB);
+ PN->addIncoming(V, PBI->getParent());
+ }
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ for (BasicBlock::iterator II = CommonDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value.
+ Value *NV = SelectInst::Create(PBICond, PBIV, BIV,
+ PBIV->getName()+".mux", PBI);
+ PN->setIncomingValue(PBBIdx, NV);
+ }
+ }
+
+ DOUT << "INTO: " << *PBI->getParent();
+
+ DOUT << *PBI->getParent()->getParent();
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+
+/// SimplifyCFG - This function is used to do simplification of a CFG. For
+/// example, it adjusts branches to branches to eliminate the extra hop, it
+/// eliminates unreachable basic blocks, and does other "peephole" optimization
+/// of the CFG. It returns true if a modification was made.
+///
+/// WARNING: The entry node of a function may not be simplified.
+///
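+/// A minimal driver sketch (hypothetical; a real caller does more
+/// bookkeeping) iterates to a fixed point, skipping the entry block:
+///   bool LocalChange = true;
+///   while (LocalChange) {
+///     LocalChange = false;
+///     for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); )
+///       LocalChange |= SimplifyCFG(BBIt++);
+///   }
+///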
+bool llvm::SimplifyCFG(BasicBlock *BB) {
+ bool Changed = false;
+ Function *M = BB->getParent();
+
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+ assert(&BB->getParent()->getEntryBlock() != BB &&
+ "Can't Simplify entry block!");
+
+ // Remove basic blocks that have no predecessors... or that just have
+ // themselves as a predecessor. These are unreachable.
+ if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
+ DOUT << "Removing BB: \n" << *BB;
+ DeleteDeadBlock(BB);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB);
+
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN);
+
+ // If this is a returning block with only PHI nodes in it, fold the return
+ // instruction into any unconditional branch predecessors.
+ //
+ // If any predecessor is a conditional branch that just selects among
+ // different return values, replace the branch/return pair with a select
+ // and return.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ if (isTerminatorFirstRelevantInsn(BB, BB->getTerminator())) {
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock*, 8> UncondBranchPreds;
+ SmallVector<BranchInst*, 8> CondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ TerminatorInst *PTI = (*PI)->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(*PI);
+ else
+ CondBranchPreds.push_back(BI);
+ }
+ }
+
+ // If we found some, do the transformation!
+ if (!UncondBranchPreds.empty()) {
+ while (!UncondBranchPreds.empty()) {
+ BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+ DOUT << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred;
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ Pred->getInstList().push_back(NewRet);
+
+ BasicBlock::iterator BBI = RI;
+ if (BBI != BB->begin()) {
+ // Move region end info into the predecessor.
+ if (DbgRegionEndInst *DREI = dyn_cast<DbgRegionEndInst>(--BBI))
+ DREI->moveBefore(NewRet);
+ }
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+ i != e; ++i)
+ if (PHINode *PN = dyn_cast<PHINode>(*i))
+ if (PN->getParent() == BB)
+ *i = PN->getIncomingValueForBlock(Pred);
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ Pred->getInstList().erase(UncondBranch);
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (pred_begin(BB) == pred_end(BB))
+ // We know there are no successors, so just nuke the block.
+ M->getBasicBlockList().erase(BB);
+
+ return true;
+ }
+
+ // Check out all of the conditional branches going to this return
+ // instruction. If any of them just select between returns, change the
+ // branch itself into a select/return pair.
+ while (!CondBranchPreds.empty()) {
+ BranchInst *BI = CondBranchPreds.pop_back_val();
+
+ // Check to see if the non-BB successor is also a return block.
+ if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+ isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+ SimplifyCondBranchToTwoReturns(BI))
+ return true;
+ }
+ }
+ } else if (isa<UnwindInst>(BB->begin())) {
+ // Check to see if the first instruction in this block is just an unwind.
+ // If so, replace any invoke instructions which use this as an exception
+ // destination with call instructions, and any unconditional branch
+ // predecessor with an unwind.
+ //
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.back();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) {
+ if (BI->isUnconditional()) {
+ Pred->getInstList().pop_back(); // nuke uncond branch
+ new UnwindInst(Pred); // Use unwind.
+ Changed = true;
+ }
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+ if (II->getUnwindDest() == BB) {
+ // Insert a new branch instruction before the invoke, because this
+ // is now a fall through...
+ BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+ Pred->getInstList().remove(II); // Take out of symbol table
+
+ // Insert the call now...
+ SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end());
+ CallInst *CI = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(),
+ II->getName(), BI);
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the Call now does instead
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+
+ Preds.pop_back();
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB)) {
+ // We know there are no successors, so just nuke the block.
+ M->getBasicBlockList().erase(BB);
+ return true;
+ }
+
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (isValueEqualityComparison(SI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
+ return SimplifyCFG(BB) | true;
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI))
+ return SimplifyCFG(BB) | true;
+ }
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (BI->isUnconditional()) {
+ BasicBlock::iterator BBI = BB->getFirstNonPHI();
+
+ BasicBlock *Succ = BI->getSuccessor(0);
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (BBI->isTerminator() && // Terminator is the only non-phi instruction!
+ Succ != BB) // Don't hurt infinite loops!
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB, Succ))
+ return true;
+
+ } else { // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
+ return SimplifyCFG(BB) | true;
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ BasicBlock::iterator I = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI))
+ return SimplifyCFG(BB) | true;
+ } else if (&*I == cast<Instruction>(BI->getCondition())){
+ ++I;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI))
+ return SimplifyCFG(BB) | true;
+ }
+ }
+ }
+
+ // If this is a branch on a phi node in the current block, thread control
+ // through this block if any PHI node entries are constants.
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+ if (PN->getParent() == BI->getParent())
+ if (FoldCondBranchOnPHI(BI))
+ return SimplifyCFG(BB) | true;
+
+ // If this basic block is ONLY a setcc and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the setcc into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB) | true;
+
+
+ // Scan predecessor blocks for conditional branches.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI))
+ return SimplifyCFG(BB) | true;
+ }
+ } else if (isa<UnreachableInst>(BB->getTerminator())) {
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ Instruction *Unreachable = BB->getTerminator();
+ while (Unreachable != BB->begin()) {
+ BasicBlock::iterator BBI = Unreachable;
+ --BBI;
+ // Do not delete instructions that can have side effects, like calls
+ // (which may never return) and volatile loads and stores.
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ if (SI->isVolatile())
+ break;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ if (LI->isVolatile())
+ break;
+
+ // Delete this instruction
+ BB->getInstList().erase(BBI);
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() == Unreachable) {
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ TerminatorInst *TI = Preds[i]->getTerminator();
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional()) {
+ if (BI->getSuccessor(0) == BB) {
+ new UnreachableInst(TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ } else {
+ if (BI->getSuccessor(0) == BB) {
+ BranchInst::Create(BI->getSuccessor(1), BI);
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ } else if (BI->getSuccessor(1) == BB) {
+ BranchInst::Create(BI->getSuccessor(0), BI);
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == BB) {
+ BB->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e;
+ Changed = true;
+ }
+ // If the default destination is unreachable, figure out the most popular
+ // destination and make it the default.
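+ // For example (hypothetical): with cases {1 -> A, 2 -> A, 3 -> B} and the
+ // default pointing at this unreachable block, A is the most popular target,
+ // so A becomes the new default and its explicit case entries are deleted,
+ // leaving just {3 -> B}.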
+ if (SI->getSuccessor(0) == BB) {
+ std::map<BasicBlock*, unsigned> Popularity;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ Popularity[SI->getSuccessor(i)]++;
+
+ // Find the most popular block.
+ unsigned MaxPop = 0;
+ BasicBlock *MaxBlock = 0;
+ for (std::map<BasicBlock*, unsigned>::iterator
+ I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+ if (I->second > MaxPop) {
+ MaxPop = I->second;
+ MaxBlock = I->first;
+ }
+ }
+ if (MaxBlock) {
+ // Make this the new default, allowing us to delete any explicit
+ // edges to it.
+ SI->setSuccessor(0, MaxBlock);
+ Changed = true;
+
+ // If MaxBlock has PHI nodes in it, remove MaxPop-1 entries from
+ // it.
+ if (isa<PHINode>(MaxBlock->begin()))
+ for (unsigned i = 0; i != MaxPop-1; ++i)
+ MaxBlock->removePredecessor(SI->getParent());
+
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == MaxBlock) {
+ SI->removeCase(i);
+ --i; --e;
+ }
+ }
+ }
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ // Convert the invoke to a call instruction. This would be a good
+ // place to note that the call does not throw though.
+ BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+ II->removeFromParent(); // Take out of symbol table
+
+ // Insert the call now...
+ SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+ CallInst *CI = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(),
+ II->getName(), BI);
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the Call does now instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+ }
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB)) {
+ // We know there are no successors, so just nuke the block.
+ M->getBasicBlockList().erase(BB);
+ return true;
+ }
+ }
+ }
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ //
+ if (MergeBlockIntoPredecessor(BB))
+ return true;
+
+ // Otherwise, if this block only has a single predecessor, and if that block
+ // is a conditional branch, see if we can hoist any code from this block up
+ // into our predecessor.
+ pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
+ BasicBlock *OnlyPred = *PI++;
+ for (; PI != PE; ++PI) // Search all predecessors, see if they are all same
+ if (*PI != OnlyPred) {
+ OnlyPred = 0; // There are multiple different predecessors...
+ break;
+ }
+
+ if (OnlyPred)
+ if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
+ if (BI->isConditional()) {
+ // Get the other block.
+ BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB);
+ PI = pred_begin(OtherBB);
+ ++PI;
+
+ if (PI == pred_end(OtherBB)) {
+ // We have a conditional branch to two blocks that are only reachable
+ // from the condbr. We know that the condbr dominates the two blocks,
+ // so see if there is any identical code in the "then" and "else"
+ // blocks. If so, we can hoist it up to the branching block.
+ Changed |= HoistThenElseCodeToIf(BI);
+ } else {
+ BasicBlock* OnlySucc = NULL;
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ if (!OnlySucc)
+ OnlySucc = *SI;
+ else if (*SI != OnlySucc) {
+ OnlySucc = 0; // There are multiple distinct successors!
+ break;
+ }
+ }
+
+ if (OnlySucc == OtherBB) {
+ // If BB's only successor is the other successor of the predecessor,
+ // i.e. a triangle, see if we can hoist any code from this block up
+ // to the "if" block.
+ Changed |= SpeculativelyExecuteBB(BI, BB);
+ }
+ }
+ }
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ if (BI->isConditional() && isa<Instruction>(BI->getCondition())) {
+ Instruction *Cond = cast<Instruction>(BI->getCondition());
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values);
+ if (CompVal && CompVal->getType()->isInteger()) {
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle; remove them now.
+ std::sort(Values.begin(), Values.end(), ConstantIntOrdering());
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ // Create the new switch instruction now.
+ SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB,
+ Values.size(), BI);
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from PI to the EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(*PI);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, *PI);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 0000000..848f2b8
--- /dev/null
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,139 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return
+// instruction in them. Additionally, it keeps track of which node is the new
+// exit node of the CFG. If the CFG contains no return (resp. unwind,
+// unreachable) blocks, the corresponding getReturnBlock (getUnwindBlock,
+// getUnreachableBlock) accessor will return a null pointer.
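+//
+// For example, running "opt -mergereturn" on a function with several ret
+// instructions rewrites them into branches to a single UnifiedReturnBlock
+// (and, when needed, a UnifiedUnwindBlock / UnifiedUnreachableBlock).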
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+char UnifyFunctionExitNodes::ID = 0;
+static RegisterPass<UnifyFunctionExitNodes>
+X("mergereturn", "Unify function exit nodes");
+
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+ return new UnifyFunctionExitNodes();
+}
+
+void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+ // This is a cluster of orthogonal Transforms
+ AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreservedID(LowerSwitchID);
+}
+
+// runOnFunction - Unify all exit nodes of the CFG by creating a new
+// BasicBlock, and converting all returns to unconditional branches to this
+// new basic block. Unwind and unreachable blocks are unified the same way.
+//
+// Returns true if the function was modified, false otherwise; if there are
+// no return stmts in the Function, ReturnBlock is set to a null pointer.
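+//
+// A sketch of the effect on hypothetical IR:
+//   bb1: ret i32 %a          bb2: ret i32 %b
+// becomes
+//   bb1: br label %UnifiedReturnBlock
+//   bb2: br label %UnifiedReturnBlock
+//   UnifiedReturnBlock:
+//     %UnifiedRetVal = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
+//     ret i32 %UnifiedRetVal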
+//
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+ // Loop over all of the blocks in a function, tracking all of the blocks that
+ // return.
+ //
+ std::vector<BasicBlock*> ReturningBlocks;
+ std::vector<BasicBlock*> UnwindingBlocks;
+ std::vector<BasicBlock*> UnreachableBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (isa<ReturnInst>(I->getTerminator()))
+ ReturningBlocks.push_back(I);
+ else if (isa<UnwindInst>(I->getTerminator()))
+ UnwindingBlocks.push_back(I);
+ else if (isa<UnreachableInst>(I->getTerminator()))
+ UnreachableBlocks.push_back(I);
+
+ // Handle unwinding blocks first.
+ if (UnwindingBlocks.empty()) {
+ UnwindBlock = 0;
+ } else if (UnwindingBlocks.size() == 1) {
+ UnwindBlock = UnwindingBlocks.front();
+ } else {
+ UnwindBlock = BasicBlock::Create("UnifiedUnwindBlock", &F);
+ new UnwindInst(UnwindBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
+ E = UnwindingBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unwind insn
+ BranchInst::Create(UnwindBlock, BB);
+ }
+ }
+
+ // Then unreachable blocks.
+ if (UnreachableBlocks.empty()) {
+ UnreachableBlock = 0;
+ } else if (UnreachableBlocks.size() == 1) {
+ UnreachableBlock = UnreachableBlocks.front();
+ } else {
+ UnreachableBlock = BasicBlock::Create("UnifiedUnreachableBlock", &F);
+ new UnreachableInst(UnreachableBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+ E = UnreachableBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+ }
+
+ // Now handle return blocks.
+ if (ReturningBlocks.empty()) {
+ ReturnBlock = 0;
+ return false; // No blocks return
+ } else if (ReturningBlocks.size() == 1) {
+ ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+ return false;
+ }
+
+ // Otherwise, we need to insert a new basic block into the function, add a PHI
+ // node (if the function returns a value), and convert all of the return
+ // instructions into unconditional branches.
+ //
+ BasicBlock *NewRetBlock = BasicBlock::Create("UnifiedReturnBlock", &F);
+
+ PHINode *PN = 0;
+ if (F.getReturnType() == Type::VoidTy) {
+ ReturnInst::Create(NULL, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
+ NewRetBlock->getInstList().push_back(PN);
+ ReturnInst::Create(PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ //
+ for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+ E = ReturningBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->getInstList().pop_back(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+ ReturnBlock = NewRetBlock;
+ return true;
+}
diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/UnrollLoop.cpp
new file mode 100644
index 0000000..caef7ec
--- /dev/null
+++ b/lib/Transforms/Utils/UnrollLoop.cpp
@@ -0,0 +1,369 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// It works best when loops have been canonicalized by the -indvars pass,
+// allowing it to determine the trip counts of loops easily.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cstdio>
+
+using namespace llvm;
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by ValueMap.
+static inline void RemapInstruction(Instruction *I,
+ DenseMap<const Value *, Value*> &ValueMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
+ if (It != ValueMap.end()) Op = It->second;
+ I->setOperand(op, Op);
+ }
+}
+
+/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
+/// only has one predecessor, and that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent.
+/// Returns the new combined block.
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ BasicBlock *OnlyPred = BB->getSinglePredecessor();
+ if (!OnlyPred) return 0;
+
+ if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ DOUT << "Merging: " << *BB << "into: " << *OnlyPred;
+
+ // Resolve any PHI nodes at the start of the block. They are all
+ // guaranteed to have exactly one entry if they exist, unless there are
+ // multiple duplicate (but guaranteed to be equal) entries for the
+ // incoming edges. This occurs when there are multiple edges from
+ // OnlyPred to OnlySucc.
+ FoldSingleEntryPHINodes(BB);
+
+ // Delete the unconditional branch from the predecessor...
+ OnlyPred->getInstList().pop_back();
+
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(OnlyPred);
+
+ std::string OldName = BB->getName();
+
+ // Erase basic block from the function...
+ LI->removeBlock(BB);
+ BB->eraseFromParent();
+
+ // Inherit predecessor's name if it exists...
+ if (!OldName.empty() && !OnlyPred->hasName())
+ OnlyPred->setName(OldName);
+
+ return OnlyPred;
+}
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns
+/// true if unrolling was successful, or false if the loop was unmodified.
+/// Unrolling can only fail when the loop's latch block is not terminated by a
+/// conditional branch instruction. However, if the trip count (and multiple)
+/// are not known, loop unrolling will mostly produce larger code that is no
+/// faster.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
+/// removed from the LoopPassManager as well. LPM can also be NULL.
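+///
+/// A typical invocation from a loop pass, as a sketch (the factor 4 here is
+/// an arbitrary illustrative choice):
+///
+///   UnrollLoop(L, 4, &getAnalysis<LoopInfo>(), &LPM);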
+bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo *LI,
+                      LPPassManager *LPM) {
+ assert(L->isLCSSAForm());
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ if (!BI || BI->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DOUT << " Can't unroll; loop not terminated by a conditional branch.\n";
+ return false;
+ }
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ // Find trip multiple if count is not available
+ unsigned TripMultiple = 1;
+ if (TripCount == 0)
+ TripMultiple = L->getSmallConstantTripMultiple();
+
+ if (TripCount != 0)
+ DOUT << " Trip Count = " << TripCount << "\n";
+ if (TripMultiple != 1)
+ DOUT << " Trip Multiple = " << TripMultiple << "\n";
+
+  // Effectively "DCE" unrolled iterations that are beyond the trip count
+  // and will never be executed.
+ if (TripCount != 0 && Count > TripCount)
+ Count = TripCount;
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = Count == TripCount;
+
+  // If we know the trip count, the multiple is irrelevant; compute the
+  // breakout trip directly from it.
+ unsigned BreakoutTrip = 0;
+ if (TripCount != 0) {
+ BreakoutTrip = TripCount % Count;
+ TripMultiple = 0;
+ } else {
+ // Figure out what multiple to use.
+ BreakoutTrip = TripMultiple =
+ (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ }
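+
+  // As a worked example with illustrative numbers: for TripCount = 10 and
+  // Count = 4, BreakoutTrip is 10 % 4 == 2, so only the latch of the second
+  // unrolled copy keeps its conditional branch; the loop exits there on the
+  // tenth trip. If instead only a multiple is known, e.g. Count = 4 with
+  // TripMultiple = 6, the GCD yields BreakoutTrip = TripMultiple = 2.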
+
+ if (CompletelyUnroll) {
+ DOUT << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n";
+ } else {
+ DOUT << "UNROLLING loop %" << Header->getName()
+ << " by " << Count;
+ if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ DOUT << " with a breakout at trip " << BreakoutTrip;
+ } else if (TripMultiple != 1) {
+ DOUT << " with " << TripMultiple << " trips per branch";
+ }
+ DOUT << "!\n";
+ }
+
+ std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
+
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+
+ // For the first iteration of the loop, we should use the precloned values for
+ // PHI nodes. Insert associations now.
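+  // Note that seeding LastValueMap below with the identity mapping (I -> I)
+  // lets the first cloned iteration pick up the original loop's values for
+  // loop-carried PHIs.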
+ typedef DenseMap<const Value*, Value*> ValueMapTy;
+ ValueMapTy LastValueMap;
+ std::vector<PHINode*> OrigPHINode;
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ OrigPHINode.push_back(PN);
+ if (Instruction *I =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
+ if (L->contains(I->getParent()))
+ LastValueMap[I] = I;
+ }
+
+ std::vector<BasicBlock*> Headers;
+ std::vector<BasicBlock*> Latches;
+ Headers.push_back(Header);
+ Latches.push_back(LatchBlock);
+
+ for (unsigned It = 1; It != Count; ++It) {
+ char SuffixBuffer[100];
+ sprintf(SuffixBuffer, ".%d", It);
+
+ std::vector<BasicBlock*> NewBlocks;
+
+ for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
+ E = LoopBlocks.end(); BB != E; ++BB) {
+ ValueMapTy ValueMap;
+ BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer);
+ Header->getParent()->getBasicBlockList().push_back(New);
+
+ // Loop over all of the PHI nodes in the block, changing them to use the
+ // incoming values from the previous block.
+ if (*BB == Header)
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
+ Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+ if (It > 1 && L->contains(InValI->getParent()))
+ InVal = LastValueMap[InValI];
+ ValueMap[OrigPHINode[i]] = InVal;
+ New->getInstList().erase(NewPHI);
+ }
+
+ // Update our running map of newest clones
+ LastValueMap[*BB] = New;
+ for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
+ VI != VE; ++VI)
+ LastValueMap[VI->first] = VI->second;
+
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+      // Add phi entries for newly created values to all exit blocks except
+      // the successor of the latch block; the PHI nodes in that block are
+      // updated separately once unrolling completes.
+ if (*BB != LatchBlock)
+ for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
+ UI != UE;) {
+ Instruction *UseInst = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) {
+ PHINode *phi = cast<PHINode>(UseInst);
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ phi->addIncoming(Incoming, New);
+ }
+ }
+
+ // Keep track of new headers and latches as we create them, so that
+ // we can insert the proper branches later.
+ if (*BB == Header)
+ Headers.push_back(New);
+ if (*BB == LatchBlock) {
+ Latches.push_back(New);
+
+        // Also, clear out the new latch's back edge so that it doesn't look
+        // like a new loop and is amenable to being merged with adjacent
+        // blocks later on.
+ TerminatorInst *Term = New->getTerminator();
+ assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
+ assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
+ Term->setSuccessor(!ContinueOnTrue, NULL);
+ }
+
+ NewBlocks.push_back(New);
+ }
+
+ // Remap all instructions in the most recent iteration
+ for (unsigned i = 0; i < NewBlocks.size(); ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ RemapInstruction(I, LastValueMap);
+ }
+
+ // The latch block exits the loop. If there are any PHI nodes in the
+ // successor blocks, update them to use the appropriate values computed as the
+ // last iteration of the loop.
+ if (Count != 1) {
+ SmallPtrSet<PHINode*, 8> Users;
+ for (Value::use_iterator UI = LatchBlock->use_begin(),
+ UE = LatchBlock->use_end(); UI != UE; ++UI)
+ if (PHINode *phi = dyn_cast<PHINode>(*UI))
+ Users.insert(phi);
+
+ BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
+ for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
+ SI != SE; ++SI) {
+ PHINode *PN = *SI;
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI->getParent()))
+ InVal = LastValueMap[InVal];
+ }
+ PN->addIncoming(InVal, LastIterationBB);
+ }
+ }
+
+ // Now, if we're doing complete unrolling, loop over the PHI nodes in the
+ // original block, setting them to their incoming values.
+ if (CompletelyUnroll) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *PN = OrigPHINode[i];
+ PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+ Header->getInstList().erase(PN);
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll && j == 0) {
+ Dest = LoopExit;
+ NeedConditional = false;
+ }
+
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ NeedConditional = false;
+ }
+
+ if (NeedConditional) {
+ // Update the conditional branch's successor for the following
+ // iteration.
+ Term->setSuccessor(!ContinueOnTrue, Dest);
+ } else {
+ Term->setUnconditionalDest(Dest);
+ // Merge adjacent basic blocks, if possible.
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
+ std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+ std::replace(Headers.begin(), Headers.end(), Dest, Fold);
+ }
+ }
+ }
+
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+ for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+ BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ if (isInstructionTriviallyDead(Inst))
+ (*BB)->getInstList().erase(Inst);
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
+ Inst->replaceAllUsesWith(C);
+ (*BB)->getInstList().erase(Inst);
+ }
+ }
+
+ NumCompletelyUnrolled += CompletelyUnroll;
+ ++NumUnrolled;
+ // Remove the loop from the LoopPassManager if it's completely removed.
+ if (CompletelyUnroll && LPM != NULL)
+ LPM->deleteLoopFromQueue(L);
+
+ // If we didn't completely unroll the loop, it should still be in LCSSA form.
+ if (!CompletelyUnroll)
+ assert(L->isLCSSAForm());
+
+ return true;
+}
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 0000000..20b676d
--- /dev/null
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,143 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
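+//
+// A typical use, as a sketch: seed a ValueMapTy with the old-to-new mappings
+// produced while cloning (e.g. by CloneBasicBlock), then call RemapInstruction
+// on each cloned instruction so that its operands refer to the new values.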
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/MDNode.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
+ Value *&VMSlot = VM[V];
+ if (VMSlot) return VMSlot; // Does it exist in the map yet?
+
+ // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
+ // DenseMap. This includes any recursive calls to MapValue.
+
+ // Global values do not need to be seeded into the ValueMap if they are using
+ // the identity mapping.
+ if (isa<GlobalValue>(V) || isa<InlineAsm>(V))
+ return VMSlot = const_cast<Value*>(V);
+
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+ if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
+ isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
+ isa<UndefValue>(C) || isa<MDString>(C))
+ return VMSlot = C; // Primitive constants map directly
+ else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM);
+ if (MV != *i) {
+ // This array must contain a reference to a global, make a new array
+ // and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CA->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ return VM[V] = ConstantArray::get(CA->getType(), Values);
+ }
+ }
+ return VM[V] = C;
+
+ } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM);
+ if (MV != *i) {
+ // This struct must contain a reference to a global, make a new struct
+ // and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CS->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ return VM[V] = ConstantStruct::get(CS->getType(), Values);
+ }
+ }
+ return VM[V] = C;
+
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ std::vector<Constant*> Ops;
+ for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
+ Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+ return VM[V] = CE->getWithOperands(Ops);
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM);
+ if (MV != *i) {
+ // This vector value must contain a reference to a global, make a new
+ // vector constant and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CP->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ return VM[V] = ConstantVector::get(Values);
+ }
+ }
+ return VM[V] = C;
+
+ } else if (MDNode *N = dyn_cast<MDNode>(C)) {
+ for (MDNode::const_elem_iterator b = N->elem_begin(), i = b,
+ e = N->elem_end(); i != e; ++i) {
+ if (!*i) continue;
+
+ Value *MV = MapValue(*i, VM);
+ if (MV != *i) {
+ // This MDNode must contain a reference to a global, make a new MDNode
+ // and return it.
+ SmallVector<Value*, 8> Values;
+ Values.reserve(N->getNumElements());
+ for (MDNode::const_elem_iterator j = b; j != i; ++j)
+ Values.push_back(*j);
+ Values.push_back(MV);
+ for (++i; i != e; ++i)
+ Values.push_back(MapValue(*i, VM));
+ return VM[V] = MDNode::get(Values.data(), Values.size());
+ }
+ }
+ return VM[V] = C;
+
+ } else {
+ assert(0 && "Unknown type of constant!");
+ }
+ }
+
+ return 0;
+}
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by ValueMap.
+///
+void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {
+ for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
+ Value *V = MapValue(*op, ValueMap);
+ assert(V && "Referenced value not in value map!");
+ *op = V;
+ }
+}
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
new file mode 100644
index 0000000..6b369b6
--- /dev/null
+++ b/lib/VMCore/AsmWriter.cpp
@@ -0,0 +1,1880 @@
+//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Writer.h
+//
+// Note that these routines must be extremely tolerant of various errors in the
+// LLVM code, because they can be used for debugging transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Make the virtual table appear in this compilation unit.
+AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+static const Module *getModuleFromVal(const Value *V) {
+ if (const Argument *MA = dyn_cast<Argument>(V))
+ return MA->getParent() ? MA->getParent()->getParent() : 0;
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent() ? BB->getParent()->getParent() : 0;
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
+ return M ? M->getParent() : 0;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return GV->getParent();
+ return 0;
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
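+// For example, the string "hi\"\n" prints as hi\22\0A, since 0x22 and 0x0A
+// are the hex codes for '"' and '\n'.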
+static void PrintEscapedString(const char *Str, unsigned Length,
+ raw_ostream &Out) {
+ for (unsigned i = 0; i != Length; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
+ PrintEscapedString(Str.c_str(), Str.size(), Out);
+}
+
+enum PrefixType {
+ GlobalPrefix,
+ LabelPrefix,
+ LocalPrefix,
+ NoPrefix
+};
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
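+///
+/// For example, a local value named "foo.bar" prints as %foo.bar, while one
+/// named "foo bar" (or any name beginning with a digit) is quoted: %"foo bar".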
+static void PrintLLVMName(raw_ostream &OS, const char *NameStr,
+ unsigned NameLen, PrefixType Prefix) {
+ assert(NameStr && "Cannot get empty name!");
+ switch (Prefix) {
+ default: assert(0 && "Bad prefix!");
+ case NoPrefix: break;
+ case GlobalPrefix: OS << '@'; break;
+ case LabelPrefix: break;
+ case LocalPrefix: OS << '%'; break;
+ }
+
+ // Scan the name to see if it needs quotes first.
+ bool NeedsQuotes = isdigit(NameStr[0]);
+ if (!NeedsQuotes) {
+ for (unsigned i = 0; i != NameLen; ++i) {
+ char C = NameStr[i];
+ if (!isalnum(C) && C != '-' && C != '.' && C != '_') {
+ NeedsQuotes = true;
+ break;
+ }
+ }
+ }
+
+ // If we didn't need any quotes, just write out the name in one blast.
+ if (!NeedsQuotes) {
+ OS.write(NameStr, NameLen);
+ return;
+ }
+
+ // Okay, we need quotes. Output the quotes and escape any scary characters as
+ // needed.
+ OS << '"';
+ PrintEscapedString(NameStr, NameLen, OS);
+ OS << '"';
+}
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, const Value *V) {
+ PrintLLVMName(OS, V->getNameStart(), V->getNameLen(),
+ isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
+}
+
+//===----------------------------------------------------------------------===//
+// TypePrinting Class: Type printing machinery
+//===----------------------------------------------------------------------===//
+
+static DenseMap<const Type *, std::string> &getTypeNamesMap(void *M) {
+ return *static_cast<DenseMap<const Type *, std::string>*>(M);
+}
+
+void TypePrinting::clear() {
+ getTypeNamesMap(TypeNames).clear();
+}
+
+bool TypePrinting::hasTypeName(const Type *Ty) const {
+ return getTypeNamesMap(TypeNames).count(Ty);
+}
+
+void TypePrinting::addTypeName(const Type *Ty, const std::string &N) {
+ getTypeNamesMap(TypeNames).insert(std::make_pair(Ty, N));
+}
+
+
+TypePrinting::TypePrinting() {
+ TypeNames = new DenseMap<const Type *, std::string>();
+}
+
+TypePrinting::~TypePrinting() {
+ delete &getTypeNamesMap(TypeNames);
+}
+
+/// CalcTypeName - Write the specified type to the specified raw_ostream, making
+/// use of type names or up references to shorten the type name where possible.
+void TypePrinting::CalcTypeName(const Type *Ty,
+ SmallVectorImpl<const Type *> &TypeStack,
+ raw_ostream &OS, bool IgnoreTopLevelName) {
+ // Check to see if the type is named.
+ if (!IgnoreTopLevelName) {
+ DenseMap<const Type *, std::string> &TM = getTypeNamesMap(TypeNames);
+ DenseMap<const Type *, std::string>::iterator I = TM.find(Ty);
+ if (I != TM.end()) {
+ OS << I->second;
+ return;
+ }
+ }
+
+ // Check to see if the Type is already on the stack...
+ unsigned Slot = 0, CurSize = TypeStack.size();
+ while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type
+
+ // This is another base case for the recursion. In this case, we know
+ // that we have looped back to a type that we have previously visited.
+ // Generate the appropriate upreference to handle this.
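+  // For example, with type names ignored, the recursive type
+  //   %T = type { %T* }
+  // prints as { \2* }: the \2 refers two levels up the stack, back to the
+  // struct itself.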
+ if (Slot < CurSize) {
+ OS << '\\' << unsigned(CurSize-Slot); // Here's the upreference
+ return;
+ }
+
+ TypeStack.push_back(Ty); // Recursive case: Add us to the stack..
+
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: OS << "void"; break;
+ case Type::FloatTyID: OS << "float"; break;
+ case Type::DoubleTyID: OS << "double"; break;
+ case Type::X86_FP80TyID: OS << "x86_fp80"; break;
+ case Type::FP128TyID: OS << "fp128"; break;
+ case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
+ case Type::LabelTyID: OS << "label"; break;
+ case Type::MetadataTyID: OS << "metadata"; break;
+ case Type::IntegerTyID:
+ OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
+ break;
+
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+ CalcTypeName(FTy->getReturnType(), TypeStack, OS);
+ OS << " (";
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ if (I != FTy->param_begin())
+ OS << ", ";
+ CalcTypeName(*I, TypeStack, OS);
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams()) OS << ", ";
+ OS << "...";
+ }
+ OS << ')';
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+ if (STy->isPacked())
+ OS << '<';
+ OS << "{ ";
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ CalcTypeName(*I, TypeStack, OS);
+ if (next(I) != STy->element_end())
+ OS << ',';
+ OS << ' ';
+ }
+ OS << '}';
+ if (STy->isPacked())
+ OS << '>';
+ break;
+ }
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ CalcTypeName(PTy->getElementType(), TypeStack, OS);
+ if (unsigned AddressSpace = PTy->getAddressSpace())
+ OS << " addrspace(" << AddressSpace << ')';
+ OS << '*';
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ OS << '[' << ATy->getNumElements() << " x ";
+ CalcTypeName(ATy->getElementType(), TypeStack, OS);
+ OS << ']';
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType *PTy = cast<VectorType>(Ty);
+ OS << "<" << PTy->getNumElements() << " x ";
+ CalcTypeName(PTy->getElementType(), TypeStack, OS);
+ OS << '>';
+ break;
+ }
+ case Type::OpaqueTyID:
+ OS << "opaque";
+ break;
+ default:
+ OS << "<unrecognized-type>";
+ break;
+ }
+
+ TypeStack.pop_back(); // Remove self from stack.
+}
+
+/// print - The internal guts of printing out a type that has a
+/// potentially named portion.
+///
+void TypePrinting::print(const Type *Ty, raw_ostream &OS,
+ bool IgnoreTopLevelName) {
+ // Check to see if the type is named.
+ DenseMap<const Type*, std::string> &TM = getTypeNamesMap(TypeNames);
+ if (!IgnoreTopLevelName) {
+ DenseMap<const Type*, std::string>::iterator I = TM.find(Ty);
+ if (I != TM.end()) {
+ OS << I->second;
+ return;
+ }
+ }
+
+ // Otherwise we have a type that has not been named but is a derived type.
+ // Carefully recurse the type hierarchy to print out any contained symbolic
+ // names.
+ SmallVector<const Type *, 16> TypeStack;
+ std::string TypeName;
+
+ raw_string_ostream TypeOS(TypeName);
+ CalcTypeName(Ty, TypeStack, TypeOS, IgnoreTopLevelName);
+ OS << TypeOS.str();
+
+ // Cache type name for later use.
+ if (!IgnoreTopLevelName)
+ TM.insert(std::make_pair(Ty, TypeOS.str()));
+}
+
+namespace {
+ class TypeFinder {
+ // To avoid walking constant expressions multiple times and other IR
+ // objects, we keep several helper maps.
+ DenseSet<const Value*> VisitedConstants;
+ DenseSet<const Type*> VisitedTypes;
+
+ TypePrinting &TP;
+ std::vector<const Type*> &NumberedTypes;
+ public:
+ TypeFinder(TypePrinting &tp, std::vector<const Type*> &numberedTypes)
+ : TP(tp), NumberedTypes(numberedTypes) {}
+
+ void Run(const Module &M) {
+      // Get types from the type symbol table. This gets opaque types
+      // referenced only through derived named types.
+ const TypeSymbolTable &ST = M.getTypeSymbolTable();
+ for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
+ TI != E; ++TI)
+ IncorporateType(TI->second);
+
+ // Get types from global variables.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ IncorporateType(I->getType());
+ if (I->hasInitializer())
+ IncorporateValue(I->getInitializer());
+ }
+
+ // Get types from aliases.
+ for (Module::const_alias_iterator I = M.alias_begin(),
+ E = M.alias_end(); I != E; ++I) {
+ IncorporateType(I->getType());
+ IncorporateValue(I->getAliasee());
+ }
+
+ // Get types from functions.
+ for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+ IncorporateType(FI->getType());
+
+ for (Function::const_iterator BB = FI->begin(), E = FI->end();
+ BB != E;++BB)
+ for (BasicBlock::const_iterator II = BB->begin(),
+ E = BB->end(); II != E; ++II) {
+ const Instruction &I = *II;
+ // Incorporate the type of the instruction and all its operands.
+ IncorporateType(I.getType());
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ IncorporateValue(*OI);
+ }
+ }
+ }
+
+ private:
+ void IncorporateType(const Type *Ty) {
+      // Check to see if we've already visited this type.
+ if (!VisitedTypes.insert(Ty).second)
+ return;
+
+ // If this is a structure or opaque type, add a name for the type.
+ if (((isa<StructType>(Ty) && cast<StructType>(Ty)->getNumElements())
+ || isa<OpaqueType>(Ty)) && !TP.hasTypeName(Ty)) {
+ TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size())));
+ NumberedTypes.push_back(Ty);
+ }
+
+ // Recursively walk all contained types.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ IncorporateType(*I);
+ }
+
+ /// IncorporateValue - This method is used to walk operand lists finding
+ /// types hiding in constant expressions and other operands that won't be
+ /// walked in other ways. GlobalValues, basic blocks, instructions, and
+ /// inst operands are all explicitly enumerated.
+ void IncorporateValue(const Value *V) {
+ if (V == 0 || !isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Check this type.
+ IncorporateType(V->getType());
+
+ // Look in operands for types.
+ const Constant *C = cast<Constant>(V);
+ for (Constant::const_op_iterator I = C->op_begin(),
+ E = C->op_end(); I != E;++I)
+ IncorporateValue(*I);
+ }
+ };
+} // end anonymous namespace
+
+
+/// AddModuleTypesToPrinter - Add all of the symbolic type names for types in
+/// the specified module to the TypePrinter and all numbered types to it and the
+/// NumberedTypes table.
+static void AddModuleTypesToPrinter(TypePrinting &TP,
+ std::vector<const Type*> &NumberedTypes,
+ const Module *M) {
+ if (M == 0) return;
+
+ // If the module has a symbol table, take all global types and stuff their
+ // names into the TypeNames map.
+ const TypeSymbolTable &ST = M->getTypeSymbolTable();
+ for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
+ TI != E; ++TI) {
+ const Type *Ty = cast<Type>(TI->second);
+
+ // As a heuristic, don't insert pointer to primitive types, because
+ // they are used too often to have a single useful name.
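+    // (For instance, giving i32* a name would make every i32* in the module
+    // print under that name, which obscures more than it helps.)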
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ const Type *PETy = PTy->getElementType();
+ if ((PETy->isPrimitiveType() || PETy->isInteger()) &&
+ !isa<OpaqueType>(PETy))
+ continue;
+ }
+
+ // Likewise don't insert primitives either.
+ if (Ty->isInteger() || Ty->isPrimitiveType())
+ continue;
+
+ // Get the name as a string and insert it into TypeNames.
+ std::string NameStr;
+ raw_string_ostream NameOS(NameStr);
+ PrintLLVMName(NameOS, TI->first.c_str(), TI->first.length(), LocalPrefix);
+ TP.addTypeName(Ty, NameOS.str());
+ }
+
+ // Walk the entire module to find references to unnamed structure and opaque
+ // types. This is required for correctness by opaque types (because multiple
+  // uses of an unnamed opaque type need to be referred to by the same ID) and
+ // it shrinks complex recursive structure types substantially in some cases.
+ TypeFinder(TP, NumberedTypes).Run(*M);
+}
+
+
+/// WriteTypeSymbolic - This attempts to write the specified type as a symbolic
+/// type, iff there is an entry in the module's symbol table for the specified
+/// type or one of its component types.
+///
+void llvm::WriteTypeSymbolic(raw_ostream &OS, const Type *Ty, const Module *M) {
+ TypePrinting Printer;
+ std::vector<const Type*> NumberedTypes;
+ AddModuleTypesToPrinter(Printer, NumberedTypes, M);
+ Printer.print(Ty, OS);
+}
+
+//===----------------------------------------------------------------------===//
+// SlotTracker Class: Enumerate slot numbers for unnamed values
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// This class provides computation of slot numbers for LLVM Assembly writing.
+///
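+/// Unnamed values are simply numbered in order of appearance: module-level
+/// slots (printed as @0, @1, ...) for unnamed globals and functions, and
+/// function-level slots (%0, %1, ...) for unnamed arguments, basic blocks,
+/// and instructions.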
+class SlotTracker {
+public:
+ /// ValueMap - A mapping of Values to slot numbers
+ typedef DenseMap<const Value*, unsigned> ValueMap;
+
+private:
+ /// TheModule - The module for which we are holding slot numbers
+ const Module* TheModule;
+
+ /// TheFunction - The function for which we are holding slot numbers
+ const Function* TheFunction;
+ bool FunctionProcessed;
+
+  /// mMap - The slot map for the module-level data
+ ValueMap mMap;
+ unsigned mNext;
+
+  /// fMap - The slot map for the function-level data
+ ValueMap fMap;
+ unsigned fNext;
+
+public:
+ /// Construct from a module
+ explicit SlotTracker(const Module *M);
+  /// Construct from a function, starting out in the function-incorporation
+  /// state.
+ explicit SlotTracker(const Function *F);
+
+  /// Return the slot number of the specified value, or -1 if the value is
+  /// not in the SlotTracker.
+ int getLocalSlot(const Value *V);
+ int getGlobalSlot(const GlobalValue *V);
+
+ /// If you'd like to deal with a function instead of just a module, use
+ /// this method to get its data into the SlotTracker.
+ void incorporateFunction(const Function *F) {
+ TheFunction = F;
+ FunctionProcessed = false;
+ }
+
+ /// After calling incorporateFunction, use this method to remove the
+ /// most recently incorporated function from the SlotTracker. This
+ /// will reset the state of the machine back to just the module contents.
+ void purgeFunction();
+
+ // Implementation Details
+private:
+ /// This function does the actual initialization.
+ inline void initialize();
+
+ /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+ void CreateModuleSlot(const GlobalValue *V);
+
+ /// CreateFunctionSlot - Insert the specified Value* into the slot table.
+ void CreateFunctionSlot(const Value *V);
+
+ /// Add all of the module level global variables (and their initializers)
+ /// and function declarations, but not the contents of those functions.
+ void processModule();
+
+  /// Add all of the function's arguments, basic blocks, and instructions.
+ void processFunction();
+
+ SlotTracker(const SlotTracker &); // DO NOT IMPLEMENT
+ void operator=(const SlotTracker &); // DO NOT IMPLEMENT
+};
+
+} // end anonymous namespace
+
+
+static SlotTracker *createSlotTracker(const Value *V) {
+ if (const Argument *FA = dyn_cast<Argument>(V))
+ return new SlotTracker(FA->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return new SlotTracker(I->getParent()->getParent());
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return new SlotTracker(BB->getParent());
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return new SlotTracker(GV->getParent());
+
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return new SlotTracker(GA->getParent());
+
+ if (const Function *Func = dyn_cast<Function>(V))
+ return new SlotTracker(Func);
+
+ return 0;
+}
+
+#if 0
+#define ST_DEBUG(X) cerr << X
+#else
+#define ST_DEBUG(X)
+#endif
+
+// Module level constructor. Causes the contents of the Module (sans functions)
+// to be added to the slot table.
+SlotTracker::SlotTracker(const Module *M)
+ : TheModule(M), TheFunction(0), FunctionProcessed(false), mNext(0), fNext(0) {
+}
+
+// Function level constructor. Causes the contents of the Module and the one
+// function provided to be added to the slot table.
+SlotTracker::SlotTracker(const Function *F)
+ : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false),
+ mNext(0), fNext(0) {
+}
+
+inline void SlotTracker::initialize() {
+ if (TheModule) {
+ processModule();
+    TheModule = 0;  // Prevent re-processing next time we're called.
+ }
+
+ if (TheFunction && !FunctionProcessed)
+ processFunction();
+}
+
+// Iterate through all the global variables, functions, and global
+// variable initializers and create slots for them.
+void SlotTracker::processModule() {
+ ST_DEBUG("begin processModule!\n");
+
+ // Add all of the unnamed global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (!I->hasName())
+ CreateModuleSlot(I);
+
+ // Add all the unnamed functions to the table.
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ if (!I->hasName())
+ CreateModuleSlot(I);
+
+ ST_DEBUG("end processModule!\n");
+}
+
+
+// Process the arguments, basic blocks, and instructions of a function.
+void SlotTracker::processFunction() {
+ ST_DEBUG("begin processFunction!\n");
+ fNext = 0;
+
+ // Add all the function arguments with no names.
+  for (Function::const_arg_iterator AI = TheFunction->arg_begin(),
+ AE = TheFunction->arg_end(); AI != AE; ++AI)
+ if (!AI->hasName())
+ CreateFunctionSlot(AI);
+
+ ST_DEBUG("Inserting Instructions:\n");
+
+ // Add all of the basic blocks and instructions with no names.
+ for (Function::const_iterator BB = TheFunction->begin(),
+ E = TheFunction->end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ CreateFunctionSlot(BB);
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (I->getType() != Type::VoidTy && !I->hasName())
+ CreateFunctionSlot(I);
+ }
+
+ FunctionProcessed = true;
+
+ ST_DEBUG("end processFunction!\n");
+}
+
+/// Clean up after incorporating a function. This is the only way to get out of
+/// the function incorporation state that affects get*Slot/Create*Slot. Function
+/// incorporation state is indicated by TheFunction != 0.
+void SlotTracker::purgeFunction() {
+ ST_DEBUG("begin purgeFunction!\n");
+ fMap.clear(); // Simply discard the function level map
+ TheFunction = 0;
+ FunctionProcessed = false;
+ ST_DEBUG("end purgeFunction!\n");
+}
+
+/// getGlobalSlot - Get the slot number of a global value.
+int SlotTracker::getGlobalSlot(const GlobalValue *V) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+  // Look up the value in the module-level map.
+ ValueMap::iterator MI = mMap.find(V);
+ return MI == mMap.end() ? -1 : (int)MI->second;
+}
+
+
+/// getLocalSlot - Get the slot number for a value that is local to a function.
+int SlotTracker::getLocalSlot(const Value *V) {
+ assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
+
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ ValueMap::iterator FI = fMap.find(V);
+ return FI == fMap.end() ? -1 : (int)FI->second;
+}
+
+
+/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
+ assert(V && "Can't insert a null Value into SlotTracker!");
+ assert(V->getType() != Type::VoidTy && "Doesn't need a slot!");
+ assert(!V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = mNext++;
+ mMap[V] = DestSlot;
+
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [");
+ // G = Global, F = Function, A = Alias, o = other
+ ST_DEBUG((isa<GlobalVariable>(V) ? 'G' :
+ (isa<Function>(V) ? 'F' :
+ (isa<GlobalAlias>(V) ? 'A' : 'o'))) << "]\n");
+}
+
+
+/// CreateFunctionSlot - Create a new slot for the specified value if it has
+/// no name.
+void SlotTracker::CreateFunctionSlot(const Value *V) {
+ assert(V->getType() != Type::VoidTy && !V->hasName() &&
+ "Doesn't need a slot!");
+
+ unsigned DestSlot = fNext++;
+ fMap[V] = DestSlot;
+
+ // G = Global, F = Function, o = other
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [o]\n");
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// AsmWriter Implementation
+//===----------------------------------------------------------------------===//
+
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting &TypePrinter,
+ SlotTracker *Machine);
+
+
+
+static const char *getPredicateText(unsigned predicate) {
+ const char * pred = "unknown";
+ switch (predicate) {
+ case FCmpInst::FCMP_FALSE: pred = "false"; break;
+ case FCmpInst::FCMP_OEQ: pred = "oeq"; break;
+ case FCmpInst::FCMP_OGT: pred = "ogt"; break;
+ case FCmpInst::FCMP_OGE: pred = "oge"; break;
+ case FCmpInst::FCMP_OLT: pred = "olt"; break;
+ case FCmpInst::FCMP_OLE: pred = "ole"; break;
+ case FCmpInst::FCMP_ONE: pred = "one"; break;
+ case FCmpInst::FCMP_ORD: pred = "ord"; break;
+ case FCmpInst::FCMP_UNO: pred = "uno"; break;
+ case FCmpInst::FCMP_UEQ: pred = "ueq"; break;
+ case FCmpInst::FCMP_UGT: pred = "ugt"; break;
+ case FCmpInst::FCMP_UGE: pred = "uge"; break;
+ case FCmpInst::FCMP_ULT: pred = "ult"; break;
+ case FCmpInst::FCMP_ULE: pred = "ule"; break;
+ case FCmpInst::FCMP_UNE: pred = "une"; break;
+ case FCmpInst::FCMP_TRUE: pred = "true"; break;
+ case ICmpInst::ICMP_EQ: pred = "eq"; break;
+ case ICmpInst::ICMP_NE: pred = "ne"; break;
+ case ICmpInst::ICMP_SGT: pred = "sgt"; break;
+ case ICmpInst::ICMP_SGE: pred = "sge"; break;
+ case ICmpInst::ICMP_SLT: pred = "slt"; break;
+ case ICmpInst::ICMP_SLE: pred = "sle"; break;
+ case ICmpInst::ICMP_UGT: pred = "ugt"; break;
+ case ICmpInst::ICMP_UGE: pred = "uge"; break;
+ case ICmpInst::ICMP_ULT: pred = "ult"; break;
+ case ICmpInst::ICMP_ULE: pred = "ule"; break;
+ }
+ return pred;
+}
+
+static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
+ TypePrinting &TypePrinter, SlotTracker *Machine) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (CI->getType() == Type::Int1Ty) {
+ Out << (CI->getZExtValue() ? "true" : "false");
+ return;
+ }
+ Out << CI->getValue();
+ return;
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) {
+ // We would like to output the FP constant value in exponential notation,
+ // but we cannot do this if doing so will lose precision. Check here to
+ // make sure that we only output it in exponential format if we can parse
+ // the value back and get the same value.
+ //
+ bool ignored;
+ bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
+ double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
+ CFP->getValueAPF().convertToFloat();
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN, that atof will accept, but the lexer will not. Check
+ // that the string matches the "[-+]?[0-9]" regex.
+ //
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
+ // Reparse stringized version!
+ if (atof(StrVal.c_str()) == Val) {
+ Out << StrVal;
+ return;
+ }
+ }
+ // Otherwise we could not reparse it to exactly the same value, so we must
+ // output the string in hexadecimal format! Note that loading and storing
+ // floating point types changes the bits of NaNs on some hosts, notably
+ // x86, so we must not use these types.
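+      // For example, 1.0 as a double has the bit pattern 0x3FF0000000000000,
+      // which is exactly what this path would emit.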
+ assert(sizeof(double) == sizeof(uint64_t) &&
+ "assuming that double is 64 bits!");
+ char Buffer[40];
+ APFloat apf = CFP->getValueAPF();
+ // Floats are represented in ASCII IR as double, convert.
+ if (!isDouble)
+ apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ Out << "0x" <<
+ utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
+ Buffer+40);
+ return;
+ }
+
+ // Some form of long double. These appear as a magic letter identifying
+ // the type, then a fixed number of hex digits.
+ Out << "0x";
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
+ Out << 'K';
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = p[1];
+ int shiftcount=12;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *p;
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad)
+ Out << 'L';
+ else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble)
+ Out << 'M';
+ else
+ assert(0 && "Unsupported floating point type");
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = *p;
+ int shiftcount=60;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *(++p);
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ }
+
+ if (isa<ConstantAggregateZero>(CV)) {
+ Out << "zeroinitializer";
+ return;
+ }
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ // As a special case, print the array as a string if it is an array of
+ // i8 with ConstantInt values.
+ //
+ const Type *ETy = CA->getType()->getElementType();
+ if (CA->isString()) {
+ Out << "c\"";
+ PrintEscapedString(CA->getAsString(), Out);
+ Out << '"';
+ } else { // Cannot output in string format...
+ Out << '[';
+ if (CA->getNumOperands()) {
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(0),
+ TypePrinter, Machine);
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(i), TypePrinter, Machine);
+ }
+ }
+ Out << ']';
+ }
+ return;
+ }
+
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ if (CS->getType()->isPacked())
+ Out << '<';
+ Out << '{';
+ unsigned N = CS->getNumOperands();
+ if (N) {
+ Out << ' ';
+ TypePrinter.print(CS->getOperand(0)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(0), TypePrinter, Machine);
+
+ for (unsigned i = 1; i < N; i++) {
+ Out << ", ";
+ TypePrinter.print(CS->getOperand(i)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(i), TypePrinter, Machine);
+ }
+ Out << ' ';
+ }
+
+ Out << '}';
+ if (CS->getType()->isPacked())
+ Out << '>';
+ return;
+ }
+
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ const Type *ETy = CP->getType()->getElementType();
+ assert(CP->getNumOperands() > 0 &&
+ "Number of operands for a PackedConst must be > 0");
+ Out << '<';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CP->getOperand(0), TypePrinter, Machine);
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CP->getOperand(i), TypePrinter, Machine);
+ }
+ Out << '>';
+ return;
+ }
+
+ if (isa<ConstantPointerNull>(CV)) {
+ Out << "null";
+ return;
+ }
+
+ if (isa<UndefValue>(CV)) {
+ Out << "undef";
+ return;
+ }
+
+ if (const MDString *S = dyn_cast<MDString>(CV)) {
+ Out << "!\"";
+ PrintEscapedString(S->begin(), S->size(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (const MDNode *N = dyn_cast<MDNode>(CV)) {
+ Out << "!{";
+ for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end();
+ I != E;) {
+ if (!*I) {
+ Out << "null";
+ } else {
+ TypePrinter.print((*I)->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, *I, TypePrinter, Machine);
+ }
+
+ if (++I != E)
+ Out << ", ";
+ }
+ Out << "}";
+ return;
+ }
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ Out << CE->getOpcodeName();
+ if (CE->isCompare())
+ Out << ' ' << getPredicateText(CE->getPredicate());
+ Out << " (";
+
+ for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
+ TypePrinter.print((*OI)->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, *OI, TypePrinter, Machine);
+ if (OI+1 != CE->op_end())
+ Out << ", ";
+ }
+
+ if (CE->hasIndices()) {
+ const SmallVector<unsigned, 4> &Indices = CE->getIndices();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ Out << ", " << Indices[i];
+ }
+
+ if (CE->isCast()) {
+ Out << " to ";
+ TypePrinter.print(CE->getType(), Out);
+ }
+
+ Out << ')';
+ return;
+ }
+
+ Out << "<placeholder or erroneous Constant>";
+}
+
+
+/// WriteAsOperand - Write the name of the specified value out to the specified
+/// ostream. This can be useful when you just want to print i32 %reg126, not
+/// the whole instruction that generated it.
+///
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting &TypePrinter,
+ SlotTracker *Machine) {
+ if (V->hasName()) {
+ PrintLLVMName(Out, V);
+ return;
+ }
+
+ const Constant *CV = dyn_cast<Constant>(V);
+ if (CV && !isa<GlobalValue>(CV)) {
+ WriteConstantInt(Out, CV, TypePrinter, Machine);
+ return;
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Out << "asm ";
+ if (IA->hasSideEffects())
+ Out << "sideeffect ";
+ Out << '"';
+ PrintEscapedString(IA->getAsmString(), Out);
+ Out << "\", \"";
+ PrintEscapedString(IA->getConstraintString(), Out);
+ Out << '"';
+ return;
+ }
+
+ char Prefix = '%';
+ int Slot;
+ if (Machine) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+ }
+ } else {
+ Machine = createSlotTracker(V);
+ if (Machine) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+ }
+ } else {
+ Slot = -1;
+ }
+ delete Machine;
+ }
+
+ if (Slot != -1)
+ Out << Prefix << Slot;
+ else
+ Out << "<badref>";
+}
+
+/// WriteAsOperand - Write the name of the specified value out to the specified
+/// ostream. This can be useful when you just want to print i32 %reg126, not
+/// the whole instruction that generated it.
+///
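+/// As a usage sketch: for an i32 argument named "x", calling
+/// WriteAsOperand(errs(), V, /*PrintType=*/true) prints "i32 %x".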
+void llvm::WriteAsOperand(std::ostream &Out, const Value *V, bool PrintType,
+ const Module *Context) {
+ raw_os_ostream OS(Out);
+ WriteAsOperand(OS, V, PrintType, Context);
+}
+
+void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, bool PrintType,
+ const Module *Context) {
+ if (Context == 0) Context = getModuleFromVal(V);
+
+ TypePrinting TypePrinter;
+ std::vector<const Type*> NumberedTypes;
+ AddModuleTypesToPrinter(TypePrinter, NumberedTypes, Context);
+ if (PrintType) {
+ TypePrinter.print(V->getType(), Out);
+ Out << ' ';
+ }
+
+ WriteAsOperandInternal(Out, V, TypePrinter, 0);
+}
+
+
+namespace {
+
+class AssemblyWriter {
+ raw_ostream &Out;
+ SlotTracker &Machine;
+ const Module *TheModule;
+ TypePrinting TypePrinter;
+ AssemblyAnnotationWriter *AnnotationWriter;
+ std::vector<const Type*> NumberedTypes;
+public:
+ inline AssemblyWriter(raw_ostream &o, SlotTracker &Mac, const Module *M,
+ AssemblyAnnotationWriter *AAW)
+ : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
+ AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M);
+ }
+
+ void write(const Module *M) { printModule(M); }
+
+ void write(const GlobalValue *G) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(G))
+ printGlobal(GV);
+ else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(G))
+ printAlias(GA);
+ else if (const Function *F = dyn_cast<Function>(G))
+ printFunction(F);
+ else
+ assert(0 && "Unknown global");
+ }
+
+ void write(const BasicBlock *BB) { printBasicBlock(BB); }
+ void write(const Instruction *I) { printInstruction(*I); }
+
+ void writeOperand(const Value *Op, bool PrintType);
+ void writeParamOperand(const Value *Operand, Attributes Attrs);
+
+ const Module* getModule() { return TheModule; }
+
+private:
+ void printModule(const Module *M);
+ void printTypeSymbolTable(const TypeSymbolTable &ST);
+ void printGlobal(const GlobalVariable *GV);
+ void printAlias(const GlobalAlias *GV);
+ void printFunction(const Function *F);
+ void printArgument(const Argument *FA, Attributes Attrs);
+ void printBasicBlock(const BasicBlock *BB);
+ void printInstruction(const Instruction &I);
+
+ // printInfoComment - Print a little comment after the instruction indicating
+ // which slot it occupies.
+ void printInfoComment(const Value &V);
+};
+} // end of anonymous namespace
+
+
+void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ } else {
+ if (PrintType) {
+ TypePrinter.print(Operand->getType(), Out);
+ Out << ' ';
+ }
+ WriteAsOperandInternal(Out, Operand, TypePrinter, &Machine);
+ }
+}
+
+void AssemblyWriter::writeParamOperand(const Value *Operand,
+ Attributes Attrs) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ } else {
+ // Print the type
+ TypePrinter.print(Operand->getType(), Out);
+ // Print parameter attributes list
+ if (Attrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs);
+ Out << ' ';
+ // Print the operand
+ WriteAsOperandInternal(Out, Operand, TypePrinter, &Machine);
+ }
+}
+
+void AssemblyWriter::printModule(const Module *M) {
+ if (!M->getModuleIdentifier().empty() &&
+ // Don't print the ID if it will start a new line (which would
+ // require a comment char before it).
+ M->getModuleIdentifier().find('\n') == std::string::npos)
+ Out << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
+
+ if (!M->getDataLayout().empty())
+ Out << "target datalayout = \"" << M->getDataLayout() << "\"\n";
+ if (!M->getTargetTriple().empty())
+ Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
+
+ if (!M->getModuleInlineAsm().empty()) {
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M->getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "module asm \"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ Out << "module asm \"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
+ Out << "\"\n";
+ }
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = M->lib_begin();
+ Module::lib_iterator LE = M->lib_end();
+ if (LI != LE) {
+ Out << "deplibs = [ ";
+ while (LI != LE) {
+ Out << '"' << *LI << '"';
+ ++LI;
+ if (LI != LE)
+ Out << ", ";
+ }
+ Out << " ]\n";
+ }
+
+ // Loop over the symbol table, emitting all id'd types.
+ printTypeSymbolTable(M->getTypeSymbolTable());
+
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ printGlobal(I);
+
+ // Output all aliases.
+ if (!M->alias_empty()) Out << "\n";
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ printAlias(I);
+
+ // Output all of the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+ printFunction(I);
+}
+
+static void PrintLinkage(GlobalValue::LinkageTypes LT, raw_ostream &Out) {
+ switch (LT) {
+ case GlobalValue::PrivateLinkage: Out << "private "; break;
+ case GlobalValue::InternalLinkage: Out << "internal "; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "available_externally ";
+ break;
+ case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
+ case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
+ case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
+ case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
+ case GlobalValue::CommonLinkage: Out << "common "; break;
+ case GlobalValue::AppendingLinkage: Out << "appending "; break;
+ case GlobalValue::DLLImportLinkage: Out << "dllimport "; break;
+ case GlobalValue::DLLExportLinkage: Out << "dllexport "; break;
+ case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break;
+ case GlobalValue::ExternalLinkage: break;
+ case GlobalValue::GhostLinkage:
+ Out << "GhostLinkage not allowed in AsmWriter!\n";
+ abort();
+ }
+}
+
+
+static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
+ raw_ostream &Out) {
+ switch (Vis) {
+ default: assert(0 && "Invalid visibility style!");
+ case GlobalValue::DefaultVisibility: break;
+ case GlobalValue::HiddenVisibility: Out << "hidden "; break;
+ case GlobalValue::ProtectedVisibility: Out << "protected "; break;
+ }
+}
+
+void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
+ if (GV->hasName()) {
+ PrintLLVMName(Out, GV);
+ Out << " = ";
+ }
+
+ if (!GV->hasInitializer() && GV->hasExternalLinkage())
+ Out << "external ";
+
+ PrintLinkage(GV->getLinkage(), Out);
+ PrintVisibility(GV->getVisibility(), Out);
+
+ if (GV->isThreadLocal()) Out << "thread_local ";
+ if (unsigned AddressSpace = GV->getType()->getAddressSpace())
+ Out << "addrspace(" << AddressSpace << ") ";
+ Out << (GV->isConstant() ? "constant " : "global ");
+ TypePrinter.print(GV->getType()->getElementType(), Out);
+
+ if (GV->hasInitializer()) {
+ Out << ' ';
+ writeOperand(GV->getInitializer(), false);
+ }
+
+ if (GV->hasSection())
+ Out << ", section \"" << GV->getSection() << '"';
+ if (GV->getAlignment())
+ Out << ", align " << GV->getAlignment();
+
+ printInfoComment(*GV);
+ Out << '\n';
+}
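+// As an illustrative example (hypothetical global), the output looks like:
+//   @g = internal constant i32 42, align 4          ; <i32*> [#uses=1]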
+
+void AssemblyWriter::printAlias(const GlobalAlias *GA) {
+ // Don't crash when dumping partially built GA
+ if (!GA->hasName())
+ Out << "<<nameless>> = ";
+ else {
+ PrintLLVMName(Out, GA);
+ Out << " = ";
+ }
+ PrintVisibility(GA->getVisibility(), Out);
+
+ Out << "alias ";
+
+ PrintLinkage(GA->getLinkage(), Out);
+
+ const Constant *Aliasee = GA->getAliasee();
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
+ TypePrinter.print(GV->getType(), Out);
+ Out << ' ';
+ PrintLLVMName(Out, GV);
+ } else if (const Function *F = dyn_cast<Function>(Aliasee)) {
+ TypePrinter.print(F->getFunctionType(), Out);
+ Out << "* ";
+
+ WriteAsOperandInternal(Out, F, TypePrinter, &Machine);
+ } else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
+ TypePrinter.print(GA->getType(), Out);
+ Out << ' ';
+ PrintLLVMName(Out, GA);
+ } else {
+ const ConstantExpr *CE = cast<ConstantExpr>(Aliasee);
+ // The only valid GEP is an all zero GEP.
+ assert((CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ "Unsupported aliasee");
+ writeOperand(CE, false);
+ }
+
+ printInfoComment(*GA);
+ Out << '\n';
+}
+
+void AssemblyWriter::printTypeSymbolTable(const TypeSymbolTable &ST) {
+ // Emit all numbered types.
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
+ Out << "\ttype ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %2 = type %2
+ TypePrinter.printAtLeastOneLevel(NumberedTypes[i], Out);
+ Out << "\t\t; type %" << i << '\n';
+ }
+
+ // Print the named types.
+ for (TypeSymbolTable::const_iterator TI = ST.begin(), TE = ST.end();
+ TI != TE; ++TI) {
+ Out << '\t';
+ PrintLLVMName(Out, &TI->first[0], TI->first.size(), LocalPrefix);
+ Out << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %FILE = type %FILE
+ TypePrinter.printAtLeastOneLevel(TI->second, Out);
+ Out << '\n';
+ }
+}
+
+/// printFunction - Print all aspects of a function.
+///
+void AssemblyWriter::printFunction(const Function *F) {
+ // Print out the return type and name.
+ Out << '\n';
+
+ if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
+
+ if (F->isDeclaration())
+ Out << "declare ";
+ else
+ Out << "define ";
+
+ PrintLinkage(F->getLinkage(), Out);
+ PrintVisibility(F->getVisibility(), Out);
+
+ // Print the calling convention.
+ switch (F->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << "fastcc "; break;
+ case CallingConv::Cold: Out << "coldcc "; break;
+ case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break;
+ case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+ default: Out << "cc" << F->getCallingConv() << " "; break;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ const AttrListPtr &Attrs = F->getAttributes();
+ Attributes RetAttrs = Attrs.getRetAttributes();
+ if (RetAttrs != Attribute::None)
+ Out << Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
+ TypePrinter.print(F->getReturnType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, F, TypePrinter, &Machine);
+ Out << '(';
+ Machine.incorporateFunction(F);
+
+ // Loop over the arguments, printing them...
+
+ unsigned Idx = 1;
+ if (!F->isDeclaration()) {
+ // If this isn't a declaration, print the argument names as well.
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (I != F->arg_begin()) Out << ", ";
+ printArgument(I, Attrs.getParamAttributes(Idx));
+ Idx++;
+ }
+ } else {
+ // Otherwise, print the types from the function type.
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (i) Out << ", ";
+
+ // Output type...
+ TypePrinter.print(FT->getParamType(i), Out);
+
+ Attributes ArgAttrs = Attrs.getParamAttributes(i+1);
+ if (ArgAttrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(ArgAttrs);
+ }
+ }
+
+ // Finish printing arguments...
+ if (FT->isVarArg()) {
+ if (FT->getNumParams()) Out << ", ";
+ Out << "..."; // Output varargs portion of signature!
+ }
+ Out << ')';
+ Attributes FnAttrs = Attrs.getFnAttributes();
+ if (FnAttrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
+ if (F->hasSection())
+ Out << " section \"" << F->getSection() << '"';
+ if (F->getAlignment())
+ Out << " align " << F->getAlignment();
+ if (F->hasGC())
+ Out << " gc \"" << F->getGC() << '"';
+ if (F->isDeclaration()) {
+ Out << "\n";
+ } else {
+ Out << " {";
+
+ // Output all of its basic blocks... for the function
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
+ printBasicBlock(I);
+
+ Out << "}\n";
+ }
+
+ Machine.purgeFunction();
+}
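+// As an illustrative example (hypothetical function), the output looks like:
+//   define internal fastcc i32 @f(i32 %x) nounwind {
+//     ...
+//   }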
+
+/// printArgument - This member is called for every argument that is passed into
+/// the function. Simply print it out.
+///
+void AssemblyWriter::printArgument(const Argument *Arg,
+ Attributes Attrs) {
+ // Output type...
+ TypePrinter.print(Arg->getType(), Out);
+
+ // Output parameter attributes list
+ if (Attrs != Attribute::None)
+ Out << ' ' << Attribute::getAsString(Attrs);
+
+ // Output name, if available...
+ if (Arg->hasName()) {
+ Out << ' ';
+ PrintLLVMName(Out, Arg);
+ }
+}
+
+/// printBasicBlock - This member is called for each basic block in a method.
+///
+void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+ if (BB->hasName()) { // Print out the label if it exists...
+ Out << "\n";
+ PrintLLVMName(Out, BB->getNameStart(), BB->getNameLen(), LabelPrefix);
+ Out << ':';
+ } else if (!BB->use_empty()) { // Don't print block # of no uses...
+ Out << "\n; <label>:";
+ int Slot = Machine.getLocalSlot(BB);
+ if (Slot != -1)
+ Out << Slot;
+ else
+ Out << "<badref>";
+ }
+
+ if (BB->getParent() == 0)
+ Out << "\t\t; Error: Block without parent!";
+ else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
+ // Output predecessors for the block...
+ Out << "\t\t;";
+ pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ if (PI == PE) {
+ Out << " No predecessors!";
+ } else {
+ Out << " preds = ";
+ writeOperand(*PI, false);
+ for (++PI; PI != PE; ++PI) {
+ Out << ", ";
+ writeOperand(*PI, false);
+ }
+ }
+ }
+
+ Out << "\n";
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockStartAnnot(BB, Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ printInstruction(*I);
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
+}
+
+
+/// printInfoComment - Print a little comment after the instruction indicating
+/// which slot it occupies.
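+/// A typical result (illustrative) is: "; <i32> [#uses=3]".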
+///
+void AssemblyWriter::printInfoComment(const Value &V) {
+ if (V.getType() != Type::VoidTy) {
+ Out << "\t\t; <";
+ TypePrinter.print(V.getType(), Out);
+ Out << '>';
+
+ if (!V.hasName() && !isa<Instruction>(V)) {
+ int SlotNum;
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(&V))
+ SlotNum = Machine.getGlobalSlot(GV);
+ else
+ SlotNum = Machine.getLocalSlot(&V);
+ if (SlotNum == -1)
+ Out << ":<badref>";
+ else
+ Out << ':' << SlotNum; // Print out the def slot taken.
+ }
+ Out << " [#uses=" << V.getNumUses() << ']'; // Output # uses
+ }
+}
+
+// This member is called for each Instruction in a function.
+void AssemblyWriter::printInstruction(const Instruction &I) {
+ if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
+
+ Out << '\t';
+
+ // Print out name if it exists...
+ if (I.hasName()) {
+ PrintLLVMName(Out, &I);
+ Out << " = ";
+ } else if (I.getType() != Type::VoidTy) {
+ // Print out the def slot taken.
+ int SlotNum = Machine.getLocalSlot(&I);
+ if (SlotNum == -1)
+ Out << "<badref> = ";
+ else
+ Out << '%' << SlotNum << " = ";
+ }
+
+ // If this is a volatile load or store, print out the volatile marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile())) {
+ Out << "volatile ";
+ } else if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall()) {
+ // If this is a call, check if it's a tail call.
+ Out << "tail ";
+ }
+
+ // Print out the opcode...
+ Out << I.getOpcodeName();
+
+ // Print out the compare instruction predicates
+ if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
+ Out << ' ' << getPredicateText(CI->getPredicate());
+
+ // Print out the type of the operands...
+ const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0;
+
+ // Special case conditional branches to swizzle the condition out to the front
+ if (isa<BranchInst>(I) && cast<BranchInst>(I).isConditional()) {
+ BranchInst &BI(cast<BranchInst>(I));
+ Out << ' ';
+ writeOperand(BI.getCondition(), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(0), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(1), true);
+
+ } else if (isa<SwitchInst>(I)) {
+ // Special case switch statement to get formatting nice and correct...
+ Out << ' ';
+ writeOperand(Operand , true);
+ Out << ", ";
+ writeOperand(I.getOperand(1), true);
+ Out << " [";
+
+ for (unsigned op = 2, Eop = I.getNumOperands(); op < Eop; op += 2) {
+ Out << "\n\t\t";
+ writeOperand(I.getOperand(op ), true);
+ Out << ", ";
+ writeOperand(I.getOperand(op+1), true);
+ }
+ Out << "\n\t]";
+ } else if (isa<PHINode>(I)) {
+ Out << ' ';
+ TypePrinter.print(I.getType(), Out);
+ Out << ' ';
+
+ for (unsigned op = 0, Eop = I.getNumOperands(); op < Eop; op += 2) {
+ if (op) Out << ", ";
+ Out << "[ ";
+ writeOperand(I.getOperand(op ), false); Out << ", ";
+ writeOperand(I.getOperand(op+1), false); Out << " ]";
+ }
+ } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true); Out << ", ";
+ writeOperand(I.getOperand(1), true);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (isa<ReturnInst>(I) && !Operand) {
+ Out << " void";
+ } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Print the calling convention being used.
+ switch (CI->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << " fastcc"; break;
+ case CallingConv::Cold: Out << " coldcc"; break;
+ case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
+ case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ default: Out << " cc" << CI->getCallingConv(); break;
+ }
+
+ const PointerType *PTy = cast<PointerType>(Operand->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const Type *RetTy = FTy->getReturnType();
+ const AttrListPtr &PAL = CI->getAttributes();
+
+ if (PAL.getRetAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getRetAttributes());
+
+ // If possible, print out the short form of the call instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!isa<PointerType>(RetTy) ||
+ !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 1, Eop = I.getNumOperands(); op < Eop; ++op) {
+ if (op > 1)
+ Out << ", ";
+ writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op));
+ }
+ Out << ')';
+ if (PAL.getFnAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ const PointerType *PTy = cast<PointerType>(Operand->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const Type *RetTy = FTy->getReturnType();
+ const AttrListPtr &PAL = II->getAttributes();
+
+ // Print the calling convention being used.
+ switch (II->getCallingConv()) {
+ case CallingConv::C: break; // default
+ case CallingConv::Fast: Out << " fastcc"; break;
+ case CallingConv::Cold: Out << " coldcc"; break;
+ case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
+ case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ default: Out << " cc" << II->getCallingConv(); break;
+ }
+
+ if (PAL.getRetAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getRetAttributes());
+
+ // If possible, print out the short form of the invoke instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!isa<PointerType>(RetTy) ||
+ !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 3, Eop = I.getNumOperands(); op < Eop; ++op) {
+ if (op > 3)
+ Out << ", ";
+ writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op-2));
+ }
+
+ Out << ')';
+ if (PAL.getFnAttributes() != Attribute::None)
+ Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
+
+ Out << "\n\t\t\tto ";
+ writeOperand(II->getNormalDest(), true);
+ Out << " unwind ";
+ writeOperand(II->getUnwindDest(), true);
+
+ } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(AI->getType()->getElementType(), Out);
+ if (AI->isArrayAllocation()) {
+ Out << ", ";
+ writeOperand(AI->getArraySize(), true);
+ }
+ if (AI->getAlignment()) {
+ Out << ", align " << AI->getAlignment();
+ }
+ } else if (isa<CastInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << " to ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (isa<VAArgInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << ", ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (Operand) { // Print the normal way.
+
+    // PrintAllTypes - Instructions whose operands all have the same type
+    // omit the type from all but the first operand.  If the instruction has
+    // operands of differing types (for example br), then they are all printed.
+ bool PrintAllTypes = false;
+ const Type *TheType = Operand->getType();
+
+ // Select, Store and ShuffleVector always print all types.
+ if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
+ || isa<ReturnInst>(I)) {
+ PrintAllTypes = true;
+ } else {
+ for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) {
+ Operand = I.getOperand(i);
+        // Note that Operand shouldn't be null, but the test helps make dump()
+        // more tolerant of malformed IR.
+ if (Operand && Operand->getType() != TheType) {
+ PrintAllTypes = true; // We have differing types! Print them all!
+ break;
+ }
+ }
+ }
+
+ if (!PrintAllTypes) {
+ Out << ' ';
+ TypePrinter.print(TheType, Out);
+ }
+
+ Out << ' ';
+ for (unsigned i = 0, E = I.getNumOperands(); i != E; ++i) {
+ if (i) Out << ", ";
+ writeOperand(I.getOperand(i), PrintAllTypes);
+ }
+ }
+
+ // Print post operand alignment for load/store
+ if (isa<LoadInst>(I) && cast<LoadInst>(I).getAlignment()) {
+ Out << ", align " << cast<LoadInst>(I).getAlignment();
+ } else if (isa<StoreInst>(I) && cast<StoreInst>(I).getAlignment()) {
+ Out << ", align " << cast<StoreInst>(I).getAlignment();
+ }
+
+ printInfoComment(I);
+ Out << '\n';
+}
+
+
+//===----------------------------------------------------------------------===//
+// External Interface declarations
+//===----------------------------------------------------------------------===//
+
+void Module::print(std::ostream &o, AssemblyAnnotationWriter *AAW) const {
+ raw_os_ostream OS(o);
+ print(OS, AAW);
+}
+void Module::print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(this);
+ AssemblyWriter W(OS, SlotTable, this, AAW);
+ W.write(this);
+}
+
+void Type::print(std::ostream &o) const {
+ raw_os_ostream OS(o);
+ print(OS);
+}
+
+void Type::print(raw_ostream &OS) const {
+ if (this == 0) {
+ OS << "<null Type>";
+ return;
+ }
+ TypePrinting().print(this, OS);
+}
+
+void Value::print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const {
+ if (this == 0) {
+ OS << "printing a <null> value\n";
+ return;
+ }
+
+ if (const Instruction *I = dyn_cast<Instruction>(this)) {
+ const Function *F = I->getParent() ? I->getParent()->getParent() : 0;
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
+ W.write(I);
+ } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
+ SlotTracker SlotTable(BB->getParent());
+ AssemblyWriter W(OS, SlotTable,
+ BB->getParent() ? BB->getParent()->getParent() : 0, AAW);
+ W.write(BB);
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ SlotTracker SlotTable(GV->getParent());
+ AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW);
+ W.write(GV);
+ } else if (const Constant *C = dyn_cast<Constant>(this)) {
+ TypePrinting TypePrinter;
+ TypePrinter.print(C->getType(), OS);
+ OS << ' ';
+ WriteConstantInt(OS, C, TypePrinter, 0);
+ } else if (const Argument *A = dyn_cast<Argument>(this)) {
+ WriteAsOperand(OS, this, true,
+ A->getParent() ? A->getParent()->getParent() : 0);
+ } else if (isa<InlineAsm>(this)) {
+ WriteAsOperand(OS, this, true, 0);
+ } else {
+ assert(0 && "Unknown value to print out!");
+ }
+}
+
+void Value::print(std::ostream &O, AssemblyAnnotationWriter *AAW) const {
+ raw_os_ostream OS(O);
+ print(OS, AAW);
+}
+
+// Value::dump - allow easy printing of Values from the debugger.
+void Value::dump() const { print(errs()); errs() << '\n'; }
+
+// Type::dump - allow easy printing of Types from the debugger.
+// This one uses type names from the given context module
+void Type::dump(const Module *Context) const {
+ WriteTypeSymbolic(errs(), this, Context);
+ errs() << '\n';
+}
+
+// Type::dump - allow easy printing of Types from the debugger.
+void Type::dump() const { dump(0); }
+
+// Module::dump() - Allow printing of Modules from the debugger.
+void Module::dump() const { print(errs(), 0); }
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
new file mode 100644
index 0000000..5a8fad9
--- /dev/null
+++ b/lib/VMCore/Attributes.cpp
@@ -0,0 +1,310 @@
+//===-- Attributes.cpp - Implement AttributesList -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AttributesList class and Attribute utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Attributes.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/ManagedStatic.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Attribute Function Definitions
+//===----------------------------------------------------------------------===//
+
+std::string Attribute::getAsString(Attributes Attrs) {
+ std::string Result;
+ if (Attrs & Attribute::ZExt)
+ Result += "zeroext ";
+ if (Attrs & Attribute::SExt)
+ Result += "signext ";
+ if (Attrs & Attribute::NoReturn)
+ Result += "noreturn ";
+ if (Attrs & Attribute::NoUnwind)
+ Result += "nounwind ";
+ if (Attrs & Attribute::InReg)
+ Result += "inreg ";
+ if (Attrs & Attribute::NoAlias)
+ Result += "noalias ";
+ if (Attrs & Attribute::NoCapture)
+ Result += "nocapture ";
+ if (Attrs & Attribute::StructRet)
+ Result += "sret ";
+ if (Attrs & Attribute::ByVal)
+ Result += "byval ";
+ if (Attrs & Attribute::Nest)
+ Result += "nest ";
+ if (Attrs & Attribute::ReadNone)
+ Result += "readnone ";
+ if (Attrs & Attribute::ReadOnly)
+ Result += "readonly ";
+ if (Attrs & Attribute::OptimizeForSize)
+ Result += "optsize ";
+ if (Attrs & Attribute::NoInline)
+ Result += "noinline ";
+ if (Attrs & Attribute::AlwaysInline)
+ Result += "alwaysinline ";
+ if (Attrs & Attribute::StackProtect)
+ Result += "ssp ";
+ if (Attrs & Attribute::StackProtectReq)
+ Result += "sspreq ";
+ if (Attrs & Attribute::Alignment) {
+ Result += "align ";
+ Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
+ Result += " ";
+ }
+ // Trim the trailing space.
+ assert(!Result.empty() && "Unknown attribute!");
+ Result.erase(Result.end()-1);
+ return Result;
+}
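+// For example (illustrative), getAsString(Attribute::NoAlias | Attribute::ByVal)
+// returns "noalias byval".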
+
+Attributes Attribute::typeIncompatible(const Type *Ty) {
+ Attributes Incompatible = None;
+
+ if (!Ty->isInteger())
+ // Attributes that only apply to integers.
+ Incompatible |= SExt | ZExt;
+
+ if (!isa<PointerType>(Ty))
+ // Attributes that only apply to pointers.
+ Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture;
+
+ return Incompatible;
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeListImpl Definition
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+class AttributeListImpl : public FoldingSetNode {
+ unsigned RefCount;
+
+  // AttributesList is uniqued; these should not be publicly available.
+ void operator=(const AttributeListImpl &); // Do not implement
+ AttributeListImpl(const AttributeListImpl &); // Do not implement
+ ~AttributeListImpl(); // Private implementation
+public:
+ SmallVector<AttributeWithIndex, 4> Attrs;
+
+ AttributeListImpl(const AttributeWithIndex *Attr, unsigned NumAttrs)
+ : Attrs(Attr, Attr+NumAttrs) {
+ RefCount = 0;
+ }
+
+ void AddRef() { ++RefCount; }
+ void DropRef() { if (--RefCount == 0) delete this; }
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, Attrs.data(), Attrs.size());
+ }
+ static void Profile(FoldingSetNodeID &ID, const AttributeWithIndex *Attr,
+ unsigned NumAttrs) {
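+    // Hash each (Attrs, Index) pair as a single 64-bit integer: the attribute
+    // bits go in the high 32 bits, the slot index in the low 32 bits.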
+ for (unsigned i = 0; i != NumAttrs; ++i)
+ ID.AddInteger(uint64_t(Attr[i].Attrs) << 32 | unsigned(Attr[i].Index));
+ }
+};
+}
+
+static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
+
+AttributeListImpl::~AttributeListImpl() {
+ AttributesLists->RemoveNode(this);
+}
+
+
+AttrListPtr AttrListPtr::get(const AttributeWithIndex *Attrs, unsigned NumAttrs) {
+ // If there are no attributes then return a null AttributesList pointer.
+ if (NumAttrs == 0)
+ return AttrListPtr();
+
+#ifndef NDEBUG
+ for (unsigned i = 0; i != NumAttrs; ++i) {
+ assert(Attrs[i].Attrs != Attribute::None &&
+ "Pointless attribute!");
+ assert((!i || Attrs[i-1].Index < Attrs[i].Index) &&
+ "Misordered AttributesList!");
+ }
+#endif
+
+ // Otherwise, build a key to look up the existing attributes.
+ FoldingSetNodeID ID;
+ AttributeListImpl::Profile(ID, Attrs, NumAttrs);
+ void *InsertPos;
+ AttributeListImpl *PAL =
+ AttributesLists->FindNodeOrInsertPos(ID, InsertPos);
+
+ // If we didn't find any existing attributes of the same shape then
+ // create a new one and insert it.
+ if (!PAL) {
+ PAL = new AttributeListImpl(Attrs, NumAttrs);
+ AttributesLists->InsertNode(PAL, InsertPos);
+ }
+
+ // Return the AttributesList that we found or created.
+ return AttrListPtr(PAL);
+}
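+// Example usage (an illustrative sketch; 'AWI' and the chosen attributes are
+// hypothetical).  Entries must be in increasing index order, with ~0U denoting
+// the function itself:
+//   AttributeWithIndex AWI[2];
+//   AWI[0] = AttributeWithIndex::get(1, Attribute::ByVal);      // param #1
+//   AWI[1] = AttributeWithIndex::get(~0U, Attribute::NoUnwind); // function
+//   AttrListPtr PAL = AttrListPtr::get(AWI, 2);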
+
+
+//===----------------------------------------------------------------------===//
+// AttrListPtr Method Implementations
+//===----------------------------------------------------------------------===//
+
+AttrListPtr::AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {
+ if (LI) LI->AddRef();
+}
+
+AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {
+ if (AttrList) AttrList->AddRef();
+}
+
+const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
+ if (AttrList == RHS.AttrList) return *this;
+ if (AttrList) AttrList->DropRef();
+ AttrList = RHS.AttrList;
+ if (AttrList) AttrList->AddRef();
+ return *this;
+}
+
+AttrListPtr::~AttrListPtr() {
+ if (AttrList) AttrList->DropRef();
+}
+
+/// getNumSlots - Return the number of slots used in this attribute list.
+/// This is the number of arguments that have an attribute set on them
+/// (including the function itself).
+unsigned AttrListPtr::getNumSlots() const {
+ return AttrList ? AttrList->Attrs.size() : 0;
+}
+
+/// getSlot - Return the AttributeWithIndex at the specified slot. This
+/// holds a number plus a set of attributes.
+const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const {
+ assert(AttrList && Slot < AttrList->Attrs.size() && "Slot # out of range!");
+ return AttrList->Attrs[Slot];
+}
+
+
+/// getAttributes - The attributes for the specified index are
+/// returned. Attributes for the result are denoted with Idx = 0.
+/// Function attributes are denoted with Idx = ~0.
+Attributes AttrListPtr::getAttributes(unsigned Idx) const {
+ if (AttrList == 0) return Attribute::None;
+
+ const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
+ for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i)
+ if (Attrs[i].Index == Idx)
+ return Attrs[i].Attrs;
+ return Attribute::None;
+}
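+// For example (illustrative), getAttributes(0) yields the attributes of the
+// return value and getAttributes(1) those of the first parameter.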
+
+/// hasAttrSomewhere - Return true if the specified attribute is set for at
+/// least one parameter or for the return value.
+bool AttrListPtr::hasAttrSomewhere(Attributes Attr) const {
+ if (AttrList == 0) return false;
+
+ const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i)
+ if (Attrs[i].Attrs & Attr)
+ return true;
+ return false;
+}
+
+
+AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
+ Attributes OldAttrs = getAttributes(Idx);
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't change a known alignment.
+ Attributes OldAlign = OldAttrs & Attribute::Alignment;
+ Attributes NewAlign = Attrs & Attribute::Alignment;
+ assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
+ "Attempt to change alignment!");
+#endif
+
+ Attributes NewAttrs = OldAttrs | Attrs;
+ if (NewAttrs == OldAttrs)
+ return *this;
+
+ SmallVector<AttributeWithIndex, 8> NewAttrList;
+ if (AttrList == 0)
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+ else {
+ const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
+ unsigned i = 0, e = OldAttrList.size();
+ // Copy attributes for arguments before this one.
+ for (; i != e && OldAttrList[i].Index < Idx; ++i)
+ NewAttrList.push_back(OldAttrList[i]);
+
+ // If there are attributes already at this index, merge them in.
+ if (i != e && OldAttrList[i].Index == Idx) {
+ Attrs |= OldAttrList[i].Attrs;
+ ++i;
+ }
+
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+
+ // Copy attributes for arguments after this one.
+ NewAttrList.insert(NewAttrList.end(),
+ OldAttrList.begin()+i, OldAttrList.end());
+ }
+
+ return get(NewAttrList.data(), NewAttrList.size());
+}
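+// Example usage (illustrative): adding 'nest' to parameter #1 of an existing
+// list leaves the original untouched and returns the updated list:
+//   AttrListPtr NewPAL = PAL.addAttr(1, Attribute::Nest);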
+
+AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't pass in alignment, which no current use does.
+ assert(!(Attrs & Attribute::Alignment) && "Attempt to exclude alignment!");
+#endif
+ if (AttrList == 0) return AttrListPtr();
+
+ Attributes OldAttrs = getAttributes(Idx);
+ Attributes NewAttrs = OldAttrs & ~Attrs;
+ if (NewAttrs == OldAttrs)
+ return *this;
+
+ SmallVector<AttributeWithIndex, 8> NewAttrList;
+ const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
+ unsigned i = 0, e = OldAttrList.size();
+
+ // Copy attributes for arguments before this one.
+ for (; i != e && OldAttrList[i].Index < Idx; ++i)
+ NewAttrList.push_back(OldAttrList[i]);
+
+ // If there are attributes already at this index, merge them in.
+ assert(OldAttrList[i].Index == Idx && "Attribute isn't set?");
+ Attrs = OldAttrList[i].Attrs & ~Attrs;
+ ++i;
+ if (Attrs) // If any attributes left for this parameter, add them.
+ NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
+
+ // Copy attributes for arguments after this one.
+ NewAttrList.insert(NewAttrList.end(),
+ OldAttrList.begin()+i, OldAttrList.end());
+
+ return get(NewAttrList.data(), NewAttrList.size());
+}
+
+void AttrListPtr::dump() const {
+ cerr << "PAL[ ";
+ for (unsigned i = 0; i < getNumSlots(); ++i) {
+ const AttributeWithIndex &PAWI = getSlot(i);
+ cerr << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
+ }
+
+ cerr << "]\n";
+}
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
new file mode 100644
index 0000000..dd36607
--- /dev/null
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -0,0 +1,430 @@
+//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the auto-upgrade helper functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/AutoUpgrade.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cstring>
+using namespace llvm;
+
+
+static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
+ assert(F && "Illegal to upgrade a non-existent Function.");
+
+ // Get the Function's name.
+ const std::string& Name = F->getName();
+
+ // Convenience
+ const FunctionType *FTy = F->getFunctionType();
+
+  // Quickly eliminate it if it's not a candidate.
+ if (Name.length() <= 8 || Name[0] != 'l' || Name[1] != 'l' ||
+ Name[2] != 'v' || Name[3] != 'm' || Name[4] != '.')
+ return false;
+
+ Module *M = F->getParent();
+ switch (Name[5]) {
+ default: break;
+ case 'a':
+    // This upgrades the llvm.atomic.lcs, llvm.atomic.las, and llvm.atomic.lss
+    // intrinsics, as well as atomics with default address spaces, to their new
+    // function names (e.g. llvm.atomic.add.i32 => llvm.atomic.add.i32.p0i32).
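+    // As an illustrative example, llvm.atomic.las.i32 is renamed to
+    // llvm.atomic.load.add.i32.p0i32.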
+ if (Name.compare(5,7,"atomic.",7) == 0) {
+ if (Name.compare(12,3,"lcs",3) == 0) {
+ std::string::size_type delim = Name.find('.',12);
+ F->setName("llvm.atomic.cmp.swap" + Name.substr(delim) +
+ ".p0" + Name.substr(delim+1));
+ NewFn = F;
+ return true;
+ }
+ else if (Name.compare(12,3,"las",3) == 0) {
+ std::string::size_type delim = Name.find('.',12);
+ F->setName("llvm.atomic.load.add"+Name.substr(delim)
+ + ".p0" + Name.substr(delim+1));
+ NewFn = F;
+ return true;
+ }
+ else if (Name.compare(12,3,"lss",3) == 0) {
+ std::string::size_type delim = Name.find('.',12);
+ F->setName("llvm.atomic.load.sub"+Name.substr(delim)
+ + ".p0" + Name.substr(delim+1));
+ NewFn = F;
+ return true;
+ }
+ else if (Name.rfind(".p") == std::string::npos) {
+ // We don't have an address space qualifier so this has be upgraded
+ // to the new name. Copy the type name at the end of the intrinsic
+ // and add to it
+ std::string::size_type delim = Name.find_last_of('.');
+ assert(delim != std::string::npos && "can not find type");
+ F->setName(Name + ".p0" + Name.substr(delim+1));
+ NewFn = F;
+ return true;
+ }
+ }
+ break;
+ case 'b':
+ // This upgrades the name of the llvm.bswap intrinsic function to only use
+ // a single type name for overloading. We only care about the old format
+ // 'llvm.bswap.i*.i*', so check for 'bswap.' and then for there being
+ // a '.' after 'bswap.'
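+    // For example (illustrative), llvm.bswap.i32.i32 becomes llvm.bswap.i32.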
+ if (Name.compare(5,6,"bswap.",6) == 0) {
+ std::string::size_type delim = Name.find('.',11);
+
+ if (delim != std::string::npos) {
+ // Construct the new name as 'llvm.bswap' + '.i*'
+ F->setName(Name.substr(0,10)+Name.substr(delim));
+ NewFn = F;
+ return true;
+ }
+ }
+ break;
+
+ case 'c':
+ // We only want to fix the 'llvm.ct*' intrinsics which do not have the
+ // correct return type, so we check for the name, and then check if the
+ // return type does not match the parameter type.
+ if ( (Name.compare(5,5,"ctpop",5) == 0 ||
+ Name.compare(5,4,"ctlz",4) == 0 ||
+ Name.compare(5,4,"cttz",4) == 0) &&
+ FTy->getReturnType() != FTy->getParamType(0)) {
+ // We first need to change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name. We
+ // arbitrarily unique it here allowing us to construct a correctly named
+ // and typed function below.
+ F->setName("");
+
+ // Now construct the new intrinsic with the correct name and type. We
+ // leave the old function around in order to query its type, whatever it
+ // may be, and correctly convert up to the new type.
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ FTy->getParamType(0),
+ FTy->getParamType(0),
+ (Type *)0));
+ return true;
+ }
+ break;
+
+ case 'p':
+ // This upgrades the llvm.part.select overloaded intrinsic names to only
+ // use one type specifier in the name. We only care about the old format
+    // 'llvm.part.select.i*.i*', which we handle the same way as bswap above.
+ if (Name.compare(5,12,"part.select.",12) == 0) {
+ std::string::size_type delim = Name.find('.',17);
+
+ if (delim != std::string::npos) {
+ // Construct a new name as 'llvm.part.select' + '.i*'
+ F->setName(Name.substr(0,16)+Name.substr(delim));
+ NewFn = F;
+ return true;
+ }
+ break;
+ }
+
+    // This upgrades the llvm.part.set intrinsics similarly to the above; however,
+ // we care about 'llvm.part.set.i*.i*.i*', but only the first two types
+ // must match. There is an additional type specifier after these two
+ // matching types that we must retain when upgrading. Thus, we require
+ // finding 2 periods, not just one, after the intrinsic name.
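+    // For example (illustrative), llvm.part.set.i32.i32.i16 becomes
+    // llvm.part.set.i32.i16.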
+ if (Name.compare(5,9,"part.set.",9) == 0) {
+ std::string::size_type delim = Name.find('.',14);
+
+ if (delim != std::string::npos &&
+ Name.find('.',delim+1) != std::string::npos) {
+        // Construct a new name as 'llvm.part.set' + '.i*.i*'
+ F->setName(Name.substr(0,13)+Name.substr(delim));
+ NewFn = F;
+ return true;
+ }
+ break;
+ }
+
+ break;
+ case 'x':
+ // This fixes all MMX shift intrinsic instructions to take a
+ // v1i64 instead of a v2i32 as the second parameter.
+ if (Name.compare(5,10,"x86.mmx.ps",10) == 0 &&
+ (Name.compare(13,4,"psll", 4) == 0 ||
+ Name.compare(13,4,"psra", 4) == 0 ||
+ Name.compare(13,4,"psrl", 4) == 0) && Name[17] != 'i') {
+
+ const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1);
+
+ // We don't have to do anything if the parameter already has
+ // the correct type.
+ if (FTy->getParamType(1) == VT)
+ break;
+
+ // We first need to change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name. We
+ // arbitrarily unique it here allowing us to construct a correctly named
+ // and typed function below.
+ F->setName("");
+
+ assert(FTy->getNumParams() == 2 && "MMX shift intrinsics take 2 args!");
+
+ // Now construct the new intrinsic with the correct name and type. We
+ // leave the old function around in order to query its type, whatever it
+ // may be, and correctly convert up to the new type.
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ FTy->getReturnType(),
+ FTy->getParamType(0),
+ VT,
+ (Type *)0));
+ return true;
+ } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
+ Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
+ Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
+ Name.compare(5,15,"x86.sse2.movs.d",15) == 0 ||
+ Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 ||
+ Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 ||
+ Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ||
+ Name.compare(5,20,"x86.sse2.punpckh.qdq",20) == 0 ||
+ Name.compare(5,20,"x86.sse2.punpckl.qdq",20) == 0) {
+ // Calls to these intrinsics are transformed into ShuffleVector's.
+ NewFn = 0;
+ return true;
+ }
+
+ break;
+ }
+
+ // This may not belong here. This function is effectively being overloaded
+ // to both detect an intrinsic which needs upgrading, and to provide the
+ // upgraded form of the intrinsic. We should perhaps have two separate
+ // functions for this.
+ return false;
+}
+
+bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
+ NewFn = 0;
+ bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
+
+ // Upgrade intrinsic attributes. This does not change the function.
+ if (NewFn)
+ F = NewFn;
+ if (unsigned id = F->getIntrinsicID())
+ F->setAttributes(Intrinsic::getAttributes((Intrinsic::ID)id));
+ return Upgraded;
+}
+
+// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to
+// the upgraded intrinsic.  All argument and return casting must be provided
+// in order to seamlessly integrate with existing context.
+void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
+ Function *F = CI->getCalledFunction();
+ assert(F && "CallInst has no function associated with it.");
+
+ if (!NewFn) {
+ bool isLoadH = false, isLoadL = false, isMovL = false;
+ bool isMovSD = false, isShufPD = false;
+ bool isUnpckhPD = false, isUnpcklPD = false;
+ bool isPunpckhQPD = false, isPunpcklQPD = false;
+ if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadh.pd") == 0)
+ isLoadH = true;
+ else if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadl.pd") == 0)
+ isLoadL = true;
+ else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movl.dq") == 0)
+ isMovL = true;
+ else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movs.d") == 0)
+ isMovSD = true;
+ else if (strcmp(F->getNameStart(), "llvm.x86.sse2.shuf.pd") == 0)
+ isShufPD = true;
+ else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckh.pd") == 0)
+ isUnpckhPD = true;
+ else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckl.pd") == 0)
+ isUnpcklPD = true;
+ else if (strcmp(F->getNameStart(), "llvm.x86.sse2.punpckh.qdq") == 0)
+ isPunpckhQPD = true;
+ else if (strcmp(F->getNameStart(), "llvm.x86.sse2.punpckl.qdq") == 0)
+ isPunpcklQPD = true;
+
+ if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
+ isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
+ std::vector<Constant*> Idxs;
+ Value *Op0 = CI->getOperand(1);
+ ShuffleVectorInst *SI = NULL;
+ if (isLoadH || isLoadL) {
+ Value *Op1 = UndefValue::get(Op0->getType());
+ Value *Addr = new BitCastInst(CI->getOperand(2),
+ PointerType::getUnqual(Type::DoubleTy),
+ "upgraded.", CI);
+ Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
+ Value *Idx = ConstantInt::get(Type::Int32Ty, 0);
+ Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI);
+
+ if (isLoadH) {
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+ } else {
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+ }
+ Value *Mask = ConstantVector::get(Idxs);
+ SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
+ } else if (isMovL) {
+ Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
+ Idxs.push_back(Zero);
+ Idxs.push_back(Zero);
+ Idxs.push_back(Zero);
+ Idxs.push_back(Zero);
+ Value *ZeroV = ConstantVector::get(Idxs);
+
+ Idxs.clear();
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 4));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 5));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3));
+ Value *Mask = ConstantVector::get(Idxs);
+ SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
+ } else if (isMovSD ||
+ isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
+ Value *Op1 = CI->getOperand(2);
+ if (isMovSD) {
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+ } else if (isUnpckhPD || isPunpckhQPD) {
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3));
+ } else {
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+ }
+ Value *Mask = ConstantVector::get(Idxs);
+ SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
+ } else if (isShufPD) {
+ Value *Op1 = CI->getOperand(2);
+ unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, MaskVal & 1));
+ Idxs.push_back(ConstantInt::get(Type::Int32Ty, ((MaskVal >> 1) & 1)+2));
+ Value *Mask = ConstantVector::get(Idxs);
+ SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
+ }
+
+ assert(SI && "Unexpected!");
+
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty())
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ CI->replaceAllUsesWith(SI);
+
+ // Clean up the old call now that it has been completely upgraded.
+ CI->eraseFromParent();
+ } else {
+ assert(0 && "Unknown function for CallInst upgrade.");
+ }
+ return;
+ }
+
+ switch (NewFn->getIntrinsicID()) {
+ default: assert(0 && "Unknown function for CallInst upgrade.");
+ case Intrinsic::x86_mmx_psll_d:
+ case Intrinsic::x86_mmx_psll_q:
+ case Intrinsic::x86_mmx_psll_w:
+ case Intrinsic::x86_mmx_psra_d:
+ case Intrinsic::x86_mmx_psra_w:
+ case Intrinsic::x86_mmx_psrl_d:
+ case Intrinsic::x86_mmx_psrl_q:
+ case Intrinsic::x86_mmx_psrl_w: {
+ Value *Operands[2];
+
+ Operands[0] = CI->getOperand(1);
+
+ // Cast the second parameter to the correct type.
+ BitCastInst *BC = new BitCastInst(CI->getOperand(2),
+ NewFn->getFunctionType()->getParamType(1),
+ "upgraded.", CI);
+ Operands[1] = BC;
+
+ // Construct a new CallInst
+ CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+2,
+ "upgraded."+CI->getName(), CI);
+ NewCI->setTailCall(CI->isTailCall());
+ NewCI->setCallingConv(CI->getCallingConv());
+
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty())
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ CI->replaceAllUsesWith(NewCI);
+
+ // Clean up the old call now that it has been completely upgraded.
+ CI->eraseFromParent();
+ break;
+ }
+ case Intrinsic::ctlz:
+ case Intrinsic::ctpop:
+ case Intrinsic::cttz: {
+ // Build a small vector of the 1..(N-1) operands, which are the
+ // parameters.
+ SmallVector<Value*, 8> Operands(CI->op_begin()+1, CI->op_end());
+
+ // Construct a new CallInst
+ CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
+ "upgraded."+CI->getName(), CI);
+ NewCI->setTailCall(CI->isTailCall());
+ NewCI->setCallingConv(CI->getCallingConv());
+
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty()) {
+ // Check for sign extend parameter attributes on the return values.
+ bool SrcSExt = NewFn->getAttributes().paramHasAttr(0, Attribute::SExt);
+ bool DestSExt = F->getAttributes().paramHasAttr(0, Attribute::SExt);
+
+ // Construct an appropriate cast from the new return type to the old.
+ CastInst *RetCast = CastInst::Create(
+ CastInst::getCastOpcode(NewCI, SrcSExt,
+ F->getReturnType(),
+ DestSExt),
+ NewCI, F->getReturnType(),
+ NewCI->getName(), CI);
+ NewCI->moveBefore(RetCast);
+
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ CI->replaceAllUsesWith(RetCast);
+ }
+
+ // Clean up the old call now that it has been completely upgraded.
+ CI->eraseFromParent();
+ }
+ break;
+ }
+}
+
+// This tests each Function to determine if it needs upgrading. When we find
+// one we are interested in, we then upgrade all calls to reflect the new
+// function.
+void llvm::UpgradeCallsToIntrinsic(Function* F) {
+ assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
+
+  // Upgrade the function and check if it is a totally new function.
+ Function* NewFn;
+ if (UpgradeIntrinsicFunction(F, NewFn)) {
+ if (NewFn != F) {
+ // Replace all uses to the old function with the new one if necessary.
+ for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
+ UI != UE; ) {
+ if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, NewFn);
+ }
+ // Remove old function, no longer used, from the module.
+ F->eraseFromParent();
+ }
+ }
+}
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
new file mode 100644
index 0000000..3065766
--- /dev/null
+++ b/lib/VMCore/BasicBlock.cpp
@@ -0,0 +1,274 @@
+//===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BasicBlock class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/Compiler.h"
+#include "SymbolTableListTraitsImpl.h"
+#include <algorithm>
+using namespace llvm;
+
+ValueSymbolTable *BasicBlock::getValueSymbolTable() {
+ if (Function *F = getParent())
+ return &F->getValueSymbolTable();
+ return 0;
+}
+
+// Explicit instantiation of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class SymbolTableListTraits<Instruction, BasicBlock>;
+
+
+BasicBlock::BasicBlock(const std::string &Name, Function *NewParent,
+ BasicBlock *InsertBefore)
+ : Value(Type::LabelTy, Value::BasicBlockVal), Parent(0) {
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (InsertBefore) {
+ assert(NewParent &&
+ "Cannot insert block before another block with no function!");
+ NewParent->getBasicBlockList().insert(InsertBefore, this);
+ } else if (NewParent) {
+ NewParent->getBasicBlockList().push_back(this);
+ }
+
+ setName(Name);
+}
+
+
+BasicBlock::~BasicBlock() {
+ assert(getParent() == 0 && "BasicBlock still linked into the program!");
+ dropAllReferences();
+ InstList.clear();
+}
+
+void BasicBlock::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+
+ // Set Parent=parent, updating instruction symtab entries as appropriate.
+ InstList.setSymTabObject(&Parent, parent);
+
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void BasicBlock::removeFromParent() {
+ getParent()->getBasicBlockList().remove(this);
+}
+
+void BasicBlock::eraseFromParent() {
+ getParent()->getBasicBlockList().erase(this);
+}
+
+/// moveBefore - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right before MovePos.
+void BasicBlock::moveBefore(BasicBlock *MovePos) {
+ MovePos->getParent()->getBasicBlockList().splice(MovePos,
+ getParent()->getBasicBlockList(), this);
+}
+
+/// moveAfter - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right after MovePos.
+void BasicBlock::moveAfter(BasicBlock *MovePos) {
+ Function::iterator I = MovePos;
+ MovePos->getParent()->getBasicBlockList().splice(++I,
+ getParent()->getBasicBlockList(), this);
+}
+
+
+TerminatorInst *BasicBlock::getTerminator() {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+const TerminatorInst *BasicBlock::getTerminator() const {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+Instruction* BasicBlock::getFirstNonPHI() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i)) ++i;
+ return &*i;
+}
+
+void BasicBlock::dropAllReferences() {
+  for (iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+}
+
+/// getSinglePredecessor - If this basic block has a single predecessor block,
+/// return the block, otherwise return a null pointer.
+BasicBlock *BasicBlock::getSinglePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *ThePred = *PI;
+ ++PI;
+ return (PI == E) ? ThePred : 0 /*multiple preds*/;
+}
+
+/// getUniquePredecessor - If this basic block has a unique predecessor block,
+/// return the block, otherwise return a null pointer.
+/// Note that unique predecessor doesn't mean single edge, there can be
+/// multiple edges from the unique predecessor to this block (for example
+/// a switch statement with multiple cases having the same destination).
+BasicBlock *BasicBlock::getUniquePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *PredBB = *PI;
+ ++PI;
+ for (;PI != E; ++PI) {
+ if (*PI != PredBB)
+ return 0;
+ // The same predecessor appears multiple times in the predecessor list.
+ // This is OK.
+ }
+ return PredBB;
+}
+
+/// removePredecessor - This method is used to notify a BasicBlock that the
+/// specified Predecessor of the block is no longer able to reach it. This is
+/// not used to update the predecessor list itself; rather, it updates the
+/// PHI nodes that reside in the block.  Note that this should be
+/// called while the predecessor still refers to this block.
+///
+void BasicBlock::removePredecessor(BasicBlock *Pred,
+ bool DontDeleteUselessPHIs) {
+ assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs.
+ find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
+ "removePredecessor: BB is not a predecessor!");
+
+ if (InstList.empty()) return;
+ PHINode *APN = dyn_cast<PHINode>(&front());
+ if (!APN) return; // Quick exit.
+
+ // If there are exactly two predecessors, then we want to nuke the PHI nodes
+  // altogether.  However, we cannot do this in this case:
+ //
+ // Loop:
+ // %x = phi [X, Loop]
+ // %x2 = add %x, 1 ;; This would become %x2 = add %x2, 1
+ // br Loop ;; %x2 does not dominate all uses
+ //
+ // This is because the PHI node input is actually taken from the predecessor
+ // basic block. The only case this can happen is with a self loop, so we
+ // check for this case explicitly now.
+ //
+ unsigned max_idx = APN->getNumIncomingValues();
+ assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!");
+ if (max_idx == 2) {
+ BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred);
+
+ // Disable PHI elimination!
+ if (this == Other) max_idx = 3;
+ }
+
+ // <= Two predecessors BEFORE I remove one?
+ if (max_idx <= 2 && !DontDeleteUselessPHIs) {
+ // Yup, loop through and nuke the PHI nodes
+ while (PHINode *PN = dyn_cast<PHINode>(&front())) {
+ // Remove the predecessor first.
+ PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);
+
+      // If the PHI _HAD_ two incoming values, replace it with its now *single* value
+ if (max_idx == 2) {
+ if (PN->getOperand(0) != PN)
+ PN->replaceAllUsesWith(PN->getOperand(0));
+ else
+ // We are left with an infinite loop with no entries: kill the PHI.
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ getInstList().pop_front(); // Remove the PHI node
+ }
+
+ // If the PHI node already only had one entry, it got deleted by
+ // removeIncomingValue.
+ }
+ } else {
+    // Okay, now we know that we need to remove predecessor Pred from all
+    // PHI nodes.  Iterate over each PHI node, fixing them up.
+ PHINode *PN;
+ for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) {
+ ++II;
+ PN->removeIncomingValue(Pred, false);
+ // If all incoming values to the Phi are the same, we can replace the Phi
+ // with that value.
+ Value* PNV = 0;
+ if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue())) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+}
+
+
+/// splitBasicBlock - This splits a basic block into two at the specified
+/// instruction. Note that all instructions BEFORE the specified iterator stay
+/// as part of the original basic block, an unconditional branch is added to
+/// the new BB, and the rest of the instructions in the BB are moved to the new
+/// BB, including the old terminator. This invalidates the iterator.
+///
+/// Note that this only works on well formed basic blocks (must have a
+/// terminator), and 'I' must not be the end of instruction list (which would
+/// cause a degenerate basic block to be formed, having a terminator inside of
+/// the basic block).
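+///
+/// As an illustrative example (hypothetical IR), splitting before the store in:
+///   bb:       %x = add i32 %a, 1
+///             store i32 %x, i32* %p
+///             ret void
+/// yields:
+///   bb:       %x = add i32 %a, 1
+///             br label %bb.split
+///   bb.split: store i32 %x, i32* %p
+///             ret void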
+///
+BasicBlock *BasicBlock::splitBasicBlock(iterator I, const std::string &BBName) {
+ assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
+ assert(I != InstList.end() &&
+ "Trying to get me to create degenerate basic block!");
+
+ BasicBlock *InsertBefore = next(Function::iterator(this))
+ .getNodePtrUnchecked();
+ BasicBlock *New = BasicBlock::Create(BBName, getParent(), InsertBefore);
+
+ // Move all of the specified instructions from the original basic block into
+ // the new basic block.
+ New->getInstList().splice(New->end(), this->getInstList(), I, end());
+
+ // Add a branch instruction to the newly formed basic block.
+ BranchInst::Create(New, this);
+
+ // Now we must loop through all of the successors of the New block (which
+ // _were_ the successors of the 'this' block), and update any PHI nodes in
+ // successors. If there were PHI nodes in the successors, then they need to
+ // know that incoming branches will be from New, not from Old.
+ //
+ for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) {
+ // Loop over any phi nodes in the basic block, updating the BB field of
+ // incoming values...
+ BasicBlock *Successor = *I;
+ PHINode *PN;
+ for (BasicBlock::iterator II = Successor->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ int IDX = PN->getBasicBlockIndex(this);
+ while (IDX != -1) {
+ PN->setIncomingBlock((unsigned)IDX, New);
+ IDX = PN->getBasicBlockIndex(this);
+ }
+ }
+ }
+ return New;
+}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
new file mode 100644
index 0000000..d78e093
--- /dev/null
+++ b/lib/VMCore/CMakeLists.txt
@@ -0,0 +1,30 @@
+add_llvm_library(LLVMCore
+ AsmWriter.cpp
+ Attributes.cpp
+ AutoUpgrade.cpp
+ BasicBlock.cpp
+ ConstantFold.cpp
+ Constants.cpp
+ Core.cpp
+ Dominators.cpp
+ Function.cpp
+ Globals.cpp
+ InlineAsm.cpp
+ Instruction.cpp
+ Instructions.cpp
+ IntrinsicInst.cpp
+ LeakDetector.cpp
+ Mangler.cpp
+ Module.cpp
+ ModuleProvider.cpp
+ Pass.cpp
+ PassManager.cpp
+ PrintModulePass.cpp
+ Type.cpp
+ TypeSymbolTable.cpp
+ Use.cpp
+ Value.cpp
+ ValueSymbolTable.cpp
+ ValueTypes.cpp
+ Verifier.cpp
+ )
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
new file mode 100644
index 0000000..7e4902f
--- /dev/null
+++ b/lib/VMCore/ConstantFold.cpp
@@ -0,0 +1,1681 @@
+//===- ConstantFold.cpp - LLVM constant folder ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements folding of constants for LLVM. This implements the
+// (internal) ConstantFold.h interface, which is used by the
+// ConstantExpr::get* methods to automatically fold constants when possible.
+//
+// Constant folding is currently implemented in two pieces: the
+// template-based folder for simple primitive constants like ConstantInt, and
+// the special case hackery that we use to symbolically evaluate expressions
+// that use ConstantExprs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConstantFold.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include <limits>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ConstantFold*Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+/// BitCastConstantVector - Convert the specified ConstantVector node to the
+/// specified vector type. At this point, we know that the elements of the
+/// input vector constant are all simple integer or FP values.
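+///
+/// For example (illustrative): bitcasting <2 x i32> <i32 1, i32 2> to
+/// <2 x float> folds each element with ConstantExpr::getBitCast; a cast that
+/// changes the element count (e.g. <2 x i32> to <4 x i16>) returns null here,
+/// since folding it would require endianness information.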
+static Constant *BitCastConstantVector(ConstantVector *CV,
+ const VectorType *DstTy) {
+ // If this cast changes element count then we can't handle it here:
+ // doing so requires endianness information. This should be handled by
+ // Analysis/ConstantFolding.cpp
+ unsigned NumElts = DstTy->getNumElements();
+ if (NumElts != CV->getNumOperands())
+ return 0;
+
+ // Verify that all elements of the input are simple.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!isa<ConstantInt>(CV->getOperand(i)) &&
+ !isa<ConstantFP>(CV->getOperand(i)))
+ return 0;
+ }
+
+ // Bitcast each element now.
+ std::vector<Constant*> Result;
+ const Type *DstEltTy = DstTy->getElementType();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i), DstEltTy));
+ return ConstantVector::get(Result);
+}
+
+/// This function determines which opcode to use to fold two constant cast
+/// expressions together. It uses CastInst::isEliminableCastPair to determine
+/// the opcode. Consequently it's just a wrapper around that function.
+/// @brief Determine if it is valid to fold a cast of a cast
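+///
+/// For example (illustrative), zext i8 to i32 followed by trunc i32 to i16
+/// collapses to a single zext i8 to i16; when the pair cannot be collapsed,
+/// isEliminableCastPair (and therefore this function) returns 0.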
+static unsigned
+foldConstantCastPair(
+ unsigned opc, ///< opcode of the second cast constant expression
+ const ConstantExpr*Op, ///< the first cast constant expression
+ const Type *DstTy ///< destination type of the first cast
+) {
+ assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!");
+ assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
+ assert(CastInst::isCast(opc) && "Invalid cast opcode");
+
+ // Get the types and opcodes for the two Cast constant expressions.
+ const Type *SrcTy = Op->getOperand(0)->getType();
+ const Type *MidTy = Op->getType();
+ Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opc);
+
+ // Let CastInst::isEliminableCastPair do the heavy lifting.
+ return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
+ Type::Int64Ty);
+}
+
+static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
+ const Type *SrcTy = V->getType();
+ if (SrcTy == DestTy)
+ return V; // no-op cast
+
+ // Check to see if we are casting a pointer to an aggregate to a pointer to
+ // the first element. If so, return the appropriate GEP instruction.
+ if (const PointerType *PTy = dyn_cast<PointerType>(V->getType()))
+ if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy))
+ if (PTy->getAddressSpace() == DPTy->getAddressSpace()) {
+ SmallVector<Value*, 8> IdxList;
+ IdxList.push_back(Constant::getNullValue(Type::Int32Ty));
+ const Type *ElTy = PTy->getElementType();
+ while (ElTy != DPTy->getElementType()) {
+ if (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ if (STy->getNumElements() == 0) break;
+ ElTy = STy->getElementType(0);
+ IdxList.push_back(Constant::getNullValue(Type::Int32Ty));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(ElTy)) {
+ if (isa<PointerType>(ElTy)) break; // Can't index into pointers!
+ ElTy = STy->getElementType();
+ IdxList.push_back(IdxList[0]);
+ } else {
+ break;
+ }
+ }
+
+ if (ElTy == DPTy->getElementType())
+ return ConstantExpr::getGetElementPtr(V, &IdxList[0], IdxList.size());
+ }
+
+ // Handle casts from one vector constant to another. We know that the src
+ // and dest type have the same size (otherwise it's an illegal cast).
+ if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ if (const VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
+ assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
+ "Not cast between same sized vectors!");
+ SrcTy = NULL;
+ // First, check for null. Undef is already handled.
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(DestTy);
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ return BitCastConstantVector(CV, DestPTy);
+ }
+
+ // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
+ // This allows for other simplifications (although some of them
+ // can only be handled by Analysis/ConstantFolding.cpp).
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
+ return ConstantExpr::getBitCast(ConstantVector::get(&V, 1), DestPTy);
+ }
+
+ // Finally, implement bitcast folding now. The code below doesn't handle
+ // every bitcast correctly; only the simple scalar and pointer cases are
+ // folded here.
+ if (isa<ConstantPointerNull>(V)) // ptr->ptr cast.
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+
+ // Handle integral constant input.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (DestTy->isInteger())
+ // Integral -> Integral. This is a no-op because the bit widths must
+ // be the same. Consequently, we just fold to V.
+ return V;
+
+ if (DestTy->isFloatingPoint())
+ return ConstantFP::get(APFloat(CI->getValue(),
+ DestTy != Type::PPC_FP128Ty));
+
+ // Otherwise, can't fold this (vector?)
+ return 0;
+ }
+
+ // Handle ConstantFP input.
+ if (const ConstantFP *FP = dyn_cast<ConstantFP>(V))
+ // FP -> Integral.
+ return ConstantInt::get(FP->getValueAPF().bitcastToAPInt());
+
+ return 0;
+}
+
+
+Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
+ const Type *DestTy) {
+ if (isa<UndefValue>(V)) {
+ // zext(undef) = 0, because the top bits will be zero.
+ // sext(undef) = 0, because the top bits will all be the same.
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (opc == Instruction::ZExt || opc == Instruction::SExt ||
+ opc == Instruction::UIToFP || opc == Instruction::SIToFP)
+ return Constant::getNullValue(DestTy);
+ return UndefValue::get(DestTy);
+ }
+ // No compile-time operations on this type yet.
+ if (V->getType() == Type::PPC_FP128Ty || DestTy == Type::PPC_FP128Ty)
+ return 0;
+
+ // If the cast operand is a constant expression, there are a few things we can
+ // do to try to simplify it.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->isCast()) {
+ // Try hard to fold cast of cast because they are often eliminable.
+ if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
+ return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy);
+ } else if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // If all of the indexes in the GEP are null values, there is no pointer
+ // adjustment going on. We might as well cast the source pointer.
+ bool isAllNull = true;
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!CE->getOperand(i)->isNullValue()) {
+ isAllNull = false;
+ break;
+ }
+ if (isAllNull)
+ // This is casting one pointer type to another, always BitCast
+ return ConstantExpr::getPointerCast(CE->getOperand(0), DestTy);
+ }
+ }
+
+ // We actually have to do a cast now. Perform the cast according to the
+ // opcode specified.
+ switch (opc) {
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ if (const ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ bool ignored;
+ APFloat Val = FPC->getValueAPF();
+ Val.convert(DestTy == Type::FloatTy ? APFloat::IEEEsingle :
+ DestTy == Type::DoubleTy ? APFloat::IEEEdouble :
+ DestTy == Type::X86_FP80Ty ? APFloat::x87DoubleExtended :
+ DestTy == Type::FP128Ty ? APFloat::IEEEquad :
+ APFloat::Bogus,
+ APFloat::rmNearestTiesToEven, &ignored);
+ return ConstantFP::get(Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (const ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ const APFloat &V = FPC->getValueAPF();
+ bool ignored;
+ uint64_t x[2];
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ APInt Val(DestBitWidth, 2, x);
+ return ConstantInt::get(Val);
+ }
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ std::vector<Constant*> res;
+ const VectorType *DestVecTy = cast<VectorType>(DestTy);
+ const Type *DstEltTy = DestVecTy->getElementType();
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy));
+ return ConstantVector::get(DestVecTy, res);
+ }
+ return 0; // Can't fold.
+ case Instruction::IntToPtr: // always treated as unsigned
+ if (V->isNullValue()) // Is it an integral null value?
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+ return 0; // Other integral constants cannot be folded
+ case Instruction::PtrToInt: // always treated as unsigned
+ if (V->isNullValue()) // Is it a null pointer value?
+ return ConstantInt::get(DestTy, 0);
+ return 0; // Other pointer constants cannot be folded
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt api = CI->getValue();
+ const uint64_t zero[] = {0, 0};
+ APFloat apf = APFloat(APInt(DestTy->getPrimitiveSizeInBits(),
+ 2, zero));
+ (void)apf.convertFromAPInt(api,
+ opc==Instruction::SIToFP,
+ APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(apf);
+ }
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ std::vector<Constant*> res;
+ const VectorType *DestVecTy = cast<VectorType>(DestTy);
+ const Type *DstEltTy = DestVecTy->getElementType();
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy));
+ return ConstantVector::get(DestVecTy, res);
+ }
+ return 0;
+ case Instruction::ZExt:
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ APInt Result(CI->getValue());
+ Result.zext(BitWidth);
+ return ConstantInt::get(Result);
+ }
+ return 0;
+ case Instruction::SExt:
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ APInt Result(CI->getValue());
+ Result.sext(BitWidth);
+ return ConstantInt::get(Result);
+ }
+ return 0;
+ case Instruction::Trunc:
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ APInt Result(CI->getValue());
+ Result.trunc(BitWidth);
+ return ConstantInt::get(Result);
+ }
+ return 0;
+ case Instruction::BitCast:
+ return FoldBitCast(const_cast<Constant*>(V), DestTy);
+ default:
+ assert(!"Invalid CE CastInst opcode");
+ break;
+ }
+
+ assert(0 && "Failed to cast constant expression");
+ return 0;
+}
+
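+// Illustrative examples for the select folding below: select i1 true, i32 7,
+// i32 9 folds to i32 7, and an undef condition folds to the first operand
+// (either choice is legal when the condition is undefined).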
+Constant *llvm::ConstantFoldSelectInstruction(const Constant *Cond,
+ const Constant *V1,
+ const Constant *V2) {
+ if (const ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
+ return const_cast<Constant*>(CB->getZExtValue() ? V1 : V2);
+
+ if (isa<UndefValue>(V1)) return const_cast<Constant*>(V2);
+ if (isa<UndefValue>(V2)) return const_cast<Constant*>(V1);
+ if (isa<UndefValue>(Cond)) return const_cast<Constant*>(V1);
+ if (V1 == V2) return const_cast<Constant*>(V1);
+ return 0;
+}
+
+Constant *llvm::ConstantFoldExtractElementInstruction(const Constant *Val,
+ const Constant *Idx) {
+ if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
+ return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
+ if (Val->isNullValue()) // ee(zero, x) -> zero
+ return Constant::getNullValue(
+ cast<VectorType>(Val->getType())->getElementType());
+
+ if (const ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
+ if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+ return CVal->getOperand(CIdx->getZExtValue());
+ } else if (isa<UndefValue>(Idx)) {
+ // ee({w,x,y,z}, undef) -> w (an arbitrary value).
+ return CVal->getOperand(0);
+ }
+ }
+ return 0;
+}
+
+Constant *llvm::ConstantFoldInsertElementInstruction(const Constant *Val,
+ const Constant *Elt,
+ const Constant *Idx) {
+ const ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
+ if (!CIdx) return 0;
+ APInt idxVal = CIdx->getValue();
+ if (isa<UndefValue>(Val)) {
+ // Insertion of scalar constant into vector undef
+ // Optimize away insertion of undef
+ if (isa<UndefValue>(Elt))
+ return const_cast<Constant*>(Val);
+ // Otherwise break the aggregate undef into multiple undefs and do
+ // the insertion
+ unsigned numOps =
+ cast<VectorType>(Val->getType())->getNumElements();
+ std::vector<Constant*> Ops;
+ Ops.reserve(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ const Constant *Op =
+ (idxVal == i) ? Elt : UndefValue::get(Elt->getType());
+ Ops.push_back(const_cast<Constant*>(Op));
+ }
+ return ConstantVector::get(Ops);
+ }
+ if (isa<ConstantAggregateZero>(Val)) {
+ // Insertion of scalar constant into vector aggregate zero
+ // Optimize away insertion of zero
+ if (Elt->isNullValue())
+ return const_cast<Constant*>(Val);
+ // Otherwise break the aggregate zero into multiple zeros and do
+ // the insertion
+ unsigned numOps =
+ cast<VectorType>(Val->getType())->getNumElements();
+ std::vector<Constant*> Ops;
+ Ops.reserve(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ const Constant *Op =
+ (idxVal == i) ? Elt : Constant::getNullValue(Elt->getType());
+ Ops.push_back(const_cast<Constant*>(Op));
+ }
+ return ConstantVector::get(Ops);
+ }
+ if (const ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
+ // Insertion of scalar constant into vector constant
+ std::vector<Constant*> Ops;
+ Ops.reserve(CVal->getNumOperands());
+ for (unsigned i = 0; i < CVal->getNumOperands(); ++i) {
+ const Constant *Op =
+ (idxVal == i) ? Elt : cast<Constant>(CVal->getOperand(i));
+ Ops.push_back(const_cast<Constant*>(Op));
+ }
+ return ConstantVector::get(Ops);
+ }
+
+ return 0;
+}
+
+/// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef
+/// return the specified element value. Otherwise return null.
+static Constant *GetVectorElement(const Constant *C, unsigned EltNo) {
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C))
+ return CV->getOperand(EltNo);
+
+ const Type *EltTy = cast<VectorType>(C->getType())->getElementType();
+ if (isa<ConstantAggregateZero>(C))
+ return Constant::getNullValue(EltTy);
+ if (isa<UndefValue>(C))
+ return UndefValue::get(EltTy);
+ return 0;
+}
+
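+// Illustrative example for the shuffle folding below: with
+//   V1 = <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+//   V2 = <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// the mask <2 x i32> <i32 0, i32 5> selects element 0 of V1 and element
+// 5 - 4 = 1 of V2, folding to <2 x i32> <i32 0, i32 5>; a mask element of 8
+// or more would become undef.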
+Constant *llvm::ConstantFoldShuffleVectorInstruction(const Constant *V1,
+ const Constant *V2,
+ const Constant *Mask) {
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(Mask)) return UndefValue::get(V1->getType());
+
+ unsigned MaskNumElts = cast<VectorType>(Mask->getType())->getNumElements();
+ unsigned SrcNumElts = cast<VectorType>(V1->getType())->getNumElements();
+ const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+
+ // Loop over the shuffle mask, evaluating each element.
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ Constant *InElt = GetVectorElement(Mask, i);
+ if (InElt == 0) return 0;
+
+ if (isa<UndefValue>(InElt))
+ InElt = UndefValue::get(EltTy);
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(InElt)) {
+ unsigned Elt = CI->getZExtValue();
+ if (Elt >= SrcNumElts*2)
+ InElt = UndefValue::get(EltTy);
+ else if (Elt >= SrcNumElts)
+ InElt = GetVectorElement(V2, Elt - SrcNumElts);
+ else
+ InElt = GetVectorElement(V1, Elt);
+ if (InElt == 0) return 0;
+ } else {
+ // Unknown value.
+ return 0;
+ }
+ Result.push_back(InElt);
+ }
+
+ return ConstantVector::get(&Result[0], Result.size());
+}
+
+Constant *llvm::ConstantFoldExtractValueInstruction(const Constant *Agg,
+ const unsigned *Idxs,
+ unsigned NumIdx) {
+ // Base case: no indices, so return the entire value.
+ if (NumIdx == 0)
+ return const_cast<Constant *>(Agg);
+
+ if (isa<UndefValue>(Agg)) // ev(undef, x) -> undef
+ return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs,
+ Idxs + NumIdx));
+
+ if (isa<ConstantAggregateZero>(Agg)) // ev(0, x) -> 0
+ return
+ Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs,
+ Idxs + NumIdx));
+
+ // Otherwise recurse.
+ return ConstantFoldExtractValueInstruction(Agg->getOperand(*Idxs),
+ Idxs+1, NumIdx-1);
+}
+
+Constant *llvm::ConstantFoldInsertValueInstruction(const Constant *Agg,
+ const Constant *Val,
+ const unsigned *Idxs,
+ unsigned NumIdx) {
+ // Base case: no indices, so replace the entire value.
+ if (NumIdx == 0)
+ return const_cast<Constant *>(Val);
+
+ if (isa<UndefValue>(Agg)) {
+ // Insertion of constant into aggregate undef
+ // Optimize away insertion of undef
+ if (isa<UndefValue>(Val))
+ return const_cast<Constant*>(Agg);
+ // Otherwise break the aggregate undef into multiple undefs and do
+ // the insertion
+ const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
+ unsigned numOps;
+ if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
+ numOps = AR->getNumElements();
+ else
+ numOps = cast<StructType>(AggTy)->getNumElements();
+ std::vector<Constant*> Ops(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ const Type *MemberTy = AggTy->getTypeAtIndex(i);
+ const Constant *Op =
+ (*Idxs == i) ?
+ ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
+ Val, Idxs+1, NumIdx-1) :
+ UndefValue::get(MemberTy);
+ Ops[i] = const_cast<Constant*>(Op);
+ }
+ if (isa<StructType>(AggTy))
+ return ConstantStruct::get(Ops);
+ else
+ return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+ }
+ if (isa<ConstantAggregateZero>(Agg)) {
+ // Insertion of constant into aggregate zero
+ // Optimize away insertion of zero
+ if (Val->isNullValue())
+ return const_cast<Constant*>(Agg);
+ // Otherwise break the aggregate zero into multiple zeros and do
+ // the insertion
+ const CompositeType *AggTy = cast<CompositeType>(Agg->getType());
+ unsigned numOps;
+ if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
+ numOps = AR->getNumElements();
+ else
+ numOps = cast<StructType>(AggTy)->getNumElements();
+ std::vector<Constant*> Ops(numOps);
+ for (unsigned i = 0; i < numOps; ++i) {
+ const Type *MemberTy = AggTy->getTypeAtIndex(i);
+ const Constant *Op =
+ (*Idxs == i) ?
+ ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
+ Val, Idxs+1, NumIdx-1) :
+ Constant::getNullValue(MemberTy);
+ Ops[i] = const_cast<Constant*>(Op);
+ }
+ if (isa<StructType>(AggTy))
+ return ConstantStruct::get(Ops);
+ else
+ return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+ }
+ if (isa<ConstantStruct>(Agg) || isa<ConstantArray>(Agg)) {
+ // Insertion of constant into aggregate constant
+ std::vector<Constant*> Ops(Agg->getNumOperands());
+ for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
+ const Constant *Op =
+ (*Idxs == i) ?
+ ConstantFoldInsertValueInstruction(Agg->getOperand(i),
+ Val, Idxs+1, NumIdx-1) :
+ Agg->getOperand(i);
+ Ops[i] = const_cast<Constant*>(Op);
+ }
+ Constant *C;
+ if (isa<StructType>(Agg->getType()))
+ C = ConstantStruct::get(Ops);
+ else
+ C = ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
+ return C;
+ }
+
+ return 0;
+}
+
+/// EvalVectorOp - Given two vector constants and a function pointer, apply the
+/// function pointer to each element pair, producing a new ConstantVector
+/// constant. Either or both of V1 and V2 may be NULL, meaning a
+/// ConstantAggregateZero operand.
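+///
+/// For example (illustrative), adding <2 x i32> <i32 1, i32 2> to a
+/// ConstantAggregateZero operand (passed in as NULL) applies getAdd to each
+/// element against zero, yielding <2 x i32> <i32 1, i32 2>.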
+static Constant *EvalVectorOp(const ConstantVector *V1,
+ const ConstantVector *V2,
+ const VectorType *VTy,
+ Constant *(*FP)(Constant*, Constant*)) {
+ std::vector<Constant*> Res;
+ const Type *EltTy = VTy->getElementType();
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ const Constant *C1 = V1 ? V1->getOperand(i) : Constant::getNullValue(EltTy);
+ const Constant *C2 = V2 ? V2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res.push_back(FP(const_cast<Constant*>(C1),
+ const_cast<Constant*>(C2)));
+ }
+ return ConstantVector::get(Res);
+}
+
+Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
+ const Constant *C1,
+ const Constant *C2) {
+ // No compile-time operations on this type yet.
+ if (C1->getType() == Type::PPC_FP128Ty)
+ return 0;
+
+ // Handle UndefValue up front
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ switch (Opcode) {
+ case Instruction::Xor:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return Constant::getNullValue(C1->getType());
+ // Fallthrough
+ case Instruction::Add:
+ case Instruction::Sub:
+ return UndefValue::get(C1->getType());
+ case Instruction::Mul:
+ case Instruction::And:
+ return Constant::getNullValue(C1->getType());
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ if (!isa<UndefValue>(C2)) // undef / X -> 0
+ return Constant::getNullValue(C1->getType());
+ return const_cast<Constant*>(C2); // X / undef -> undef
+ case Instruction::Or: // X | undef -> -1
+ if (const VectorType *PTy = dyn_cast<VectorType>(C1->getType()))
+ return ConstantVector::getAllOnesValue(PTy);
+ return ConstantInt::getAllOnesValue(C1->getType());
+ case Instruction::LShr:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return const_cast<Constant*>(C1); // undef lshr undef -> undef
+ return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
+ // undef lshr X -> 0
+ case Instruction::AShr:
+ if (!isa<UndefValue>(C2))
+ return const_cast<Constant*>(C1); // undef ashr X --> undef
+ else if (isa<UndefValue>(C1))
+ return const_cast<Constant*>(C1); // undef ashr undef -> undef
+ else
+ return const_cast<Constant*>(C1); // X ashr undef --> X
+ case Instruction::Shl:
+ // undef << X -> 0 or X << undef -> 0
+ return Constant::getNullValue(C1->getType());
+ }
+ }
+
+ // Handle simplifications of the RHS when a constant int.
+ if (const ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ if (CI2->equalsInt(0)) return const_cast<Constant*>(C1); // X + 0 == X
+ break;
+ case Instruction::Sub:
+ if (CI2->equalsInt(0)) return const_cast<Constant*>(C1); // X - 0 == X
+ break;
+ case Instruction::Mul:
+ if (CI2->equalsInt(0)) return const_cast<Constant*>(C2); // X * 0 == 0
+ if (CI2->equalsInt(1))
+ return const_cast<Constant*>(C1); // X * 1 == X
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ if (CI2->equalsInt(1))
+ return const_cast<Constant*>(C1); // X / 1 == X
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X / 0 == undef
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (CI2->equalsInt(1))
+ return Constant::getNullValue(CI2->getType()); // X % 1 == 0
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X % 0 == undef
+ break;
+ case Instruction::And:
+ if (CI2->isZero()) return const_cast<Constant*>(C2); // X & 0 == 0
+ if (CI2->isAllOnesValue())
+ return const_cast<Constant*>(C1); // X & -1 == X
+
+ if (const ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // (zext i32 to i64) & 4294967295 -> (zext i32 to i64)
+ if (CE1->getOpcode() == Instruction::ZExt) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth =
+ CE1->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt PossiblySetBits(APInt::getLowBitsSet(DstWidth, SrcWidth));
+ if ((PossiblySetBits & CI2->getValue()) == PossiblySetBits)
+ return const_cast<Constant*>(C1);
+ }
+
+ // If and'ing the address of a global with a constant, fold it.
+ if (CE1->getOpcode() == Instruction::PtrToInt &&
+ isa<GlobalValue>(CE1->getOperand(0))) {
+ GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
+
+ // Functions are at least 4-byte aligned.
+ unsigned GVAlign = GV->getAlignment();
+ if (isa<Function>(GV))
+ GVAlign = std::max(GVAlign, 4U);
+
+ if (GVAlign > 1) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign));
+ APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
+
+ // If checking bits we know are clear, return zero.
+ if ((CI2->getValue() & BitsNotSet) == CI2->getValue())
+ return Constant::getNullValue(CI2->getType());
+ }
+ }
+ }
+ break;
+ case Instruction::Or:
+ if (CI2->equalsInt(0)) return const_cast<Constant*>(C1); // X | 0 == X
+ if (CI2->isAllOnesValue())
+ return const_cast<Constant*>(C2); // X | -1 == -1
+ break;
+ case Instruction::Xor:
+ if (CI2->equalsInt(0)) return const_cast<Constant*>(C1); // X ^ 0 == X
+ break;
+ case Instruction::AShr:
+ // ashr (zext C to Ty), C2 -> lshr (zext C, CSA), C2
+ if (const ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
+ if (CE1->getOpcode() == Instruction::ZExt) // Top bits known zero.
+ return ConstantExpr::getLShr(const_cast<Constant*>(C1),
+ const_cast<Constant*>(C2));
+ break;
+ }
+ }
+
+ // At this point we know neither constant is an UndefValue.
+ if (const ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
+ if (const ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ using namespace APIntOps;
+ const APInt &C1V = CI1->getValue();
+ const APInt &C2V = CI2->getValue();
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ return ConstantInt::get(C1V + C2V);
+ case Instruction::Sub:
+ return ConstantInt::get(C1V - C2V);
+ case Instruction::Mul:
+ return ConstantInt::get(C1V * C2V);
+ case Instruction::UDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(C1V.udiv(C2V));
+ case Instruction::SDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
+ return ConstantInt::get(C1V.sdiv(C2V));
+ case Instruction::URem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(C1V.urem(C2V));
+ case Instruction::SRem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
+ return ConstantInt::get(C1V.srem(C2V));
+ case Instruction::And:
+ return ConstantInt::get(C1V & C2V);
+ case Instruction::Or:
+ return ConstantInt::get(C1V | C2V);
+ case Instruction::Xor:
+ return ConstantInt::get(C1V ^ C2V);
+ case Instruction::Shl: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(C1V.shl(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::LShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(C1V.lshr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::AShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(C1V.ashr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ }
+ }
+ } else if (const ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
+ if (const ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
+ APFloat C1V = CFP1->getValueAPF();
+ APFloat C2V = CFP2->getValueAPF();
+ APFloat C3V = C1V; // copy for modification
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ (void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C3V);
+ case Instruction::Sub:
+ (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C3V);
+ case Instruction::Mul:
+ (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C3V);
+ case Instruction::FDiv:
+ (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C3V);
+ case Instruction::FRem:
+ (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C3V);
+ }
+ }
+ } else if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
+ const ConstantVector *CP1 = dyn_cast<ConstantVector>(C1);
+ const ConstantVector *CP2 = dyn_cast<ConstantVector>(C2);
+ if ((CP1 != NULL || isa<ConstantAggregateZero>(C1)) &&
+ (CP2 != NULL || isa<ConstantAggregateZero>(C2))) {
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAdd);
+ case Instruction::Sub:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSub);
+ case Instruction::Mul:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getMul);
+ case Instruction::UDiv:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getUDiv);
+ case Instruction::SDiv:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSDiv);
+ case Instruction::FDiv:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFDiv);
+ case Instruction::URem:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getURem);
+ case Instruction::SRem:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSRem);
+ case Instruction::FRem:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFRem);
+ case Instruction::And:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAnd);
+ case Instruction::Or:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getOr);
+ case Instruction::Xor:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getXor);
+ case Instruction::LShr:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getLShr);
+ case Instruction::AShr:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAShr);
+ case Instruction::Shl:
+ return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getShl);
+ }
+ }
+ }
+
+ if (isa<ConstantExpr>(C1)) {
+ // There are many possible foldings we could do here. We should probably
+ // at least fold add of a pointer with an integer into the appropriate
+ // getelementptr. This will improve alias analysis a bit.
+ } else if (isa<ConstantExpr>(C2)) {
+ // If C2 is a constant expr and C1 isn't, flop them around and fold the
+ // other way if possible.
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // No change of opcode required.
+ return ConstantFoldBinaryInstruction(Opcode, C2, C1);
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Sub:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ default: // These instructions cannot be flopped around.
+ break;
+ }
+ }
+
+ // We don't know how to fold this.
+ return 0;
+}
+
+/// isMaybeZeroSizedType - This type is zero sized if it's an array or
+/// structure of zero sized types. The only leaf zero sized type is an empty
+/// structure.
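+/// For example (illustrative), {} and [4 x {}] are zero sized, while an
+/// OpaqueType conservatively counts as "maybe zero sized" because its layout
+/// is unknown.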
+static bool isMaybeZeroSizedType(const Type *Ty) {
+ if (isa<OpaqueType>(Ty)) return true; // Can't say.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+
+ // If all of the elements have zero size, this does too.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ if (!isMaybeZeroSizedType(STy->getElementType(i))) return false;
+ return true;
+
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ return isMaybeZeroSizedType(ATy->getElementType());
+ }
+ return false;
+}
+
+/// IdxCompare - Compare the two constants as though they were getelementptr
+/// indices. This allows coercion of the types to be the same thing.
+///
+/// If the two constants are the "same" (after coercion), return 0. If the
+/// first is less than the second, return -1, if the second is less than the
+/// first, return 1. If the constants are not integral, return -2.
+///
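+/// For example (illustrative): comparing indices i32 2 and i64 5 sign
+/// extends both to i64 and returns -1; if either index is not a ConstantInt
+/// (e.g. a ConstantExpr), the result is -2 ("don't know").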
+static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
+ if (C1 == C2) return 0;
+
+ // Ok, we found a different index. If they are not ConstantInt, we can't do
+ // anything with them.
+ if (!isa<ConstantInt>(C1) || !isa<ConstantInt>(C2))
+ return -2; // don't know!
+
+ // Ok, we have two differing integer indices. Sign extend them to be the same
+ // type. Long is always big enough, so we use it.
+ if (C1->getType() != Type::Int64Ty)
+ C1 = ConstantExpr::getSExt(C1, Type::Int64Ty);
+
+ if (C2->getType() != Type::Int64Ty)
+ C2 = ConstantExpr::getSExt(C2, Type::Int64Ty);
+
+ if (C1 == C2) return 0; // They are equal
+
+ // If the type being indexed over is really just a zero sized type, there is
+ // no pointer difference being made here.
+ if (isMaybeZeroSizedType(ElTy))
+ return -2; // dunno.
+
+ // If they are really different, now that they are the same type, then we
+ // found a difference!
+ if (cast<ConstantInt>(C1)->getSExtValue() <
+ cast<ConstantInt>(C2)->getSExtValue())
+ return -1;
+ else
+ return 1;
+}
+
+/// evaluateFCmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like ConstantFP comparisons, but should instead handle ConstantExprs.
+/// If we can determine that the two constants have a particular relation to
+/// each other, we should return the corresponding FCmpInst predicate,
+/// otherwise return FCmpInst::BAD_FCMP_PREDICATE. This is used below in
+/// ConstantFoldCompareInstruction.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider ConstantFP
+/// to be the simplest, and ConstantExprs to be the most complex.
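+///
+/// For example (illustrative), relating 1.0 and 2.0 distills to FCMP_OLT via
+/// the standard folder, while anything indeterminate comes back as
+/// BAD_FCMP_PREDICATE.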
+static FCmpInst::Predicate evaluateFCmpRelation(const Constant *V1,
+ const Constant *V2) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare values of different types!");
+
+ // No compile-time operations on this type yet.
+ if (V1->getType() == Type::PPC_FP128Ty)
+ return FCmpInst::BAD_FCMP_PREDICATE;
+
+ // Handle degenerate case quickly
+ if (V1 == V2) return FCmpInst::FCMP_OEQ;
+
+ if (!isa<ConstantExpr>(V1)) {
+ if (!isa<ConstantExpr>(V2)) {
+ // We distilled this down to a simple case. Use the standard constant
+ // folder for a few cases.
+ ConstantInt *R = 0;
+ Constant *C1 = const_cast<Constant*>(V1);
+ Constant *C2 = const_cast<Constant*>(V2);
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, C1, C2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OEQ;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, C1, C2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OLT;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, C1, C2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OGT;
+
+ // Nothing more we can do
+ return FCmpInst::BAD_FCMP_PREDICATE;
+ }
+
+ // If the first operand is simple and the second is a ConstantExpr, swap
+ // the operands.
+ FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1);
+ if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE)
+ return FCmpInst::getSwappedPredicate(SwappedRelation);
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr or a simple constant.
+ const ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ switch (CE1->getOpcode()) {
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ // We might be able to do something with these but we don't right now.
+ break;
+ default:
+ break;
+ }
+ }
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ return FCmpInst::BAD_FCMP_PREDICATE;
+}
+
+/// evaluateICmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like integer comparisons, but should instead handle ConstantExprs
+/// and GlobalValues. If we can determine that the two constants have a
+/// particular relation to each other, we should return the corresponding ICmp
+/// predicate, otherwise return ICmpInst::BAD_ICMP_PREDICATE.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider simple
+/// constants (like ConstantInt) to be the simplest, followed by
+/// GlobalValues, followed by ConstantExpr's (the most complex).
+///
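+/// For example (illustrative), comparing two distinct non-weak, non-alias
+/// GlobalValues yields ICMP_NE (distinct globals cannot share an address),
+/// and comparing such a global against a null pointer also yields ICMP_NE
+/// (a global's address is never null).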
+static ICmpInst::Predicate evaluateICmpRelation(const Constant *V1,
+ const Constant *V2,
+ bool isSigned) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare different types of values!");
+ if (V1 == V2) return ICmpInst::ICMP_EQ;
+
+ if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1)) {
+ if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2)) {
+ // We distilled this down to a simple case, use the standard constant
+ // folder.
+ ConstantInt *R = 0;
+ Constant *C1 = const_cast<Constant*>(V1);
+ Constant *C2 = const_cast<Constant*>(V2);
+ ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, C1, C2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, C1, C2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, C1, C2));
+ if (R && !R->isZero())
+ return pred;
+
+ // If we couldn't figure it out, bail.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // If the first operand is simple, swap operands.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+
+ } else if (const GlobalValue *CPR1 = dyn_cast<GlobalValue>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue or a simple constant, and a
+ // simple constant (since the types must match) can only be a
+ // ConstantPointerNull.
+ if (const GlobalValue *CPR2 = dyn_cast<GlobalValue>(V2)) {
+ // Don't try to decide equality of aliases.
+ if (!isa<GlobalAlias>(CPR1) && !isa<GlobalAlias>(CPR2))
+ if (!CPR1->hasExternalWeakLinkage() || !CPR2->hasExternalWeakLinkage())
+ return ICmpInst::ICMP_NE;
+ } else {
+ assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!");
+ // GlobalVals can never be null. Don't try to evaluate aliases.
+ if (!CPR1->hasExternalWeakLinkage() && !isa<GlobalAlias>(CPR1))
+ return ICmpInst::ICMP_NE;
+ }
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr, a CPR, or a simple constant.
+ const ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ const Constant *CE1Op0 = CE1->getOperand(0);
+
+ switch (CE1->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ break; // We can't evaluate floating point casts or truncations.
+
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::BitCast:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the cast is not actually changing bits, and the second operand is a
+ // null pointer, do the comparison with the pre-casted value.
+ if (V2->isNullValue() &&
+ (isa<PointerType>(CE1->getType()) || CE1->getType()->isInteger())) {
+ bool sgnd = isSigned;
+ if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
+ if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
+ return evaluateICmpRelation(CE1Op0,
+ Constant::getNullValue(CE1Op0->getType()),
+ sgnd);
+ }
+
+ // If the dest type is a pointer type, and the RHS is a constantexpr cast
+ // from the same type as the src of the LHS, evaluate the inputs. This is
+ // important for things like "icmp eq (cast 4 to int*), (cast 5 to int*)",
+ // which happens a lot in compilers with tagged integers.
+ if (const ConstantExpr *CE2 = dyn_cast<ConstantExpr>(V2))
+ if (CE2->isCast() && isa<PointerType>(CE1->getType()) &&
+ CE1->getOperand(0)->getType() == CE2->getOperand(0)->getType() &&
+ CE1->getOperand(0)->getType()->isInteger()) {
+ bool sgnd = isSigned;
+ if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
+ if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
+ return evaluateICmpRelation(CE1->getOperand(0), CE2->getOperand(0),
+ sgnd);
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ // Ok, since this is a getelementptr, we know that the constant has a
+ // pointer type. Check the various cases.
+ if (isa<ConstantPointerNull>(V2)) {
+ // If we are comparing a GEP to a null pointer, check to see if the base
+ // of the GEP equals the null pointer.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV->hasExternalWeakLinkage())
+ // Weak linkage GVals could be zero or not. We're comparing that
+ // to a null pointer, so it's greater-or-equal.
+ return isSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ else
+ // If it's not weak linkage, the GVal must have a non-zero address,
+ // so the result is greater-than.
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else if (isa<ConstantPointerNull>(CE1Op0)) {
+ // If we are indexing from a null pointer, check to see if we have any
+ // non-zero indices.
+ for (unsigned i = 1, e = CE1->getNumOperands(); i != e; ++i)
+ if (!CE1->getOperand(i)->isNullValue())
+ // Offsetting from null, must not be equal.
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ // Only zero indexes from null, must still be zero.
+ return ICmpInst::ICMP_EQ;
+ }
+ // Otherwise, we can't really say if the first operand is null or not.
+ } else if (const GlobalValue *CPR2 = dyn_cast<GlobalValue>(V2)) {
+ if (isa<ConstantPointerNull>(CE1Op0)) {
+ if (CPR2->hasExternalWeakLinkage())
+ // Weak linkage GVals could be zero or not. We're comparing it to
+ // a null pointer, so it's less-or-equal.
+ return isSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ else
+ // If it's not weak linkage, the GVal must have a non-zero address,
+ // so the result is less-than.
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ } else if (const GlobalValue *CPR1 = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (CPR1 == CPR2) {
+ // If this is a getelementptr of the same global, then the result must
+ // differ from the global itself. Because the types must match, the
+ // getelementptr could only have at most one index, and because we fold
+ // getelementptr's with a single zero index, it must be nonzero.
+ assert(CE1->getNumOperands() == 2 &&
+ !CE1->getOperand(1)->isNullValue() &&
+ "Suprising getelementptr!");
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else {
+ // If they are different globals, we don't know what the value is,
+ // but they can't be equal.
+ return ICmpInst::ICMP_NE;
+ }
+ }
+ } else {
+ const ConstantExpr *CE2 = cast<ConstantExpr>(V2);
+ const Constant *CE2Op0 = CE2->getOperand(0);
+
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ switch (CE2->getOpcode()) {
+ default: break;
+ case Instruction::GetElementPtr:
+ // By far the most common case to handle is when the base pointers are
+ // obviously to the same or different globals.
+ if (isa<GlobalValue>(CE1Op0) && isa<GlobalValue>(CE2Op0)) {
+ if (CE1Op0 != CE2Op0) // Don't know relative ordering, but not equal
+ return ICmpInst::ICMP_NE;
+ // Ok, we know that both getelementptr instructions are based on the
+ // same global. From this, we can precisely determine the relative
+ // ordering of the resultant pointers.
+ unsigned i = 1;
+
+ // Compare all of the operands the GEP's have in common.
+ gep_type_iterator GTI = gep_type_begin(CE1);
+ for (;i != CE1->getNumOperands() && i != CE2->getNumOperands();
+ ++i, ++GTI)
+ switch (IdxCompare(CE1->getOperand(i), CE2->getOperand(i),
+ GTI.getIndexedType())) {
+ case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT;
+ case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT;
+ case -2: return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Ok, we ran out of things they have in common. If any leftovers
+ // are non-zero then we have a difference, otherwise we are equal.
+ for (; i < CE1->getNumOperands(); ++i)
+ if (!CE1->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE1->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+
+ for (; i < CE2->getNumOperands(); ++i)
+ if (!CE2->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE2->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+ return ICmpInst::ICMP_EQ;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ }
+
+ return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
+ const Constant *C1,
+ const Constant *C2) {
+ // Fold FCMP_FALSE/FCMP_TRUE unconditionally.
+ if (pred == FCmpInst::FCMP_FALSE) {
+ if (const VectorType *VT = dyn_cast<VectorType>(C1->getType()))
+ return Constant::getNullValue(VectorType::getInteger(VT));
+ else
+ return ConstantInt::getFalse();
+ }
+
+ if (pred == FCmpInst::FCMP_TRUE) {
+ if (const VectorType *VT = dyn_cast<VectorType>(C1->getType()))
+ return Constant::getAllOnesValue(VectorType::getInteger(VT));
+ else
+ return ConstantInt::getTrue();
+ }
+
+ // Handle some degenerate cases first
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // vicmp/vfcmp -> [vector] undef
+ if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType()))
+ return UndefValue::get(VectorType::getInteger(VTy));
+
+ // icmp/fcmp -> i1 undef
+ return UndefValue::get(Type::Int1Ty);
+ }
+
+ // No compile-time operations on this type yet.
+ if (C1->getType() == Type::PPC_FP128Ty)
+ return 0;
+
+ // icmp eq/ne(null,GV) -> false/true
+ if (C1->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C2))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse();
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue();
+ }
+ // icmp eq/ne(GV,null) -> false/true
+ } else if (C2->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C1))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse();
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue();
+ }
+ }
+
+ if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
+ APInt V1 = cast<ConstantInt>(C1)->getValue();
+ APInt V2 = cast<ConstantInt>(C2)->getValue();
+ switch (pred) {
+ default: assert(0 && "Invalid ICmp Predicate"); return 0;
+ case ICmpInst::ICMP_EQ: return ConstantInt::get(Type::Int1Ty, V1 == V2);
+ case ICmpInst::ICMP_NE: return ConstantInt::get(Type::Int1Ty, V1 != V2);
+ case ICmpInst::ICMP_SLT:return ConstantInt::get(Type::Int1Ty, V1.slt(V2));
+ case ICmpInst::ICMP_SGT:return ConstantInt::get(Type::Int1Ty, V1.sgt(V2));
+ case ICmpInst::ICMP_SLE:return ConstantInt::get(Type::Int1Ty, V1.sle(V2));
+ case ICmpInst::ICMP_SGE:return ConstantInt::get(Type::Int1Ty, V1.sge(V2));
+ case ICmpInst::ICMP_ULT:return ConstantInt::get(Type::Int1Ty, V1.ult(V2));
+ case ICmpInst::ICMP_UGT:return ConstantInt::get(Type::Int1Ty, V1.ugt(V2));
+ case ICmpInst::ICMP_ULE:return ConstantInt::get(Type::Int1Ty, V1.ule(V2));
+ case ICmpInst::ICMP_UGE:return ConstantInt::get(Type::Int1Ty, V1.uge(V2));
+ }
+ } else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
+ APFloat C1V = cast<ConstantFP>(C1)->getValueAPF();
+ APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
+ APFloat::cmpResult R = C1V.compare(C2V);
+ switch (pred) {
+ default: assert(0 && "Invalid FCmp Predicate"); return 0;
+ case FCmpInst::FCMP_FALSE: return ConstantInt::getFalse();
+ case FCmpInst::FCMP_TRUE: return ConstantInt::getTrue();
+ case FCmpInst::FCMP_UNO:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpUnordered);
+ case FCmpInst::FCMP_ORD:
+ return ConstantInt::get(Type::Int1Ty, R!=APFloat::cmpUnordered);
+ case FCmpInst::FCMP_UEQ:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_OEQ:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UNE:
+ return ConstantInt::get(Type::Int1Ty, R!=APFloat::cmpEqual);
+ case FCmpInst::FCMP_ONE:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULT:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OLT:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_UGT:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OGT:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULE:
+ return ConstantInt::get(Type::Int1Ty, R!=APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OLE:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UGE:
+ return ConstantInt::get(Type::Int1Ty, R!=APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OGE:
+ return ConstantInt::get(Type::Int1Ty, R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual);
+ }
+ } else if (isa<VectorType>(C1->getType())) {
+ SmallVector<Constant*, 16> C1Elts, C2Elts;
+ C1->getVectorElements(C1Elts);
+ C2->getVectorElements(C2Elts);
+
+ // If we can constant fold the comparison of each element, constant fold
+ // the whole vector comparison.
+ SmallVector<Constant*, 4> ResElts;
+ const Type *InEltTy = C1Elts[0]->getType();
+ bool isFP = InEltTy->isFloatingPoint();
+ const Type *ResEltTy = InEltTy;
+ if (isFP)
+ ResEltTy = IntegerType::get(InEltTy->getPrimitiveSizeInBits());
+
+ for (unsigned i = 0, e = C1Elts.size(); i != e; ++i) {
+ // Compare the elements, producing an i1 result or constant expr.
+ Constant *C;
+ if (isFP)
+ C = ConstantExpr::getFCmp(pred, C1Elts[i], C2Elts[i]);
+ else
+ C = ConstantExpr::getICmp(pred, C1Elts[i], C2Elts[i]);
+
+ // If it is a bool or undef result, convert to the dest type.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->isZero())
+ ResElts.push_back(Constant::getNullValue(ResEltTy));
+ else
+ ResElts.push_back(Constant::getAllOnesValue(ResEltTy));
+ } else if (isa<UndefValue>(C)) {
+ ResElts.push_back(UndefValue::get(ResEltTy));
+ } else {
+ break;
+ }
+ }
+
+ if (ResElts.size() == C1Elts.size())
+ return ConstantVector::get(&ResElts[0], ResElts.size());
+ }
+
+ if (C1->getType()->isFloatingPoint()) {
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateFCmpRelation(C1, C2)) {
+ default: assert(0 && "Unknown relation!");
+ case FCmpInst::FCMP_UNO:
+ case FCmpInst::FCMP_ORD:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_TRUE:
+ case FCmpInst::FCMP_FALSE:
+ case FCmpInst::BAD_FCMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case FCmpInst::FCMP_OEQ: // We know that C1 == C2
+ Result = (pred == FCmpInst::FCMP_UEQ || pred == FCmpInst::FCMP_OEQ ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLT: // We know that C1 < C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE);
+ break;
+ case FCmpInst::FCMP_OGT: // We know that C1 > C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLE: // We know that C1 <= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 1;
+ break;
+ case FCmpInst::FCMP_OGE: // We know that C1 >= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 1;
+ break;
+ case FCmpInst::FCMP_ONE: // We know that C1 != C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE)
+ Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1) {
+ if (const VectorType *VT = dyn_cast<VectorType>(C1->getType())) {
+ if (Result == 0)
+ return Constant::getNullValue(VectorType::getInteger(VT));
+ else
+ return Constant::getAllOnesValue(VectorType::getInteger(VT));
+ }
+ return ConstantInt::get(Type::Int1Ty, Result);
+ }
+
+ } else {
+ // Evaluate the relation between the two constants, per the predicate.
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) {
+ default: assert(0 && "Unknown relational!");
+ case ICmpInst::BAD_ICMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case ICmpInst::ICMP_EQ: // We know the constants are equal!
+ // If we know the constants are equal, we can decide the result of this
+ // computation precisely.
+ Result = (pred == ICmpInst::ICMP_EQ ||
+ pred == ICmpInst::ICMP_ULE ||
+ pred == ICmpInst::ICMP_SLE ||
+ pred == ICmpInst::ICMP_UGE ||
+ pred == ICmpInst::ICMP_SGE);
+ break;
+ case ICmpInst::ICMP_ULT:
+ // If we know that C1 < C2, we can decide the result of this computation
+ // precisely.
+ Result = (pred == ICmpInst::ICMP_ULT ||
+ pred == ICmpInst::ICMP_NE ||
+ pred == ICmpInst::ICMP_ULE);
+ break;
+ case ICmpInst::ICMP_SLT:
+ // If we know that C1 < C2, we can decide the result of this computation
+ // precisely.
+ Result = (pred == ICmpInst::ICMP_SLT ||
+ pred == ICmpInst::ICMP_NE ||
+ pred == ICmpInst::ICMP_SLE);
+ break;
+ case ICmpInst::ICMP_UGT:
+ // If we know that C1 > C2, we can decide the result of this computation
+ // precisely.
+ Result = (pred == ICmpInst::ICMP_UGT ||
+ pred == ICmpInst::ICMP_NE ||
+ pred == ICmpInst::ICMP_UGE);
+ break;
+ case ICmpInst::ICMP_SGT:
+ // If we know that C1 > C2, we can decide the result of this computation
+ // precisely.
+ Result = (pred == ICmpInst::ICMP_SGT ||
+ pred == ICmpInst::ICMP_NE ||
+ pred == ICmpInst::ICMP_SGE);
+ break;
+ case ICmpInst::ICMP_ULE:
+ // If we know that C1 <= C2, we can only partially decide this relation.
+ if (pred == ICmpInst::ICMP_UGT) Result = 0;
+ if (pred == ICmpInst::ICMP_ULT) Result = 1;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // If we know that C1 <= C2, we can only partially decide this relation.
+ if (pred == ICmpInst::ICMP_SGT) Result = 0;
+ if (pred == ICmpInst::ICMP_SLT) Result = 1;
+ break;
+
+ case ICmpInst::ICMP_UGE:
+ // If we know that C1 >= C2, we can only partially decide this relation.
+ if (pred == ICmpInst::ICMP_ULT) Result = 0;
+ if (pred == ICmpInst::ICMP_UGT) Result = 1;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // If we know that C1 >= C2, we can only partially decide this relation.
+ if (pred == ICmpInst::ICMP_SLT) Result = 0;
+ if (pred == ICmpInst::ICMP_SGT) Result = 1;
+ break;
+
+ case ICmpInst::ICMP_NE:
+ // If we know that C1 != C2, we can only partially decide this relation.
+ if (pred == ICmpInst::ICMP_EQ) Result = 0;
+ if (pred == ICmpInst::ICMP_NE) Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1) {
+ if (const VectorType *VT = dyn_cast<VectorType>(C1->getType())) {
+ if (Result == 0)
+ return Constant::getNullValue(VT);
+ else
+ return Constant::getAllOnesValue(VT);
+ }
+ return ConstantInt::get(Type::Int1Ty, Result);
+ }
+
+ if (!isa<ConstantExpr>(C1) && isa<ConstantExpr>(C2)) {
+ // If C2 is a constant expr and C1 isn't, flop them around and fold the
+ // other way if possible.
+ switch (pred) {
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // No change of predicate required.
+ return ConstantFoldCompareInstruction(pred, C2, C1);
+
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ // Change the predicate as necessary to swap the operands.
+ pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
+ return ConstantFoldCompareInstruction(pred, C2, C1);
+
+      default: // These predicates cannot be swapped.
+ break;
+ }
+ }
+ }
+ return 0;
+}
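+
+// A small sketch of the partial folding above (hypothetical constants C1 and
+// C2; the calls used are the public ConstantExpr entry points). When
+// evaluateICmpRelation only establishes C1 u<= C2, a UGT query folds to
+// false and a ULT query folds to true, while ULE itself stays unresolved:
+//   ConstantExpr::getICmp(ICmpInst::ICMP_UGT, C1, C2);  // i1 false
+//   ConstantExpr::getICmp(ICmpInst::ICMP_ULT, C1, C2);  // i1 true
+//   ConstantExpr::getICmp(ICmpInst::ICMP_ULE, C1, C2);  // stays symbolic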
+
+Constant *llvm::ConstantFoldGetElementPtr(const Constant *C,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
+ if (NumIdx == 0 ||
+ (NumIdx == 1 && Idxs[0]->isNullValue()))
+ return const_cast<Constant*>(C);
+
+ if (isa<UndefValue>(C)) {
+ const PointerType *Ptr = cast<PointerType>(C->getType());
+ const Type *Ty = GetElementPtrInst::getIndexedType(Ptr,
+ (Value **)Idxs,
+ (Value **)Idxs+NumIdx);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
+ }
+
+ Constant *Idx0 = Idxs[0];
+ if (C->isNullValue()) {
+ bool isNull = true;
+ for (unsigned i = 0, e = NumIdx; i != e; ++i)
+ if (!Idxs[i]->isNullValue()) {
+ isNull = false;
+ break;
+ }
+ if (isNull) {
+ const PointerType *Ptr = cast<PointerType>(C->getType());
+ const Type *Ty = GetElementPtrInst::getIndexedType(Ptr,
+ (Value**)Idxs,
+ (Value**)Idxs+NumIdx);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return
+ ConstantPointerNull::get(PointerType::get(Ty,Ptr->getAddressSpace()));
+ }
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(const_cast<Constant*>(C))) {
+    // Combine Indices - If the source pointer to this getelementptr
+    // expression is itself a getelementptr constant expression, combine the
+    // indices of the two expressions into a single getelementptr.
+ //
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ const Type *LastTy = 0;
+ for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ I != E; ++I)
+ LastTy = *I;
+
+ if ((LastTy && isa<ArrayType>(LastTy)) || Idx0->isNullValue()) {
+ SmallVector<Value*, 16> NewIndices;
+ NewIndices.reserve(NumIdx + CE->getNumOperands());
+ for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
+ NewIndices.push_back(CE->getOperand(i));
+
+      // Add the last index of the source GEP to the first index of the new
+      // GEP, taking care to handle the case where the two indices have
+      // different types.
+      Constant *Combined = CE->getOperand(CE->getNumOperands()-1);
+ if (!Idx0->isNullValue()) {
+ const Type *IdxTy = Combined->getType();
+ if (IdxTy != Idx0->getType()) {
+ Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Type::Int64Ty);
+ Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined,
+ Type::Int64Ty);
+ Combined = ConstantExpr::get(Instruction::Add, C1, C2);
+ } else {
+ Combined =
+ ConstantExpr::get(Instruction::Add, Idx0, Combined);
+ }
+ }
+
+ NewIndices.push_back(Combined);
+ NewIndices.insert(NewIndices.end(), Idxs+1, Idxs+NumIdx);
+ return ConstantExpr::getGetElementPtr(CE->getOperand(0), &NewIndices[0],
+ NewIndices.size());
+ }
+ }
+
+ // Implement folding of:
+ // int* getelementptr ([2 x int]* cast ([3 x int]* %X to [2 x int]*),
+ // long 0, long 0)
+ // To: int* getelementptr ([3 x int]* %X, long 0, long 0)
+ //
+ if (CE->isCast() && NumIdx > 1 && Idx0->isNullValue()) {
+ if (const PointerType *SPT =
+ dyn_cast<PointerType>(CE->getOperand(0)->getType()))
+ if (const ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType()))
+ if (const ArrayType *CAT =
+ dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType()))
+ if (CAT->getElementType() == SAT->getElementType())
+ return ConstantExpr::getGetElementPtr(
+ (Constant*)CE->getOperand(0), Idxs, NumIdx);
+ }
+
+ // Fold: getelementptr (i8* inttoptr (i64 1 to i8*), i32 -1)
+ // Into: inttoptr (i64 0 to i8*)
+ // This happens with pointers to member functions in C++.
+ if (CE->getOpcode() == Instruction::IntToPtr && NumIdx == 1 &&
+ isa<ConstantInt>(CE->getOperand(0)) && isa<ConstantInt>(Idxs[0]) &&
+ cast<PointerType>(CE->getType())->getElementType() == Type::Int8Ty) {
+ Constant *Base = CE->getOperand(0);
+ Constant *Offset = Idxs[0];
+
+ // Convert the smaller integer to the larger type.
+ if (Offset->getType()->getPrimitiveSizeInBits() <
+ Base->getType()->getPrimitiveSizeInBits())
+ Offset = ConstantExpr::getSExt(Offset, Base->getType());
+ else if (Base->getType()->getPrimitiveSizeInBits() <
+ Offset->getType()->getPrimitiveSizeInBits())
+ Base = ConstantExpr::getZExt(Base, Offset->getType());
+
+ Base = ConstantExpr::getAdd(Base, Offset);
+ return ConstantExpr::getIntToPtr(Base, CE->getType());
+ }
+ }
+ return 0;
+}
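+
+// A sketch of the nested-GEP combining performed above, at the IR level
+// (hypothetical global @A of type [10 x [5 x i32]]*):
+//   i32* getelementptr (getelementptr @A, i64 0, i64 1), i64 0, i64 2
+// folds into the single expression:
+//   i32* getelementptr @A, i64 0, i64 1, i64 2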
+
diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h
new file mode 100644
index 0000000..49aea11
--- /dev/null
+++ b/lib/VMCore/ConstantFold.h
@@ -0,0 +1,60 @@
+//===-- ConstantFold.h - Internal Constant Folding Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the (internal) constant folding interfaces for LLVM. These
+// interfaces are used by the ConstantExpr::get* methods to automatically fold
+// constants when possible.
+//
+// These operators may return a null object if they don't know how to perform
+// the specified operation on the specified constant types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CONSTANTFOLDING_H
+#define CONSTANTFOLDING_H
+
+namespace llvm {
+ class Value;
+ class Constant;
+ class Type;
+
+ // Constant fold various types of instruction...
+ Constant *ConstantFoldCastInstruction(
+ unsigned opcode, ///< The opcode of the cast
+ const Constant *V, ///< The source constant
+ const Type *DestTy ///< The destination type
+ );
+ Constant *ConstantFoldSelectInstruction(const Constant *Cond,
+ const Constant *V1,
+ const Constant *V2);
+ Constant *ConstantFoldExtractElementInstruction(const Constant *Val,
+ const Constant *Idx);
+ Constant *ConstantFoldInsertElementInstruction(const Constant *Val,
+ const Constant *Elt,
+ const Constant *Idx);
+ Constant *ConstantFoldShuffleVectorInstruction(const Constant *V1,
+ const Constant *V2,
+ const Constant *Mask);
+ Constant *ConstantFoldExtractValueInstruction(const Constant *Agg,
+ const unsigned *Idxs,
+ unsigned NumIdx);
+ Constant *ConstantFoldInsertValueInstruction(const Constant *Agg,
+ const Constant *Val,
+ const unsigned* Idxs,
+ unsigned NumIdx);
+ Constant *ConstantFoldBinaryInstruction(unsigned Opcode, const Constant *V1,
+ const Constant *V2);
+ Constant *ConstantFoldCompareInstruction(unsigned short predicate,
+ const Constant *C1,
+ const Constant *C2);
+ Constant *ConstantFoldGetElementPtr(const Constant *C,
+ Constant* const *Idxs, unsigned NumIdx);
+} // End llvm namespace
+
+#endif
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
new file mode 100644
index 0000000..97f3ac9
--- /dev/null
+++ b/lib/VMCore/Constants.cpp
@@ -0,0 +1,2832 @@
+//===-- Constants.cpp - Implement Constant nodes --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Constant* classes...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "ConstantFold.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Class
+//===----------------------------------------------------------------------===//
+
+void Constant::destroyConstantImpl() {
+ // When a Constant is destroyed, there may be lingering
+ // references to the constant by other constants in the constant pool. These
+ // constants are implicitly dependent on the module that is being deleted,
+  // but they don't know that. Because we only find out when this constant
+  // is deleted, we must now notify all of our users (which should only be
+  // Constants) that they are, in fact, invalid now and should be deleted.
+ //
+ while (!use_empty()) {
+ Value *V = use_back();
+#ifndef NDEBUG // Only in assertion-enabled builds...
+ if (!isa<Constant>(V))
+ DOUT << "While deleting: " << *this
+ << "\n\nUse still stuck around after Def is destroyed: "
+ << *V << "\n\n";
+#endif
+ assert(isa<Constant>(V) && "References remain to Constant being destroyed");
+ Constant *CV = cast<Constant>(V);
+ CV->destroyConstant();
+
+ // The constant should remove itself from our use list...
+ assert((use_empty() || use_back() != V) && "Constant not removed!");
+ }
+
+  // Value has no outstanding references; it is safe to delete it now...
+ delete this;
+}
+
+/// canTrap - Return true if evaluation of this constant could trap. This is
+/// true for things like constant expressions that could divide by zero.
+bool Constant::canTrap() const {
+ assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
+ // The only thing that could possibly trap are constant exprs.
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
+ if (!CE) return false;
+
+ // ConstantExpr traps if any operands can trap.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i)->canTrap())
+ return true;
+
+ // Otherwise, only specific operations can trap.
+ switch (CE->getOpcode()) {
+ default:
+ return false;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ // Div and rem can trap if the RHS is not known to be non-zero.
+ if (!isa<ConstantInt>(getOperand(1)) || getOperand(1)->isNullValue())
+ return true;
+ return false;
+ }
+}
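+
+// A minimal sketch (hypothetical values): division by a zero constant is the
+// canonical trapping constant expression.
+//   Constant *One  = ConstantInt::get(Type::Int32Ty, 1);
+//   Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
+//   ConstantExpr::getSDiv(One, Zero)->canTrap();   // yields true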
+
+/// ContainsRelocations - Return true if the constant value contains
+/// relocations which cannot be resolved at compile time. The Kind argument
+/// is used to filter out all but the 'interesting' sorts of relocations.
+bool Constant::ContainsRelocations(unsigned Kind) const {
+ if (const GlobalValue* GV = dyn_cast<GlobalValue>(this)) {
+ bool isLocal = GV->hasLocalLinkage();
+ if ((Kind & Reloc::Local) && isLocal) {
+      // The global has local linkage and 'local' relocations were requested.
+ return true;
+ }
+
+ if ((Kind & Reloc::Global) && !isLocal) {
+      // The global has non-local linkage and 'global' relocations were
+      // requested.
+ return true;
+ }
+
+ return false;
+ }
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i)->ContainsRelocations(Kind))
+ return true;
+
+ return false;
+}
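+
+// Sketch (hypothetical global G with external linkage):
+//   G->ContainsRelocations(Reloc::Global);  // true
+//   G->ContainsRelocations(Reloc::Local);   // false
+// For a local-linkage (static) global the two answers are reversed.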
+
+// Static constructor to create a '0' constant of arbitrary type...
+Constant *Constant::getNullValue(const Type *Ty) {
+ static uint64_t zero[2] = {0, 0};
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ return ConstantInt::get(Ty, 0);
+ case Type::FloatTyID:
+ return ConstantFP::get(APFloat(APInt(32, 0)));
+ case Type::DoubleTyID:
+ return ConstantFP::get(APFloat(APInt(64, 0)));
+ case Type::X86_FP80TyID:
+ return ConstantFP::get(APFloat(APInt(80, 2, zero)));
+ case Type::FP128TyID:
+ return ConstantFP::get(APFloat(APInt(128, 2, zero), true));
+ case Type::PPC_FP128TyID:
+ return ConstantFP::get(APFloat(APInt(128, 2, zero)));
+ case Type::PointerTyID:
+ return ConstantPointerNull::get(cast<PointerType>(Ty));
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ return ConstantAggregateZero::get(Ty);
+ default:
+ // Function, Label, or Opaque type?
+ assert(!"Cannot create a null constant of that type!");
+ return 0;
+ }
+}
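+
+// Sketch of what the cases above produce (SomeStructTy stands for any
+// struct type):
+//   Constant::getNullValue(Type::Int32Ty);   // i32 0
+//   Constant::getNullValue(Type::DoubleTy);  // double +0.0
+//   Constant::getNullValue(SomeStructTy);    // a ConstantAggregateZero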
+
+Constant *Constant::getAllOnesValue(const Type *Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty))
+ return ConstantInt::get(APInt::getAllOnesValue(ITy->getBitWidth()));
+ return ConstantVector::getAllOnesValue(cast<VectorType>(Ty));
+}
+
+// Static constructor to create an integral constant with all bits set
+ConstantInt *ConstantInt::getAllOnesValue(const Type *Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty))
+ return ConstantInt::get(APInt::getAllOnesValue(ITy->getBitWidth()));
+ return 0;
+}
+
+/// @returns the value for a vector integer constant of the given type that
+/// has all its bits set to true.
+/// @brief Get the all ones value
+ConstantVector *ConstantVector::getAllOnesValue(const VectorType *Ty) {
+ std::vector<Constant*> Elts;
+ Elts.resize(Ty->getNumElements(),
+ ConstantInt::getAllOnesValue(Ty->getElementType()));
+ assert(Elts[0] && "Not a vector integer type!");
+ return cast<ConstantVector>(ConstantVector::get(Elts));
+}
+
+
+/// getVectorElements - This method, which is only valid on constants of
+/// vector type, returns the elements of the vector in the specified
+/// SmallVector. This handles breaking down a vector undef into undef
+/// elements, etc. For constant exprs and other cases we can't handle, we
+/// return an empty vector.
+void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
+ assert(isa<VectorType>(getType()) && "Not a vector constant!");
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) {
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i)
+ Elts.push_back(CV->getOperand(i));
+ return;
+ }
+
+ const VectorType *VT = cast<VectorType>(getType());
+ if (isa<ConstantAggregateZero>(this)) {
+ Elts.assign(VT->getNumElements(),
+ Constant::getNullValue(VT->getElementType()));
+ return;
+ }
+
+ if (isa<UndefValue>(this)) {
+ Elts.assign(VT->getNumElements(), UndefValue::get(VT->getElementType()));
+ return;
+ }
+
+ // Unknown type, must be constant expr etc.
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ConstantInt
+//===----------------------------------------------------------------------===//
+
+ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V)
+ : Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
+ assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
+}
+
+ConstantInt *ConstantInt::TheTrueVal = 0;
+ConstantInt *ConstantInt::TheFalseVal = 0;
+
+namespace llvm {
+ void CleanupTrueFalse(void *) {
+ ConstantInt::ResetTrueFalse();
+ }
+}
+
+static ManagedCleanup<llvm::CleanupTrueFalse> TrueFalseCleanup;
+
+ConstantInt *ConstantInt::CreateTrueFalseVals(bool WhichOne) {
+ assert(TheTrueVal == 0 && TheFalseVal == 0);
+ TheTrueVal = get(Type::Int1Ty, 1);
+ TheFalseVal = get(Type::Int1Ty, 0);
+
+ // Ensure that llvm_shutdown nulls out TheTrueVal/TheFalseVal.
+ TrueFalseCleanup.Register();
+
+ return WhichOne ? TheTrueVal : TheFalseVal;
+}
+
+
+namespace {
+ struct DenseMapAPIntKeyInfo {
+ struct KeyTy {
+ APInt val;
+ const Type* type;
+ KeyTy(const APInt& V, const Type* Ty) : val(V), type(Ty) {}
+ KeyTy(const KeyTy& that) : val(that.val), type(that.type) {}
+ bool operator==(const KeyTy& that) const {
+ return type == that.type && this->val == that.val;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
+ static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return DenseMapInfo<void*>::getHashValue(Key.type) ^
+ Key.val.getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return false; }
+ };
+}
+
+
+typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
+ DenseMapAPIntKeyInfo> IntMapTy;
+static ManagedStatic<IntMapTy> IntConstants;
+
+ConstantInt *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
+ const IntegerType *ITy = cast<IntegerType>(Ty);
+ return get(APInt(ITy->getBitWidth(), V, isSigned));
+}
+
+// Get a ConstantInt from an APInt. Note that the key stored in the DenseMap
+// is a DenseMapAPIntKeyInfo::KeyTy, which provides operator== and operator!=
+// so that the DenseMap never attempts to compare APInts of different widths;
+// doing so would violate an APInt class invariant and trigger an assertion.
+ConstantInt *ConstantInt::get(const APInt& V) {
+ // Get the corresponding integer type for the bit width of the value.
+ const IntegerType *ITy = IntegerType::get(V.getBitWidth());
+ // get an existing value or the insertion position
+ DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
+ ConstantInt *&Slot = (*IntConstants)[Key];
+ // if it exists, return it.
+ if (Slot)
+ return Slot;
+ // otherwise create a new one, insert it, and return it.
+ return Slot = new ConstantInt(ITy, V);
+}
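+
+// Sketch of the uniquing guarantee (hypothetical values): both calls below
+// return the same pointer, so ConstantInts may be compared by address.
+//   ConstantInt *A = ConstantInt::get(Type::Int32Ty, 42);
+//   ConstantInt *B = ConstantInt::get(APInt(32, 42));
+//   assert(A == B && "uniqued in IntConstants");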
+
+//===----------------------------------------------------------------------===//
+// ConstantFP
+//===----------------------------------------------------------------------===//
+
+static const fltSemantics *TypeToFloatSemantics(const Type *Ty) {
+ if (Ty == Type::FloatTy)
+ return &APFloat::IEEEsingle;
+ if (Ty == Type::DoubleTy)
+ return &APFloat::IEEEdouble;
+ if (Ty == Type::X86_FP80Ty)
+ return &APFloat::x87DoubleExtended;
+  if (Ty == Type::FP128Ty)
+ return &APFloat::IEEEquad;
+
+ assert(Ty == Type::PPC_FP128Ty && "Unknown FP format");
+ return &APFloat::PPCDoubleDouble;
+}
+
+ConstantFP::ConstantFP(const Type *Ty, const APFloat& V)
+ : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
+ assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
+ "FP type Mismatch");
+}
+
+bool ConstantFP::isNullValue() const {
+ return Val.isZero() && !Val.isNegative();
+}
+
+ConstantFP *ConstantFP::getNegativeZero(const Type *Ty) {
+  APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
+ apf.changeSign();
+ return ConstantFP::get(apf);
+}
+
+bool ConstantFP::isExactlyValue(const APFloat& V) const {
+ return Val.bitwiseIsEqual(V);
+}
+
+namespace {
+ struct DenseMapAPFloatKeyInfo {
+ struct KeyTy {
+ APFloat val;
+ KeyTy(const APFloat& V) : val(V){}
+ KeyTy(const KeyTy& that) : val(that.val) {}
+ bool operator==(const KeyTy& that) const {
+ return this->val.bitwiseIsEqual(that.val);
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline KeyTy getEmptyKey() {
+ return KeyTy(APFloat(APFloat::Bogus,1));
+ }
+ static inline KeyTy getTombstoneKey() {
+ return KeyTy(APFloat(APFloat::Bogus,2));
+ }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return Key.val.getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return false; }
+ };
+}
+
+//---- ConstantFP::get() implementation...
+//
+typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
+ DenseMapAPFloatKeyInfo> FPMapTy;
+
+static ManagedStatic<FPMapTy> FPConstants;
+
+ConstantFP *ConstantFP::get(const APFloat &V) {
+ DenseMapAPFloatKeyInfo::KeyTy Key(V);
+ ConstantFP *&Slot = (*FPConstants)[Key];
+ if (Slot) return Slot;
+
+ const Type *Ty;
+ if (&V.getSemantics() == &APFloat::IEEEsingle)
+ Ty = Type::FloatTy;
+ else if (&V.getSemantics() == &APFloat::IEEEdouble)
+ Ty = Type::DoubleTy;
+ else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
+ Ty = Type::X86_FP80Ty;
+ else if (&V.getSemantics() == &APFloat::IEEEquad)
+ Ty = Type::FP128Ty;
+ else {
+    assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
+           "Unknown FP format");
+ Ty = Type::PPC_FP128Ty;
+ }
+
+ return Slot = new ConstantFP(Ty, V);
+}
+
+/// get() - This returns a constant fp for the specified value in the
+/// specified type. This should only be used for simple constant values like
+/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
+ConstantFP *ConstantFP::get(const Type *Ty, double V) {
+ APFloat FV(V);
+ bool ignored;
+ FV.convert(*TypeToFloatSemantics(Ty), APFloat::rmNearestTiesToEven, &ignored);
+ return get(FV);
+}
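+
+// Sketch: the double is rounded into the target format before uniquing, so
+//   ConstantFP::get(Type::FloatTy, 2.0);   // exact in IEEEsingle
+// is safe for simple values, while a double that is not exactly
+// representable as float is silently rounded.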
+
+//===----------------------------------------------------------------------===//
+// ConstantXXX Classes
+//===----------------------------------------------------------------------===//
+
+
+ConstantArray::ConstantArray(const ArrayType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantArrayVal,
+ OperandTraits<ConstantArray>::op_end(this) - V.size(),
+ V.size()) {
+ assert(V.size() == T->getNumElements() &&
+ "Invalid initializer vector for constant array");
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert((C->getType() == T->getElementType() ||
+ (T->isAbstract() &&
+ C->getType()->getTypeID() == T->getElementType()->getTypeID())) &&
+ "Initializer for array element doesn't match array element type!");
+ *OL = C;
+ }
+}
+
+
+ConstantStruct::ConstantStruct(const StructType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantStructVal,
+ OperandTraits<ConstantStruct>::op_end(this) - V.size(),
+ V.size()) {
+ assert(V.size() == T->getNumElements() &&
+ "Invalid initializer vector for constant structure");
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert((C->getType() == T->getElementType(I-V.begin()) ||
+ ((T->getElementType(I-V.begin())->isAbstract() ||
+ C->getType()->isAbstract()) &&
+ T->getElementType(I-V.begin())->getTypeID() ==
+ C->getType()->getTypeID())) &&
+ "Initializer for struct element doesn't match struct element type!");
+ *OL = C;
+ }
+}
+
+
+ConstantVector::ConstantVector(const VectorType *T,
+ const std::vector<Constant*> &V)
+ : Constant(T, ConstantVectorVal,
+ OperandTraits<ConstantVector>::op_end(this) - V.size(),
+ V.size()) {
+ Use *OL = OperandList;
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert((C->getType() == T->getElementType() ||
+ (T->isAbstract() &&
+ C->getType()->getTypeID() == T->getElementType()->getTypeID())) &&
+ "Initializer for vector element doesn't match vector element type!");
+ *OL = C;
+ }
+}
+
+
+namespace llvm {
+// We declare several classes private to this file, so use an anonymous
+// namespace
+namespace {
+
+/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement unary constant exprs.
+class VISIBILITY_HIDDEN UnaryConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ UnaryConstantExpr(unsigned Opcode, Constant *C, const Type *Ty)
+ : ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
+ Op<0>() = C;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement binary constant exprs.
+class VISIBILITY_HIDDEN BinaryConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2)
+ : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// SelectConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement select constant exprs.
+class VISIBILITY_HIDDEN SelectConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractelement constant exprs.
+class VISIBILITY_HIDDEN ExtractElementConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ ExtractElementConstantExpr(Constant *C1, Constant *C2)
+ : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
+ Instruction::ExtractElement, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertelement constant exprs.
+class VISIBILITY_HIDDEN InsertElementConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C1->getType(), Instruction::InsertElement,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ShuffleVectorConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// shufflevector constant exprs.
+class VISIBILITY_HIDDEN ShuffleVectorConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(VectorType::get(
+ cast<VectorType>(C1->getType())->getElementType(),
+ cast<VectorType>(C3->getType())->getNumElements()),
+ Instruction::ShuffleVector,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractvalue constant exprs.
+class VISIBILITY_HIDDEN ExtractValueConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ ExtractValueConstantExpr(Constant *Agg,
+ const SmallVector<unsigned, 4> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ }
+
+ /// Indices - These identify which value to extract.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertvalue constant exprs.
+class VISIBILITY_HIDDEN InsertValueConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
+  // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ InsertValueConstantExpr(Constant *Agg, Constant *Val,
+ const SmallVector<unsigned, 4> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ Op<1>() = Val;
+ }
+
+ /// Indices - These identify the position for the insertion.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+
+/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
+/// used behind the scenes to implement getelementptr constant exprs.
+class VISIBILITY_HIDDEN GetElementPtrConstantExpr : public ConstantExpr {
+ GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ const Type *DestTy);
+public:
+ static GetElementPtrConstantExpr *Create(Constant *C,
+ const std::vector<Constant*>&IdxList,
+ const Type *DestTy) {
+ return new(IdxList.size() + 1)
+ GetElementPtrConstantExpr(C, IdxList, DestTy);
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+// CompareConstantExpr - This class is private to Constants.cpp, and is used
+// behind the scenes to implement ICmp and FCmp constant expressions. This is
+// needed in order to store the predicate value for these instructions.
+struct VISIBILITY_HIDDEN CompareConstantExpr : public ConstantExpr {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ unsigned short predicate;
+ CompareConstantExpr(const Type *ty, Instruction::OtherOps opc,
+ unsigned short pred, Constant* LHS, Constant* RHS)
+ : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+} // end anonymous namespace
+
+template <>
+struct OperandTraits<UnaryConstantExpr> : FixedNumOperandTraits<1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<BinaryConstantExpr> : FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<SelectConstantExpr> : FixedNumOperandTraits<3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractElementConstantExpr> : FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertElementConstantExpr> : FixedNumOperandTraits<3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<ShuffleVectorConstantExpr> : FixedNumOperandTraits<3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractValueConstantExpr> : FixedNumOperandTraits<1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertValueConstantExpr> : FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<GetElementPtrConstantExpr> : VariadicOperandTraits<1> {
+};
+
+GetElementPtrConstantExpr::GetElementPtrConstantExpr
+ (Constant *C,
+ const std::vector<Constant*> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::GetElementPtr,
+ OperandTraits<GetElementPtrConstantExpr>::op_end(this)
+ - (IdxList.size()+1),
+ IdxList.size()+1) {
+ OperandList[0] = C;
+ for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
+ OperandList[i+1] = IdxList[i];
+}
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
+
+
+template <>
+struct OperandTraits<CompareConstantExpr> : FixedNumOperandTraits<2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
+
+
+} // End llvm namespace
+
+
+// Utility function for determining if a ConstantExpr is a CastOp or not. This
+// can't be inline because we don't want to #include Instruction.h into
+// Constant.h
+bool ConstantExpr::isCast() const {
+ return Instruction::isCast(getOpcode());
+}
+
+bool ConstantExpr::isCompare() const {
+ return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp ||
+ getOpcode() == Instruction::VICmp || getOpcode() == Instruction::VFCmp;
+}
+
+bool ConstantExpr::hasIndices() const {
+ return getOpcode() == Instruction::ExtractValue ||
+ getOpcode() == Instruction::InsertValue;
+}
+
+const SmallVector<unsigned, 4> &ConstantExpr::getIndices() const {
+ if (const ExtractValueConstantExpr *EVCE =
+ dyn_cast<ExtractValueConstantExpr>(this))
+ return EVCE->Indices;
+
+ return cast<InsertValueConstantExpr>(this)->Indices;
+}
+
+/// ConstantExpr::get* - Return some common constants without having to
+/// specify the full Instruction::OPCODE identifier.
+///
+Constant *ConstantExpr::getNeg(Constant *C) {
+ return get(Instruction::Sub,
+ ConstantExpr::getZeroValueForNegationExpr(C->getType()),
+ C);
+}
+Constant *ConstantExpr::getNot(Constant *C) {
+ assert((isa<IntegerType>(C->getType()) ||
+ cast<VectorType>(C->getType())->getElementType()->isInteger()) &&
+ "Cannot NOT a nonintegral value!");
+ return get(Instruction::Xor, C,
+ Constant::getAllOnesValue(C->getType()));
+}
+Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2) {
+ return get(Instruction::Add, C1, C2);
+}
+Constant *ConstantExpr::getSub(Constant *C1, Constant *C2) {
+ return get(Instruction::Sub, C1, C2);
+}
+Constant *ConstantExpr::getMul(Constant *C1, Constant *C2) {
+ return get(Instruction::Mul, C1, C2);
+}
+Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2) {
+ return get(Instruction::UDiv, C1, C2);
+}
+Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2) {
+ return get(Instruction::SDiv, C1, C2);
+}
+Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
+ return get(Instruction::FDiv, C1, C2);
+}
+Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
+ return get(Instruction::URem, C1, C2);
+}
+Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
+ return get(Instruction::SRem, C1, C2);
+}
+Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
+ return get(Instruction::FRem, C1, C2);
+}
+Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
+ return get(Instruction::And, C1, C2);
+}
+Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
+ return get(Instruction::Or, C1, C2);
+}
+Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
+ return get(Instruction::Xor, C1, C2);
+}
+unsigned ConstantExpr::getPredicate() const {
+ assert(getOpcode() == Instruction::FCmp ||
+ getOpcode() == Instruction::ICmp ||
+ getOpcode() == Instruction::VFCmp ||
+ getOpcode() == Instruction::VICmp);
+  return static_cast<const CompareConstantExpr*>(this)->predicate;
+}
+Constant *ConstantExpr::getShl(Constant *C1, Constant *C2) {
+ return get(Instruction::Shl, C1, C2);
+}
+Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2) {
+ return get(Instruction::LShr, C1, C2);
+}
+Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2) {
+ return get(Instruction::AShr, C1, C2);
+}
+
+/// getWithOperandReplaced - Return a constant expression identical to this
+/// one, but with the specified operand set to the specified value.
+Constant *
+ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
+ assert(OpNo < getNumOperands() && "Operand num is out of range!");
+ assert(Op->getType() == getOperand(OpNo)->getType() &&
+ "Replacing operand with value of different type!");
+ if (getOperand(OpNo) == Op)
+ return const_cast<ConstantExpr*>(this);
+
+ Constant *Op0, *Op1, *Op2;
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return ConstantExpr::getCast(getOpcode(), Op, getType());
+ case Instruction::Select:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getSelect(Op0, Op1, Op2);
+ case Instruction::InsertElement:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getInsertElement(Op0, Op1, Op2);
+ case Instruction::ExtractElement:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ return ConstantExpr::getExtractElement(Op0, Op1);
+ case Instruction::ShuffleVector:
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ Op2 = (OpNo == 2) ? Op : getOperand(2);
+ return ConstantExpr::getShuffleVector(Op0, Op1, Op2);
+ case Instruction::GetElementPtr: {
+ SmallVector<Constant*, 8> Ops;
+ Ops.resize(getNumOperands()-1);
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
+ Ops[i-1] = getOperand(i);
+ if (OpNo == 0)
+ return ConstantExpr::getGetElementPtr(Op, &Ops[0], Ops.size());
+ Ops[OpNo-1] = Op;
+ return ConstantExpr::getGetElementPtr(getOperand(0), &Ops[0], Ops.size());
+ }
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ Op0 = (OpNo == 0) ? Op : getOperand(0);
+ Op1 = (OpNo == 1) ? Op : getOperand(1);
+ return ConstantExpr::get(getOpcode(), Op0, Op1);
+ }
+}
+
+/// getWithOperands - This returns the current constant expression with the
+/// operands replaced with the specified values. The specified operands must
+/// match count and type with the existing ones.
+Constant *ConstantExpr::
+getWithOperands(Constant* const *Ops, unsigned NumOps) const {
+ assert(NumOps == getNumOperands() && "Operand count mismatch!");
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ assert(Ops[i]->getType() == getOperand(i)->getType() &&
+ "Operand type mismatch!");
+ AnyChange |= Ops[i] != getOperand(i);
+ }
+ if (!AnyChange) // No operands changed, return self.
+ return const_cast<ConstantExpr*>(this);
+
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return ConstantExpr::getCast(getOpcode(), Ops[0], getType());
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ return ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], NumOps-1);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::VICmp:
+ case Instruction::VFCmp:
+ return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ return ConstantExpr::get(getOpcode(), Ops[0], Ops[1]);
+ }
+}
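+
+// Sketch (hypothetical operands X and Y of matching types): replacing the
+// operands of a binary constant expression CE re-runs the ConstantExpr::get
+// path, so the result may be a freshly folded constant rather than a new
+// expression node.
+//   Constant *Ops[] = { X, Y };
+//   Constant *NewC = CE->getWithOperands(Ops, 2);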
+
+
+//===----------------------------------------------------------------------===//
+// isValueValidForType implementations
+
+bool ConstantInt::isValueValidForType(const Type *Ty, uint64_t Val) {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
+ if (Ty == Type::Int1Ty)
+ return Val == 0 || Val == 1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+  uint64_t Max = (1ull << NumBits) - 1; // NumBits < 64 here, shift is safe.
+ return Val <= Max;
+}
+
+bool ConstantInt::isValueValidForType(const Type *Ty, int64_t Val) {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
+ if (Ty == Type::Int1Ty)
+ return Val == 0 || Val == 1 || Val == -1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ int64_t Min = -(1ll << (NumBits-1));
+ int64_t Max = (1ll << (NumBits-1)) - 1;
+ return (Val >= Min && Val <= Max);
+}
+
+bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ switch (Ty->getTypeID()) {
+ default:
+ return false; // These can't be represented as floating point!
+
+ // FIXME rounding mode needs to be more flexible
+ case Type::FloatTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle)
+ return true;
+ Val2.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::DoubleTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble)
+ return true;
+ Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::X86_FP80TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::x87DoubleExtended;
+ case Type::FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::IEEEquad;
+ case Type::PPC_FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::PPCDoubleDouble;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Factory Function Implementation
+
+
+// ConstantCreator - A class used by ValueMap* to create constants. It should
+// be partially specialized whenever something unusual must be done to
+// interface with a constant's constructor. The number of operands each
+// create method allocates is determined by the ConstantTraits template.
+//
+namespace llvm {
+ template<class ValType>
+ struct ConstantTraits;
+
+ template<typename T, typename Alloc>
+ struct VISIBILITY_HIDDEN ConstantTraits< std::vector<T, Alloc> > {
+ static unsigned uses(const std::vector<T, Alloc>& v) {
+ return v.size();
+ }
+ };
+
+ template<class ConstantClass, class TypeClass, class ValType>
+ struct VISIBILITY_HIDDEN ConstantCreator {
+ static ConstantClass *create(const TypeClass *Ty, const ValType &V) {
+ return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
+ }
+ };
+
+ template<class ConstantClass, class TypeClass>
+ struct VISIBILITY_HIDDEN ConvertConstantType {
+ static void convert(ConstantClass *OldC, const TypeClass *NewTy) {
+ assert(0 && "This type cannot be converted!\n");
+ abort();
+ }
+ };
+
+ template<class ValType, class TypeClass, class ConstantClass,
+ bool HasLargeKey = false /*true for arrays and structs*/ >
+ class VISIBILITY_HIDDEN ValueMap : public AbstractTypeUser {
+ public:
+ typedef std::pair<const Type*, ValType> MapKey;
+ typedef std::map<MapKey, Constant *> MapTy;
+ typedef std::map<Constant*, typename MapTy::iterator> InverseMapTy;
+ typedef std::map<const Type*, typename MapTy::iterator> AbstractTypeMapTy;
+ private:
+    /// Map - This is the main map from the element descriptor to the
+    /// Constants. This is the primary way we avoid creating two constants
+    /// of the same shape.
+ MapTy Map;
+
+ /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
+ /// from the constants to their element in Map. This is important for
+ /// removal of constants from the array, which would otherwise have to scan
+ /// through the map with very large keys.
+ InverseMapTy InverseMap;
+
+ /// AbstractTypeMap - Map for abstract type constants.
+ ///
+ AbstractTypeMapTy AbstractTypeMap;
+
+ public:
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ /// InsertOrGetItem - Return an iterator for the specified element.
+ /// If the element exists in the map, the returned iterator points to the
+ /// entry and Exists=true. If not, the iterator points to the newly
+ /// inserted entry and returns Exists=false. Newly inserted entries have
+ /// I->second == 0, and should be filled in.
+ typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, Constant *>
+ &InsertVal,
+ bool &Exists) {
+ std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
+ Exists = !IP.second;
+ return IP.first;
+ }
+
+private:
+ typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
+ if (HasLargeKey) {
+ typename InverseMapTy::iterator IMI = InverseMap.find(CP);
+ assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
+ IMI->second->second == CP &&
+ "InverseMap corrupt!");
+ return IMI->second;
+ }
+
+ typename MapTy::iterator I =
+ Map.find(MapKey(static_cast<const TypeClass*>(CP->getRawType()),
+ getValType(CP)));
+ if (I == Map.end() || I->second != CP) {
+ // FIXME: This should not use a linear scan. If this gets to be a
+ // performance problem, someone should look at this.
+ for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
+ /* empty */;
+ }
+ return I;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
+ MapKey Lookup(Ty, V);
+ typename MapTy::iterator I = Map.find(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ return static_cast<ConstantClass *>(I->second);
+
+ // If no preexisting value, create one now...
+ ConstantClass *Result =
+ ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.insert(std::make_pair(Result, I));
+
+ // If the type of the constant is abstract, make sure that an entry exists
+ // for it in the AbstractTypeMap.
+ if (Ty->isAbstract()) {
+ typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(Ty);
+
+ if (TI == AbstractTypeMap.end()) {
+ // Add ourselves to the ATU list of the type.
+ cast<DerivedType>(Ty)->addAbstractTypeUser(this);
+
+ AbstractTypeMap.insert(TI, std::make_pair(Ty, I));
+ }
+ }
+ return Result;
+ }
+
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = FindExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->second == CP && "Didn't find correct element?");
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.erase(CP);
+
+ // Now that we found the entry, make sure this isn't the entry that
+ // the AbstractTypeMap points to.
+ const TypeClass *Ty = static_cast<const TypeClass *>(I->first.first);
+ if (Ty->isAbstract()) {
+ assert(AbstractTypeMap.count(Ty) &&
+ "Abstract type not in AbstractTypeMap?");
+ typename MapTy::iterator &ATMEntryIt = AbstractTypeMap[Ty];
+ if (ATMEntryIt == I) {
+ // Yes, we are removing the representative entry for this type.
+ // See if there are any other entries of the same type.
+ typename MapTy::iterator TmpIt = ATMEntryIt;
+
+ // First check the entry before this one...
+ if (TmpIt != Map.begin()) {
+ --TmpIt;
+ if (TmpIt->first.first != Ty) // Not the same type, move back...
+ ++TmpIt;
+ }
+
+ // If we didn't find the same type, try to move forward...
+ if (TmpIt == ATMEntryIt) {
+ ++TmpIt;
+ if (TmpIt == Map.end() || TmpIt->first.first != Ty)
+ --TmpIt; // No entry afterwards with the same type
+ }
+
+ // If there is another entry in the map of the same abstract type,
+ // update the AbstractTypeMap entry now.
+ if (TmpIt != ATMEntryIt) {
+ ATMEntryIt = TmpIt;
+ } else {
+ // Otherwise, we are removing the last instance of this type
+ // from the table. Remove from the ATM, and from user list.
+ cast<DerivedType>(Ty)->removeAbstractTypeUser(this);
+ AbstractTypeMap.erase(Ty);
+ }
+ }
+ }
+
+ Map.erase(I);
+ }
+
+
+ /// MoveConstantToNewSlot - If we are about to change C to be the element
+ /// specified by I, update our internal data structures to reflect this
+ /// fact.
+ void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
+ // First, remove the old location of the specified constant in the map.
+ typename MapTy::iterator OldI = FindExistingElement(C);
+ assert(OldI != Map.end() && "Constant not found in constant table!");
+ assert(OldI->second == C && "Didn't find correct element?");
+
+ // If this constant is the representative element for its abstract type,
+ // update the AbstractTypeMap so that the representative element is I.
+ if (C->getType()->isAbstract()) {
+ typename AbstractTypeMapTy::iterator ATI =
+ AbstractTypeMap.find(C->getType());
+ assert(ATI != AbstractTypeMap.end() &&
+ "Abstract type not in AbstractTypeMap?");
+ if (ATI->second == OldI)
+ ATI->second = I;
+ }
+
+ // Remove the old entry from the map.
+ Map.erase(OldI);
+
+ // Update the inverse map so that we know that this constant is now
+ // located at descriptor I.
+ if (HasLargeKey) {
+ assert(I->second == C && "Bad inversemap entry!");
+ InverseMap[C] = I;
+ }
+ }
+
+ void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
+ typename AbstractTypeMapTy::iterator I =
+ AbstractTypeMap.find(cast<Type>(OldTy));
+
+ assert(I != AbstractTypeMap.end() &&
+ "Abstract type not in AbstractTypeMap?");
+
+ // Convert a constant at a time until the last one is gone. The last one
+ // leaving will remove() itself, causing the AbstractTypeMapEntry to be
+ // eliminated eventually.
+ do {
+ ConvertConstantType<ConstantClass,
+ TypeClass>::convert(
+ static_cast<ConstantClass *>(I->second->second),
+ cast<TypeClass>(NewTy));
+
+ I = AbstractTypeMap.find(cast<Type>(OldTy));
+ } while (I != AbstractTypeMap.end());
+ }
+
+ // If the type became concrete without being refined to any other existing
+ // type, we just remove ourselves from the ATU list.
+ void typeBecameConcrete(const DerivedType *AbsTy) {
+ AbsTy->removeAbstractTypeUser(this);
+ }
+
+ void dump() const {
+ DOUT << "Constant.cpp: ValueMap\n";
+ }
+ };
+}
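+
+// Sketch of how the specializations below use ValueMap: each constant kind
+// keeps one static map keyed by (type, value descriptor), and getOrCreate
+// hands back the uniqued node. For instance, the very next map is used as
+//   AggZeroConstants->getOrCreate(Ty, 0);
+// so there is exactly one ConstantAggregateZero per type.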
+
+
+
+//---- ConstantAggregateZero::get() implementation...
+//
+namespace llvm {
+ // ConstantAggregateZero does not take extra "value" argument...
+ template<class ValType>
+ struct ConstantCreator<ConstantAggregateZero, Type, ValType> {
+ static ConstantAggregateZero *create(const Type *Ty, const ValType &V){
+ return new ConstantAggregateZero(Ty);
+ }
+ };
+
+ template<>
+ struct ConvertConstantType<ConstantAggregateZero, Type> {
+ static void convert(ConstantAggregateZero *OldC, const Type *NewTy) {
+ // Make everyone now use a constant of the new type...
+ Constant *New = ConstantAggregateZero::get(NewTy);
+ assert(New != OldC && "Didn't replace constant??");
+ OldC->uncheckedReplaceAllUsesWith(New);
+ OldC->destroyConstant(); // This constant is now dead, destroy it.
+ }
+ };
+}
+
+static ManagedStatic<ValueMap<char, Type,
+ ConstantAggregateZero> > AggZeroConstants;
+
+static char getValType(ConstantAggregateZero *CPZ) { return 0; }
+
+ConstantAggregateZero *ConstantAggregateZero::get(const Type *Ty) {
+ assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) &&
+ "Cannot create an aggregate zero of non-aggregate type!");
+ return AggZeroConstants->getOrCreate(Ty, 0);
+}
+
+/// destroyConstant - Remove the constant from the constant table...
+///
+void ConstantAggregateZero::destroyConstant() {
+ AggZeroConstants->remove(this);
+ destroyConstantImpl();
+}
+
+//---- ConstantArray::get() implementation...
+//
+namespace llvm {
+ template<>
+ struct ConvertConstantType<ConstantArray, ArrayType> {
+ static void convert(ConstantArray *OldC, const ArrayType *NewTy) {
+ // Make everyone now use a constant of the new type...
+ std::vector<Constant*> C;
+ for (unsigned i = 0, e = OldC->getNumOperands(); i != e; ++i)
+ C.push_back(cast<Constant>(OldC->getOperand(i)));
+ Constant *New = ConstantArray::get(NewTy, C);
+ assert(New != OldC && "Didn't replace constant??");
+ OldC->uncheckedReplaceAllUsesWith(New);
+ OldC->destroyConstant(); // This constant is now dead, destroy it.
+ }
+ };
+}
+
+static std::vector<Constant*> getValType(ConstantArray *CA) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CA->getNumOperands());
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ Elements.push_back(cast<Constant>(CA->getOperand(i)));
+ return Elements;
+}
+
+typedef ValueMap<std::vector<Constant*>, ArrayType,
+ ConstantArray, true /*largekey*/> ArrayConstantsTy;
+static ManagedStatic<ArrayConstantsTy> ArrayConstants;
+
+Constant *ConstantArray::get(const ArrayType *Ty,
+ const std::vector<Constant*> &V) {
+ // If this is an all-zero array, return a ConstantAggregateZero object
+ if (!V.empty()) {
+ Constant *C = V[0];
+ if (!C->isNullValue())
+ return ArrayConstants->getOrCreate(Ty, V);
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C)
+ return ArrayConstants->getOrCreate(Ty, V);
+ }
+ return ConstantAggregateZero::get(Ty);
+}
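+
+// Sketch of the all-zero collapse above (hypothetical element values):
+//   std::vector<Constant*> Z(4, Constant::getNullValue(Type::Int32Ty));
+//   ConstantArray::get(ArrayType::get(Type::Int32Ty, 4), Z);
+// returns a ConstantAggregateZero, never a ConstantArray node.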
+
+/// destroyConstant - Remove the constant from the constant table...
+///
+void ConstantArray::destroyConstant() {
+ ArrayConstants->remove(this);
+ destroyConstantImpl();
+}
+
+/// ConstantArray::get(const string&, bool) - Return an array that is
+/// initialized to contain the specified string. If AddNull is true, a null
+/// terminator is appended so the array may be used as a natural C string;
+/// otherwise the array contains exactly the characters of Str.
+///
+Constant *ConstantArray::get(const std::string &Str, bool AddNull) {
+ std::vector<Constant*> ElementVals;
+ for (unsigned i = 0; i < Str.length(); ++i)
+ ElementVals.push_back(ConstantInt::get(Type::Int8Ty, Str[i]));
+
+ // Add a null terminator to the string...
+ if (AddNull) {
+ ElementVals.push_back(ConstantInt::get(Type::Int8Ty, 0));
+ }
+
+ ArrayType *ATy = ArrayType::get(Type::Int8Ty, ElementVals.size());
+ return ConstantArray::get(ATy, ElementVals);
+}
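+
+// Sketch: ConstantArray::get("hi", true) builds [3 x i8] c"hi\00", suitable
+// for backing a C string, while ConstantArray::get("hi", false) builds the
+// unterminated [2 x i8] c"hi".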
+
+/// isString - This method returns true if the array is an array of i8, and
+/// if the elements of the array are all ConstantInts.
+bool ConstantArray::isString() const {
+ // Check the element type for i8...
+ if (getType()->getElementType() != Type::Int8Ty)
+ return false;
+ // Check the elements to make sure they are all integers, not constant
+ // expressions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ return true;
+}
+
+/// isCString - This method returns true if the array is a string (see
+/// isString), ends in a null byte \\0, and contains no other null bytes.
+bool ConstantArray::isCString() const {
+ // Check the element type for i8...
+ if (getType()->getElementType() != Type::Int8Ty)
+ return false;
+ Constant *Zero = Constant::getNullValue(getOperand(0)->getType());
+ // Last element must be a null.
+ if (getOperand(getNumOperands()-1) != Zero)
+ return false;
+ // Other elements must be non-null integers.
+ for (unsigned i = 0, e = getNumOperands()-1; i != e; ++i) {
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ if (getOperand(i) == Zero)
+ return false;
+ }
+ return true;
+}
+
+
+/// getAsString - If the element type of this array is i8, then this method
+/// converts the array to a std::string and returns it. Otherwise, it
+/// asserts.
+///
+std::string ConstantArray::getAsString() const {
+ assert(isString() && "Not a string!");
+ std::string Result;
+ Result.reserve(getNumOperands());
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ Result.push_back((char)cast<ConstantInt>(getOperand(i))->getZExtValue());
+ return Result;
+}
+
+
+//---- ConstantStruct::get() implementation...
+//
+
+namespace llvm {
+ template<>
+ struct ConvertConstantType<ConstantStruct, StructType> {
+ static void convert(ConstantStruct *OldC, const StructType *NewTy) {
+ // Make everyone now use a constant of the new type...
+ std::vector<Constant*> C;
+ for (unsigned i = 0, e = OldC->getNumOperands(); i != e; ++i)
+ C.push_back(cast<Constant>(OldC->getOperand(i)));
+ Constant *New = ConstantStruct::get(NewTy, C);
+ assert(New != OldC && "Didn't replace constant??");
+
+ OldC->uncheckedReplaceAllUsesWith(New);
+ OldC->destroyConstant(); // This constant is now dead, destroy it.
+ }
+ };
+}
+
+typedef ValueMap<std::vector<Constant*>, StructType,
+ ConstantStruct, true /*largekey*/> StructConstantsTy;
+static ManagedStatic<StructConstantsTy> StructConstants;
+
+static std::vector<Constant*> getValType(ConstantStruct *CS) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CS->getNumOperands());
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
+ Elements.push_back(cast<Constant>(CS->getOperand(i)));
+ return Elements;
+}
+
+Constant *ConstantStruct::get(const StructType *Ty,
+ const std::vector<Constant*> &V) {
+ // Create a ConstantAggregateZero value if all elements are zeros...
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (!V[i]->isNullValue())
+ return StructConstants->getOrCreate(Ty, V);
+
+ return ConstantAggregateZero::get(Ty);
+}
+
+Constant *ConstantStruct::get(const std::vector<Constant*> &V, bool packed) {
+ std::vector<const Type*> StructEls;
+ StructEls.reserve(V.size());
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ StructEls.push_back(V[i]->getType());
+ return get(StructType::get(StructEls, packed), V);
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantStruct::destroyConstant() {
+ StructConstants->remove(this);
+ destroyConstantImpl();
+}
+
+//---- ConstantVector::get() implementation...
+//
+namespace llvm {
+ template<>
+ struct ConvertConstantType<ConstantVector, VectorType> {
+ static void convert(ConstantVector *OldC, const VectorType *NewTy) {
+ // Make everyone now use a constant of the new type...
+ std::vector<Constant*> C;
+ for (unsigned i = 0, e = OldC->getNumOperands(); i != e; ++i)
+ C.push_back(cast<Constant>(OldC->getOperand(i)));
+ Constant *New = ConstantVector::get(NewTy, C);
+ assert(New != OldC && "Didn't replace constant??");
+ OldC->uncheckedReplaceAllUsesWith(New);
+ OldC->destroyConstant(); // This constant is now dead, destroy it.
+ }
+ };
+}
+
+static std::vector<Constant*> getValType(ConstantVector *CP) {
+ std::vector<Constant*> Elements;
+ Elements.reserve(CP->getNumOperands());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ Elements.push_back(CP->getOperand(i));
+ return Elements;
+}
+
+static ManagedStatic<ValueMap<std::vector<Constant*>, VectorType,
+ ConstantVector> > VectorConstants;
+
+Constant *ConstantVector::get(const VectorType *Ty,
+ const std::vector<Constant*> &V) {
+ assert(!V.empty() && "Vectors can't be empty");
+  // If this is an all-undef or all-zero vector, return a
+ // ConstantAggregateZero or UndefValue.
+ Constant *C = V[0];
+ bool isZero = C->isNullValue();
+ bool isUndef = isa<UndefValue>(C);
+
+ if (isZero || isUndef) {
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C) {
+ isZero = isUndef = false;
+ break;
+ }
+ }
+
+ if (isZero)
+ return ConstantAggregateZero::get(Ty);
+ if (isUndef)
+ return UndefValue::get(Ty);
+ return VectorConstants->getOrCreate(Ty, V);
+}
+
+Constant *ConstantVector::get(const std::vector<Constant*> &V) {
+ assert(!V.empty() && "Cannot infer type if V is empty");
+  return get(VectorType::get(V.front()->getType(), V.size()), V);
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantVector::destroyConstant() {
+ VectorConstants->remove(this);
+ destroyConstantImpl();
+}
+
+/// This function will return true iff every element in this vector constant
+/// is set to all ones.
+/// @returns true iff this constant's elements are all set to all ones.
+/// @brief Determine if the value is all ones.
+bool ConstantVector::isAllOnesValue() const {
+ // Check out first element.
+ const Constant *Elt = getOperand(0);
+ const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI || !CI->isAllOnesValue()) return false;
+ // Then make sure all remaining elements point to the same value.
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I) {
+ if (getOperand(I) != Elt) return false;
+ }
+ return true;
+}
+
+/// getSplatValue - If this is a splat constant, where all of the
+/// elements have the same value, return that value. Otherwise return null.
+Constant *ConstantVector::getSplatValue() {
+ // Check out first element.
+ Constant *Elt = getOperand(0);
+ // Then make sure all remaining elements point to the same value.
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+ if (getOperand(I) != Elt) return 0;
+ return Elt;
+}
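+
+// Illustrative behavior (editorial note): for the splat constant
+// <4 x i32> <i32 7, i32 7, i32 7, i32 7>, getSplatValue() returns the i32 7
+// constant; for <2 x i32> <i32 1, i32 2> it returns null.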
+
+//---- ConstantPointerNull::get() implementation...
+//
+
+namespace llvm {
+ // ConstantPointerNull does not take extra "value" argument...
+ template<class ValType>
+ struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
+ static ConstantPointerNull *create(const PointerType *Ty, const ValType &V){
+ return new ConstantPointerNull(Ty);
+ }
+ };
+
+ template<>
+ struct ConvertConstantType<ConstantPointerNull, PointerType> {
+ static void convert(ConstantPointerNull *OldC, const PointerType *NewTy) {
+ // Make everyone now use a constant of the new type...
+ Constant *New = ConstantPointerNull::get(NewTy);
+ assert(New != OldC && "Didn't replace constant??");
+ OldC->uncheckedReplaceAllUsesWith(New);
+ OldC->destroyConstant(); // This constant is now dead, destroy it.
+ }
+ };
+}
+
+static ManagedStatic<ValueMap<char, PointerType,
+ ConstantPointerNull> > NullPtrConstants;
+
+static char getValType(ConstantPointerNull *) {
+ return 0;
+}
+
+
+ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
+ return NullPtrConstants->getOrCreate(Ty, 0);
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantPointerNull::destroyConstant() {
+ NullPtrConstants->remove(this);
+ destroyConstantImpl();
+}
+
+
+//---- UndefValue::get() implementation...
+//
+
+namespace llvm {
+ // UndefValue does not take extra "value" argument...
+ template<class ValType>
+ struct ConstantCreator<UndefValue, Type, ValType> {
+ static UndefValue *create(const Type *Ty, const ValType &V) {
+ return new UndefValue(Ty);
+ }
+ };
+
+ template<>
+ struct ConvertConstantType<UndefValue, Type> {
+ static void convert(UndefValue *OldC, const Type *NewTy) {
+ // Make everyone now use a constant of the new type.
+ Constant *New = UndefValue::get(NewTy);
+ assert(New != OldC && "Didn't replace constant??");
+ OldC->uncheckedReplaceAllUsesWith(New);
+ OldC->destroyConstant(); // This constant is now dead, destroy it.
+ }
+ };
+}
+
+static ManagedStatic<ValueMap<char, Type, UndefValue> > UndefValueConstants;
+
+static char getValType(UndefValue *) {
+ return 0;
+}
+
+
+UndefValue *UndefValue::get(const Type *Ty) {
+ return UndefValueConstants->getOrCreate(Ty, 0);
+}
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void UndefValue::destroyConstant() {
+ UndefValueConstants->remove(this);
+ destroyConstantImpl();
+}
+
+//---- MDString::get() implementation
+//
+
+MDString::MDString(const char *begin, const char *end)
+ : Constant(Type::MetadataTy, MDStringVal, 0, 0),
+ StrBegin(begin), StrEnd(end) {}
+
+static ManagedStatic<StringMap<MDString*> > MDStringCache;
+
+MDString *MDString::get(const char *StrBegin, const char *StrEnd) {
+ StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(StrBegin,
+ StrEnd);
+ MDString *&S = Entry.getValue();
+ if (!S) S = new MDString(Entry.getKeyData(),
+ Entry.getKeyData() + Entry.getKeyLength());
+ return S;
+}
+
+void MDString::destroyConstant() {
+ MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd));
+ destroyConstantImpl();
+}
+
+//---- MDNode::get() implementation
+//
+
+static ManagedStatic<FoldingSet<MDNode> > MDNodeSet;
+
+MDNode::MDNode(Value*const* Vals, unsigned NumVals)
+ : Constant(Type::MetadataTy, MDNodeVal, 0, 0) {
+ for (unsigned i = 0; i != NumVals; ++i)
+ Node.push_back(ElementVH(Vals[i], this));
+}
+
+void MDNode::Profile(FoldingSetNodeID &ID) const {
+ for (const_elem_iterator I = elem_begin(), E = elem_end(); I != E; ++I)
+ ID.AddPointer(*I);
+}
+
+MDNode *MDNode::get(Value*const* Vals, unsigned NumVals) {
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != NumVals; ++i)
+ ID.AddPointer(Vals[i]);
+
+ void *InsertPoint;
+ if (MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint))
+ return N;
+
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ MDNode *N = new(0) MDNode(Vals, NumVals);
+ MDNodeSet->InsertNode(N, InsertPoint);
+ return N;
+}
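+
+// Editorial note: nodes are uniqued through the FoldingSet, so two calls
+// with identical operand lists return the same node:
+//   MDNode *A = MDNode::get(Vals, NumVals);
+//   MDNode *B = MDNode::get(Vals, NumVals);
+//   assert(A == B && "MDNodes are uniqued");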
+
+void MDNode::destroyConstant() {
+ MDNodeSet->RemoveNode(this);
+ destroyConstantImpl();
+}
+
+//---- ConstantExpr::get() implementations...
+//
+
+namespace {
+
+struct ExprMapKeyType {
+ typedef SmallVector<unsigned, 4> IndexList;
+
+ ExprMapKeyType(unsigned opc,
+ const std::vector<Constant*> &ops,
+ unsigned short pred = 0,
+ const IndexList &inds = IndexList())
+ : opcode(opc), predicate(pred), operands(ops), indices(inds) {}
+ uint16_t opcode;
+ uint16_t predicate;
+ std::vector<Constant*> operands;
+ IndexList indices;
+ bool operator==(const ExprMapKeyType& that) const {
+ return this->opcode == that.opcode &&
+ this->predicate == that.predicate &&
+ this->operands == that.operands &&
+ this->indices == that.indices;
+ }
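+  // Strict weak ordering, lexicographic over (opcode, predicate, operands,
+  // indices); needed so keys can live in an ordered map (editorial note).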
+ bool operator<(const ExprMapKeyType & that) const {
+ return this->opcode < that.opcode ||
+ (this->opcode == that.opcode && this->predicate < that.predicate) ||
+ (this->opcode == that.opcode && this->predicate == that.predicate &&
+ this->operands < that.operands) ||
+ (this->opcode == that.opcode && this->predicate == that.predicate &&
+ this->operands == that.operands && this->indices < that.indices);
+ }
+
+ bool operator!=(const ExprMapKeyType& that) const {
+ return !(*this == that);
+ }
+};
+
+}
+
+namespace llvm {
+ template<>
+ struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
+ static ConstantExpr *create(const Type *Ty, const ExprMapKeyType &V,
+ unsigned short pred = 0) {
+ if (Instruction::isCast(V.opcode))
+ return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
+ if ((V.opcode >= Instruction::BinaryOpsBegin &&
+ V.opcode < Instruction::BinaryOpsEnd))
+ return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::Select)
+ return new SelectConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ExtractElement)
+ return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::InsertElement)
+ return new InsertElementConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ShuffleVector)
+ return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::InsertValue)
+ return new InsertValueConstantExpr(V.operands[0], V.operands[1],
+ V.indices, Ty);
+ if (V.opcode == Instruction::ExtractValue)
+ return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
+ if (V.opcode == Instruction::GetElementPtr) {
+ std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
+ return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty);
+ }
+
+ // The compare instructions are weird. We have to encode the predicate
+ // value and it is combined with the instruction opcode by multiplying
+ // the opcode by one hundred. We must decode this to get the predicate.
+ if (V.opcode == Instruction::ICmp)
+ return new CompareConstantExpr(Ty, Instruction::ICmp, V.predicate,
+ V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::FCmp)
+ return new CompareConstantExpr(Ty, Instruction::FCmp, V.predicate,
+ V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::VICmp)
+ return new CompareConstantExpr(Ty, Instruction::VICmp, V.predicate,
+ V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::VFCmp)
+ return new CompareConstantExpr(Ty, Instruction::VFCmp, V.predicate,
+ V.operands[0], V.operands[1]);
+ assert(0 && "Invalid ConstantExpr!");
+ return 0;
+ }
+ };
+
+ template<>
+ struct ConvertConstantType<ConstantExpr, Type> {
+ static void convert(ConstantExpr *OldC, const Type *NewTy) {
+ Constant *New;
+ switch (OldC->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ New = ConstantExpr::getCast(OldC->getOpcode(), OldC->getOperand(0),
+ NewTy);
+ break;
+ case Instruction::Select:
+ New = ConstantExpr::getSelectTy(NewTy, OldC->getOperand(0),
+ OldC->getOperand(1),
+ OldC->getOperand(2));
+ break;
+ default:
+ assert(OldC->getOpcode() >= Instruction::BinaryOpsBegin &&
+ OldC->getOpcode() < Instruction::BinaryOpsEnd);
+ New = ConstantExpr::getTy(NewTy, OldC->getOpcode(), OldC->getOperand(0),
+ OldC->getOperand(1));
+ break;
+ case Instruction::GetElementPtr:
+ // Make everyone now use a constant of the new type...
+ std::vector<Value*> Idx(OldC->op_begin()+1, OldC->op_end());
+ New = ConstantExpr::getGetElementPtrTy(NewTy, OldC->getOperand(0),
+ &Idx[0], Idx.size());
+ break;
+ }
+
+ assert(New != OldC && "Didn't replace constant??");
+ OldC->uncheckedReplaceAllUsesWith(New);
+ OldC->destroyConstant(); // This constant is now dead, destroy it.
+ }
+ };
+} // end namespace llvm
+
+
+static ExprMapKeyType getValType(ConstantExpr *CE) {
+ std::vector<Constant*> Operands;
+ Operands.reserve(CE->getNumOperands());
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ Operands.push_back(cast<Constant>(CE->getOperand(i)));
+ return ExprMapKeyType(CE->getOpcode(), Operands,
+ CE->isCompare() ? CE->getPredicate() : 0,
+ CE->hasIndices() ?
+ CE->getIndices() : SmallVector<unsigned, 4>());
+}
+
+static ManagedStatic<ValueMap<ExprMapKeyType, Type,
+ ConstantExpr> > ExprConstants;
+
+/// This is a utility function to handle folding of casts and lookup of the
+/// cast in the ExprConstants map. It is used by the various get* methods below.
+static inline Constant *getFoldedCast(
+ Instruction::CastOps opc, Constant *C, const Type *Ty) {
+ assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
+ // Fold a few common cases
+ if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
+ return FC;
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> argVec(1, C);
+ ExprMapKeyType Key(opc, argVec);
+ return ExprConstants->getOrCreate(Ty, Key);
+}
+
+Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
+ Instruction::CastOps opc = Instruction::CastOps(oc);
+ assert(Instruction::isCast(opc) && "opcode out of range");
+ assert(C && Ty && "Null arguments to getCast");
+ assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
+
+ switch (opc) {
+ default:
+ assert(0 && "Invalid cast opcode");
+ break;
+ case Instruction::Trunc: return getTrunc(C, Ty);
+ case Instruction::ZExt: return getZExt(C, Ty);
+ case Instruction::SExt: return getSExt(C, Ty);
+ case Instruction::FPTrunc: return getFPTrunc(C, Ty);
+ case Instruction::FPExt: return getFPExtend(C, Ty);
+ case Instruction::UIToFP: return getUIToFP(C, Ty);
+ case Instruction::SIToFP: return getSIToFP(C, Ty);
+ case Instruction::FPToUI: return getFPToUI(C, Ty);
+ case Instruction::FPToSI: return getFPToSI(C, Ty);
+ case Instruction::PtrToInt: return getPtrToInt(C, Ty);
+ case Instruction::IntToPtr: return getIntToPtr(C, Ty);
+ case Instruction::BitCast: return getBitCast(C, Ty);
+ }
+ return 0;
+}
+
+Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return getCast(Instruction::BitCast, C, Ty);
+ return getCast(Instruction::ZExt, C, Ty);
+}
+
+Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return getCast(Instruction::BitCast, C, Ty);
+ return getCast(Instruction::SExt, C, Ty);
+}
+
+Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
+ if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return getCast(Instruction::BitCast, C, Ty);
+ return getCast(Instruction::Trunc, C, Ty);
+}
+
+Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
+ assert(isa<PointerType>(S->getType()) && "Invalid cast");
+ assert((Ty->isInteger() || isa<PointerType>(Ty)) && "Invalid cast");
+
+ if (Ty->isInteger())
+ return getCast(Instruction::PtrToInt, S, Ty);
+ return getCast(Instruction::BitCast, S, Ty);
+}
+
+Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
+ bool isSigned) {
+ assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
+ unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return getCast(opcode, C, Ty);
+}
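+
+// Editorial example: casting an i8 constant to i32 with isSigned=true
+// selects SExt and isSigned=false selects ZExt; i32 to i8 selects Trunc;
+// i32 to i32 becomes a no-op BitCast that simply returns the operand.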
+
+Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
+ assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
+ unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ if (SrcBits == DstBits)
+ return C; // Avoid a useless cast
+ Instruction::CastOps opcode =
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
+ return getCast(opcode, C, Ty);
+}
+
+Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) {
+ assert(C->getType()->isInteger() && "Trunc operand must be integer");
+  assert(Ty->isInteger() && "Trunc produces only integer");
+ assert(C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&&
+ "SrcTy must be larger than DestTy for Trunc!");
+
+ return getFoldedCast(Instruction::Trunc, C, Ty);
+}
+
+Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) {
+  assert(C->getType()->isInteger() && "SExt operand must be integral");
+ assert(Ty->isInteger() && "SExt produces only integer");
+ assert(C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+ "SrcTy must be smaller than DestTy for SExt!");
+
+ return getFoldedCast(Instruction::SExt, C, Ty);
+}
+
+Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) {
+  assert(C->getType()->isInteger() && "ZExt operand must be integral");
+ assert(Ty->isInteger() && "ZExt produces only integer");
+ assert(C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+ "SrcTy must be smaller than DestTy for ZExt!");
+
+ return getFoldedCast(Instruction::ZExt, C, Ty);
+}
+
+Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) {
+ assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&&
+ "This is an illegal floating point truncation!");
+ return getFoldedCast(Instruction::FPTrunc, C, Ty);
+}
+
+Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) {
+ assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+ "This is an illegal floating point extension!");
+ return getFoldedCast(Instruction::FPExt, C, Ty);
+}
+
+Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+ assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() &&
+ "This is an illegal uint to floating point cast!");
+ return getFoldedCast(Instruction::UIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+ assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() &&
+ "This is an illegal sint to floating point cast!");
+ return getFoldedCast(Instruction::SIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+ assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() &&
+ "This is an illegal floating point to uint cast!");
+ return getFoldedCast(Instruction::FPToUI, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+ assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() &&
+ "This is an illegal floating point to sint cast!");
+ return getFoldedCast(Instruction::FPToSI, C, Ty);
+}
+
+Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) {
+ assert(isa<PointerType>(C->getType()) && "PtrToInt source must be pointer");
+ assert(DstTy->isInteger() && "PtrToInt destination must be integral");
+ return getFoldedCast(Instruction::PtrToInt, C, DstTy);
+}
+
+Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) {
+ assert(C->getType()->isInteger() && "IntToPtr source must be integral");
+ assert(isa<PointerType>(DstTy) && "IntToPtr destination must be a pointer");
+ return getFoldedCast(Instruction::IntToPtr, C, DstTy);
+}
+
+Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) {
+ // BitCast implies a no-op cast of type only. No bits change. However, you
+ // can't cast pointers to anything but pointers.
+#ifndef NDEBUG
+ const Type *SrcTy = C->getType();
+ assert((isa<PointerType>(SrcTy) == isa<PointerType>(DstTy)) &&
+ "BitCast cannot cast pointer to non-pointer and vice versa");
+
+ // Now we know we're not dealing with mismatched pointer casts (ptr->nonptr
+ // or nonptr->ptr). For all the other types, the cast is okay if source and
+ // destination bit widths are identical.
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DstBitSize = DstTy->getPrimitiveSizeInBits();
+#endif
+ assert(SrcBitSize == DstBitSize && "BitCast requires types of same width");
+
+ // It is common to ask for a bitcast of a value to its own type, handle this
+ // speedily.
+ if (C->getType() == DstTy) return C;
+
+ return getFoldedCast(Instruction::BitCast, C, DstTy);
+}
+
+Constant *ConstantExpr::getAlignOf(const Type *Ty) {
+  // alignof is implemented as: (i32) gep ({i8,Ty}*)null, 0, 1
+ const Type *AligningTy = StructType::get(Type::Int8Ty, Ty, NULL);
+ Constant *NullPtr = getNullValue(AligningTy->getPointerTo());
+ Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
+ Constant *One = ConstantInt::get(Type::Int32Ty, 1);
+ Constant *Indices[2] = { Zero, One };
+ Constant *GEP = getGetElementPtr(NullPtr, Indices, 2);
+ return getCast(Instruction::PtrToInt, GEP, Type::Int32Ty);
+}
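+
+// Editorial note: in the non-packed struct {i8, Ty}, the second field is
+// placed at the first offset after the leading i8 that satisfies Ty's
+// alignment, so the gep from a null base evaluates to exactly alignof(Ty).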
+
+Constant *ConstantExpr::getSizeOf(const Type *Ty) {
+ // sizeof is implemented as: (i64) gep (Ty*)null, 1
+ Constant *GEPIdx = ConstantInt::get(Type::Int32Ty, 1);
+ Constant *GEP =
+ getGetElementPtr(getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1);
+ return getCast(Instruction::PtrToInt, GEP, Type::Int64Ty);
+}
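+
+// Editorial note: element 1 of a Ty array based at null lives at offset
+// 1 * sizeof(Ty), so the ptrtoint of this gep folds to the type's
+// allocation size.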
+
+Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode,
+ Constant *C1, Constant *C2) {
+ // Check the operands for consistency first
+ assert(Opcode >= Instruction::BinaryOpsBegin &&
+ Opcode < Instruction::BinaryOpsEnd &&
+ "Invalid opcode in binary constant expression");
+ assert(C1->getType() == C2->getType() &&
+ "Operand types in binary constant expression should match");
+
+ if (ReqTy == C1->getType() || ReqTy == Type::Int1Ty)
+ if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
+ return FC; // Fold a few common cases...
+
+ std::vector<Constant*> argVec(1, C1); argVec.push_back(C2);
+ ExprMapKeyType Key(Opcode, argVec);
+ return ExprConstants->getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getCompareTy(unsigned short predicate,
+ Constant *C1, Constant *C2) {
+ bool isVectorType = C1->getType()->getTypeID() == Type::VectorTyID;
+ switch (predicate) {
+ default: assert(0 && "Invalid CmpInst predicate");
+ case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
+ case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ return isVectorType ? getVFCmp(predicate, C1, C2)
+ : getFCmp(predicate, C1, C2);
+ case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return isVectorType ? getVICmp(predicate, C1, C2)
+ : getICmp(predicate, C1, C2);
+ }
+}
+
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) {
+#ifndef NDEBUG
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert((C1->getType()->isInteger() || C1->getType()->isFloatingPoint() ||
+ isa<VectorType>(C1->getType())) &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) &&
+ cast<VectorType>(C1->getType())->getElementType()->isInteger())) &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType())
+ && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint()))
+ && "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) &&
+ cast<VectorType>(C1->getType())->getElementType()->isInteger())) &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType())
+ && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint()))
+ && "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert((C1->getType()->isInteger() || isa<VectorType>(C1->getType())) &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVector() &&
+ "Tried to create a shift operation on a non-integer type!");
+ break;
+ default:
+ break;
+ }
+#endif
+
+ return getTy(C1->getType(), Opcode, C1, C2);
+}
+
+Constant *ConstantExpr::getCompare(unsigned short pred,
+ Constant *C1, Constant *C2) {
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ return getCompareTy(pred, C1, C2);
+}
+
+Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
+ Constant *V1, Constant *V2) {
+  assert(!SelectInst::areInvalidOperands(C, V1, V2) &&
+         "Invalid select operands");
+
+ if (ReqTy == V1->getType())
+ if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
+ return SC; // Fold common cases
+
+ std::vector<Constant*> argVec(3, C);
+ argVec[1] = V1;
+ argVec[2] = V2;
+ ExprMapKeyType Key(Instruction::Select, argVec);
+ return ExprConstants->getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
+ Value* const *Idxs,
+ unsigned NumIdx) {
+ assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
+ Idxs+NumIdx) ==
+ cast<PointerType>(ReqTy)->getElementType() &&
+ "GEP indices invalid!");
+
+ if (Constant *FC = ConstantFoldGetElementPtr(C, (Constant**)Idxs, NumIdx))
+ return FC; // Fold a few common cases...
+
+ assert(isa<PointerType>(C->getType()) &&
+ "Non-pointer type for constant GetElementPtr expression");
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.reserve(NumIdx+1);
+ ArgVec.push_back(C);
+ for (unsigned i = 0; i != NumIdx; ++i)
+ ArgVec.push_back(cast<Constant>(Idxs[i]));
+ const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec);
+ return ExprConstants->getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
+ unsigned NumIdx) {
+ // Get the result type of the getelementptr!
+ const Type *Ty =
+ GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
+ assert(Ty && "GEP indices invalid!");
+ unsigned As = cast<PointerType>(C->getType())->getAddressSpace();
+ return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx);
+}
+
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant* const *Idxs,
+ unsigned NumIdx) {
+ return getGetElementPtr(C, (Value* const *)Idxs, NumIdx);
+}
+
+
+Constant *
+ConstantExpr::getICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.push_back(LHS);
+ ArgVec.push_back(RHS);
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+ return ExprConstants->getOrCreate(Type::Int1Ty, Key);
+}
+
+Constant *
+ConstantExpr::getFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.push_back(LHS);
+ ArgVec.push_back(RHS);
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
+ return ExprConstants->getOrCreate(Type::Int1Ty, Key);
+}
+
+Constant *
+ConstantExpr::getVICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
+ assert(isa<VectorType>(LHS->getType()) && LHS->getType() == RHS->getType() &&
+ "Tried to create vicmp operation on non-vector type!");
+ assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid VICmp Predicate");
+
+ const VectorType *VTy = cast<VectorType>(LHS->getType());
+ const Type *EltTy = VTy->getElementType();
+ unsigned NumElts = VTy->getNumElements();
+
+ // See if we can fold the element-wise comparison of the LHS and RHS.
+ SmallVector<Constant *, 16> LHSElts, RHSElts;
+ LHS->getVectorElements(LHSElts);
+ RHS->getVectorElements(RHSElts);
+
+ if (!LHSElts.empty() && !RHSElts.empty()) {
+ SmallVector<Constant *, 16> Elts;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *FC = ConstantFoldCompareInstruction(pred, LHSElts[i],
+ RHSElts[i]);
+ if (ConstantInt *FCI = dyn_cast_or_null<ConstantInt>(FC)) {
+ if (FCI->getZExtValue())
+ Elts.push_back(ConstantInt::getAllOnesValue(EltTy));
+ else
+ Elts.push_back(ConstantInt::get(EltTy, 0ULL));
+ } else if (FC && isa<UndefValue>(FC)) {
+ Elts.push_back(UndefValue::get(EltTy));
+ } else {
+ break;
+ }
+ }
+ if (Elts.size() == NumElts)
+ return ConstantVector::get(&Elts[0], Elts.size());
+ }
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.push_back(LHS);
+ ArgVec.push_back(RHS);
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::VICmp, ArgVec, pred);
+ return ExprConstants->getOrCreate(LHS->getType(), Key);
+}
+
+Constant *
+ConstantExpr::getVFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
+ assert(isa<VectorType>(LHS->getType()) &&
+ "Tried to create vfcmp operation on non-vector type!");
+ assert(LHS->getType() == RHS->getType());
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid VFCmp Predicate");
+
+ const VectorType *VTy = cast<VectorType>(LHS->getType());
+ unsigned NumElts = VTy->getNumElements();
+ const Type *EltTy = VTy->getElementType();
+ const Type *REltTy = IntegerType::get(EltTy->getPrimitiveSizeInBits());
+ const Type *ResultTy = VectorType::get(REltTy, NumElts);
+
+ // See if we can fold the element-wise comparison of the LHS and RHS.
+ SmallVector<Constant *, 16> LHSElts, RHSElts;
+ LHS->getVectorElements(LHSElts);
+ RHS->getVectorElements(RHSElts);
+
+ if (!LHSElts.empty() && !RHSElts.empty()) {
+ SmallVector<Constant *, 16> Elts;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *FC = ConstantFoldCompareInstruction(pred, LHSElts[i],
+ RHSElts[i]);
+ if (ConstantInt *FCI = dyn_cast_or_null<ConstantInt>(FC)) {
+ if (FCI->getZExtValue())
+ Elts.push_back(ConstantInt::getAllOnesValue(REltTy));
+ else
+ Elts.push_back(ConstantInt::get(REltTy, 0ULL));
+ } else if (FC && isa<UndefValue>(FC)) {
+ Elts.push_back(UndefValue::get(REltTy));
+ } else {
+ break;
+ }
+ }
+ if (Elts.size() == NumElts)
+ return ConstantVector::get(&Elts[0], Elts.size());
+ }
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.push_back(LHS);
+ ArgVec.push_back(RHS);
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::VFCmp, ArgVec, pred);
+ return ExprConstants->getOrCreate(ResultTy, Key);
+}
+
+Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
+ Constant *Idx) {
+ if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
+ return FC; // Fold a few common cases...
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, Val);
+ ArgVec.push_back(Idx);
+  const ExprMapKeyType Key(Instruction::ExtractElement, ArgVec);
+ return ExprConstants->getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
+ assert(isa<VectorType>(Val->getType()) &&
+ "Tried to create extractelement operation on non-vector type!");
+ assert(Idx->getType() == Type::Int32Ty &&
+ "Extractelement index must be i32 type!");
+ return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(),
+ Val, Idx);
+}
+
+Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
+ Constant *Elt, Constant *Idx) {
+ if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+ return FC; // Fold a few common cases...
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, Val);
+ ArgVec.push_back(Elt);
+ ArgVec.push_back(Idx);
+  const ExprMapKeyType Key(Instruction::InsertElement, ArgVec);
+ return ExprConstants->getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
+ Constant *Idx) {
+ assert(isa<VectorType>(Val->getType()) &&
+ "Tried to create insertelement operation on non-vector type!");
+ assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
+ && "Insertelement types must match!");
+ assert(Idx->getType() == Type::Int32Ty &&
+ "Insertelement index must be i32 type!");
+ return getInsertElementTy(Val->getType(), Val, Elt, Idx);
+}
+
+Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1,
+ Constant *V2, Constant *Mask) {
+ if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
+ return FC; // Fold a few common cases...
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, V1);
+ ArgVec.push_back(V2);
+ ArgVec.push_back(Mask);
+  const ExprMapKeyType Key(Instruction::ShuffleVector, ArgVec);
+ return ExprConstants->getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
+ Constant *Mask) {
+ assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector constant expr operands!");
+
+ unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements();
+ const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ const Type *ShufTy = VectorType::get(EltTy, NElts);
+ return getShuffleVectorTy(ShufTy, V1, V2, Mask);
+}
+
+Constant *ConstantExpr::getInsertValueTy(const Type *ReqTy, Constant *Agg,
+ Constant *Val,
+ const unsigned *Idxs, unsigned NumIdx) {
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs,
+ Idxs+NumIdx) == Val->getType() &&
+ "insertvalue indices invalid!");
+ assert(Agg->getType() == ReqTy &&
+ "insertvalue type invalid!");
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant InsertValue expression");
+ Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs, NumIdx);
+ assert(FC && "InsertValue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
+ const unsigned *IdxList, unsigned NumIdx) {
+ assert(Agg->getType()->isFirstClassType() &&
+ "Tried to create insertelement operation on non-first-class type!");
+
+ const Type *ReqTy = Agg->getType();
+#ifndef NDEBUG
+ const Type *ValTy =
+ ExtractValueInst::getIndexedType(Agg->getType(), IdxList, IdxList+NumIdx);
+#endif
+ assert(ValTy == Val->getType() && "insertvalue indices invalid!");
+ return getInsertValueTy(ReqTy, Agg, Val, IdxList, NumIdx);
+}
+
+Constant *ConstantExpr::getExtractValueTy(const Type *ReqTy, Constant *Agg,
+ const unsigned *Idxs, unsigned NumIdx) {
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs,
+ Idxs+NumIdx) == ReqTy &&
+ "extractvalue indices invalid!");
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant extractvalue expression");
+ Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs, NumIdx);
+ assert(FC && "ExtractValue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getExtractValue(Constant *Agg,
+ const unsigned *IdxList, unsigned NumIdx) {
+ assert(Agg->getType()->isFirstClassType() &&
+ "Tried to create extractelement operation on non-first-class type!");
+
+ const Type *ReqTy =
+ ExtractValueInst::getIndexedType(Agg->getType(), IdxList, IdxList+NumIdx);
+ assert(ReqTy && "extractvalue indices invalid!");
+ return getExtractValueTy(ReqTy, Agg, IdxList, NumIdx);
+}
+
+Constant *ConstantExpr::getZeroValueForNegationExpr(const Type *Ty) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
+ if (PTy->getElementType()->isFloatingPoint()) {
+ std::vector<Constant*> zeros(PTy->getNumElements(),
+ ConstantFP::getNegativeZero(PTy->getElementType()));
+ return ConstantVector::get(PTy, zeros);
+ }
+
+ if (Ty->isFloatingPoint())
+ return ConstantFP::getNegativeZero(Ty);
+
+ return Constant::getNullValue(Ty);
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantExpr::destroyConstant() {
+ ExprConstants->remove(this);
+ destroyConstantImpl();
+}
+
+const char *ConstantExpr::getOpcodeName() const {
+ return Instruction::getOpcodeName(getOpcode());
+}
+
+//===----------------------------------------------------------------------===//
+// replaceUsesOfWithOnConstant implementations
+
+/// replaceUsesOfWithOnConstant - Update this constant array to change uses of
+/// 'From' to be uses of 'To'. This must update the uniquing data structures
+/// etc.
+///
+/// Note that we intentionally replace all uses of From with To here. Consider
+/// a large array that uses 'From' 1000 times. By handling this case all here,
+/// ConstantArray::replaceUsesOfWithOnConstant is only invoked once, and that
+/// single invocation handles all 1000 uses. Handling them one at a time would
+/// work, but would be really slow because it would have to unique each updated
+/// array instance.
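+///
+/// For example (editorial note), replacing the hypothetical global @g with
+/// @h in [4 x i8*] [i8* @g, i8* @g, i8* @g, i8* @g] rebuilds and re-uniques
+/// the array once rather than four times.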
+void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ std::pair<ArrayConstantsTy::MapKey, Constant*> Lookup;
+ Lookup.first.first = getType();
+ Lookup.second = this;
+
+ std::vector<Constant*> &Values = Lookup.first.second;
+ Values.reserve(getNumOperands()); // Build replacement array.
+
+ // Fill values with the modified operands of the constant array. Also,
+ // compute whether this turns into an all-zeros array.
+ bool isAllZeros = false;
+ unsigned NumUpdated = 0;
+ if (!ToC->isNullValue()) {
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
+ }
+ Values.push_back(Val);
+ }
+ } else {
+ isAllZeros = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
+ }
+ Values.push_back(Val);
+ if (isAllZeros) isAllZeros = Val->isNullValue();
+ }
+ }
+
+ Constant *Replacement = 0;
+ if (isAllZeros) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else {
+ // Check to see if we have this array type already.
+ bool Exists;
+ ArrayConstantsTy::MapTy::iterator I =
+ ArrayConstants->InsertOrGetItem(Lookup, Exists);
+
+ if (Exists) {
+ Replacement = I->second;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant array, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ ArrayConstants->MoveConstantToNewSlot(this, I);
+
+ // Update to the new value. Optimize for the case when we have a single
+ // operand that we're changing, but handle bulk updates efficiently.
+ if (NumUpdated == 1) {
+ unsigned OperandToUpdate = U-OperandList;
+ assert(getOperand(OperandToUpdate) == From &&
+ "ReplaceAllUsesWith broken!");
+ setOperand(OperandToUpdate, ToC);
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) == From)
+ setOperand(i, ToC);
+ }
+ return;
+ }
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ uncheckedReplaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ unsigned OperandToUpdate = U-OperandList;
+ assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
+
+ std::pair<StructConstantsTy::MapKey, Constant*> Lookup;
+ Lookup.first.first = getType();
+ Lookup.second = this;
+ std::vector<Constant*> &Values = Lookup.first.second;
+ Values.reserve(getNumOperands()); // Build replacement struct.
+
+
+ // Fill values with the modified operands of the constant struct. Also,
+ // compute whether this turns into an all-zeros struct.
+ bool isAllZeros = false;
+ if (!ToC->isNullValue()) {
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O)
+ Values.push_back(cast<Constant>(O->get()));
+ } else {
+ isAllZeros = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllZeros) isAllZeros = Val->isNullValue();
+ }
+ }
+ Values[OperandToUpdate] = ToC;
+
+ Constant *Replacement = 0;
+ if (isAllZeros) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else {
+    // Check to see if we have this struct type already.
+ bool Exists;
+ StructConstantsTy::MapTy::iterator I =
+ StructConstants->InsertOrGetItem(Lookup, Exists);
+
+ if (Exists) {
+ Replacement = I->second;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant struct, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ StructConstants->MoveConstantToNewSlot(this, I);
+
+ // Update to the new value.
+ setOperand(OperandToUpdate, ToC);
+ return;
+ }
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ uncheckedReplaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+
+ std::vector<Constant*> Values;
+  Values.reserve(getNumOperands()); // Build replacement vector...
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Val = getOperand(i);
+ if (Val == From) Val = cast<Constant>(To);
+ Values.push_back(Val);
+ }
+
+ Constant *Replacement = ConstantVector::get(getType(), Values);
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ uncheckedReplaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
+ Use *U) {
+ assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
+ Constant *To = cast<Constant>(ToV);
+
+ Constant *Replacement = 0;
+ if (getOpcode() == Instruction::GetElementPtr) {
+ SmallVector<Constant*, 8> Indices;
+ Constant *Pointer = getOperand(0);
+ Indices.reserve(getNumOperands()-1);
+ if (Pointer == From) Pointer = To;
+
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ Constant *Val = getOperand(i);
+ if (Val == From) Val = To;
+ Indices.push_back(Val);
+ }
+ Replacement = ConstantExpr::getGetElementPtr(Pointer,
+ &Indices[0], Indices.size());
+ } else if (getOpcode() == Instruction::ExtractValue) {
+ Constant *Agg = getOperand(0);
+ if (Agg == From) Agg = To;
+
+ const SmallVector<unsigned, 4> &Indices = getIndices();
+ Replacement = ConstantExpr::getExtractValue(Agg,
+ &Indices[0], Indices.size());
+ } else if (getOpcode() == Instruction::InsertValue) {
+ Constant *Agg = getOperand(0);
+ Constant *Val = getOperand(1);
+ if (Agg == From) Agg = To;
+ if (Val == From) Val = To;
+
+ const SmallVector<unsigned, 4> &Indices = getIndices();
+ Replacement = ConstantExpr::getInsertValue(Agg, Val,
+ &Indices[0], Indices.size());
+ } else if (isCast()) {
+ assert(getOperand(0) == From && "Cast only has one use!");
+ Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
+ } else if (getOpcode() == Instruction::Select) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getSelect(C1, C2, C3);
+ } else if (getOpcode() == Instruction::ExtractElement) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ Replacement = ConstantExpr::getExtractElement(C1, C2);
+ } else if (getOpcode() == Instruction::InsertElement) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+    Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getInsertElement(C1, C2, C3);
+ } else if (getOpcode() == Instruction::ShuffleVector) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ Constant *C3 = getOperand(2);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (C3 == From) C3 = To;
+ Replacement = ConstantExpr::getShuffleVector(C1, C2, C3);
+ } else if (isCompare()) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ if (getOpcode() == Instruction::ICmp)
+ Replacement = ConstantExpr::getICmp(getPredicate(), C1, C2);
+ else if (getOpcode() == Instruction::FCmp)
+ Replacement = ConstantExpr::getFCmp(getPredicate(), C1, C2);
+ else if (getOpcode() == Instruction::VICmp)
+ Replacement = ConstantExpr::getVICmp(getPredicate(), C1, C2);
+ else {
+ assert(getOpcode() == Instruction::VFCmp);
+ Replacement = ConstantExpr::getVFCmp(getPredicate(), C1, C2);
+ }
+ } else if (getNumOperands() == 2) {
+ Constant *C1 = getOperand(0);
+ Constant *C2 = getOperand(1);
+ if (C1 == From) C1 = To;
+ if (C2 == From) C2 = To;
+ Replacement = ConstantExpr::get(getOpcode(), C1, C2);
+ } else {
+ assert(0 && "Unknown ConstantExpr type!");
+ return;
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ uncheckedReplaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void MDNode::replaceElement(Value *From, Value *To) {
+ SmallVector<Value*, 4> Values;
+ Values.reserve(getNumElements()); // Build replacement array...
+ for (unsigned i = 0, e = getNumElements(); i != e; ++i) {
+ Value *Val = getElement(i);
+ if (Val == From) Val = To;
+ Values.push_back(Val);
+ }
+
+ MDNode *Replacement = MDNode::get(&Values[0], Values.size());
+ assert(Replacement != this && "I didn't contain From!");
+
+ uncheckedReplaceAllUsesWith(Replacement);
+
+ destroyConstant();
+}
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
new file mode 100644
index 0000000..f85dbe7
--- /dev/null
+++ b/lib/VMCore/Core.cpp
@@ -0,0 +1,1450 @@
+//===-- Core.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMCore.a, which implements
+// the LLVM intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Core.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/CallSite.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+
+/*===-- Error handling ----------------------------------------------------===*/
+
+void LLVMDisposeMessage(char *Message) {
+ free(Message);
+}
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) {
+ return wrap(new Module(ModuleID));
+}
+
+void LLVMDisposeModule(LLVMModuleRef M) {
+ delete unwrap(M);
+}
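+
+// Editorial example of the expected lifecycle from C (not part of the
+// original source):
+//   LLVMModuleRef M = LLVMModuleCreateWithName("demo");
+//   LLVMDumpModule(M);   /* any use of the module goes here */
+//   LLVMDisposeModule(M);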
+
+/*--.. Data layout .........................................................--*/
+const char * LLVMGetDataLayout(LLVMModuleRef M) {
+ return unwrap(M)->getDataLayout().c_str();
+}
+
+void LLVMSetDataLayout(LLVMModuleRef M, const char *DataLayoutStr) {
+  unwrap(M)->setDataLayout(DataLayoutStr);
+}
+
+/*--.. Target triple .......................................................--*/
+const char * LLVMGetTarget(LLVMModuleRef M) {
+ return unwrap(M)->getTargetTriple().c_str();
+}
+
+void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
+ unwrap(M)->setTargetTriple(Triple);
+}
+
+/*--.. Type names ..........................................................--*/
+int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) {
+ return unwrap(M)->addTypeName(Name, unwrap(Ty));
+}
+
+void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name) {
+ std::string N(Name);
+
+ TypeSymbolTable &TST = unwrap(M)->getTypeSymbolTable();
+  for (TypeSymbolTable::iterator I = TST.begin(), E = TST.end(); I != E; ++I)
+    if (I->first == N) {
+      TST.remove(I);  // The iterator is invalidated by remove, so stop here.
+      break;
+    }
+}
+
+void LLVMDumpModule(LLVMModuleRef M) {
+ unwrap(M)->dump();
+}
+
+
+/*===-- Operations on types -----------------------------------------------===*/
+
+/*--.. Operations on all types (mostly) ....................................--*/
+
+LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
+ return static_cast<LLVMTypeKind>(unwrap(Ty)->getTypeID());
+}
+
+/*--.. Operations on integer types .........................................--*/
+
+LLVMTypeRef LLVMInt1Type(void) { return (LLVMTypeRef) Type::Int1Ty; }
+LLVMTypeRef LLVMInt8Type(void) { return (LLVMTypeRef) Type::Int8Ty; }
+LLVMTypeRef LLVMInt16Type(void) { return (LLVMTypeRef) Type::Int16Ty; }
+LLVMTypeRef LLVMInt32Type(void) { return (LLVMTypeRef) Type::Int32Ty; }
+LLVMTypeRef LLVMInt64Type(void) { return (LLVMTypeRef) Type::Int64Ty; }
+
+LLVMTypeRef LLVMIntType(unsigned NumBits) {
+ return wrap(IntegerType::get(NumBits));
+}
+
+unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
+ return unwrap<IntegerType>(IntegerTy)->getBitWidth();
+}
+
+/*--.. Operations on real types ............................................--*/
+
+LLVMTypeRef LLVMFloatType(void) { return (LLVMTypeRef) Type::FloatTy; }
+LLVMTypeRef LLVMDoubleType(void) { return (LLVMTypeRef) Type::DoubleTy; }
+LLVMTypeRef LLVMX86FP80Type(void) { return (LLVMTypeRef) Type::X86_FP80Ty; }
+LLVMTypeRef LLVMFP128Type(void) { return (LLVMTypeRef) Type::FP128Ty; }
+LLVMTypeRef LLVMPPCFP128Type(void) { return (LLVMTypeRef) Type::PPC_FP128Ty; }
+
+/*--.. Operations on function types ........................................--*/
+
+LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
+ LLVMTypeRef *ParamTypes, unsigned ParamCount,
+ int IsVarArg) {
+ std::vector<const Type*> Tys;
+ for (LLVMTypeRef *I = ParamTypes, *E = ParamTypes + ParamCount; I != E; ++I)
+ Tys.push_back(unwrap(*I));
+
+ return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
+}
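+
+// Editorial example: building the type i32 (i8*, i32) through the C API:
+//   LLVMTypeRef Params[] = { LLVMPointerType(LLVMInt8Type(), 0),
+//                            LLVMInt32Type() };
+//   LLVMTypeRef FTy = LLVMFunctionType(LLVMInt32Type(), Params, 2,
+//                                      /*IsVarArg=*/0);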
+
+int LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->isVarArg();
+}
+
+LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy) {
+ return wrap(unwrap<FunctionType>(FunctionTy)->getReturnType());
+}
+
+unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->getNumParams();
+}
+
+void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
+ FunctionType *Ty = unwrap<FunctionType>(FunctionTy);
+ for (FunctionType::param_iterator I = Ty->param_begin(),
+ E = Ty->param_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
+/*--.. Operations on struct types ..........................................--*/
+
+LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, int Packed) {
+ std::vector<const Type*> Tys;
+ for (LLVMTypeRef *I = ElementTypes,
+ *E = ElementTypes + ElementCount; I != E; ++I)
+ Tys.push_back(unwrap(*I));
+
+ return wrap(StructType::get(Tys, Packed != 0));
+}
+
+unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->getNumElements();
+}
+
+void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
+ StructType *Ty = unwrap<StructType>(StructTy);
+  for (StructType::element_iterator I = Ty->element_begin(),
+                                    E = Ty->element_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
+int LLVMIsPackedStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isPacked();
+}
+
+/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
+
+LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) {
+ return wrap(PointerType::get(unwrap(ElementType), AddressSpace));
+}
+
+LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(VectorType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty) {
+ return wrap(unwrap<SequentialType>(Ty)->getElementType());
+}
+
+unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
+ return unwrap<ArrayType>(ArrayTy)->getNumElements();
+}
+
+unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
+ return unwrap<PointerType>(PointerTy)->getAddressSpace();
+}
+
+unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
+ return unwrap<VectorType>(VectorTy)->getNumElements();
+}
+
+/*--.. Operations on other types ...........................................--*/
+
+LLVMTypeRef LLVMVoidType(void) { return (LLVMTypeRef) Type::VoidTy; }
+LLVMTypeRef LLVMLabelType(void) { return (LLVMTypeRef) Type::LabelTy; }
+
+LLVMTypeRef LLVMOpaqueType(void) {
+ return wrap(llvm::OpaqueType::get());
+}
+
+/*--.. Operations on type handles ..........................................--*/
+
+LLVMTypeHandleRef LLVMCreateTypeHandle(LLVMTypeRef PotentiallyAbstractTy) {
+ return wrap(new PATypeHolder(unwrap(PotentiallyAbstractTy)));
+}
+
+void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle) {
+ delete unwrap(TypeHandle);
+}
+
+LLVMTypeRef LLVMResolveTypeHandle(LLVMTypeHandleRef TypeHandle) {
+ return wrap(unwrap(TypeHandle)->get());
+}
+
+void LLVMRefineType(LLVMTypeRef AbstractTy, LLVMTypeRef ConcreteTy) {
+ unwrap<DerivedType>(AbstractTy)->refineAbstractTypeTo(unwrap(ConcreteTy));
+}
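+
+/* The handle/refine API above exists to let C clients build recursive
+   types.  Illustrative sketch of constructing %t = type { %t* }:
+
+     LLVMTypeHandleRef H = LLVMCreateTypeHandle(LLVMOpaqueType());
+     LLVMTypeRef Elt = LLVMPointerType(LLVMResolveTypeHandle(H), 0);
+     LLVMTypeRef S = LLVMStructType(&Elt, 1, 0);
+     LLVMRefineType(LLVMResolveTypeHandle(H), S);
+     S = LLVMResolveTypeHandle(H);  // the concrete, self-referential type
+     LLVMDisposeTypeHandle(H);
+*/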
+
+
+/*===-- Operations on values ----------------------------------------------===*/
+
+/*--.. Operations on all values ............................................--*/
+
+LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) {
+ return wrap(unwrap(Val)->getType());
+}
+
+const char *LLVMGetValueName(LLVMValueRef Val) {
+ return unwrap(Val)->getNameStart();
+}
+
+void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
+ unwrap(Val)->setName(Name);
+}
+
+void LLVMDumpValue(LLVMValueRef Val) {
+ unwrap(Val)->dump();
+}
+
+
+/*--.. Conversion functions ................................................--*/
+
+#define LLVM_DEFINE_VALUE_CAST(name) \
+ LLVMValueRef LLVMIsA##name(LLVMValueRef Val) { \
+ return wrap(static_cast<Value*>(dyn_cast_or_null<name>(unwrap(Val)))); \
+ }
+
+LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
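+
+/* The macro above stamps out one checked-downcast function per Value
+   subclass, e.g. LLVMIsAConstantInt(LLVMValueRef); each returns its
+   argument when the value has the named class and NULL otherwise.
+   Illustrative use:
+
+     if (LLVMIsAConstantInt(V))
+       fold_constant(V);  // fold_constant is a hypothetical caller helper
+*/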
+
+
+/*--.. Operations on constants of any type .................................--*/
+
+LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) {
+ return wrap(Constant::getNullValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty) {
+ return wrap(Constant::getAllOnesValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) {
+ return wrap(UndefValue::get(unwrap(Ty)));
+}
+
+int LLVMIsConstant(LLVMValueRef Val) {
+ return isa<Constant>(unwrap(Val));
+}
+
+int LLVMIsNull(LLVMValueRef Val) {
+ if (Constant *C = dyn_cast<Constant>(unwrap(Val)))
+ return C->isNullValue();
+ return false;
+}
+
+int LLVMIsUndef(LLVMValueRef Val) {
+ return isa<UndefValue>(unwrap(Val));
+}
+
+/*--.. Operations on scalar constants ......................................--*/
+
+LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
+ int SignExtend) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
+}
+
+static const fltSemantics &SemanticsForType(Type *Ty) {
+ assert(Ty->isFloatingPoint() && "Type is not floating point!");
+ if (Ty == Type::FloatTy)
+ return APFloat::IEEEsingle;
+ if (Ty == Type::DoubleTy)
+ return APFloat::IEEEdouble;
+ if (Ty == Type::X86_FP80Ty)
+ return APFloat::x87DoubleExtended;
+ if (Ty == Type::FP128Ty)
+ return APFloat::IEEEquad;
+ if (Ty == Type::PPC_FP128Ty)
+ return APFloat::PPCDoubleDouble;
+ return APFloat::Bogus;
+}
+
+LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N) {
+ APFloat APN(N);
+ bool ignored;
+ APN.convert(SemanticsForType(unwrap(RealTy)), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return wrap(ConstantFP::get(APN));
+}
+
+LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) {
+ return wrap(ConstantFP::get(APFloat(SemanticsForType(unwrap(RealTy)), Text)));
+}
+
+/*--.. Operations on composite constants ...................................--*/
+
+LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
+ int DontNullTerminate) {
+ /* Inverted the sense of AddNull because ', 0)' is a
+ better mnemonic for null termination than ', 1)'. */
+ return wrap(ConstantArray::get(std::string(Str, Length),
+ DontNullTerminate == 0));
+}
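+
+/* Illustrative: with the inverted flag, passing 0 requests the trailing
+   NUL and 1 suppresses it:
+
+     LLVMConstString("hi", 2, 0);  // [3 x i8] c"hi\00"
+     LLVMConstString("hi", 2, 1);  // [2 x i8] c"hi"
+*/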
+
+LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
+ LLVMValueRef *ConstantVals, unsigned Length) {
+ return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length),
+ unwrap<Constant>(ConstantVals, Length),
+ Length));
+}
+
+LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
+ int Packed) {
+ return wrap(ConstantStruct::get(unwrap<Constant>(ConstantVals, Count),
+ Count, Packed != 0));
+}
+
+LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
+ return wrap(ConstantVector::get(unwrap<Constant>(ScalarConstantVals, Size),
+ Size));
+}
+
+/*--.. Constant expressions ................................................--*/
+
+LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getAlignOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getSizeOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getICmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFCmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices, unsigned NumIndices) {
+ return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
+ unwrap<Constant>(ConstantIndices,
+ NumIndices),
+ NumIndices));
+}
+
+LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToUI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
+ LLVMValueRef ConstantIfTrue,
+ LLVMValueRef ConstantIfFalse) {
+ return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
+ unwrap<Constant>(ConstantIfTrue),
+ unwrap<Constant>(ConstantIfFalse)));
+}
+
+LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
+ LLVMValueRef ElementValueConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(ElementValueConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
+ LLVMValueRef VectorBConstant,
+ LLVMValueRef MaskConstant) {
+ return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
+ unwrap<Constant>(VectorBConstant),
+ unwrap<Constant>(MaskConstant)));
+}
+
+LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
+ unsigned NumIdx) {
+ return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
+ IdxList, NumIdx));
+}
+
+LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
+ LLVMValueRef ElementValueConstant,
+ unsigned *IdxList, unsigned NumIdx) {
+ return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
+ unwrap<Constant>(ElementValueConstant),
+ IdxList, NumIdx));
+}
+
+LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
+ const char *Constraints, int HasSideEffects) {
+ return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
+ Constraints, HasSideEffects));
+}
+
+/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
+
+LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
+ return wrap(unwrap<GlobalValue>(Global)->getParent());
+}
+
+int LLVMIsDeclaration(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->isDeclaration();
+}
+
+LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
+ return static_cast<LLVMLinkage>(unwrap<GlobalValue>(Global)->getLinkage());
+}
+
+void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
+ unwrap<GlobalValue>(Global)
+ ->setLinkage(static_cast<GlobalValue::LinkageTypes>(Linkage));
+}
+
+const char *LLVMGetSection(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getSection().c_str();
+}
+
+void LLVMSetSection(LLVMValueRef Global, const char *Section) {
+ unwrap<GlobalValue>(Global)->setSection(Section);
+}
+
+LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) {
+ return static_cast<LLVMVisibility>(
+ unwrap<GlobalValue>(Global)->getVisibility());
+}
+
+void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) {
+ unwrap<GlobalValue>(Global)
+ ->setVisibility(static_cast<GlobalValue::VisibilityTypes>(Viz));
+}
+
+unsigned LLVMGetAlignment(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getAlignment();
+}
+
+void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) {
+ unwrap<GlobalValue>(Global)->setAlignment(Bytes);
+}
+
+/*--.. Operations on global variables ......................................--*/
+
+LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
+ return wrap(new GlobalVariable(unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name,
+ unwrap(M)));
+}
+
+LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getNamedGlobal(Name));
+}
+
+LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_begin();
+ if (I == Mod->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_end();
+ if (I == Mod->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (++I == GV->getParent()->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (I == GV->getParent()->global_begin())
+ return 0;
+ return wrap(--I);
+}
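+
+/* Illustrative forward-iteration idiom over a module's globals; the same
+   first/next pattern recurs below for functions, parameters, basic blocks,
+   and instructions:
+
+     for (LLVMValueRef G = LLVMGetFirstGlobal(M); G;
+          G = LLVMGetNextGlobal(G))
+       visit_global(G);  // visit_global is a hypothetical callback
+*/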
+
+void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
+ unwrap<GlobalVariable>(GlobalVar)->eraseFromParent();
+}
+
+LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) {
+ return wrap(unwrap<GlobalVariable>(GlobalVar)->getInitializer());
+}
+
+void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) {
+ unwrap<GlobalVariable>(GlobalVar)
+ ->setInitializer(unwrap<Constant>(ConstantVal));
+}
+
+int LLVMIsThreadLocal(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isThreadLocal();
+}
+
+void LLVMSetThreadLocal(LLVMValueRef GlobalVar, int IsThreadLocal) {
+ unwrap<GlobalVariable>(GlobalVar)->setThreadLocal(IsThreadLocal != 0);
+}
+
+int LLVMIsGlobalConstant(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isConstant();
+}
+
+void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, int IsConstant) {
+ unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
+}
+
+/*--.. Operations on aliases ......................................--*/
+
+LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
+ const char *Name) {
+ return wrap(new GlobalAlias(unwrap(Ty), GlobalValue::ExternalLinkage, Name,
+ unwrap<Constant>(Aliasee), unwrap (M)));
+}
+
+/*--.. Operations on functions .............................................--*/
+
+LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
+ LLVMTypeRef FunctionTy) {
+ return wrap(Function::Create(unwrap<FunctionType>(FunctionTy),
+ GlobalValue::ExternalLinkage, Name, unwrap(M)));
+}
+
+LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getFunction(Name));
+}
+
+LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->begin();
+ if (I == Mod->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->end();
+ if (I == Mod->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (++I == Func->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (I == Func->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMDeleteFunction(LLVMValueRef Fn) {
+ unwrap<Function>(Fn)->eraseFromParent();
+}
+
+unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
+ if (Function *F = dyn_cast<Function>(unwrap(Fn)))
+ return F->getIntrinsicID();
+ return 0;
+}
+
+unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
+ return unwrap<Function>(Fn)->getCallingConv();
+}
+
+void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
+ return unwrap<Function>(Fn)->setCallingConv(CC);
+}
+
+const char *LLVMGetGC(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ return F->hasGC()? F->getGC() : 0;
+}
+
+void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
+ Function *F = unwrap<Function>(Fn);
+ if (GC)
+ F->setGC(GC);
+ else
+ F->clearGC();
+}
+
+void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ const AttrListPtr PALnew = PAL.addAttr(0, PA);
+ Func->setAttributes(PALnew);
+}
+
+void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttrListPtr PAL = Func->getAttributes();
+ const AttrListPtr PALnew = PAL.removeAttr(0, PA);
+ Func->setAttributes(PALnew);
+}
+
+/*--.. Operations on parameters ............................................--*/
+
+unsigned LLVMCountParams(LLVMValueRef FnRef) {
+ // This function is strictly redundant to
+ // LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(FnRef)))
+ return unwrap<Function>(FnRef)->arg_size();
+}
+
+void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) {
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::arg_iterator I = Fn->arg_begin(),
+ E = Fn->arg_end(); I != E; I++)
+ *ParamRefs++ = wrap(I);
+}
+
+LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) {
+ Function::arg_iterator AI = unwrap<Function>(FnRef)->arg_begin();
+ while (index-- > 0)
+ ++AI;
+ return wrap(AI);
+}
+
+LLVMValueRef LLVMGetParamParent(LLVMValueRef V) {
+ return wrap(unwrap<Argument>(V)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_begin();
+ if (I == Func->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_end();
+ if (I == Func->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (++I == A->getParent()->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (I == A->getParent()->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ unwrap<Argument>(Arg)->addAttr(PA);
+}
+
+void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ unwrap<Argument>(Arg)->removeAttr(PA);
+}
+
+void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
+ unwrap<Argument>(Arg)->addAttr(
+ Attribute::constructAlignmentFromInt(align));
+}
+
+/*--.. Operations on basic blocks ..........................................--*/
+
+LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
+ return wrap(static_cast<Value*>(unwrap(BB)));
+}
+
+int LLVMValueIsBasicBlock(LLVMValueRef Val) {
+ return isa<BasicBlock>(unwrap(Val));
+}
+
+LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val) {
+ return wrap(unwrap<BasicBlock>(Val));
+}
+
+LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) {
+ return wrap(unwrap(BB)->getParent());
+}
+
+unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) {
+ return unwrap<Function>(FnRef)->size();
+}
+
+void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++)
+ *BasicBlocksRefs++ = wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) {
+ return wrap(&unwrap<Function>(Fn)->getEntryBlock());
+}
+
+LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->begin();
+ if (I == Func->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->end();
+ if (I == Func->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (++I == Block->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (I == Block->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef FnRef, const char *Name) {
+ return wrap(BasicBlock::Create(Name, unwrap<Function>(FnRef)));
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBBRef,
+ const char *Name) {
+ BasicBlock *InsertBeforeBB = unwrap(InsertBeforeBBRef);
+ return wrap(BasicBlock::Create(Name, InsertBeforeBB->getParent(),
+ InsertBeforeBB));
+}
+
+void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
+ unwrap(BBRef)->eraseFromParent();
+}
+
+/*--.. Operations on instructions ..........................................--*/
+
+LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
+ return wrap(unwrap<Instruction>(Inst)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->begin();
+ if (I == Block->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->end();
+ if (I == Block->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (++I == Instr->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (I == Instr->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+/*--.. Call and invoke instructions ........................................--*/
+
+unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->getCallingConv();
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->getCallingConv();
+ assert(0 && "LLVMGetInstructionCallConv applies only to call and invoke!");
+ return 0;
+}
+
+void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->setCallingConv(CC);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->setCallingConv(CC);
+ assert(0 && "LLVMSetInstructionCallConv applies only to call and invoke!");
+}
+
+void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().addAttr(index, PA));
+}
+
+void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().removeAttr(index, PA));
+}
+
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+ unsigned align) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ Call.setAttributes(
+ Call.getAttributes().addAttr(index,
+ Attribute::constructAlignmentFromInt(align)));
+}
+
+/*--.. Operations on call instructions (only) ..............................--*/
+
+int LLVMIsTailCall(LLVMValueRef Call) {
+ return unwrap<CallInst>(Call)->isTailCall();
+}
+
+void LLVMSetTailCall(LLVMValueRef Call, int isTailCall) {
+ unwrap<CallInst>(Call)->setTailCall(isTailCall);
+}
+
+/*--.. Operations on phi nodes .............................................--*/
+
+void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
+ LLVMBasicBlockRef *IncomingBlocks, unsigned Count) {
+ PHINode *PhiVal = unwrap<PHINode>(PhiNode);
+ for (unsigned I = 0; I != Count; ++I)
+ PhiVal->addIncoming(unwrap(IncomingValues[I]), unwrap(IncomingBlocks[I]));
+}
+
+unsigned LLVMCountIncoming(LLVMValueRef PhiNode) {
+ return unwrap<PHINode>(PhiNode)->getNumIncomingValues();
+}
+
+LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingValue(Index));
+}
+
+LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingBlock(Index));
+}
+
+
+/*===-- Instruction builders ----------------------------------------------===*/
+
+LLVMBuilderRef LLVMCreateBuilder(void) {
+ return wrap(new IRBuilder<>());
+}
+
+void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
+ LLVMValueRef Instr) {
+ BasicBlock *BB = unwrap(Block);
+ Instruction *I = Instr? unwrap<Instruction>(Instr) : (Instruction*) BB->end();
+ unwrap(Builder)->SetInsertPoint(BB, I);
+}
+
+void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ Instruction *I = unwrap<Instruction>(Instr);
+ unwrap(Builder)->SetInsertPoint(I->getParent(), I);
+}
+
+void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) {
+ BasicBlock *BB = unwrap(Block);
+ unwrap(Builder)->SetInsertPoint(BB);
+}
+
+LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->GetInsertBlock());
+}
+
+void LLVMClearInsertionPosition(LLVMBuilderRef Builder) {
+ unwrap(Builder)->ClearInsertionPoint();
+}
+
+void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr));
+}
+
+void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
+ delete unwrap(Builder);
+}
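+
+/* Illustrative end-to-end sketch of the builder functions defined below:
+   emitting the body of an i32 (i32, i32) addition function.  Assumes Fn
+   was created with LLVMAddFunction and has that type.
+
+     LLVMBuilderRef B = LLVMCreateBuilder();
+     LLVMBasicBlockRef Entry = LLVMAppendBasicBlock(Fn, "entry");
+     LLVMPositionBuilderAtEnd(B, Entry);
+     LLVMValueRef Sum = LLVMBuildAdd(B, LLVMGetParam(Fn, 0),
+                                     LLVMGetParam(Fn, 1), "sum");
+     LLVMBuildRet(B, Sum);
+     LLVMDisposeBuilder(B);
+*/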
+
+/*--.. Instruction builders ................................................--*/
+
+LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateRetVoid());
+}
+
+LLVMValueRef LLVMBuildRet(LLVMBuilderRef B, LLVMValueRef V) {
+ return wrap(unwrap(B)->CreateRet(unwrap(V)));
+}
+
+LLVMValueRef LLVMBuildBr(LLVMBuilderRef B, LLVMBasicBlockRef Dest) {
+ return wrap(unwrap(B)->CreateBr(unwrap(Dest)));
+}
+
+LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Else) {
+ return wrap(unwrap(B)->CreateCondBr(unwrap(If), unwrap(Then), unwrap(Else)));
+}
+
+LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V,
+ LLVMBasicBlockRef Else, unsigned NumCases) {
+ return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases));
+}
+
+LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
+ unwrap(Args), unwrap(Args) + NumArgs,
+ Name));
+}
+
+LLVMValueRef LLVMBuildUnwind(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateUnwind());
+}
+
+LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateUnreachable());
+}
+
+void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
+ LLVMBasicBlockRef Dest) {
+ unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest));
+}
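+
+/* Illustrative: a switch is created with a case-count hint and populated
+   one case at a time (B, V, and the target blocks are assumed to exist):
+
+     LLVMValueRef Sw = LLVMBuildSwitch(B, V, DefaultBB, 2);
+     LLVMAddCase(Sw, LLVMConstInt(LLVMInt32Type(), 0, 0), ZeroBB);
+     LLVMAddCase(Sw, LLVMConstInt(LLVMInt32Type(), 1, 0), OneBB);
+*/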
+
+/*--.. Arithmetic ..........................................................--*/
+
+LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildURem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateURem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildShl(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShl(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildLShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAnd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAnd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildOr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateOr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNot(unwrap(V), Name));
+}
+
+/*--.. Memory ..............................................................--*/
+
+LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateMalloc(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ return wrap(unwrap(B)->CreateMalloc(unwrap(Ty), unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->CreateFree(unwrap(PointerVal)));
+}
+
+
+LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
+}
+
+LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal)));
+}
+
+LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), unwrap(Indices),
+ unwrap(Indices) + NumIndices, Name));
+}
+
+/*--.. Casts ...............................................................--*/
+
+LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToUI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToSI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateUIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrToInt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntToPtr(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+/*--.. Comparisons .........................................................--*/
+
+LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateICmp(static_cast<ICmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFCmp(static_cast<FCmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+/*--.. Miscellaneous instructions ..........................................--*/
+
+LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreatePHI(unwrap(Ty), Name));
+}
+
+LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateCall(unwrap(Fn), unwrap(Args),
+ unwrap(Args) + NumArgs, Name));
+}
+
+LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMValueRef Then, LLVMValueRef Else,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSelect(unwrap(If), unwrap(Then), unwrap(Else),
+ Name));
+}
+
+LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef B, LLVMValueRef List,
+ LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreateVAArg(unwrap(List), unwrap(Ty), Name));
+}
+
+LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractElement(unwrap(VecVal), unwrap(Index),
+ Name));
+}
+
+LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef EltVal, LLVMValueRef Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertElement(unwrap(VecVal), unwrap(EltVal),
+ unwrap(Index), Name));
+}
+
+LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef B, LLVMValueRef V1,
+ LLVMValueRef V2, LLVMValueRef Mask,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShuffleVector(unwrap(V1), unwrap(V2),
+ unwrap(Mask), Name));
+}
+
+LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ unsigned Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractValue(unwrap(AggVal), Index, Name));
+}
+
+LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ LLVMValueRef EltVal, unsigned Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertValue(unwrap(AggVal), unwrap(EltVal),
+ Index, Name));
+}
+
+
+/*===-- Module providers --------------------------------------------------===*/
+
+LLVMModuleProviderRef
+LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) {
+ return wrap(new ExistingModuleProvider(unwrap(M)));
+}
+
+void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) {
+ delete unwrap(MP);
+}
+
+
+/*===-- Memory buffers ----------------------------------------------------===*/
+
+int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
+ LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+ std::string Error;
+ if (MemoryBuffer *MB = MemoryBuffer::getFile(Path, &Error)) {
+ *OutMemBuf = wrap(MB);
+ return 0;
+ }
+
+ *OutMessage = strdup(Error.c_str());
+ return 1;
+}
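+
+/* Illustrative error-handling pattern for this entry point ("input.bc" is
+   a made-up path); a zero return means success, otherwise the caller owns
+   the strdup'ed message:
+
+     LLVMMemoryBufferRef MB;
+     char *Msg;
+     if (LLVMCreateMemoryBufferWithContentsOfFile("input.bc", &MB, &Msg)) {
+       fprintf(stderr, "error: %s\n", Msg);  // <stdio.h>
+       free(Msg);                            // <stdlib.h>
+     } else {
+       // ... use MB ..., then:
+       LLVMDisposeMemoryBuffer(MB);
+     }
+*/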
+
+int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+ if (MemoryBuffer *MB = MemoryBuffer::getSTDIN()) {
+ *OutMemBuf = wrap(MB);
+ return 0;
+ }
+
+ *OutMessage = strdup("stdin is empty.");
+ return 1;
+}
+
+void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
+ delete unwrap(MemBuf);
+}
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
new file mode 100644
index 0000000..735a70c
--- /dev/null
+++ b/lib/VMCore/Dominators.cpp
@@ -0,0 +1,287 @@
+//===- Dominators.cpp - Dominator Calculation -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators. Postdominators are available in libanalysis, but are not
+// included in libvmcore, because they are not needed. Forward dominators are
+// needed to support the Verifier pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Streams.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace llvm {
+static std::ostream &operator<<(std::ostream &o,
+ const std::set<BasicBlock*> &BBs) {
+ for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+ I != E; ++I)
+ if (*I)
+ WriteAsOperand(o, *I, false);
+ else
+ o << " <<exit node>>";
+ return o;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// DominatorTree Implementation
+//===----------------------------------------------------------------------===//
+//
+// Provide public access to DominatorTree information. Implementation details
+// can be found in DominatorInternals.h.
+//
+//===----------------------------------------------------------------------===//
+
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<BasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<BasicBlock>);
+
+char DominatorTree::ID = 0;
+static RegisterPass<DominatorTree>
+E("domtree", "Dominator Tree Construction", true, true);
+
+bool DominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// DominanceFrontier Implementation
+//===----------------------------------------------------------------------===//
+
+char DominanceFrontier::ID = 0;
+static RegisterPass<DominanceFrontier>
+G("domfrontier", "Dominance Frontier Construction", true, true);
+
+// NewBB is split and now it has one successor. Update the dominance frontier to
+// reflect this change.
+void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
+ assert(NewBB->getTerminator()->getNumSuccessors() == 1
+ && "NewBB should have a single successor!");
+ BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0);
+
+ std::vector<BasicBlock*> PredBlocks;
+ for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB);
+ PI != PE; ++PI)
+ PredBlocks.push_back(*PI);
+
+ if (PredBlocks.empty())
+ // If NewBB does not have any predecessors then it is an entry block.
+ // In this case, NewBB and its successor NewBBSucc dominate all
+ // other blocks.
+ return;
+
+ // NewBBSucc inherits original NewBB frontier.
+ DominanceFrontier::iterator NewBBI = find(NewBB);
+ if (NewBBI != end()) {
+ DominanceFrontier::DomSetType NewBBSet = NewBBI->second;
+ DominanceFrontier::DomSetType NewBBSuccSet;
+ NewBBSuccSet.insert(NewBBSet.begin(), NewBBSet.end());
+ addBasicBlock(NewBBSucc, NewBBSuccSet);
+ }
+
+ // If NewBB dominates NewBBSucc, then DF(NewBB) is now going to be the
+ // DF(PredBlocks[0]) without the stuff that the new block does not dominate
+ // a predecessor of.
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ if (DT.dominates(NewBB, NewBBSucc)) {
+ DominanceFrontier::iterator DFI = find(PredBlocks[0]);
+ if (DFI != end()) {
+ DominanceFrontier::DomSetType Set = DFI->second;
+ // Filter out stuff in Set that we do not dominate a predecessor of.
+ for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(),
+ E = Set.end(); SetI != E;) {
+ bool DominatesPred = false;
+ for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI);
+ PI != E; ++PI)
+ if (DT.dominates(NewBB, *PI))
+ DominatesPred = true;
+ if (!DominatesPred)
+ Set.erase(SetI++);
+ else
+ ++SetI;
+ }
+
+ if (NewBBI != end()) {
+ for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(),
+ E = Set.end(); SetI != E; ++SetI) {
+ BasicBlock *SB = *SetI;
+ addToFrontier(NewBBI, SB);
+ }
+ } else
+ addBasicBlock(NewBB, Set);
+ }
+
+ } else {
+ // DF(NewBB) is {NewBBSucc} because NewBB does not strictly dominate
+ // NewBBSucc, but it does dominate itself (and there is an edge (NewBB ->
+ // NewBBSucc)). NewBBSucc is the single successor of NewBB.
+ DominanceFrontier::DomSetType NewDFSet;
+ NewDFSet.insert(NewBBSucc);
+ addBasicBlock(NewBB, NewDFSet);
+ }
+
+ // Now we must loop over all of the dominance frontiers in the function,
+ // replacing occurrences of NewBBSucc with NewBB in some cases. All
+ // blocks that dominate a block in PredBlocks and contained NewBBSucc in
+ // their dominance frontier must be updated to contain NewBB instead.
+ //
+ for (Function::iterator FI = NewBB->getParent()->begin(),
+ FE = NewBB->getParent()->end(); FI != FE; ++FI) {
+ DominanceFrontier::iterator DFI = find(FI);
+ if (DFI == end()) continue; // unreachable block.
+
+ // Only consider nodes that have NewBBSucc in their dominator frontier.
+ if (!DFI->second.count(NewBBSucc)) continue;
+
+ // Verify whether this block dominates a block in predblocks. If not, do
+ // not update it.
+ bool BlockDominatesAny = false;
+ for (std::vector<BasicBlock*>::const_iterator BI = PredBlocks.begin(),
+ BE = PredBlocks.end(); BI != BE; ++BI) {
+ if (DT.dominates(FI, *BI)) {
+ BlockDominatesAny = true;
+ break;
+ }
+ }
+
+ // If NewBBSucc should not stay in our dominator frontier, remove it.
+ // We remove it unless there is a predecessor of NewBBSucc that we
+ // dominate, but we don't strictly dominate NewBBSucc.
+ bool ShouldRemove = true;
+ if ((BasicBlock*)FI == NewBBSucc || !DT.dominates(FI, NewBBSucc)) {
+ // Okay, we know that PredDom does not strictly dominate NewBBSucc.
+ // Check to see if it dominates any predecessors of NewBBSucc.
+ for (pred_iterator PI = pred_begin(NewBBSucc),
+ E = pred_end(NewBBSucc); PI != E; ++PI)
+ if (DT.dominates(FI, *PI)) {
+ ShouldRemove = false;
+ break;
+ }
+ }
+
+ if (ShouldRemove)
+ removeFromFrontier(DFI, NewBBSucc);
+ if (BlockDominatesAny && (&*FI == NewBB || !DT.dominates(FI, NewBB)))
+ addToFrontier(DFI, NewBB);
+ }
+}
+
+namespace {
+ class DFCalculateWorkObject {
+ public:
+ DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
+ const DomTreeNode *N,
+ const DomTreeNode *PN)
+ : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+ BasicBlock *currentBB;
+ BasicBlock *parentBB;
+ const DomTreeNode *Node;
+ const DomTreeNode *parentNode;
+ };
+}
+
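+// The worklist loop below implements the standard dominance-frontier
+// recurrence (Cytron et al.):
+//
+//   DF(X) = DFlocal(X) union DFup(Z), for each child Z of X in the dom tree
+//
+// where DFlocal(X) is the set of CFG successors of X not immediately
+// dominated by X, and DFup(Z) is the subset of DF(Z) not properly dominated
+// by X.  The explicit workList replaces the textbook post-order recursion.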
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+ const DomTreeNode *Node) {
+ BasicBlock *BB = Node->getBlock();
+ DomSetType *Result = NULL;
+
+ std::vector<DFCalculateWorkObject> workList;
+ SmallPtrSet<BasicBlock *, 32> visited;
+
+ workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+ do {
+ DFCalculateWorkObject *currentW = &workList.back();
+ assert (currentW && "Missing work object.");
+
+ BasicBlock *currentBB = currentW->currentBB;
+ BasicBlock *parentBB = currentW->parentBB;
+ const DomTreeNode *currentNode = currentW->Node;
+ const DomTreeNode *parentNode = currentW->parentNode;
+ assert (currentBB && "Invalid work object. Missing current Basic Block");
+ assert (currentNode && "Invalid work object. Missing current Node");
+ DomSetType &S = Frontiers[currentBB];
+
+ // Visit each block only once.
+ if (visited.count(currentBB) == 0) {
+ visited.insert(currentBB);
+
+ // Loop over CFG successors to calculate DFlocal[currentNode]
+ for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+ SI != SE; ++SI) {
+ // Does Node immediately dominate this successor?
+ if (DT[*SI]->getIDom() != currentNode)
+ S.insert(*SI);
+ }
+ }
+
+ // At this point, S is DFlocal. Now we union in DFup's of our children...
+ // Loop through and visit the nodes that Node immediately dominates (Node's
+ // children in the IDomTree)
+ bool visitChild = false;
+ for (DomTreeNode::const_iterator NI = currentNode->begin(),
+ NE = currentNode->end(); NI != NE; ++NI) {
+ DomTreeNode *IDominee = *NI;
+ BasicBlock *childBB = IDominee->getBlock();
+ if (visited.count(childBB) == 0) {
+ workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+ IDominee, currentNode));
+ visitChild = true;
+ }
+ }
+
+ // If no unvisited children were pushed above, this block is finished:
+ // propagate its DFup contribution to the parent (if any) and pop it
+ // from the workList.
+ if (!visitChild) {
+
+ if (!parentBB) {
+ Result = &S;
+ break;
+ }
+
+ DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+ DomSetType &parentSet = Frontiers[parentBB];
+ for (; CDFI != CDFE; ++CDFI) {
+ if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+ parentSet.insert(*CDFI);
+ }
+ workList.pop_back();
+ }
+
+ } while (!workList.empty());
+
+ return *Result;
+}
+
+void DominanceFrontierBase::print(std::ostream &o, const Module* ) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ o << " DomFrontier for BB";
+ if (I->first)
+ WriteAsOperand(o, I->first, false);
+ else
+ o << " <<exit node>>";
+ o << " is:\t" << I->second << "\n";
+ }
+}
+
+void DominanceFrontierBase::dump() {
+ print (llvm::cerr);
+}
+
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
new file mode 100644
index 0000000..3a991f6
--- /dev/null
+++ b/lib/VMCore/Function.cpp
@@ -0,0 +1,367 @@
+//===-- Function.cpp - Implement the Function class -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Function class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/StringPool.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class SymbolTableListTraits<Argument, Function>;
+template class SymbolTableListTraits<BasicBlock, Function>;
+
+//===----------------------------------------------------------------------===//
+// Argument Implementation
+//===----------------------------------------------------------------------===//
+
+Argument::Argument(const Type *Ty, const std::string &Name, Function *Par)
+ : Value(Ty, Value::ArgumentVal) {
+ Parent = 0;
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (Par)
+ Par->getArgumentList().push_back(this);
+ setName(Name);
+}
+
+void Argument::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getArgNo - Return the index of this formal argument in its containing
+/// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
+unsigned Argument::getArgNo() const {
+ const Function *F = getParent();
+ assert(F && "Argument is not in a function");
+
+ Function::const_arg_iterator AI = F->arg_begin();
+ unsigned ArgIdx = 0;
+ for (; &*AI != this; ++AI)
+ ++ArgIdx;
+
+ return ArgIdx;
+}
+
+/// hasByValAttr - Return true if this argument has the byval attribute on it
+/// in its containing function.
+bool Argument::hasByValAttr() const {
+ if (!isa<PointerType>(getType())) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
+}
+
+/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
+/// it in its containing function.
+bool Argument::hasNoAliasAttr() const {
+ if (!isa<PointerType>(getType())) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias);
+}
+
+/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
+/// on it in its containing function.
+bool Argument::hasNoCaptureAttr() const {
+ if (!isa<PointerType>(getType())) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture);
+}
+
+/// hasSRetAttr - Return true if this argument has the sret attribute on
+/// it in its containing function.
+bool Argument::hasStructRetAttr() const {
+ if (!isa<PointerType>(getType())) return false;
+ if (this != getParent()->arg_begin())
+ return false; // StructRet param must be first param
+ return getParent()->paramHasAttr(1, Attribute::StructRet);
+}
+
+/// addAttr - Add an Attribute to an argument
+void Argument::addAttr(Attributes attr) {
+ getParent()->addAttribute(getArgNo() + 1, attr);
+}
+
+/// removeAttr - Remove a Attribute from an argument
+void Argument::removeAttr(Attributes attr) {
+ getParent()->removeAttribute(getArgNo() + 1, attr);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Methods in Function
+//===----------------------------------------------------------------------===//
+
+const FunctionType *Function::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+bool Function::isVarArg() const {
+ return getFunctionType()->isVarArg();
+}
+
+const Type *Function::getReturnType() const {
+ return getFunctionType()->getReturnType();
+}
+
+void Function::removeFromParent() {
+ getParent()->getFunctionList().remove(this);
+}
+
+void Function::eraseFromParent() {
+ getParent()->getFunctionList().erase(this);
+}
+
+//===----------------------------------------------------------------------===//
+// Function Implementation
+//===----------------------------------------------------------------------===//
+
+Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
+ const std::string &name, Module *ParentModule)
+ : GlobalValue(PointerType::getUnqual(Ty),
+ Value::FunctionVal, 0, 0, Linkage, name) {
+ assert(FunctionType::isValidReturnType(getReturnType()) &&
+ !isa<OpaqueType>(getReturnType()) && "invalid return type");
+ SymTab = new ValueSymbolTable();
+
+ // If the function has arguments, mark them as lazily built.
+ if (Ty->getNumParams())
+ SubclassData = 1; // Set the "has lazy arguments" bit.
+
+ // Make sure that we get added to a module
+ LeakDetector::addGarbageObject(this);
+
+ if (ParentModule)
+ ParentModule->getFunctionList().push_back(this);
+
+ // Ensure intrinsics have the right parameter attributes.
+ if (unsigned IID = getIntrinsicID())
+ setAttributes(Intrinsic::getAttributes(Intrinsic::ID(IID)));
+}
+
+Function::~Function() {
+ dropAllReferences(); // After this it is safe to delete instructions.
+
+ // Delete all of the method arguments and unlink from symbol table...
+ ArgumentList.clear();
+ delete SymTab;
+
+ // Remove the function from the on-the-side GC table.
+ clearGC();
+}
+
+void Function::BuildLazyArguments() const {
+ // Create the arguments vector, all arguments start out unnamed.
+ const FunctionType *FT = getFunctionType();
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ assert(FT->getParamType(i) != Type::VoidTy &&
+ "Cannot have void typed arguments!");
+ ArgumentList.push_back(new Argument(FT->getParamType(i)));
+ }
+
+ // Clear the lazy arguments bit.
+ const_cast<Function*>(this)->SubclassData &= ~1;
+}
+
+size_t Function::arg_size() const {
+ return getFunctionType()->getNumParams();
+}
+bool Function::arg_empty() const {
+ return getFunctionType()->getNumParams() == 0;
+}
+
+void Function::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+// dropAllReferences() - This function causes all the subinstructions to "let
+// go" of all references that they are maintaining. This allows one to
+// 'delete' a whole class at a time, even though there may be circular
+// references... first all references are dropped, and all use counts go to
+// zero. Then everything is deleted for real. Note that no operations are
+// valid on an object that has "dropped all references", except operator
+// delete.
+//
+void Function::dropAllReferences() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+ BasicBlocks.clear(); // Delete all basic blocks...
+}
+
+void Function::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void Function::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+// Maintain the GC name for each function in an on-the-side table. This saves
+// allocating an additional word in Function for programs which do not use GC
+// (i.e., most programs) at the cost of increased overhead for clients which do
+// use GC.
+static DenseMap<const Function*,PooledStringPtr> *GCNames;
+static StringPool *GCNamePool;
+
+bool Function::hasGC() const {
+ return GCNames && GCNames->count(this);
+}
+
+const char *Function::getGC() const {
+ assert(hasGC() && "Function has no collector");
+ return *(*GCNames)[this];
+}
+
+void Function::setGC(const char *Str) {
+ if (!GCNamePool)
+ GCNamePool = new StringPool();
+ if (!GCNames)
+ GCNames = new DenseMap<const Function*,PooledStringPtr>();
+ (*GCNames)[this] = GCNamePool->intern(Str);
+}
+
+void Function::clearGC() {
+ if (GCNames) {
+ GCNames->erase(this);
+ if (GCNames->empty()) {
+ delete GCNames;
+ GCNames = 0;
+ if (GCNamePool->empty()) {
+ delete GCNamePool;
+ GCNamePool = 0;
+ }
+ }
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a Function) from the Function Src to this one.
+void Function::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<Function>(Src) && "Expected a Function!");
+ GlobalValue::copyAttributesFrom(Src);
+ const Function *SrcF = cast<Function>(Src);
+ setCallingConv(SrcF->getCallingConv());
+ setAttributes(SrcF->getAttributes());
+ if (SrcF->hasGC())
+ setGC(SrcF->getGC());
+ else
+ clearGC();
+}
+
+/// getIntrinsicID - This method returns the ID number of the intrinsic that
+/// this function represents, or Intrinsic::not_intrinsic if the function is
+/// not an intrinsic or has no name. Intrinsic::not_intrinsic is defined to
+/// be zero to allow easy checking for whether a function is intrinsic or
+/// not. The particular intrinsic functions which correspond to this value
+/// are defined in llvm/Intrinsics.h.
+///
+unsigned Function::getIntrinsicID() const {
+ const ValueName *ValName = this->getValueName();
+ if (!ValName)
+ return 0;
+ unsigned Len = ValName->getKeyLength();
+ const char *Name = ValName->getKeyData();
+
+ if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
+ || Name[2] != 'v' || Name[3] != 'm')
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "llvm/Intrinsics.gen"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
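+
+// Usage sketch (illustrative): intrinsic IDs are nonzero, so the result can
+// be tested directly as a boolean, as the Function constructor above does:
+//
+//   if (unsigned IID = F->getIntrinsicID())  // hypothetical Function *F
+//     ...F is a declaration of the llvm.* intrinsic numbered IID...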
+
+std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) {
+ assert(id < num_intrinsics && "Invalid intrinsic ID!");
+ const char * const Table[] = {
+ "not_intrinsic",
+#define GET_INTRINSIC_NAME_TABLE
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+ if (numTys == 0)
+ return Table[id];
+ std::string Result(Table[id]);
+ for (unsigned i = 0; i < numTys; ++i) {
+ if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
+ Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
+ MVT::getMVT(PTyp->getElementType()).getMVTString();
+ }
+ else if (Tys[i])
+ Result += "." + MVT::getMVT(Tys[i]).getMVTString();
+ }
+ return Result;
+}
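+
+// Example (illustrative): overloaded intrinsics get one type suffix per
+// overloaded type, so with Tys[0] == Type::Int32Ty,
+// getName(Intrinsic::ctpop, Tys, 1) produces "llvm.ctpop.i32", while a
+// pointer type contributes ".p<address space><pointee MVT>" instead.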
+
+const FunctionType *Intrinsic::getType(ID id, const Type **Tys,
+ unsigned numTys) {
+ const Type *ResultTy = NULL;
+ std::vector<const Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+bool Intrinsic::isOverloaded(ID id) {
+ const bool OTable[] = {
+ false,
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ return OTable[id];
+}
+
+/// This defines the "Intrinsic::getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys,
+ unsigned numTys) {
+ // There can never be multiple globals with the same name but different
+ // types, because each intrinsic has a specific type.
+ return
+ cast<Function>(M->getOrInsertFunction(getName(id, Tys, numTys),
+ getType(id, Tys, numTys)));
+}
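+
+// Usage sketch (illustrative), continuing the example above:
+//
+//   const Type *Tys[] = { Type::Int32Ty };
+//   Function *F = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys, 1);
+//
+// Because getName() mangles the overloaded types into the symbol name, each
+// distinct instantiation gets its own declaration, and the cast<Function>
+// above is safe per the comment in getDeclaration.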
+
+// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "llvm/Intrinsics.gen"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+// vim: sw=2 ai
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
new file mode 100644
index 0000000..5abe1f9
--- /dev/null
+++ b/lib/VMCore/Globals.cpp
@@ -0,0 +1,273 @@
+//===-- Globals.cpp - Implement the GlobalValue & GlobalVariable class ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GlobalValue & GlobalVariable classes for the VMCore
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Class
+//===----------------------------------------------------------------------===//
+
+/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
+/// it. This involves recursively eliminating any dead users of the
+/// constantexpr.
+static bool removeDeadUsersOfConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false; // Cannot remove this
+
+ while (!C->use_empty()) {
+ const Constant *User = dyn_cast<Constant>(C->use_back());
+ if (!User) return false; // Non-constant usage.
+ if (!removeDeadUsersOfConstant(User))
+ return false; // Constant wasn't dead
+ }
+
+ const_cast<Constant*>(C)->destroyConstant();
+ return true;
+}
+
+/// removeDeadConstantUsers - If there are any dead constant users dangling
+/// off of this global value, remove them. This method is useful for clients
+/// that want to check to see if a global is unused, but don't want to deal
+/// with potentially dead constants hanging off of the globals.
+void GlobalValue::removeDeadConstantUsers() const {
+ Value::use_const_iterator I = use_begin(), E = use_end();
+ Value::use_const_iterator LastNonDeadUser = E;
+ while (I != E) {
+ if (const Constant *User = dyn_cast<Constant>(*I)) {
+ if (!removeDeadUsersOfConstant(User)) {
+ // If the constant wasn't dead, remember that this was the last live use
+ // and move on to the next constant.
+ LastNonDeadUser = I;
+ ++I;
+ } else {
+ // If the constant was dead, then the iterator is invalidated.
+ if (LastNonDeadUser == E) {
+ I = use_begin();
+ if (I == E) break;
+ } else {
+ I = LastNonDeadUser;
+ ++I;
+ }
+ }
+ } else {
+ LastNonDeadUser = I;
+ ++I;
+ }
+ }
+}
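+
+// Usage sketch (illustrative): the typical caller pattern when testing
+// whether a global can be deleted is
+//
+//   GV->removeDeadConstantUsers();  // hypothetical const GlobalValue *GV
+//   if (GV->use_empty())
+//     ...no live users remain; GV is safe to drop...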
+
+/// Override destroyConstant to make sure it doesn't get called on
+/// GlobalValues, because they shouldn't be treated like other constants.
+void GlobalValue::destroyConstant() {
+ assert(0 && "You can't GV->destroyConstant()!");
+ abort();
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalValue) from the GlobalValue Src to this one.
+void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
+ setAlignment(Src->getAlignment());
+ setSection(Src->getSection());
+ setVisibility(Src->getVisibility());
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalVariable Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
+ Constant *InitVal, const std::string &Name,
+ Module *ParentModule, bool ThreadLocal,
+ unsigned AddressSpace)
+ : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+
+ if (ParentModule)
+ ParentModule->getGlobalList().push_back(this);
+}
+
+GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
+ Constant *InitVal, const std::string &Name,
+ GlobalVariable *Before, bool ThreadLocal,
+ unsigned AddressSpace)
+ : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+
+ if (Before)
+ Before->getParent()->getGlobalList().insert(Before, this);
+}
+
+void GlobalVariable::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalVariable::removeFromParent() {
+ getParent()->getGlobalList().remove(this);
+}
+
+void GlobalVariable::eraseFromParent() {
+ getParent()->getGlobalList().erase(this);
+}
+
+void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ // If you call this, then you better know this GVar has a constant
+ // initializer worth replacing. Enforce that here.
+ assert(getNumOperands() == 1 &&
+ "Attempt to replace uses of Constants on a GVar with no initializer");
+
+ // And, since you know it has an initializer, the From value better be
+ // the initializer :)
+ assert(getOperand(0) == From &&
+ "Attempt to replace wrong constant initializer in GVar");
+
+ // And, you better have a constant for the replacement value
+ assert(isa<Constant>(To) &&
+ "Attempt to replace GVar initializer with non-constant");
+
+ // Okay, preconditions out of the way, replace the constant initializer.
+ this->setOperand(0, cast<Constant>(To));
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalVariable) from the GlobalVariable Src to this one.
+void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<GlobalVariable>(Src) && "Expected a GlobalVariable!");
+ GlobalValue::copyAttributesFrom(Src);
+ const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
+ setThreadLocal(SrcVar->isThreadLocal());
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalAlias Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalAlias::GlobalAlias(const Type *Ty, LinkageTypes Link,
+ const std::string &Name, Constant* aliasee,
+ Module *ParentModule)
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
+ LeakDetector::addGarbageObject(this);
+
+ if (aliasee)
+ assert(aliasee->getType() == Ty && "Alias and aliasee types should match!");
+ Op<0>() = aliasee;
+
+ if (ParentModule)
+ ParentModule->getAliasList().push_back(this);
+}
+
+void GlobalAlias::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalAlias::removeFromParent() {
+ getParent()->getAliasList().remove(this);
+}
+
+void GlobalAlias::eraseFromParent() {
+ getParent()->getAliasList().erase(this);
+}
+
+bool GlobalAlias::isDeclaration() const {
+ const GlobalValue* AV = getAliasedGlobal();
+ if (AV)
+ return AV->isDeclaration();
+ else
+ return false;
+}
+
+void GlobalAlias::setAliasee(Constant *Aliasee)
+{
+ if (Aliasee)
+ assert(Aliasee->getType() == getType() &&
+ "Alias and aliasee types should match!");
+
+ setOperand(0, Aliasee);
+}
+
+const GlobalValue *GlobalAlias::getAliasedGlobal() const {
+ const Constant *C = getAliasee();
+ if (C) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GV;
+ else {
+ const ConstantExpr *CE = 0;
+ if ((CE = dyn_cast<ConstantExpr>(C)) &&
+ (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr))
+ return dyn_cast<GlobalValue>(CE->getOperand(0));
+ else
+ assert(0 && "Unsupported aliasee");
+ }
+ }
+ return 0;
+}
+
+const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
+ SmallPtrSet<const GlobalValue*, 3> Visited;
+
+ // Check if we need to stop early.
+ if (stopOnWeak && mayBeOverridden())
+ return this;
+
+ const GlobalValue *GV = getAliasedGlobal();
+ Visited.insert(GV);
+
+ // Iterate over aliasing chain, stopping on weak alias if necessary.
+ while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
+ if (stopOnWeak && GA->mayBeOverridden())
+ break;
+
+ GV = GA->getAliasedGlobal();
+
+ if (!Visited.insert(GV))
+ return NULL;
+ }
+
+ return GV;
+}
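+
+// Behavior sketch (illustrative): for an alias chain A -> B -> C,
+// A->resolveAliasedGlobal() returns C. With stopOnWeak set, resolution stops
+// at the first link that mayBeOverridden(); a cycle such as A -> B -> A
+// fails the Visited.insert check and yields NULL.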
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
new file mode 100644
index 0000000..524e294
--- /dev/null
+++ b/lib/VMCore/InlineAsm.cpp
@@ -0,0 +1,231 @@
+//===-- InlineAsm.cpp - Implement the InlineAsm class ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InlineAsm class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InlineAsm.h"
+#include "llvm/DerivedTypes.h"
+#include <algorithm>
+#include <cctype>
+#include <cstdlib>  // for atoi(), used by the matching-constraint parser
+using namespace llvm;
+
+// Implement the first virtual method in this class in this file so the
+// InlineAsm vtable is emitted here.
+InlineAsm::~InlineAsm() {
+}
+
+
+// NOTE: when memoizing the function type, we have to be careful to handle the
+// case when the type gets refined.
+
+InlineAsm *InlineAsm::get(const FunctionType *Ty, const std::string &AsmString,
+ const std::string &Constraints, bool hasSideEffects) {
+ // FIXME: memoize!
+ return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects);
+}
+
+InlineAsm::InlineAsm(const FunctionType *Ty, const std::string &asmString,
+ const std::string &constraints, bool hasSideEffects)
+ : Value(PointerType::getUnqual(Ty),
+ Value::InlineAsmVal),
+ AsmString(asmString),
+ Constraints(constraints), HasSideEffects(hasSideEffects) {
+
+ // Do various checks on the constraint string and type.
+ assert(Verify(Ty, constraints) && "Function type not legal for constraints!");
+}
+
+const FunctionType *InlineAsm::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
+/// fields in this structure. If the constraint string is not understood,
+/// return true, otherwise return false.
+bool InlineAsm::ConstraintInfo::Parse(const std::string &Str,
+ std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar) {
+ std::string::const_iterator I = Str.begin(), E = Str.end();
+
+ // Initialize
+ Type = isInput;
+ isEarlyClobber = false;
+ MatchingInput = -1;
+ isCommutative = false;
+ isIndirect = false;
+
+ // Parse prefixes.
+ if (*I == '~') {
+ Type = isClobber;
+ ++I;
+ } else if (*I == '=') {
+ ++I;
+ Type = isOutput;
+ }
+
+ if (*I == '*') {
+ isIndirect = true;
+ ++I;
+ }
+
+ if (I == E) return true; // Just a prefix, like "==" or "~".
+
+ // Parse the modifiers.
+ bool DoneWithModifiers = false;
+ while (!DoneWithModifiers) {
+ switch (*I) {
+ default:
+ DoneWithModifiers = true;
+ break;
+ case '&': // Early clobber.
+ if (Type != isOutput || // Cannot early clobber anything but output.
+ isEarlyClobber) // Reject &&&&&&
+ return true;
+ isEarlyClobber = true;
+ break;
+ case '%': // Commutative.
+ if (Type == isClobber || // Cannot commute clobbers.
+ isCommutative) // Reject %%%%%
+ return true;
+ isCommutative = true;
+ break;
+ case '#': // Comment.
+ case '*': // Register preferencing.
+ return true; // Not supported.
+ }
+
+ if (!DoneWithModifiers) {
+ ++I;
+ if (I == E) return true; // Just prefixes and modifiers!
+ }
+ }
+
+ // Parse the various constraints.
+ while (I != E) {
+ if (*I == '{') { // Physical register reference.
+ // Find the end of the register name.
+ std::string::const_iterator ConstraintEnd = std::find(I+1, E, '}');
+ if (ConstraintEnd == E) return true; // "{foo"
+ Codes.push_back(std::string(I, ConstraintEnd+1));
+ I = ConstraintEnd+1;
+ } else if (isdigit(*I)) { // Matching Constraint
+ // Maximal munch numbers.
+ std::string::const_iterator NumStart = I;
+ while (I != E && isdigit(*I))
+ ++I;
+ Codes.push_back(std::string(NumStart, I));
+ unsigned N = atoi(Codes.back().c_str());
+ // Check that this is a valid matching constraint!
+ if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
+ Type != isInput)
+ return true; // Invalid constraint number.
+
+ // If Operand N already has a matching input, reject this. An output
+ // can't be constrained to the same value as multiple inputs.
+ if (ConstraintsSoFar[N].hasMatchingInput())
+ return true;
+
+ // Note that operand #n has a matching input.
+ ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+ } else {
+ // Single letter constraint.
+ Codes.push_back(std::string(I, I+1));
+ ++I;
+ }
+ }
+
+ return false;
+}
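+
+// Example (illustrative): in the constraint string "=r,0", the input's "0"
+// is a matching constraint tying it to output operand #0; the loop above
+// records the pairing by setting MatchingInput (here, 1) on that output's
+// ConstraintInfo.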
+
+std::vector<InlineAsm::ConstraintInfo>
+InlineAsm::ParseConstraints(const std::string &Constraints) {
+ std::vector<ConstraintInfo> Result;
+
+ // Scan the constraints string.
+ for (std::string::const_iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ) {
+ ConstraintInfo Info;
+
+ // Find the end of this constraint.
+ std::string::const_iterator ConstraintEnd = std::find(I, E, ',');
+
+ if (ConstraintEnd == I || // Empty constraint like ",,"
+ Info.Parse(std::string(I, ConstraintEnd), Result)) {
+ Result.clear(); // Erroneous constraint?
+ break;
+ }
+
+ Result.push_back(Info);
+
+ // ConstraintEnd may be either the next comma or the end of the string. In
+ // the former case, we skip the comma.
+ I = ConstraintEnd;
+ if (I != E) {
+ ++I;
+ if (I == E) { Result.clear(); break; } // don't allow "xyz,"
+ }
+ }
+
+ return Result;
+}
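+
+// Example (illustrative): ParseConstraints("=r,r,~{memory}") yields three
+// ConstraintInfos: an output register ("=r"), an input register ("r"), and
+// a memory clobber ("~{memory}"). A malformed string such as "r," (trailing
+// comma) produces an empty vector.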
+
+
+/// Verify - Verify that the specified constraint string is reasonable for the
+/// specified function type, and otherwise validate the constraint string.
+bool InlineAsm::Verify(const FunctionType *Ty, const std::string &ConstStr) {
+ if (Ty->isVarArg()) return false;
+
+ std::vector<ConstraintInfo> Constraints = ParseConstraints(ConstStr);
+
+ // Error parsing constraints.
+ if (Constraints.empty() && !ConstStr.empty()) return false;
+
+ unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
+ unsigned NumIndirect = 0;
+
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ switch (Constraints[i].Type) {
+ case InlineAsm::isOutput:
+ if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
+ return false; // outputs before inputs and clobbers.
+ if (!Constraints[i].isIndirect) {
+ ++NumOutputs;
+ break;
+ }
+ ++NumIndirect;
+ // FALLTHROUGH for Indirect Outputs.
+ case InlineAsm::isInput:
+ if (NumClobbers) return false; // inputs before clobbers.
+ ++NumInputs;
+ break;
+ case InlineAsm::isClobber:
+ ++NumClobbers;
+ break;
+ }
+ }
+
+ switch (NumOutputs) {
+ case 0:
+ if (Ty->getReturnType() != Type::VoidTy) return false;
+ break;
+ case 1:
+ if (isa<StructType>(Ty->getReturnType())) return false;
+ break;
+ default:
+ const StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
+ if (STy == 0 || STy->getNumElements() != NumOutputs)
+ return false;
+ break;
+ }
+
+ if (Ty->getNumParams() != NumInputs) return false;
+ return true;
+}
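+
+// Example (illustrative): a function type i32 (i32) verifies against "=r,r"
+// (one direct output matching the scalar return, one input matching the one
+// parameter), whereas "=r,=r,r" has two direct outputs and therefore
+// requires a two-element struct return type.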
+
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
new file mode 100644
index 0000000..9e030b7
--- /dev/null
+++ b/lib/VMCore/Instruction.cpp
@@ -0,0 +1,387 @@
+//===-- Instruction.cpp - Implement the Instruction class -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Instruction class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Type.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ Instruction *InsertBefore)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ // Make sure that we get added to a basicblock
+ LeakDetector::addGarbageObject(this);
+
+ // If requested, insert this instruction into a basic block...
+ if (InsertBefore) {
+ assert(InsertBefore->getParent() &&
+ "Instruction to insert before is not in a basic block!");
+ InsertBefore->getParent()->getInstList().insert(InsertBefore, this);
+ }
+}
+
+Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ BasicBlock *InsertAtEnd)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ // Make sure that we get added to a basicblock
+ LeakDetector::addGarbageObject(this);
+
+ // append this instruction into the basic block
+ assert(InsertAtEnd && "Basic block to append to may not be NULL!");
+ InsertAtEnd->getInstList().push_back(this);
+}
+
+
+// Out of line virtual method, so the vtable, etc has a home.
+Instruction::~Instruction() {
+ assert(Parent == 0 && "Instruction still linked in the program!");
+}
+
+
+void Instruction::setParent(BasicBlock *P) {
+ if (getParent()) {
+ if (!P) LeakDetector::addGarbageObject(this);
+ } else {
+ if (P) LeakDetector::removeGarbageObject(this);
+ }
+
+ Parent = P;
+}
+
+void Instruction::removeFromParent() {
+ getParent()->getInstList().remove(this);
+}
+
+void Instruction::eraseFromParent() {
+ getParent()->getInstList().erase(this);
+}
+
+/// insertBefore - Insert an unlinked instruction into a basic block
+/// immediately before the specified instruction.
+void Instruction::insertBefore(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insert(InsertPos, this);
+}
+
+/// insertAfter - Insert an unlinked instruction into a basic block
+/// immediately after the specified instruction.
+void Instruction::insertAfter(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insertAfter(InsertPos, this);
+}
+
+/// moveBefore - Unlink this instruction from its current basic block and
+/// insert it into the basic block that MovePos lives in, right before
+/// MovePos.
+void Instruction::moveBefore(Instruction *MovePos) {
+ MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(),
+ this);
+}
+
+
+const char *Instruction::getOpcodeName(unsigned OpCode) {
+ switch (OpCode) {
+ // Terminators
+ case Ret: return "ret";
+ case Br: return "br";
+ case Switch: return "switch";
+ case Invoke: return "invoke";
+ case Unwind: return "unwind";
+ case Unreachable: return "unreachable";
+
+ // Standard binary operators...
+ case Add: return "add";
+ case Sub: return "sub";
+ case Mul: return "mul";
+ case UDiv: return "udiv";
+ case SDiv: return "sdiv";
+ case FDiv: return "fdiv";
+ case URem: return "urem";
+ case SRem: return "srem";
+ case FRem: return "frem";
+
+ // Logical operators...
+ case And: return "and";
+ case Or : return "or";
+ case Xor: return "xor";
+
+ // Memory instructions...
+ case Malloc: return "malloc";
+ case Free: return "free";
+ case Alloca: return "alloca";
+ case Load: return "load";
+ case Store: return "store";
+ case GetElementPtr: return "getelementptr";
+
+ // Convert instructions...
+ case Trunc: return "trunc";
+ case ZExt: return "zext";
+ case SExt: return "sext";
+ case FPTrunc: return "fptrunc";
+ case FPExt: return "fpext";
+ case FPToUI: return "fptoui";
+ case FPToSI: return "fptosi";
+ case UIToFP: return "uitofp";
+ case SIToFP: return "sitofp";
+ case IntToPtr: return "inttoptr";
+ case PtrToInt: return "ptrtoint";
+ case BitCast: return "bitcast";
+
+ // Other instructions...
+ case ICmp: return "icmp";
+ case FCmp: return "fcmp";
+ case VICmp: return "vicmp";
+ case VFCmp: return "vfcmp";
+ case PHI: return "phi";
+ case Select: return "select";
+ case Call: return "call";
+ case Shl: return "shl";
+ case LShr: return "lshr";
+ case AShr: return "ashr";
+ case VAArg: return "va_arg";
+ case ExtractElement: return "extractelement";
+ case InsertElement: return "insertelement";
+ case ShuffleVector: return "shufflevector";
+ case ExtractValue: return "extractvalue";
+ case InsertValue: return "insertvalue";
+
+ default: return "<Invalid operator> ";
+ }
+
+ return 0;
+}
+
+/// isIdenticalTo - Return true if the specified instruction is exactly
+/// identical to the current one. This means that all operands match and any
+/// extra information (e.g. load is volatile) agree.
+bool Instruction::isIdenticalTo(const Instruction *I) const {
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ getType() != I->getType())
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) != I->getOperand(i))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes().getRawPointer() ==
+ cast<CallInst>(I)->getAttributes().getRawPointer();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes().getRawPointer() ==
+ cast<InvokeInst>(I)->getAttributes().getRawPointer();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
+ if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
+ if (IVI->idx_begin()[i] != cast<InsertValueInst>(I)->idx_begin()[i])
+ return false;
+ return true;
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) {
+ if (EVI->getNumIndices() != cast<ExtractValueInst>(I)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
+ if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I)->idx_begin()[i])
+ return false;
+ return true;
+ }
+
+ return true;
+}
+
+// isSameOperationAs
+bool Instruction::isSameOperationAs(const Instruction *I) const {
+ if (getOpcode() != I->getOpcode() || getType() != I->getType() ||
+ getNumOperands() != I->getNumOperands())
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i)->getType() != I->getOperand(i)->getType())
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes().getRawPointer() ==
+ cast<CallInst>(I)->getAttributes().getRawPointer();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes().getRawPointer() ==
+ cast<InvokeInst>(I)->getAttributes().getRawPointer();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
+ if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
+ if (IVI->idx_begin()[i] != cast<InsertValueInst>(I)->idx_begin()[i])
+ return false;
+ return true;
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) {
+ if (EVI->getNumIndices() != cast<ExtractValueInst>(I)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
+ if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I)->idx_begin()[i])
+ return false;
+ return true;
+ }
+
+ return true;
+}
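+
+// Note (editorial): isIdenticalTo requires the operand Values themselves to
+// match, while isSameOperationAs only requires matching operand types; e.g.
+// "add i32 %a, %b" and "add i32 %c, %d" are the same operation but are
+// identical only if %a == %c and %b == %d.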
+
+/// isUsedOutsideOfBlock - Return true if there are any uses of this
+/// instruction outside of the specified block. Note that PHI nodes are
+/// considered to evaluate their operands in the corresponding predecessor
+/// block.
+bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
+ for (use_const_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ // PHI nodes use values in the corresponding predecessor block. For other
+ // instructions, just check to see whether the parent of the use matches up.
+ const PHINode *PN = dyn_cast<PHINode>(*UI);
+ if (PN == 0) {
+ if (cast<Instruction>(*UI)->getParent() != BB)
+ return true;
+ continue;
+ }
+
+ if (PN->getIncomingBlock(UI) != BB)
+ return true;
+ }
+ return false;
+}
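+
+// Example (illustrative): if %v is defined in %bb and a PHI node in a
+// successor lists %v with incoming block %bb, that use is attributed to %bb
+// itself, so it alone does not make isUsedOutsideOfBlock(%bb) return true.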
+
+/// mayReadFromMemory - Return true if this instruction may read memory.
+///
+bool Instruction::mayReadFromMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::Free:
+ case Instruction::VAArg:
+ case Instruction::Load:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->doesNotAccessMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->doesNotAccessMemory();
+ case Instruction::Store:
+ return cast<StoreInst>(this)->isVolatile();
+ }
+}
+
+/// mayWriteToMemory - Return true if this instruction may modify memory.
+///
+bool Instruction::mayWriteToMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::Free:
+ case Instruction::Store:
+ case Instruction::VAArg:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->onlyReadsMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->onlyReadsMemory();
+ case Instruction::Load:
+ return cast<LoadInst>(this)->isVolatile();
+ }
+}
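+
+// Note (editorial): the volatile cases are deliberately cross-listed above:
+// a volatile store counts as a read and a volatile load counts as a write,
+// so passes that query only one of these predicates still treat volatile
+// accesses conservatively.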
+
+/// mayThrow - Return true if this instruction may throw an exception.
+///
+bool Instruction::mayThrow() const {
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return !CI->doesNotThrow();
+ return false;
+}
+
+/// isAssociative - Return true if the instruction is associative:
+///
+/// Associative operators satisfy: x op (y op z) === (x op y) op z
+///
+/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative, when not
+/// applied to floating point types.
+///
+bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) {
+ if (Opcode == And || Opcode == Or || Opcode == Xor)
+ return true;
+
+ // Add/Mul reassociate unless they are FP or FP vectors.
+ if (Opcode == Add || Opcode == Mul)
+ return !Ty->isFPOrFPVector();
+ return false;
+}
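+
+// Example (illustrative): under IEEE-754 doubles, (0.1 + 0.2) + 0.3 rounds
+// to 0.6000000000000001 while 0.1 + (0.2 + 0.3) rounds to 0.6, which is why
+// Add and Mul are not treated as associative on FP or FP-vector types.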
+
+/// isCommutative - Return true if the instruction is commutative:
+///
+/// Commutative operators satisfy: (x op y) === (y op x)
+///
+/// In LLVM, these are the associative operators. Unlike associativity,
+/// commutativity also holds when Add and Mul are applied to floating-point
+/// types.
+///
+bool Instruction::isCommutative(unsigned op) {
+ switch (op) {
+ case Add:
+ case Mul:
+ case And:
+ case Or:
+ case Xor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isTrapping - Return true if the instruction may trap.
+///
+bool Instruction::isTrapping(unsigned op) {
+ switch(op) {
+ case UDiv:
+ case SDiv:
+ case FDiv:
+ case URem:
+ case SRem:
+ case FRem:
+ case Load:
+ case Store:
+ case Call:
+ case Invoke:
+ case VAArg:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
new file mode 100644
index 0000000..fe30271
--- /dev/null
+++ b/lib/VMCore/Instructions.cpp
@@ -0,0 +1,2963 @@
+//===-- Instructions.cpp - Implement the LLVM instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements all of the non-inline methods for the LLVM instruction
+// classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CallSite Class
+//===----------------------------------------------------------------------===//
+
+#define CALLSITE_DELEGATE_GETTER(METHOD) \
+ Instruction *II(getInstruction()); \
+ return isCall() \
+ ? cast<CallInst>(II)->METHOD \
+ : cast<InvokeInst>(II)->METHOD
+
+#define CALLSITE_DELEGATE_SETTER(METHOD) \
+ Instruction *II(getInstruction()); \
+ if (isCall()) \
+ cast<CallInst>(II)->METHOD; \
+ else \
+ cast<InvokeInst>(II)->METHOD
+
+CallSite::CallSite(Instruction *C) {
+ assert((isa<CallInst>(C) || isa<InvokeInst>(C)) && "Not a call!");
+ I.setPointer(C);
+ I.setInt(isa<CallInst>(C));
+}
+unsigned CallSite::getCallingConv() const {
+ CALLSITE_DELEGATE_GETTER(getCallingConv());
+}
+void CallSite::setCallingConv(unsigned CC) {
+ CALLSITE_DELEGATE_SETTER(setCallingConv(CC));
+}
+const AttrListPtr &CallSite::getAttributes() const {
+ CALLSITE_DELEGATE_GETTER(getAttributes());
+}
+void CallSite::setAttributes(const AttrListPtr &PAL) {
+ CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
+}
+bool CallSite::paramHasAttr(uint16_t i, Attributes attr) const {
+ CALLSITE_DELEGATE_GETTER(paramHasAttr(i, attr));
+}
+uint16_t CallSite::getParamAlignment(uint16_t i) const {
+ CALLSITE_DELEGATE_GETTER(getParamAlignment(i));
+}
+bool CallSite::doesNotAccessMemory() const {
+ CALLSITE_DELEGATE_GETTER(doesNotAccessMemory());
+}
+void CallSite::setDoesNotAccessMemory(bool doesNotAccessMemory) {
+ CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory(doesNotAccessMemory));
+}
+bool CallSite::onlyReadsMemory() const {
+ CALLSITE_DELEGATE_GETTER(onlyReadsMemory());
+}
+void CallSite::setOnlyReadsMemory(bool onlyReadsMemory) {
+ CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory(onlyReadsMemory));
+}
+bool CallSite::doesNotReturn() const {
+ CALLSITE_DELEGATE_GETTER(doesNotReturn());
+}
+void CallSite::setDoesNotReturn(bool doesNotReturn) {
+ CALLSITE_DELEGATE_SETTER(setDoesNotReturn(doesNotReturn));
+}
+bool CallSite::doesNotThrow() const {
+ CALLSITE_DELEGATE_GETTER(doesNotThrow());
+}
+void CallSite::setDoesNotThrow(bool doesNotThrow) {
+ CALLSITE_DELEGATE_SETTER(setDoesNotThrow(doesNotThrow));
+}
+
+bool CallSite::hasArgument(const Value *Arg) const {
+ for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E; ++AI)
+ if (AI->get() == Arg)
+ return true;
+ return false;
+}
+
+#undef CALLSITE_DELEGATE_GETTER
+#undef CALLSITE_DELEGATE_SETTER
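+
+// Usage sketch (illustrative): CallSite lets passes treat calls and invokes
+// uniformly through the delegating accessors above:
+//
+//   CallSite CS(I);          // I is a CallInst or InvokeInst
+//   if (CS.doesNotThrow())
+//     ...handle a nounwind call site...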
+
+//===----------------------------------------------------------------------===//
+// TerminatorInst Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc has a home.
+TerminatorInst::~TerminatorInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// UnaryInstruction Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc has a home.
+UnaryInstruction::~UnaryInstruction() {
+}
+
+//===----------------------------------------------------------------------===//
+// SelectInst Class
+//===----------------------------------------------------------------------===//
+
+/// areInvalidOperands - Return a string if the specified operands are invalid
+/// for a select operation, otherwise return null.
+const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
+ if (Op1->getType() != Op2->getType())
+ return "both values to select must have same type";
+
+ if (const VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
+ // Vector select.
+ if (VT->getElementType() != Type::Int1Ty)
+ return "vector select condition element type must be i1";
+ const VectorType *ET = dyn_cast<VectorType>(Op1->getType());
+ if (ET == 0)
+ return "selected values for vector select must be vectors";
+ if (ET->getNumElements() != VT->getNumElements())
+ return "vector select requires selected vectors to have "
+ "the same vector length as select condition";
+ } else if (Op0->getType() != Type::Int1Ty) {
+ return "select condition must be i1 or <n x i1>";
+ }
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PHINode Class
+//===----------------------------------------------------------------------===//
+
+PHINode::PHINode(const PHINode &PN)
+ : Instruction(PN.getType(), Instruction::PHI,
+ allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
+ ReservedSpace(PN.getNumOperands()) {
+ Use *OL = OperandList;
+ for (unsigned i = 0, e = PN.getNumOperands(); i != e; i+=2) {
+ OL[i] = PN.getOperand(i);
+ OL[i+1] = PN.getOperand(i+1);
+ }
+}
+
+PHINode::~PHINode() {
+ if (OperandList)
+ dropHungoffUses(OperandList);
+}
+
+// removeIncomingValue - Remove an incoming value. This is useful if a
+// predecessor basic block is deleted.
+Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+ assert(Idx*2 < NumOps && "BB not in PHI node!");
+ Value *Removed = OL[Idx*2];
+
+ // Move everything after this operand down.
+ //
+ // FIXME: we could just swap with the end of the list, then erase. However,
+ // clients might not expect this to happen. The code as it is thrashes the
+ // use/def lists, which is kinda lame.
+ for (unsigned i = (Idx+1)*2; i != NumOps; i += 2) {
+ OL[i-2] = OL[i];
+ OL[i-2+1] = OL[i+1];
+ }
+
+ // Nuke the last value.
+ OL[NumOps-2].set(0);
+ OL[NumOps-2+1].set(0);
+ NumOperands = NumOps-2;
+
+ // If the PHI node is dead, because it has zero entries, nuke it now.
+ if (NumOps == 2 && DeletePHIIfEmpty) {
+ // If anyone is using this PHI, make them use a dummy value instead...
+ replaceAllUsesWith(UndefValue::get(getType()));
+ eraseFromParent();
+ }
+ return Removed;
+}
+
+/// resizeOperands - This adjusts the length of the operands list according
+/// to the following behavior:
+/// 1. If NumOps == 0, grow the operand list in response to a push_back style
+/// of operation. This grows the number of ops by 1.5 times.
+/// 2. If NumOps > NumOperands, reserve space for NumOps operands.
+/// 3. If NumOps == NumOperands, trim the reserved space.
+///
+void PHINode::resizeOperands(unsigned NumOps) {
+ unsigned e = getNumOperands();
+ if (NumOps == 0) {
+ NumOps = e*3/2;
+ if (NumOps < 4) NumOps = 4; // 4 op PHI nodes are VERY common.
+ } else if (NumOps*2 > NumOperands) {
+ // No resize needed.
+ if (ReservedSpace >= NumOps) return;
+ } else if (NumOps == NumOperands) {
+ if (ReservedSpace == NumOps) return;
+ } else {
+ return;
+ }
+
+ ReservedSpace = NumOps;
+ Use *OldOps = OperandList;
+ Use *NewOps = allocHungoffUses(NumOps);
+ std::copy(OldOps, OldOps + e, NewOps);
+ OperandList = NewOps;
+ if (OldOps) Use::zap(OldOps, OldOps + e, true);
+}
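+
+// Example (illustrative): a push_back-style grow (NumOps == 0) on a PHI
+// with 8 operands (4 incoming value/block pairs) reserves 12; the minimum
+// of 4 operands covers the very common two-predecessor PHI.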
+
+/// hasConstantValue - If the specified PHI node always merges together the same
+/// value, return the value, otherwise return null.
+///
+Value *PHINode::hasConstantValue(bool AllowNonDominatingInstruction) const {
+ // If the PHI node only has one incoming value, eliminate the PHI node...
+ if (getNumIncomingValues() == 1) {
+ if (getIncomingValue(0) != this) // not X = phi X
+ return getIncomingValue(0);
+ else
+ return UndefValue::get(getType()); // Self cycle is dead.
+ }
+
+ // Otherwise if all of the incoming values are the same for the PHI, replace
+ // the PHI node with the incoming value.
+ //
+ Value *InVal = 0;
+ bool HasUndefInput = false;
+ for (unsigned i = 0, e = getNumIncomingValues(); i != e; ++i)
+ if (isa<UndefValue>(getIncomingValue(i))) {
+ HasUndefInput = true;
+ } else if (getIncomingValue(i) != this) { // Not the PHI node itself...
+ if (InVal && getIncomingValue(i) != InVal)
+ return 0; // Not the same, bail out.
+ else
+ InVal = getIncomingValue(i);
+ }
+
+ // The only case that could cause InVal to be null is if we have a PHI node
+ // that only has entries for itself. In this case, there is no entry into the
+ // loop, so kill the PHI.
+ //
+ if (InVal == 0) InVal = UndefValue::get(getType());
+
+ // If we have a PHI node like phi(X, undef, X), where X is defined by some
+ // instruction, we cannot always return X as the result of the PHI node. Only
+ // do this if X is not an instruction (thus it must dominate the PHI block),
+ // or if the client is prepared to deal with this possibility.
+ if (HasUndefInput && !AllowNonDominatingInstruction)
+ if (Instruction *IV = dyn_cast<Instruction>(InVal))
+ // If it's in the entry block, it dominates everything.
+ if (IV->getParent() != &IV->getParent()->getParent()->getEntryBlock() ||
+ isa<InvokeInst>(IV))
+ return 0; // Cannot guarantee that InVal dominates this PHINode.
+
+ // All of the incoming values are the same, return the value now.
+ return InVal;
+}
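+
+// Example (illustrative): for %p = phi i32 [ %x, %a ], [ %x, %b ] this
+// returns %x; for phi i32 [ %x, %a ], [ undef, %b ] it returns %x only when
+// %x is known to dominate the PHI (a non-instruction, or an ordinary
+// instruction in the entry block) or the caller opted in via
+// AllowNonDominatingInstruction.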
+
+
+//===----------------------------------------------------------------------===//
+// CallInst Implementation
+//===----------------------------------------------------------------------===//
+
+CallInst::~CallInst() {
+}
+
+void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
+ assert(NumOperands == NumParams+1 && "NumOperands not set up?");
+ Use *OL = OperandList;
+ OL[0] = Func;
+
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+ FTy = FTy; // silence warning.
+
+ assert((NumParams == FTy->getNumParams() ||
+ (FTy->isVarArg() && NumParams > FTy->getNumParams())) &&
+ "Calling a function with bad signature!");
+ for (unsigned i = 0; i != NumParams; ++i) {
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Params[i]->getType()) &&
+ "Calling a function with a bad signature!");
+ OL[i+1] = Params[i];
+ }
+}
+
+void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
+ assert(NumOperands == 3 && "NumOperands not set up?");
+ Use *OL = OperandList;
+ OL[0] = Func;
+ OL[1] = Actual1;
+ OL[2] = Actual2;
+
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+ FTy = FTy; // silence warning.
+
+ assert((FTy->getNumParams() == 2 ||
+ (FTy->isVarArg() && FTy->getNumParams() < 2)) &&
+ "Calling a function with bad signature");
+ assert((0 >= FTy->getNumParams() ||
+ FTy->getParamType(0) == Actual1->getType()) &&
+ "Calling a function with a bad signature!");
+ assert((1 >= FTy->getNumParams() ||
+ FTy->getParamType(1) == Actual2->getType()) &&
+ "Calling a function with a bad signature!");
+}
+
+void CallInst::init(Value *Func, Value *Actual) {
+ assert(NumOperands == 2 && "NumOperands not set up?");
+ Use *OL = OperandList;
+ OL[0] = Func;
+ OL[1] = Actual;
+
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+ FTy = FTy; // silence warning.
+
+ assert((FTy->getNumParams() == 1 ||
+ (FTy->isVarArg() && FTy->getNumParams() == 0)) &&
+ "Calling a function with bad signature");
+ assert((0 == FTy->getNumParams() ||
+ FTy->getParamType(0) == Actual->getType()) &&
+ "Calling a function with a bad signature!");
+}
+
+void CallInst::init(Value *Func) {
+ assert(NumOperands == 1 && "NumOperands not set up?");
+ Use *OL = OperandList;
+ OL[0] = Func;
+
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+ FTy = FTy; // silence warning.
+
+ assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
+}
+
+CallInst::CallInst(Value *Func, Value* Actual, const std::string &Name,
+ Instruction *InsertBefore)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 2,
+ 2, InsertBefore) {
+ init(Func, Actual);
+ setName(Name);
+}
+
+CallInst::CallInst(Value *Func, Value* Actual, const std::string &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 2,
+ 2, InsertAtEnd) {
+ init(Func, Actual);
+ setName(Name);
+}
+CallInst::CallInst(Value *Func, const std::string &Name,
+ Instruction *InsertBefore)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ init(Func);
+ setName(Name);
+}
+
+CallInst::CallInst(Value *Func, const std::string &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ init(Func);
+ setName(Name);
+}
+
+CallInst::CallInst(const CallInst &CI)
+ : Instruction(CI.getType(), Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
+ CI.getNumOperands()) {
+ setAttributes(CI.getAttributes());
+ SubclassData = CI.SubclassData;
+ Use *OL = OperandList;
+ Use *InOL = CI.OperandList;
+ for (unsigned i = 0, e = CI.getNumOperands(); i != e; ++i)
+ OL[i] = InOL[i];
+}
+
+void CallInst::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void CallInst::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+bool CallInst::paramHasAttr(unsigned i, Attributes attr) const {
+ if (AttributeList.paramHasAttr(i, attr))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->paramHasAttr(i, attr);
+ return false;
+}
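+
+// Note (editorial): paramHasAttr above consults the call-site attribute
+// list first and then falls back to the callee's declared attributes, so a
+// parameter attribute on the callee is honored even when the call
+// instruction itself carries none.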
+
+
+//===----------------------------------------------------------------------===//
+// InvokeInst Implementation
+//===----------------------------------------------------------------------===//
+
+void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
+ Value* const *Args, unsigned NumArgs) {
+ assert(NumOperands == 3+NumArgs && "NumOperands not set up?");
+ Use *OL = OperandList;
+ OL[0] = Fn;
+ OL[1] = IfNormal;
+ OL[2] = IfException;
+ const FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
+ FTy = FTy; // silence warning.
+
+ assert(((NumArgs == FTy->getNumParams()) ||
+ (FTy->isVarArg() && NumArgs > FTy->getNumParams())) &&
+ "Calling a function with bad signature");
+
+ for (unsigned i = 0, e = NumArgs; i != e; i++) {
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Invoking a function with a bad signature!");
+
+ OL[i+3] = Args[i];
+ }
+}
+
+InvokeInst::InvokeInst(const InvokeInst &II)
+ : TerminatorInst(II.getType(), Instruction::Invoke,
+ OperandTraits<InvokeInst>::op_end(this)
+ - II.getNumOperands(),
+ II.getNumOperands()) {
+ setAttributes(II.getAttributes());
+ SubclassData = II.SubclassData;
+ Use *OL = OperandList, *InOL = II.OperandList;
+ for (unsigned i = 0, e = II.getNumOperands(); i != e; ++i)
+ OL[i] = InOL[i];
+}
+
+BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned InvokeInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ return setSuccessor(idx, B);
+}
+
+bool InvokeInst::paramHasAttr(unsigned i, Attributes attr) const {
+ if (AttributeList.paramHasAttr(i, attr))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->paramHasAttr(i, attr);
+ return false;
+}
+
+void InvokeInst::addAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.addAttr(i, attr);
+ setAttributes(PAL);
+}
+
+void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
+ AttrListPtr PAL = getAttributes();
+ PAL = PAL.removeAttr(i, attr);
+ setAttributes(PAL);
+}
+
+
+//===----------------------------------------------------------------------===//
+// ReturnInst Implementation
+//===----------------------------------------------------------------------===//
+
+ReturnInst::ReturnInst(const ReturnInst &RI)
+ : TerminatorInst(Type::VoidTy, Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) -
+ RI.getNumOperands(),
+ RI.getNumOperands()) {
+ if (RI.getNumOperands())
+ Op<0>() = RI.Op<0>();
+}
+
+ReturnInst::ReturnInst(Value *retVal, Instruction *InsertBefore)
+ : TerminatorInst(Type::VoidTy, Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertBefore) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(Value *retVal, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::VoidTy, Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertAtEnd) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::VoidTy, Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
+}
+
+unsigned ReturnInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to
+/// emit the vtable for the class in this translation unit.
+void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ assert(0 && "ReturnInst has no successors!");
+}
+
+BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
+ assert(0 && "ReturnInst has no successors!");
+ abort();
+ return 0;
+}
+
+ReturnInst::~ReturnInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// UnwindInst Implementation
+//===----------------------------------------------------------------------===//
+
+UnwindInst::UnwindInst(Instruction *InsertBefore)
+ : TerminatorInst(Type::VoidTy, Instruction::Unwind, 0, 0, InsertBefore) {
+}
+UnwindInst::UnwindInst(BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::VoidTy, Instruction::Unwind, 0, 0, InsertAtEnd) {
+}
+
+
+unsigned UnwindInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void UnwindInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ assert(0 && "UnwindInst has no successors!");
+}
+
+BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const {
+ assert(0 && "UnwindInst has no successors!");
+ abort();
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// UnreachableInst Implementation
+//===----------------------------------------------------------------------===//
+
+UnreachableInst::UnreachableInst(Instruction *InsertBefore)
+ : TerminatorInst(Type::VoidTy, Instruction::Unreachable, 0, 0, InsertBefore) {
+}
+UnreachableInst::UnreachableInst(BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::VoidTy, Instruction::Unreachable, 0, 0, InsertAtEnd) {
+}
+
+unsigned UnreachableInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ assert(0 && "UnreachableInst has no successors!");
+}
+
+BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
+ assert(0 && "UnreachableInst has no successors!");
+ abort();
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// BranchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void BranchInst::AssertOK() {
+ if (isConditional())
+ assert(getCondition()->getType() == Type::Int1Ty &&
+ "May only branch on boolean predicates!");
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
+ : TerminatorInst(Type::VoidTy, Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::VoidTy, Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertBefore) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::VoidTy, Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::VoidTy, Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertAtEnd) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+
+BranchInst::BranchInst(const BranchInst &BI) :
+ TerminatorInst(Type::VoidTy, Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
+ BI.getNumOperands()) {
+ Op<-1>() = BI.Op<-1>();
+ if (BI.getNumOperands() != 1) {
+ assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!");
+ Op<-3>() = BI.Op<-3>();
+ Op<-2>() = BI.Op<-2>();
+ }
+}
+
+
+Use* Use::getPrefix() {
+ PointerIntPair<Use**, 2, PrevPtrTag> &PotentialPrefix(this[-1].Prev);
+ if (PotentialPrefix.getOpaqueValue())
+ return 0;
+
+ return reinterpret_cast<Use*>((char*)&PotentialPrefix + 1);
+}
+
+BranchInst::~BranchInst() {
+ if (NumOperands == 1) {
+ if (Use *Prefix = OperandList->getPrefix()) {
+ Op<-1>() = 0;
+ //
+ // mark OperandList to have a special value for scrutiny
+ // by baseclass destructors and operator delete
+ OperandList = Prefix;
+ } else {
+ NumOperands = 3;
+ OperandList = op_begin();
+ }
+ }
+}
+
+
+BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned BranchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+
+//===----------------------------------------------------------------------===//
+// AllocationInst Implementation
+//===----------------------------------------------------------------------===//
+
+static Value *getAISize(Value *Amt) {
+ if (!Amt)
+ Amt = ConstantInt::get(Type::Int32Ty, 1);
+ else {
+ assert(!isa<BasicBlock>(Amt) &&
+ "Passed basic block into allocation size parameter! Use other ctor");
+ assert(Amt->getType() == Type::Int32Ty &&
+ "Malloc/Allocation array size is not a 32-bit integer!");
+ }
+ return Amt;
+}
+
+AllocationInst::AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy,
+ unsigned Align, const std::string &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), iTy, getAISize(ArraySize),
+ InsertBefore) {
+ setAlignment(Align);
+ assert(Ty != Type::VoidTy && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocationInst::AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy,
+ unsigned Align, const std::string &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), iTy, getAISize(ArraySize),
+ InsertAtEnd) {
+ setAlignment(Align);
+ assert(Ty != Type::VoidTy && "Cannot allocate void!");
+ setName(Name);
+}
+
+// Out-of-line virtual method, so the vtable, etc. has a home.
+AllocationInst::~AllocationInst() {
+}
+
+void AllocationInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ SubclassData = Log2_32(Align) + 1;
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+bool AllocationInst::isArrayAllocation() const {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
+ return CI->getZExtValue() != 1;
+ return true;
+}
+
+const Type *AllocationInst::getAllocatedType() const {
+ return getType()->getElementType();
+}
+
+AllocaInst::AllocaInst(const AllocaInst &AI)
+ : AllocationInst(AI.getType()->getElementType(), (Value*)AI.getOperand(0),
+ Instruction::Alloca, AI.getAlignment()) {
+}
+
+/// isStaticAlloca - Return true if this alloca is in the entry block of the
+/// function and is a constant size. If so, the code generator will fold it
+/// into the prolog/epilog code, so it is basically free.
+bool AllocaInst::isStaticAlloca() const {
+ // Must be constant size.
+ if (!isa<ConstantInt>(getArraySize())) return false;
+
+ // Must be in the entry block.
+ const BasicBlock *Parent = getParent();
+ return Parent == &Parent->getParent()->front();
+}
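+
+// For illustration, this accepts e.g.
+//   entry:
+//     %buf = alloca [16 x i8]   ; constant size, in the entry block
+// but rejects "alloca i8, i32 %n" (non-constant size) and any alloca
+// outside the entry block.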
+
+MallocInst::MallocInst(const MallocInst &MI)
+ : AllocationInst(MI.getType()->getElementType(), (Value*)MI.getOperand(0),
+ Instruction::Malloc, MI.getAlignment()) {
+}
+
+//===----------------------------------------------------------------------===//
+// FreeInst Implementation
+//===----------------------------------------------------------------------===//
+
+void FreeInst::AssertOK() {
+ assert(isa<PointerType>(getOperand(0)->getType()) &&
+ "Can not free something of nonpointer type!");
+}
+
+FreeInst::FreeInst(Value *Ptr, Instruction *InsertBefore)
+ : UnaryInstruction(Type::VoidTy, Free, Ptr, InsertBefore) {
+ AssertOK();
+}
+
+FreeInst::FreeInst(Value *Ptr, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(Type::VoidTy, Free, Ptr, InsertAtEnd) {
+ AssertOK();
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoadInst Implementation
+//===----------------------------------------------------------------------===//
+
+void LoadInst::AssertOK() {
+ assert(isa<PointerType>(getOperand(0)->getType()) &&
+ "Ptr must have pointer type.");
+}
+
+LoadInst::LoadInst(Value *Ptr, const std::string &Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const std::string &Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
+ Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
+ unsigned Align, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const std::string &Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ setName(Name);
+}
+
+
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ Instruction *InsertBef)
+: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+void LoadInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ SubclassData = (SubclassData & 1) | ((Log2_32(Align)+1)<<1);
+}
+
+//===----------------------------------------------------------------------===//
+// StoreInst Implementation
+//===----------------------------------------------------------------------===//
+
+void StoreInst::AssertOK() {
+ assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
+ assert(isa<PointerType>(getOperand(1)->getType()) &&
+ "Ptr must have pointer type!");
+ assert(getOperand(0)->getType() ==
+ cast<PointerType>(getOperand(1)->getType())->getElementType()
+ && "Ptr must be a pointer to Val type!");
+}
+
+
+StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
+ : Instruction(Type::VoidTy, Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
+ : Instruction(Type::VoidTy, Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ Instruction *InsertBefore)
+ : Instruction(Type::VoidTy, Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, Instruction *InsertBefore)
+ : Instruction(Type::VoidTy, Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAtEnd)
+ : Instruction(Type::VoidTy, Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::VoidTy, Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ AssertOK();
+}
+
+void StoreInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ SubclassData = (SubclassData & 1) | ((Log2_32(Align)+1)<<1);
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtrInst Implementation
+//===----------------------------------------------------------------------===//
+
+static unsigned retrieveAddrSpace(const Value *Val) {
+ return cast<PointerType>(Val->getType())->getAddressSpace();
+}
+
+void GetElementPtrInst::init(Value *Ptr, Value* const *Idx, unsigned NumIdx,
+ const std::string &Name) {
+ assert(NumOperands == 1+NumIdx && "NumOperands not initialized?");
+ Use *OL = OperandList;
+ OL[0] = Ptr;
+
+ for (unsigned i = 0; i != NumIdx; ++i)
+ OL[i+1] = Idx[i];
+
+ setName(Name);
+}
+
+void GetElementPtrInst::init(Value *Ptr, Value *Idx, const std::string &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+ Use *OL = OperandList;
+ OL[0] = Ptr;
+ OL[1] = Idx;
+
+ setName(Name);
+}
+
+GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
+ : Instruction(GEPI.getType(), GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this)
+ - GEPI.getNumOperands(),
+ GEPI.getNumOperands()) {
+ Use *OL = OperandList;
+ Use *GEPIOL = GEPI.OperandList;
+ for (unsigned i = 0, E = NumOperands; i != E; ++i)
+ OL[i] = GEPIOL[i];
+}
+
+GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
+ const std::string &Name, Instruction *InBe)
+ : Instruction(PointerType::get(checkType(getIndexedType(Ptr->getType(),Idx)),
+ retrieveAddrSpace(Ptr)),
+ GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) - 2,
+ 2, InBe) {
+ init(Ptr, Idx, Name);
+}
+
+GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
+ const std::string &Name, BasicBlock *IAE)
+ : Instruction(PointerType::get(checkType(getIndexedType(Ptr->getType(),Idx)),
+ retrieveAddrSpace(Ptr)),
+ GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) - 2,
+ 2, IAE) {
+ init(Ptr, Idx, Name);
+}
+
+/// getIndexedType - Returns the type of the element that would be accessed with
+/// a gep instruction with the specified parameters.
+///
+/// The Idxs pointer should point to a contiguous piece of memory containing the
+/// indices, either as Value* or uint64_t.
+///
+/// A null type is returned if the indices are invalid for the specified
+/// pointer type.
+///
+template <typename IndexTy>
+static const Type* getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
+ unsigned NumIdx) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ptr);
+ if (!PTy) return 0; // Type isn't a pointer type!
+ const Type *Agg = PTy->getElementType();
+
+ // Handle the special case of the empty index set, which is always valid.
+ if (NumIdx == 0)
+ return Agg;
+
+ // If there is at least one index, the top-level type must be sized,
+ // otherwise it cannot be 'stepped over'. We explicitly allow abstract types
+ // (those that contain opaque types) under the assumption that they will be
+ // resolved to sane types later.
+ if (!Agg->isSized() && !Agg->isAbstract())
+ return 0;
+
+ unsigned CurIdx = 1;
+ for (; CurIdx != NumIdx; ++CurIdx) {
+ const CompositeType *CT = dyn_cast<CompositeType>(Agg);
+ if (!CT || isa<PointerType>(CT)) return 0;
+ IndexTy Index = Idxs[CurIdx];
+ if (!CT->indexValid(Index)) return 0;
+ Agg = CT->getTypeAtIndex(Index);
+
+ // If the new type forwards to another type, then it is in the middle
+ // of being refined to another type (and hence, may have dropped all
+ // references to what it was using before). So, use the new forwarded
+ // type.
+ if (const Type *Ty = Agg->getForwardedType())
+ Agg = Ty;
+ }
+ return CurIdx == NumIdx ? Agg : 0;
+}
+
+const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
+ Value* const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
+ uint64_t const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+const Type* GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) {
+ const PointerType *PTy = dyn_cast<PointerType>(Ptr);
+ if (!PTy) return 0; // Type isn't a pointer type!
+
+ // Check the pointer index.
+ if (!PTy->indexValid(Idx)) return 0;
+
+ return PTy->getElementType();
+}
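+
+// For illustration, with %T = { i32, [4 x float] } the index list
+// (0, 1, 2) applied to a %T* steps pointer -> struct -> array -> float,
+// so "getelementptr %T* %p, i32 0, i32 1, i32 2" has type float*.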
+
+
+/// hasAllZeroIndices - Return true if all of the indices of this GEP are
+/// zeros. If so, the result pointer and the first operand have the same
+/// value, just potentially different types.
+bool GetElementPtrInst::hasAllZeroIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(i))) {
+ if (!CI->isZero()) return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
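+
+// For illustration:
+//   %p = getelementptr { i32, float }* %s, i32 0, i32 0
+// has all-zero indices, so %p points at the same address as %s and
+// differs only in type (i32* instead of { i32, float }*).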
+
+/// hasAllConstantIndices - Return true if all of the indices of this GEP are
+/// constant integers. If so, the result pointer and the first operand have
+/// a constant offset between them.
+bool GetElementPtrInst::hasAllConstantIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ }
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ExtractElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const std::string &Name,
+ Instruction *InsertBef)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertBef) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+ExtractElementInst::ExtractElementInst(Value *Val, unsigned IndexV,
+ const std::string &Name,
+ Instruction *InsertBef)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertBef) {
+ Constant *Index = ConstantInt::get(Type::Int32Ty, IndexV);
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const std::string &Name,
+ BasicBlock *InsertAE)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertAE) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+ExtractElementInst::ExtractElementInst(Value *Val, unsigned IndexV,
+ const std::string &Name,
+ BasicBlock *InsertAE)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertAE) {
+ Constant *Index = ConstantInt::get(Type::Int32Ty, IndexV);
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+
+bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
+ if (!isa<VectorType>(Val->getType()) || Index->getType() != Type::Int32Ty)
+ return false;
+ return true;
+}
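+
+// For illustration, "extractelement <4 x float> %v, i32 2" is valid;
+// a non-vector first operand or a non-i32 index is rejected.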
+
+
+//===----------------------------------------------------------------------===//
+// InsertElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+InsertElementInst::InsertElementInst(const InsertElementInst &IE)
+ : Instruction(IE.getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this), 3) {
+ Op<0>() = IE.Op<0>();
+ Op<1>() = IE.Op<1>();
+ Op<2>() = IE.Op<2>();
+}
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const std::string &Name,
+ Instruction *InsertBef)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertBef) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, unsigned IndexV,
+ const std::string &Name,
+ Instruction *InsertBef)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertBef) {
+ Constant *Index = ConstantInt::get(Type::Int32Ty, IndexV);
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const std::string &Name,
+ BasicBlock *InsertAE)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertAE) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, unsigned IndexV,
+ const std::string &Name,
+ BasicBlock *InsertAE)
+: Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertAE) {
+ Constant *Index = ConstantInt::get(Type::Int32Ty, IndexV);
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
+ const Value *Index) {
+ if (!isa<VectorType>(Vec->getType()))
+ return false; // First operand of insertelement must be vector type.
+
+ if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
+ return false; // Second operand of insertelement must be vector element type.
+
+ if (Index->getType() != Type::Int32Ty)
+ return false; // Third operand of insertelement must be i32.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ShuffleVectorInst Implementation
+//===----------------------------------------------------------------------===//
+
+ShuffleVectorInst::ShuffleVectorInst(const ShuffleVectorInst &SV)
+ : Instruction(SV.getType(), ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this)) {
+ Op<0>() = SV.Op<0>();
+ Op<1>() = SV.Op<1>();
+ Op<2>() = SV.Op<2>();
+}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const std::string &Name,
+ Instruction *InsertBefore)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertBefore) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const std::string &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(V1->getType(), ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertAtEnd) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
+ const Value *Mask) {
+ if (!isa<VectorType>(V1->getType()) || V1->getType() != V2->getType())
+ return false;
+
+ const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
+ if (!isa<Constant>(Mask) || MaskTy == 0 ||
+ MaskTy->getElementType() != Type::Int32Ty)
+ return false;
+ return true;
+}
+
+/// getMaskValue - Return the index from the shuffle mask for the specified
+/// output result. This is either -1 if the element is undef or a number less
+/// than 2*numelements.
+int ShuffleVectorInst::getMaskValue(unsigned i) const {
+ const Constant *Mask = cast<Constant>(getOperand(2));
+ if (isa<UndefValue>(Mask)) return -1;
+ if (isa<ConstantAggregateZero>(Mask)) return 0;
+ const ConstantVector *MaskCV = cast<ConstantVector>(Mask);
+ assert(i < MaskCV->getNumOperands() && "Index out of range");
+
+ if (isa<UndefValue>(MaskCV->getOperand(i)))
+ return -1;
+ return cast<ConstantInt>(MaskCV->getOperand(i))->getZExtValue();
+}
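+
+// For illustration:
+//   %r = shufflevector <2 x i32> %a, <2 x i32> %b,
+//                      <4 x i32> <i32 0, i32 3, i32 undef, i32 1>
+// getMaskValue(0..3) returns 0, 3, -1, 1; values >= 2 (the width of %a)
+// select from the second input vector, %b.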
+
+//===----------------------------------------------------------------------===//
+// InsertValueInst Class
+//===----------------------------------------------------------------------===//
+
+void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
+ unsigned NumIdx, const std::string &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+ Op<0>() = Agg;
+ Op<1>() = Val;
+
+ Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+ setName(Name);
+}
+
+void InsertValueInst::init(Value *Agg, Value *Val, unsigned Idx,
+ const std::string &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+ Op<0>() = Agg;
+ Op<1>() = Val;
+
+ Indices.push_back(Idx);
+ setName(Name);
+}
+
+InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
+ : Instruction(IVI.getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this), 2),
+ Indices(IVI.Indices) {
+ Op<0>() = IVI.getOperand(0);
+ Op<1>() = IVI.getOperand(1);
+}
+
+InsertValueInst::InsertValueInst(Value *Agg,
+ Value *Val,
+ unsigned Idx,
+ const std::string &Name,
+ Instruction *InsertBefore)
+ : Instruction(Agg->getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this),
+ 2, InsertBefore) {
+ init(Agg, Val, Idx, Name);
+}
+
+InsertValueInst::InsertValueInst(Value *Agg,
+ Value *Val,
+ unsigned Idx,
+ const std::string &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Agg->getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this),
+ 2, InsertAtEnd) {
+ init(Agg, Val, Idx, Name);
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractValueInst Class
+//===----------------------------------------------------------------------===//
+
+void ExtractValueInst::init(const unsigned *Idx, unsigned NumIdx,
+ const std::string &Name) {
+ assert(NumOperands == 1 && "NumOperands not initialized?");
+
+ Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+ setName(Name);
+}
+
+void ExtractValueInst::init(unsigned Idx, const std::string &Name) {
+ assert(NumOperands == 1 && "NumOperands not initialized?");
+
+ Indices.push_back(Idx);
+ setName(Name);
+}
+
+ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
+ : UnaryInstruction(EVI.getType(), ExtractValue, EVI.getOperand(0)),
+ Indices(EVI.Indices) {
+}
+
+// getIndexedType - Returns the type of the element that would be extracted
+// with an extractvalue instruction with the specified parameters.
+//
+// A null type is returned if the indices are invalid for the specified
+// aggregate type.
+//
+const Type* ExtractValueInst::getIndexedType(const Type *Agg,
+ const unsigned *Idxs,
+ unsigned NumIdx) {
+ unsigned CurIdx = 0;
+ for (; CurIdx != NumIdx; ++CurIdx) {
+ const CompositeType *CT = dyn_cast<CompositeType>(Agg);
+ if (!CT || isa<PointerType>(CT) || isa<VectorType>(CT)) return 0;
+ unsigned Index = Idxs[CurIdx];
+ if (!CT->indexValid(Index)) return 0;
+ Agg = CT->getTypeAtIndex(Index);
+
+ // If the new type forwards to another type, then it is in the middle
+ // of being refined to another type (and hence, may have dropped all
+ // references to what it was using before). So, use the new forwarded
+ // type.
+ if (const Type *Ty = Agg->getForwardedType())
+ Agg = Ty;
+ }
+ return CurIdx == NumIdx ? Agg : 0;
+}
+
+const Type* ExtractValueInst::getIndexedType(const Type *Agg,
+ unsigned Idx) {
+ return getIndexedType(Agg, &Idx, 1);
+}
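+
+// For illustration, with %T = { i32, { float, [2 x i8] } } the index
+// list (1, 1, 0) selects i8, as in
+//   %c = extractvalue %T %agg, 1, 1, 0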
+
+//===----------------------------------------------------------------------===//
+// BinaryOperator Class
+//===----------------------------------------------------------------------===//
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ const Type *Ty, const std::string &Name,
+ Instruction *InsertBefore)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertBefore) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ const Type *Ty, const std::string &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+
+void BinaryOperator::init(BinaryOps iType) {
+ Value *LHS = getOperand(0), *RHS = getOperand(1);
+ LHS = LHS; RHS = RHS; // Silence warnings.
+ assert(LHS->getType() == RHS->getType() &&
+ "Binary operator operand types must match!");
+#ifndef NDEBUG
+ switch (iType) {
+ case Add: case Sub:
+ case Mul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isInteger() || getType()->isFloatingPoint() ||
+ isa<VectorType>(getType())) &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case UDiv:
+ case SDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isInteger() || (isa<VectorType>(getType()) &&
+ cast<VectorType>(getType())->getElementType()->isInteger())) &&
+ "Incorrect operand type (not integer) for S/UDIV");
+ break;
+ case FDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) &&
+ cast<VectorType>(getType())->getElementType()->isFloatingPoint()))
+ && "Incorrect operand type (not floating point) for FDIV");
+ break;
+ case URem:
+ case SRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isInteger() || (isa<VectorType>(getType()) &&
+ cast<VectorType>(getType())->getElementType()->isInteger())) &&
+ "Incorrect operand type (not integer) for S/UREM");
+ break;
+ case FRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) &&
+ cast<VectorType>(getType())->getElementType()->isFloatingPoint()))
+ && "Incorrect operand type (not floating point) for FREM");
+ break;
+ case Shl:
+ case LShr:
+ case AShr:
+ assert(getType() == LHS->getType() &&
+ "Shift operation should return same type as operands!");
+ assert((getType()->isInteger() ||
+ (isa<VectorType>(getType()) &&
+ cast<VectorType>(getType())->getElementType()->isInteger())) &&
+ "Tried to create a shift operation on a non-integral type!");
+ break;
+ case And: case Or:
+ case Xor:
+ assert(getType() == LHS->getType() &&
+ "Logical operation should return same type as operands!");
+ assert((getType()->isInteger() ||
+ (isa<VectorType>(getType()) &&
+ cast<VectorType>(getType())->getElementType()->isInteger())) &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ default:
+ break;
+ }
+#endif
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const std::string &Name,
+ Instruction *InsertBefore) {
+ assert(S1->getType() == S2->getType() &&
+ "Cannot create binary operator with two operands of differing type!");
+ return new BinaryOperator(Op, S1, S2, S1->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ BinaryOperator *Res = Create(Op, S1, S2, Name);
+ InsertAtEnd->getInstList().push_back(Res);
+ return Res;
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const std::string &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const std::string &Name,
+ Instruction *InsertBefore) {
+ Constant *C;
+ if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
+ C = ConstantInt::getAllOnesValue(PTy->getElementType());
+ C = ConstantVector::get(std::vector<Constant*>(PTy->getNumElements(), C));
+ } else {
+ C = ConstantInt::getAllOnesValue(Op->getType());
+ }
+
+ return new BinaryOperator(Instruction::Xor, Op, C,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ Constant *AllOnes;
+ if (const VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
+ // Create a vector of all ones values.
+ Constant *Elt = ConstantInt::getAllOnesValue(PTy->getElementType());
+ AllOnes =
+ ConstantVector::get(std::vector<Constant*>(PTy->getNumElements(), Elt));
+ } else {
+ AllOnes = ConstantInt::getAllOnesValue(Op->getType());
+ }
+
+ return new BinaryOperator(Instruction::Xor, Op, AllOnes,
+ Op->getType(), Name, InsertAtEnd);
+}
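+
+// For illustration, CreateNot on an i32 operand emits
+//   xor i32 %op, -1
+// and on a <4 x i32> operand a xor against the all-ones splat built above.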
+
+
+// isConstantAllOnes - Helper function for several functions below
+static inline bool isConstantAllOnes(const Value *V) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI->isAllOnesValue();
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ return CV->isAllOnesValue();
+ return false;
+}
+
+bool BinaryOperator::isNeg(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::Sub)
+ return Bop->getOperand(0) ==
+ ConstantExpr::getZeroValueForNegationExpr(Bop->getType());
+ return false;
+}
+
+bool BinaryOperator::isNot(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ return (Bop->getOpcode() == Instruction::Xor &&
+ (isConstantAllOnes(Bop->getOperand(1)) ||
+ isConstantAllOnes(Bop->getOperand(0))));
+ return false;
+}
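+
+// For illustration, isNeg matches "sub i32 0, %x" and isNot matches
+// "xor i32 %x, -1" (with the all-ones constant on either side).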
+
+Value *BinaryOperator::getNegArgument(Value *BinOp) {
+ assert(isNeg(BinOp) && "getNegArgument from non-'neg' instruction!");
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
+ return getNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getNotArgument(Value *BinOp) {
+ assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
+ BinaryOperator *BO = cast<BinaryOperator>(BinOp);
+ Value *Op0 = BO->getOperand(0);
+ Value *Op1 = BO->getOperand(1);
+ if (isConstantAllOnes(Op0)) return Op1;
+
+ assert(isConstantAllOnes(Op1));
+ return Op0;
+}
+
+const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
+ return getNotArgument(const_cast<Value*>(BinOp));
+}
+
+
+// swapOperands - Exchange the two operands to this instruction. This
+// method is safe to use on any binary instruction and does not modify the
+// semantics of the instruction. If the instruction is order-dependent
+// (e.g. SetLT), the opcode is changed.
+//
+bool BinaryOperator::swapOperands() {
+ if (!isCommutative())
+ return true; // Can't commute operands
+ Op<0>().swap(Op<1>());
+ return false;
+}
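+
+// For illustration, swapOperands turns "add i32 %x, %y" into
+// "add i32 %y, %x" and returns false; on a non-commutative opcode such
+// as sub it changes nothing and returns true.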
+
+//===----------------------------------------------------------------------===//
+// CastInst Class
+//===----------------------------------------------------------------------===//
+
+// Just determine if this cast only deals with integral->integral conversion.
+bool CastInst::isIntegerCast() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Trunc:
+ return true;
+ case Instruction::BitCast:
+ return getOperand(0)->getType()->isInteger() && getType()->isInteger();
+ }
+}
+
+bool CastInst::isLosslessCast() const {
+ // Only BitCast can be lossless, exit fast if we're not BitCast
+ if (getOpcode() != Instruction::BitCast)
+ return false;
+
+ // Identity cast is always lossless
+ const Type* SrcTy = getOperand(0)->getType();
+ const Type* DstTy = getType();
+ if (SrcTy == DstTy)
+ return true;
+
+ // Pointer to pointer is always lossless.
+ if (isa<PointerType>(SrcTy))
+ return isa<PointerType>(DstTy);
+ return false; // Other types have no identity values
+}
+
+/// This function determines if the CastInst does not require any bits to be
+/// changed in order to effect the cast. Essentially, it identifies cases where
+/// no code gen is necessary for the cast, hence the name no-op cast. For
+/// example, the following are all no-op casts:
+/// # bitcast i32* %x to i8*
+/// # bitcast <2 x i32> %x to <4 x i16>
+/// # ptrtoint i32* %x to i32 ; on 32-bit platforms only
+/// @brief Determine if a cast is a no-op.
+bool CastInst::isNoopCast(const Type *IntPtrTy) const {
+ switch (getOpcode()) {
+ default:
+ assert(!"Invalid CastOp");
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return false; // These always modify bits
+ case Instruction::BitCast:
+ return true; // BitCast never modifies bits.
+ case Instruction::PtrToInt:
+ return IntPtrTy->getPrimitiveSizeInBits() ==
+ getType()->getPrimitiveSizeInBits();
+ case Instruction::IntToPtr:
+ return IntPtrTy->getPrimitiveSizeInBits() ==
+ getOperand(0)->getType()->getPrimitiveSizeInBits();
+ }
+}
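+
+// For illustration, on a target whose IntPtrTy is i32,
+//   ptrtoint i8* %p to i32   ; no-op cast
+//   ptrtoint i8* %p to i64   ; not a no-op, result is wider than a pointer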
+
+/// This function determines if a pair of casts can be eliminated and what
+/// opcode should be used in the elimination. This assumes that there are two
+/// instructions like this:
+/// * %F = firstOpcode SrcTy %x to MidTy
+/// * %S = secondOpcode MidTy %F to DstTy
+/// The function returns a resultOpcode so these two casts can be replaced with:
+/// * %Replacement = resultOpcode %SrcTy %x to DstTy
+/// If no such cast is permitted, the function returns 0.
+unsigned CastInst::isEliminableCastPair(
+ Instruction::CastOps firstOp, Instruction::CastOps secondOp,
+ const Type *SrcTy, const Type *MidTy, const Type *DstTy, const Type *IntPtrTy)
+{
+ // Define the 144 possibilities for these two cast instructions. The values
+ // in this matrix determine what to do in a given situation and select the
+ // case in the switch below. The rows correspond to firstOp, the columns
+ // correspond to secondOp. In looking at the table below, keep in mind
+ // the following cast properties:
+ //
+ // Size Compare Source Destination
+ // Operator Src ? Size Type Sign Type Sign
+ // -------- ------------ ------------------- ---------------------
+ // TRUNC > Integer Any Integral Any
+ // ZEXT < Integral Unsigned Integer Any
+ // SEXT < Integral Signed Integer Any
+ // FPTOUI n/a FloatPt n/a Integral Unsigned
+ // FPTOSI n/a FloatPt n/a Integral Signed
+ // UITOFP n/a Integral Unsigned FloatPt n/a
+ // SITOFP n/a Integral Signed FloatPt n/a
+ // FPTRUNC > FloatPt n/a FloatPt n/a
+ // FPEXT < FloatPt n/a FloatPt n/a
+ // PTRTOINT n/a Pointer n/a Integral Unsigned
+ // INTTOPTR n/a Integral Unsigned Pointer n/a
+ // BITCONVERT = FirstClass n/a FirstClass n/a
+ //
+ // NOTE: some transforms are safe, but we consider them to be non-profitable.
+ // For example, we could merge "fptoui double to uint" + "zext uint to ulong",
+ // into "fptoui double to ulong", but this loses information about the range
+ // of the produced value (we no longer know the top-part is all zeros).
+ // Further this conversion is often much more expensive for typical hardware,
+ // and causes issues when building libgcc. We disallow fptosi+sext for the
+ // same reason.
+ const unsigned numCastOps =
+ Instruction::CastOpsEnd - Instruction::CastOpsBegin;
+ static const uint8_t CastResults[numCastOps][numCastOps] = {
+ //             T        F  F  U  S  F  F  P  I  B   -+
+ //             R  Z  S  P  P  I  I  T  P  2  N  T    |
+ //             U  E  E  2  2  2  2  R  E  I  T  C    +- secondOp
+ //             N  X  X  U  S  F  F  N  X  N  2  V    |
+ //             C  T  T  I  I  P  P  C  T  T  P  T   -+
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // Trunc -+
+ { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3 }, // ZExt |
+ { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3 }, // SExt |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToUI |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToSI |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // UIToFP +- firstOp
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // SIToFP |
+ { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4 }, // FPTrunc |
+ { 99,99,99, 2, 2,99,99,10, 2,99,99, 4 }, // FPExt |
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3 }, // PtrToInt |
+ { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr |
+ { 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+
+ };
+
+ int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
+ [secondOp-Instruction::CastOpsBegin];
+ switch (ElimCase) {
+ case 0:
+ // categorically disallowed
+ return 0;
+ case 1:
+ // allowed, use first cast's opcode
+ return firstOp;
+ case 2:
+ // allowed, use second cast's opcode
+ return secondOp;
+ case 3:
+ // no-op cast in second op implies firstOp as long as the DstTy
+ // is integer
+ if (DstTy->isInteger())
+ return firstOp;
+ return 0;
+ case 4:
+ // no-op cast in second op implies firstOp as long as the DstTy
+ // is floating point
+ if (DstTy->isFloatingPoint())
+ return firstOp;
+ return 0;
+ case 5:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is an integer
+ if (SrcTy->isInteger())
+ return secondOp;
+ return 0;
+ case 6:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is a floating point
+ if (SrcTy->isFloatingPoint())
+ return secondOp;
+ return 0;
+ case 7: {
+ // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
+ unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits();
+ unsigned MidSize = MidTy->getPrimitiveSizeInBits();
+ if (MidSize >= PtrSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 8: {
+ // ext, trunc -> bitcast, if the SrcTy and DstTy are same size
+ // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
+ // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
+ unsigned SrcSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DstSize = DstTy->getPrimitiveSizeInBits();
+ if (SrcSize == DstSize)
+ return Instruction::BitCast;
+ else if (SrcSize < DstSize)
+ return firstOp;
+ return secondOp;
+ }
+ case 9: // zext, sext -> zext, because sext can't sign extend after zext
+ return Instruction::ZExt;
+ case 10:
+ // fpext followed by fptrunc is allowed if it returns to the same type as
+ // the original, in which case it's just a bitcast
+ if (SrcTy == DstTy)
+ return Instruction::BitCast;
+ return 0; // If the types are not the same we can't eliminate it.
+ case 11:
+ // bitcast followed by ptrtoint is allowed as long as the bitcast
+ // is a pointer to pointer cast.
+ if (isa<PointerType>(SrcTy) && isa<PointerType>(MidTy))
+ return secondOp;
+ return 0;
+ case 12:
+ // inttoptr, bitcast -> intptr if bitcast is a ptr to ptr cast
+ if (isa<PointerType>(MidTy) && isa<PointerType>(DstTy))
+ return firstOp;
+ return 0;
+ case 13: {
+ // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
+ unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits();
+ unsigned SrcSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DstSize = DstTy->getPrimitiveSizeInBits();
+ if (SrcSize <= PtrSize && SrcSize == DstSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 99:
+ // cast combination can't happen (error in input). This is for all cases
+ // where the MidTy is not the same for the two cast instructions.
+ assert(!"Invalid Cast Combination");
+ return 0;
+ default:
+ assert(!"Error in CastResults table!!!");
+ return 0;
+ }
+ return 0;
+}
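+
+// For illustration, a worked pair through case 8 above:
+//   %t = zext i16 %x to i32 ; firstOp
+//   %r = trunc i32 %t to i8 ; secondOp
+// folds to "trunc i16 %x to i8" since sizeof(i16) > sizeof(i8); had the
+// final type been i16 again, the pair would fold to a bitcast instead.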
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
+ const std::string &Name, Instruction *InsertBefore) {
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore);
+ case SExt: return new SExtInst (S, Ty, Name, InsertBefore);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
+ default:
+ assert(!"Invalid opcode provided");
+ }
+ return 0;
+}
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
+ const std::string &Name, BasicBlock *InsertAtEnd) {
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd);
+ case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
+ default:
+ assert(!"Invalid opcode provided");
+ }
+ return 0;
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
+ const std::string &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
+ const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
+ const std::string &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
+ const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
+ const std::string &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
+ const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
+ const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(isa<PointerType>(S->getType()) && "Invalid cast");
+ assert((Ty->isInteger() || isa<PointerType>(Ty)) &&
+ "Invalid cast");
+
+ if (Ty->isInteger())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+}
+
+/// @brief Create a BitCast or a PtrToInt cast instruction
+CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
+ const std::string &Name,
+ Instruction *InsertBefore) {
+ assert(isa<PointerType>(S->getType()) && "Invalid cast");
+ assert((Ty->isInteger() || isa<PointerType>(Ty)) &&
+ "Invalid cast");
+
+ if (Ty->isInteger())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
+ bool isSigned, const std::string &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
+ unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
+ bool isSigned, const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
+ unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
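+
+// For illustration, CreateIntegerCast picks the opcode from the widths:
+// i16 -> i32 yields ZExt (or SExt when isSigned), i32 -> i16 yields
+// Trunc, and equal widths degenerate to a BitCast.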
+
+CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
+ const std::string &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
+ unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
+ const std::string &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
+ unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
+
+// Check whether it is valid to call getCastOpcode for these types.
+// This routine must be kept in sync with getCastOpcode.
+bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
+ if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
+ return false;
+
+ if (SrcTy == DestTy)
+ return true;
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
+
+ // Run through the possibilities ...
+ if (DestTy->isInteger()) { // Casting to integral
+ if (SrcTy->isInteger()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt
+ return true;
+ } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) {
+ // Casting from vector
+ return DestBits == PTy->getBitWidth();
+ } else { // Casting from something else
+ return isa<PointerType>(SrcTy);
+ }
+ } else if (DestTy->isFloatingPoint()) { // Casting to floating pt
+ if (SrcTy->isInteger()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt
+ return true;
+ } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) {
+ // Casting from vector
+ return DestBits == PTy->getBitWidth();
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ // Casting to vector
+ if (const VectorType *SrcPTy = dyn_cast<VectorType>(SrcTy)) {
+ // Casting from vector
+ return DestPTy->getBitWidth() == SrcPTy->getBitWidth();
+ } else { // Casting from something else
+ return DestPTy->getBitWidth() == SrcBits;
+ }
+ } else if (isa<PointerType>(DestTy)) { // Casting to pointer
+ if (isa<PointerType>(SrcTy)) { // Casting from pointer
+ return true;
+ } else if (SrcTy->isInteger()) { // Casting from integral
+ return true;
+ } else { // Casting from something else
+ return false;
+ }
+ } else { // Casting to something else
+ return false;
+ }
+}
+
+// Provide a way to get a "cast" where the cast opcode is inferred from the
+// types and size of the operand. This basically parallels the logic in
+// the castIsValid function below. The following invariant should hold:
+//   castIsValid(getCastOpcode(Val, Ty), Val, Ty)
+// never asserts. In other words, this produces a "correct" casting opcode
+// for the arguments passed to it.
+// This routine must be kept in sync with isCastable.
+Instruction::CastOps
+CastInst::getCastOpcode(
+ const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) {
+ // Get the bit sizes, we'll need these
+ const Type *SrcTy = Src->getType();
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
+
+ assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
+ "Only first class types are castable!");
+
+ // Run through the possibilities ...
+ if (DestTy->isInteger()) { // Casting to integral
+ if (SrcTy->isInteger()) { // Casting from integral
+ if (DestBits < SrcBits)
+ return Trunc; // int -> smaller int
+ else if (DestBits > SrcBits) { // it's an extension
+ if (SrcIsSigned)
+ return SExt; // signed -> SEXT
+ else
+ return ZExt; // unsigned -> ZEXT
+ } else {
+ return BitCast; // Same size, No-op cast
+ }
+ } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt
+ if (DestIsSigned)
+ return FPToSI; // FP -> sint
+ else
+ return FPToUI; // FP -> uint
+ } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) {
+ assert(DestBits == PTy->getBitWidth() &&
+ "Casting vector to integer of different width");
+ PTy = NULL;
+ return BitCast; // Same size, no-op cast
+ } else {
+ assert(isa<PointerType>(SrcTy) &&
+ "Casting from a value that is not first-class type");
+ return PtrToInt; // ptr -> int
+ }
+ } else if (DestTy->isFloatingPoint()) { // Casting to floating pt
+ if (SrcTy->isInteger()) { // Casting from integral
+ if (SrcIsSigned)
+ return SIToFP; // sint -> FP
+ else
+ return UIToFP; // uint -> FP
+ } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt
+ if (DestBits < SrcBits) {
+ return FPTrunc; // FP -> smaller FP
+ } else if (DestBits > SrcBits) {
+ return FPExt; // FP -> larger FP
+ } else {
+ return BitCast; // same size, no-op cast
+ }
+ } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) {
+ assert(DestBits == PTy->getBitWidth() &&
+ "Casting vector to floating point of different width");
+ PTy = NULL;
+ return BitCast; // same size, no-op cast
+ } else {
+ assert(0 && "Casting pointer or non-first class to float");
+ }
+ } else if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ if (const VectorType *SrcPTy = dyn_cast<VectorType>(SrcTy)) {
+ assert(DestPTy->getBitWidth() == SrcPTy->getBitWidth() &&
+ "Casting vector to vector of different widths");
+ SrcPTy = NULL;
+ return BitCast; // vector -> vector
+ } else if (DestPTy->getBitWidth() == SrcBits) {
+ return BitCast; // float/int -> vector
+ } else {
+ assert(!"Illegal cast to vector (wrong type or size)");
+ }
+ } else if (isa<PointerType>(DestTy)) {
+ if (isa<PointerType>(SrcTy)) {
+ return BitCast; // ptr -> ptr
+ } else if (SrcTy->isInteger()) {
+ return IntToPtr; // int -> ptr
+ } else {
+ assert(!"Casting pointer to other than pointer or int");
+ }
+ } else {
+ assert(!"Casting to type that is not first-class");
+ }
+
+ // If we fall through to here we probably hit an assertion for a bad cast above
+ // and assertions are not turned on. Anything we return is an error, so
+ // BitCast is as good a choice as any.
+ return BitCast;
+}
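+
+// A hypothetical call (names chosen for illustration):
+//   CastInst::getCastOpcode(V, /*SrcIsSigned=*/true,
+//                           Type::Int64Ty, /*DestIsSigned=*/true)
+// yields SExt when V has type i32, Trunc when V is wider than 64 bits,
+// and BitCast when V is already 64 bits wide.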
+
+//===----------------------------------------------------------------------===//
+// CastInst SubClass Constructors
+//===----------------------------------------------------------------------===//
+
+/// Check that the construction parameters for a CastInst are correct. This
+/// could be broken out into the separate constructors but it is useful to have
+/// it in one place and to eliminate the redundant code for getting the sizes
+/// of the types involved.
+bool
+CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
+
+ // Check for type sanity on the arguments
+ const Type *SrcTy = S->getType();
+ if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType())
+ return false;
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DstBitSize = DstTy->getPrimitiveSizeInBits();
+
+ // Switch on the opcode provided
+ switch (op) {
+ default: return false; // This is an input error
+ case Instruction::Trunc:
+ return SrcTy->isIntOrIntVector() &&
+ DstTy->isIntOrIntVector() && SrcBitSize > DstBitSize;
+ case Instruction::ZExt:
+ return SrcTy->isIntOrIntVector() &&
+ DstTy->isIntOrIntVector() && SrcBitSize < DstBitSize;
+ case Instruction::SExt:
+ return SrcTy->isIntOrIntVector() &&
+ DstTy->isIntOrIntVector() && SrcBitSize < DstBitSize;
+ case Instruction::FPTrunc:
+ return SrcTy->isFPOrFPVector() &&
+ DstTy->isFPOrFPVector() &&
+ SrcBitSize > DstBitSize;
+ case Instruction::FPExt:
+ return SrcTy->isFPOrFPVector() &&
+ DstTy->isFPOrFPVector() &&
+ SrcBitSize < DstBitSize;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+ return SVTy->getElementType()->isIntOrIntVector() &&
+ DVTy->getElementType()->isFPOrFPVector() &&
+ SVTy->getNumElements() == DVTy->getNumElements();
+ }
+ }
+ return SrcTy->isIntOrIntVector() && DstTy->isFPOrFPVector();
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+ return SVTy->getElementType()->isFPOrFPVector() &&
+ DVTy->getElementType()->isIntOrIntVector() &&
+ SVTy->getNumElements() == DVTy->getNumElements();
+ }
+ }
+ return SrcTy->isFPOrFPVector() && DstTy->isIntOrIntVector();
+ case Instruction::PtrToInt:
+ return isa<PointerType>(SrcTy) && DstTy->isInteger();
+ case Instruction::IntToPtr:
+ return SrcTy->isInteger() && isa<PointerType>(DstTy);
+ case Instruction::BitCast:
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ if (isa<PointerType>(SrcTy) != isa<PointerType>(DstTy))
+ return false;
+
+ // Now we know we're not dealing with a pointer/non-pointer mismatch. In all
+ // these cases, the cast is okay if the source and destination bit widths
+ // are identical.
+ return SrcBitSize == DstBitSize;
+ }
+}
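+
+// For illustration, castIsValid(Instruction::Trunc, <i64 value>, Int32Ty)
+// holds (both integers, 64 > 32), whereas a Trunc from i32 to i64 or a
+// BitCast between i32* and i64 is rejected (the latter mixes pointer and
+// non-pointer).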
+
+TruncInst::TruncInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, Trunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+TruncInst::TruncInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+SExtInst::SExtInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, SExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+SExtInst::SExtInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore
+) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+//===----------------------------------------------------------------------===//
+// CmpInst Classes
+//===----------------------------------------------------------------------===//
+
+CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const std::string &Name,
+ Instruction *InsertBefore)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ SubclassData = predicate;
+ setName(Name);
+}
+
+CmpInst::CmpInst(const Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const std::string &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ SubclassData = predicate;
+ setName(Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
+ const std::string &Name, Instruction *InsertBefore) {
+ if (Op == Instruction::ICmp) {
+ return new ICmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
+ InsertBefore);
+ }
+ if (Op == Instruction::FCmp) {
+ return new FCmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
+ InsertBefore);
+ }
+ if (Op == Instruction::VICmp) {
+ return new VICmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
+ InsertBefore);
+ }
+ return new VFCmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
+ InsertBefore);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
+ const std::string &Name, BasicBlock *InsertAtEnd) {
+ if (Op == Instruction::ICmp) {
+ return new ICmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
+ InsertAtEnd);
+ }
+ if (Op == Instruction::FCmp) {
+ return new FCmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
+ InsertAtEnd);
+ }
+ if (Op == Instruction::VICmp) {
+ return new VICmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
+ InsertAtEnd);
+ }
+ return new VFCmpInst(CmpInst::Predicate(predicate), S1, S2, Name,
+ InsertAtEnd);
+}
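+
+// Usage sketch: Create dispatches on the opcode, so one call site can build
+// any comparison kind.  `LHS', `RHS' (of matching type) and `InsertBefore'
+// are assumed to be in scope:
+//
+//   CmpInst *EQ = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
+//                                 LHS, RHS, "eq", InsertBefore);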
+
+void CmpInst::swapOperands() {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ IC->swapOperands();
+ else
+ cast<FCmpInst>(this)->swapOperands();
+}
+
+bool CmpInst::isCommutative() {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isCommutative();
+ return cast<FCmpInst>(this)->isCommutative();
+}
+
+bool CmpInst::isEquality() {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isEquality();
+ return cast<FCmpInst>(this)->isEquality();
+}
+
+
+CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(!"Unknown cmp predicate!");
+ case ICMP_EQ: return ICMP_NE;
+ case ICMP_NE: return ICMP_EQ;
+ case ICMP_UGT: return ICMP_ULE;
+ case ICMP_ULT: return ICMP_UGE;
+ case ICMP_UGE: return ICMP_ULT;
+ case ICMP_ULE: return ICMP_UGT;
+ case ICMP_SGT: return ICMP_SLE;
+ case ICMP_SLT: return ICMP_SGE;
+ case ICMP_SGE: return ICMP_SLT;
+ case ICMP_SLE: return ICMP_SGT;
+
+ case FCMP_OEQ: return FCMP_UNE;
+ case FCMP_ONE: return FCMP_UEQ;
+ case FCMP_OGT: return FCMP_ULE;
+ case FCMP_OLT: return FCMP_UGE;
+ case FCMP_OGE: return FCMP_ULT;
+ case FCMP_OLE: return FCMP_UGT;
+ case FCMP_UEQ: return FCMP_ONE;
+ case FCMP_UNE: return FCMP_OEQ;
+ case FCMP_UGT: return FCMP_OLE;
+ case FCMP_ULT: return FCMP_OGE;
+ case FCMP_UGE: return FCMP_OLT;
+ case FCMP_ULE: return FCMP_OGT;
+ case FCMP_ORD: return FCMP_UNO;
+ case FCMP_UNO: return FCMP_ORD;
+ case FCMP_TRUE: return FCMP_FALSE;
+ case FCMP_FALSE: return FCMP_TRUE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(! "Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
+ return pred;
+ case ICMP_UGT: return ICMP_SGT;
+ case ICMP_ULT: return ICMP_SLT;
+ case ICMP_UGE: return ICMP_SGE;
+ case ICMP_ULE: return ICMP_SLE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(! "Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
+ return pred;
+ case ICMP_SGT: return ICMP_UGT;
+ case ICMP_SLT: return ICMP_ULT;
+ case ICMP_SGE: return ICMP_UGE;
+ case ICMP_SLE: return ICMP_ULE;
+ }
+}
+
+bool ICmpInst::isSignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(! "Unknown icmp predicate!");
+ case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
+ return true;
+ case ICMP_EQ: case ICMP_NE: case ICMP_UGT: case ICMP_ULT:
+ case ICMP_UGE: case ICMP_ULE:
+ return false;
+ }
+}
+
+/// Initialize a ConstantRange containing exactly the values X for which
+/// (X pred C) is true.
+///
+ConstantRange
+ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
+ APInt Lower(C);
+ APInt Upper(C);
+ uint32_t BitWidth = C.getBitWidth();
+ switch (pred) {
+ default: assert(0 && "Invalid ICmp opcode to ConstantRange ctor!");
+ case ICmpInst::ICMP_EQ: Upper++; break;
+ case ICmpInst::ICMP_NE: Lower++; break;
+ case ICmpInst::ICMP_ULT: Lower = APInt::getMinValue(BitWidth); break;
+ case ICmpInst::ICMP_SLT: Lower = APInt::getSignedMinValue(BitWidth); break;
+ case ICmpInst::ICMP_UGT:
+ Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ break;
+ case ICmpInst::ICMP_SGT:
+ Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ break;
+ case ICmpInst::ICMP_ULE:
+ Lower = APInt::getMinValue(BitWidth); Upper++;
+ break;
+ case ICmpInst::ICMP_SLE:
+ Lower = APInt::getSignedMinValue(BitWidth); Upper++;
+ break;
+ case ICmpInst::ICMP_UGE:
+ Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ break;
+ case ICmpInst::ICMP_SGE:
+ Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ break;
+ }
+ return ConstantRange(Lower, Upper);
+}
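+
+// Worked example: for ICMP_ULT with C = 100 (i8), Lower becomes 0 and Upper
+// stays 100, so the result is the half-open range [0, 100):
+//
+//   ConstantRange R = ICmpInst::makeConstantRange(ICmpInst::ICMP_ULT,
+//                                                 APInt(8, 100));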
+
+CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
+ switch (pred) {
+ default: assert(!"Unknown cmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ return pred;
+ case ICMP_SGT: return ICMP_SLT;
+ case ICMP_SLT: return ICMP_SGT;
+ case ICMP_SGE: return ICMP_SLE;
+ case ICMP_SLE: return ICMP_SGE;
+ case ICMP_UGT: return ICMP_ULT;
+ case ICMP_ULT: return ICMP_UGT;
+ case ICMP_UGE: return ICMP_ULE;
+ case ICMP_ULE: return ICMP_UGE;
+
+ case FCMP_FALSE: case FCMP_TRUE:
+ case FCMP_OEQ: case FCMP_ONE:
+ case FCMP_UEQ: case FCMP_UNE:
+ case FCMP_ORD: case FCMP_UNO:
+ return pred;
+ case FCMP_OGT: return FCMP_OLT;
+ case FCMP_OLT: return FCMP_OGT;
+ case FCMP_OGE: return FCMP_OLE;
+ case FCMP_OLE: return FCMP_OGE;
+ case FCMP_UGT: return FCMP_ULT;
+ case FCMP_ULT: return FCMP_UGT;
+ case FCMP_UGE: return FCMP_ULE;
+ case FCMP_ULE: return FCMP_UGE;
+ }
+}
+
+bool CmpInst::isUnsigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return true;
+ }
+}
+
+bool CmpInst::isSigned(unsigned short predicate){
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return true;
+ }
+}
+
+bool CmpInst::isOrdered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_ORD: return true;
+ }
+}
+
+bool CmpInst::isUnordered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UNO: return true;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SwitchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumCases) {
+ assert(Value && Default);
+ ReservedSpace = 2+NumCases*2;
+ NumOperands = 2;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Value;
+ OperandList[1] = Default;
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor can also autoinsert before another instruction.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::VoidTy, Instruction::Switch, 0, 0, InsertBefore) {
+ init(Value, Default, NumCases);
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor also autoinserts at the end of the specified BasicBlock.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::VoidTy, Instruction::Switch, 0, 0, InsertAtEnd) {
+ init(Value, Default, NumCases);
+}
+
+SwitchInst::SwitchInst(const SwitchInst &SI)
+ : TerminatorInst(Type::VoidTy, Instruction::Switch,
+ allocHungoffUses(SI.getNumOperands()), SI.getNumOperands()) {
+ Use *OL = OperandList, *InOL = SI.OperandList;
+ for (unsigned i = 0, E = SI.getNumOperands(); i != E; i+=2) {
+ OL[i] = InOL[i];
+ OL[i+1] = InOL[i+1];
+ }
+}
+
+SwitchInst::~SwitchInst() {
+ dropHungoffUses(OperandList);
+}
+
+
+/// addCase - Add an entry to the switch instruction...
+///
+void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
+ unsigned OpNo = NumOperands;
+ if (OpNo+2 > ReservedSpace)
+ resizeOperands(0); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+2;
+ OperandList[OpNo] = OnVal;
+ OperandList[OpNo+1] = Dest;
+}
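+
+// Usage sketch: building a two-case switch via the Create factory.  `Cond'
+// (an i32 Value) and the BasicBlocks `BB', `DefaultBB', `ZeroBB' and `OneBB'
+// are assumed to be in scope:
+//
+//   SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, 2, BB);
+//   SI->addCase(ConstantInt::get(Type::Int32Ty, 0), ZeroBB);
+//   SI->addCase(ConstantInt::get(Type::Int32Ty, 1), OneBB);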
+
+/// removeCase - This method removes the specified successor from the switch
+/// instruction. Note that this cannot be used to remove the default
+/// destination (successor #0).
+///
+void SwitchInst::removeCase(unsigned idx) {
+ assert(idx != 0 && "Cannot remove the default case!");
+ assert(idx*2 < getNumOperands() && "Successor index out of range!!!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Move everything after this operand down.
+ //
+ // FIXME: we could just swap with the end of the list, then erase. However,
+  // clients might not expect this to happen. The code as it is thrashes the
+ // use/def lists, which is kinda lame.
+ for (unsigned i = (idx+1)*2; i != NumOps; i += 2) {
+ OL[i-2] = OL[i];
+ OL[i-2+1] = OL[i+1];
+ }
+
+ // Nuke the last value.
+ OL[NumOps-2].set(0);
+ OL[NumOps-2+1].set(0);
+ NumOperands = NumOps-2;
+}
+
+/// resizeOperands - This adjusts the length of the operands list according to
+/// the following behavior:
+///   1. If NumOps == 0, grow the operand list in response to a push_back style
+///      of operation.  This triples the number of operands.
+/// 2. If NumOps > NumOperands, reserve space for NumOps operands.
+/// 3. If NumOps == NumOperands, trim the reserved space.
+///
+void SwitchInst::resizeOperands(unsigned NumOps) {
+ unsigned e = getNumOperands();
+ if (NumOps == 0) {
+ NumOps = e*3;
+ } else if (NumOps*2 > NumOperands) {
+ // No resize needed.
+ if (ReservedSpace >= NumOps) return;
+ } else if (NumOps == NumOperands) {
+ if (ReservedSpace == NumOps) return;
+ } else {
+ return;
+ }
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i) {
+ NewOps[i] = OldOps[i];
+ }
+ OperandList = NewOps;
+ if (OldOps) Use::zap(OldOps, OldOps + e, true);
+}
+
+
+BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned SwitchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+// Define these methods here so vtables don't get emitted into every translation
+// unit that uses these classes.
+
+GetElementPtrInst *GetElementPtrInst::clone() const {
+ return new(getNumOperands()) GetElementPtrInst(*this);
+}
+
+BinaryOperator *BinaryOperator::clone() const {
+ return Create(getOpcode(), Op<0>(), Op<1>());
+}
+
+FCmpInst* FCmpInst::clone() const {
+ return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+ICmpInst* ICmpInst::clone() const {
+ return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+VFCmpInst* VFCmpInst::clone() const {
+ return new VFCmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+VICmpInst* VICmpInst::clone() const {
+ return new VICmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ExtractValueInst *ExtractValueInst::clone() const {
+ return new ExtractValueInst(*this);
+}
+InsertValueInst *InsertValueInst::clone() const {
+ return new InsertValueInst(*this);
+}
+
+
+MallocInst *MallocInst::clone() const { return new MallocInst(*this); }
+AllocaInst *AllocaInst::clone() const { return new AllocaInst(*this); }
+FreeInst *FreeInst::clone() const { return new FreeInst(getOperand(0)); }
+LoadInst *LoadInst::clone() const { return new LoadInst(*this); }
+StoreInst *StoreInst::clone() const { return new StoreInst(*this); }
+CastInst *TruncInst::clone() const { return new TruncInst(*this); }
+CastInst *ZExtInst::clone() const { return new ZExtInst(*this); }
+CastInst *SExtInst::clone() const { return new SExtInst(*this); }
+CastInst *FPTruncInst::clone() const { return new FPTruncInst(*this); }
+CastInst *FPExtInst::clone() const { return new FPExtInst(*this); }
+CastInst *UIToFPInst::clone() const { return new UIToFPInst(*this); }
+CastInst *SIToFPInst::clone() const { return new SIToFPInst(*this); }
+CastInst *FPToUIInst::clone() const { return new FPToUIInst(*this); }
+CastInst *FPToSIInst::clone() const { return new FPToSIInst(*this); }
+CastInst *PtrToIntInst::clone() const { return new PtrToIntInst(*this); }
+CastInst *IntToPtrInst::clone() const { return new IntToPtrInst(*this); }
+CastInst *BitCastInst::clone() const { return new BitCastInst(*this); }
+CallInst *CallInst::clone() const {
+ return new(getNumOperands()) CallInst(*this);
+}
+SelectInst *SelectInst::clone() const {
+ return new(getNumOperands()) SelectInst(*this);
+}
+VAArgInst *VAArgInst::clone() const { return new VAArgInst(*this); }
+
+ExtractElementInst *ExtractElementInst::clone() const {
+ return new ExtractElementInst(*this);
+}
+InsertElementInst *InsertElementInst::clone() const {
+ return InsertElementInst::Create(*this);
+}
+ShuffleVectorInst *ShuffleVectorInst::clone() const {
+ return new ShuffleVectorInst(*this);
+}
+PHINode *PHINode::clone() const { return new PHINode(*this); }
+ReturnInst *ReturnInst::clone() const {
+ return new(getNumOperands()) ReturnInst(*this);
+}
+BranchInst *BranchInst::clone() const {
+ unsigned Ops(getNumOperands());
+ return new(Ops, Ops == 1) BranchInst(*this);
+}
+SwitchInst *SwitchInst::clone() const { return new SwitchInst(*this); }
+InvokeInst *InvokeInst::clone() const {
+ return new(getNumOperands()) InvokeInst(*this);
+}
+UnwindInst *UnwindInst::clone() const { return new UnwindInst(); }
+UnreachableInst *UnreachableInst::clone() const { return new UnreachableInst();}
diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp
new file mode 100644
index 0000000..8bdc968
--- /dev/null
+++ b/lib/VMCore/IntrinsicInst.cpp
@@ -0,0 +1,77 @@
+//===-- IntrinsicInst.cpp - Intrinsic Instruction Wrappers ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods that make it really easy to deal with intrinsic
+// functions with the isa/dyn_cast family of functions. In particular, this
+// allows you to do things like:
+//
+// if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(Inst))
+// ... SPI->getFileName() ... SPI->getDirectory() ...
+//
+// All intrinsic function calls are instances of the call instruction, so these
+// are all subclasses of the CallInst class. Note that none of these classes
+// has state or virtual methods, which is an important part of this gross/neat
+// hack working.
+//
+// In some cases, arguments to intrinsics need to be generic and are defined as
+// type pointer to empty struct { }*. To access the real item of interest the
+// cast instruction needs to be stripped away.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
+///
+
+static Value *CastOperand(Value *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (CE->isCast())
+ return CE->getOperand(0);
+ return NULL;
+}
+
+Value *DbgInfoIntrinsic::StripCast(Value *C) {
+ if (Value *CO = CastOperand(C)) {
+ C = StripCast(CO);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasInitializer())
+ if (Value *CO = CastOperand(GV->getInitializer()))
+ C = StripCast(CO);
+ }
+ return dyn_cast<GlobalVariable>(C);
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgStopPointInst - This represents the llvm.dbg.stoppoint instruction.
+///
+
+Value *DbgStopPointInst::getFileName() const {
+ // Once the operand indices are verified, update this assert
+ assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices");
+ GlobalVariable *GV = cast<GlobalVariable>(getContext());
+ if (!GV->hasInitializer()) return NULL;
+ ConstantStruct *CS = cast<ConstantStruct>(GV->getInitializer());
+ return CS->getOperand(3);
+}
+
+Value *DbgStopPointInst::getDirectory() const {
+ // Once the operand indices are verified, update this assert
+ assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices");
+ GlobalVariable *GV = cast<GlobalVariable>(getContext());
+ if (!GV->hasInitializer()) return NULL;
+ ConstantStruct *CS = cast<ConstantStruct>(GV->getInitializer());
+ return CS->getOperand(4);
+}
diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp
new file mode 100644
index 0000000..1bf9171
--- /dev/null
+++ b/lib/VMCore/LeakDetector.cpp
@@ -0,0 +1,131 @@
+//===-- LeakDetector.cpp - Implement LeakDetector interface ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LeakDetector class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Value.h"
+using namespace llvm;
+
+namespace {
+ template <class T>
+ struct VISIBILITY_HIDDEN PrinterTrait {
+ static void print(const T* P) { cerr << P; }
+ };
+
+ template<>
+ struct VISIBILITY_HIDDEN PrinterTrait<Value> {
+ static void print(const Value* P) { cerr << *P; }
+ };
+
+ template <typename T>
+ struct VISIBILITY_HIDDEN LeakDetectorImpl {
+ explicit LeakDetectorImpl(const char* const name) : Cache(0), Name(name) { }
+
+ // Because the most common usage pattern, by far, is to add a
+ // garbage object, then remove it immediately, we optimize this
+    // case.  When an object is added, it is not added to the set
+    // immediately; it is stashed in the Cache member.  If it is
+    // immediately removed, no set search need be performed.
+ void addGarbage(const T* o) {
+ if (Cache) {
+ assert(Ts.count(Cache) == 0 && "Object already in set!");
+ Ts.insert(Cache);
+ }
+ Cache = o;
+ }
+
+ void removeGarbage(const T* o) {
+ if (o == Cache)
+ Cache = 0; // Cache hit
+ else
+ Ts.erase(o);
+ }
+
+ bool hasGarbage(const std::string& Message) {
+ addGarbage(0); // Flush the Cache
+
+ assert(Cache == 0 && "No value should be cached anymore!");
+
+ if (!Ts.empty()) {
+ cerr << "Leaked " << Name << " objects found: " << Message << ":\n";
+ for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
+ E = Ts.end(); I != E; ++I) {
+ cerr << "\t";
+ PrinterTrait<T>::print(*I);
+ cerr << "\n";
+ }
+ cerr << '\n';
+
+ return true;
+ }
+ return false;
+ }
+
+ private:
+ SmallPtrSet<const T*, 8> Ts;
+ const T* Cache;
+ const char* const Name;
+ };
+
+ static LeakDetectorImpl<void> *Objects;
+ static LeakDetectorImpl<Value> *LLVMObjects;
+
+ static LeakDetectorImpl<void> &getObjects() {
+ if (Objects == 0)
+ Objects = new LeakDetectorImpl<void>("GENERIC");
+ return *Objects;
+ }
+
+ static LeakDetectorImpl<Value> &getLLVMObjects() {
+ if (LLVMObjects == 0)
+ LLVMObjects = new LeakDetectorImpl<Value>("LLVM");
+ return *LLVMObjects;
+ }
+
+ static void clearGarbage() {
+ delete Objects;
+ delete LLVMObjects;
+ Objects = 0;
+ LLVMObjects = 0;
+ }
+}
+
+void LeakDetector::addGarbageObjectImpl(void *Object) {
+ getObjects().addGarbage(Object);
+}
+
+void LeakDetector::addGarbageObjectImpl(const Value *Object) {
+ getLLVMObjects().addGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(void *Object) {
+ getObjects().removeGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
+ getLLVMObjects().removeGarbage(Object);
+}
+
+void LeakDetector::checkForGarbageImpl(const std::string &Message) {
+ // use non-short-circuit version so that both checks are performed
+ if (getObjects().hasGarbage(Message) |
+ getLLVMObjects().hasGarbage(Message))
+ cerr << "\nThis is probably because you removed an object, but didn't "
+ << "delete it. Please check your code for memory leaks.\n";
+
+ // Clear out results so we don't get duplicate warnings on
+ // next call...
+ clearGarbage();
+}
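+
+// Usage sketch (debug builds only): objects are registered when created and
+// deregistered once something takes ownership of them.  `V' is assumed to be
+// a newly created Value:
+//
+//   LeakDetector::addGarbageObject(V);
+//   // ... V is inserted into a BasicBlock, which takes ownership ...
+//   LeakDetector::removeGarbageObject(V);
+//   LeakDetector::checkForGarbage("end of pass");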
diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile
new file mode 100644
index 0000000..e9d3dc8
--- /dev/null
+++ b/lib/VMCore/Makefile
@@ -0,0 +1,33 @@
+##===- lib/VMCore/Makefile ------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../..
+LIBRARYNAME = LLVMCore
+BUILD_ARCHIVE = 1
+
+BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen
+
+include $(LEVEL)/Makefile.common
+
+GENFILE:=$(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen
+
+INTRINSICTD := $(PROJ_SRC_ROOT)/include/llvm/Intrinsics.td
+INTRINSICTDS := $(wildcard $(PROJ_SRC_ROOT)/include/llvm/Intrinsics*.td)
+
+$(ObjDir)/Intrinsics.gen.tmp: $(ObjDir)/.dir $(INTRINSICTDS) $(TBLGEN)
+ $(Echo) Building Intrinsics.gen.tmp from Intrinsics.td
+ $(Verb) $(TableGen) $(call SYSPATH, $(INTRINSICTD)) -o $(call SYSPATH, $@) -gen-intrinsic
+
+$(GENFILE): $(ObjDir)/Intrinsics.gen.tmp
+ $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \
+ $(EchoCmd) Updated Intrinsics.gen because Intrinsics.gen.tmp \
+ changed significantly. )
+
+install-local:: $(GENFILE)
+ $(Echo) Installing $(PROJ_includedir)/llvm/Intrinsics.gen
+ $(Verb) $(DataInstall) $(GENFILE) $(PROJ_includedir)/llvm/Intrinsics.gen
diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp
new file mode 100644
index 0000000..0bd190a
--- /dev/null
+++ b/lib/VMCore/Mangler.cpp
@@ -0,0 +1,196 @@
+//===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Unified name mangler for CWriter and assembly backends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Mangler.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+using namespace llvm;
+
+static char HexDigit(int V) {
+ return V < 10 ? V+'0' : V+'A'-10;
+}
+
+static std::string MangleLetter(unsigned char C) {
+ char Result[] = { '_', HexDigit(C >> 4), HexDigit(C & 15), '_', 0 };
+ return Result;
+}
+
+/// makeNameProper - We don't want identifier names with non-C-identifier
+/// characters in them, so mangle them as appropriate.
+///
+std::string Mangler::makeNameProper(const std::string &X, const char *Prefix,
+ const char *PrivatePrefix) {
+ if (X.empty()) return X; // Empty names are uniqued by the caller.
+
+ // If PreserveAsmNames is set, names with asm identifiers are not modified.
+ if (PreserveAsmNames && X[0] == 1)
+ return X;
+
+ if (!UseQuotes) {
+ std::string Result;
+
+ // If X does not start with (char)1, add the prefix.
+ bool NeedPrefix = true;
+ std::string::const_iterator I = X.begin();
+ if (*I == 1) {
+ NeedPrefix = false;
+ ++I; // Skip over the marker.
+ }
+
+ // Mangle the first letter specially, don't allow numbers.
+ if (*I >= '0' && *I <= '9')
+ Result += MangleLetter(*I++);
+
+ for (std::string::const_iterator E = X.end(); I != E; ++I) {
+ if (!isCharAcceptable(*I))
+ Result += MangleLetter(*I);
+ else
+ Result += *I;
+ }
+
+ if (NeedPrefix) {
+ if (Prefix)
+ Result = Prefix + Result;
+ if (PrivatePrefix)
+ Result = PrivatePrefix + Result;
+ }
+ return Result;
+ }
+
+ bool NeedPrefix = true;
+ bool NeedQuotes = false;
+ std::string Result;
+ std::string::const_iterator I = X.begin();
+ if (*I == 1) {
+ NeedPrefix = false;
+ ++I; // Skip over the marker.
+ }
+
+ // If the first character is a number, we need quotes.
+ if (*I >= '0' && *I <= '9')
+ NeedQuotes = true;
+
+  // Do an initial scan of the string, checking to see whether we need quotes
+  // or have to escape a '"'.
+ if (!NeedQuotes)
+ for (std::string::const_iterator E = X.end(); I != E; ++I)
+ if (!isCharAcceptable(*I)) {
+ NeedQuotes = true;
+ break;
+ }
+
+ // In the common case, we don't need quotes. Handle this quickly.
+ if (!NeedQuotes) {
+ if (NeedPrefix) {
+ if (Prefix)
+ Result = Prefix + X;
+ else
+ Result = X;
+ if (PrivatePrefix)
+ Result = PrivatePrefix + Result;
+ return Result;
+ } else
+ return X.substr(1);
+ }
+
+ // Otherwise, construct the string the expensive way.
+ for (std::string::const_iterator E = X.end(); I != E; ++I) {
+ if (*I == '"')
+ Result += "_QQ_";
+ else if (*I == '\n')
+ Result += "_NL_";
+ else
+ Result += *I;
+ }
+
+  if (NeedPrefix) {
+    if (Prefix)
+      Result = Prefix + Result;
+    if (PrivatePrefix)
+      Result = PrivatePrefix + Result;
+  }
+ Result = '"' + Result + '"';
+ return Result;
+}
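+
+// Worked example (unquoted path, assuming the private prefix defaults to
+// null): the space in "foo bar" is not an acceptable character, so it is
+// replaced by its hex code:
+//
+//   Mangler Mang(M, "_");
+//   Mang.makeNameProper("foo bar", "_");   // yields "_foo_20_bar"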
+
+/// getTypeID - Return a unique ID for the specified LLVM type.
+///
+unsigned Mangler::getTypeID(const Type *Ty) {
+ unsigned &E = TypeMap[Ty];
+ if (E == 0) E = ++TypeCounter;
+ return E;
+}
+
+std::string Mangler::getValueName(const Value *V) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return getValueName(GV);
+
+ std::string &Name = Memo[V];
+ if (!Name.empty())
+ return Name; // Return the already-computed name for V.
+
+ // Always mangle local names.
+ Name = "ltmp_" + utostr(Count++) + "_" + utostr(getTypeID(V->getType()));
+ return Name;
+}
+
+
+std::string Mangler::getValueName(const GlobalValue *GV, const char * Suffix) {
+ // Check to see whether we've already named V.
+ std::string &Name = Memo[GV];
+ if (!Name.empty())
+ return Name; // Return the already-computed name for V.
+
+ // Name mangling occurs as follows:
+  // - If V is an intrinsic function, do not change the name at all.
+  // - Otherwise, mangling occurs if the global collides with an existing name.
+ if (isa<Function>(GV) && cast<Function>(GV)->isIntrinsic()) {
+ Name = GV->getNameStart(); // Is an intrinsic function
+ } else if (!GV->hasName()) {
+ // Must mangle the global into a unique ID.
+ unsigned TypeUniqueID = getTypeID(GV->getType());
+ static unsigned GlobalID = 0;
+ Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(GlobalID++);
+ } else {
+ if (GV->hasPrivateLinkage())
+ Name = makeNameProper(GV->getName() + Suffix, Prefix, PrivatePrefix);
+ else
+ Name = makeNameProper(GV->getName() + Suffix, Prefix);
+ }
+
+ return Name;
+}
+
+Mangler::Mangler(Module &M, const char *prefix, const char *privatePrefix)
+ : Prefix(prefix), PrivatePrefix (privatePrefix), UseQuotes(false),
+ PreserveAsmNames(false), Count(0), TypeCounter(0) {
+ std::fill(AcceptableChars, array_endof(AcceptableChars), 0);
+
+ // Letters and numbers are acceptable.
+ for (unsigned char X = 'a'; X <= 'z'; ++X)
+ markCharAcceptable(X);
+ for (unsigned char X = 'A'; X <= 'Z'; ++X)
+ markCharAcceptable(X);
+ for (unsigned char X = '0'; X <= '9'; ++X)
+ markCharAcceptable(X);
+
+ // These chars are acceptable.
+ markCharAcceptable('_');
+ markCharAcceptable('$');
+ markCharAcceptable('.');
+}
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
new file mode 100644
index 0000000..a598005
--- /dev/null
+++ b/lib/VMCore/Module.cpp
@@ -0,0 +1,381 @@
+//===-- Module.cpp - Implement the Module class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Module class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/LeakDetector.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/TypeSymbolTable.h"
+#include <algorithm>
+#include <cstdarg>
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Methods to implement the globals and functions lists.
+//
+
+GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() {
+ GlobalVariable *Ret = new GlobalVariable(Type::Int32Ty, false,
+ GlobalValue::ExternalLinkage);
+ // This should not be garbage monitored.
+ LeakDetector::removeGarbageObject(Ret);
+ return Ret;
+}
+GlobalAlias *ilist_traits<GlobalAlias>::createSentinel() {
+ GlobalAlias *Ret = new GlobalAlias(Type::Int32Ty,
+ GlobalValue::ExternalLinkage);
+ // This should not be garbage monitored.
+ LeakDetector::removeGarbageObject(Ret);
+ return Ret;
+}
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file.
+template class SymbolTableListTraits<GlobalVariable, Module>;
+template class SymbolTableListTraits<Function, Module>;
+template class SymbolTableListTraits<GlobalAlias, Module>;
+
+//===----------------------------------------------------------------------===//
+// Primitive Module methods.
+//
+
+Module::Module(const std::string &MID)
+ : ModuleID(MID), DataLayout("") {
+ ValSymTab = new ValueSymbolTable();
+ TypeSymTab = new TypeSymbolTable();
+}
+
+Module::~Module() {
+ dropAllReferences();
+ GlobalList.clear();
+ FunctionList.clear();
+ AliasList.clear();
+ LibraryList.clear();
+ delete ValSymTab;
+ delete TypeSymTab;
+}
+
+/// Target endian information...
+Module::Endianness Module::getEndianness() const {
+ std::string temp = DataLayout;
+ Module::Endianness ret = AnyEndianness;
+
+ while (!temp.empty()) {
+ std::string token = getToken(temp, "-");
+
+ if (token[0] == 'e') {
+ ret = LittleEndian;
+ } else if (token[0] == 'E') {
+ ret = BigEndian;
+ }
+ }
+
+ return ret;
+}
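+
+// Usage sketch, assuming the module's data layout has been set through the
+// usual setDataLayout mutator:
+//
+//   M.setDataLayout("e-p:32:32");
+//   Module::Endianness En = M.getEndianness();   // LittleEndian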
+
+/// Target Pointer Size information...
+Module::PointerSize Module::getPointerSize() const {
+ std::string temp = DataLayout;
+ Module::PointerSize ret = AnyPointerSize;
+
+ while (!temp.empty()) {
+ std::string token = getToken(temp, "-");
+ char signal = getToken(token, ":")[0];
+
+ if (signal == 'p') {
+ int size = atoi(getToken(token, ":").c_str());
+ if (size == 32)
+ ret = Pointer32;
+ else if (size == 64)
+ ret = Pointer64;
+ }
+ }
+
+ return ret;
+}
+
+/// getNamedValue - Return the first global value in the module with
+/// the specified name, of arbitrary type. This method returns null
+/// if a global with the specified name is not found.
+GlobalValue *Module::getNamedValue(const std::string &Name) const {
+ return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
+}
+
+GlobalValue *Module::getNamedValue(const char *Name) const {
+ llvm::Value *V = getValueSymbolTable().lookup(Name, Name+strlen(Name));
+ return cast_or_null<GlobalValue>(V);
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the functions in the module.
+//
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return
+// it. This is nice because it allows most passes to get away with not handling
+// the symbol table directly for this common task.
+//
+Constant *Module::getOrInsertFunction(const std::string &Name,
+ const FunctionType *Ty,
+ AttrListPtr AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Okay, the function exists. Does it have externally visible linkage?
+ if (F->hasLocalLinkage()) {
+ // Clear the function's name.
+ F->setName("");
+ // Retry, now there won't be a conflict.
+ Constant *NewF = getOrInsertFunction(Name, Ty);
+ F->setName(&Name[0], Name.size());
+ return NewF;
+ }
+
+ // If the function exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (F->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
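+
+// Usage sketch: fetch-or-create a "void ()" prototype.  The name "my_helper"
+// is purely illustrative:
+//
+//   std::vector<const Type*> NoArgs;
+//   Constant *F = M.getOrInsertFunction(
+//       "my_helper", FunctionType::get(Type::VoidTy, NoArgs, false));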
+
+Constant *Module::getOrInsertTargetIntrinsic(const std::string &Name,
+ const FunctionType *Ty,
+ AttrListPtr AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertFunction(const std::string &Name,
+ const FunctionType *Ty) {
+ AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0);
+ return getOrInsertFunction(Name, Ty, AttributeList);
+}
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return it.
+// This version of the method takes a null terminated list of function
+// arguments, which makes it easier for clients to use.
+//
+Constant *Module::getOrInsertFunction(const std::string &Name,
+ AttrListPtr AttributeList,
+ const Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<const Type*> ArgTys;
+ while (const Type *ArgTy = va_arg(Args, const Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name, FunctionType::get(RetTy, ArgTys, false),
+ AttributeList);
+}
+
+Constant *Module::getOrInsertFunction(const std::string &Name,
+ const Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<const Type*> ArgTys;
+ while (const Type *ArgTy = va_arg(Args, const Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name, FunctionType::get(RetTy, ArgTys, false),
+ AttrListPtr::get((AttributeWithIndex *)0, 0));
+}
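+
+// Usage sketch for the variadic form; note the (Type *)0 terminator that
+// ends the argument type list.  The name "my_fn" is purely illustrative:
+//
+//   Constant *F = M.getOrInsertFunction("my_fn", Type::Int32Ty,
+//                                       Type::Int32Ty, (Type *)0);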
+
+// getFunction - Look up the specified function in the module symbol table.
+// If it does not exist, return null.
+//
+Function *Module::getFunction(const std::string &Name) const {
+ return dyn_cast_or_null<Function>(getNamedValue(Name));
+}
+
+Function *Module::getFunction(const char *Name) const {
+ return dyn_cast_or_null<Function>(getNamedValue(Name));
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the global variables in the module.
+//
+
+/// getGlobalVariable - Look up the specified global variable in the module
+/// symbol table. If it does not exist, return null. The type argument
+/// should be the underlying type of the global, i.e., it should not have
+/// the top-level PointerType, which represents the address of the global.
+/// If AllowLocal is set to true, this function will also return globals
+/// that have local linkage. By default, such globals are not returned.
+///
+GlobalVariable *Module::getGlobalVariable(const std::string &Name,
+ bool AllowLocal) const {
+ if (GlobalVariable *Result =
+ dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
+ if (AllowLocal || !Result->hasLocalLinkage())
+ return Result;
+ return 0;
+}
+
+/// getOrInsertGlobal - Look up the specified global in the module symbol table.
+/// 1. If it does not exist, add a declaration of the global and return it.
+/// 2. Else, if the global exists but has the wrong type: return the global
+///    with a constantexpr cast to the right type.
+/// 3. Finally, if the existing global is the correct declaration, return the
+/// existing global.
+Constant *Module::getOrInsertGlobal(const std::string &Name, const Type *Ty) {
+ // See if we have a definition for the specified global already.
+ GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
+ if (GV == 0) {
+ // Nope, add it
+ GlobalVariable *New =
+ new GlobalVariable(Ty, false, GlobalVariable::ExternalLinkage, 0, Name);
+ GlobalList.push_back(New);
+ return New; // Return the new declaration.
+ }
+
+ // If the variable exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (GV->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing function or a prototype.
+ return GV;
+}
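+
+// Usage sketch (the name "my_flag" is purely illustrative):
+//
+//   Constant *G = M.getOrInsertGlobal("my_flag", Type::Int1Ty);
+//   // G is the existing global, a fresh declaration, or a bitcast of a
+//   // differently-typed global with the same name.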
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the global variables in the module.
+//
+
+// getNamedAlias - Look up the specified global in the module symbol table.
+// If it does not exist, return null.
+//
+GlobalAlias *Module::getNamedAlias(const std::string &Name) const {
+ return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the types in the module.
+//
+
+
+// addTypeName - Insert an entry in the symbol table mapping Str to Type. If
+// there is already an entry for this name, true is returned and the symbol
+// table is not modified.
+//
+bool Module::addTypeName(const std::string &Name, const Type *Ty) {
+ TypeSymbolTable &ST = getTypeSymbolTable();
+
+ if (ST.lookup(Name)) return true; // Already in symtab...
+
+  // Not in symbol table?  Insert the new name/type mapping now.
+ ST.insert(Name, Ty);
+
+ return false;
+}
+
+/// getTypeByName - Return the type with the specified name in this module, or
+/// null if there is none by that name.
+const Type *Module::getTypeByName(const std::string &Name) const {
+ const TypeSymbolTable &ST = getTypeSymbolTable();
+ return cast_or_null<Type>(ST.lookup(Name));
+}
+
+// getTypeName - If there is at least one entry in the symbol table for the
+// specified type, return the first such name.
+//
+std::string Module::getTypeName(const Type *Ty) const {
+ const TypeSymbolTable &ST = getTypeSymbolTable();
+
+ TypeSymbolTable::const_iterator TI = ST.begin();
+ TypeSymbolTable::const_iterator TE = ST.end();
+  if (TI == TE) return "";  // No names for types
+
+ while (TI != TE && TI->second != Ty)
+ ++TI;
+
+ if (TI != TE) // Must have found an entry!
+ return TI->first;
+ return ""; // Must not have found anything...
+}
+
+//===----------------------------------------------------------------------===//
+// Other module related stuff.
+//
+
+
+// dropAllReferences() - This function causes all the subelements to "let go"
+// of all references that they are maintaining. This allows one to 'delete' a
+// whole module at a time, even though there may be circular references... first
+// all references are dropped, and all use counts go to zero. Then everything
+// is deleted for real. Note that no operations are valid on an object that
+// has "dropped all references", except operator delete.
+//
+void Module::dropAllReferences() {
+  for (Module::iterator I = begin(), E = end(); I != E; ++I)
+    I->dropAllReferences();
+
+  for (Module::global_iterator I = global_begin(), E = global_end(); I != E; ++I)
+    I->dropAllReferences();
+
+  for (Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I)
+ I->dropAllReferences();
+}
+
+void Module::addLibrary(const std::string& Lib) {
+ for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I)
+ if (*I == Lib)
+ return;
+ LibraryList.push_back(Lib);
+}
+
+void Module::removeLibrary(const std::string& Lib) {
+ LibraryListType::iterator I = LibraryList.begin();
+ LibraryListType::iterator E = LibraryList.end();
+ for (;I != E; ++I)
+ if (*I == Lib) {
+ LibraryList.erase(I);
+ return;
+ }
+}
diff --git a/lib/VMCore/ModuleProvider.cpp b/lib/VMCore/ModuleProvider.cpp
new file mode 100644
index 0000000..cfff97c
--- /dev/null
+++ b/lib/VMCore/ModuleProvider.cpp
@@ -0,0 +1,26 @@
+//===-- ModuleProvider.cpp - Base implementation for module providers -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Minimal implementation of the abstract interface for providing a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ModuleProvider.h"
+#include "llvm/Module.h"
+using namespace llvm;
+
+/// ctor - always have a valid Module
+///
+ModuleProvider::ModuleProvider() : TheModule(0) { }
+
+/// dtor - when we leave, we take our Module with us
+///
+ModuleProvider::~ModuleProvider() {
+ delete TheModule;
+}
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
new file mode 100644
index 0000000..6db5d7e
--- /dev/null
+++ b/lib/VMCore/Pass.cpp
@@ -0,0 +1,323 @@
+//===- Pass.cpp - LLVM Pass Infrastructure Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass infrastructure. It is primarily
+// responsible for ensuring that passes are executed and batched together
+// optimally.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ManagedStatic.h"
+#include <algorithm>
+#include <map>
+#include <set>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Pass Implementation
+//
+
+// Force out-of-line virtual method.
+Pass::~Pass() {
+ delete Resolver;
+}
+
+// Force out-of-line virtual method.
+ModulePass::~ModulePass() { }
+
+bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const {
+ return Resolver->getAnalysisIfAvailable(AnalysisID, true) != 0;
+}
+
+// dumpPassStructure - Implement the -debug-passes=Structure option
+void Pass::dumpPassStructure(unsigned Offset) {
+ cerr << std::string(Offset*2, ' ') << getPassName() << "\n";
+}
+
+/// getPassName - Return a nice clean name for a pass.  This is usually
+/// implemented in terms of the name that is registered by one of the
+/// Registration templates, but can be overridden directly.
+///
+const char *Pass::getPassName() const {
+ if (const PassInfo *PI = getPassInfo())
+ return PI->getPassName();
+ return "Unnamed pass: implement Pass::getPassName()";
+}
+
+// print - Print out the internal state of the pass. This is called by Analyze
+// to print out the contents of an analysis. Otherwise it is not necessary to
+// implement this method.
+//
+void Pass::print(std::ostream &O,const Module*) const {
+ O << "Pass::print not implemented for pass: '" << getPassName() << "'!\n";
+}
+
+// dump - call print(cerr);
+void Pass::dump() const {
+ print(*cerr.stream(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// ImmutablePass Implementation
+//
+// Force out-of-line virtual method.
+ImmutablePass::~ImmutablePass() { }
+
+//===----------------------------------------------------------------------===//
+// FunctionPass Implementation
+//
+
+// run - On a module, we run this pass by initializing, runOnFunction'ing once
+// for every function in the module, then by finalizing.
+//
+bool FunctionPass::runOnModule(Module &M) {
+ bool Changed = doInitialization(M);
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) // Passes are not run on external functions!
+ Changed |= runOnFunction(*I);
+
+ return Changed | doFinalization(M);
+}
+
+// run - On a function, we simply initialize, run the function, then finalize.
+//
+bool FunctionPass::run(Function &F) {
+ // Passes are not run on external functions!
+ if (F.isDeclaration()) return false;
+
+ bool Changed = doInitialization(*F.getParent());
+ Changed |= runOnFunction(F);
+ return Changed | doFinalization(*F.getParent());
+}
+
+//===----------------------------------------------------------------------===//
+// BasicBlockPass Implementation
+//
+
+// To run this pass on a function, we simply call runOnBasicBlock once for each
+// basic block in the function.
+//
+bool BasicBlockPass::runOnFunction(Function &F) {
+ bool Changed = doInitialization(F);
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ Changed |= runOnBasicBlock(*I);
+ return Changed | doFinalization(F);
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Registration mechanism
+//
+namespace {
+class PassRegistrar {
+ /// PassInfoMap - Keep track of the passinfo object for each registered llvm
+ /// pass.
+ typedef std::map<intptr_t, const PassInfo*> MapType;
+ MapType PassInfoMap;
+
+ /// AnalysisGroupInfo - Keep track of information for each analysis group.
+ struct AnalysisGroupInfo {
+ const PassInfo *DefaultImpl;
+ std::set<const PassInfo *> Implementations;
+ AnalysisGroupInfo() : DefaultImpl(0) {}
+ };
+
+ /// AnalysisGroupInfoMap - Information for each analysis group.
+ std::map<const PassInfo *, AnalysisGroupInfo> AnalysisGroupInfoMap;
+
+public:
+
+ const PassInfo *GetPassInfo(intptr_t TI) const {
+ MapType::const_iterator I = PassInfoMap.find(TI);
+ return I != PassInfoMap.end() ? I->second : 0;
+ }
+
+ void RegisterPass(const PassInfo &PI) {
+ bool Inserted =
+ PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted;
+ }
+
+ void UnregisterPass(const PassInfo &PI) {
+ MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
+ assert(I != PassInfoMap.end() && "Pass registered but not in map!");
+
+ // Remove pass from the map.
+ PassInfoMap.erase(I);
+ }
+
+ void EnumerateWith(PassRegistrationListener *L) {
+ for (MapType::const_iterator I = PassInfoMap.begin(),
+ E = PassInfoMap.end(); I != E; ++I)
+ L->passEnumerate(I->second);
+ }
+
+
+ /// Analysis Group Mechanisms.
+ void RegisterAnalysisGroup(PassInfo *InterfaceInfo,
+ const PassInfo *ImplementationInfo,
+ bool isDefault) {
+ AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
+ assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
+ "Cannot add a pass to the same analysis group more than once!");
+ AGI.Implementations.insert(ImplementationInfo);
+ if (isDefault) {
+ assert(AGI.DefaultImpl == 0 && InterfaceInfo->getNormalCtor() == 0 &&
+ "Default implementation for analysis group already specified!");
+ assert(ImplementationInfo->getNormalCtor() &&
+ "Cannot specify pass as default if it does not have a default ctor");
+ AGI.DefaultImpl = ImplementationInfo;
+ InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
+ }
+ }
+};
+}
+
+static std::vector<PassRegistrationListener*> *Listeners = 0;
+
+// FIXME: This should use ManagedStatic to manage the pass registrar.
+// Unfortunately, we can't do this, because passes are registered with static
+// ctors, and having llvm_shutdown clear this map prevents successful
+// resurrection after llvm_shutdown is run.
+static PassRegistrar *getPassRegistrar() {
+ static PassRegistrar *PassRegistrarObj = 0;
+ if (!PassRegistrarObj)
+ PassRegistrarObj = new PassRegistrar();
+ return PassRegistrarObj;
+}
+
+// getPassInfo - Return the PassInfo data structure that corresponds to this
+// pass...
+const PassInfo *Pass::getPassInfo() const {
+ return lookupPassInfo(PassID);
+}
+
+const PassInfo *Pass::lookupPassInfo(intptr_t TI) {
+ return getPassRegistrar()->GetPassInfo(TI);
+}
+
+void PassInfo::registerPass() {
+ getPassRegistrar()->RegisterPass(*this);
+
+ // Notify any listeners.
+ if (Listeners)
+ for (std::vector<PassRegistrationListener*>::iterator
+ I = Listeners->begin(), E = Listeners->end(); I != E; ++I)
+ (*I)->passRegistered(this);
+}
+
+void PassInfo::unregisterPass() {
+ getPassRegistrar()->UnregisterPass(*this);
+}
+
+//===----------------------------------------------------------------------===//
+// Analysis Group Implementation Code
+//===----------------------------------------------------------------------===//
+
+// RegisterAGBase implementation
+//
+RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
+ intptr_t PassID, bool isDefault)
+ : PassInfo(Name, InterfaceID),
+ ImplementationInfo(0), isDefaultImplementation(isDefault) {
+
+ InterfaceInfo = const_cast<PassInfo*>(Pass::lookupPassInfo(InterfaceID));
+ if (InterfaceInfo == 0) {
+ // First reference to Interface, register it now.
+ registerPass();
+ InterfaceInfo = this;
+ }
+ assert(isAnalysisGroup() &&
+ "Trying to join an analysis group that is a normal pass!");
+
+ if (PassID) {
+ ImplementationInfo = Pass::lookupPassInfo(PassID);
+ assert(ImplementationInfo &&
+ "Must register pass before adding to AnalysisGroup!");
+
+ // Make sure we keep track of the fact that the implementation implements
+ // the interface.
+ PassInfo *IIPI = const_cast<PassInfo*>(ImplementationInfo);
+ IIPI->addInterfaceImplemented(InterfaceInfo);
+
+ getPassRegistrar()->RegisterAnalysisGroup(InterfaceInfo, IIPI, isDefault);
+ }
+}
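+
+// An illustrative sketch of the analysis-group registration idiom built on
+// RegisterAGBase (the pass names follow lib/Analysis; the exact template
+// arguments are assumptions about this era's Pass.h):
+//
+//   static RegisterPass<BasicAliasAnalysis>
+//   X("basicaa", "Basic Alias Analysis (default AA impl)", false, true);
+//   // Make BasicAliasAnalysis the default implementation of the
+//   // AliasAnalysis group:
+//   static RegisterAnalysisGroup<AliasAnalysis, true> Y(X);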
+
+
+//===----------------------------------------------------------------------===//
+// PassRegistrationListener implementation
+//
+
+// PassRegistrationListener ctor - Add the current object to the list of
+// PassRegistrationListeners...
+PassRegistrationListener::PassRegistrationListener() {
+ if (!Listeners) Listeners = new std::vector<PassRegistrationListener*>();
+ Listeners->push_back(this);
+}
+
+// dtor - Remove object from list of listeners...
+PassRegistrationListener::~PassRegistrationListener() {
+  assert(Listeners && "PassRegistrationListener not registered!");
+  std::vector<PassRegistrationListener*>::iterator I =
+    std::find(Listeners->begin(), Listeners->end(), this);
+  assert(I != Listeners->end() &&
+         "PassRegistrationListener not registered!");
+  Listeners->erase(I);
+
+ if (Listeners->empty()) {
+ delete Listeners;
+ Listeners = 0;
+ }
+}
+
+// enumeratePasses - Iterate over the registered passes, calling the
+// passEnumerate callback on each PassInfo object.
+//
+void PassRegistrationListener::enumeratePasses() {
+ getPassRegistrar()->EnumerateWith(this);
+}
+
+//===----------------------------------------------------------------------===//
+// AnalysisUsage Class Implementation
+//
+
+namespace {
+ struct GetCFGOnlyPasses : public PassRegistrationListener {
+ typedef AnalysisUsage::VectorType VectorType;
+ VectorType &CFGOnlyList;
+ GetCFGOnlyPasses(VectorType &L) : CFGOnlyList(L) {}
+
+ void passEnumerate(const PassInfo *P) {
+ if (P->isCFGOnlyPass())
+ CFGOnlyList.push_back(P);
+ }
+ };
+}
+
+// setPreservesCFG - This function should be called by a pass if, and only if,
+// it does not:
+//
+// 1. Add or remove basic blocks from the function
+// 2. Modify terminator instructions in any way.
+//
+// This function annotates the AnalysisUsage info object to say that analyses
+// that only depend on the CFG are preserved by this pass.
+//
+void AnalysisUsage::setPreservesCFG() {
+ // Since this transformation doesn't modify the CFG, it preserves all analyses
+ // that only depend on the CFG (like dominators, loop info, etc...)
+ GetCFGOnlyPasses(Preserved).enumeratePasses();
+}
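+
+// An illustrative sketch of a pass using this facility ("MyLocalOpt" is a
+// hypothetical pass that only rewrites instructions within blocks):
+//
+//   void MyLocalOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.setPreservesCFG();
+//   }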
+
+
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
new file mode 100644
index 0000000..4799915
--- /dev/null
+++ b/lib/VMCore/PassManager.cpp
@@ -0,0 +1,1710 @@
+//===- PassManager.cpp - LLVM Pass Infrastructure Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass Manager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/PassManagers.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm-c/Core.h"
+#include <algorithm>
+#include <cstdio>
+#include <map>
+using namespace llvm;
+
+// See PassManagers.h for Pass Manager infrastructure overview.
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Pass debugging information. Often it is useful to find out what pass is
+// running when a crash occurs in a utility. When this library is compiled with
+// debugging on, a command line option (--debug-pass) is enabled that causes the
+// pass name to be printed before it executes.
+//
+
+// Different debug levels that can be enabled...
+enum PassDebugLevel {
+ None, Arguments, Structure, Executions, Details
+};
+
+// Always verify dominfo if expensive checking is enabled.
+#ifdef XDEBUG
+bool VerifyDomInfo = true;
+#else
+bool VerifyDomInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
+ cl::desc("Verify dominator info (time consuming)"));
+
+static cl::opt<enum PassDebugLevel>
+PassDebugging("debug-pass", cl::Hidden,
+ cl::desc("Print PassManager debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
+ clEnumVal(Structure , "print pass structure before run()"),
+ clEnumVal(Executions, "print pass name before it is executed"),
+ clEnumVal(Details , "print pass details when it is executed"),
+ clEnumValEnd));
+} // End of llvm namespace
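+
+// For example, 'opt --debug-pass=Structure foo.bc' asks the pass managers to
+// print their pass structure before they run. (The option is registered
+// cl::Hidden, so it does not show up in the default --help listing.)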
+
+void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
+ if (V == 0 && M == 0)
+ OS << "Releasing pass '";
+ else
+ OS << "Running pass '";
+
+ OS << P->getPassName() << "'";
+
+ if (M) {
+ OS << " on module '" << M->getModuleIdentifier() << "'.\n";
+ return;
+ }
+ if (V == 0) {
+ OS << '\n';
+ return;
+ }
+
+ OS << " on ";
+ if (isa<Function>(V))
+ OS << "function";
+ else if (isa<BasicBlock>(V))
+ OS << "basic block";
+ else
+ OS << "value";
+
+ OS << " '";
+ WriteAsOperand(OS, V, /*PrintTy=*/false, M);
+ OS << "'\n";
+}
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BBPassManager
+//
+/// BBPassManager manages BasicBlockPasses. It batches all of the
+/// passes together and sequences them so that one basic block is fully
+/// processed before moving on to the next basic block.
+class VISIBILITY_HIDDEN BBPassManager : public PMDataManager,
+ public FunctionPass {
+
+public:
+ static char ID;
+ explicit BBPassManager(int Depth)
+ : PMDataManager(Depth), FunctionPass(&ID) {}
+
+ /// Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the function, and if so, return true.
+ bool runOnFunction(Function &F);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M);
+ bool doInitialization(Function &F);
+ bool doFinalization(Module &M);
+ bool doFinalization(Function &F);
+
+ virtual const char *getPassName() const {
+ return "BasicBlock Pass Manager";
+ }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::cerr << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ BP->dumpPassStructure(Offset + 1);
+ dumpLastUses(BP, Offset+1);
+ }
+ }
+
+ BasicBlockPass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
+ return BP;
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+ }
+};
+
+char BBPassManager::ID = 0;
+}
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl
+//
+/// FunctionPassManagerImpl manages FPPassManagers
+class FunctionPassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+public:
+ static char ID;
+ explicit FunctionPassManagerImpl(int Depth) :
+ Pass(&ID), PMDataManager(Depth),
+ PMTopLevelManager(TLM_Function) { }
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Function &F);
+
+ /// doInitialization - Run all of the initializers for the function passes.
+ ///
+ bool doInitialization(Module &M);
+
+ /// doFinalization - Run all of the finalizers for the function passes.
+ ///
+ bool doFinalization(Module &M);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ inline void addTopLevelPass(Pass *P) {
+
+ if (ImmutablePass *IP = dynamic_cast<ImmutablePass *> (P)) {
+
+      // P is an immutable pass and it will be managed by this
+      // top level manager. Set up an analysis resolver to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+ initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ recordAvailableAnalysis(IP);
+ } else {
+ P->assignPassManager(activeStack);
+ }
+
+ }
+
+ FPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
+ return FP;
+ }
+};
+
+char FunctionPassManagerImpl::ID = 0;
+//===----------------------------------------------------------------------===//
+// MPPassManager
+//
+/// MPPassManager manages ModulePasses and function pass managers.
+/// It batches all Module passes and function pass managers together and
+/// sequences them to process one module.
+class MPPassManager : public Pass, public PMDataManager {
+public:
+ static char ID;
+ explicit MPPassManager(int Depth) :
+ Pass(&ID), PMDataManager(Depth) { }
+
+ // Delete on the fly managers.
+ virtual ~MPPassManager() {
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ delete FPP;
+ }
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ /// Add RequiredPass into list of lower level passes required by pass P.
+ /// RequiredPass is run on the fly by Pass Manager when P requests it
+ /// through getAnalysis interface.
+ virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
+
+  /// Return the function pass corresponding to PassInfo PI that is required
+  /// by module pass MP. Instantiate the analysis pass on the fly by running
+  /// its runOnFunction() on function F.
+ virtual Pass* getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F);
+
+ virtual const char *getPassName() const {
+ return "Module Pass Manager";
+ }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::cerr << std::string(Offset*2, ' ') << "ModulePass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ MP->dumpPassStructure(Offset + 1);
+ if (FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP])
+ FPP->dumpPassStructure(Offset + 2);
+ dumpLastUses(MP, Offset+1);
+ }
+ }
+
+ ModulePass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<ModulePass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_ModulePassManager;
+ }
+
+ private:
+  /// Collection of on-the-fly FPPassManagers. These managers manage
+  /// function passes that are required by module passes.
+ std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers;
+};
+
+char MPPassManager::ID = 0;
+//===----------------------------------------------------------------------===//
+// PassManagerImpl
+//
+
+/// PassManagerImpl manages MPPassManagers
+class PassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+
+public:
+ static char ID;
+ explicit PassManagerImpl(int Depth) :
+ Pass(&ID), PMDataManager(Depth), PMTopLevelManager(TLM_Pass) { }
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Module &M);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ inline void addTopLevelPass(Pass *P) {
+ if (ImmutablePass *IP = dynamic_cast<ImmutablePass *> (P)) {
+
+      // P is an immutable pass and it will be managed by this
+      // top level manager. Set up an analysis resolver to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+ initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ recordAvailableAnalysis(IP);
+ } else {
+ P->assignPassManager(activeStack);
+ }
+ }
+
+ MPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]);
+ return MP;
+ }
+};
+
+char PassManagerImpl::ID = 0;
+} // End of llvm namespace
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+/// TimingInfo Class - This class is used to calculate information about the
+/// amount of time each pass takes to execute. This only happens when
+/// -time-passes is enabled on the command line.
+///
+class VISIBILITY_HIDDEN TimingInfo {
+ std::map<Pass*, Timer> TimingData;
+ TimerGroup TG;
+
+public:
+ // Use 'create' member to get this.
+ TimingInfo() : TG("... Pass execution timing report ...") {}
+
+ // TimingDtor - Print out information about timing information
+ ~TimingInfo() {
+ // Delete all of the timers...
+ TimingData.clear();
+ // TimerGroup is deleted next, printing the report.
+ }
+
+ // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
+  // to a non-null value (if the -time-passes option is enabled) or it leaves it
+ // null. It may be called multiple times.
+ static void createTheTimeInfo();
+
+ void passStarted(Pass *P) {
+ if (dynamic_cast<PMDataManager *>(P))
+ return;
+
+ std::map<Pass*, Timer>::iterator I = TimingData.find(P);
+ if (I == TimingData.end())
+ I=TimingData.insert(std::make_pair(P, Timer(P->getPassName(), TG))).first;
+ I->second.startTimer();
+ }
+ void passEnded(Pass *P) {
+ if (dynamic_cast<PMDataManager *>(P))
+ return;
+
+ std::map<Pass*, Timer>::iterator I = TimingData.find(P);
+ assert(I != TimingData.end() && "passStarted/passEnded not nested right!");
+ I->second.stopTimer();
+ }
+};
+
+} // End of anon namespace
+
+static TimingInfo *TheTimeInfo;
+
+//===----------------------------------------------------------------------===//
+// PMTopLevelManager implementation
+
+/// Initialize top level manager. Create first pass manager.
+PMTopLevelManager::PMTopLevelManager(enum TopLevelManagerType t) {
+ if (t == TLM_Pass) {
+ MPPassManager *MPP = new MPPassManager(1);
+ MPP->setTopLevelManager(this);
+ addPassManager(MPP);
+ activeStack.push(MPP);
+ } else if (t == TLM_Function) {
+ FPPassManager *FPP = new FPPassManager(1);
+ FPP->setTopLevelManager(this);
+ addPassManager(FPP);
+ activeStack.push(FPP);
+ }
+}
+
+/// Set pass P as the last user of the given analysis passes.
+void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
+ Pass *P) {
+ for (SmallVector<Pass *, 12>::iterator I = AnalysisPasses.begin(),
+ E = AnalysisPasses.end(); I != E; ++I) {
+ Pass *AP = *I;
+ LastUser[AP] = P;
+
+ if (P == AP)
+ continue;
+
+ // If AP is the last user of other passes then make P last user of
+ // such passes.
+ for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
+ LUE = LastUser.end(); LUI != LUE; ++LUI) {
+ if (LUI->second == AP)
+        // The DenseMap iterator is not invalidated here because
+        // this is just updating an existing entry.
+ LastUser[LUI->first] = P;
+ }
+ }
+}
+
+/// Collect passes whose last user is P
+void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
+ Pass *P) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
+ InversedLastUser.find(P);
+ if (DMI == InversedLastUser.end())
+ return;
+
+ SmallPtrSet<Pass *, 8> &LU = DMI->second;
+ for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
+ E = LU.end(); I != E; ++I) {
+ LastUses.push_back(*I);
+ }
+
+}
+
+AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
+ AnalysisUsage *AnUsage = NULL;
+ DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
+ if (DMI != AnUsageMap.end())
+ AnUsage = DMI->second;
+ else {
+ AnUsage = new AnalysisUsage();
+ P->getAnalysisUsage(*AnUsage);
+ AnUsageMap[P] = AnUsage;
+ }
+ return AnUsage;
+}
+
+/// Schedule pass P for execution. Make sure that passes required by
+/// P are run before P is run. Update analysis info maintained by
+/// the manager. Remove dead passes. This is a recursive function.
+void PMTopLevelManager::schedulePass(Pass *P) {
+
+  // TODO: Allocate a function manager for this pass; otherwise the required
+  // set may be inserted into the previous function manager.
+
+ // Give pass a chance to prepare the stage.
+ P->preparePassManager(activeStack);
+
+ // If P is an analysis pass and it is available then do not
+ // generate the analysis again. Stale analysis info should not be
+ // available at this point.
+ if (P->getPassInfo() &&
+ P->getPassInfo()->isAnalysis() && findAnalysisPass(P->getPassInfo())) {
+ delete P;
+ return;
+ }
+
+ AnalysisUsage *AnUsage = findAnalysisUsage(P);
+
+ bool checkAnalysis = true;
+ while (checkAnalysis) {
+ checkAnalysis = false;
+
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
+ E = RequiredSet.end(); I != E; ++I) {
+
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ if (!AnalysisPass) {
+ AnalysisPass = (*I)->createPass();
+        if (P->getPotentialPassManagerType() ==
+            AnalysisPass->getPotentialPassManagerType())
+          // Schedule analysis pass that is managed by the same pass manager.
+          schedulePass(AnalysisPass);
+        else if (P->getPotentialPassManagerType() >
+                 AnalysisPass->getPotentialPassManagerType()) {
+          // Schedule analysis pass that is managed by a new manager.
+          schedulePass(AnalysisPass);
+          // Recheck analysis passes to ensure that required analyses that
+          // are already checked are still available.
+          checkAnalysis = true;
+        } else
+          // Do not schedule this analysis. Lower level analysis
+          // passes are run on the fly.
+          delete AnalysisPass;
+ }
+ }
+ }
+
+ // Now all required passes are available.
+ addTopLevelPass(P);
+}
+
+/// Find the pass that implements Analysis AID. Search immutable
+/// passes and all pass managers. If desired pass is not found
+/// then return NULL.
+Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
+
+ Pass *P = NULL;
+ // Check pass managers
+ for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); P == NULL && I != E; ++I) {
+ PMDataManager *PMD = *I;
+ P = PMD->findAnalysisPass(AID, false);
+ }
+
+ // Check other pass managers
+ for (SmallVector<PMDataManager *, 8>::iterator
+ I = IndirectPassManagers.begin(),
+ E = IndirectPassManagers.end(); P == NULL && I != E; ++I)
+ P = (*I)->findAnalysisPass(AID, false);
+
+ for (SmallVector<ImmutablePass *, 8>::iterator I = ImmutablePasses.begin(),
+ E = ImmutablePasses.end(); P == NULL && I != E; ++I) {
+ const PassInfo *PI = (*I)->getPassInfo();
+ if (PI == AID)
+ P = *I;
+
+    // If the pass was not found, check the interfaces implemented by the
+    // immutable pass.
+ if (!P) {
+ const std::vector<const PassInfo*> &ImmPI =
+ PI->getInterfacesImplemented();
+ if (std::find(ImmPI.begin(), ImmPI.end(), AID) != ImmPI.end())
+ P = *I;
+ }
+ }
+
+ return P;
+}
+
+// Print passes managed by this top level manager.
+void PMTopLevelManager::dumpPasses() const {
+
+ if (PassDebugging < Structure)
+ return;
+
+ // Print out the immutable passes
+ for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
+ ImmutablePasses[i]->dumpPassStructure(0);
+ }
+
+ // Every class that derives from PMDataManager also derives from Pass
+ // (sometimes indirectly), but there's no inheritance relationship
+ // between PMDataManager and Pass, so we have to dynamic_cast to get
+ // from a PMDataManager* to a Pass*.
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ dynamic_cast<Pass *>(*I)->dumpPassStructure(1);
+}
+
+void PMTopLevelManager::dumpArguments() const {
+
+ if (PassDebugging < Arguments)
+ return;
+
+ cerr << "Pass Arguments: ";
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->dumpPassArguments();
+ cerr << "\n";
+}
+
+void PMTopLevelManager::initializeAllAnalysisInfo() {
+ for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+  // Initialize other pass managers
+ for (SmallVector<PMDataManager *, 8>::iterator I = IndirectPassManagers.begin(),
+ E = IndirectPassManagers.end(); I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
+ DME = LastUser.end(); DMI != DME; ++DMI) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
+ InversedLastUser.find(DMI->second);
+ if (InvDMI != InversedLastUser.end()) {
+ SmallPtrSet<Pass *, 8> &L = InvDMI->second;
+ L.insert(DMI->first);
+ } else {
+ SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
+ InversedLastUser[DMI->second] = L;
+ }
+ }
+}
+
+/// Destructor
+PMTopLevelManager::~PMTopLevelManager() {
+ for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ delete *I;
+
+ for (SmallVector<ImmutablePass *, 8>::iterator
+ I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ delete *I;
+
+ for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
+ DME = AnUsageMap.end(); DMI != DME; ++DMI)
+ delete DMI->second;
+}
+
+//===----------------------------------------------------------------------===//
+// PMDataManager implementation
+
+/// Augment AvailableAnalysis by adding analysis made available by pass P.
+void PMDataManager::recordAvailableAnalysis(Pass *P) {
+ const PassInfo *PI = P->getPassInfo();
+ if (PI == 0) return;
+
+ AvailableAnalysis[PI] = P;
+
+  // This pass is the current implementation of all of the interfaces it
+  // implements as well.
+ const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i)
+ AvailableAnalysis[II[i]] = P;
+}
+
+// Return true if P preserves high level analysis used by other
+// passes managed by this manager
+bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return true;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (SmallVector<Pass *, 8>::iterator I = HigherLevelAnalysis.begin(),
+ E = HigherLevelAnalysis.end(); I != E; ++I) {
+ Pass *P1 = *I;
+ if (!dynamic_cast<ImmutablePass*>(P1) &&
+ std::find(PreservedSet.begin(), PreservedSet.end(),
+ P1->getPassInfo()) ==
+ PreservedSet.end())
+ return false;
+ }
+
+ return true;
+}
+
+/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
+void PMDataManager::verifyPreservedAnalysis(Pass *P) {
+ // Don't do this unless assertions are enabled.
+#ifdef NDEBUG
+ return;
+#endif
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+
+ // Verify preserved analysis
+ for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
+ E = PreservedSet.end(); I != E; ++I) {
+ AnalysisID AID = *I;
+ if (Pass *AP = findAnalysisPass(AID, true))
+ AP->verifyAnalysis();
+ }
+}
+
+/// verifyDomInfo - Verify dominator information if it is available.
+void PMDataManager::verifyDomInfo(Pass &P, Function &F) {
+ if (!VerifyDomInfo || !P.getResolver())
+ return;
+
+ DominatorTree *DT = P.getAnalysisIfAvailable<DominatorTree>();
+ if (!DT)
+ return;
+
+ DominatorTree OtherDT;
+ OtherDT.getBase().recalculate(F);
+ if (DT->compare(OtherDT)) {
+ cerr << "Dominator Information for " << F.getNameStart() << "\n";
+ cerr << "Pass '" << P.getPassName() << "'\n";
+ cerr << "----- Valid -----\n";
+ OtherDT.dump();
+ cerr << "----- Invalid -----\n";
+ DT->dump();
+ assert(0 && "Invalid dominator info");
+ }
+
+ DominanceFrontier *DF = P.getAnalysisIfAvailable<DominanceFrontier>();
+ if (!DF)
+ return;
+
+ DominanceFrontier OtherDF;
+ std::vector<BasicBlock*> DTRoots = DT->getRoots();
+ OtherDF.calculate(*DT, DT->getNode(DTRoots[0]));
+ if (DF->compare(OtherDF)) {
+ cerr << "Dominator Information for " << F.getNameStart() << "\n";
+ cerr << "Pass '" << P.getPassName() << "'\n";
+ cerr << "----- Valid -----\n";
+ OtherDF.dump();
+ cerr << "----- Invalid -----\n";
+ DF->dump();
+ assert(0 && "Invalid dominator info");
+ }
+}
+
+/// Remove Analysis not preserved by Pass P
+void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (std::map<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
+ E = AvailableAnalysis.end(); I != E; ) {
+ std::map<AnalysisID, Pass*>::iterator Info = I++;
+ if (!dynamic_cast<ImmutablePass*>(Info->second)
+ && std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ cerr << " -- '" << P->getPassName() << "' is not preserving '";
+ cerr << S->getPassName() << "'\n";
+ }
+ AvailableAnalysis.erase(Info);
+ }
+ }
+
+ // Check inherited analysis also. If P is not preserving analysis
+ // provided by parent manager then remove it here.
+ for (unsigned Index = 0; Index < PMT_Last; ++Index) {
+
+ if (!InheritedAnalysis[Index])
+ continue;
+
+ for (std::map<AnalysisID, Pass*>::iterator
+ I = InheritedAnalysis[Index]->begin(),
+ E = InheritedAnalysis[Index]->end(); I != E; ) {
+ std::map<AnalysisID, Pass *>::iterator Info = I++;
+ if (!dynamic_cast<ImmutablePass*>(Info->second) &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end())
+ // Remove this analysis
+ InheritedAnalysis[Index]->erase(Info);
+ }
+ }
+}
+
+/// Remove analysis passes that are not used any longer
+void PMDataManager::removeDeadPasses(Pass *P, const char *Msg,
+ enum PassDebuggingString DBG_STR) {
+
+ SmallVector<Pass *, 12> DeadPasses;
+
+  // If this is an on-the-fly manager then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(DeadPasses, P);
+
+ if (PassDebugging >= Details && !DeadPasses.empty()) {
+ cerr << " -*- '" << P->getPassName();
+ cerr << "' is the last user of following pass instances.";
+ cerr << " Free these instances\n";
+ }
+
+ for (SmallVector<Pass *, 12>::iterator I = DeadPasses.begin(),
+ E = DeadPasses.end(); I != E; ++I) {
+
+ dumpPassInfo(*I, FREEING_MSG, DBG_STR, Msg);
+
+ {
+ // If the pass crashes releasing memory, remember this.
+ PassManagerPrettyStackEntry X(*I);
+
+ if (TheTimeInfo) TheTimeInfo->passStarted(*I);
+ (*I)->releaseMemory();
+ if (TheTimeInfo) TheTimeInfo->passEnded(*I);
+ }
+ if (const PassInfo *PI = (*I)->getPassInfo()) {
+ std::map<AnalysisID, Pass*>::iterator Pos =
+ AvailableAnalysis.find(PI);
+
+      // It is possible that the pass is already removed from AvailableAnalysis.
+ if (Pos != AvailableAnalysis.end())
+ AvailableAnalysis.erase(Pos);
+
+ // Remove all interfaces this pass implements, for which it is also
+ // listed as the available implementation.
+ const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i) {
+ Pos = AvailableAnalysis.find(II[i]);
+ if (Pos != AvailableAnalysis.end() && Pos->second == *I)
+ AvailableAnalysis.erase(Pos);
+ }
+ }
+ }
+}
+
+/// Add pass P into the PassVector. Update
+/// AvailableAnalysis appropriately if ProcessAnalysis is true.
+void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
+ // This manager is going to manage pass P. Set up analysis resolver
+ // to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+
+  // If a FunctionPass F is the last user of ModulePass info M
+  // then F's manager, not F, records itself as a last user of M.
+ SmallVector<Pass *, 12> TransferLastUses;
+
+ if (!ProcessAnalysis) {
+ // Add pass
+ PassVector.push_back(P);
+ return;
+ }
+
+ // At the moment, this pass is the last user of all required passes.
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 8> RequiredPasses;
+ SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
+
+ unsigned PDepth = this->getDepth();
+
+ collectRequiredAnalysis(RequiredPasses,
+ ReqAnalysisNotAvailable, P);
+ for (SmallVector<Pass *, 8>::iterator I = RequiredPasses.begin(),
+ E = RequiredPasses.end(); I != E; ++I) {
+ Pass *PRequired = *I;
+ unsigned RDepth = 0;
+
+ assert(PRequired->getResolver() && "Analysis Resolver is not set");
+ PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
+ RDepth = DM.getDepth();
+
+ if (PDepth == RDepth)
+ LastUses.push_back(PRequired);
+ else if (PDepth > RDepth) {
+      // Let the parent claim responsibility for the last use.
+      TransferLastUses.push_back(PRequired);
+      // Keep track of higher level analysis used by this manager.
+      HigherLevelAnalysis.push_back(PRequired);
+    } else
+      assert(0 && "Unable to accommodate Required Pass");
+ }
+
+ // Set P as P's last user until someone starts using P.
+ // However, if P is a Pass Manager then it does not need
+ // to record its last user.
+ if (!dynamic_cast<PMDataManager *>(P))
+ LastUses.push_back(P);
+ TPM->setLastUser(LastUses, P);
+
+ if (!TransferLastUses.empty()) {
+ Pass *My_PM = dynamic_cast<Pass *>(this);
+ TPM->setLastUser(TransferLastUses, My_PM);
+ TransferLastUses.clear();
+ }
+
+  // Now, take care of required analyses that are not available.
+ for (SmallVector<AnalysisID, 8>::iterator
+ I = ReqAnalysisNotAvailable.begin(),
+ E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
+ Pass *AnalysisPass = (*I)->createPass();
+ this->addLowerLevelRequiredPass(P, AnalysisPass);
+ }
+
+ // Take a note of analysis required and made available by this pass.
+ // Remove the analysis not preserved by this pass
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+
+ // Add pass
+ PassVector.push_back(P);
+}
+
+
+/// Populate RP with the analysis passes that are required by
+/// pass P and are available. Populate RP_NotAvail with the analysis
+/// passes that are required by pass P but are not available.
+void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
+ SmallVector<AnalysisID, 8> &RP_NotAvail,
+ Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator
+ I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+}
+
+// All Required analyses should be available to the pass as it runs! Here
+// we fill in the AnalysisImpls member of the pass so that it can
+// successfully use the getAnalysis() method to retrieve the
+// implementations it needs.
+//
+void PMDataManager::initializeAnalysisImpl(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+
+ for (AnalysisUsage::VectorType::const_iterator
+ I = AnUsage->getRequiredSet().begin(),
+ E = AnUsage->getRequiredSet().end(); I != E; ++I) {
+ Pass *Impl = findAnalysisPass(*I, true);
+ if (Impl == 0)
+      // This may be an analysis pass that is initialized on the fly.
+ // If that is not the case then it will raise an assert when it is used.
+ continue;
+ AnalysisResolver *AR = P->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->addAnalysisImplsPair(*I, Impl);
+ }
+}
+
+/// Find the pass that implements Analysis AID. If desired pass is not found
+/// then return NULL.
+Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
+
+  // Check if the AvailableAnalysis map has an entry for AID.
+ std::map<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID);
+
+ if (I != AvailableAnalysis.end())
+ return I->second;
+
+ // Search Parents through TopLevelManager
+ if (SearchParent)
+ return TPM->findAnalysisPass(AID);
+
+ return NULL;
+}
+
+// Print list of passes that are last used by P.
+void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const {
+
+  SmallVector<Pass *, 12> LUses;
+
+  // If this is an on-the-fly manager then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(LUses, P);
+
+ for (SmallVector<Pass *, 12>::iterator I = LUses.begin(),
+ E = LUses.end(); I != E; ++I) {
+ llvm::cerr << "--" << std::string(Offset*2, ' ');
+ (*I)->dumpPassStructure(0);
+ }
+}
+
+void PMDataManager::dumpPassArguments() const {
+ for (SmallVector<Pass *, 8>::const_iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I) {
+ if (PMDataManager *PMD = dynamic_cast<PMDataManager *>(*I))
+ PMD->dumpPassArguments();
+ else
+ if (const PassInfo *PI = (*I)->getPassInfo())
+ if (!PI->isAnalysisGroup())
+ cerr << " -" << PI->getPassArgument();
+ }
+}
+
+void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
+ enum PassDebuggingString S2,
+ const char *Msg) {
+ if (PassDebugging < Executions)
+ return;
+ cerr << (void*)this << std::string(getDepth()*2+1, ' ');
+ switch (S1) {
+ case EXECUTION_MSG:
+ cerr << "Executing Pass '" << P->getPassName();
+ break;
+ case MODIFICATION_MSG:
+ cerr << "Made Modification '" << P->getPassName();
+ break;
+ case FREEING_MSG:
+ cerr << " Freeing Pass '" << P->getPassName();
+ break;
+ default:
+ break;
+ }
+ switch (S2) {
+ case ON_BASICBLOCK_MSG:
+ cerr << "' on BasicBlock '" << Msg << "'...\n";
+ break;
+ case ON_FUNCTION_MSG:
+ cerr << "' on Function '" << Msg << "'...\n";
+ break;
+ case ON_MODULE_MSG:
+ cerr << "' on Module '" << Msg << "'...\n";
+ break;
+ case ON_LOOP_MSG:
+ cerr << "' on Loop " << Msg << "'...\n";
+ break;
+ case ON_CG_MSG:
+ cerr << "' on Call Graph " << Msg << "'...\n";
+ break;
+ default:
+ break;
+ }
+}
+
+void PMDataManager::dumpRequiredSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
+}
+
+void PMDataManager::dumpPreservedSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
+}
+
+void PMDataManager::dumpAnalysisUsage(const char *Msg, const Pass *P,
+ const AnalysisUsage::VectorType &Set) const {
+ assert(PassDebugging >= Details);
+ if (Set.empty())
+ return;
+ cerr << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
+ for (unsigned i = 0; i != Set.size(); ++i) {
+ if (i) cerr << ",";
+ cerr << " " << Set[i]->getPassName();
+ }
+ cerr << "\n";
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+/// This should be handled by specific pass manager.
+void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ if (TPM) {
+ TPM->dumpArguments();
+ TPM->dumpPasses();
+ }
+
+  // A module level pass may require function level analysis info
+  // (e.g. dominator info). The pass manager uses an on-the-fly function pass
+  // manager to provide this on demand. In that case, in pass manager
+  // terminology, the module level pass requires lower level analysis info
+  // managed by a lower level pass manager.
+
+  // When the pass manager is not able to order the required analysis info, it
+  // checks whether any lower level manager will be able to provide the
+  // analysis info on demand.
+#ifndef NDEBUG
+ cerr << "Unable to schedule '" << RequiredPass->getPassName();
+ cerr << "' required by '" << P->getPassName() << "'\n";
+#endif
+ assert(0 && "Unable to schedule pass");
+}
+
+// Destructor
+PMDataManager::~PMDataManager() {
+ for (SmallVector<Pass *, 8>::iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I)
+ delete *I;
+}
+
+//===----------------------------------------------------------------------===//
+// NOTE: Is this the right place to define this method?
+// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
+Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
+ return PM.findAnalysisPass(ID, dir);
+}
+
+Pass *AnalysisResolver::findImplPass(Pass *P, const PassInfo *AnalysisPI,
+ Function &F) {
+ return PM.getOnTheFlyPass(P, AnalysisPI, F);
+}
+
+//===----------------------------------------------------------------------===//
+// BBPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool BBPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = doInitialization(F);
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+
+ dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getNameStart());
+ dumpRequiredSet(BP);
+
+ initializeAnalysisImpl(BP);
+
+ {
+ // If the pass crashes, remember this.
+ PassManagerPrettyStackEntry X(BP, *I);
+
+ if (TheTimeInfo) TheTimeInfo->passStarted(BP);
+ Changed |= BP->runOnBasicBlock(*I);
+ if (TheTimeInfo) TheTimeInfo->passEnded(BP);
+ }
+
+ if (Changed)
+ dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
+ I->getNameStart());
+ dumpPreservedSet(BP);
+
+ verifyPreservedAnalysis(BP);
+ removeNotPreservedAnalysis(BP);
+ recordAvailableAnalysis(BP);
+ removeDeadPasses(BP, I->getNameStart(), ON_BASICBLOCK_MSG);
+ }
+
+ return Changed |= doFinalization(F);
+}
+
+// Implement doInitialization and doFinalization
+bool BBPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doInitialization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doInitialization(F);
+ }
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doFinalization(F);
+ }
+
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManager implementation
+
+/// Create new Function pass manager
+FunctionPassManager::FunctionPassManager(ModuleProvider *P) {
+ FPM = new FunctionPassManagerImpl(0);
+ // FPM is the top level manager.
+ FPM->setTopLevelManager(FPM);
+
+ AnalysisResolver *AR = new AnalysisResolver(*FPM);
+ FPM->setResolver(AR);
+
+ MP = P;
+}
+
+FunctionPassManager::~FunctionPassManager() {
+ delete FPM;
+}
+
+/// add - Add a pass to the queue of passes to run. This passes
+/// ownership of the Pass to the PassManager. When the
+/// PassManager is destroyed, the pass will be destroyed as well, so
+/// there is no need to delete the pass. (TODO delete passes.)
+/// This implies that all passes MUST be allocated with 'new'.
+void FunctionPassManager::add(Pass *P) {
+ FPM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep
+/// track of whether any of the passes modifies the function, and if
+/// so, return true.
+///
+bool FunctionPassManager::run(Function &F) {
+ std::string errstr;
+ if (MP->materializeFunction(&F, &errstr)) {
+ cerr << "Error reading bitcode file: " << errstr << "\n";
+ abort();
+ }
+ return FPM->run(F);
+}
+
+
+/// doInitialization - Run all of the initializers for the function passes.
+///
+bool FunctionPassManager::doInitialization() {
+ return FPM->doInitialization(*MP->getModule());
+}
+
+/// doFinalization - Run all of the finalizers for the function passes.
+///
+bool FunctionPassManager::doFinalization() {
+ return FPM->doFinalization(*MP->getModule());
+}
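+
+// An illustrative driver sketch for the interface above, assuming an already
+// constructed ModuleProvider *MP (error handling elided):
+//
+//   FunctionPassManager FPM(MP);
+//   FPM.add(createPrintFunctionPass("; processed:\n", &errs(), false));
+//   FPM.doInitialization();
+//   for (Module::iterator F = MP->getModule()->begin(),
+//        E = MP->getModule()->end(); F != E; ++F)
+//     FPM.run(*F);
+//   FPM.doFinalization();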
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl implementation
+//
+bool FunctionPassManagerImpl::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FunctionPassManagerImpl::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+/// cleanup - After running all passes, clean up pass manager cache.
+void FPPassManager::cleanup() {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ AnalysisResolver *AR = FP->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->clearAnalysisImpls();
+ }
+}
+
+// Execute all the passes managed by this top level manager.
+// Return true if any function is modified by a pass.
+bool FunctionPassManagerImpl::run(Function &F) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+
+ dumpArguments();
+ dumpPasses();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnFunction(F);
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ getContainedManager(Index)->cleanup();
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// FPPassManager implementation
+
+char FPPassManager::ID = 0;
+/// Print passes managed by this manager
+void FPPassManager::dumpPassStructure(unsigned Offset) {
+ llvm::cerr << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ FP->dumpPassStructure(Offset + 1);
+ dumpLastUses(FP, Offset+1);
+ }
+}
+
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnFunction method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool FPPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+
+ dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getNameStart());
+ dumpRequiredSet(FP);
+
+ initializeAnalysisImpl(FP);
+
+ {
+ PassManagerPrettyStackEntry X(FP, F);
+
+ if (TheTimeInfo) TheTimeInfo->passStarted(FP);
+ Changed |= FP->runOnFunction(F);
+ if (TheTimeInfo) TheTimeInfo->passEnded(FP);
+ }
+
+ if (Changed)
+ dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getNameStart());
+ dumpPreservedSet(FP);
+
+ verifyPreservedAnalysis(FP);
+ removeNotPreservedAnalysis(FP);
+ recordAvailableAnalysis(FP);
+ removeDeadPasses(FP, F.getNameStart(), ON_FUNCTION_MSG);
+
+ // If dominator information is available then verify the info if requested.
+ verifyDomInfo(*FP, F);
+ }
+ return Changed;
+}
+
+bool FPPassManager::runOnModule(Module &M) {
+ bool Changed = doInitialization(M);
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    Changed |= runOnFunction(*I);
+
+ return Changed |= doFinalization(M);
+}
+
+bool FPPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FPPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MPPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnModule method. Keep track of whether any of the passes modifies
+/// the module, and if so, return true.
+bool
+MPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+
+ dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG,
+ M.getModuleIdentifier().c_str());
+ dumpRequiredSet(MP);
+
+ initializeAnalysisImpl(MP);
+
+ {
+ PassManagerPrettyStackEntry X(MP, M);
+ if (TheTimeInfo) TheTimeInfo->passStarted(MP);
+ Changed |= MP->runOnModule(M);
+ if (TheTimeInfo) TheTimeInfo->passEnded(MP);
+ }
+
+ if (Changed)
+ dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
+ M.getModuleIdentifier().c_str());
+ dumpPreservedSet(MP);
+
+ verifyPreservedAnalysis(MP);
+ removeNotPreservedAnalysis(MP);
+ recordAvailableAnalysis(MP);
+ removeDeadPasses(MP, M.getModuleIdentifier().c_str(), ON_MODULE_MSG);
+ }
+ return Changed;
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+ assert((P->getPotentialPassManagerType() <
+ RequiredPass->getPotentialPassManagerType()) &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
+ if (!FPP) {
+ FPP = new FunctionPassManagerImpl(0);
+ // FPP is the top level manager.
+ FPP->setTopLevelManager(FPP);
+
+ OnTheFlyManagers[P] = FPP;
+ }
+ FPP->add(RequiredPass);
+
+ // Register P as the last user of RequiredPass.
+ SmallVector<Pass *, 12> LU;
+ LU.push_back(RequiredPass);
+ FPP->setLastUser(LU, P);
+}
+
+/// Return the function pass corresponding to PassInfo PI that is required
+/// by module pass MP. Instantiate the analysis pass on the fly by running
+/// its runOnFunction() on function F.
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F){
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
+ assert(FPP && "Unable to find on the fly pass");
+
+ FPP->run(F);
+ return (dynamic_cast<PMTopLevelManager *>(FPP))->findAnalysisPass(PI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// PassManagerImpl implementation
+//
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManagerImpl::run(Module &M) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+
+ dumpArguments();
+ dumpPasses();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnModule(M);
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// PassManager implementation
+
+/// Create new pass manager
+PassManager::PassManager() {
+ PM = new PassManagerImpl(0);
+ // PM is the top level manager
+ PM->setTopLevelManager(PM);
+}
+
+PassManager::~PassManager() {
+ delete PM;
+}
+
+/// add - Add a pass to the queue of passes to run. This passes ownership of
+/// the Pass to the PassManager. When the PassManager is destroyed, the pass
+/// will be destroyed as well, so there is no need to delete the pass. This
+/// implies that all passes MUST be allocated with 'new'.
+void PassManager::add(Pass *P) {
+ PM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManager::run(Module &M) {
+ return PM->run(M);
+}
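+
+// An illustrative usage sketch of the whole-module interface (names are from
+// this file and PrintModulePass.cpp later in this patch; assumes a loaded
+// Module &M):
+//
+//   PassManager PM;
+//   PM.add(createPrintModulePass(&errs(), false));
+//   PM.run(M);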
+
+//===----------------------------------------------------------------------===//
+// TimingInfo Class - This class is used to calculate information about the
+// amount of time each pass takes to execute. This only happens when
+// -time-passes is enabled on the command line.
+//
+bool llvm::TimePassesIsEnabled = false;
+static cl::opt<bool,true>
+EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
+ cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
+// a non-null value (if the -time-passes option is enabled) or it leaves it
+// null. It may be called multiple times.
+void TimingInfo::createTheTimeInfo() {
+ if (!TimePassesIsEnabled || TheTimeInfo) return;
+
+ // Constructed the first time this is called, iff -time-passes is enabled.
+ // This guarantees that the object will be constructed before static globals,
+ // thus it will be destroyed before them.
+ static ManagedStatic<TimingInfo> TTI;
+ TheTimeInfo = &*TTI;
+}
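+
+// For example, 'opt -time-passes foo.bc -o /dev/null' prints a per-pass
+// timing report when the tool exits.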
+
+/// If TimingInfo is enabled then start pass timer.
+void StartPassTimer(Pass *P) {
+ if (TheTimeInfo)
+ TheTimeInfo->passStarted(P);
+}
+
+/// If TimingInfo is enabled then stop pass timer.
+void StopPassTimer(Pass *P) {
+ if (TheTimeInfo)
+ TheTimeInfo->passEnded(P);
+}
+
+//===----------------------------------------------------------------------===//
+// PMStack implementation
+//
+
+// Pop Pass Manager from the stack and clear its analysis info.
+void PMStack::pop() {
+
+ PMDataManager *Top = this->top();
+ Top->initializeAnalysisInfo();
+
+ S.pop_back();
+}
+
+// Push PM on the stack and set its top level manager.
+void PMStack::push(PMDataManager *PM) {
+ assert(PM && "Unable to push. Pass Manager expected");
+
+ if (!this->empty()) {
+ PMTopLevelManager *TPM = this->top()->getTopLevelManager();
+
+ assert(TPM && "Unable to find top level manager");
+ TPM->addIndirectPassManager(PM);
+ PM->setTopLevelManager(TPM);
+ }
+
+ S.push_back(PM);
+}
+
+// Dump content of the pass manager stack.
+void PMStack::dump() {
+ for (std::deque<PMDataManager *>::iterator I = S.begin(),
+ E = S.end(); I != E; ++I)
+ printf("%s ", dynamic_cast<Pass *>(*I)->getPassName());
+
+ if (!S.empty())
+ printf("\n");
+}
+
+/// Find appropriate Module Pass Manager in the PM Stack and
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find Module Pass Manager
+ while(!PMS.empty()) {
+ PassManagerType TopPMType = PMS.top()->getPassManagerType();
+ if (TopPMType == PreferredType)
+ break; // We found desired pass manager
+ else if (TopPMType > PMT_ModulePassManager)
+ PMS.pop(); // Pop children pass managers
+ else
+ break;
+ }
+ assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
+ PMS.top()->add(this);
+}
+
+/// Find appropriate Function Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void FunctionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+
+  // Find Function Pass Manager
+ while(!PMS.empty()) {
+ if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
+ PMS.pop();
+ else
+ break;
+ }
+ FPPassManager *FPP = dynamic_cast<FPPassManager *>(PMS.top());
+
+ // Create new Function Pass Manager
+ if (!FPP) {
+ assert(!PMS.empty() && "Unable to create Function Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Function Pass Manager
+ FPP = new FPPassManager(PMD->getDepth() + 1);
+ FPP->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(FPP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ FPP->assignPassManager(PMS, PMD->getPassManagerType());
+
+ // [4] Push new manager into PMS
+ PMS.push(FPP);
+ }
+
+ // Assign FPP as the manager of this pass.
+ FPP->add(this);
+}
+
+/// Find the appropriate Basic Block Pass Manager in the PM Stack and
+/// add self into that manager.
+void BasicBlockPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ BBPassManager *BBP = NULL;
+
+  // The Basic Block Pass Manager is a leaf pass manager. It does not handle
+  // any other pass manager.
+ if (!PMS.empty())
+ BBP = dynamic_cast<BBPassManager *>(PMS.top());
+
+  // If the leaf manager is not a Basic Block Pass Manager then create a
+  // new Basic Block Pass Manager.
+
+ if (!BBP) {
+ assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Basic Block Manager
+ BBP = new BBPassManager(PMD->getDepth() + 1);
+
+ // [2] Set up new manager's top level manager
+ // Basic Block Pass Manager does not live by itself
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(BBP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ BBP->assignPassManager(PMS);
+
+ // [4] Push new manager into PMS
+ PMS.push(BBP);
+ }
+
+ // Assign BBP as the manager of this pass.
+ BBP->add(this);
+}
+
+PassManagerBase::~PassManagerBase() {}
+
+/*===-- C Bindings --------------------------------------------------------===*/
+
+LLVMPassManagerRef LLVMCreatePassManager() {
+ return wrap(new PassManager());
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
+ return wrap(new FunctionPassManager(unwrap(P)));
+}
+
+int LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
+ return unwrap<PassManager>(PM)->run(*unwrap(M));
+}
+
+int LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doInitialization();
+}
+
+int LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
+ return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
+}
+
+int LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doFinalization();
+}
+
+void LLVMDisposePassManager(LLVMPassManagerRef PM) {
+ delete unwrap(PM);
+}
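+
+// An illustrative sketch of driving the C bindings above (assumes an existing
+// LLVMModuleRef Mod; the llvm-c calls that would add passes are omitted):
+//
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMRunPassManager(PM, Mod);  // returns nonzero if the module changed
+//   LLVMDisposePassManager(PM);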
diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp
new file mode 100644
index 0000000..0a7f449
--- /dev/null
+++ b/lib/VMCore/PrintModulePass.cpp
@@ -0,0 +1,99 @@
+//===--- VMCore/PrintModulePass.cpp - Module/Function Printer -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PrintModulePass and PrintFunctionPass implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/PrintModulePass.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN PrintModulePass : public ModulePass {
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintModulePass() : ModulePass(&ID), Out(&errs()),
+ DeleteStream(false) {}
+ PrintModulePass(raw_ostream *o, bool DS)
+ : ModulePass(&ID), Out(o), DeleteStream(DS) {}
+
+ ~PrintModulePass() {
+ if (DeleteStream) delete Out;
+ }
+
+ bool runOnModule(Module &M) {
+ (*Out) << M;
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class PrintFunctionPass : public FunctionPass {
+ std::string Banner; // String to print before each function
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&errs()),
+ DeleteStream(false) {}
+ PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
+ : FunctionPass(&ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ inline ~PrintFunctionPass() {
+ if (DeleteStream) delete Out;
+ }
+
+ // runOnFunction - This pass just prints a banner followed by the
+ // function as it's processed.
+ //
+ bool runOnFunction(Function &F) {
+ (*Out) << Banner << static_cast<Value&>(F);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char PrintModulePass::ID = 0;
+static RegisterPass<PrintModulePass>
+X("print-module", "Print module to stderr");
+char PrintFunctionPass::ID = 0;
+static RegisterPass<PrintFunctionPass>
+Y("print-function","Print function to stderr");
+
+/// createPrintModulePass - Create and return a pass that writes the
+/// module to the specified raw_ostream.
+ModulePass *llvm::createPrintModulePass(llvm::raw_ostream *OS,
+ bool DeleteStream) {
+ return new PrintModulePass(OS, DeleteStream);
+}
+
+/// createPrintFunctionPass - Create and return a pass that prints
+/// functions to the specified raw_ostream as they are processed.
+FunctionPass *llvm::createPrintFunctionPass(const std::string &Banner,
+ llvm::raw_ostream *OS,
+ bool DeleteStream) {
+ return new PrintFunctionPass(Banner, OS, DeleteStream);
+}
+
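+// A minimal usage sketch (illustrative only): printing a module to stderr
+// through a PassManager. The pass deletes the stream only when DeleteStream
+// is true, so handing it &errs() with 'false' is safe:
+//
+//   PassManager PM;
+//   PM.add(createPrintModulePass(&errs(), /*DeleteStream=*/false));
+//   PM.run(M);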
diff --git a/lib/VMCore/SymbolTableListTraitsImpl.h b/lib/VMCore/SymbolTableListTraitsImpl.h
new file mode 100644
index 0000000..72687bb
--- /dev/null
+++ b/lib/VMCore/SymbolTableListTraitsImpl.h
@@ -0,0 +1,118 @@
+//===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stickier parts of the SymbolTableListTraits class,
+// and is explicitly instantiated where needed to avoid defining all this code
+// in a widely used header.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+#define LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+
+#include "llvm/SymbolTableListTraits.h"
+#include "llvm/ValueSymbolTable.h"
+
+namespace llvm {
+
+/// setSymTabObject - This is called when (e.g.) the parent of a basic block
+/// changes. This requires us to remove all the instruction symtab entries from
+/// the current function and reinsert them into the new function.
+template<typename ValueSubClass, typename ItemParentClass>
+template<typename TPtr>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::setSymTabObject(TPtr *Dest, TPtr Src) {
+ // Get the old symtab and value list before doing the assignment.
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
+
+ // Do it.
+ *Dest = Src;
+
+ // Get the new SymTab object.
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
+
+ // If there is nothing to do, quick exit.
+ if (OldST == NewST) return;
+
+ // Move all the elements from the old symtab to the new one.
+ iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
+ if (ItemList.empty()) return;
+
+ if (OldST) {
+ // Remove all entries from the previous symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ OldST->removeValueName(I->getValueName());
+ }
+
+ if (NewST) {
+ // Add all of the items to the new symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ NewST->reinsertValue(I);
+ }
+
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::addNodeToList(ValueSubClass *V) {
+ assert(V->getParent() == 0 && "Value already in a container!!");
+ ItemParentClass *Owner = getListOwner();
+ V->setParent(Owner);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
+ ST->reinsertValue(V);
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::removeNodeFromList(ValueSubClass *V) {
+ V->setParent(0);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
+ ST->removeValueName(V->getValueName());
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
+ ilist_iterator<ValueSubClass> first,
+ ilist_iterator<ValueSubClass> last) {
+ // We only have to do work here if transferring instructions between BBs
+ ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
+ if (NewIP == OldIP) return; // No work to do at all...
+
+ // We only have to update symbol table entries if we are transferring the
+ // instructions to a different symtab object...
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP);
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP);
+ if (NewST != OldST) {
+ for (; first != last; ++first) {
+ ValueSubClass &V = *first;
+ bool HasName = V.hasName();
+ if (OldST && HasName)
+ OldST->removeValueName(V.getValueName());
+ V.setParent(NewIP);
+ if (NewST && HasName)
+ NewST->reinsertValue(&V);
+ }
+ } else {
+ // Just transferring between blocks in the same function, simply update the
+ // parent fields in the instructions...
+ for (; first != last; ++first)
+ first->setParent(NewIP);
+ }
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
new file mode 100644
index 0000000..11b93ff
--- /dev/null
+++ b/lib/VMCore/Type.cpp
@@ -0,0 +1,1457 @@
+//===-- Type.cpp - Implement the Type class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Type class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+// DEBUG_MERGE_TYPES - Enable this #define to see how and when derived types are
+// created and later destroyed, all in an effort to make sure that there is only
+// a single canonical version of a type.
+//
+// #define DEBUG_MERGE_TYPES 1
+
+AbstractTypeUser::~AbstractTypeUser() {}
+
+
+//===----------------------------------------------------------------------===//
+// Type Class Implementation
+//===----------------------------------------------------------------------===//
+
+// Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
+// for types as they are needed. Because resolution of types must invalidate
+// all of the abstract type descriptions, we keep them in a separate map to make
+// this easy.
+static ManagedStatic<TypePrinting> ConcreteTypeDescriptions;
+static ManagedStatic<TypePrinting> AbstractTypeDescriptions;
+
+/// Because of the way Type subclasses are allocated, this function is necessary
+/// to use the correct kind of "delete" operator to deallocate the Type object.
+/// Some type objects (FunctionTy, StructTy) allocate additional space after
+/// the space for their derived type to hold the contained types array of
+/// PATypeHandles. Using this allocation scheme means all the PATypeHandles are
+/// allocated with the type object, decreasing allocations and eliminating the
+/// need for a std::vector to be used in the Type class itself.
+/// @brief Type destruction function
+void Type::destroy() const {
+
+ // Structures and Functions allocate their contained types past the end of
+ // the type object itself. These need to be destroyed differently than the
+ // other types.
+ if (isa<FunctionType>(this) || isa<StructType>(this)) {
+ // First, make sure we destruct any PATypeHandles allocated by these
+ // subclasses. They must be manually destructed.
+ for (unsigned i = 0; i < NumContainedTys; ++i)
+ ContainedTys[i].PATypeHandle::~PATypeHandle();
+
+ // Now call the destructor for the subclass directly because we're going
+ // to delete this as an array of char.
+ if (isa<FunctionType>(this))
+ static_cast<const FunctionType*>(this)->FunctionType::~FunctionType();
+ else
+ static_cast<const StructType*>(this)->StructType::~StructType();
+
+ // Finally, remove the memory as an array deallocation of the chars it was
+ // constructed from.
+ operator delete(const_cast<Type *>(this));
+
+ return;
+ }
+
+ // For all the other type subclasses, there are either no contained types or
+ // just one (all Sequentials). For Sequentials, the PATypeHandle is not
+ // allocated past the type object; it's included directly in the SequentialType
+ // class. This means we can safely just do a "normal" delete of this object and
+ // all the destructors that need to run will be run.
+ delete this;
+}
+
+const Type *Type::getPrimitiveType(TypeID IDNumber) {
+ switch (IDNumber) {
+ case VoidTyID : return VoidTy;
+ case FloatTyID : return FloatTy;
+ case DoubleTyID : return DoubleTy;
+ case X86_FP80TyID : return X86_FP80Ty;
+ case FP128TyID : return FP128Ty;
+ case PPC_FP128TyID : return PPC_FP128Ty;
+ case LabelTyID : return LabelTy;
+ case MetadataTyID : return MetadataTy;
+ default:
+ return 0;
+ }
+}
+
+const Type *Type::getVAArgsPromotedType() const {
+ if (ID == IntegerTyID && getSubclassData() < 32)
+ return Type::Int32Ty;
+ else if (ID == FloatTyID)
+ return Type::DoubleTy;
+ else
+ return this;
+}
+
+/// isIntOrIntVector - Return true if this is an integer type or a vector of
+/// integer types.
+///
+bool Type::isIntOrIntVector() const {
+ if (isInteger())
+ return true;
+ if (ID != Type::VectorTyID) return false;
+
+ return cast<VectorType>(this)->getElementType()->isInteger();
+}
+
+/// isFPOrFPVector - Return true if this is a FP type or a vector of FP types.
+///
+bool Type::isFPOrFPVector() const {
+ if (ID == Type::FloatTyID || ID == Type::DoubleTyID ||
+ ID == Type::FP128TyID || ID == Type::X86_FP80TyID ||
+ ID == Type::PPC_FP128TyID)
+ return true;
+ if (ID != Type::VectorTyID) return false;
+
+ return cast<VectorType>(this)->getElementType()->isFloatingPoint();
+}
+
+// canLosslesslyBitCastTo - Return true if this type can be converted to
+// 'Ty' without any reinterpretation of bits. For example, uint to int.
+//
+bool Type::canLosslesslyBitCastTo(const Type *Ty) const {
+ // Identity cast means no change so return true
+ if (this == Ty)
+ return true;
+
+ // They are not convertible unless they are at least first class types
+ if (!this->isFirstClassType() || !Ty->isFirstClassType())
+ return false;
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not.
+ if (const VectorType *thisPTy = dyn_cast<VectorType>(this))
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ return thisPTy->getBitWidth() == thatPTy->getBitWidth();
+
+ // At this point we have only various mismatches of the first class types
+ // remaining and ptr->ptr. Just select the lossless conversions. Everything
+ // else is not lossless.
+ if (isa<PointerType>(this))
+ return isa<PointerType>(Ty);
+ return false; // Other types have no identity values
+}
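+
+// Examples of the rules above (illustrative only):
+//   <4 x i32> -> <2 x i64>   lossless: both vectors are 128 bits wide
+//   <4 x i32> -> <4 x i64>   not lossless: 128 bits vs. 256 bits
+//   i32*      -> float*      lossless: any pointer to any pointer
+//   i32       -> float       not lossless under this predicate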
+
+unsigned Type::getPrimitiveSizeInBits() const {
+ switch (getTypeID()) {
+ case Type::FloatTyID: return 32;
+ case Type::DoubleTyID: return 64;
+ case Type::X86_FP80TyID: return 80;
+ case Type::FP128TyID: return 128;
+ case Type::PPC_FP128TyID: return 128;
+ case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
+ case Type::VectorTyID: return cast<VectorType>(this)->getBitWidth();
+ default: return 0;
+ }
+}
+
+/// isSizedDerivedType - Derived types like structures and arrays are sized
+/// iff all of the members of the type are sized as well. Since asking for
+/// their size is relatively uncommon, move this operation out of line.
+bool Type::isSizedDerivedType() const {
+ if (isa<IntegerType>(this))
+ return true;
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
+ return ATy->getElementType()->isSized();
+
+ if (const VectorType *PTy = dyn_cast<VectorType>(this))
+ return PTy->getElementType()->isSized();
+
+ if (!isa<StructType>(this))
+ return false;
+
+ // Okay, our struct is sized if all of the elements are...
+ for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
+ if (!(*I)->isSized())
+ return false;
+
+ return true;
+}
+
+/// getForwardedTypeInternal - This method is used to implement the union-find
+/// algorithm for when a type is being forwarded to another type.
+const Type *Type::getForwardedTypeInternal() const {
+ assert(ForwardType && "This type is not being forwarded to another type!");
+
+ // Check to see if the forwarded type has been forwarded on. If so, collapse
+ // the forwarding links.
+ const Type *RealForwardedType = ForwardType->getForwardedType();
+ if (!RealForwardedType)
+ return ForwardType; // No it's not forwarded again
+
+ // Yes, it is forwarded again. First thing, add the reference to the new
+ // forward type.
+ if (RealForwardedType->isAbstract())
+ cast<DerivedType>(RealForwardedType)->addRef();
+
+ // Now drop the old reference. This could cause ForwardType to get deleted.
+ cast<DerivedType>(ForwardType)->dropRef();
+
+ // Return the updated type.
+ ForwardType = RealForwardedType;
+ return ForwardType;
+}
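+
+// Illustration of the path compression above (not from the original source):
+// if A was refined to B and B was later refined to C, the first query on A
+// rewrites A's ForwardType from B directly to C, so subsequent queries take
+// a single hop; the reference counts are moved so B can be reclaimed once
+// nothing refers to it.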
+
+void Type::refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
+ abort();
+}
+void Type::typeBecameConcrete(const DerivedType *AbsTy) {
+ abort();
+}
+
+
+std::string Type::getDescription() const {
+ TypePrinting &Map =
+ isAbstract() ? *AbstractTypeDescriptions : *ConcreteTypeDescriptions;
+
+ std::string DescStr;
+ raw_string_ostream DescOS(DescStr);
+ Map.print(this, DescOS);
+ return DescOS.str();
+}
+
+
+bool StructType::indexValid(const Value *V) const {
+ // Structure indexes require 32-bit integer constants.
+ if (V->getType() == Type::Int32Ty)
+ if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
+ return indexValid(CU->getZExtValue());
+ return false;
+}
+
+bool StructType::indexValid(unsigned V) const {
+ return V < NumContainedTys;
+}
+
+// getTypeAtIndex - Given an index value into the type, return the type of the
+// element. For a structure type, this must be a constant value...
+//
+const Type *StructType::getTypeAtIndex(const Value *V) const {
+ unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
+ return getTypeAtIndex(Idx);
+}
+
+const Type *StructType::getTypeAtIndex(unsigned Idx) const {
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return ContainedTys[Idx];
+}
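+
+// Example (illustrative): given %T = { i32, float* }, getTypeAtIndex(0)
+// yields i32 and getTypeAtIndex(1) yields float*. The Value overload
+// expects an i32 ConstantInt, mirroring how getelementptr indexes structs.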
+
+//===----------------------------------------------------------------------===//
+// Primitive 'Type' data
+//===----------------------------------------------------------------------===//
+
+const Type *Type::VoidTy = new Type(Type::VoidTyID);
+const Type *Type::FloatTy = new Type(Type::FloatTyID);
+const Type *Type::DoubleTy = new Type(Type::DoubleTyID);
+const Type *Type::X86_FP80Ty = new Type(Type::X86_FP80TyID);
+const Type *Type::FP128Ty = new Type(Type::FP128TyID);
+const Type *Type::PPC_FP128Ty = new Type(Type::PPC_FP128TyID);
+const Type *Type::LabelTy = new Type(Type::LabelTyID);
+const Type *Type::MetadataTy = new Type(Type::MetadataTyID);
+
+namespace {
+ struct BuiltinIntegerType : public IntegerType {
+ explicit BuiltinIntegerType(unsigned W) : IntegerType(W) {}
+ };
+}
+const IntegerType *Type::Int1Ty = new BuiltinIntegerType(1);
+const IntegerType *Type::Int8Ty = new BuiltinIntegerType(8);
+const IntegerType *Type::Int16Ty = new BuiltinIntegerType(16);
+const IntegerType *Type::Int32Ty = new BuiltinIntegerType(32);
+const IntegerType *Type::Int64Ty = new BuiltinIntegerType(64);
+
+//===----------------------------------------------------------------------===//
+// Derived Type Constructors
+//===----------------------------------------------------------------------===//
+
+/// isValidReturnType - Return true if the specified type is valid as a return
+/// type.
+bool FunctionType::isValidReturnType(const Type *RetTy) {
+ if (RetTy->isFirstClassType()) {
+ if (const PointerType *PTy = dyn_cast<PointerType>(RetTy))
+ return PTy->getElementType() != Type::MetadataTy;
+ return true;
+ }
+ if (RetTy == Type::VoidTy || RetTy == Type::MetadataTy ||
+ isa<OpaqueType>(RetTy))
+ return true;
+
+ // If this is a multiple return case, verify that each return is a first class
+ // value and that there is at least one value.
+ const StructType *SRetTy = dyn_cast<StructType>(RetTy);
+ if (SRetTy == 0 || SRetTy->getNumElements() == 0)
+ return false;
+
+ for (unsigned i = 0, e = SRetTy->getNumElements(); i != e; ++i)
+ if (!SRetTy->getElementType(i)->isFirstClassType())
+ return false;
+ return true;
+}
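+
+// Examples (illustrative): i32, { i32, i32 }, and void are valid return
+// types; metadata* and the empty struct {} are rejected by the checks above.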
+
+FunctionType::FunctionType(const Type *Result,
+ const std::vector<const Type*> &Params,
+ bool IsVarArgs)
+ : DerivedType(FunctionTyID), isVarArgs(IsVarArgs) {
+ ContainedTys = reinterpret_cast<PATypeHandle*>(this+1);
+ NumContainedTys = Params.size() + 1; // + 1 for result type
+ assert(isValidReturnType(Result) && "invalid return type for function");
+
+
+ bool isAbstract = Result->isAbstract();
+ new (&ContainedTys[0]) PATypeHandle(Result, this);
+
+ for (unsigned i = 0; i != Params.size(); ++i) {
+ assert((Params[i]->isFirstClassType() || isa<OpaqueType>(Params[i])) &&
+ "Function arguments must be value types!");
+ assert((!isa<PointerType>(Params[i]) ||
+ cast<PointerType>(Params[i])->getElementType() != Type::MetadataTy)
+ && "Attempt to use metadata* as function argument type!");
+ new (&ContainedTys[i+1]) PATypeHandle(Params[i], this);
+ isAbstract |= Params[i]->isAbstract();
+ }
+
+ // Calculate whether or not this type is abstract
+ setAbstract(isAbstract);
+}
+
+StructType::StructType(const std::vector<const Type*> &Types, bool isPacked)
+ : CompositeType(StructTyID) {
+ ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
+ NumContainedTys = Types.size();
+ setSubclassData(isPacked);
+ bool isAbstract = false;
+ for (unsigned i = 0; i < Types.size(); ++i) {
+ assert(Types[i] && "<null> type for structure field!");
+ assert(Types[i] != Type::VoidTy && "Void type for structure field!");
+ assert(Types[i] != Type::LabelTy && "Label type for structure field!");
+ assert(Types[i] != Type::MetadataTy && "Metadata type for structure field");
+ assert((!isa<PointerType>(Types[i]) ||
+ cast<PointerType>(Types[i])->getElementType() != Type::MetadataTy)
+ && "Type 'metadata*' is invalid for structure field.");
+ new (&ContainedTys[i]) PATypeHandle(Types[i], this);
+ isAbstract |= Types[i]->isAbstract();
+ }
+
+ // Calculate whether or not this type is abstract
+ setAbstract(isAbstract);
+}
+
+ArrayType::ArrayType(const Type *ElType, uint64_t NumEl)
+ : SequentialType(ArrayTyID, ElType) {
+ NumElements = NumEl;
+
+ // Calculate whether or not this type is abstract
+ setAbstract(ElType->isAbstract());
+}
+
+VectorType::VectorType(const Type *ElType, unsigned NumEl)
+ : SequentialType(VectorTyID, ElType) {
+ NumElements = NumEl;
+ setAbstract(ElType->isAbstract());
+ assert(NumEl > 0 && "NumEl of a VectorType must be greater than 0");
+ assert((ElType->isInteger() || ElType->isFloatingPoint() ||
+ isa<OpaqueType>(ElType)) &&
+ "Elements of a VectorType must be a primitive type");
+
+}
+
+
+PointerType::PointerType(const Type *E, unsigned AddrSpace)
+ : SequentialType(PointerTyID, E) {
+ AddressSpace = AddrSpace;
+ // Calculate whether or not this type is abstract
+ setAbstract(E->isAbstract());
+}
+
+OpaqueType::OpaqueType() : DerivedType(OpaqueTyID) {
+ setAbstract(true);
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "Derived new type: " << *this << "\n";
+#endif
+}
+
+void PATypeHolder::destroy() {
+ Ty = 0;
+}
+
+// dropAllTypeUses - When this (abstract) type is resolved to be equal to
+// another (more concrete) type, we must eliminate all references to other
+// types, to avoid some circular reference problems.
+void DerivedType::dropAllTypeUses() {
+ if (NumContainedTys != 0) {
+ // The type must stay abstract. To do this, we insert a pointer to a type
+ // that will never get resolved, thus will always be abstract.
+ static Type *AlwaysOpaqueTy = OpaqueType::get();
+ static PATypeHolder Holder(AlwaysOpaqueTy);
+ ContainedTys[0] = AlwaysOpaqueTy;
+
+ // Change the rest of the types to be Int32Ty's. It doesn't matter what we
+ // pick so long as it doesn't point back to this type. We choose something
+ // concrete to avoid overhead for adding to AbstractTypeUser lists and stuff.
+ for (unsigned i = 1, e = NumContainedTys; i != e; ++i)
+ ContainedTys[i] = Type::Int32Ty;
+ }
+}
+
+
+namespace {
+
+/// TypePromotionGraph and graph traits - this is designed to allow us to do
+/// efficient SCC processing of type graphs. This is the exact same as
+/// GraphTraits<Type*>, except that we pretend that concrete types have no
+/// children to avoid processing them.
+struct TypePromotionGraph {
+ Type *Ty;
+ TypePromotionGraph(Type *T) : Ty(T) {}
+};
+
+}
+
+namespace llvm {
+ template <> struct GraphTraits<TypePromotionGraph> {
+ typedef Type NodeType;
+ typedef Type::subtype_iterator ChildIteratorType;
+
+ static inline NodeType *getEntryNode(TypePromotionGraph G) { return G.Ty; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ if (N->isAbstract())
+ return N->subtype_begin();
+ else // No need to process children of concrete types.
+ return N->subtype_end();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->subtype_end();
+ }
+ };
+}
+
+
+// PromoteAbstractToConcrete - Walk a type graph, calculating whether or not
+// a type is abstract.
+//
+void Type::PromoteAbstractToConcrete() {
+ if (!isAbstract()) return;
+
+ scc_iterator<TypePromotionGraph> SI = scc_begin(TypePromotionGraph(this));
+ scc_iterator<TypePromotionGraph> SE = scc_end (TypePromotionGraph(this));
+
+ for (; SI != SE; ++SI) {
+ std::vector<Type*> &SCC = *SI;
+
+ // Concrete types are leaves in the tree. Since an SCC will either be all
+ // abstract or all concrete, we only need to check one type.
+ if (SCC[0]->isAbstract()) {
+ if (isa<OpaqueType>(SCC[0]))
+ return; // Not going to be concrete, sorry.
+
+ // If all of the children of all of the types in this SCC are concrete,
+ // then this SCC is now concrete as well. If not, neither this SCC, nor
+ // any parent SCCs will be concrete, so we might as well just exit.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ for (Type::subtype_iterator CI = SCC[i]->subtype_begin(),
+ E = SCC[i]->subtype_end(); CI != E; ++CI)
+ if ((*CI)->isAbstract())
+ // If the child type is in our SCC, it doesn't make the entire SCC
+ // abstract unless there is a non-SCC abstract type.
+ if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end())
+ return; // Not going to be concrete, sorry.
+
+ // Okay, we just discovered this whole SCC is now concrete, mark it as
+ // such!
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ assert(SCC[i]->isAbstract() && "Why are we processing concrete types?");
+
+ SCC[i]->setAbstract(false);
+ }
+
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ assert(!SCC[i]->isAbstract() && "Concrete type became abstract?");
+ // The type just became concrete, notify all users!
+ cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete();
+ }
+ }
+ }
+}
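+
+// Example (illustrative): for the mutually recursive pair
+//   %A = { %B* }   and   %B = { %A* }
+// both types sit in one SCC. Once no member of the SCC refers to an
+// abstract type outside it, the whole component is flipped to concrete in
+// a single pass and every AbstractTypeUser is notified.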
+
+
+//===----------------------------------------------------------------------===//
+// Type Structural Equality Testing
+//===----------------------------------------------------------------------===//
+
+// TypesEqual - Two types are considered structurally equal if they have the
+// same "shape": Every level and element of the types have identical primitive
+// IDs, and the graphs have the same edges/nodes in them. Nodes do not have to
+// be pointer-equal to be equivalent, though. This uses an optimistic algorithm
+// that assumes that two graphs are the same until proven otherwise.
+//
+static bool TypesEqual(const Type *Ty, const Type *Ty2,
+ std::map<const Type *, const Type *> &EqTypes) {
+ if (Ty == Ty2) return true;
+ if (Ty->getTypeID() != Ty2->getTypeID()) return false;
+ if (isa<OpaqueType>(Ty))
+ return false; // Two unequal opaque types are never equal
+
+ std::map<const Type*, const Type*>::iterator It = EqTypes.find(Ty);
+ if (It != EqTypes.end())
+ return It->second == Ty2; // Looping back on a type, check for equality
+
+ // Otherwise, add the mapping to the table to make sure we don't get
+ // recursion on the types...
+ EqTypes.insert(It, std::make_pair(Ty, Ty2));
+
+ // Two really annoying special cases that break an otherwise nice, simple
+ // algorithm are the facts that array types have sizes that differentiate
+ // them, and that function types can be varargs or not. Consider this now.
+ //
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+ const IntegerType *ITy2 = cast<IntegerType>(Ty2);
+ return ITy->getBitWidth() == ITy2->getBitWidth();
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ const PointerType *PTy2 = cast<PointerType>(Ty2);
+ return PTy->getAddressSpace() == PTy2->getAddressSpace() &&
+ TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructType *STy2 = cast<StructType>(Ty2);
+ if (STy->getNumElements() != STy2->getNumElements()) return false;
+ if (STy->isPacked() != STy2->isPacked()) return false;
+ for (unsigned i = 0, e = STy2->getNumElements(); i != e; ++i)
+ if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes))
+ return false;
+ return true;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+ return ATy->getNumElements() == ATy2->getNumElements() &&
+ TypesEqual(ATy->getElementType(), ATy2->getElementType(), EqTypes);
+ } else if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) {
+ const VectorType *PTy2 = cast<VectorType>(Ty2);
+ return PTy->getNumElements() == PTy2->getNumElements() &&
+ TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
+ } else if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+ const FunctionType *FTy2 = cast<FunctionType>(Ty2);
+ if (FTy->isVarArg() != FTy2->isVarArg() ||
+ FTy->getNumParams() != FTy2->getNumParams() ||
+ !TypesEqual(FTy->getReturnType(), FTy2->getReturnType(), EqTypes))
+ return false;
+ for (unsigned i = 0, e = FTy2->getNumParams(); i != e; ++i) {
+ if (!TypesEqual(FTy->getParamType(i), FTy2->getParamType(i), EqTypes))
+ return false;
+ }
+ return true;
+ } else {
+ assert(0 && "Unknown derived type!");
+ return false;
+ }
+}
+
+static bool TypesEqual(const Type *Ty, const Type *Ty2) {
+ std::map<const Type *, const Type *> EqTypes;
+ return TypesEqual(Ty, Ty2, EqTypes);
+}
+
+// AbstractTypeHasCycleThrough - Return true if there is a path from CurTy to
+// TargetTy in the type graph. We know that Ty is an abstract type, so if we
+// ever reach a non-abstract type, we know that we don't need to search the
+// subgraph.
+static bool AbstractTypeHasCycleThrough(const Type *TargetTy, const Type *CurTy,
+ SmallPtrSet<const Type*, 128> &VisitedTypes) {
+ if (TargetTy == CurTy) return true;
+ if (!CurTy->isAbstract()) return false;
+
+ if (!VisitedTypes.insert(CurTy))
+ return false; // Already been here.
+
+ for (Type::subtype_iterator I = CurTy->subtype_begin(),
+ E = CurTy->subtype_end(); I != E; ++I)
+ if (AbstractTypeHasCycleThrough(TargetTy, *I, VisitedTypes))
+ return true;
+ return false;
+}
+
+static bool ConcreteTypeHasCycleThrough(const Type *TargetTy, const Type *CurTy,
+ SmallPtrSet<const Type*, 128> &VisitedTypes) {
+ if (TargetTy == CurTy) return true;
+
+ if (!VisitedTypes.insert(CurTy))
+ return false; // Already been here.
+
+ for (Type::subtype_iterator I = CurTy->subtype_begin(),
+ E = CurTy->subtype_end(); I != E; ++I)
+ if (ConcreteTypeHasCycleThrough(TargetTy, *I, VisitedTypes))
+ return true;
+ return false;
+}
+
+/// TypeHasCycleThroughItself - Return true if the specified type has a cycle
+/// back to itself.
+static bool TypeHasCycleThroughItself(const Type *Ty) {
+ SmallPtrSet<const Type*, 128> VisitedTypes;
+
+ if (Ty->isAbstract()) { // Optimized case for abstract types.
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ if (AbstractTypeHasCycleThrough(Ty, *I, VisitedTypes))
+ return true;
+ } else {
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ if (ConcreteTypeHasCycleThrough(Ty, *I, VisitedTypes))
+ return true;
+ }
+ return false;
+}
+
+/// getSubElementHash - Generate a hash value for all of the subtypes of this
+/// type. The hash value is guaranteed to be zero if any of the subtypes is
+/// an opaque type. Otherwise we try to mix them in as well as possible, but do
+/// not look at the subtypes' subtypes.
+static unsigned getSubElementHash(const Type *Ty) {
+ unsigned HashVal = 0;
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I) {
+ HashVal *= 32;
+ const Type *SubTy = I->get();
+ HashVal += SubTy->getTypeID();
+ switch (SubTy->getTypeID()) {
+ default: break;
+ case Type::OpaqueTyID: return 0; // Opaque -> hash = 0 no matter what.
+ case Type::IntegerTyID:
+ HashVal ^= (cast<IntegerType>(SubTy)->getBitWidth() << 3);
+ break;
+ case Type::FunctionTyID:
+ HashVal ^= cast<FunctionType>(SubTy)->getNumParams()*2 +
+ cast<FunctionType>(SubTy)->isVarArg();
+ break;
+ case Type::ArrayTyID:
+ HashVal ^= cast<ArrayType>(SubTy)->getNumElements();
+ break;
+ case Type::VectorTyID:
+ HashVal ^= cast<VectorType>(SubTy)->getNumElements();
+ break;
+ case Type::StructTyID:
+ HashVal ^= cast<StructType>(SubTy)->getNumElements();
+ break;
+ case Type::PointerTyID:
+ HashVal ^= cast<PointerType>(SubTy)->getAddressSpace();
+ break;
+ }
+ }
+ return HashVal ? HashVal : 1; // Do not return zero unless opaque subty.
+}
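+
+// Worked example (illustrative): for { i32, float* } the hash mixes the two
+// subtype IDs, the 32-bit width, and the pointer's address space; for
+// { i32, opaque } the result is pinned to 0, the sentinel bucket shared by
+// every type with an unresolved subtype.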
+
+//===----------------------------------------------------------------------===//
+// Derived Type Factory Functions
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+class TypeMapBase {
+protected:
+ /// TypesByHash - Keep track of types by their structure hash value. Note
+ /// that we only keep track of types that have cycles through themselves in
+ /// this map.
+ ///
+ std::multimap<unsigned, PATypeHolder> TypesByHash;
+
+public:
+ ~TypeMapBase() {
+ // PATypeHolder won't destroy non-abstract types.
+ // We can't destroy them by simply iterating, because
+ // they may contain references to each other.
+#if 0
+ for (std::multimap<unsigned, PATypeHolder>::iterator I
+ = TypesByHash.begin(), E = TypesByHash.end(); I != E; ++I) {
+ Type *Ty = const_cast<Type*>(I->second.Ty);
+ I->second.destroy();
+ // We can't invoke destroy or delete, because the type may
+ // contain references to already freed types.
+ // So we have to destruct the object the ugly way.
+ if (Ty) {
+ Ty->AbstractTypeUsers.clear();
+ static_cast<const Type*>(Ty)->Type::~Type();
+ operator delete(Ty);
+ }
+ }
+#endif
+ }
+
+ void RemoveFromTypesByHash(unsigned Hash, const Type *Ty) {
+ std::multimap<unsigned, PATypeHolder>::iterator I =
+ TypesByHash.lower_bound(Hash);
+ for (; I != TypesByHash.end() && I->first == Hash; ++I) {
+ if (I->second == Ty) {
+ TypesByHash.erase(I);
+ return;
+ }
+ }
+
+ // This must be due to an opaque type that was resolved. Switch down to the
+ // hash code of zero.
+ assert(Hash && "Didn't find type entry!");
+ RemoveFromTypesByHash(0, Ty);
+ }
+
+ /// TypeBecameConcrete - When Ty gets a notification that TheType just became
+ /// concrete, drop uses and make Ty non-abstract if we should.
+ void TypeBecameConcrete(DerivedType *Ty, const DerivedType *TheType) {
+ // If the element just became concrete, remove 'ty' from the abstract
+ // type user list for the type. Do this for as many times as Ty uses
+ // OldType.
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ if (I->get() == TheType)
+ TheType->removeAbstractTypeUser(Ty);
+
+ // If the type is currently thought to be abstract, rescan all of our
+ // subtypes to see if the type has just become concrete! Note that this
+ // may send out notifications to AbstractTypeUsers that types become
+ // concrete.
+ if (Ty->isAbstract())
+ Ty->PromoteAbstractToConcrete();
+ }
+};
+}
+
+
+// TypeMap - Make sure that only one instance of a particular type may be
+// created on any given run of the compiler... note that this involves updating
+// our map if an abstract type gets refined somehow.
+//
+namespace llvm {
+template<class ValType, class TypeClass>
+class TypeMap : public TypeMapBase {
+ std::map<ValType, PATypeHolder> Map;
+public:
+ typedef typename std::map<ValType, PATypeHolder>::iterator iterator;
+ ~TypeMap() { print("ON EXIT"); }
+
+ inline TypeClass *get(const ValType &V) {
+ iterator I = Map.find(V);
+ return I != Map.end() ? cast<TypeClass>((Type*)I->second.get()) : 0;
+ }
+
+ inline void add(const ValType &V, TypeClass *Ty) {
+ Map.insert(std::make_pair(V, Ty));
+
+ // If this type has a cycle, remember it.
+ TypesByHash.insert(std::make_pair(ValType::hashTypeStructure(Ty), Ty));
+ print("add");
+ }
+
+ /// RefineAbstractType - This method is called after we have merged a type
+ /// with another one. We must now either merge the type away with
+ /// some other type or reinstall it in the map with its new configuration.
+ void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType,
+ const Type *NewType) {
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "RefineAbstractType(" << (void*)OldType << "[" << *OldType
+ << "], " << (void*)NewType << " [" << *NewType << "])\n";
+#endif
+
+ // Otherwise, we are changing one subelement type into another. Clearly the
+ // OldType must have been abstract, making us abstract.
+ assert(Ty->isAbstract() && "Refining a non-abstract type!");
+ assert(OldType != NewType);
+
+ // Make a temporary type holder for the type so that it doesn't disappear on
+ // us when we erase the entry from the map.
+ PATypeHolder TyHolder = Ty;
+
+ // The old record is now out-of-date, because one of the children has been
+ // updated. Remove the obsolete entry from the map.
+ unsigned NumErased = Map.erase(ValType::get(Ty));
+ assert(NumErased && "Element not found!"); NumErased = NumErased;
+
+ // Remember the structural hash for the type before we start hacking on it,
+ // in case we need it later.
+ unsigned OldTypeHash = ValType::hashTypeStructure(Ty);
+
+ // Find the type element we are refining... and change it now!
+ for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i)
+ if (Ty->ContainedTys[i] == OldType)
+ Ty->ContainedTys[i] = NewType;
+ unsigned NewTypeHash = ValType::hashTypeStructure(Ty);
+
+ // If there are no cycles going through this node, we can do a simple,
+ // efficient lookup in the map, instead of an inefficient nasty linear
+ // lookup.
+ if (!TypeHasCycleThroughItself(Ty)) {
+ typename std::map<ValType, PATypeHolder>::iterator I;
+ bool Inserted;
+
+ tie(I, Inserted) = Map.insert(std::make_pair(ValType::get(Ty), Ty));
+ if (!Inserted) {
+ // Refined to a different type altogether?
+ RemoveFromTypesByHash(OldTypeHash, Ty);
+
+ // We already have this type in the table. Get rid of the newly refined
+ // type.
+ TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
+ Ty->refineAbstractTypeTo(NewTy);
+ return;
+ }
+ } else {
+ // Now we check to see if there is an existing entry in the table which is
+ // structurally identical to the newly refined type. If so, this type
+ // gets refined to the pre-existing type.
+ //
+ std::multimap<unsigned, PATypeHolder>::iterator I, E, Entry;
+ tie(I, E) = TypesByHash.equal_range(NewTypeHash);
+ Entry = E;
+ for (; I != E; ++I) {
+ if (I->second == Ty) {
+ // Remember the position of the old type if we see it in our scan.
+ Entry = I;
+ } else {
+ if (TypesEqual(Ty, I->second)) {
+ TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
+
+ // Remove the old entry from TypesByHash. If the hash values differ
+ // now, remove it from the old place. Otherwise, continue scanning
+ // within this hash code to reduce work.
+ if (NewTypeHash != OldTypeHash) {
+ RemoveFromTypesByHash(OldTypeHash, Ty);
+ } else {
+ if (Entry == E) {
+ // Find the location of Ty in the TypesByHash structure if we
+ // haven't seen it already.
+ while (I->second != Ty) {
+ ++I;
+ assert(I != E && "Structure doesn't contain type??");
+ }
+ Entry = I;
+ }
+ TypesByHash.erase(Entry);
+ }
+ Ty->refineAbstractTypeTo(NewTy);
+ return;
+ }
+ }
+ }
+
+ // If there is no existing type of the same structure, we reinsert an
+ // updated record into the map.
+ Map.insert(std::make_pair(ValType::get(Ty), Ty));
+ }
+
+ // If the hash codes differ, update TypesByHash
+ if (NewTypeHash != OldTypeHash) {
+ RemoveFromTypesByHash(OldTypeHash, Ty);
+ TypesByHash.insert(std::make_pair(NewTypeHash, Ty));
+ }
+
+ // If the type is currently thought to be abstract, rescan all of our
+ // subtypes to see if the type has just become concrete! Note that this
+ // may send out notifications to AbstractTypeUsers that types become
+ // concrete.
+ if (Ty->isAbstract())
+ Ty->PromoteAbstractToConcrete();
+ }
+
+ void print(const char *Arg) const {
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "TypeMap<>::" << Arg << " table contents:\n";
+ unsigned i = 0;
+ for (typename std::map<ValType, PATypeHolder>::const_iterator I
+ = Map.begin(), E = Map.end(); I != E; ++I)
+ DOUT << " " << (++i) << ". " << (void*)I->second.get() << " "
+ << *I->second.get() << "\n";
+#endif
+ }
+
+ void dump() const { print("dump output"); }
+};
+}
+
+
+//===----------------------------------------------------------------------===//
+// Function Type Factory and Value Class...
+//
+
+//===----------------------------------------------------------------------===//
+// Integer Type Factory...
+//
+namespace llvm {
+class IntegerValType {
+ uint32_t bits;
+public:
+ IntegerValType(uint32_t numbits) : bits(numbits) {}
+
+ static IntegerValType get(const IntegerType *Ty) {
+ return IntegerValType(Ty->getBitWidth());
+ }
+
+ static unsigned hashTypeStructure(const IntegerType *Ty) {
+ return (unsigned)Ty->getBitWidth();
+ }
+
+ inline bool operator<(const IntegerValType &IVT) const {
+ return bits < IVT.bits;
+ }
+};
+}
+
+static ManagedStatic<TypeMap<IntegerValType, IntegerType> > IntegerTypes;
+
+const IntegerType *IntegerType::get(unsigned NumBits) {
+ assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
+ assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
+
+ // Check for the built-in integer types
+ switch (NumBits) {
+ case 1: return cast<IntegerType>(Type::Int1Ty);
+ case 8: return cast<IntegerType>(Type::Int8Ty);
+ case 16: return cast<IntegerType>(Type::Int16Ty);
+ case 32: return cast<IntegerType>(Type::Int32Ty);
+ case 64: return cast<IntegerType>(Type::Int64Ty);
+ default:
+ break;
+ }
+
+ IntegerValType IVT(NumBits);
+ IntegerType *ITy = IntegerTypes->get(IVT);
+ if (ITy) return ITy; // Found a match, return it!
+
+ // Value not found. Derive a new type!
+ ITy = new IntegerType(NumBits);
+ IntegerTypes->add(IVT, ITy);
+
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "Derived new type: " << *ITy << "\n";
+#endif
+ return ITy;
+}
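+
+// Usage sketch (illustrative): the factory uniques arbitrary widths, so the
+// two calls below return the same object, and unusual widths are fine:
+//
+//   const IntegerType *A = IntegerType::get(36);
+//   const IntegerType *B = IntegerType::get(36);
+//   assert(A == B && "integer types are uniqued");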
+
+bool IntegerType::isPowerOf2ByteWidth() const {
+ unsigned BitWidth = getBitWidth();
+ return (BitWidth > 7) && isPowerOf2_32(BitWidth);
+}
+
+APInt IntegerType::getMask() const {
+ return APInt::getAllOnesValue(getBitWidth());
+}
+
+// FunctionValType - Define a class to hold the key that goes into the TypeMap
+//
+namespace llvm {
+class FunctionValType {
+ const Type *RetTy;
+ std::vector<const Type*> ArgTypes;
+ bool isVarArg;
+public:
+ FunctionValType(const Type *ret, const std::vector<const Type*> &args,
+ bool isVA) : RetTy(ret), ArgTypes(args), isVarArg(isVA) {}
+
+ static FunctionValType get(const FunctionType *FT);
+
+ static unsigned hashTypeStructure(const FunctionType *FT) {
+ unsigned Result = FT->getNumParams()*2 + FT->isVarArg();
+ return Result;
+ }
+
+ inline bool operator<(const FunctionValType &MTV) const {
+ if (RetTy < MTV.RetTy) return true;
+ if (RetTy > MTV.RetTy) return false;
+ if (isVarArg < MTV.isVarArg) return true;
+ if (isVarArg > MTV.isVarArg) return false;
+ if (ArgTypes < MTV.ArgTypes) return true;
+ if (ArgTypes > MTV.ArgTypes) return false;
+ return false;
+ }
+};
+}
+
+// Define the actual map itself now...
+static ManagedStatic<TypeMap<FunctionValType, FunctionType> > FunctionTypes;
+
+FunctionValType FunctionValType::get(const FunctionType *FT) {
+ // Build up a FunctionValType
+ std::vector<const Type *> ParamTypes;
+ ParamTypes.reserve(FT->getNumParams());
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
+ ParamTypes.push_back(FT->getParamType(i));
+ return FunctionValType(FT->getReturnType(), ParamTypes, FT->isVarArg());
+}
+
+
+// FunctionType::get - The factory function for the FunctionType class...
+FunctionType *FunctionType::get(const Type *ReturnType,
+ const std::vector<const Type*> &Params,
+ bool isVarArg) {
+ FunctionValType VT(ReturnType, Params, isVarArg);
+ FunctionType *FT = FunctionTypes->get(VT);
+ if (FT)
+ return FT;
+
+ FT = (FunctionType*) operator new(sizeof(FunctionType) +
+ sizeof(PATypeHandle)*(Params.size()+1));
+ new (FT) FunctionType(ReturnType, Params, isVarArg);
+ FunctionTypes->add(VT, FT);
+
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "Derived new type: " << FT << "\n";
+#endif
+ return FT;
+}
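+
+// Usage sketch (illustrative): building 'i32 (i8*, ...)', a varargs
+// function type with a single i8* parameter:
+//
+//   std::vector<const Type*> Params;
+//   Params.push_back(PointerType::get(Type::Int8Ty, 0));
+//   FunctionType *FT = FunctionType::get(Type::Int32Ty, Params, true);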
+
+//===----------------------------------------------------------------------===//
+// Array Type Factory...
+//
+namespace llvm {
+class ArrayValType {
+ const Type *ValTy;
+ uint64_t Size;
+public:
+ ArrayValType(const Type *val, uint64_t sz) : ValTy(val), Size(sz) {}
+
+ static ArrayValType get(const ArrayType *AT) {
+ return ArrayValType(AT->getElementType(), AT->getNumElements());
+ }
+
+ static unsigned hashTypeStructure(const ArrayType *AT) {
+ return (unsigned)AT->getNumElements();
+ }
+
+ inline bool operator<(const ArrayValType &MTV) const {
+ if (Size < MTV.Size) return true;
+ return Size == MTV.Size && ValTy < MTV.ValTy;
+ }
+};
+}
+static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
+
+
+ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
+ assert(ElementType && "Can't get array of <null> types!");
+ assert(ElementType != Type::VoidTy && "Array of void is not valid!");
+ assert(ElementType != Type::LabelTy && "Array of labels is not valid!");
+ assert(ElementType != Type::MetadataTy && "Array of metadata is not valid!");
+ assert((!isa<PointerType>(ElementType) ||
+ cast<PointerType>(ElementType)->getElementType() != Type::MetadataTy)
+ && "Array of metadata* is not valid!");
+
+ ArrayValType AVT(ElementType, NumElements);
+ ArrayType *AT = ArrayTypes->get(AVT);
+ if (AT) return AT; // Found a match, return it!
+
+ // Value not found. Derive a new type!
+ ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
+
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "Derived new type: " << *AT << "\n";
+#endif
+ return AT;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector Type Factory...
+//
+namespace llvm {
+class VectorValType {
+ const Type *ValTy;
+ unsigned Size;
+public:
+ VectorValType(const Type *val, unsigned sz) : ValTy(val), Size(sz) {}
+
+ static VectorValType get(const VectorType *PT) {
+ return VectorValType(PT->getElementType(), PT->getNumElements());
+ }
+
+ static unsigned hashTypeStructure(const VectorType *PT) {
+ return PT->getNumElements();
+ }
+
+ inline bool operator<(const VectorValType &MTV) const {
+ if (Size < MTV.Size) return true;
+ return Size == MTV.Size && ValTy < MTV.ValTy;
+ }
+};
+}
+static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
+
+
+VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
+ assert(ElementType && "Can't get vector of <null> types!");
+
+ VectorValType PVT(ElementType, NumElements);
+ VectorType *PT = VectorTypes->get(PVT);
+ if (PT) return PT; // Found a match, return it!
+
+ // Value not found. Derive a new type!
+ VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
+
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "Derived new type: " << *PT << "\n";
+#endif
+ return PT;
+}
+
+//===----------------------------------------------------------------------===//
+// Struct Type Factory...
+//
+
+namespace llvm {
+// StructValType - Define a class to hold the key that goes into the TypeMap
+//
+class StructValType {
+ std::vector<const Type*> ElTypes;
+ bool packed;
+public:
+ StructValType(const std::vector<const Type*> &args, bool isPacked)
+ : ElTypes(args), packed(isPacked) {}
+
+ static StructValType get(const StructType *ST) {
+ std::vector<const Type *> ElTypes;
+ ElTypes.reserve(ST->getNumElements());
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
+ ElTypes.push_back(ST->getElementType(i));
+
+ return StructValType(ElTypes, ST->isPacked());
+ }
+
+ static unsigned hashTypeStructure(const StructType *ST) {
+ return ST->getNumElements();
+ }
+
+ inline bool operator<(const StructValType &STV) const {
+ if (ElTypes < STV.ElTypes) return true;
+ else if (ElTypes > STV.ElTypes) return false;
+ else return (int)packed < (int)STV.packed;
+ }
+};
+}
+
+static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;
+
+StructType *StructType::get(const std::vector<const Type*> &ETypes,
+ bool isPacked) {
+ StructValType STV(ETypes, isPacked);
+ StructType *ST = StructTypes->get(STV);
+ if (ST) return ST;
+
+ // Value not found. Derive a new type!
+ ST = (StructType*) operator new(sizeof(StructType) +
+ sizeof(PATypeHandle) * ETypes.size());
+ new (ST) StructType(ETypes, isPacked);
+ StructTypes->add(STV, ST);
+
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "Derived new type: " << *ST << "\n";
+#endif
+ return ST;
+}
+
+StructType *StructType::get(const Type *type, ...) {
+ va_list ap;
+ std::vector<const llvm::Type*> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::get(StructFields);
+}
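+
+// Usage sketch (illustrative): the variadic overload needs a trailing NULL
+// to terminate the va_arg loop above:
+//
+//   StructType *Pair = StructType::get(Type::Int32Ty, Type::FloatTy, NULL);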
+
+
+
+//===----------------------------------------------------------------------===//
+// Pointer Type Factory...
+//
+
+// PointerValType - Define a class to hold the key that goes into the TypeMap
+//
+namespace llvm {
+class PointerValType {
+ const Type *ValTy;
+ unsigned AddressSpace;
+public:
+ PointerValType(const Type *val, unsigned as) : ValTy(val), AddressSpace(as) {}
+
+ static PointerValType get(const PointerType *PT) {
+ return PointerValType(PT->getElementType(), PT->getAddressSpace());
+ }
+
+ static unsigned hashTypeStructure(const PointerType *PT) {
+ return getSubElementHash(PT);
+ }
+
+ bool operator<(const PointerValType &MTV) const {
+ if (AddressSpace < MTV.AddressSpace) return true;
+ return AddressSpace == MTV.AddressSpace && ValTy < MTV.ValTy;
+ }
+};
+}
+
+static ManagedStatic<TypeMap<PointerValType, PointerType> > PointerTypes;
+
+PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
+ assert(ValueType && "Can't get a pointer to <null> type!");
+ assert(ValueType != Type::VoidTy &&
+ "Pointer to void is not valid, use i8* instead!");
+ assert(ValueType != Type::LabelTy && "Pointer to label is not valid!");
+ assert((!isa<PointerType>(ValueType) ||
+ cast<PointerType>(ValueType)->getElementType() != Type::MetadataTy)
+ && "Pointer to metadata* is not valid!");
+ PointerValType PVT(ValueType, AddressSpace);
+
+ PointerType *PT = PointerTypes->get(PVT);
+ if (PT) return PT;
+
+ // Value not found. Derive a new type!
+ PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
+
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "Derived new type: " << *PT << "\n";
+#endif
+ return PT;
+}
+
+PointerType *Type::getPointerTo(unsigned addrs) const {
+ return PointerType::get(this, addrs);
+}
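+
+// Usage sketch (illustrative): Type::Int8Ty->getPointerTo(0) yields i8*,
+// and Type::Int8Ty->getPointerTo(1) yields i8 addrspace(1)*.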
+
+//===----------------------------------------------------------------------===//
+// Derived Type Refinement Functions
+//===----------------------------------------------------------------------===//
+
+// removeAbstractTypeUser - Notify an abstract type that a user of the class
+// no longer has a handle to the type. This function is called primarily by
+// the PATypeHandle class. When there are no users of the abstract type, it
+// is annihilated, because there is no way to get a reference to it ever again.
+//
+void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
+ // Search from back to front because we will notify users from back to
+ // front. Also, it is likely that there will be stack-like behavior for
+ // users that register and unregister themselves.
+ //
+ unsigned i;
+ for (i = AbstractTypeUsers.size(); AbstractTypeUsers[i-1] != U; --i)
+ assert(i != 0 && "AbstractTypeUser not in user list!");
+
+ --i; // Convert to be in range 0 <= i < size()
+ assert(i < AbstractTypeUsers.size() && "Index out of range!"); // Wraparound?
+
+ AbstractTypeUsers.erase(AbstractTypeUsers.begin()+i);
+
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << " remAbstractTypeUser[" << (void*)this << ", "
+ << *this << "][" << i << "] User = " << U << "\n";
+#endif
+
+ if (AbstractTypeUsers.empty() && getRefCount() == 0 && isAbstract()) {
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "DELETEing unused abstract type: <" << *this
+ << ">[" << (void*)this << "]" << "\n";
+#endif
+ this->destroy();
+ }
+}
+
+// refineAbstractTypeTo - This function is used when it is discovered that
+// the 'this' abstract type is actually equivalent to the NewType specified.
+// This causes all users of 'this' to switch to referencing the more concrete
+// type NewType, and causes 'this' to be deleted.
+//
+void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+ assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
+ assert(this != NewType && "Can't refine to myself!");
+ assert(ForwardType == 0 && "This type has already been refined!");
+
+ // The descriptions may be out of date. Conservatively clear them all!
+ if (AbstractTypeDescriptions.isConstructed())
+ AbstractTypeDescriptions->clear();
+
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "REFINING abstract type [" << (void*)this << " "
+ << *this << "] to [" << (void*)NewType << " "
+ << *NewType << "]!\n";
+#endif
+
+ // Make sure to put the type we are refining to into a holder so that if IT
+ // gets refined, we will not continue using a dead reference...
+ //
+ PATypeHolder NewTy(NewType);
+
+ // Any PATypeHolders referring to this type will now automatically forward to
+ // the type we are resolved to.
+ ForwardType = NewType;
+ if (NewType->isAbstract())
+ cast<DerivedType>(NewType)->addRef();
+
+ // Add a self use of the current type so that we don't delete ourself until
+ // after the function exits.
+ //
+ PATypeHolder CurrentTy(this);
+
+ // To make the situation simpler, we ask the subclass to remove this type from
+ // the type map, and to replace any type uses with uses of non-abstract types.
+ // This dramatically limits the amount of recursive type trouble we can find
+ // ourselves in.
+ dropAllTypeUses();
+
+ // Iterate over all of the uses of this type, invoking the callback. Each user
+ // should remove itself from our use list automatically. We have to check to
+ // make sure that NewTy doesn't _become_ 'this'. If it does, resolving types
+ // will not cause users to drop off of the use list. If we resolve to ourself
+ // we succeed!
+ //
+ while (!AbstractTypeUsers.empty() && NewTy != this) {
+ AbstractTypeUser *User = AbstractTypeUsers.back();
+
+ unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << " REFINING user " << OldSize-1 << "[" << (void*)User
+ << "] of abstract type [" << (void*)this << " "
+ << *this << "] to [" << (void*)NewTy.get() << " "
+ << *NewTy << "]!\n";
+#endif
+ User->refineAbstractType(this, NewTy);
+
+ assert(AbstractTypeUsers.size() != OldSize &&
+ "AbsTyUser did not remove self from user list!");
+ }
+
+ // If we were successful removing all users from the type, 'this' will be
+ // deleted when the last PATypeHolder is destroyed or updated from this type.
+ // This may occur on exit of this function, as the CurrentTy object is
+ // destroyed.
+}
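+
+// Usage sketch (illustrative): refineAbstractTypeTo is what makes recursive
+// types constructible. A linked-list node can be built by introducing an
+// opaque placeholder, using it where the recursion occurs, and then unifying
+// the placeholder with the finished struct:
+//
+//   PATypeHolder NodeTy = OpaqueType::get();
+//   std::vector<const Type*> Fields;
+//   Fields.push_back(Type::Int32Ty);
+//   Fields.push_back(PointerType::get(NodeTy, 0));
+//   StructType *ST = StructType::get(Fields);
+//   cast<OpaqueType>(NodeTy.get())->refineAbstractTypeTo(ST);
+//   // NodeTy now transparently forwards to the concrete, uniqued struct.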
+
+// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
+// the current type has transitioned from being abstract to being concrete.
+//
+void DerivedType::notifyUsesThatTypeBecameConcrete() {
+#ifdef DEBUG_MERGE_TYPES
+ DOUT << "typeIsREFINED type: " << (void*)this << " " << *this << "\n";
+#endif
+
+ unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
+ while (!AbstractTypeUsers.empty()) {
+ AbstractTypeUser *ATU = AbstractTypeUsers.back();
+ ATU->typeBecameConcrete(this);
+
+ assert(AbstractTypeUsers.size() < OldSize-- &&
+ "AbstractTypeUser did not remove itself from the use list!");
+ }
+}
+
+// refineAbstractType - Called when a contained type is found to be more
+// concrete - this could potentially change us from an abstract type to a
+// concrete type.
+//
+void FunctionType::refineAbstractType(const DerivedType *OldType,
+ const Type *NewType) {
+ FunctionTypes->RefineAbstractType(this, OldType, NewType);
+}
+
+void FunctionType::typeBecameConcrete(const DerivedType *AbsTy) {
+ FunctionTypes->TypeBecameConcrete(this, AbsTy);
+}
+
+
+// refineAbstractType - Called when a contained type is found to be more
+// concrete - this could potentially change us from an abstract type to a
+// concrete type.
+//
+void ArrayType::refineAbstractType(const DerivedType *OldType,
+ const Type *NewType) {
+ ArrayTypes->RefineAbstractType(this, OldType, NewType);
+}
+
+void ArrayType::typeBecameConcrete(const DerivedType *AbsTy) {
+ ArrayTypes->TypeBecameConcrete(this, AbsTy);
+}
+
+// refineAbstractType - Called when a contained type is found to be more
+// concrete - this could potentially change us from an abstract type to a
+// concrete type.
+//
+void VectorType::refineAbstractType(const DerivedType *OldType,
+ const Type *NewType) {
+ VectorTypes->RefineAbstractType(this, OldType, NewType);
+}
+
+void VectorType::typeBecameConcrete(const DerivedType *AbsTy) {
+ VectorTypes->TypeBecameConcrete(this, AbsTy);
+}
+
+// refineAbstractType - Called when a contained type is found to be more
+// concrete - this could potentially change us from an abstract type to a
+// concrete type.
+//
+void StructType::refineAbstractType(const DerivedType *OldType,
+ const Type *NewType) {
+ StructTypes->RefineAbstractType(this, OldType, NewType);
+}
+
+void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
+ StructTypes->TypeBecameConcrete(this, AbsTy);
+}
+
+// refineAbstractType - Called when a contained type is found to be more
+// concrete - this could potentially change us from an abstract type to a
+// concrete type.
+//
+void PointerType::refineAbstractType(const DerivedType *OldType,
+ const Type *NewType) {
+ PointerTypes->RefineAbstractType(this, OldType, NewType);
+}
+
+void PointerType::typeBecameConcrete(const DerivedType *AbsTy) {
+ PointerTypes->TypeBecameConcrete(this, AbsTy);
+}
+
+bool SequentialType::indexValid(const Value *V) const {
+ return isa<IntegerType>(V->getType());
+}
+
+namespace llvm {
+std::ostream &operator<<(std::ostream &OS, const Type *T) {
+ if (T == 0)
+ OS << "<null> value!\n";
+ else
+ T->print(OS);
+ return OS;
+}
+
+std::ostream &operator<<(std::ostream &OS, const Type &T) {
+ T.print(OS);
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
+ T.print(OS);
+ return OS;
+}
+}
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
new file mode 100644
index 0000000..475d719
--- /dev/null
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -0,0 +1,165 @@
+//===-- TypeSymbolTable.cpp - Implement the TypeSymbolTable class ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TypeSymbolTable class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Streams.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_SYMBOL_TABLE 0
+#define DEBUG_ABSTYPE 0
+
+TypeSymbolTable::~TypeSymbolTable() {
+ // Drop all abstract type references in the type plane...
+ for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
+ if (TI->second->isAbstract()) // If abstract, drop the reference...
+ cast<DerivedType>(TI->second)->removeAbstractTypeUser(this);
+ }
+}
+
+std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
+ std::string TryName = BaseName;
+ const_iterator End = tmap.end();
+
+ // See if the name exists
+ while (tmap.find(TryName) != End) // Loop until we find a free
+ TryName = BaseName + utostr(++LastUnique); // name in the symbol table
+ return TryName;
+}
+
+// lookup a type by name - returns null on failure
+Type* TypeSymbolTable::lookup(const std::string& Name) const {
+ const_iterator TI = tmap.find(Name);
+ if (TI != tmap.end())
+ return const_cast<Type*>(TI->second);
+ return 0;
+}
+
+// remove - Remove a type from the symbol table...
+Type* TypeSymbolTable::remove(iterator Entry) {
+ assert(Entry != tmap.end() && "Invalid entry to remove!");
+
+ const Type* Result = Entry->second;
+
+#if DEBUG_SYMBOL_TABLE
+ dump();
+ cerr << " Removing Value: " << Result->getName() << "\n";
+#endif
+
+ tmap.erase(Entry);
+
+ // If we are removing an abstract type, remove the symbol table from its use
+ // list...
+ if (Result->isAbstract()) {
+#if DEBUG_ABSTYPE
+ cerr << "Removing abstract type from symtab"
+ << Result->getDescription()
+ << "\n";
+#endif
+ cast<DerivedType>(Result)->removeAbstractTypeUser(this);
+ }
+
+ return const_cast<Type*>(Result);
+}
+
+
+// insert - Insert a type into the symbol table with the specified name...
+void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
+ assert(T && "Can't insert null type into symbol table!");
+
+ if (tmap.insert(make_pair(Name, T)).second) {
+ // Type inserted fine with no conflict.
+
+#if DEBUG_SYMBOL_TABLE
+ dump();
+ cerr << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
+#endif
+ } else {
+ // There is a naming conflict; rename this type before inserting it.
+ std::string UniqueName = Name;
+ if (lookup(Name))
+ UniqueName = getUniqueName(Name);
+
+#if DEBUG_SYMBOL_TABLE
+ dump();
+ cerr << " Inserting type: " << UniqueName << ": "
+ << T->getDescription() << "\n";
+#endif
+
+ // Insert the tmap entry
+ tmap.insert(make_pair(UniqueName, T));
+ }
+
+ // If we are adding an abstract type, add the symbol table to its use list.
+ if (T->isAbstract()) {
+ cast<DerivedType>(T)->addAbstractTypeUser(this);
+#if DEBUG_ABSTYPE
+ cerr << "Added abstract type to ST: " << T->getDescription() << "\n";
+#endif
+ }
+}
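+
+// Illustrative usage of the conflict handling above (a hedged sketch, not
+// part of this file; the exact suffix chosen depends on LastUnique):
+//
+//   TypeSymbolTable TST;
+//   TST.insert("foo", Type::Int32Ty);  // inserted as "foo"
+//   TST.insert("foo", Type::FloatTy);  // conflict: renamed, e.g. to "foo1"
+//   Type *T = TST.lookup("foo");       // still yields Type::Int32Ty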
+
+// This function is called when one of the types in the type plane is refined
+void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
+ const Type *NewType) {
+
+ // Loop over all of the types in the symbol table, replacing any references
+ // to OldType with references to NewType. Note that there may be multiple
+ // occurrences, and although we only need to remove one at a time, it's
+ // faster to remove them all in one pass.
+ //
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->second == (Type*)OldType) { // FIXME when Types aren't const.
+#if DEBUG_ABSTYPE
+ cerr << "Removing type " << OldType->getDescription() << "\n";
+#endif
+ OldType->removeAbstractTypeUser(this);
+
+ I->second = (Type*)NewType; // TODO FIXME when types aren't const
+ if (NewType->isAbstract()) {
+#if DEBUG_ABSTYPE
+ cerr << "Added type " << NewType->getDescription() << "\n";
+#endif
+ cast<DerivedType>(NewType)->addAbstractTypeUser(this);
+ }
+ }
+ }
+}
+
+
+// Handle the situation where a type becomes concrete after being abstract.
+void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
+ // Loop over all of the types in the symbol table, dropping any abstract
+ // type user entries for AbsTy which occur because there are names for the
+ // type.
+ for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
+ if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
+ AbsTy->removeAbstractTypeUser(this);
+}
+
+static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
+ cerr << " '" << T.first << "' = ";
+ T.second->dump();
+ cerr << "\n";
+}
+
+void TypeSymbolTable::dump() const {
+ cerr << "TypeSymbolPlane: ";
+ for_each(tmap.begin(), tmap.end(), DumpTypes);
+}
+
+// vim: sw=2 ai
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
new file mode 100644
index 0000000..b25415a
--- /dev/null
+++ b/lib/VMCore/Use.cpp
@@ -0,0 +1,233 @@
+//===-- Use.cpp - Implement the Use class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the algorithm for finding the User of a Use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/User.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Use swap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::swap(Use &RHS) {
+ Value *V1(Val);
+ Value *V2(RHS.Val);
+ if (V1 != V2) {
+ if (V1) {
+ removeFromList();
+ }
+
+ if (V2) {
+ RHS.removeFromList();
+ Val = V2;
+ V2->addUse(*this);
+ } else {
+ Val = 0;
+ }
+
+ if (V1) {
+ RHS.Val = V1;
+ V1->addUse(RHS);
+ } else {
+ RHS.Val = 0;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Use getImpliedUser Implementation
+//===----------------------------------------------------------------------===//
+
+const Use *Use::getImpliedUser() const {
+ const Use *Current = this;
+
+ while (true) {
+ unsigned Tag = (Current++)->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ continue;
+
+ case stopTag: {
+ ++Current;
+ ptrdiff_t Offset = 1;
+ while (true) {
+ unsigned Tag = Current->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ ++Current;
+ Offset = (Offset << 1) + Tag;
+ continue;
+ default:
+ return Current + Offset;
+ }
+ }
+ }
+
+ case fullStopTag:
+ return Current;
+ }
+ }
+}
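+
+// The loop above implements the "waymarking" scheme: the two spare low bits
+// of each Use's Prev pointer hold either a binary digit or a stop marker, so
+// walking forward from any Use reconstructs the offset to the end of the Use
+// array, where the User object lives. A hedged standalone sketch of just the
+// digit-decoding step (over a hypothetical Tags array, not this file's
+// types):
+//
+//   ptrdiff_t Offset = 1;
+//   while (Tags[I] == zeroDigitTag || Tags[I] == oneDigitTag)
+//     Offset = (Offset << 1) + Tags[I++];  // shift in the next binary digit
+//   // Offset now counts the Uses remaining up to the User object.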
+
+//===----------------------------------------------------------------------===//
+// Use initTags Implementation
+//===----------------------------------------------------------------------===//
+
+Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
+ ptrdiff_t Count = Done;
+ while (Start != Stop) {
+ --Stop;
+ Stop->Val = 0;
+ if (!Count) {
+ Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(Done == 0
+ ? fullStopTag
+ : stopTag));
+ ++Done;
+ Count = Done;
+ } else {
+ Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(Count & 1));
+ Count >>= 1;
+ ++Done;
+ }
+ }
+
+ return Start;
+}
+
+//===----------------------------------------------------------------------===//
+// Use zap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::zap(Use *Start, const Use *Stop, bool del) {
+ if (del) {
+ while (Start != Stop) {
+ (--Stop)->~Use();
+ }
+ ::operator delete(Start);
+ return;
+ }
+
+ while (Start != Stop) {
+ (Start++)->set(0);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// AugmentedUse layout struct
+//===----------------------------------------------------------------------===//
+
+struct AugmentedUse : Use {
+ PointerIntPair<User*, 1, Tag> ref;
+ AugmentedUse(); // not implemented
+};
+
+
+//===----------------------------------------------------------------------===//
+// Use getUser Implementation
+//===----------------------------------------------------------------------===//
+
+User *Use::getUser() const {
+ const Use *End = getImpliedUser();
+ const PointerIntPair<User*, 1, Tag>& ref(
+ static_cast<const AugmentedUse*>(End - 1)->ref);
+ User *She = ref.getPointer();
+ return ref.getInt()
+ ? She
+ : (User*)End;
+}
+
+//===----------------------------------------------------------------------===//
+// User allocHungoffUses Implementation
+//===----------------------------------------------------------------------===//
+
+Use *User::allocHungoffUses(unsigned N) const {
+ Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N
+ + sizeof(AugmentedUse)
+ - sizeof(Use)));
+ Use *End = Begin + N;
+ PointerIntPair<User*, 1, Tag>& ref(static_cast<AugmentedUse&>(End[-1]).ref);
+ ref.setPointer(const_cast<User*>(this));
+ ref.setInt(tagOne);
+ return Use::initTags(Begin, End);
+}
+
+//===----------------------------------------------------------------------===//
+// User operator new Implementations
+//===----------------------------------------------------------------------===//
+
+void *User::operator new(size_t s, unsigned Us) {
+ void *Storage = ::operator new(s + sizeof(Use) * Us);
+ Use *Start = static_cast<Use*>(Storage);
+ Use *End = Start + Us;
+ User *Obj = reinterpret_cast<User*>(End);
+ Obj->OperandList = Start;
+ Obj->NumOperands = Us;
+ Use::initTags(Start, End);
+ return Obj;
+}
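+
+// Resulting layout (illustrative diagram): the Us operands are co-allocated
+// immediately before the User object itself.
+//
+//   [ Use #0 | Use #1 | ... | Use #Us-1 | User object ... ]
+//   ^-- OperandList                      ^-- pointer returned to the caller
+//
+// Placing the Uses directly before the User is what allows getUser() above
+// to recover the owning object from the end of the Use array.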
+
+/// Prefixed allocation - just before the first Use, allocate a NULL pointer.
+/// The destructor can detect its presence and readjust the OperandList
+/// for deletion.
+///
+void *User::operator new(size_t s, unsigned Us, bool Prefix) {
+ // Currently, prefixed allocation is only admissible for
+ // unconditional branch instructions.
+ if (!Prefix)
+ return operator new(s, Us);
+
+ assert(Us == 1 && "Other than one Use allocated?");
+ typedef PointerIntPair<void*, 2, Use::PrevPtrTag> TaggedPrefix;
+ void *Raw = ::operator new(s + sizeof(TaggedPrefix) + sizeof(Use) * Us);
+ TaggedPrefix *Pre = static_cast<TaggedPrefix*>(Raw);
+ Pre->setFromOpaqueValue(0);
+ void *Storage = Pre + 1; // skip over prefix
+ Use *Start = static_cast<Use*>(Storage);
+ Use *End = Start + Us;
+ User *Obj = reinterpret_cast<User*>(End);
+ Obj->OperandList = Start;
+ Obj->NumOperands = Us;
+ Use::initTags(Start, End);
+ return Obj;
+}
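+
+// With Prefix set, the layout gains one tagged null slot (illustrative):
+//
+//   [ TaggedPrefix (null) | Use #0 | User object ... ]
+//
+// operator delete below then checks the low bit of OperandList (tagged by
+// the destructor) to account for this prefix when freeing the allocation.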
+
+//===----------------------------------------------------------------------===//
+// User operator delete Implementation
+//===----------------------------------------------------------------------===//
+
+void User::operator delete(void *Usr) {
+ User *Start = static_cast<User*>(Usr);
+ Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
+ //
+ // look for a variadic User
+ if (Storage == Start->OperandList) {
+ ::operator delete(Storage);
+ return;
+ }
+ //
+ // check for the flag whether the destructor has detected a prefixed
+ // allocation, in which case we remove the flag and delete starting
+ // at OperandList
+ if (reinterpret_cast<intptr_t>(Start->OperandList) & 1) {
+ ::operator delete(reinterpret_cast<char*>(Start->OperandList) - 1);
+ return;
+ }
+ //
+ // In all other cases just delete the nullary User (this covers hung-off
+ // uses as well).
+ ::operator delete(Usr);
+}
+
+} // End llvm namespace
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
new file mode 100644
index 0000000..3af161f
--- /dev/null
+++ b/lib/VMCore/Value.cpp
@@ -0,0 +1,581 @@
+//===-- Value.cpp - Implement the Value class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Value, ValueHandle, and User classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Value Class
+//===----------------------------------------------------------------------===//
+
+static inline const Type *checkType(const Type *Ty) {
+ assert(Ty && "Value defined with a null type: Error!");
+ return Ty;
+}
+
+Value::Value(const Type *ty, unsigned scid)
+ : SubclassID(scid), HasValueHandle(0), SubclassData(0), VTy(checkType(ty)),
+ UseList(0), Name(0) {
+ if (isa<CallInst>(this) || isa<InvokeInst>(this))
+ assert((VTy->isFirstClassType() || VTy == Type::VoidTy ||
+ isa<OpaqueType>(ty) || VTy->getTypeID() == Type::StructTyID) &&
+ "invalid CallInst type!");
+ else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
+ assert((VTy->isFirstClassType() || VTy == Type::VoidTy ||
+ isa<OpaqueType>(ty)) &&
+ "Cannot create non-first-class values except for constants!");
+}
+
+Value::~Value() {
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsDeleted(this);
+
+#ifndef NDEBUG // Only in -g mode...
+ // Check to make sure that there are no uses of this value that are still
+ // around when the value is destroyed. If there are, then we have a dangling
+ // reference and something is wrong. This code is here to print out what is
+ // still being referenced. The value in question should be printed as
+ // a <badref>
+ //
+ if (!use_empty()) {
+ cerr << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
+ for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ cerr << "Use still stuck around after Def is destroyed:"
+ << **I << "\n";
+ }
+#endif
+ assert(use_empty() && "Uses remain when a value is destroyed!");
+
+ // If this value is named, destroy the name. This should not be in a symtab
+ // at this point.
+ if (Name)
+ Name->Destroy();
+
+ // There should be no uses of this object anymore, remove it.
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// hasNUses - Return true if this Value has exactly N users.
+///
+bool Value::hasNUses(unsigned N) const {
+ use_const_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+ return UI == E;
+}
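+
+// Note: hasNUses(N) scans at most N+1 uses and then stops, so e.g.
+// V->hasNUses(0) is equivalent to V->use_empty(); getNumUses() below, by
+// contrast, is linear in the total number of uses.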
+
+/// hasNUsesOrMore - Return true if this value has N users or more. This is
+/// logically equivalent to getNumUses() >= N.
+///
+bool Value::hasNUsesOrMore(unsigned N) const {
+ use_const_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+
+ return true;
+}
+
+/// isUsedInBasicBlock - Return true if this value is used in the specified
+/// basic block.
+bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ for (use_const_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const Instruction *User = dyn_cast<Instruction>(*I);
+ if (User && User->getParent() == BB)
+ return true;
+ }
+ return false;
+}
+
+
+/// getNumUses - This method computes the number of uses of this Value. This
+/// is a linear time operation. Use hasOneUse or hasNUses to check for specific
+/// values.
+unsigned Value::getNumUses() const {
+ return (unsigned)std::distance(use_begin(), use_end());
+}
+
+static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
+ ST = 0;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (BasicBlock *P = I->getParent())
+ if (Function *PP = P->getParent())
+ ST = &PP->getValueSymbolTable();
+ } else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ if (Function *P = BB->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (Module *P = GV->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (Argument *A = dyn_cast<Argument>(V)) {
+ if (Function *P = A->getParent())
+ ST = &P->getValueSymbolTable();
+ } else {
+ assert(isa<Constant>(V) && "Unknown value type!");
+ return true; // no name is settable for this.
+ }
+ return false;
+}
+
+/// getNameStart - Return a pointer to a null terminated string for this name.
+/// Note that names can have null characters within the string as well as at
+/// their end. This always returns a non-null pointer.
+const char *Value::getNameStart() const {
+ if (Name == 0) return "";
+ return Name->getKeyData();
+}
+
+/// getNameLen - Return the length of the string, correctly handling nul
+/// characters embedded into them.
+unsigned Value::getNameLen() const {
+ return Name ? Name->getKeyLength() : 0;
+}
+
+/// isName - Return true if this value has the name specified by the provided
+/// nul terminated string.
+bool Value::isName(const char *N) const {
+ unsigned InLen = strlen(N);
+ return InLen == getNameLen() && memcmp(getNameStart(), N, InLen) == 0;
+}
+
+
+std::string Value::getNameStr() const {
+ if (Name == 0) return "";
+ return std::string(Name->getKeyData(),
+ Name->getKeyData()+Name->getKeyLength());
+}
+
+void Value::setName(const std::string &name) {
+ setName(name.data(), name.size());
+}
+
+void Value::setName(const char *Name) {
+ setName(Name, Name ? strlen(Name) : 0);
+}
+
+void Value::setName(const char *NameStr, unsigned NameLen) {
+ if (NameLen == 0 && !hasName()) return;
+ assert(getType() != Type::VoidTy && "Cannot assign a name to void values!");
+
+ // Get the symbol table to update for this object.
+ ValueSymbolTable *ST;
+ if (getSymTab(this, ST))
+ return; // Cannot set a name on this value (e.g. constant).
+
+ if (!ST) { // No symbol table to update? Just do the change.
+ if (NameLen == 0) {
+ // Free the name for this value.
+ Name->Destroy();
+ Name = 0;
+ return;
+ }
+
+ if (Name) {
+ // Name isn't changing?
+ if (NameLen == Name->getKeyLength() &&
+ !memcmp(Name->getKeyData(), NameStr, NameLen))
+ return;
+ Name->Destroy();
+ }
+
+ // NOTE: Could optimize for the case where the name is shrinking, to avoid
+ // deallocating and then reallocating.
+
+ // Create the new name.
+ Name = ValueName::Create(NameStr, NameStr+NameLen);
+ Name->setValue(this);
+ return;
+ }
+
+ // NOTE: Could optimize for the case where the name is shrinking, to avoid
+ // deallocating and then reallocating.
+ if (hasName()) {
+ // Name isn't changing?
+ if (NameLen == Name->getKeyLength() &&
+ !memcmp(Name->getKeyData(), NameStr, NameLen))
+ return;
+
+ // Remove old name.
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+
+ if (NameLen == 0)
+ return;
+ }
+
+ // Name is changing to something new.
+ Name = ST->createValueName(NameStr, NameLen, this);
+}
+
+
+/// takeName - transfer the name from V to this value, setting V's name to
+/// empty. It is an error to call V->takeName(V).
+void Value::takeName(Value *V) {
+ ValueSymbolTable *ST = 0;
+ // If this value has a name, drop it.
+ if (hasName()) {
+ // Get the symtab this is in.
+ if (getSymTab(this, ST)) {
+ // We can't set a name on this value, but we need to clear V's name if
+ // it has one.
+ if (V->hasName()) V->setName(0, 0);
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+
+ // Remove old name.
+ if (ST)
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+ }
+
+ // Now we know that this has no name.
+
+ // If V has no name either, we're done.
+ if (!V->hasName()) return;
+
+ // Get this's symtab if we didn't before.
+ if (!ST) {
+ if (getSymTab(this, ST)) {
+ // Clear V's name.
+ V->setName(0, 0);
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+ }
+
+ // Get V's ST; this should always succeed, because V has a name.
+ ValueSymbolTable *VST;
+ bool Failure = getSymTab(V, VST);
+ assert(!Failure && "V has a name, so it should have a ST!");
+ Failure=Failure; // Silence the 'unused variable' warning in -NDEBUG builds.
+
+ // If these values are both in the same symtab, we can do this very fast.
+ // This works even if both values have no symtab yet.
+ if (ST == VST) {
+ // Take the name!
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+ return;
+ }
+
+ // Otherwise, things are slightly more complex. Remove V's name from VST and
+ // then reinsert it into ST.
+
+ if (VST)
+ VST->removeValueName(V->Name);
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+
+ if (ST)
+ ST->reinsertValue(this);
+}
+
+
+// uncheckedReplaceAllUsesWith - This is exactly the same as replaceAllUsesWith,
+// except that it doesn't have all of the asserts. The asserts fail because we
+// are half-way done resolving types, which causes some types to exist as two
+// different Type*'s at the same time. This is a sledgehammer to work around
+// this problem.
+//
+void Value::uncheckedReplaceAllUsesWith(Value *New) {
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsRAUWd(this, New);
+
+ while (!use_empty()) {
+ Use &U = *UseList;
+ // Must handle Constants specially, we cannot call replaceUsesOfWith on a
+ // constant because they are uniqued.
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (!isa<GlobalValue>(C)) {
+ C->replaceUsesOfWithOnConstant(this, New, &U);
+ continue;
+ }
+ }
+
+ U.set(New);
+ }
+}
+
+void Value::replaceAllUsesWith(Value *New) {
+ assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
+ assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
+ assert(New->getType() == getType() &&
+ "replaceAllUses of value with new value of different type!");
+
+ uncheckedReplaceAllUsesWith(New);
+}
+
+Value *Value::stripPointerCasts() {
+ if (!isa<PointerType>(getType()))
+ return this;
+ Value *V = this;
+ do {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!CE->getOperand(i)->isNullValue())
+ return V;
+ V = CE->getOperand(0);
+ } else if (CE->getOpcode() == Instruction::BitCast) {
+ V = CE->getOperand(0);
+ } else {
+ return V;
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ if (!GEP->hasAllZeroIndices())
+ return V;
+ V = GEP->getOperand(0);
+ } else if (BitCastInst *CI = dyn_cast<BitCastInst>(V)) {
+ V = CI->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(isa<PointerType>(V->getType()) && "Unexpected operand type!");
+ } while (1);
+}
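+
+// Usage sketch (illustrative; the global and the casts are hypothetical IR):
+//
+//   // %p = bitcast of a getelementptr-with-all-zero-indices of global @g
+//   Value *Base = P->stripPointerCasts();  // walks both steps, yields @g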
+
+Value *Value::getUnderlyingObject() {
+ if (!isa<PointerType>(getType()))
+ return this;
+ Value *V = this;
+ unsigned MaxLookup = 6;
+ do {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
+ return V;
+ V = I->getOperand(0);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() != Instruction::BitCast &&
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return V;
+ V = CE->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(isa<PointerType>(V->getType()) && "Unexpected operand type!");
+ } while (--MaxLookup);
+ return V;
+}
+
+/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
+/// return the value in the PHI node corresponding to PredBB. If not, return
+/// ourself. This is useful if you want to know the value something has in a
+/// predecessor block.
+Value *Value::DoPHITranslation(const BasicBlock *CurBB,
+ const BasicBlock *PredBB) {
+ PHINode *PN = dyn_cast<PHINode>(this);
+ if (PN && PN->getParent() == CurBB)
+ return PN->getIncomingValueForBlock(PredBB);
+ return this;
+}
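+
+// Usage sketch (illustrative; the block and value names are hypothetical):
+//
+//   // %v = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]   ; in block %merge
+//   Value *InPred = V->DoPHITranslation(MergeBB, BB1);  // yields %a
+//   // For a non-PHI value, or a PHI in another block, it returns the
+//   // value itself unchanged.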
+
+//===----------------------------------------------------------------------===//
+// ValueHandleBase Class
+//===----------------------------------------------------------------------===//
+
+/// ValueHandles - This map keeps track of all of the value handles that are
+/// watching a Value*. The Value::HasValueHandle bit is used to know whether or
+/// not a value has an entry in this map.
+typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
+static ManagedStatic<ValueHandlesTy> ValueHandles;
+
+/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
+/// List is known to point into the existing use list.
+void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
+ assert(List && "Handle list is null?");
+
+ // Splice ourselves into the list.
+ Next = *List;
+ *List = this;
+ setPrevPtr(List);
+ if (Next) {
+ Next->setPrevPtr(&Next);
+ assert(VP == Next->VP && "Added to wrong list?");
+ }
+}
+
+/// AddToUseList - Add this ValueHandle to the use list for VP.
+void ValueHandleBase::AddToUseList() {
+ assert(VP && "Null pointer doesn't have a use list!");
+ if (VP->HasValueHandle) {
+ // If this value already has a ValueHandle, then it must be in the
+ // ValueHandles map already.
+ ValueHandleBase *&Entry = (*ValueHandles)[VP];
+ assert(Entry != 0 && "Value doesn't have any handles?");
+ return AddToExistingUseList(&Entry);
+ }
+
+ // Ok, it doesn't have any handles yet, so we must insert it into the
+ // DenseMap. However, doing this insertion could cause the DenseMap to
+ // reallocate itself, which would invalidate all of the PrevP pointers that
+ // point into the old table. Handle this by checking for reallocation and
+ // updating the stale pointers only if needed.
+ ValueHandlesTy &Handles = *ValueHandles;
+ const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
+
+ ValueHandleBase *&Entry = Handles[VP];
+ assert(Entry == 0 && "Value really did already have handles?");
+ AddToExistingUseList(&Entry);
+ VP->HasValueHandle = true;
+
+ // If reallocation didn't happen or if this was the first insertion, don't
+ // walk the table.
+ if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
+ Handles.size() == 1)
+ return;
+
+ // Okay, reallocation did happen. Fix the Prev Pointers.
+ for (ValueHandlesTy::iterator I = Handles.begin(), E = Handles.end();
+ I != E; ++I) {
+ assert(I->second && I->first == I->second->VP && "List invariant broken!");
+ I->second->setPrevPtr(&I->second);
+ }
+}
+
+/// RemoveFromUseList - Remove this ValueHandle from its current use list.
+void ValueHandleBase::RemoveFromUseList() {
+ assert(VP && VP->HasValueHandle && "Pointer doesn't have a use list!");
+
+ // Unlink this from its use list.
+ ValueHandleBase **PrevPtr = getPrevPtr();
+ assert(*PrevPtr == this && "List invariant broken");
+
+ *PrevPtr = Next;
+ if (Next) {
+ assert(Next->getPrevPtr() == &Next && "List invariant broken");
+ Next->setPrevPtr(PrevPtr);
+ return;
+ }
+
+ // If the Next pointer was null, then it is possible that this was the last
+ // ValueHandle watching VP. If so, delete its entry from the ValueHandles
+ // map.
+ ValueHandlesTy &Handles = *ValueHandles;
+ if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
+ Handles.erase(VP);
+ VP->HasValueHandle = false;
+ }
+}
+
+
+void ValueHandleBase::ValueIsDeleted(Value *V) {
+ assert(V->HasValueHandle && "Should only be called if ValueHandles present");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ ValueHandleBase *Entry = (*ValueHandles)[V];
+ assert(Entry && "Value bit set but no entries exist");
+
+ while (Entry) {
+ // Advance pointer to avoid invalidation.
+ ValueHandleBase *ThisNode = Entry;
+ Entry = Entry->Next;
+
+ switch (ThisNode->getKind()) {
+ case Assert:
+#ifndef NDEBUG // Only in -g mode...
+ cerr << "While deleting: " << *V->getType() << " %" << V->getNameStr()
+ << "\n";
+#endif
+ cerr << "An asserting value handle still pointed to this value!\n";
+ abort();
+ case Weak:
+ // Weak just goes to null, which will unlink it from the list.
+ ThisNode->operator=(0);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(ThisNode)->deleted();
+ break;
+ }
+ }
+
+ // All callbacks and weak references should be dropped by now.
+ assert(!V->HasValueHandle && "All references to V were not removed?");
+}
+
+
+void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
+ assert(Old->HasValueHandle &&"Should only be called if ValueHandles present");
+ assert(Old != New && "Changing value into itself!");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ ValueHandleBase *Entry = (*ValueHandles)[Old];
+ assert(Entry && "Value bit set but no entries exist");
+
+ while (Entry) {
+ // Advance pointer to avoid invalidation.
+ ValueHandleBase *ThisNode = Entry;
+ Entry = Entry->Next;
+
+ switch (ThisNode->getKind()) {
+ case Assert:
+ // Asserting handle does not follow RAUW implicitly.
+ break;
+ case Weak:
+ // Weak goes to the new value, which will unlink it from Old's list.
+ ThisNode->operator=(New);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(ThisNode)->allUsesReplacedWith(New);
+ break;
+ }
+ }
+}
+
+/// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
+/// more than once.
+CallbackVH::~CallbackVH() {}
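+
+// How the three handle kinds interact with the notifications above, as a
+// hedged sketch (WeakVH and AssertingVH are declared in
+// Support/ValueHandle.h; deleting a Value directly is for illustration only):
+//
+//   WeakVH WH(V);
+//   V->replaceAllUsesWith(NewV);  // ValueIsRAUWd: WH now tracks NewV
+//   delete NewV;                  // ValueIsDeleted: WH becomes null
+//   // An AssertingVH in the same situation would abort() on the deletion.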
+
+
+//===----------------------------------------------------------------------===//
+// User Class
+//===----------------------------------------------------------------------===//
+
+// replaceUsesOfWith - Replaces all references to the "From" definition with
+// references to the "To" definition.
+//
+void User::replaceUsesOfWith(Value *From, Value *To) {
+ if (From == To) return; // Replacing a value with itself is a no-op.
+
+ assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
+ "Cannot call User::replaceUsesofWith on a constant!");
+
+ for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
+ if (getOperand(i) == From) { // Is this operand pointing to oldval?
+ // The side effects of this setOperand call include linking to
+ // "To", adding "this" to the uses list of To, and
+ // most importantly, removing "this" from the use list of "From".
+ setOperand(i, To); // Fix it now...
+ }
+}
+
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
new file mode 100644
index 0000000..eee18a1
--- /dev/null
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -0,0 +1,137 @@
+//===-- ValueSymbolTable.cpp - Implement the ValueSymbolTable class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueSymbolTable class for the VMCore library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "valuesymtab"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+// Class destructor
+ValueSymbolTable::~ValueSymbolTable() {
+#ifndef NDEBUG // Only do this in -g mode...
+ for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
+ cerr << "Value still in symbol table! Type = '"
+ << VI->getValue()->getType()->getDescription() << "' Name = '"
+ << VI->getKeyData() << "'\n";
+ assert(vmap.empty() && "Values remain in symbol table!");
+#endif
+}
+
+// lookup a value - Returns null on failure...
+//
+Value *ValueSymbolTable::lookup(const std::string &Name) const {
+ const_iterator VI = vmap.find(Name.data(), Name.data() + Name.size());
+ if (VI != vmap.end()) // We found the symbol
+ return VI->getValue();
+ return 0;
+}
+
+Value *ValueSymbolTable::lookup(const char *NameBegin,
+ const char *NameEnd) const {
+ const_iterator VI = vmap.find(NameBegin, NameEnd);
+ if (VI != vmap.end()) // We found the symbol
+ return VI->getValue();
+ return 0;
+}
+
+// Insert a value into the symbol table with the specified name...
+//
+void ValueSymbolTable::reinsertValue(Value* V) {
+ assert(V->hasName() && "Can't insert nameless Value into symbol table");
+
+ // Try inserting the name, assuming it won't conflict.
+ if (vmap.insert(V->Name)) {
+ //DOUT << " Inserted value: " << V->Name << ": " << *V << "\n";
+ return;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<128> UniqueName(V->getNameStart(), V->getNameEnd());
+
+ // The name is already used; just free it so we can allocate a new name.
+ V->Name->Destroy();
+
+ unsigned BaseSize = UniqueName.size();
+ while (1) {
+ // Trim any suffix off.
+ UniqueName.resize(BaseSize);
+ UniqueName.append_uint_32(++LastUnique);
+ // Try to insert the vmap entry with this suffix.
+ ValueName &NewName =
+ vmap.GetOrCreateValue(UniqueName.data(),
+ UniqueName.data() + UniqueName.size());
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ V->Name = &NewName;
+ //DEBUG(DOUT << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return;
+ }
+ }
+}
+
+void ValueSymbolTable::removeValueName(ValueName *V) {
+ //DEBUG(DOUT << " Removing Value: " << V->getKeyData() << "\n");
+ // Remove the value from the symbol table.
+ vmap.remove(V);
+}
+
+/// createValueName - This method attempts to create a value name and insert
+/// it into the symbol table with the specified name. If it conflicts, it
+/// auto-renames the name and returns that instead.
+ValueName *ValueSymbolTable::createValueName(const char *NameStart,
+ unsigned NameLen, Value *V) {
+ // In the common case, the name is not already in the symbol table.
+ ValueName &Entry = vmap.GetOrCreateValue(NameStart, NameStart+NameLen);
+ if (Entry.getValue() == 0) {
+ Entry.setValue(V);
+ //DEBUG(DOUT << " Inserted value: " << Entry.getKeyData() << ": "
+ // << *V << "\n");
+ return &Entry;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<128> UniqueName(NameStart, NameStart+NameLen);
+
+ while (1) {
+ // Trim any suffix off.
+ UniqueName.resize(NameLen);
+ UniqueName.append_uint_32(++LastUnique);
+
+ // Try to insert the vmap entry with this suffix.
+ ValueName &NewName =
+ vmap.GetOrCreateValue(UniqueName.data(),
+ UniqueName.data() + UniqueName.size());
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ //DEBUG(DOUT << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return &NewName;
+ }
+ }
+}
+
+
+// dump - print out the symbol table
+//
+void ValueSymbolTable::dump() const {
+ //DOUT << "ValueSymbolTable:\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ //DOUT << " '" << I->getKeyData() << "' = ";
+ I->getValue()->dump();
+ //DOUT << "\n";
+ }
+}
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
new file mode 100644
index 0000000..fe4af05
--- /dev/null
+++ b/lib/VMCore/ValueTypes.cpp
@@ -0,0 +1,185 @@
+//===----------- ValueTypes.cpp - Implementation of MVT methods -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods in the CodeGen/ValueTypes.h header.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+MVT MVT::getExtendedIntegerVT(unsigned BitWidth) {
+ MVT VT;
+ VT.LLVMTy = IntegerType::get(BitWidth);
+ assert(VT.isExtended() && "Type is not extended!");
+ return VT;
+}
+
+MVT MVT::getExtendedVectorVT(MVT VT, unsigned NumElements) {
+ MVT ResultVT;
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForMVT(), NumElements);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+bool MVT::isExtendedFloatingPoint() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isFPOrFPVector();
+}
+
+bool MVT::isExtendedInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntOrIntVector();
+}
+
+bool MVT::isExtendedVector() const {
+ assert(isExtended() && "Type is not extended!");
+ return isa<VectorType>(LLVMTy);
+}
+
+bool MVT::isExtended64BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 64;
+}
+
+bool MVT::isExtended128BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 128;
+}
+
+MVT MVT::getExtendedVectorElementType() const {
+ assert(isExtended() && "Type is not extended!");
+ return MVT::getMVT(cast<VectorType>(LLVMTy)->getElementType());
+}
+
+unsigned MVT::getExtendedVectorNumElements() const {
+ assert(isExtended() && "Type is not extended!");
+ return cast<VectorType>(LLVMTy)->getNumElements();
+}
+
+unsigned MVT::getExtendedSizeInBits() const {
+ assert(isExtended() && "Type is not extended!");
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
+ return ITy->getBitWidth();
+ if (const VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
+ return VTy->getBitWidth();
+ assert(false && "Unrecognized extended type!");
+ return 0; // Suppress warnings.
+}
+
+/// getMVTString - This function returns value type as a string, e.g. "i32".
+std::string MVT::getMVTString() const {
+ switch (V) {
+ default:
+ if (isVector())
+ return "v" + utostr(getVectorNumElements()) +
+ getVectorElementType().getMVTString();
+ if (isInteger())
+ return "i" + utostr(getSizeInBits());
+ assert(0 && "Invalid MVT!");
+ return "?";
+ case MVT::i1: return "i1";
+ case MVT::i8: return "i8";
+ case MVT::i16: return "i16";
+ case MVT::i32: return "i32";
+ case MVT::i64: return "i64";
+ case MVT::i128: return "i128";
+ case MVT::f32: return "f32";
+ case MVT::f64: return "f64";
+ case MVT::f80: return "f80";
+ case MVT::f128: return "f128";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Flag: return "flag";
+ case MVT::v2i8: return "v2i8";
+ case MVT::v4i8: return "v4i8";
+ case MVT::v2i16: return "v2i16";
+ case MVT::v8i8: return "v8i8";
+ case MVT::v4i16: return "v4i16";
+ case MVT::v2i32: return "v2i32";
+ case MVT::v1i64: return "v1i64";
+ case MVT::v16i8: return "v16i8";
+ case MVT::v8i16: return "v8i16";
+ case MVT::v4i32: return "v4i32";
+ case MVT::v2i64: return "v2i64";
+ case MVT::v2f32: return "v2f32";
+ case MVT::v4f32: return "v4f32";
+ case MVT::v2f64: return "v2f64";
+ case MVT::v3i32: return "v3i32";
+ case MVT::v3f32: return "v3f32";
+ }
+}
+
+/// getTypeForMVT - This method returns an LLVM type corresponding to the
+/// specified MVT. For integer types, this returns an unsigned type. Note
+/// that this will abort for types that cannot be represented.
+const Type *MVT::getTypeForMVT() const {
+ switch (V) {
+ default:
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy;
+ case MVT::isVoid: return Type::VoidTy;
+ case MVT::i1: return Type::Int1Ty;
+ case MVT::i8: return Type::Int8Ty;
+ case MVT::i16: return Type::Int16Ty;
+ case MVT::i32: return Type::Int32Ty;
+ case MVT::i64: return Type::Int64Ty;
+ case MVT::i128: return IntegerType::get(128);
+ case MVT::f32: return Type::FloatTy;
+ case MVT::f64: return Type::DoubleTy;
+ case MVT::f80: return Type::X86_FP80Ty;
+ case MVT::f128: return Type::FP128Ty;
+ case MVT::ppcf128: return Type::PPC_FP128Ty;
+ case MVT::v2i8: return VectorType::get(Type::Int8Ty, 2);
+ case MVT::v4i8: return VectorType::get(Type::Int8Ty, 4);
+ case MVT::v2i16: return VectorType::get(Type::Int16Ty, 2);
+ case MVT::v8i8: return VectorType::get(Type::Int8Ty, 8);
+ case MVT::v4i16: return VectorType::get(Type::Int16Ty, 4);
+ case MVT::v2i32: return VectorType::get(Type::Int32Ty, 2);
+ case MVT::v1i64: return VectorType::get(Type::Int64Ty, 1);
+ case MVT::v16i8: return VectorType::get(Type::Int8Ty, 16);
+ case MVT::v8i16: return VectorType::get(Type::Int16Ty, 8);
+ case MVT::v4i32: return VectorType::get(Type::Int32Ty, 4);
+ case MVT::v2i64: return VectorType::get(Type::Int64Ty, 2);
+ case MVT::v2f32: return VectorType::get(Type::FloatTy, 2);
+ case MVT::v4f32: return VectorType::get(Type::FloatTy, 4);
+ case MVT::v2f64: return VectorType::get(Type::DoubleTy, 2);
+ case MVT::v3i32: return VectorType::get(Type::Int32Ty, 3);
+ case MVT::v3f32: return VectorType::get(Type::FloatTy, 3);
+ }
+}
+
+/// getMVT - Return the value type corresponding to the specified type. This
+/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
+/// are returned as Other, otherwise they are invalid.
+MVT MVT::getMVT(const Type *Ty, bool HandleUnknown){
+ switch (Ty->getTypeID()) {
+ default:
+ if (HandleUnknown) return MVT::Other;
+ assert(0 && "Unknown type!");
+ return MVT::isVoid;
+ case Type::VoidTyID:
+ return MVT::isVoid;
+ case Type::IntegerTyID:
+ return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
+ case Type::FloatTyID: return MVT::f32;
+ case Type::DoubleTyID: return MVT::f64;
+ case Type::X86_FP80TyID: return MVT::f80;
+ case Type::FP128TyID: return MVT::f128;
+ case Type::PPC_FP128TyID: return MVT::ppcf128;
+ case Type::PointerTyID: return MVT::iPTR;
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(getMVT(VTy->getElementType(), false),
+ VTy->getNumElements());
+ }
+ }
+}
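+
+// A few illustrative mappings through getMVT (sketch, not exhaustive):
+//
+//   MVT::getMVT(Type::FloatTy)                         // MVT::f32
+//   MVT::getMVT(IntegerType::get(64))                  // MVT::i64
+//   MVT::getMVT(VectorType::get(Type::FloatTy, 4))     // MVT::v4f32
+//   MVT::getMVT(PointerType::getUnqual(Type::Int8Ty))  // MVT::iPTR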
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
new file mode 100644
index 0000000..59ec3be
--- /dev/null
+++ b/lib/VMCore/Verifier.cpp
@@ -0,0 +1,1770 @@
+//===-- Verifier.cpp - Implement the Module Verifier -------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function verifier interface, which can be used for
+// some sanity checking of input to the system.
+//
+// Note that this does not provide full `Java style' security and verification;
+// instead it just tries to ensure that code is well-formed.
+//
+// * Both of a binary operator's parameters are of the same type
+// * Verify that the indices of mem access instructions match other operands
+// * Verify that arithmetic and other things are only performed on first-class
+// types. Verify that shifts & logicals only happen on integrals, for example.
+// * All of the constants in a switch statement are of the correct type
+// * The code is in valid SSA form
+// * It should be illegal to put a label into any other type (like a structure)
+// or to return one. [except constant arrays!]
+// * Only phi nodes can be self referential: 'add i32 %0, %0 ; <int>:0' is bad
+// * PHI nodes must have an entry for each predecessor, with no extras.
+// * PHI nodes must be the first thing in a basic block, all grouped together
+// * PHI nodes must have at least one entry
+// * All basic blocks should only end with terminator insts, not contain them
+// * The entry node to a function must not have predecessors
+// * All Instructions must be embedded into a basic block
+// * Functions cannot take a void-typed parameter
+// * Verify that a function's argument list agrees with its declared type.
+// * It is illegal to specify a name for a void value.
+// * It is illegal to have an internal global value with no initializer
+// * It is illegal to have a ret instruction that returns a value that does not
+// agree with the function return value type.
+// * Function call argument types match the function prototype
+// * All other things that are tested by asserts spread about the code...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/MDNode.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <sstream>
+#include <cstdarg>
+using namespace llvm;
+
+namespace { // Anonymous namespace for class
+ struct VISIBILITY_HIDDEN PreVerifier : public FunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+
+ PreVerifier() : FunctionPass(&ID) { }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // Check that the prerequisites for successful DominatorTree construction
+ // are satisfied.
+ bool runOnFunction(Function &F) {
+ bool Broken = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ if (I->empty() || !I->back().isTerminator()) {
+ cerr << "Basic Block does not have terminator!\n";
+ WriteAsOperand(*cerr, I, true);
+ cerr << "\n";
+ Broken = true;
+ }
+ }
+
+ if (Broken)
+ abort();
+
+ return false;
+ }
+ };
+}
+
+char PreVerifier::ID = 0;
+static RegisterPass<PreVerifier>
+PreVer("preverify", "Preliminary module verification");
+static const PassInfo *const PreVerifyID = &PreVer;
+
+namespace {
+ struct VISIBILITY_HIDDEN
+ Verifier : public FunctionPass, InstVisitor<Verifier> {
+ static char ID; // Pass ID, replacement for typeid
+ bool Broken; // Is this module found to be broken?
+ bool RealPass; // Are we not being run by a PassManager?
+ VerifierFailureAction action; // What to do if verification fails.
+ Module *Mod; // Module we are verifying right now
+ DominatorTree *DT; // Dominator Tree, caution can be null!
+ std::stringstream msgs; // A stringstream to collect messages
+
+ /// InstInThisBlock - when verifying a basic block, keep track of all of the
+ /// instructions we have seen so far. This allows us to do efficient
+ /// dominance checks for the case when an instruction has an operand that is
+ /// an instruction in the same block.
+ SmallPtrSet<Instruction*, 16> InstsInThisBlock;
+
+ Verifier()
+ : FunctionPass(&ID),
+ Broken(false), RealPass(true), action(AbortProcessAction),
+ DT(0), msgs( std::ios::app | std::ios::out ) {}
+ explicit Verifier(VerifierFailureAction ctn)
+ : FunctionPass(&ID),
+ Broken(false), RealPass(true), action(ctn), DT(0),
+ msgs( std::ios::app | std::ios::out ) {}
+ explicit Verifier(bool AB)
+ : FunctionPass(&ID),
+ Broken(false), RealPass(true),
+ action( AB ? AbortProcessAction : PrintMessageAction), DT(0),
+ msgs( std::ios::app | std::ios::out ) {}
+ explicit Verifier(DominatorTree &dt)
+ : FunctionPass(&ID),
+ Broken(false), RealPass(false), action(PrintMessageAction),
+ DT(&dt), msgs( std::ios::app | std::ios::out ) {}
+
+
+ bool doInitialization(Module &M) {
+ Mod = &M;
+ verifyTypeSymbolTable(M.getTypeSymbolTable());
+
+ // If this is a real pass, in a pass manager, we must abort before
+ // returning back to the pass manager, or else the pass manager may try to
+ // run other passes on the broken module.
+ if (RealPass)
+ return abortIfBroken();
+ return false;
+ }
+
+ bool runOnFunction(Function &F) {
+ // Get dominator information if we are being run by PassManager
+ if (RealPass) DT = &getAnalysis<DominatorTree>();
+
+ Mod = F.getParent();
+
+ visit(F);
+ InstsInThisBlock.clear();
+
+ // If this is a real pass, in a pass manager, we must abort before
+ // returning back to the pass manager, or else the pass manager may try to
+ // run other passes on the broken module.
+ if (RealPass)
+ return abortIfBroken();
+
+ return false;
+ }
+
+ bool doFinalization(Module &M) {
+ // Scan through, checking all of the external functions' linkage now...
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ visitGlobalValue(*I);
+
+ // Check to make sure function prototypes are okay.
+ if (I->isDeclaration()) visitFunction(*I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ visitGlobalVariable(*I);
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ visitGlobalAlias(*I);
+
+ // If the module is broken, abort at this time.
+ return abortIfBroken();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredID(PreVerifyID);
+ if (RealPass)
+ AU.addRequired<DominatorTree>();
+ }
+
+ /// abortIfBroken - If the module is broken and we are supposed to abort on
+ /// this condition, do so.
+ ///
+ bool abortIfBroken() {
+ if (!Broken) return false;
+ msgs << "Broken module found, ";
+ switch (action) {
+ default: assert(0 && "Unknown action");
+ case AbortProcessAction:
+ msgs << "compilation aborted!\n";
+ cerr << msgs.str();
+ abort();
+ case PrintMessageAction:
+ msgs << "verification continues.\n";
+ cerr << msgs.str();
+ return false;
+ case ReturnStatusAction:
+ msgs << "compilation terminated.\n";
+ return true;
+ }
+ }
+
+
+ // Verification methods...
+ void verifyTypeSymbolTable(TypeSymbolTable &ST);
+ void visitGlobalValue(GlobalValue &GV);
+ void visitGlobalVariable(GlobalVariable &GV);
+ void visitGlobalAlias(GlobalAlias &GA);
+ void visitFunction(Function &F);
+ void visitBasicBlock(BasicBlock &BB);
+ using InstVisitor<Verifier>::visit;
+
+ void visit(Instruction &I);
+
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitPHINode(PHINode &PN);
+ void visitBinaryOperator(BinaryOperator &B);
+ void visitICmpInst(ICmpInst &IC);
+ void visitFCmpInst(FCmpInst &FC);
+ void visitExtractElementInst(ExtractElementInst &EI);
+ void visitInsertElementInst(InsertElementInst &EI);
+ void visitShuffleVectorInst(ShuffleVectorInst &EI);
+ void visitVAArgInst(VAArgInst &VAA) { visitInstruction(VAA); }
+ void visitCallInst(CallInst &CI);
+ void visitInvokeInst(InvokeInst &II);
+ void visitGetElementPtrInst(GetElementPtrInst &GEP);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitInstruction(Instruction &I);
+ void visitTerminatorInst(TerminatorInst &I);
+ void visitReturnInst(ReturnInst &RI);
+ void visitSwitchInst(SwitchInst &SI);
+ void visitSelectInst(SelectInst &SI);
+ void visitUserOp1(Instruction &I);
+ void visitUserOp2(Instruction &I) { visitUserOp1(I); }
+ void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+ void visitAllocationInst(AllocationInst &AI);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ void VerifyCallSite(CallSite CS);
+ bool PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+ int VT, unsigned ArgNo, std::string &Suffix);
+ void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
+ unsigned RetNum, unsigned ParamNum, ...);
+ void VerifyAttrs(Attributes Attrs, const Type *Ty,
+ bool isReturnValue, const Value *V);
+ void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
+ const Value *V);
+ bool VerifyMDNode(const MDNode *N);
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ msgs << *V;
+ } else {
+ WriteAsOperand(msgs, V, true, Mod);
+ msgs << "\n";
+ }
+ }
+
+ void WriteType(const Type *T) {
+ if (!T) return;
+ raw_os_ostream RO(msgs);
+ RO << ' ';
+ WriteTypeSymbolic(RO, T, Mod);
+ }
+
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const std::string &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ msgs << Message << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ Broken = true;
+ }
+
+ void CheckFailed( const std::string& Message, const Value* V1,
+ const Type* T2, const Value* V3 = 0 ) {
+ msgs << Message << "\n";
+ WriteValue(V1);
+ WriteType(T2);
+ WriteValue(V3);
+ Broken = true;
+ }
+ };
+} // End anonymous namespace
+
+char Verifier::ID = 0;
+static RegisterPass<Verifier> X("verify", "Module Verifier");
+
+// Assert - We know that cond should be true, if not print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+void Verifier::visit(Instruction &I) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Assert1(I.getOperand(i) != 0, "Operand is null", &I);
+ InstVisitor<Verifier>::visit(I);
+}
+
+
+void Verifier::visitGlobalValue(GlobalValue &GV) {
+ Assert1(!GV.isDeclaration() ||
+ GV.hasExternalLinkage() ||
+ GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage() ||
+ GV.hasGhostLinkage() ||
+ (isa<GlobalAlias>(GV) &&
+ (GV.hasLocalLinkage() || GV.hasWeakLinkage())),
+ "Global is external, but doesn't have external or dllimport or weak linkage!",
+ &GV);
+
+ Assert1(!GV.hasDLLImportLinkage() || GV.isDeclaration(),
+ "Global is marked as dllimport, but not external", &GV);
+
+ Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
+ "Only global variables can have appending linkage!", &GV);
+
+ if (GV.hasAppendingLinkage()) {
+ GlobalVariable &GVar = cast<GlobalVariable>(GV);
+ Assert1(isa<ArrayType>(GVar.getType()->getElementType()),
+ "Only global arrays can have appending linkage!", &GV);
+ }
+}
+
+void Verifier::visitGlobalVariable(GlobalVariable &GV) {
+ if (GV.hasInitializer()) {
+ Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(),
+ "Global variable initializer type does not match global "
+ "variable type!", &GV);
+
+ // Verify that any metadata used in a global initializer points only to
+ // other globals.
+ if (MDNode *FirstNode = dyn_cast<MDNode>(GV.getInitializer())) {
+ if (VerifyMDNode(FirstNode)) {
+ SmallVector<const MDNode *, 4> NodesToAnalyze;
+ NodesToAnalyze.push_back(FirstNode);
+ while (!NodesToAnalyze.empty()) {
+ const MDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+
+ for (MDNode::const_elem_iterator I = N->elem_begin(),
+ E = N->elem_end(); I != E; ++I)
+ if (const Value *V = *I) {
+ if (const MDNode *Next = dyn_cast<MDNode>(V))
+ NodesToAnalyze.push_back(Next);
+ else
+ Assert3(isa<Constant>(V),
+ "reference to instruction from global metadata node",
+ &GV, N, V);
+ }
+ }
+ }
+ }
+ } else {
+ Assert1(GV.hasExternalLinkage() || GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage(),
+ "invalid linkage type for global declaration", &GV);
+ }
+
+ visitGlobalValue(GV);
+}
+
+void Verifier::visitGlobalAlias(GlobalAlias &GA) {
+ Assert1(!GA.getName().empty(),
+ "Alias name cannot be empty!", &GA);
+ Assert1(GA.hasExternalLinkage() || GA.hasLocalLinkage() ||
+ GA.hasWeakLinkage(),
+ "Alias should have external or external weak linkage!", &GA);
+ Assert1(GA.getAliasee(),
+ "Aliasee cannot be NULL!", &GA);
+ Assert1(GA.getType() == GA.getAliasee()->getType(),
+ "Alias and aliasee types should match!", &GA);
+
+ if (!isa<GlobalValue>(GA.getAliasee())) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(GA.getAliasee());
+ Assert1(CE &&
+ (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ isa<GlobalValue>(CE->getOperand(0)),
+ "Aliasee should be either GlobalValue or bitcast of GlobalValue",
+ &GA);
+ }
+
+ const GlobalValue* Aliasee = GA.resolveAliasedGlobal(/*stopOnWeak*/ false);
+ Assert1(Aliasee,
+ "Aliasing chain should end with function or global variable", &GA);
+
+ visitGlobalValue(GA);
+}
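+
+// For illustration (editorial sketch, not part of the original source): an
+// alias such as
+//   @a = alias i32* @g
+// is accepted only if @g (or a bitcast/GEP constant of a global) also has
+// type i32*, matching the type of the alias itself.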
+
+void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) {
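+  // Currently nothing to check here.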
+}
+
+// VerifyAttrs - Check the given parameter attributes for an argument or return
+// value of the specified type. The value V is printed in error messages.
+void Verifier::VerifyAttrs(Attributes Attrs, const Type *Ty,
+ bool isReturnValue, const Value *V) {
+ if (Attrs == Attribute::None)
+ return;
+
+ if (isReturnValue) {
+ Attributes RetI = Attrs & Attribute::ParameterOnly;
+ Assert1(!RetI, "Attribute " + Attribute::getAsString(RetI) +
+ " does not apply to return values!", V);
+ }
+ Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly;
+ Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) +
+ " only applies to functions!", V);
+
+ for (unsigned i = 0;
+ i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
+ Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i];
+ Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Attribute::getAsString(MutI) + " are incompatible!", V);
+ }
+
+ Attributes TypeI = Attrs & Attribute::typeIncompatible(Ty);
+ Assert1(!TypeI, "Wrong type for attribute " +
+ Attribute::getAsString(TypeI), V);
+
+ Attributes ByValI = Attrs & Attribute::ByVal;
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ Assert1(!ByValI || PTy->getElementType()->isSized(),
+ "Attribute " + Attribute::getAsString(ByValI) +
+ " does not support unsized types!", V);
+ } else {
+ Assert1(!ByValI,
+ "Attribute " + Attribute::getAsString(ByValI) +
+ " only applies to parameters with pointer type!", V);
+ }
+}
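+
+// For illustration (editorial sketch, not part of the original source): byval
+// on a non-pointer parameter, e.g. declare void @f(i32 byval), trips the
+// pointer check above, while declare void @f(i32* byval) is accepted.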
+
+// VerifyFunctionAttrs - Check parameter attributes against a function type.
+// The value V is printed in error messages.
+void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
+ const AttrListPtr &Attrs,
+ const Value *V) {
+ if (Attrs.isEmpty())
+ return;
+
+ bool SawNest = false;
+
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ const AttributeWithIndex &Attr = Attrs.getSlot(i);
+
+ const Type *Ty;
+ if (Attr.Index == 0)
+ Ty = FT->getReturnType();
+ else if (Attr.Index-1 < FT->getNumParams())
+ Ty = FT->getParamType(Attr.Index-1);
+ else
+ break; // VarArgs attributes, don't verify.
+
+ VerifyAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
+
+ if (Attr.Attrs & Attribute::Nest) {
+ Assert1(!SawNest, "More than one parameter has attribute nest!", V);
+ SawNest = true;
+ }
+
+ if (Attr.Attrs & Attribute::StructRet)
+ Assert1(Attr.Index == 1, "Attribute sret not on first parameter!", V);
+ }
+
+ Attributes FAttrs = Attrs.getFnAttributes();
+ Assert1(!(FAttrs & (~Attribute::FunctionOnly)),
+ "Attribute " + Attribute::getAsString(FAttrs) +
+ " does not apply to function!", V);
+
+ for (unsigned i = 0;
+ i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
+ Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i];
+ Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Attribute::getAsString(MutI) + " are incompatible!", V);
+ }
+}
+
+static bool VerifyAttributeCount(const AttrListPtr &Attrs, unsigned Params) {
+ if (Attrs.isEmpty())
+ return true;
+
+ unsigned LastSlot = Attrs.getNumSlots() - 1;
+ unsigned LastIndex = Attrs.getSlot(LastSlot).Index;
+ if (LastIndex <= Params
+ || (LastIndex == (unsigned)~0
+ && (LastSlot == 0 || Attrs.getSlot(LastSlot - 1).Index <= Params)))
+ return true;
+
+ return false;
+}
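+
+// For illustration (editorial note, not in the original source): slot Index 0
+// names the return value, indices 1..NumParams name the parameters, and ~0U
+// names the function itself, so for a two-parameter function the last slot
+// index must be at most 2 unless it is the function slot.
+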
+// visitFunction - Verify that a function is ok.
+//
+void Verifier::visitFunction(Function &F) {
+ // Check function arguments.
+ const FunctionType *FT = F.getFunctionType();
+ unsigned NumArgs = F.arg_size();
+
+ Assert2(FT->getNumParams() == NumArgs,
+ "# formal arguments must match # of arguments for function type!",
+ &F, FT);
+ Assert1(F.getReturnType()->isFirstClassType() ||
+ F.getReturnType() == Type::VoidTy ||
+ isa<StructType>(F.getReturnType()),
+ "Functions cannot return aggregate values!", &F);
+
+ Assert1(!F.hasStructRetAttr() || F.getReturnType() == Type::VoidTy,
+ "Invalid struct return type!", &F);
+
+ const AttrListPtr &Attrs = F.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
+ "Attributes after last parameter!", &F);
+
+ // Check function attributes.
+ VerifyFunctionAttrs(FT, Attrs, &F);
+
+ // Check that this function meets the restrictions on this calling convention.
+ switch (F.getCallingConv()) {
+ default:
+ break;
+ case CallingConv::C:
+ break;
+ case CallingConv::Fast:
+ case CallingConv::Cold:
+ case CallingConv::X86_FastCall:
+ Assert1(!F.isVarArg(),
+ "Varargs functions must have C calling conventions!", &F);
+ break;
+ }
+
+ bool isLLVMdotName = F.getName().size() >= 5 &&
+ F.getName().substr(0, 5) == "llvm.";
+ if (!isLLVMdotName)
+ Assert1(F.getReturnType() != Type::MetadataTy,
+ "Function may not return metadata unless it's an intrinsic", &F);
+
+ // Check that the argument values match the function type for this function...
+ unsigned i = 0;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++i) {
+ Assert2(I->getType() == FT->getParamType(i),
+ "Argument value does not match function argument type!",
+ I, FT->getParamType(i));
+ Assert1(I->getType()->isFirstClassType(),
+ "Function arguments must have first-class types!", I);
+ if (!isLLVMdotName)
+ Assert2(I->getType() != Type::MetadataTy,
+ "Function takes metadata but isn't an intrinsic", I, &F);
+ }
+
+ if (F.isDeclaration()) {
+ Assert1(F.hasExternalLinkage() || F.hasDLLImportLinkage() ||
+ F.hasExternalWeakLinkage() || F.hasGhostLinkage(),
+ "invalid linkage type for function declaration", &F);
+ } else {
+ // Verify that this function (which has a body) is not named "llvm.*". It
+ // is not legal to define intrinsics.
+ Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
+
+ // Check the entry node
+ BasicBlock *Entry = &F.getEntryBlock();
+ Assert1(pred_begin(Entry) == pred_end(Entry),
+ "Entry block to function must not have predecessors!", Entry);
+ }
+}
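+
+// For illustration (editorial sketch, not part of the original source):
+//   declare fastcc void @f(i32, ...)
+// is rejected by the calling-convention check above, since varargs functions
+// must use the C calling convention.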
+
+
+// visitBasicBlock - Verify that a basic block is well formed...
+//
+void Verifier::visitBasicBlock(BasicBlock &BB) {
+ InstsInThisBlock.clear();
+
+ // Ensure that basic blocks have terminators!
+ Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+
+ // Check constraints that this basic block imposes on all of the PHI nodes in
+ // it.
+ if (isa<PHINode>(BB.front())) {
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
+ std::sort(Preds.begin(), Preds.end());
+ PHINode *PN;
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
+
+ // Ensure that PHI nodes have at least one entry!
+ Assert1(PN->getNumIncomingValues() != 0,
+ "PHI nodes must have at least one entry. If the block is dead, "
+ "the PHI should be removed!", PN);
+ Assert1(PN->getNumIncomingValues() == Preds.size(),
+ "PHINode should have one entry for each predecessor of its "
+ "parent basic block!", PN);
+
+ // Get and sort all incoming values in the PHI node...
+ Values.clear();
+ Values.reserve(PN->getNumIncomingValues());
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Values.push_back(std::make_pair(PN->getIncomingBlock(i),
+ PN->getIncomingValue(i)));
+ std::sort(Values.begin(), Values.end());
+
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ // Check to make sure that if there is more than one entry for a
+ // particular basic block in this PHI node, that the incoming values are
+ // all identical.
+ //
+ Assert4(i == 0 || Values[i].first != Values[i-1].first ||
+ Values[i].second == Values[i-1].second,
+ "PHI node has multiple entries for the same basic block with "
+ "different incoming values!", PN, Values[i].first,
+ Values[i].second, Values[i-1].second);
+
+ // Check to make sure that the predecessors and PHI node entries are
+ // matched up.
+ Assert3(Values[i].first == Preds[i],
+ "PHI node entries do not match predecessors!", PN,
+ Values[i].first, Preds[i]);
+ }
+ }
+ }
+}
+
+void Verifier::visitTerminatorInst(TerminatorInst &I) {
+ // Ensure that terminators only exist at the end of the basic block.
+ Assert1(&I == I.getParent()->getTerminator(),
+ "Terminator found in the middle of a basic block!", I.getParent());
+ visitInstruction(I);
+}
+
+void Verifier::visitReturnInst(ReturnInst &RI) {
+ Function *F = RI.getParent()->getParent();
+ unsigned N = RI.getNumOperands();
+ if (F->getReturnType() == Type::VoidTy)
+ Assert2(N == 0,
+ "Found return instr that returns non-void in Function of void "
+ "return type!", &RI, F->getReturnType());
+ else if (N == 1 && F->getReturnType() == RI.getOperand(0)->getType()) {
+ // Exactly one return value and it matches the return type. Good.
+ } else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ // The return type is a struct; check for multiple return values.
+ Assert2(STy->getNumElements() == N,
+ "Incorrect number of return values in ret instruction!",
+ &RI, F->getReturnType());
+ for (unsigned i = 0; i != N; ++i)
+ Assert2(STy->getElementType(i) == RI.getOperand(i)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!", &RI, F->getReturnType());
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(F->getReturnType())) {
+ // The return type is an array; check for multiple return values.
+ Assert2(ATy->getNumElements() == N,
+ "Incorrect number of return values in ret instruction!",
+ &RI, F->getReturnType());
+ for (unsigned i = 0; i != N; ++i)
+ Assert2(ATy->getElementType() == RI.getOperand(i)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!", &RI, F->getReturnType());
+ } else {
+ CheckFailed("Function return type does not match operand "
+ "type of return inst!", &RI, F->getReturnType());
+ }
+
+ // Check to make sure that the return value has necessary properties for
+ // terminators...
+ visitTerminatorInst(RI);
+}
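+
+// For illustration (editorial sketch, not part of the original source): in
+// the multi-operand ret form of this era, a function returning {i32, i32}
+// may write
+//   ret i32 %a, i32 %b
+// and the operand count and element types must match the struct exactly.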
+
+void Verifier::visitSwitchInst(SwitchInst &SI) {
+ // Check to make sure that all of the constants in the switch instruction
+ // have the same type as the switched-on value.
+ const Type *SwitchTy = SI.getCondition()->getType();
+ for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i)
+ Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
+ "Switch constants must all be same type as switch value!", &SI);
+
+ visitTerminatorInst(SI);
+}
+
+void Verifier::visitSelectInst(SelectInst &SI) {
+ Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+ SI.getOperand(2)),
+ "Invalid operands for select instruction!", &SI);
+
+ Assert1(SI.getTrueValue()->getType() == SI.getType(),
+ "Select values must have same type as select instruction!", &SI);
+ visitInstruction(SI);
+}
+
+
+/// visitUserOp1 - User-defined operators shouldn't live beyond the lifetime of
+/// a pass; if any exist, it's an error.
+///
+void Verifier::visitUserOp1(Instruction &I) {
+ Assert1(0, "User-defined operators should not live outside of a pass!", &I);
+}
+
+void Verifier::visitTruncInst(TruncInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I);
+ Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ "trunc source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
+
+ visitInstruction(I);
+}
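+
+// For illustration (editorial sketch, not part of the original source):
+//   %t = trunc i32 %x to i8    ; accepted, 32 > 8
+//   %u = trunc i8 %y to i32    ; rejected, widening needs zext/sext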
+
+void Verifier::visitZExtInst(ZExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ Assert1(SrcTy->isIntOrIntVector(), "ZExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I);
+ Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ "zext source and destination must both be a vector or neither", &I);
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSExtInst(SExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I);
+ Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ "sext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPTruncInst(FPTruncInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I);
+ Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ "fptrunc source and destination must both be a vector or neither",&I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPExtInst(FPExtInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I);
+ Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ "fpext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitUIToFPInst(UIToFPInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = isa<VectorType>(SrcTy);
+ bool DstVec = isa<VectorType>(DestTy);
+
+ Assert1(SrcVec == DstVec,
+ "UIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVector(),
+ "UIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVector(),
+ "UIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "UIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSIToFPInst(SIToFPInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+  bool SrcVec = isa<VectorType>(SrcTy);
+  bool DstVec = isa<VectorType>(DestTy);
+
+ Assert1(SrcVec == DstVec,
+ "SIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVector(),
+ "SIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVector(),
+ "SIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "SIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToUIInst(FPToUIInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = isa<VectorType>(SrcTy);
+ bool DstVec = isa<VectorType>(DestTy);
+
+ Assert1(SrcVec == DstVec,
+ "FPToUI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVector(), "FPToUI source must be FP or FP vector", &I);
+ Assert1(DestTy->isIntOrIntVector(),
+ "FPToUI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToUI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToSIInst(FPToSIInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ bool SrcVec = isa<VectorType>(SrcTy);
+ bool DstVec = isa<VectorType>(DestTy);
+
+ Assert1(SrcVec == DstVec,
+ "FPToSI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVector(),
+ "FPToSI source must be FP or FP vector", &I);
+ Assert1(DestTy->isIntOrIntVector(),
+ "FPToSI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToSI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ Assert1(isa<PointerType>(SrcTy), "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->isInteger(), "PtrToInt result must be integral", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+  Assert1(SrcTy->isInteger(), "IntToPtr source must be an integral type", &I);
+  Assert1(isa<PointerType>(DestTy), "IntToPtr result must be a pointer", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitBitCastInst(BitCastInst &I) {
+ // Get the source and destination types
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+  Assert1(isa<PointerType>(SrcTy) == isa<PointerType>(DestTy),
+          "Bitcast requires both operands to be pointer or neither", &I);
+  Assert1(SrcBitSize == DestBitSize,
+          "Bitcast requires types of same width", &I);
+
+ // Disallow aggregates.
+ Assert1(!SrcTy->isAggregateType(),
+ "Bitcast operand must not be aggregate", &I);
+ Assert1(!DestTy->isAggregateType(),
+ "Bitcast type must not be aggregate", &I);
+
+ visitInstruction(I);
+}
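+
+// For illustration (editorial sketch, not part of the original source):
+//   %ok  = bitcast i32 %x to float   ; accepted, both 32 bits wide
+//   %bad = bitcast i32 %x to i64     ; rejected, width changes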
+
+/// visitPHINode - Ensure that a PHI node is well formed.
+///
+void Verifier::visitPHINode(PHINode &PN) {
+ // Ensure that the PHI nodes are all grouped together at the top of the block.
+ // This can be tested by checking whether the instruction before this is
+ // either nonexistent (because this is begin()) or is a PHI node. If not,
+ // then there is some other instruction before a PHI.
+ Assert2(&PN == &PN.getParent()->front() ||
+ isa<PHINode>(--BasicBlock::iterator(&PN)),
+ "PHI nodes not grouped at top of basic block!",
+ &PN, PN.getParent());
+
+ // Check that all of the operands of the PHI node have the same type as the
+ // result.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
+ "PHI node operands are not the same type as the result!", &PN);
+
+ // All other PHI node constraints are checked in the visitBasicBlock method.
+
+ visitInstruction(PN);
+}
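+
+// For illustration (editorial sketch, not part of the original source): the
+// grouping check above rejects
+//   %a = phi i32 [ 0, %entry ], [ %n, %loop ]
+//   %t = add i32 %a, 1
+//   %b = phi i32 [ 1, %entry ], [ %t, %loop ]
+// because %b follows a non-phi instruction.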
+
+void Verifier::VerifyCallSite(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+
+ Assert1(isa<PointerType>(CS.getCalledValue()->getType()),
+ "Called function must be a pointer!", I);
+ const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+ Assert1(isa<FunctionType>(FPTy->getElementType()),
+ "Called function is not pointer to function type!", I);
+
+ const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
+
+ // Verify that the correct number of arguments are being passed
+ if (FTy->isVarArg())
+ Assert1(CS.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!",I);
+ else
+ Assert1(CS.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", I);
+
+ // Verify that all arguments to the call match the function type...
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ "Call parameter type does not match function signature!",
+ CS.getArgument(i), FTy->getParamType(i), I);
+
+ const AttrListPtr &Attrs = CS.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
+ "Attributes after last parameter!", I);
+
+ // Verify call attributes.
+ VerifyFunctionAttrs(FTy, Attrs, I);
+
+ if (FTy->isVarArg())
+ // Check attributes on the varargs part.
+ for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
+ Attributes Attr = Attrs.getParamAttributes(Idx);
+
+ VerifyAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
+
+ Attributes VArgI = Attr & Attribute::VarArgsIncompatible;
+ Assert1(!VArgI, "Attribute " + Attribute::getAsString(VArgI) +
+ " cannot be used for vararg call arguments!", I);
+ }
+
+ // Verify that there's no metadata unless it's a direct call to an intrinsic.
+ if (!CS.getCalledFunction() || CS.getCalledFunction()->getName().size() < 5 ||
+ CS.getCalledFunction()->getName().substr(0, 5) != "llvm.") {
+ Assert1(FTy->getReturnType() != Type::MetadataTy,
+ "Only intrinsics may return metadata", I);
+ for (FunctionType::param_iterator PI = FTy->param_begin(),
+ PE = FTy->param_end(); PI != PE; ++PI)
+ Assert1(PI->get() != Type::MetadataTy, "Function has metadata parameter "
+ "but isn't an intrinsic", I);
+ }
+
+ visitInstruction(*I);
+}
+
+void Verifier::visitCallInst(CallInst &CI) {
+ VerifyCallSite(&CI);
+
+ if (Function *F = CI.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ visitIntrinsicFunctionCall(ID, CI);
+}
+
+void Verifier::visitInvokeInst(InvokeInst &II) {
+ VerifyCallSite(&II);
+}
+
+/// visitBinaryOperator - Check that both arguments to the binary operator are
+/// of the same type!
+///
+void Verifier::visitBinaryOperator(BinaryOperator &B) {
+ Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
+ "Both operands to a binary operator are not of the same type!", &B);
+
+ switch (B.getOpcode()) {
+ // Check that logical operators are only used with integral operands.
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ Assert1(B.getType()->isInteger() ||
+ (isa<VectorType>(B.getType()) &&
+ cast<VectorType>(B.getType())->getElementType()->isInteger()),
+ "Logical operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Logical operators must have same type for operands and result!",
+ &B);
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ Assert1(B.getType()->isInteger() ||
+ (isa<VectorType>(B.getType()) &&
+ cast<VectorType>(B.getType())->getElementType()->isInteger()),
+ "Shifts only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Shift return type must be same as operands!", &B);
+ /* FALL THROUGH */
+ default:
+ // Arithmetic operators only work on integer or fp values
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Arithmetic operators must have same type for operands and result!",
+ &B);
+ Assert1(B.getType()->isInteger() || B.getType()->isFloatingPoint() ||
+ isa<VectorType>(B.getType()),
+ "Arithmetic operators must have integer, fp, or vector type!", &B);
+ break;
+ }
+
+ visitInstruction(B);
+}
+
+void Verifier::visitICmpInst(ICmpInst& IC) {
+ // Check that the operands are the same type
+ const Type* Op0Ty = IC.getOperand(0)->getType();
+ const Type* Op1Ty = IC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to ICmp instruction are not of the same type!", &IC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isIntOrIntVector() || isa<PointerType>(Op0Ty),
+ "Invalid operand types for ICmp instruction", &IC);
+
+ visitInstruction(IC);
+}
+
+void Verifier::visitFCmpInst(FCmpInst& FC) {
+ // Check that the operands are the same type
+ const Type* Op0Ty = FC.getOperand(0)->getType();
+ const Type* Op1Ty = FC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to FCmp instruction are not of the same type!", &FC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isFPOrFPVector(),
+ "Invalid operand types for FCmp instruction", &FC);
+ visitInstruction(FC);
+}
+
+void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
+ Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0),
+ EI.getOperand(1)),
+ "Invalid extractelement operands!", &EI);
+ visitInstruction(EI);
+}
+
+void Verifier::visitInsertElementInst(InsertElementInst &IE) {
+ Assert1(InsertElementInst::isValidOperands(IE.getOperand(0),
+ IE.getOperand(1),
+ IE.getOperand(2)),
+ "Invalid insertelement operands!", &IE);
+ visitInstruction(IE);
+}
+
+void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
+ Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
+ SV.getOperand(2)),
+ "Invalid shufflevector operands!", &SV);
+
+ const VectorType *VTy = dyn_cast<VectorType>(SV.getOperand(0)->getType());
+ Assert1(VTy, "Operands are not a vector type", &SV);
+
+ // Check to see if Mask is valid.
+ if (const ConstantVector *MV = dyn_cast<ConstantVector>(SV.getOperand(2))) {
+ for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
+ if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ Assert1(!CI->uge(VTy->getNumElements()*2),
+ "Invalid shufflevector shuffle mask!", &SV);
+ } else {
+ Assert1(isa<UndefValue>(MV->getOperand(i)),
+ "Invalid shufflevector shuffle mask!", &SV);
+ }
+ }
+ } else {
+ Assert1(isa<UndefValue>(SV.getOperand(2)) ||
+ isa<ConstantAggregateZero>(SV.getOperand(2)),
+ "Invalid shufflevector shuffle mask!", &SV);
+ }
+
+ visitInstruction(SV);
+}
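+
+// For illustration (editorial sketch, not part of the original source): with
+// two <2 x i32> operands each constant mask element must be undef or less
+// than 4, e.g.
+//   %r = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 3>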
+
+void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
+ const Type *ElTy =
+ GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(),
+ Idxs.begin(), Idxs.end());
+ Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
+ Assert2(isa<PointerType>(GEP.getType()) &&
+ cast<PointerType>(GEP.getType())->getElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
+ visitInstruction(GEP);
+}
+
+void Verifier::visitLoadInst(LoadInst &LI) {
+ const Type *ElTy =
+ cast<PointerType>(LI.getOperand(0)->getType())->getElementType();
+ Assert2(ElTy == LI.getType(),
+ "Load result type does not match pointer operand type!", &LI, ElTy);
+ Assert1(ElTy != Type::MetadataTy, "Can't load metadata!", &LI);
+ visitInstruction(LI);
+}
+
+void Verifier::visitStoreInst(StoreInst &SI) {
+ const Type *ElTy =
+ cast<PointerType>(SI.getOperand(1)->getType())->getElementType();
+ Assert2(ElTy == SI.getOperand(0)->getType(),
+ "Stored value type does not match pointer operand type!", &SI, ElTy);
+ Assert1(ElTy != Type::MetadataTy, "Can't store metadata!", &SI);
+ visitInstruction(SI);
+}
+
+void Verifier::visitAllocationInst(AllocationInst &AI) {
+ const PointerType *PTy = AI.getType();
+ Assert1(PTy->getAddressSpace() == 0,
+ "Allocation instruction pointer not in the generic address space!",
+ &AI);
+ Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
+ &AI);
+ visitInstruction(AI);
+}
+
+void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
+ Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
+ EVI.idx_begin(), EVI.idx_end()) ==
+ EVI.getType(),
+ "Invalid ExtractValueInst operands!", &EVI);
+
+ visitInstruction(EVI);
+}
+
+void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
+ Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
+ IVI.idx_begin(), IVI.idx_end()) ==
+ IVI.getOperand(1)->getType(),
+ "Invalid InsertValueInst operands!", &IVI);
+
+ visitInstruction(IVI);
+}
+
+/// visitInstruction - Verify that an instruction is well formed.
+///
+void Verifier::visitInstruction(Instruction &I) {
+ BasicBlock *BB = I.getParent();
+ Assert1(BB, "Instruction not embedded in basic block!", &I);
+
+ if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Assert1(*UI != (User*)&I || !DT->isReachableFromEntry(BB),
+ "Only PHI nodes may reference their own value!", &I);
+ }
+
+ // Verify that if this is a terminator that it is at the end of the block.
+ if (isa<TerminatorInst>(I))
+ Assert1(BB->getTerminator() == &I, "Terminator not at end of block!", &I);
+
+
+ // Check that void typed values don't have names
+ Assert1(I.getType() != Type::VoidTy || !I.hasName(),
+ "Instruction has a name, but provides a void value!", &I);
+
+ // Check that the return value of the instruction is either void or a legal
+ // value type.
+ Assert1(I.getType() == Type::VoidTy || I.getType()->isFirstClassType()
+ || ((isa<CallInst>(I) || isa<InvokeInst>(I))
+ && isa<StructType>(I.getType())),
+ "Instruction returns a non-scalar type!", &I);
+
+  // Check that the instruction doesn't produce metadata or metadata*. Calls
+  // are already checked against the callee type.
+ Assert1(I.getType() != Type::MetadataTy ||
+ isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Invalid use of metadata!", &I);
+
+ if (const PointerType *PTy = dyn_cast<PointerType>(I.getType()))
+ Assert1(PTy->getElementType() != Type::MetadataTy,
+ "Instructions may not produce pointer to metadata.", &I);
+
+
+ // Check that all uses of the instruction, if they are instructions
+ // themselves, actually have parent basic blocks. If the use is not an
+ // instruction, it is an error!
+ for (User::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ Assert1(isa<Instruction>(*UI), "Use of instruction is not an instruction!",
+ *UI);
+ Instruction *Used = cast<Instruction>(*UI);
+ Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
+ " embedded in a basic block!", &I, Used);
+ }
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ Assert1(I.getOperand(i) != 0, "Instruction has null operand!", &I);
+
+ // Check to make sure that only first-class-values are operands to
+ // instructions.
+ if (!I.getOperand(i)->getType()->isFirstClassType()) {
+ Assert1(0, "Instruction operands must be first-class values!", &I);
+ }
+
+ if (const PointerType *PTy =
+ dyn_cast<PointerType>(I.getOperand(i)->getType()))
+ Assert1(PTy->getElementType() != Type::MetadataTy,
+ "Invalid use of metadata pointer.", &I);
+
+ if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
+ // Check to make sure that the "address of" an intrinsic function is never
+ // taken.
+ Assert1(!F->isIntrinsic() || (i == 0 && isa<CallInst>(I)),
+ "Cannot take the address of an intrinsic!", &I);
+ Assert1(F->getParent() == Mod, "Referencing function in another module!",
+ &I);
+ } else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
+ Assert1(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
+ } else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
+ Assert1(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
+ Assert1(GV->getParent() == Mod, "Referencing global in another module!",
+ &I);
+ } else if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
+ BasicBlock *OpBlock = Op->getParent();
+
+ // Check that a definition dominates all of its uses.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+
+ Assert2(NormalDest != II->getUnwindDest(),
+ "No uses of invoke possible due to dominance structure!",
+ Op, &I);
+
+ // PHI nodes differ from other nodes because they actually "use" the
+ // value in the predecessor basic blocks they correspond to.
+ BasicBlock *UseBlock = BB;
+ if (isa<PHINode>(I))
+ UseBlock = cast<BasicBlock>(I.getOperand(i+1));
+
+ if (isa<PHINode>(I) && UseBlock == OpBlock) {
+ // Special case of a phi node in the normal destination or the unwind
+ // destination.
+ Assert2(BB == NormalDest || !DT->isReachableFromEntry(UseBlock),
+ "Invoke result not available in the unwind destination!",
+ Op, &I);
+ } else {
+ Assert2(DT->dominates(NormalDest, UseBlock) ||
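+// For illustration (editorial note, not in the original source): a use such as
+//   Assert2(A == B, "mismatch", A, B);
+// expands to
+//   do { if (!(A == B)) { CheckFailed("mismatch", A, B); return; } } while (0);
+// so a failing check records the message into msgs, sets Broken, and aborts
+// the current visit method.
+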
+ !DT->isReachableFromEntry(UseBlock),
+ "Invoke result does not dominate all uses!", Op, &I);
+
+ // If the normal successor of an invoke instruction has multiple
+ // predecessors, then the normal edge from the invoke is critical,
+ // so the invoke value can only be live if the destination block
+          // dominates all of its predecessors (other than the invoke).
+ if (!NormalDest->getSinglePredecessor() &&
+ DT->isReachableFromEntry(UseBlock))
+ // If it is used by something non-phi, then the other case is that
+ // 'NormalDest' dominates all of its predecessors other than the
+ // invoke. In this case, the invoke value can still be used.
+ for (pred_iterator PI = pred_begin(NormalDest),
+ E = pred_end(NormalDest); PI != E; ++PI)
+ if (*PI != II->getParent() && !DT->dominates(NormalDest, *PI) &&
+ DT->isReachableFromEntry(*PI)) {
+ CheckFailed("Invoke result does not dominate all uses!", Op,&I);
+ return;
+ }
+ }
+ } else if (isa<PHINode>(I)) {
+ // PHI nodes are more difficult than other nodes because they actually
+ // "use" the value in the predecessor basic blocks they correspond to.
+ BasicBlock *PredBB = cast<BasicBlock>(I.getOperand(i+1));
+ Assert2(DT->dominates(OpBlock, PredBB) ||
+ !DT->isReachableFromEntry(PredBB),
+ "Instruction does not dominate all uses!", Op, &I);
+ } else {
+ if (OpBlock == BB) {
+ // If they are in the same basic block, make sure that the definition
+ // comes before the use.
+ Assert2(InstsInThisBlock.count(Op) || !DT->isReachableFromEntry(BB),
+ "Instruction does not dominate all uses!", Op, &I);
+ }
+
+ // Definition must dominate use unless use is unreachable!
+ Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, &I) ||
+ !DT->isReachableFromEntry(BB),
+ "Instruction does not dominate all uses!", Op, &I);
+ }
+ } else if (isa<InlineAsm>(I.getOperand(i))) {
+ Assert1(i == 0 && (isa<CallInst>(I) || isa<InvokeInst>(I)),
+ "Cannot take the address of an inline asm!", &I);
+ }
+ }
+ InstsInThisBlock.insert(&I);
+}
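+
+// For illustration (editorial sketch, not part of the original source):
+// within a single reachable block,
+//   %y = add i32 %x, 1
+//   %x = add i32 %z, 1
+// trips the "Instruction does not dominate all uses!" check above.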
+
+// Flags used by TableGen to mark intrinsic parameters with the
+// LLVMExtendedElementVectorType and LLVMTruncatedElementVectorType classes.
+static const unsigned ExtendedElementVectorType = 0x40000000;
+static const unsigned TruncatedElementVectorType = 0x20000000;
+
+/// visitIntrinsicFunctionCall - Allow intrinsics to be verified in different
+/// ways.
+///
+void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
+ Function *IF = CI.getCalledFunction();
+ Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
+ IF);
+
+#define GET_INTRINSIC_VERIFIER
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_VERIFIER
+
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::dbg_declare: // llvm.dbg.declare
+ if (Constant *C = dyn_cast<Constant>(CI.getOperand(1)))
+ Assert1(C && !isa<ConstantPointerNull>(C),
+ "invalid llvm.dbg.declare intrinsic call", &CI);
+ break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ Assert1(isa<ConstantInt>(CI.getOperand(4)),
+ "alignment argument of memory intrinsics must be a constant int",
+ &CI);
+ break;
+ case Intrinsic::gcroot:
+ case Intrinsic::gcwrite:
+ case Intrinsic::gcread:
+ if (ID == Intrinsic::gcroot) {
+ AllocaInst *AI =
+ dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts());
+ Assert1(AI && isa<PointerType>(AI->getType()->getElementType()),
+ "llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
+ Assert1(isa<Constant>(CI.getOperand(2)),
+ "llvm.gcroot parameter #2 must be a constant.", &CI);
+ }
+
+ Assert1(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
+ break;
+ case Intrinsic::init_trampoline:
+ Assert1(isa<Function>(CI.getOperand(2)->stripPointerCasts()),
+ "llvm.init_trampoline parameter #2 must resolve to a function.",
+ &CI);
+ break;
+ case Intrinsic::prefetch:
+ Assert1(isa<ConstantInt>(CI.getOperand(2)) &&
+ isa<ConstantInt>(CI.getOperand(3)) &&
+ cast<ConstantInt>(CI.getOperand(2))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getOperand(3))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch",
+ &CI);
+ break;
+ case Intrinsic::stackprotector:
+ Assert1(isa<AllocaInst>(CI.getOperand(2)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.",
+ &CI);
+ break;
+ }
+}
+
+/// Produce a string to identify an intrinsic parameter or return value.
+/// The ArgNo value numbers the return values from 0 to NumRets-1 and the
+/// parameters beginning with NumRets.
+///
+static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) {
+ if (ArgNo < NumRets) {
+ if (NumRets == 1)
+ return "Intrinsic result type";
+ else
+ return "Intrinsic result type #" + utostr(ArgNo);
+ } else
+ return "Intrinsic parameter #" + utostr(ArgNo - NumRets);
+}
+
+bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
+ int VT, unsigned ArgNo, std::string &Suffix) {
+ const FunctionType *FTy = F->getFunctionType();
+
+ unsigned NumElts = 0;
+ const Type *EltTy = Ty;
+ const VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (VTy) {
+ EltTy = VTy->getElementType();
+ NumElts = VTy->getNumElements();
+ }
+
+ const Type *RetTy = FTy->getReturnType();
+ const StructType *ST = dyn_cast<StructType>(RetTy);
+ unsigned NumRets = 1;
+ if (ST)
+ NumRets = ST->getNumElements();
+
+ if (VT < 0) {
+ int Match = ~VT;
+
+ // Check flags that indicate a type that is an integral vector type with
+ // elements that are larger or smaller than the elements of the matched
+ // type.
+ if ((Match & (ExtendedElementVectorType |
+ TruncatedElementVectorType)) != 0) {
+ const IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy);
+ if (!VTy || !IEltTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not "
+ "an integral vector type.", F);
+ return false;
+ }
+ // Adjust the current Ty (in the opposite direction) rather than
+ // the type being matched against.
+ if ((Match & ExtendedElementVectorType) != 0) {
+ if ((IEltTy->getBitWidth() & 1) != 0) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " vector "
+ "element bit-width is odd.", F);
+ return false;
+ }
+ Ty = VectorType::getTruncatedElementVectorType(VTy);
+ } else
+ Ty = VectorType::getExtendedElementVectorType(VTy);
+ Match &= ~(ExtendedElementVectorType | TruncatedElementVectorType);
+ }
+
+ if (Match <= static_cast<int>(NumRets - 1)) {
+ if (ST)
+ RetTy = ST->getElementType(Match);
+
+ if (Ty != RetTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " does not "
+ "match return type.", F);
+ return false;
+ }
+ } else {
+ if (Ty != FTy->getParamType(Match - 1)) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " does not "
+ "match parameter %" + utostr(Match - 1) + ".", F);
+ return false;
+ }
+ }
+ } else if (VT == MVT::iAny) {
+ if (!EltTy->isInteger()) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not "
+ "an integer type.", F);
+ return false;
+ }
+
+ unsigned GotBits = cast<IntegerType>(EltTy)->getBitWidth();
+ Suffix += ".";
+
+ if (EltTy != Ty)
+ Suffix += "v" + utostr(NumElts);
+
+ Suffix += "i" + utostr(GotBits);
+
+ // Check some constraints on various intrinsics.
+ switch (ID) {
+ default: break; // Not everything needs to be checked.
+ case Intrinsic::bswap:
+ if (GotBits < 16 || GotBits % 16 != 0) {
+ CheckFailed("Intrinsic requires even byte width argument", F);
+ return false;
+ }
+ break;
+ }
+ } else if (VT == MVT::fAny) {
+ if (!EltTy->isFloatingPoint()) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not "
+ "a floating-point type.", F);
+ return false;
+ }
+
+ Suffix += ".";
+
+ if (EltTy != Ty)
+ Suffix += "v" + utostr(NumElts);
+
+ Suffix += MVT::getMVT(EltTy).getMVTString();
+ } else if (VT == MVT::iPTR) {
+ if (!isa<PointerType>(Ty)) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a "
+ "pointer and a pointer is required.", F);
+ return false;
+ }
+ } else if (VT == MVT::iPTRAny) {
+ // Outside of TableGen, we don't distinguish iPTRAny (to any address space)
+    // and iPTR. In the verifier, we cannot distinguish which case we have,
+    // so allow either case to be legal.
+ if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
+ Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
+ MVT::getMVT(PTyp->getElementType()).getMVTString();
+ } else {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a "
+ "pointer and a pointer is required.", F);
+ return false;
+ }
+ } else if (MVT((MVT::SimpleValueType)VT).isVector()) {
+ MVT VVT = MVT((MVT::SimpleValueType)VT);
+
+ // If this is a vector argument, verify the number and type of elements.
+ if (VVT.getVectorElementType() != MVT::getMVT(EltTy)) {
+ CheckFailed("Intrinsic prototype has incorrect vector element type!", F);
+ return false;
+ }
+
+ if (VVT.getVectorNumElements() != NumElts) {
+ CheckFailed("Intrinsic prototype has incorrect number of "
+ "vector elements!", F);
+ return false;
+ }
+ } else if (MVT((MVT::SimpleValueType)VT).getTypeForMVT() != EltTy) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is wrong!", F);
+ return false;
+ } else if (EltTy != Ty) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is a vector "
+ "and a scalar is required.", F);
+ return false;
+ }
+
+ return true;
+}
+
+/// VerifyIntrinsicPrototype - TableGen emits calls to this function into
+/// Intrinsics.gen. This implements a little state machine that verifies the
+/// prototype of intrinsics.
+void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
+ unsigned RetNum,
+ unsigned ParamNum, ...) {
+ va_list VA;
+ va_start(VA, ParamNum);
+ const FunctionType *FTy = F->getFunctionType();
+
+ // For overloaded intrinsics, the Suffix of the function name must match the
+ // types of the arguments. This variable keeps track of the expected
+ // suffix, to be checked at the end.
+ std::string Suffix;
+
+ if (FTy->getNumParams() + FTy->isVarArg() != ParamNum) {
+ CheckFailed("Intrinsic prototype has incorrect number of arguments!", F);
+ return;
+ }
+
+ const Type *Ty = FTy->getReturnType();
+ const StructType *ST = dyn_cast<StructType>(Ty);
+
+ // Verify the return types.
+ if (ST && ST->getNumElements() != RetNum) {
+ CheckFailed("Intrinsic prototype has incorrect number of return types!", F);
+ return;
+ }
+
+ for (unsigned ArgNo = 0; ArgNo < RetNum; ++ArgNo) {
+ int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative.
+
+ if (ST) Ty = ST->getElementType(ArgNo);
+
+ if (!PerformTypeCheck(ID, F, Ty, VT, ArgNo, Suffix))
+ break;
+ }
+
+ // Verify the parameter types.
+ for (unsigned ArgNo = 0; ArgNo < ParamNum; ++ArgNo) {
+ int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative.
+
+ if (VT == MVT::isVoid && ArgNo > 0) {
+ if (!FTy->isVarArg())
+ CheckFailed("Intrinsic prototype has no '...'!", F);
+ break;
+ }
+
+ if (!PerformTypeCheck(ID, F, FTy->getParamType(ArgNo), VT, ArgNo + RetNum,
+ Suffix))
+ break;
+ }
+
+ va_end(VA);
+
+ // For intrinsics without pointer arguments, if we computed a Suffix then the
+ // intrinsic is overloaded and we need to make sure that the name of the
+ // function is correct. We add the suffix to the name of the intrinsic and
+ // compare against the given function name. If they are not the same, the
+ // function name is invalid. This ensures that overloading of intrinsics
+  // uses a sane and consistent naming convention. Note that an intrinsic with
+  // a pointer argument may or may not be overloaded, so the name is only
+  // checked when a suffix was actually computed.
+ if (!Suffix.empty()) {
+ std::string Name(Intrinsic::getName(ID));
+ if (Name + Suffix != F->getName()) {
+ CheckFailed("Overloaded intrinsic has incorrect suffix: '" +
+ F->getName().substr(Name.length()) + "'. It should be '" +
+ Suffix + "'", F);
+ }
+ }
+
+ // Check parameter attributes.
+ Assert1(F->getAttributes() == Intrinsic::getAttributes(ID),
+ "Intrinsic has wrong parameter attributes!", F);
+}
+
+/// Verify that an MDNode is not cyclic.
+bool Verifier::VerifyMDNode(const MDNode *N) {
+ if (N->elem_empty()) return true;
+
+ // The current DFS path through the nodes. Node and element number.
+ typedef std::pair<const MDNode *, MDNode::const_elem_iterator> Edge;
+ SmallVector<Edge, 8> Path;
+
+ Path.push_back(std::make_pair(N, N->elem_begin()));
+ while (!Path.empty()) {
+ Edge &e = Path.back();
+ const MDNode *&e_N = e.first;
+ MDNode::const_elem_iterator &e_I = e.second;
+
+ if (e_N->elem_end() == e_I) {
+ Path.pop_back();
+ continue;
+ }
+
+ for (MDNode::const_elem_iterator e_E = e_N->elem_end(); e_I != e_E; ++e_I) {
+ if (const MDNode *C = dyn_cast_or_null<MDNode>(e_I->operator Value*())) {
+ // Is child MDNode C already in the Path?
+ for (SmallVectorImpl<Edge>::iterator I = Path.begin(), E = Path.end();
+ I != E; ++I) {
+          if (I->first == C) {
+ CheckFailed("MDNode is cyclic.", C);
+ return false;
+ }
+ }
+
+ Path.push_back(std::make_pair(C, C->elem_begin()));
+ break;
+ }
+ }
+ }
+ return true;
+}
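+
+// For illustration (editorial sketch, not part of the original source;
+// pseudo-notation): two nodes that contain each other, N0 = !{ N1 } and
+// N1 = !{ N0 }, put a node back on the current DFS Path and are reported
+// as "MDNode is cyclic."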
+
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) {
+ return new Verifier(action);
+}
+
+
+/// verifyFunction - Check a function for errors, printing messages on stderr.
+/// Return true if the function is corrupt.
+///
+bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot verify external functions");
+
+ ExistingModuleProvider MP(F.getParent());
+ FunctionPassManager FPM(&MP);
+ Verifier *V = new Verifier(action);
+ FPM.add(V);
+ FPM.run(F);
+ MP.releaseModule();
+ return V->Broken;
+}
+
+/// verifyModule - Check a module for errors, printing messages on stderr.
+/// Return true if the module is corrupt.
+///
+bool llvm::verifyModule(const Module &M, VerifierFailureAction action,
+ std::string *ErrorInfo) {
+ PassManager PM;
+ Verifier *V = new Verifier(action);
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+
+ if (ErrorInfo && V->Broken)
+ *ErrorInfo = V->msgs.str();
+ return V->Broken;
+}
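+
+// A minimal usage sketch (editorial, not part of the original source):
+//   std::string Err;
+//   if (verifyModule(*M, ReturnStatusAction, &Err))
+//     errs() << "broken module: " << Err;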
+
+// vim: sw=2